diff options
Diffstat (limited to 'fs/ntfs')
44 files changed, 26034 insertions, 0 deletions
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog new file mode 100644 index 000000000000..1d2ad15f1533 --- /dev/null +++ b/fs/ntfs/ChangeLog | |||
@@ -0,0 +1,1350 @@ | |||
1 | ToDo/Notes: | ||
2 | - Find and fix bugs. | ||
3 | - Checkpoint or disable the user space journal ($UsnJrnl). | ||
4 | - In between ntfs_prepare/commit_write, need exclusion between | ||
5 | simultaneous file extensions. Need perhaps an NInoResizeUnderway() | ||
6 | flag which we can set in ntfs_prepare_write() and clear again in | ||
7 | ntfs_commit_write(). Just have to be careful in readpage/writepage, | ||
8 | as well as in truncate, that we play nice... We might need to have | ||
9 | a data_size field in the ntfs_inode to store the real attribute | ||
10 | length. Also need to be careful with initialized_size extension in | |
11 | ntfs_prepare_write. Basically, just be _very_ careful in this code... | ||
12 | OTOH, perhaps i_sem, which is held across generic_file_write is | |
13 | sufficient for synchronisation here. We then just need to make sure | ||
14 | ntfs_readpage/writepage/truncate interoperate properly with us. | ||
15 | UPDATE: The above is all ok as it is due to i_sem held. The only | ||
16 | thing that needs to be checked is ntfs_writepage() which does not | ||
17 | hold i_sem. It cannot change i_size but it needs to cope with a | ||
18 | concurrent i_size change. | ||
19 | - Implement mft.c::sync_mft_mirror_umount(). We currently will just | ||
20 | leave the volume dirty on umount if the final iput(vol->mft_ino) | ||
21 | causes a write of any mirrored mft records due to the mft mirror | ||
22 | inode having been discarded already. Whether this can actually ever | ||
23 | happen is unclear however so it is worth waiting until someone hits | ||
24 | the problem. | ||
25 | - Enable the code for setting the NT4 compatibility flag when we start | ||
26 | making NTFS 1.2 specific modifications. | ||
27 | |||
28 | 2.1.23-WIP | ||
29 | |||
30 | - Add printk rate limiting for ntfs_warning() and ntfs_error() when | ||
31 | compiled without debug. This avoids a possible denial of service | ||
32 | attack. Thanks to Carl-Daniel Hailfinger from SuSE for pointing this | ||
33 | out. | ||
34 | |||
35 | 2.1.22 - Many bug and race fixes and error handling improvements. | ||
36 | |||
37 | - Improve error handling in fs/ntfs/inode.c::ntfs_truncate(). | ||
38 | - Change fs/ntfs/inode.c::ntfs_truncate() to return an error code | ||
39 | instead of void and provide a helper ntfs_truncate_vfs() for the | ||
40 | vfs ->truncate method. | ||
41 | - Add a new ntfs inode flag NInoTruncateFailed() and modify | ||
42 | fs/ntfs/inode.c::ntfs_truncate() to set and clear it appropriately. | ||
43 | - Fix min_size and max_size definitions in ATTR_DEF structure in | ||
44 | fs/ntfs/layout.h to be signed. | ||
45 | - Add attribute definition handling helpers to fs/ntfs/attrib.[hc]: | ||
46 | ntfs_attr_size_bounds_check(), ntfs_attr_can_be_non_resident(), and | ||
47 | ntfs_attr_can_be_resident(), which in turn use the new private helper | ||
48 | ntfs_attr_find_in_attrdef(). | ||
49 | - In fs/ntfs/aops.c::mark_ntfs_record_dirty(), take the | ||
50 | mapping->private_lock around the dirtying of the buffer heads | ||
51 | analogous to the way it is done in __set_page_dirty_buffers(). | |
52 | - Ensure the mft record size does not exceed the PAGE_CACHE_SIZE at | ||
53 | mount time as this cannot work with the current implementation. | ||
54 | - Check for location of attribute name and improve error handling in | ||
55 | general in fs/ntfs/inode.c::ntfs_read_locked_inode() and friends. | ||
56 | - In fs/ntfs/aops.c::ntfs_writepage(), if the page is fully outside | ||
57 | i_size, i.e. race with truncate, invalidate the buffers on the page | ||
58 | so that they become freeable and hence the page does not leak. | ||
59 | - Remove unused function fs/ntfs/runlist.c::ntfs_rl_merge(). (Adrian | ||
60 | Bunk) | ||
61 | - Fix stupid bug in fs/ntfs/attrib.c::ntfs_attr_find() that resulted in | ||
62 | a NULL pointer dereference in the error code path when a corrupt | ||
63 | attribute was found. (Thanks to Domen Puncer for the bug report.) | ||
64 | - Add MODULE_VERSION() to fs/ntfs/super.c. | ||
65 | - Make several functions and variables static. (Adrian Bunk) | ||
66 | - Modify fs/ntfs/aops.c::mark_ntfs_record_dirty() so it allocates | ||
67 | buffers for the page if they are not present and then marks the | ||
68 | buffers belonging to the ntfs record dirty. This causes the buffers | ||
69 | to become busy and hence they are safe from removal until the page | ||
70 | has been written out. | ||
71 | - Fix stupid bug in fs/ntfs/attrib.c::ntfs_external_attr_find() in the | ||
72 | error handling code path that resulted in a BUG() due to trying to | ||
73 | unmap an extent mft record when the mapping of it had failed and it | ||
74 | thus was not mapped. (Thanks to Ken MacFerrin for the bug report.) | ||
75 | - Drop the runlist lock after the vcn has been read in | ||
76 | fs/ntfs/lcnalloc.c::__ntfs_cluster_free(). | ||
77 | - Rewrite handling of multi sector transfer errors. We now do not set | ||
78 | PageError() when such errors are detected in the async i/o handler | ||
79 | fs/ntfs/aops.c::ntfs_end_buffer_async_read(). All users of mst | ||
80 | protected attributes now check the magic of each ntfs record as they | ||
81 | use it and act appropriately. This has the effect of making errors | ||
82 | granular per ntfs record rather than per page which solves the case | ||
83 | where we cannot access any of the ntfs records in a page when a | ||
84 | single one of them had an mst error. (Thanks to Ken MacFerrin for | ||
85 | the bug report.) | ||
86 | - Fix error handling in fs/ntfs/quota.c::ntfs_mark_quotas_out_of_date() | ||
87 | where we failed to release i_sem on the $Quota/$Q attribute inode. | ||
88 | - Fix bug in handling of bad inodes in fs/ntfs/namei.c::ntfs_lookup(). | ||
89 | - Add mapping of unmapped buffers to all remaining code paths, i.e. | ||
90 | fs/ntfs/aops.c::ntfs_write_mst_block(), mft.c::ntfs_sync_mft_mirror(), | ||
91 | and write_mft_record_nolock(). From now on we require that the | ||
92 | complete runlist for the mft mirror is always mapped into memory. | ||
93 | - Add creation of buffers to fs/ntfs/mft.c::ntfs_sync_mft_mirror(). | ||
94 | - Improve error handling in fs/ntfs/aops.c::ntfs_{read,write}_block(). | ||
95 | - Cleanup fs/ntfs/aops.c::ntfs_{read,write}page() since we know that a | ||
96 | resident attribute will be smaller than a page which makes the code | ||
97 | simpler. Also make the code more tolerant to concurrent ->truncate. | ||
98 | |||
99 | 2.1.21 - Fix some races and bugs, rewrite mft write code, add mft allocator. | ||
100 | |||
101 | - Implement extent mft record deallocation | ||
102 | fs/ntfs/mft.c::ntfs_extent_mft_record_free(). | ||
103 | - Split runlist related functions off from attrib.[hc] to runlist.[hc]. | |
104 | - Add vol->mft_data_pos and initialize it at mount time. | ||
105 | - Rename init_runlist() to ntfs_init_runlist(), ntfs_vcn_to_lcn() to | ||
106 | ntfs_rl_vcn_to_lcn(), decompress_mapping_pairs() to | ||
107 | ntfs_mapping_pairs_decompress(), ntfs_merge_runlists() to | ||
108 | ntfs_runlists_merge() and adapt all callers. | ||
109 | - Add fs/ntfs/runlist.[hc]::ntfs_get_nr_significant_bytes(), | ||
110 | ntfs_get_size_for_mapping_pairs(), ntfs_write_significant_bytes(), | ||
111 | and ntfs_mapping_pairs_build(), adapted from libntfs. | ||
112 | - Make fs/ntfs/lcnalloc.c::ntfs_cluster_free_from_rl_nolock() not | ||
113 | static and add a declaration for it to lcnalloc.h. | ||
114 | - Add fs/ntfs/lcnalloc.h::ntfs_cluster_free_from_rl() which is a static | ||
115 | inline wrapper for ntfs_cluster_free_from_rl_nolock() which takes the | ||
116 | cluster bitmap lock for the duration of the call. | ||
117 | - Add fs/ntfs/attrib.[hc]::ntfs_attr_record_resize(). | ||
118 | - Implement the equivalent of memset() for an ntfs attribute in | ||
119 | fs/ntfs/attrib.[hc]::ntfs_attr_set() and switch | ||
120 | fs/ntfs/logfile.c::ntfs_empty_logfile() to using it. | ||
121 | - Remove unnecessary casts from LCN_* constants. | ||
122 | - Implement fs/ntfs/runlist.c::ntfs_rl_truncate_nolock(). | ||
123 | - Add MFT_RECORD_OLD as a copy of MFT_RECORD in fs/ntfs/layout.h and | ||
124 | change MFT_RECORD to contain the NTFS 3.1+ specific fields. | ||
125 | - Add a helper function fs/ntfs/aops.c::mark_ntfs_record_dirty() which | ||
126 | marks all buffers belonging to an ntfs record dirty, followed by | ||
127 | marking the page the ntfs record is in dirty and also marking the vfs | ||
128 | inode containing the ntfs record dirty (I_DIRTY_PAGES). | ||
129 | - Switch fs/ntfs/index.h::ntfs_index_entry_mark_dirty() to using the | ||
130 | new helper fs/ntfs/aops.c::mark_ntfs_record_dirty() and remove the no | ||
131 | longer needed fs/ntfs/index.[hc]::__ntfs_index_entry_mark_dirty(). | ||
132 | - Move ntfs_{un,}map_page() from ntfs.h to aops.h and fix resulting | ||
133 | include errors. | ||
134 | - Move the typedefs for runlist_element and runlist from types.h to | ||
135 | runlist.h and fix resulting include errors. | ||
136 | - Remove unused {__,}format_mft_record() from fs/ntfs/mft.c. | ||
137 | - Modify fs/ntfs/mft.c::__mark_mft_record_dirty() to use the helper | ||
138 | mark_ntfs_record_dirty() which also changes the behaviour in that we | ||
139 | now set the buffers belonging to the mft record dirty as well as the | ||
140 | page itself. | ||
141 | - Update fs/ntfs/mft.c::write_mft_record_nolock() and sync_mft_mirror() | ||
142 | to cope with the fact that there now are dirty buffers in mft pages. | ||
143 | - Update fs/ntfs/inode.c::ntfs_write_inode() to also use the helper | ||
144 | mark_ntfs_record_dirty() and thus to set the buffers belonging to the | ||
145 | mft record dirty as well as the page itself. | ||
146 | - Fix compiler warnings on x86-64 in fs/ntfs/dir.c. (Randy Dunlap, | ||
147 | slightly modified by me) | ||
148 | - Add fs/ntfs/mft.c::try_map_mft_record() which fails with -EALREADY if | ||
149 | the mft record is already locked and otherwise behaves the same way | ||
150 | as fs/ntfs/mft.c::map_mft_record(). | ||
151 | - Modify fs/ntfs/mft.c::write_mft_record_nolock() so that it only | ||
152 | writes the mft record if the buffers belonging to it are dirty. | ||
153 | Otherwise we assume that it was written out by other means already. | ||
154 | - Attempting to write outside initialized size is _not_ a bug so remove | ||
155 | the bug check from fs/ntfs/aops.c::ntfs_write_mst_block(). It is in | ||
156 | fact required to write outside initialized size when preparing to | ||
157 | extend the initialized size. | ||
158 | - Map the page instead of using page_address() before writing to it in | ||
159 | fs/ntfs/aops.c::ntfs_mft_writepage(). | ||
160 | - Provide exclusion between opening an inode / mapping an mft record | ||
161 | and accessing the mft record in fs/ntfs/mft.c::ntfs_mft_writepage() | ||
162 | by setting the page not uptodate throughout ntfs_mft_writepage(). | ||
163 | - Clear the page uptodate flag in fs/ntfs/aops.c::ntfs_write_mst_block() | ||
164 | to ensure no one can see the page whilst the mst fixups are applied. | |
165 | - Add the helper fs/ntfs/mft.c::ntfs_may_write_mft_record() which | ||
166 | checks if an mft record may be written out safely obtaining any | ||
167 | necessary locks in the process. This is used by | ||
168 | fs/ntfs/aops.c::ntfs_write_mst_block(). | ||
169 | - Modify fs/ntfs/aops.c::ntfs_write_mst_block() to also work for | ||
170 | writing mft records and improve its error handling in the process. | ||
171 | Now if any of the records in the page fail to be written out, all | ||
172 | other records will be written out instead of aborting completely. | ||
173 | - Remove ntfs_mft_aops and update all users to use ntfs_mst_aops. | ||
174 | - Modify fs/ntfs/inode.c::ntfs_read_locked_inode() to set the | ||
175 | ntfs_mst_aops for all inodes which are NInoMstProtected() and | ||
176 | ntfs_aops for all other inodes. | ||
177 | - Rename fs/ntfs/mft.c::sync_mft_mirror{,_umount}() to | ||
178 | ntfs_sync_mft_mirror{,_umount}() and change their parameters so they | ||
179 | no longer require an ntfs inode to be present. Update all callers. | ||
180 | - Cleanup the error handling in fs/ntfs/mft.c::ntfs_sync_mft_mirror(). | ||
181 | - Clear the page uptodate flag in fs/ntfs/mft.c::ntfs_sync_mft_mirror() | ||
182 | to ensure no one can see the page whilst the mst fixups are applied. | |
183 | - Remove the no longer needed fs/ntfs/mft.c::ntfs_mft_writepage() and | ||
184 | fs/ntfs/mft.c::try_map_mft_record(). | ||
185 | - Fix callers of fs/ntfs/aops.c::mark_ntfs_record_dirty() to call it | ||
186 | with the ntfs inode which contains the page rather than the ntfs | ||
187 | inode the mft record of which is in the page. | ||
188 | - Fix race condition in fs/ntfs/inode.c::ntfs_put_inode() by moving the | ||
189 | index inode bitmap inode release code from there to | ||
190 | fs/ntfs/inode.c::ntfs_clear_big_inode(). (Thanks to Christoph | ||
191 | Hellwig for spotting this.) | ||
192 | - Fix race condition in fs/ntfs/inode.c::ntfs_put_inode() by taking the | ||
193 | inode semaphore around the code that sets ni->itype.index.bmp_ino to | ||
194 | NULL and reorganize the code to optimize it a bit. (Thanks to | ||
195 | Christoph Hellwig for spotting this.) | ||
196 | - Modify fs/ntfs/aops.c::mark_ntfs_record_dirty() to no longer take the | ||
197 | ntfs inode as a parameter as this is confusing and misleading and the | ||
198 | needed ntfs inode is available via NTFS_I(page->mapping->host). | ||
199 | Adapt all callers to this change. | ||
200 | - Modify fs/ntfs/mft.c::write_mft_record_nolock() and | ||
201 | fs/ntfs/aops.c::ntfs_write_mst_block() to only check the dirty state | ||
202 | of the first buffer in a record and to take this as the ntfs record | ||
203 | dirty state. We cannot look at the dirty state for subsequent | ||
204 | buffers because we might be racing with | ||
205 | fs/ntfs/aops.c::mark_ntfs_record_dirty(). | ||
206 | - Move the static inline ntfs_init_big_inode() from fs/ntfs/inode.c to | ||
207 | inode.h and make fs/ntfs/inode.c::__ntfs_init_inode() non-static and | ||
208 | add a declaration for it to inode.h. Fix some compilation issues | ||
209 | that resulted due to #includes and header file interdependencies. | ||
210 | - Simplify setup of i_mode in fs/ntfs/inode.c::ntfs_read_locked_inode(). | ||
211 | - Add helpers fs/ntfs/layout.h::MK_MREF() and MK_LE_MREF(). | ||
212 | - Modify fs/ntfs/mft.c::map_extent_mft_record() to only verify the mft | ||
213 | record sequence number if it is specified (i.e. not zero). | ||
214 | - Add fs/ntfs/mft.[hc]::ntfs_mft_record_alloc() and various helper | ||
215 | functions used by it. | ||
216 | - Update Documentation/filesystems/ntfs.txt with instructions on how to | ||
217 | use the Device-Mapper driver with NTFS ftdisk/LDM raid. This removes | ||
218 | the linear raid problem with the Software RAID / MD driver when one | ||
219 | or more of the devices has an odd number of sectors. | ||
220 | |||
221 | 2.1.20 - Fix two stupid bugs introduced in 2.1.18 release. | ||
222 | |||
223 | - Fix stupid bug in fs/ntfs/attrib.c::ntfs_attr_reinit_search_ctx() | ||
224 | where we did not clear ctx->al_entry but it was still set due to | ||
225 | changes in ntfs_attr_lookup() and ntfs_external_attr_find() in | ||
226 | particular. | ||
227 | - Fix another stupid bug in fs/ntfs/attrib.c::ntfs_external_attr_find() | ||
228 | where we forgot to unmap the extent mft record when we had finished | ||
229 | enumerating an attribute which caused a bug check to trigger when the | ||
230 | VFS calls ->clear_inode. | ||
231 | |||
232 | 2.1.19 - Many cleanups, improvements, and a minor bug fix. | ||
233 | |||
234 | - Update ->setattr (fs/ntfs/inode.c::ntfs_setattr()) to refuse to | ||
235 | change the uid, gid, and mode of an inode as we do not support NTFS | ||
236 | ACLs yet. | ||
237 | - Remove BKL use from ntfs_setattr() syncing up with the rest of the | ||
238 | kernel. | ||
239 | - Get rid of the ugly transparent union in fs/ntfs/dir.c::ntfs_readdir() | ||
240 | and ntfs_filldir() as per suggestion from Al Viro. | ||
241 | - Change '\0' and L'\0' to simply 0 as per advice from Linus Torvalds. | ||
242 | - Update ->truncate (fs/ntfs/inode.c::ntfs_truncate()) to check if the | ||
243 | inode size has changed and to only output an error if so. | ||
244 | - Rename fs/ntfs/attrib.h::attribute_value_length() to ntfs_attr_size(). | ||
245 | - Add le{16,32,64} as well as sle{16,32,64} data types to | ||
246 | fs/ntfs/types.h. | ||
247 | - Change ntfschar to be le16 instead of u16 in fs/ntfs/types.h. | ||
248 | - Add le versions of VCN, LCN, and LSN called leVCN, leLCN, and leLSN, | ||
249 | respectively, to fs/ntfs/types.h. | ||
250 | - Update endianness conversion macros in fs/ntfs/endian.h to use the | ||
251 | new types as appropriate. | ||
252 | - Do proper type casting when using sle64_to_cpup() in fs/ntfs/dir.c | ||
253 | and index.c. | ||
254 | - Add leMFT_REF data type to fs/ntfs/layout.h. | ||
255 | - Update all NTFS header files with the new little endian data types. | ||
256 | Affected files are fs/ntfs/layout.h, logfile.h, and time.h. | ||
257 | - Do proper type casting when using ntfs_is_*_recordp() in | ||
258 | fs/ntfs/logfile.c, mft.c, and super.c. | ||
259 | - Fix all the sparse bitwise warnings. Had to change all the typedef | ||
260 | enums storing little endian values to simple enums plus a typedef for | ||
261 | the datatype to make sparse happy. | ||
262 | - Fix a bug found by the new sparse bitwise warnings where the default | ||
263 | upcase table was defined as a pointer to wchar_t rather than ntfschar | ||
264 | in fs/ntfs/ntfs.h and super.c. | ||
265 | - Change {const_,}cpu_to_le{16,32}(0) to just 0 as suggested by Al Viro. | ||
266 | |||
267 | 2.1.18 - Fix scheduling latencies at mount time as well as an endianness bug. | ||
268 | |||
269 | - Remove vol->nr_mft_records as it was pretty meaningless and optimize | ||
270 | the calculation of total/free inodes as used by statfs(). | ||
271 | - Fix scheduling latencies in ntfs_fill_super() by dropping the BKL | ||
272 | because the code itself is using the ntfs_lock semaphore which | ||
273 | provides safe locking. (Ingo Molnar) | ||
274 | - Fix a potential bug in fs/ntfs/mft.c::map_extent_mft_record() that | ||
275 | could occur in the future for when we start closing/freeing extent | ||
276 | inodes if we don't set base_ni->ext.extent_ntfs_inos to NULL after | ||
277 | we free it. | ||
278 | - Rename {find,lookup}_attr() to ntfs_attr_{find,lookup}() as well as | ||
279 | find_external_attr() to ntfs_external_attr_find() to cleanup the | ||
280 | namespace a bit and to be more consistent with libntfs. | ||
281 | - Rename {{re,}init,get,put}_attr_search_ctx() to | ||
282 | ntfs_attr_{{re,}init,get,put}_search_ctx() as well as the type | ||
283 | attr_search_context to ntfs_attr_search_ctx. | ||
284 | - Force use of ntfs_attr_find() in ntfs_attr_lookup() when searching | ||
285 | for the attribute list attribute itself. | ||
286 | - Fix endianness bug in ntfs_external_attr_find(). | ||
287 | - Change ntfs_{external_,}attr_find() to return 0 on success, -ENOENT | ||
288 | if the attribute is not found, and -EIO on real error. In the case | ||
289 | of -ENOENT, the search context is updated to describe the attribute | ||
290 | before which the attribute being searched for would need to be | ||
291 | inserted if such an action were to be desired and in the case of | ||
292 | ntfs_external_attr_find() the search context is also updated to | ||
293 | indicate the attribute list entry before which the attribute list | ||
294 | entry of the attribute being searched for would need to be inserted | ||
295 | if such an action were to be desired. Also make ntfs_attr_find() | |
296 | static and remove its prototype from attrib.h as it is not used | ||
297 | anywhere other than attrib.c. Update ntfs_attr_lookup() and all | ||
298 | callers of ntfs_{external_,}attr_{find,lookup}() for the new return | |
299 | values. | ||
300 | - Minor cleanup of fs/ntfs/inode.c::ntfs_init_locked_inode(). | ||
301 | |||
302 | 2.1.17 - Fix bugs in mount time error code paths and other updates. | ||
303 | |||
304 | - Implement bitmap modification code (fs/ntfs/bitmap.[hc]). This | ||
305 | includes functions to set/clear a single bit or a run of bits. | ||
306 | - Add fs/ntfs/attrib.[hc]::ntfs_find_vcn() which returns the locked | ||
307 | runlist element containing a particular vcn. It also takes care of | ||
308 | mapping any needed runlist fragments. | ||
309 | - Implement cluster (de-)allocation code (fs/ntfs/lcnalloc.[hc]). | ||
310 | - Load attribute definition table from $AttrDef at mount time. | ||
311 | - Fix bugs in mount time error code paths involving (de)allocation of | ||
312 | the default and volume upcase tables. | ||
313 | - Remove ntfs_nr_mounts as it is no longer used. | ||
314 | |||
315 | 2.1.16 - Implement access time updates, file sync, async io, and read/writev. | ||
316 | |||
317 | - Add support for readv/writev and aio_read/aio_write (fs/ntfs/file.c). | ||
318 | This is done by setting the appropriate file operations pointers to | ||
319 | the generic helper functions provided by mm/filemap.c. | ||
320 | - Implement fsync, fdatasync, and msync both for files (fs/ntfs/file.c) | ||
321 | and directories (fs/ntfs/dir.c). | ||
322 | - Add support for {a,m,c}time updates to inode.c::ntfs_write_inode(). | ||
323 | Note, except for the root directory and any other system files opened | ||
324 | by the user, the system files will not have their access times | ||
325 | updated as they are only accessed at the inode level and hence the | |
326 | file level functions which cause the times to be updated are never | ||
327 | invoked. | ||
328 | |||
329 | 2.1.15 - Invalidate quotas when (re)mounting read-write. | ||
330 | |||
331 | - Add new element itype.index.collation_rule to the ntfs inode | ||
332 | structure and set it appropriately in ntfs_read_locked_inode(). | ||
333 | - Implement a new inode type "index" to allow efficient access to the | ||
334 | indices found in various system files and adapt inode handling | ||
335 | accordingly (fs/ntfs/inode.[hc]). An index inode is essentially an | ||
336 | attribute inode (NInoAttr() is true) with an attribute type of | ||
337 | AT_INDEX_ALLOCATION. As such, it is no longer allowed to call | ||
338 | ntfs_attr_iget() with an attribute type of AT_INDEX_ALLOCATION as | ||
339 | there would be no way to distinguish between normal attribute inodes | ||
340 | and index inodes. The function to obtain an index inode is | ||
341 | ntfs_index_iget() and it uses the helper function | ||
342 | ntfs_read_locked_index_inode(). Note, we do not overload | ||
343 | ntfs_attr_iget() as indices consist of multiple attributes so using | ||
344 | ntfs_attr_iget() to obtain an index inode would be confusing. | ||
345 | - Ensure that there is no overflow when doing page->index << | ||
346 | PAGE_CACHE_SHIFT by casting page->index to s64 in fs/ntfs/aops.c. | ||
347 | - Use atomic kmap instead of kmap() in fs/ntfs/aops.c::ntfs_readpage() | |
348 | and ntfs_read_block(). | ||
349 | - Use case sensitive attribute lookups instead of case insensitive ones. | ||
350 | - Lock all page cache pages belonging to mst protected attributes while | ||
351 | accessing them to ensure we never see corrupt data while the page is | ||
352 | under writeout. | ||
353 | - Add framework for generic ntfs collation (fs/ntfs/collation.[hc]). | ||
354 | We have ntfs_is_collation_rule_supported() to check if the collation | ||
355 | rule you want to use is supported and ntfs_collation() which actually | ||
356 | collates two data items. We currently only support COLLATION_BINARY | ||
357 | and COLLATION_NTOFS_ULONG but support for other collation rules will | ||
358 | be added as the need arises. | ||
359 | - Add a new type, ntfs_index_context, to allow retrieval of an index | ||
360 | entry using the corresponding index key. To get an index context, | ||
361 | use ntfs_index_ctx_get() and to release it, use ntfs_index_ctx_put(). | ||
362 | This also adds a new slab cache for the index contexts. To lookup a | ||
363 | key in an index inode, use ntfs_index_lookup(). After modifying an | ||
364 | index entry, call ntfs_index_entry_flush_dcache_page() followed by | ||
365 | ntfs_index_entry_mark_dirty() to ensure the changes are written out | ||
366 | to disk. For details see fs/ntfs/index.[hc]. Note, at present, if | ||
367 | an index entry is in the index allocation attribute rather than the | ||
368 | index root attribute it will not be written out (you will get a | ||
369 | warning message about discarded changes instead). | ||
370 | - Load the quota file ($Quota) and check if quota tracking is enabled | ||
371 | and if so, mark the quotas out of date. This causes windows to | ||
372 | rescan the volume on boot and update all quota entries. | ||
373 | - Add a set_page_dirty address space operation for ntfs_m[fs]t_aops. | ||
374 | It is simply set to __set_page_dirty_nobuffers() to make sure that | ||
375 | running set_page_dirty() on a page containing mft/ntfs records will | ||
376 | not affect the dirty state of the page buffers. | ||
377 | - Add fs/ntfs/index.c::__ntfs_index_entry_mark_dirty() which sets all | ||
378 | buffers that are inside the ntfs record in the page dirty after which | ||
379 | it sets the page dirty. This allows ->writepage to only write the | ||
380 | dirty index records rather than having to write all the records in | ||
381 | the page. Modify fs/ntfs/index.h::ntfs_index_entry_mark_dirty() to | ||
382 | use this rather than __set_page_dirty_nobuffers(). | ||
383 | - Implement fs/ntfs/aops.c::ntfs_write_mst_block() which enables the | ||
384 | writing of page cache pages belonging to mst protected attributes | ||
385 | like the index allocation attribute in directory indices and other | ||
386 | indices like $Quota/$Q, etc. This means that the quota is now marked | ||
387 | out of date on all volumes rather than only on ones where the quota | ||
388 | defaults entry is in the index root attribute of the $Quota/$Q index. | ||
389 | |||
390 | 2.1.14 - Fix an NFSd caused deadlock reported by several users. | ||
391 | |||
392 | - Modify fs/ntfs/dir.c::ntfs_readdir() to copy the index root attribute value | |
393 | to a buffer so that we can put the search context and unmap the mft | ||
394 | record before calling the filldir() callback. We need to do this | ||
395 | because of NFSd which calls ->lookup() from its filldir() callback | |
396 | and this causes NTFS to deadlock as ntfs_lookup() maps the mft record | ||
397 | of the directory and since ntfs_readdir() has got it mapped already | ||
398 | ntfs_lookup() deadlocks. | ||
399 | |||
400 | 2.1.13 - Enable overwriting of resident files and housekeeping of system files. | ||
401 | |||
402 | - Implement writing of mft records (fs/ntfs/mft.[hc]), which includes | ||
403 | keeping the mft mirror in sync with the mft when mirrored mft records | ||
404 | are written. The functions are write_mft_record{,_nolock}(). The | ||
405 | implementation is quite rudimentary for now with lots of things not | ||
406 | implemented yet but I am not sure any of them can actually occur so | ||
407 | I will wait for people to hit each one and only then implement it. | ||
408 | - Commit open system inodes at umount time. This should make it | ||
409 | virtually impossible for sync_mft_mirror_umount() to ever be needed. | ||
410 | - Implement ->write_inode (fs/ntfs/inode.c::ntfs_write_inode()) for the | ||
411 | ntfs super operations. This gives us inode writing via the VFS inode | ||
412 | dirty code paths. Note: Access time updates are not implemented yet. | ||
413 | - Implement fs/ntfs/mft.[hc]::{,__}mark_mft_record_dirty() and make | ||
414 | fs/ntfs/aops.c::ntfs_writepage() and ntfs_commit_write() use it, thus | ||
415 | finally enabling resident file overwrite! (-8 This also includes a | ||
416 | placeholder for ->writepage (ntfs_mft_writepage()), which for now | ||
417 | just redirties the page and returns. Also, at umount time, we for | ||
418 | now throw away all mft data page cache pages after the last call to | ||
419 | ntfs_commit_inode() in the hope that all inodes will have been | ||
420 | written out by then and hence no dirty (meta)data will be lost. We | ||
421 | also check for this case and emit an error message telling the user | ||
422 | to run chkdsk. | ||
423 | - Use set_page_writeback() and end_page_writeback() in the resident | ||
424 | attribute code path of fs/ntfs/aops.c::ntfs_writepage() otherwise | ||
425 | the radix-tree tag PAGECACHE_TAG_DIRTY remains set even though the | ||
426 | page is clean. | ||
427 | - Implement ntfs_mft_writepage() so it now checks if any of the mft | ||
428 | records in the page are dirty and if so redirties the page and | ||
429 | returns. Otherwise it just returns (after doing set_page_writeback(), | ||
430 | unlock_page(), end_page_writeback() or the radix-tree tag | ||
431 | PAGECACHE_TAG_DIRTY remains set even though the page is clean), thus | ||
432 | allowing the VM to do with the page as it pleases. Also, at umount | |
433 | time, now only throw away dirty mft (meta)data pages if dirty inodes | ||
434 | are present and ask the user to email us if they see this happening. | ||
435 | - Add functions ntfs_{clear,set}_volume_flags(), to modify the volume | ||
436 | information flags (fs/ntfs/super.c). | ||
437 | - Mark the volume dirty when (re)mounting read-write and mark it clean | ||
438 | when unmounting or remounting read-only. If any volume errors are | ||
439 | found, the volume is left marked dirty to force chkdsk to run. | ||
440 | - Add code to set the NT4 compatibility flag when (re)mounting | ||
441 | read-write for newer NTFS versions but leave it commented out for now | ||
442 | since we do not make any modifications that are NTFS 1.2 specific yet | ||
443 | and since setting this flag breaks Captive-NTFS which is not nice. | ||
444 | This code must be enabled once we start writing NTFS 1.2 specific | ||
445 | changes otherwise Windows NTFS driver might crash / cause corruption. | ||
446 | |||
447 | 2.1.12 - Fix the second fix to the decompression engine and some cleanups. | ||
448 | |||
449 | - Add a new address space operations struct, ntfs_mst_aops, for mst | ||
450 | protected attributes. This is because the default ntfs_aops do not | ||
451 | make sense with mst protected data and were they to write anything to | ||
452 | such an attribute they would cause data corruption so we provide | ||
453 | ntfs_mst_aops which does not have any write related operations set. | ||
454 | - Cleanup dirty ntfs inode handling (fs/ntfs/inode.[hc]) which also | ||
455 | includes an adapted ntfs_commit_inode() and an implementation of | ||
456 | ntfs_write_inode() which for now just cleans dirty inodes without | ||
457 | writing them (it does emit a warning that this is happening). | ||
458 | - Undo the second decompression engine fix (see 2.1.9 release ChangeLog | ||
459 | entry) as it was only fixing a theoretical bug but at the same time | ||
460 | it badly broke the handling of sparse and uncompressed compression | ||
461 | blocks. | ||
462 | |||
463 | 2.1.11 - Driver internal cleanups. | ||
464 | |||
465 | - Only build logfile.o if building the driver with read-write support. | ||
466 | - Really final white space cleanups. | ||
467 | - Use generic_ffs() instead of ffs() in logfile.c which allows the | ||
468 | log_page_size variable to be optimized by gcc into a constant. | ||
469 | - Rename uchar_t to ntfschar everywhere as uchar_t is unsigned 1-byte | ||
470 | char as defined by POSIX and as found on some systems. | ||
471 | |||
472 | 2.1.10 - Force read-only (re)mounting of volumes with unsupported volume flags. | ||
473 | |||
474 | - Finish off the white space cleanups (remove trailing spaces, etc). | ||
475 | - Clean up ntfs_fill_super() and ntfs_read_inode_mount() by removing | ||
476 | the kludges around the first iget(). Instead of (re)setting ->s_op | ||
477 | we have the $MFT inode set up by explicit new_inode() / set ->i_ino / | ||
478 | insert_inode_hash() / call ntfs_read_inode_mount() directly. This | ||
479 | kills the need for second super_operations and allows to return error | ||
480 | from ntfs_read_inode_mount() without resorting to ugly "poisoning" | ||
481 | tricks. (Al Viro) | ||
482 | - Force read-only (re)mounting if any of the following bits are set in | ||
483 | the volume information flags: | ||
484 | VOLUME_IS_DIRTY, VOLUME_RESIZE_LOG_FILE, | ||
485 | VOLUME_UPGRADE_ON_MOUNT, VOLUME_DELETE_USN_UNDERWAY, | ||
486 | VOLUME_REPAIR_OBJECT_ID, VOLUME_MODIFIED_BY_CHKDSK | ||
487 | To make this easier we define VOLUME_MUST_MOUNT_RO_MASK with all the | ||
488 | above bits set so the test is made easy. | ||
489 | |||
490 | 2.1.9 - Fix two bugs in decompression engine. | ||
491 | |||
492 | - Fix a bug where we would not always detect that we have reached the | ||
493 | end of a compression block because we were ending at minus one byte | ||
494 | which is effectively the same as being at the end. The fix is to | ||
495 | check whether the uncompressed buffer has been fully filled and if so | ||
496 | we assume we have reached the end of the compression block. A big | ||
497 | thank you to Marcin Gibuła for the bug report, the assistance in | ||
498 | tracking down the bug and testing the fix. | ||
499 | - Fix a possible bug where when a compressed read is truncated to the | ||
500 | end of the file, the offset inside the last page was not truncated. | ||
501 | |||
502 | 2.1.8 - Handle $MFT mirror and $LogFile, improve time handling, and cleanups. | ||
503 | |||
504 | - Use get_bh() instead of manual atomic_inc() in fs/ntfs/compress.c. | ||
505 | - Modify fs/ntfs/time.c::ntfs2utc(), get_current_ntfs_time(), and | ||
506 | utc2ntfs() to work with struct timespec instead of time_t on the | ||
507 | Linux UTC time side thus preserving the full precision of the NTFS | ||
508 | time and only losing up to 99 nano-seconds in the Linux UTC time. | ||
509 | - Move fs/ntfs/time.c to fs/ntfs/time.h and make the time functions | ||
510 | static inline. | ||
511 | - Remove unused ntfs_dirty_inode(). | ||
512 | - Cleanup super operations declaration in fs/ntfs/super.c. | ||
513 | - Wrap flush_dcache_mft_record_page() in #ifdef NTFS_RW. | ||
514 | - Add NInoTestSetFoo() and NInoTestClearFoo() macro magic to | ||
515 | fs/ntfs/inode.h and use it to declare NInoTest{Set,Clear}Dirty. | ||
516 | - Move typedefs for ntfs_attr and test_t from fs/ntfs/inode.c to | ||
517 | fs/ntfs/inode.h so they can be used elsewhere. | ||
518 | - Determine the mft mirror size as the number of mirrored mft records | ||
519 | and store it in ntfs_volume->mftmirr_size (fs/ntfs/super.c). | ||
520 | - Load the mft mirror at mount time and compare the mft records stored | ||
521 | in it to the ones in the mft. Force a read-only mount if the two do | ||
522 | not match (fs/ntfs/super.c). | ||
523 | - Fix type casting related warnings on 64-bit architectures. Thanks | ||
524 | to Meelis Roos for reporting them. | ||
525 | - Move %L to %ll as %L is floating point and %ll is integer which is | ||
526 | what we want. | ||
527 | - Read the journal ($LogFile) and determine if the volume has been | ||
528 | shutdown cleanly and force a read-only mount if not (fs/ntfs/super.c | ||
529 | and fs/ntfs/logfile.c). This is a little bit of a crude check in | ||
530 | that we only look at the restart areas and not at the actual log | ||
531 | records so that there will be a very small number of cases where we | ||
532 | think that a volume is dirty when in fact it is clean. This should | ||
533 | only affect volumes that have not been shutdown cleanly and did not | ||
534 | have any pending, non-check-pointed i/o. | ||
535 | - If the $LogFile indicates a clean shutdown and a read-write (re)mount | ||
536 | is requested, empty $LogFile by overwriting it with 0xff bytes to | ||
537 | ensure that Windows cannot cause data corruption by replaying a stale | ||
538 | journal after Linux has written to the volume. | ||
539 | |||
540 | 2.1.7 - Enable NFS exporting of mounted NTFS volumes. | ||
541 | |||
542 | - Set i_generation in the VFS inode from the seq_no of the NTFS inode. | ||
543 | - Make ntfs_lookup() NFS export safe, i.e. use d_splice_alias(), etc. | ||
544 | - Implement ->get_dentry() in fs/ntfs/namei.c::ntfs_get_dentry() as the | ||
545 | default doesn't allow inode number 0 which is a valid inode on NTFS | ||
546 | and even if it did allow that it uses iget() instead of ntfs_iget() | ||
547 | which makes it useless for us. | ||
548 | - Implement ->get_parent() in fs/ntfs/namei.c::ntfs_get_parent() as the | ||
549 | default just returns -EACCES which is not very useful. | ||
550 | - Define export operations (->s_export_op) for NTFS (ntfs_export_ops) | ||
551 | and set them up in the super block at mount time (super.c) this | ||
552 | allows mounted NTFS volumes to be exported via NFS. | ||
553 | - Add missing return -EOPNOTSUPP; in | ||
554 | fs/ntfs/aops.c::ntfs_commit_nonresident_write(). | ||
555 | - Enforce no atime and no dir atime updates at mount/remount time as | ||
556 | they are not implemented yet anyway. | ||
557 | - Move a few assignments in fs/ntfs/attrib.c::load_attribute_list() to | ||
558 | after a NULL check. Thanks to Dave Jones for pointing this out. | ||
559 | |||
560 | 2.1.6 - Fix minor bug in handling of compressed directories. | ||
561 | |||
562 | - Fix bug in handling of compressed directories. A compressed | ||
563 | directory is not really compressed so when we set the ->i_blocks | ||
564 | field of a compressed directory inode we were setting it from the | ||
565 | non-existing field ni->itype.compressed.size which gave random | ||
566 | results... For directories we now always use ni->allocated_size. | ||
567 | |||
568 | 2.1.5 - Fix minor bug in attribute list attribute handling. | ||
569 | |||
570 | - Fix bug in attribute list handling. Actually it is not as much a bug | ||
571 | as too much protection in that we were not allowing attribute lists | ||
572 | which waste space on disk while Windows XP clearly allows it and in | ||
573 | fact creates such attribute lists so our driver was failing. | ||
574 | - Update NTFS documentation ready for 2.6 kernel release. | ||
575 | |||
576 | 2.1.4 - Reduce compiler requirements. | ||
577 | |||
578 | - Remove all uses of unnamed structs and unions in the driver to make | ||
579 | old and newer gcc versions happy. Makes it a bit uglier IMO but at | ||
580 | least people will stop hassling me about it. | ||
581 | |||
582 | 2.1.3 - Important bug fixes in corner cases. | ||
583 | |||
584 | - super.c::parse_ntfs_boot_sector(): Correct the check for 64-bit | ||
585 | clusters. (Philipp Thomas) | ||
586 | - attrib.c::load_attribute_list(): Fix bug when initialized_size is a | ||
587 | multiple of the block_size but not the cluster size. (Szabolcs | ||
588 | Szakacsits <szaka@sienet.hu>) | ||
589 | |||
590 | 2.1.2 - Important bug fixes alleviating the hangs in statfs. | ||
591 | |||
592 | - Fix buggy free cluster and free inode determination logic. | ||
593 | |||
594 | 2.1.1 - Minor updates. | ||
595 | |||
596 | - Add handling for initialized_size != data_size in compressed files. | ||
597 | - Reduce function local stack usage from 0x3d4 bytes to just noise in | ||
598 | fs/ntfs/upcase.c. (Randy Dunlap <rddunlap@osdl.org>) | ||
599 | - Remove compiler warnings for newer gcc. | ||
600 | - Pages are no longer kmapped by mm/filemap.c::generic_file_write() | ||
601 | around calls to ->{prepare,commit}_write. Adapt NTFS appropriately | ||
602 | in fs/ntfs/aops.c::ntfs_prepare_nonresident_write() by using | ||
603 | kmap_atomic(KM_USER0). | ||
604 | |||
605 | 2.1.0 - First steps towards write support: implement file overwrite. | ||
606 | |||
607 | - Add configuration option for developmental write support with an | ||
608 | appropriately scary configuration help text. | ||
609 | - Initial implementation of fs/ntfs/aops.c::ntfs_writepage() and its | ||
610 | helper fs/ntfs/aops.c::ntfs_write_block(). This enables mmap(2) based | ||
611 | overwriting of existing files on ntfs. Note: Resident files are | ||
612 | only written into memory, and not written out to disk at present, so | ||
613 | avoid writing to files smaller than about 1kiB. | ||
614 | - Initial implementation of fs/ntfs/aops.c::ntfs_prepare_write(), its | ||
615 | helper fs/ntfs/aops.c::ntfs_prepare_nonresident_write() and their | ||
616 | counterparts, fs/ntfs/aops.c::ntfs_commit_write(), and | ||
617 | fs/ntfs/aops.c::ntfs_commit_nonresident_write(), respectively. Also, | ||
618 | add generic_file_write() to the ntfs file operations (fs/ntfs/file.c). | ||
619 | This enables write(2) based overwriting of existing files on ntfs. | ||
620 | Note: As with mmap(2) based overwriting, resident files are only | ||
621 | written into memory, and not written out to disk at present, so avoid | ||
622 | writing to files smaller than about 1kiB. | ||
623 | - Implement ->truncate (fs/ntfs/inode.c::ntfs_truncate()) and | ||
624 | ->setattr() (fs/ntfs/inode.c::ntfs_setattr()) inode operations for | ||
625 | files with the purpose of intercepting and aborting all i_size | ||
626 | changes which we do not support yet. ntfs_truncate() actually only | ||
627 | emits a warning message but AFAICS our interception of i_size changes | ||
628 | elsewhere means ntfs_truncate() never gets called for i_size changes. | ||
629 | It is only called from generic_file_write() when we fail in | ||
630 | ntfs_prepare_{,nonresident_}write() in order to discard any | ||
631 | instantiated buffers beyond i_size. Thus i_size is not actually | ||
632 | changed so our warning message is enough. Unfortunately it is not | ||
633 | possible to easily determine if i_size is being changed or not hence | ||
634 | we just emit an appropriately worded error message. | ||
635 | |||
636 | 2.0.25 - Small bug fixes and cleanups. | ||
637 | |||
638 | - Unlock the page in an out of memory error code path in | ||
639 | fs/ntfs/aops.c::ntfs_read_block(). | ||
640 | - If fs/ntfs/aops.c::ntfs_read_page() is called on an uptodate page, | ||
641 | just unlock the page and return. (This can happen due to ->writepage | ||
642 | clearing PageUptodate() during write out of MstProtected() | ||
643 | attributes.) | ||
644 | - Remove leaked write code again. | ||
645 | |||
646 | 2.0.24 - Cleanups. | ||
647 | |||
648 | - Treat BUG_ON() as ASSERT() not VERIFY(), i.e. do not use side effects | ||
649 | inside BUG_ON(). (Adam J. Richter) | ||
650 | - Split logical OR expressions inside BUG_ON() into individual BUG_ON() | ||
651 | calls for improved debugging. (Adam J. Richter) | ||
652 | - Add errors flag to the ntfs volume state, accessed via | ||
653 | NVol{,Set,Clear}Errors(vol). | ||
654 | - Do not allow read-write remounts of read-only volumes with errors. | ||
655 | - Clarify comment for ntfs file operation sendfile which was added by | ||
656 | Christoph Hellwig a while ago (just using generic_file_sendfile()) | ||
657 | to say that ntfs ->sendfile is only used for the case where the | ||
658 | source data is on the ntfs partition and the destination is | ||
659 | somewhere else, i.e. nothing we need to concern ourselves with. | ||
660 | - Add generic_file_write() as our ntfs file write operation. | ||
661 | |||
662 | 2.0.23 - Major bug fixes (races, deadlocks, non-i386 architectures). | ||
663 | |||
664 | - Massive internal locking changes to mft record locking. Fixes lock | ||
665 | recursion and replaces the mrec_lock read/write semaphore with a | ||
666 | mutex. Also removes the now superfluous mft_count. This fixes several | ||
667 | race conditions and deadlocks, especially in the future write code. | ||
668 | - Fix ntfs over loopback for compressed files by adding an | ||
669 | optimization barrier. (gcc was screwing up otherwise ?) | ||
670 | - Miscellaneous cleanups all over the code and a fix or two in error | ||
671 | handling code paths. | ||
672 | Thanks go to Christoph Hellwig for pointing out the following two: | ||
673 | - Remove now unused function fs/ntfs/malloc.h::vmalloc_nofs(). | ||
674 | - Fix ntfs_free() for ia64 and parisc by checking for VMALLOC_END, too. | ||
675 | |||
676 | 2.0.22 - Cleanups, mainly to ntfs_readdir(), and use C99 initializers. | ||
677 | |||
678 | - Change fs/ntfs/dir.c::ntfs_readdir() to only read/write ->f_pos once | ||
679 | at entry/exit respectively. | ||
680 | - Use C99 initializers for structures. | ||
681 | - Remove unused variable blocks from fs/ntfs/aops.c::ntfs_read_block(). | ||
682 | |||
683 | 2.0.21 - Check for, and refuse to work with too large files/directories/volumes. | ||
684 | |||
685 | - Limit volume size at mount time to 2TiB on architectures where | ||
686 | unsigned long is 32-bits (fs/ntfs/super.c::parse_ntfs_boot_sector()). | ||
687 | This is the most we can do without overflowing the 32-bit limit of | ||
688 | the block device size imposed on us by sb_bread() and sb_getblk() | ||
689 | for the time being. | ||
690 | - Limit file/directory size at open() time to 16TiB on architectures | ||
691 | where unsigned long is 32-bits (fs/ntfs/file.c::ntfs_file_open() and | ||
692 | fs/ntfs/dir.c::ntfs_dir_open()). This is the most we can do without | ||
693 | overflowing the page cache page index. | ||
694 | |||
695 | 2.0.20 - Support non-resident directory index bitmaps, fix page leak in readdir. | ||
696 | |||
697 | - Move the directory index bitmap to use an attribute inode instead of | ||
698 | having special fields for it inside the ntfs inode structure. This | ||
699 | means that the index bitmaps now use the page cache for i/o, too, | ||
700 | and also as a side effect we get support for non-resident index | ||
701 | bitmaps for free. | ||
702 | - Simplify/cleanup error handling in fs/ntfs/dir.c::ntfs_readdir() and | ||
703 | fix a page leak that manifested itself in some cases. | ||
704 | - Add fs/ntfs/inode.c::ntfs_put_inode(), which we need to release the | ||
705 | index bitmap inode on the final iput(). | ||
706 | |||
707 | 2.0.19 - Fix race condition, improvements, and optimizations in i/o interface. | ||
708 | |||
709 | - Apply block optimization added to fs/ntfs/aops.c::ntfs_read_block() | ||
710 | to fs/ntfs/compress.c::ntfs_file_read_compressed_block() as well. | ||
711 | - Drop the "file" from ntfs_file_read_compressed_block(). | ||
712 | - Rename fs/ntfs/aops.c::ntfs_end_buffer_read_async() to | ||
713 | ntfs_end_buffer_async_read() (more like the fs/buffer.c counterpart). | ||
714 | - Update ntfs_end_buffer_async_read() with the improved logic from | ||
715 | its updated counterpart fs/buffer.c::end_buffer_async_read(). Apply | ||
716 | further logic improvements to better determine when we set PageError. | ||
717 | - Update submission of buffers in fs/ntfs/aops.c::ntfs_read_block() to | ||
718 | check for the buffers being uptodate first in line with the updated | ||
719 | fs/buffer.c::block_read_full_page(). This plugs a small race | ||
720 | condition. | ||
721 | |||
722 | 2.0.18 - Fix race condition in reading of compressed files. | ||
723 | |||
724 | - There was a narrow window between checking a buffer head for being | ||
725 | uptodate and locking it in ntfs_file_read_compressed_block(). We now | ||
726 | lock the buffer and then check whether it is uptodate or not. | ||
727 | |||
728 | 2.0.17 - Cleanups and optimizations - shrinking the ToDo list. | ||
729 | |||
730 | - Modify fs/ntfs/inode.c::ntfs_read_locked_inode() to return an error | ||
731 | code and update callers, i.e. ntfs_iget(), to pass that error code | ||
732 | up instead of just using -EIO. | ||
733 | - Modifications to super.c to ensure that both mount and remount | ||
734 | cannot set any write related options when the driver is compiled | ||
735 | read-only. | ||
736 | - Optimize block resolution in fs/ntfs/aops.c::ntfs_read_block() to | ||
737 | cache the current runlist element. This should improve performance | ||
738 | when reading very large and/or very fragmented data. | ||
739 | |||
740 | 2.0.16 - Convert access to $MFT/$BITMAP to attribute inode API. | ||
741 | |||
742 | - Fix a stupid bug introduced in 2.0.15 where we were unmapping the | ||
743 | wrong inode in fs/ntfs/inode.c::ntfs_attr_iget(). | ||
744 | - Fix debugging check in fs/ntfs/aops.c::ntfs_read_block(). | ||
745 | - Convert $MFT/$BITMAP access to attribute inode API and remove all | ||
746 | remnants of the ugly mftbmp address space and operations hack. This | ||
747 | means we finally have only one readpage function as well as only one | ||
748 | async io completion handler. Yey! The mft bitmap is now just an | ||
749 | attribute inode and is accessed from vol->mftbmp_ino just as if it | ||
750 | were a normal file. Fake inodes rule. (-: | ||
751 | |||
752 | 2.0.15 - Fake inodes based attribute i/o via the pagecache, fixes and cleanups. | ||
753 | |||
754 | - Fix silly bug in fs/ntfs/super.c::parse_options() which was causing | ||
755 | remounts to fail when the partition had an entry in /etc/fstab and | ||
756 | the entry specified the nls= option. | ||
757 | - Apply same macro magic used in fs/ntfs/inode.h to fs/ntfs/volume.h to | ||
758 | expand all the helper functions NVolFoo(), NVolSetFoo(), and | ||
759 | NVolClearFoo(). | ||
760 | - Move copyright statement from driver initialisation message to | ||
761 | module description (fs/ntfs/super.c). This makes the initialisation | ||
762 | message fit on one line and fits in better with rest of kernel. | ||
763 | - Update fs/ntfs/attrib.c::map_run_list() to work on both real and | ||
764 | attribute inodes, and both for files and directories. | ||
765 | - Implement fake attribute inodes allowing all attribute i/o to go via | ||
766 | the page cache and to use all the normal vfs/mm functionality: | ||
767 | - Add ntfs_attr_iget() and its helper ntfs_read_locked_attr_inode() | ||
768 | to fs/ntfs/inode.c. | ||
769 | - Add needed cleanup code to ntfs_clear_big_inode(). | ||
770 | - Merge address space operations for files and directories (aops.c), | ||
771 | now just have ntfs_aops: | ||
772 | - Rename: | ||
773 | end_buffer_read_attr_async() -> ntfs_end_buffer_read_async(), | ||
774 | ntfs_attr_read_block() -> ntfs_read_block(), | ||
775 | ntfs_file_read_page() -> ntfs_readpage(). | ||
776 | - Rewrite fs/ntfs/aops.c::ntfs_readpage() to work on both real and | ||
777 | attribute inodes, and both for files and directories. | ||
778 | - Remove obsolete fs/ntfs/aops.c::ntfs_mst_readpage(). | ||
779 | |||
780 | 2.0.14 - Run list merging code cleanup, minor locking changes, typo fixes. | ||
781 | |||
782 | - Change fs/ntfs/super.c::ntfs_statfs() to not rely on BKL by moving | ||
783 | the locking out of super.c::get_nr_free_mft_records() and taking and | ||
784 | dropping the mftbmp_lock rw_semaphore in ntfs_statfs() itself. | ||
785 | - Bring attribute runlist merging code (fs/ntfs/attrib.c) in sync with | ||
786 | current userspace ntfs library code. This means that if a merge | ||
787 | fails the original runlists are always left unmodified instead of | ||
788 | being silently corrupted. | ||
789 | - Misc typo fixes. | ||
790 | |||
791 | 2.0.13 - Use iget5_locked() in preparation for fake inodes and small cleanups. | ||
792 | |||
793 | - Remove nr_mft_bits and the now superfluous union with nr_mft_records | ||
794 | from ntfs_volume structure. | ||
795 | - Remove nr_lcn_bits and the now superfluous union with nr_clusters | ||
796 | from ntfs_volume structure. | ||
797 | - Use iget5_locked() and friends instead of conventional iget(). Wrap | ||
798 | the call in fs/ntfs/inode.c::ntfs_iget() and update callers of iget() | ||
799 | to use ntfs_iget(). Leave only one iget() call at mount time so we | ||
800 | don't need an ntfs_iget_mount(). | ||
801 | - Change fs/ntfs/inode.c::ntfs_new_extent_inode() to take mft_no as an | ||
802 | additional argument. | ||
803 | |||
804 | 2.0.12 - Initial cleanup of address space operations following 2.0.11 changes. | ||
805 | |||
806 | - Merge fs/ntfs/aops.c::end_buffer_read_mst_async() and | ||
807 | fs/ntfs/aops.c::end_buffer_read_file_async() into one function | ||
808 | fs/ntfs/aops.c::end_buffer_read_attr_async() using NInoMstProtected() | ||
809 | to determine whether to apply mst fixups or not. | ||
810 | - Above change allows merging fs/ntfs/aops.c::ntfs_file_read_block() | ||
811 | and fs/ntfs/aops.c::ntfs_mst_readpage() into one function | ||
812 | fs/ntfs/aops.c::ntfs_attr_read_block(). Also, create a tiny wrapper | ||
813 | fs/ntfs/aops.c::ntfs_mst_readpage() to transform the parameters from | ||
814 | the VFS readpage function prototype to the ntfs_attr_read_block() | ||
815 | function prototype. | ||
816 | |||
817 | 2.0.11 - Initial preparations for fake inode based attribute i/o. | ||
818 | |||
819 | - Move definition of ntfs_inode_state_bits to fs/ntfs/inode.h and | ||
820 | do some macro magic (adapted from include/linux/buffer_head.h) to | ||
821 | expand all the helper functions NInoFoo(), NInoSetFoo(), and | ||
822 | NInoClearFoo(). | ||
823 | - Add new flag to ntfs_inode_state_bits: NI_Sparse. | ||
824 | - Add new fields to ntfs_inode structure to allow use of fake inodes | ||
825 | for attribute i/o: type, name, name_len. Also add new state bits: | ||
826 | NI_Attr, which, if set, indicates the inode is a fake inode, and | ||
827 | NI_MstProtected, which, if set, indicates the attribute uses multi | ||
828 | sector transfer protection, i.e. fixups need to be applied after | ||
829 | reads and before/after writes. | ||
830 | - Rename fs/ntfs/inode.c::ntfs_{new,clear,destroy}_inode() to | ||
831 | ntfs_{new,clear,destroy}_extent_inode() and update callers. | ||
832 | - Use ntfs_clear_extent_inode() in fs/ntfs/inode.c::__ntfs_clear_inode() | ||
833 | instead of ntfs_destroy_extent_inode(). | ||
834 | - Cleanup memory deallocations in {__,}ntfs_clear_{,big_}inode(). | ||
835 | - Make all operations on ntfs inode state bits use the NIno* functions. | ||
836 | - Set up the new ntfs inode fields and state bits in | ||
837 | fs/ntfs/inode.c::ntfs_read_inode() and add appropriate cleanup of | ||
838 | allocated memory to __ntfs_clear_inode(). | ||
839 | - Cleanup ntfs_inode structure a bit for better ordering of elements | ||
840 | w.r.t. their size to allow better packing of the structure in memory. | ||
841 | |||
842 | 2.0.10 - There can only be 2^32 - 1 inodes on an NTFS volume. | ||
843 | |||
844 | - Add check at mount time to verify that the number of inodes on the | ||
845 | volume does not exceed 2^32 - 1, which is the maximum allowed for | ||
846 | NTFS according to Microsoft. | ||
847 | - Change mft_no member of ntfs_inode structure to be unsigned long. | ||
848 | Update all users. This makes ntfs_inode->mft_no just a copy of struct | ||
849 | inode->i_ino. But we can't just always use struct inode->i_ino and | ||
850 | remove mft_no because extent inodes do not have an attached struct | ||
851 | inode. | ||
852 | |||
853 | 2.0.9 - Decompression engine now uses a single buffer and other cleanups. | ||
854 | |||
855 | - Change decompression engine to use a single buffer protected by a | ||
856 | spin lock instead of per-CPU buffers. (Rusty Russell) | ||
857 | - Do not update cb_pos when handling a partial final page during | ||
858 | decompression of a sparse compression block, as the value is later | ||
859 | reset without being read/used. (Rusty Russell) | ||
860 | - Switch to using the new KM_BIO_SRC_IRQ for atomic kmap()s. (Andrew | ||
861 | Morton) | ||
862 | - Change buffer size in ntfs_readdir()/ntfs_filldir() to use | ||
863 | NLS_MAX_CHARSET_SIZE which makes the buffers almost 1kiB each but | ||
864 | it also makes everything safer so it is a good thing. | ||
865 | - Miscellaneous minor cleanups to comments. | ||
866 | |||
867 | 2.0.8 - Major updates for handling of case sensitivity and dcache aliasing. | ||
868 | |||
869 | Big thanks go to Al Viro and other inhabitants of #kernel for investing | ||
870 | their time to discuss the case sensitivity and dcache aliasing issues. | ||
871 | |||
872 | - Remove unused source file fs/ntfs/attraops.c. | ||
873 | - Remove show_inodes mount option(s), thus dropping support for | ||
874 | displaying of short file names. | ||
875 | - Remove deprecated mount option posix. | ||
876 | - Restore show_sys_files mount option. | ||
877 | - Add new mount option case_sensitive, to determine if the driver | ||
878 | treats file names as case sensitive or not. If case sensitive, create | ||
879 | file names in the POSIX namespace. Otherwise create file names in the | ||
880 | LONG/WIN32 namespace. Note, files remain accessible via their short | ||
881 | file name, if it exists. | ||
882 | - Remove really dumb logic bug in boot sector recovery code. | ||
883 | - Fix dcache aliasing issues wrt short/long file names via changes | ||
884 | to fs/ntfs/dir.c::ntfs_lookup_inode_by_name() and | ||
885 | fs/ntfs/namei.c::ntfs_lookup(): | ||
886 | - Add additional argument to ntfs_lookup_inode_by_name() in which we | ||
887 | return information about the matching file name if the case is not | ||
888 | matching or the match is a short file name. See comments above the | ||
889 | function definition for details. | ||
890 | - Change ntfs_lookup() to only create dcache entries for the correctly | ||
891 | cased file name and only for the WIN32 namespace counterpart of DOS | ||
892 | namespace file names. This ensures we have only one dentry per | ||
893 | directory and also removes all dcache aliasing issues between short | ||
894 | and long file names once we add write support. See comments above | ||
895 | function for details. | ||
896 | - Fix potential 1 byte overflow in fs/ntfs/unistr.c::ntfs_ucstonls(). | ||
897 | |||
898 | 2.0.7 - Minor cleanups and updates for changes in core kernel code. | ||
899 | |||
900 | - Remove much of the NULL struct element initializers. | ||
901 | - Various updates to make compatible with recent kernels. | ||
902 | - Remove defines of MAX_BUF_PER_PAGE and include linux/buffer_head.h | ||
903 | in fs/ntfs/ntfs.h instead. | ||
904 | - Remove no longer needed KERNEL_VERSION checks. We are now in the | ||
905 | kernel proper so they are no longer needed. | ||
906 | |||
907 | 2.0.6 - Major bugfix to make compatible with other kernel changes. | ||
908 | |||
909 | - Initialize the mftbmp address space properly now that there are more | ||
910 | fields in the struct address_space. This was leading to hangs and | ||
911 | oopses on umount since 2.5.12 because of changes to other parts of | ||
912 | the kernel. We probably want a kernel generic init_address_space() | ||
913 | function... | ||
914 | - Drop BKL from ntfs_readdir() after consultation with Al Viro. The | ||
915 | only caller of ->readdir() is vfs_readdir() which holds i_sem during | ||
916 | the call, and i_sem is sufficient protection against changes in the | ||
917 | directory inode (including ->i_size). | ||
918 | - Use generic_file_llseek() for directories (as opposed to | ||
919 | default_llseek()) as this downs i_sem instead of the BKL which is | ||
920 | what we now need for exclusion against ->f_pos changes considering we | ||
921 | no longer take the BKL in ntfs_readdir(). | ||
922 | |||
923 | 2.0.5 - Major bugfix. Buffer overflow in extent inode handling. | ||
924 | |||
925 | - No need to set old blocksize in super.c::ntfs_fill_super() as the | ||
926 | VFS does so via invocation of deactivate_super() calling | ||
927 | fs->fill_super() calling block_kill_super() which does it. | ||
928 | - BKL moved from VFS into dir.c::ntfs_readdir(). (Linus Torvalds) | ||
929 | -> Do we really need it? I don't think so as we have exclusion on | ||
930 | the directory ntfs_inode rw_semaphore mrec_lock. We might have to | ||
931 | move the ->f_pos accesses under the mrec_lock though. Check this... | ||
932 | - Fix really, really, really stupid buffer overflow in extent inode | ||
933 | handling in mft.c::map_extent_mft_record(). | ||
934 | |||
935 | 2.0.4 - Cleanups and updates for kernel 2.5.11. | ||
936 | |||
937 | - Add documentation on how to use the MD driver to be able to use NTFS | ||
938 | stripe and volume sets in Linux and generally cleanup documentation | ||
939 | a bit. | ||
940 | Remove all uses of kdev_t in favour of struct block_device *: | ||
941 | - Change compress.c::ntfs_file_read_compressed_block() to use | ||
942 | sb_getblk() instead of getblk(). | ||
943 | - Change super.c::ntfs_fill_super() to use bdev_hardsect_size() instead | ||
944 | of get_hardsect_size(). | ||
945 | - No need to get old blocksize in super.c::ntfs_fill_super() as | ||
946 | fs/super.c::get_sb_bdev() already does this. | ||
947 | - Set bh->b_bdev instead of bh->b_dev throughout aops.c. | ||
948 | |||
949 | 2.0.3 - Small bug fixes, cleanups, and performance improvements. | ||
950 | |||
951 | - Remove some dead code from mft.c. | ||
952 | - Optimize readpage and read_block functions throughout aops.c so that | ||
953 | only initialized blocks are read. Non-initialized ones have their | ||
954 | buffer head mapped, zeroed, and set up to date, without scheduling | ||
955 | any i/o. Thanks to Al Viro for advice on how to avoid the device i/o. | ||
956 | Thanks go to Andrew Morton for spotting the below: | ||
957 | - Fix buglet in allocate_compression_buffers() error code path. | ||
958 | - Call flush_dcache_page() after modifying page cache page contents in | ||
959 | ntfs_file_readpage(). | ||
960 | - Check for existence of page buffers throughout aops.c before calling | ||
961 | create_empty_buffers(). This happens when an I/O error occurs and the | ||
962 | read is retried. (It also happens once writing is implemented so that | ||
963 | needed doing anyway but I had left it for later...) | ||
964 | - Don't BUG_ON() uptodate and/or mapped buffers throughout aops.c in | ||
965 | readpage and read_block functions. Reasoning same as above (i.e. I/O | ||
966 | error retries and future write code paths.) | ||
967 | |||
968 | 2.0.2 - Minor updates and cleanups. | ||
969 | |||
970 | - Cleanup: rename mst.c::__post_read_mst_fixup to post_write_mst_fixup | ||
971 | and cleanup the code a bit, removing the unused size parameter. | ||
972 | - Change default fmask to 0177 and update documentation. | ||
973 | - Change attrib.c::get_attr_search_ctx() to return the search context | ||
974 | directly instead of taking the address of a pointer. A return value | ||
975 | of NULL means the allocation failed. Updated all callers | ||
976 | appropriately. | ||
977 | - Update to 2.5.9 kernel (preserving backwards compatibility) by | ||
978 | replacing all occurrences of page->buffers with page_buffers(page). | ||
979 | - Fix minor bugs in runlist merging, also minor cleanup. | ||
980 | - Updates to bootsector layout and mft mirror contents descriptions. | ||
981 | - Small bug fix in error detection in unistr.c and some cleanups. | ||
982 | - Grow name buffer allocations in unistr.c in aligned multiples of 64 | ||
983 | bytes. | ||
984 | |||
985 | 2.0.1 - Minor updates. | ||
986 | |||
987 | - Make default umask correspond to documentation. | ||
988 | - Improve documentation. | ||
989 | - Set default mode to include execute bit. The {u,f,d}mask can be used | ||
990 | to take it away if desired. This allows binaries to be executed from | ||
991 | a mounted ntfs partition. | ||
992 | |||
993 | 2.0.0 - New version number. Remove TNG from the name. Now in the kernel. | ||
994 | |||
995 | - Add kill_super, just keeping up with the vfs changes in the kernel. | ||
996 | - Repeat some changes from tng-0.0.8 that somehow got lost on the way | ||
997 | from the CVS import into BitKeeper. | ||
998 | - Begin to implement proper handling of allocated_size vs | ||
999 | initialized_size vs data_size (i.e. i_size). Done are | ||
1000 | mft.c::ntfs_mft_readpage(), aops.c::end_buffer_read_index_async(), | ||
1001 | and attrib.c::load_attribute_list(). | ||
1002 | - Lock the runlist in attrib.c::load_attribute_list() while using it. | ||
1003 | - Fix memory leak in ntfs_file_read_compressed_block() and generally | ||
1004 | clean up compress.c a little, removing some uncommented/unused debug | ||
1005 | code. | ||
1006 | - Tidy up dir.c a little bit. | ||
1007 | - Don't bother getting the runlist in inode.c::ntfs_read_inode(). | ||
1008 | - Merge mft.c::ntfs_mft_readpage() and aops.c::ntfs_index_readpage() | ||
1009 | creating aops.c::ntfs_mst_readpage(), improving the handling of | ||
1010 | holes and overflow in the process and implementing the correct | ||
1011 | equivalent of ntfs_file_get_block() in ntfs_mst_readpage() itself. | ||
1012 | I am aiming for correctness at the moment. Modularisation can come | ||
1013 | later. | ||
1014 | - Rename aops.c::end_buffer_read_index_async() to | ||
1015 | end_buffer_read_mst_async() and optimize the overflow checking and | ||
1016 | handling. | ||
1017 | - Use the host of the mftbmp address space mapping to hold the ntfs | ||
1018 | volume. This is needed so the async i/o completion handler can | ||
1019 | retrieve a pointer to the volume. Hopefully this will not cause | ||
1020 | problems elsewhere in the kernel... Otherwise will need to use a | ||
1021 | fake inode. | ||
1022 | - Complete implementation of proper handling of allocated_size vs | ||
1023 | initialized_size vs data_size (i.e. i_size) in whole driver. | ||
1024 | Basically aops.c is now completely rewritten. | ||
1025 | - Change NTFS driver name to just NTFS and set version number to 2.0.0 | ||
1026 | to make a clear distinction from the old driver which is still on | ||
1027 | version 1.1.22. | ||
1028 | |||
1029 | tng-0.0.8 - 08/03/2002 - Now using BitKeeper, http://linux-ntfs.bkbits.net/ | ||
1030 | |||
1031 | - Replace bdevname(sb->s_dev) with sb->s_id. | ||
1032 | - Remove now superfluous new-line characters in all callers of | ||
1033 | ntfs_debug(). | ||
1034 | - Apply kludge in ntfs_read_inode(), setting i_nlink to 1 for | ||
1035 | directories. Without this the "find" utility gets very upset which is | ||
1036 | fair enough as Linux/Unix do not support directory hard links. | ||
1037 | - Further runlist merging work. (Richard Russon) | ||
1038 | - Backwards compatibility for gcc-2.95. (Richard Russon) | ||
1039 | - Update to kernel 2.5.5-pre1 and rediff the now tiny patch. | ||
1040 | - Convert to new file system declaration using ->ntfs_get_sb() and | ||
1041 | replacing ntfs_read_super() with ntfs_fill_super(). | ||
1042 | - Set s_maxbytes to MAX_LFS_FILESIZE to avoid page cache page index | ||
1043 | overflow on 32-bit architectures. | ||
1044 | - Cleanup upcase loading code to use ntfs_(un)map_page(). | ||
1045 | - Disable/reenable preemption in critical sections of compression engine. | ||
1046 | - Replace device size determination in ntfs_fill_super() with | ||
1047 | sb->s_bdev->bd_inode->i_size (in bytes) and remove now superfluous | ||
1048 | function super.c::get_nr_blocks(). | ||
1049 | - Implement a mount time option (show_inodes) allowing choice of which | ||
1050 | types of inode names readdir() returns and modify ntfs_filldir() | ||
1051 | accordingly. There are several parameters to show_inodes: | ||
1052 | system: system files | ||
1053 | win32: long file names (including POSIX file names) [DEFAULT] | ||
1054 | long: same as win32 | ||
1055 | dos: short file names only (excluding POSIX file names) | ||
1056 | short: same as dos | ||
1057 | posix: same as both win32 and dos | ||
1058 | all: all file names | ||
1059 | Note that the options are additive, i.e. specifying: | ||
1060 | -o show_inodes=system,show_inodes=win32,show_inodes=dos | ||
1061 | is the same as specifying: | ||
1062 | -o show_inodes=all | ||
1063 | Note that the "posix" and "all" options will show all directory | ||
1064 | names, BUT the link count on each directory inode entry is set to 1, | ||
1065 | due to Linux not supporting directory hard links. This may well | ||
1066 | confuse some userspace applications, since the directory names will | ||
1067 | have the same inode numbers. Thus it is NOT advisable to use the | ||
1068 | "posix" or "all" options. We provide them only for completeness sake. | ||
1069 | - Add copies of allocated_size, initialized_size, and compressed_size to | ||
1070 | the ntfs inode structure and set them up in | ||
1071 | inode.c::ntfs_read_inode(). These reflect the unnamed data attribute | ||
1072 | for files and the index allocation attribute for directories. | ||
1073 | - Add copies of allocated_size and initialized_size to ntfs inode for | ||
1074 | $BITMAP attribute of large directories and set them up in | ||
1075 | inode.c::ntfs_read_inode(). | ||
1076 | - Add copies of allocated_size and initialized_size to ntfs volume for | ||
1077 | $BITMAP attribute of $MFT and set them up in | ||
1078 | super.c::load_system_files(). | ||
1079 | - Parse deprecated ntfs driver options (iocharset, show_sys_files, | ||
1080 | posix, and utf8) and tell user what the new options to use are. Note | ||
1081 | we still do support them but they will be removed with kernel 2.7.x. | ||
1082 | - Change all occurrences of integer long long printf formatting to hex | ||
1083 | as printk() will not support long long integer format if/when the | ||
1084 | div64 patch goes into the kernel. | ||
1085 | - Make slab caches have stable names and change the names to what they | ||
1086 | were intended to be. These changes are required/made possible by the | ||
1087 | new slab cache name handling which removes the length limitation by | ||
1088 | requiring the caller of kmem_cache_create() to supply a stable name | ||
1089 | which is then referenced but not copied. | ||
1090 | - Rename run_list structure to run_list_element and create a new | ||
1091 | run_list structure containing a pointer to a run_list_element | ||
1092 | structure and a read/write semaphore. Adapt all users of runlists | ||
1093 | to new scheme and take and release the lock as needed. This fixes a | ||
1094 | nasty race as the run_list changes even when inodes are locked for | ||
1095 | reading and even when the inode isn't locked at all, so we really | ||
1096 | needed the serialization. We use a semaphore rather than a spinlock | ||
1097 | as memory allocations can sleep and doing everything GFP_ATOMIC | ||
1098 | would be silly. | ||
1099 | - Cleanup read_inode() removing all code checking for lowest_vcn != 0. | ||
1100 | This can never happen due to the nature of lookup_attr() and how we | ||
1101 | support attribute lists. If it did happen it would imply the inode | ||
1102 | being corrupt. | ||
1103 | - Check for lowest_vcn != 0 in ntfs_read_inode() and mark the inode as | ||
1104 | bad if found. | ||
1105 | - Update to 2.5.6-pre2 changes in struct address_space. | ||
1106 | - Use parent_ino() when accessing d_parent inode number in dir.c. | ||
1107 | - Import Sourceforge CVS repository into BitKeeper repository: | ||
1108 | http://linux-ntfs.bkbits.net/ntfs-tng-2.5 | ||
1109 | - Update fs/Makefile, fs/Config.help, fs/Config.in, and | ||
1110 | Documentation/filesystems/ntfs.txt for NTFS TNG. | ||
1111 | - Create kernel configuration option controlling whether debugging | ||
1112 | is enabled or not. | ||
1113 | - Add the required export of end_buffer_io_sync() from the patches | ||
1114 | directory to the kernel code. | ||
1115 | - Update inode.c::ntfs_show_options() with show_inodes mount option. | ||
1116 | - Update errors mount option. | ||
1117 | |||
1118 | tng-0.0.7 - 13/02/2002 - The driver is now feature complete for read-only! | ||
1119 | |||
1120 | - Cleanup mft.c and its debug/error output in particular. Fix a minor | ||
1121 | bug in mapping of extent inodes. Update all the comments to fit all | ||
1122 | the recent code changes. | ||
1123 | - Modify vcn_to_lcn() to cope with entirely unmapped runlists. | ||
1124 | - Cleanups in compress.c, mostly comments and folding help. | ||
1125 | - Implement attrib.c::map_run_list() as a generic helper. | ||
1126 | - Make compress.c::ntfs_file_read_compressed_block() use map_run_list() | ||
1127 | thus making code shorter and enabling attribute list support. | ||
1128 | - Cleanup incorrect use of [su]64 with %L printf format specifier in | ||
1129 | all source files. Type casts to [unsigned] long long added to correct | ||
1130 | the mismatches (important for architectures which have long long not | ||
1131 | being 64 bits). | ||
1132 | - Merge async io completion handlers for directory indexes and $MFT | ||
1133 | data into one by setting the index_block_size{_bits} of the ntfs | ||
1134 | inode for $MFT to the mft_record_size{_bits} of the ntfs_volume. | ||
1135 | - Cleanup aops.c, update comments. | ||
1136 | - Make ntfs_file_get_block() use map_run_list() so all files now | ||
1137 | support attribute lists. | ||
1138 | - Make ntfs_dir_readpage() almost verbatim copy of | ||
1139 | block_read_full_page() by using ntfs_file_get_block() with only real | ||
1140 | difference being the use of our own async io completion handler | ||
1141 | rather than the default one, thus reducing the amount of code and | ||
1142 | automatically enabling attribute list support for directory indices. | ||
1143 | - Fix bug in load_attribute_list() - forgot to call brelse in error | ||
1144 | code path. | ||
1145 | - Change parameters to find_attr() and lookup_attr(). We no longer | ||
1146 | pass in the upcase table and its length. These can be gotten from | ||
1147 | ctx->ntfs_ino->vol->upcase{_len}. Update all callers. | ||
1148 | - Cleanups in attrib.c. | ||
1149 | - Implement merging of runlists, attrib.c::merge_run_lists() and its | ||
1150 | helpers. (Richard Russon) | ||
1151 | - Attribute lists part 2, attribute extents and multi part runlists: | ||
1152 | enable proper support for LCN_RL_NOT_MAPPED and automatic mapping of | ||
1153 | further runlist parts via attrib.c::map_run_list(). | ||
1154 | - Tiny endianness bug fix in decompress_mapping_pairs(). | ||
1155 | |||
1156 | tng-0.0.6 - Encrypted directories, bug fixes, cleanups, debugging enhancements. | ||
1157 | |||
1158 | - Enable encrypted directories. (Their index root is marked encrypted | ||
1159 | to indicate that new files in that directory should be created | ||
1160 | encrypted.) | ||
1161 | - Fix bug in NInoBmpNonResident() macro. (Cut and paste error.) | ||
1162 | - Enable $Extend system directory. Most (if not all) extended system | ||
1163 | files do not have unnamed data attributes so ntfs_read_inode() had to | ||
1164 | special case them but that is ok, as the special casing recovery | ||
1165 | happens inside an error code path so there is zero slow down in the | ||
1166 | normal fast path. The special casing is done by introducing a new | ||
1167 | function inode.c::ntfs_is_extended_system_file() which checks if any | ||
1168 | of the hard links in the inode point to $Extend as being their parent | ||
1169 | directory and if they do we assume this is an extended system file. | ||
1170 | - Create a sysctl/proc interface to allow {dis,en}abling of debug output | ||
1171 | when compiled with -DDEBUG. Default is debug messages to be disabled. | ||
1172 | To enable them, one writes a non-zero value to /proc/sys/fs/ntfs-debug | ||
1173 | (if /proc is enabled) or uses sysctl(2) to effect the same (if sysctl | ||
1174 | interface is enabled). Inspired by old ntfs driver. | ||
1175 | - Add debug_msgs insmod/kernel boot parameter to set whether debug | ||
1176 | messages are {dis,en}abled. This is useful to enable debug messages | ||
1177 | during ntfs initialization and is the only way to activate debugging | ||
1178 | when the sysctl interface is not enabled. | ||
1179 | - Cleanup debug output in various places. | ||
1180 | - Remove all dollar signs ($) from the source (except comments) to | ||
1181 | enable compilation on architectures whose gcc compiler does not | ||
1182 | support dollar signs in the names of variables/constants. Attribute | ||
1183 | types now start with AT_ instead of $ and $I30 is now just I30. | ||
1184 | - Cleanup ntfs_lookup() and add consistency check of sequence numbers. | ||
1185 | - Load complete runlist for $MFT/$BITMAP during mount and cleanup | ||
1186 | access functions. This means we now cope with $MFT/$BITMAP being | ||
1187 | spread across several mft records. | ||
1188 | - Disable modification of mft_zone_multiplier on remount. We can always | ||
1189 | reenable this later on if we really want to, but we will need to make | ||
1190 | sure we readjust the mft_zone size / layout accordingly. | ||
1191 | |||
1192 | tng-0.0.5 - Modernize for 2.5.x and further in line-ing with Al Viro's comments. | ||
1193 | |||
1194 | - Use sb_set_blocksize() instead of set_blocksize() and verify the | ||
1195 | return value. | ||
1196 | - Use sb_bread() instead of bread() throughout. | ||
1197 | - Add index_vcn_size{_bits} to ntfs_inode structure to store the size | ||
1198 | of a directory index block vcn. Apply resulting simplifications in | ||
1199 | dir.c everywhere. | ||
1200 | - Fix a small bug somewhere (but forgot what it was). | ||
1201 | - Change ntfs_{debug,error,warning} to enable gcc to do type checking | ||
1202 | on the printf-format parameter list and fix bugs reported by gcc | ||
1203 | as a result. (Richard Russon) | ||
1204 | - Move inode allocation strategy to Al's new stuff but maintain the | ||
1205 | divorce of ntfs_inode from struct inode. To achieve this we have two | ||
1206 | separate slab caches, one for big ntfs inodes containing a struct | ||
1207 | inode and pure ntfs inodes and at the same time fix some faulty | ||
1208 | error code paths in ntfs_read_inode(). | ||
1209 | - Show mount options in proc (inode.c::ntfs_show_options()). | ||
1210 | |||
1211 | tng-0.0.4 - Big changes, getting in line with Al Viro's comments. | ||
1212 | |||
1213 | - Modified (un)map_mft_record functions to be common for read and write | ||
1214 | case. To specify which is which, added extra parameter at front of | ||
1215 | parameter list. Pass either READ or WRITE to this, each has the | ||
1216 | obvious meaning. | ||
1217 | - General cleanups to allow for easier folding in vi. | ||
1218 | - attrib.c::decompress_mapping_pairs() now accepts the old runlist | ||
1219 | argument, and invokes attrib.c::merge_run_lists() to merge the old | ||
1220 | and the new runlists. | ||
1221 | - Removed attrib.c::find_first_attr(). | ||
1222 | - Implemented loading of attribute list and complete runlist for $MFT. | ||
1223 | This means we now cope with $MFT being spread across several mft | ||
1224 | records. | ||
1225 | - Adapt to 2.5.2-pre9 and the changed create_empty_buffers() syntax. | ||
1226 | - Adapt major/minor/kdev_t/[bk]devname stuff to new 2.5.x kernels. | ||
1227 | - Make ntfs_volume be allocated via kmalloc() instead of using a slab | ||
1228 | cache. There are too little ntfs_volume structures at any one time | ||
1229 | to justify a private slab cache. | ||
1230 | - Fix bogus kmap() use in async io completion. Now use kmap_atomic(). | ||
1231 | Use KM_BIO_IRQ on advice from IRC/kernel... | ||
1232 | - Use ntfs_map_page() in map_mft_record() and create ->readpage method | ||
1233 | for reading $MFT (ntfs_mft_readpage). In the process create dedicated | ||
1234 | address space operations (ntfs_mft_aops) for $MFT inode mapping. Also | ||
1235 | removed the now superfluous exports from the kernel core patch. | ||
1236 | - Fix a bug where kfree() was used instead of ntfs_free(). | ||
1237 | - Change map_mft_record() to take ntfs_inode as argument instead of | ||
1238 | vfs inode. Ditto for unmap_mft_record(). Adapt all callers. | ||
1239 | - Add pointer to ntfs_volume to ntfs_inode. | ||
1240 | - Add mft record number and sequence number to ntfs_inode. Stop using | ||
1241 | i_ino and i_generation for in-driver purposes. | ||
1242 | - Implement attrib.c::merge_run_lists(). (Richard Russon) | ||
1243 | - Remove use of proper inodes by extent inodes. Move i_ino and | ||
1244 | i_generation to ntfs_inode to do this. Apply simplifications that | ||
1245 | result and remove iget_no_wait(), etc. | ||
1246 | - Pass ntfs_inode everywhere in the driver (used to be struct inode). | ||
1247 | - Add reference counting in ntfs_inode for the ntfs inode itself and | ||
1248 | for the mapped mft record. | ||
1249 | - Extend mft record mapping so we can (un)map extent mft records (new | ||
1250 | functions (un)map_extent_mft_record), and so mappings are reference | ||
1251 | counted and don't have to happen twice if already mapped - just ref | ||
1252 | count increases. | ||
1253 | - Add -o iocharset as alias to -o nls for backwards compatibility. | ||
1254 | - The latest core patch is now tiny. In fact just a single additional | ||
1255 | export is necessary over the base kernel. | ||
1256 | |||
1257 | tng-0.0.3 - Cleanups, enhancements, bug fixes. | ||
1258 | |||
1259 | - Work on attrib.c::decompress_mapping_pairs() to detect base extents | ||
1260 | and setup the runlist appropriately using knowledge provided by the | ||
1261 | sizes in the base attribute record. | ||
1262 | - Balance the get_/put_attr_search_ctx() calls so we don't leak memory | ||
1263 | any more. | ||
1264 | - Introduce ntfs_malloc_nofs() and ntfs_free() to allocate/free a single | ||
1265 | page or use vmalloc depending on the amount of memory requested. | ||
1266 | - Cleanup error output. The __FUNCTION__ "(): " is now added | ||
1267 | automatically. Introduced a new header file debug.h to support this | ||
1268 | and also moved ntfs_debug() function into it. | ||
1269 | - Make reading of compressed files more intelligent and especially get | ||
1270 | rid of the vmalloc_nofs() from readpage(). This now uses per CPU | ||
1271 | buffers (allocated at first mount with cluster size <= 4kiB and | ||
1272 | deallocated on last umount with cluster size <= 4kiB), and | ||
1273 | asynchronous io for the compressed data using a list of buffer heads. | ||
1274 | Er, we use synchronous io as async io only works on whole pages | ||
1275 | covered by buffers and not on individual buffer heads... | ||
1276 | - Bug fix for reading compressed files with sparse compression blocks. | ||
1277 | |||
1278 | tng-0.0.2 - Now handles larger/fragmented/compressed volumes/files/dirs. | ||
1279 | |||
1280 | - Fixed handling of directories when cluster size exceeds index block | ||
1281 | size. | ||
1282 | - Hide DOS only name space directory entries from readdir() but allow | ||
1283 | them in lookup(). This should fix the problem that Linux doesn't | ||
1284 | support directory hard links, while still allowing access to entries | ||
1285 | via their short file name. This also has the benefit of mimicking | ||
1286 | what Windows users are used to, so it is the ideal solution. | ||
1287 | - Implemented sync_page everywhere so no more hangs in D state when | ||
1288 | waiting for a page. | ||
1289 | - Stop using bforget() in favour of brelse(). | ||
1290 | - Stop locking buffers unnecessarily. | ||
1291 | - Implemented compressed files (inode->mapping contains uncompressed | ||
1292 | data, raw compressed data is currently bread() into a vmalloc()ed | ||
1293 | memory buffer). | ||
1294 | - Enable compressed directories. (Their index root is marked compressed | ||
1295 | to indicate that new files in that directory should be created | ||
1296 | compressed.) | ||
1297 | - Use vsnprintf rather than vsprintf in the ntfs_error and ntfs_warning | ||
1298 | functions. (Thanks to Will Dyson for pointing this out.) | ||
1299 | - Moved the ntfs_inode and ntfs_volume (the former ntfs_inode_info and | ||
1300 | ntfs_sb_info) out of the common inode and super_block structures and | ||
1301 | started using the generic_ip and generic_sbp pointers instead. This | ||
1302 | makes ntfs entirely private with respect to the kernel tree. | ||
1303 | - Detect compiler version and abort with error message if gcc less than | ||
1304 | 2.96 is used. | ||
1305 | - Fix bug in name comparison function in unistr.c. | ||
1306 | - Implement attribute lists part 1, the infrastructure: search contexts | ||
1307 | and operations, find_external_attr(), lookup_attr() and make the | ||
1308 | code use the infrastructure. | ||
1309 | - Fix stupid buffer overflow bug that became apparent on larger run | ||
1310 | list containing attributes. | ||
1311 | - Fix bugs in readdir() that became apparent on larger directories. | ||
1312 | |||
1313 | The driver is now really useful and survives the test | ||
1314 | find . -type f -exec md5sum "{}" \; | ||
1315 | without any error messages on an over 1GiB sized partition with >16k | ||
1316 | files on it, including compressed files and directories and many files | ||
1317 | and directories with attribute lists. | ||
1318 | |||
1319 | tng-0.0.1 - The first useful version. | ||
1320 | |||
1321 | - Added ntfs_lookup(). | ||
1322 | - Added default upcase generation and handling. | ||
1323 | - Added compile options to be shown on module init. | ||
1324 | - Many bug fixes that were "hidden" before. | ||
1325 | - Update to latest kernel. | ||
1326 | - Added ntfs_readdir(). | ||
1327 | - Added file operations for mmap(), read(), open() and llseek(). We just | ||
1328 | use the generic ones. The whole point of going through implementing | ||
1329 | readpage() methods and where possible get_block() call backs is that | ||
1330 | this allows us to make use of the generic high level methods provided | ||
1331 | by the kernel. | ||
1332 | |||
1333 | The driver is now actually useful! Yey. (-: It undoubtedly has got bugs | ||
1334 | though and it doesn't implement accessing compressed files yet. Also, | ||
1335 | accessing files with attribute list attributes is not implemented yet | ||
1336 | either. But for small or simple file systems it should work and allow | ||
1337 | you to list directories, use stat on directory entries and the file | ||
1338 | system, open, read, mmap and llseek around in files. A big mile stone | ||
1339 | has been reached! | ||
1340 | |||
1341 | tng-0.0.0 - Initial version tag. | ||
1342 | |||
1343 | Initial driver implementation. The driver can mount and umount simple | ||
1344 | NTFS file systems (i.e. ones without attribute lists in the system | ||
1345 | files). If the mount fails there might be problems in the error handling | ||
1346 | code paths, so be warned. Otherwise it seems to be loading the system | ||
1347 | files nicely and the mft record read mapping/unmapping seems to be | ||
1348 | working nicely, too. Proof of inode metadata in the page cache and non- | ||
1349 | resident file unnamed stream data in the page cache concepts is thus | ||
1350 | complete. | ||
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile new file mode 100644 index 000000000000..7b66381a0b0f --- /dev/null +++ b/fs/ntfs/Makefile | |||
@@ -0,0 +1,19 @@ | |||
1 | # Rules for making the NTFS driver. | ||
2 | |||
3 | obj-$(CONFIG_NTFS_FS) += ntfs.o | ||
4 | |||
5 | ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ | ||
6 | index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ | ||
7 | unistr.o upcase.o | ||
8 | |||
9 | EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.22\" | ||
10 | |||
11 | ifeq ($(CONFIG_NTFS_DEBUG),y) | ||
12 | EXTRA_CFLAGS += -DDEBUG | ||
13 | endif | ||
14 | |||
15 | ifeq ($(CONFIG_NTFS_RW),y) | ||
16 | EXTRA_CFLAGS += -DNTFS_RW | ||
17 | |||
18 | ntfs-objs += bitmap.o lcnalloc.o logfile.o quota.o | ||
19 | endif | ||
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c new file mode 100644 index 000000000000..45d56e41ed98 --- /dev/null +++ b/fs/ntfs/aops.c | |||
@@ -0,0 +1,2324 @@ | |||
1 | /** | ||
2 | * aops.c - NTFS kernel address space operations and page cache handling. | ||
3 | * Part of the Linux-NTFS project. | ||
4 | * | ||
5 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
6 | * Copyright (c) 2002 Richard Russon | ||
7 | * | ||
8 | * This program/include file is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as published | ||
10 | * by the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program/include file is distributed in the hope that it will be | ||
14 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
15 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program (in the main directory of the Linux-NTFS | ||
20 | * distribution in the file COPYING); if not, write to the Free Software | ||
21 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 | */ | ||
23 | |||
24 | #include <linux/errno.h> | ||
25 | #include <linux/mm.h> | ||
26 | #include <linux/pagemap.h> | ||
27 | #include <linux/swap.h> | ||
28 | #include <linux/buffer_head.h> | ||
29 | #include <linux/writeback.h> | ||
30 | |||
31 | #include "aops.h" | ||
32 | #include "attrib.h" | ||
33 | #include "debug.h" | ||
34 | #include "inode.h" | ||
35 | #include "mft.h" | ||
36 | #include "runlist.h" | ||
37 | #include "types.h" | ||
38 | #include "ntfs.h" | ||
39 | |||
40 | /** | ||
41 | * ntfs_end_buffer_async_read - async io completion for reading attributes | ||
42 | * @bh: buffer head on which io is completed | ||
43 | * @uptodate: whether @bh is now uptodate or not | ||
44 | * | ||
45 | * Asynchronous I/O completion handler for reading pages belonging to the | ||
46 | * attribute address space of an inode. The inodes can either be files or | ||
47 | * directories or they can be fake inodes describing some attribute. | ||
48 | * | ||
49 | * If NInoMstProtected(), perform the post read mst fixups when all IO on the | ||
50 | * page has been completed and mark the page uptodate or set the error bit on | ||
51 | * the page. To determine the size of the records that need fixing up, we | ||
52 | * cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs | ||
53 | * record size, and index_block_size_bits, to the log(base 2) of the ntfs | ||
54 | * record size. | ||
55 | */ | ||
56 | static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | ||
57 | { | ||
58 | static DEFINE_SPINLOCK(page_uptodate_lock); | ||
59 | unsigned long flags; | ||
60 | struct buffer_head *tmp; | ||
61 | struct page *page; | ||
62 | ntfs_inode *ni; | ||
63 | int page_uptodate = 1; | ||
64 | |||
65 | page = bh->b_page; | ||
66 | ni = NTFS_I(page->mapping->host); | ||
67 | |||
68 | if (likely(uptodate)) { | ||
69 | s64 file_ofs; | ||
70 | |||
71 | set_buffer_uptodate(bh); | ||
72 | |||
73 | file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) + | ||
74 | bh_offset(bh); | ||
75 | /* Check for the current buffer head overflowing. */ | ||
76 | if (file_ofs + bh->b_size > ni->initialized_size) { | ||
77 | char *addr; | ||
78 | int ofs = 0; | ||
79 | |||
80 | if (file_ofs < ni->initialized_size) | ||
81 | ofs = ni->initialized_size - file_ofs; | ||
82 | addr = kmap_atomic(page, KM_BIO_SRC_IRQ); | ||
83 | memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs); | ||
84 | flush_dcache_page(page); | ||
85 | kunmap_atomic(addr, KM_BIO_SRC_IRQ); | ||
86 | } | ||
87 | } else { | ||
88 | clear_buffer_uptodate(bh); | ||
89 | ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.", | ||
90 | (unsigned long long)bh->b_blocknr); | ||
91 | SetPageError(page); | ||
92 | } | ||
93 | spin_lock_irqsave(&page_uptodate_lock, flags); | ||
94 | clear_buffer_async_read(bh); | ||
95 | unlock_buffer(bh); | ||
96 | tmp = bh; | ||
97 | do { | ||
98 | if (!buffer_uptodate(tmp)) | ||
99 | page_uptodate = 0; | ||
100 | if (buffer_async_read(tmp)) { | ||
101 | if (likely(buffer_locked(tmp))) | ||
102 | goto still_busy; | ||
103 | /* Async buffers must be locked. */ | ||
104 | BUG(); | ||
105 | } | ||
106 | tmp = tmp->b_this_page; | ||
107 | } while (tmp != bh); | ||
108 | spin_unlock_irqrestore(&page_uptodate_lock, flags); | ||
109 | /* | ||
110 | * If none of the buffers had errors then we can set the page uptodate, | ||
111 | * but we first have to perform the post read mst fixups, if the | ||
112 | * attribute is mst protected, i.e. if NInoMstProteced(ni) is true. | ||
113 | * Note we ignore fixup errors as those are detected when | ||
114 | * map_mft_record() is called which gives us per record granularity | ||
115 | * rather than per page granularity. | ||
116 | */ | ||
117 | if (!NInoMstProtected(ni)) { | ||
118 | if (likely(page_uptodate && !PageError(page))) | ||
119 | SetPageUptodate(page); | ||
120 | } else { | ||
121 | char *addr; | ||
122 | unsigned int i, recs; | ||
123 | u32 rec_size; | ||
124 | |||
125 | rec_size = ni->itype.index.block_size; | ||
126 | recs = PAGE_CACHE_SIZE / rec_size; | ||
127 | /* Should have been verified before we got here... */ | ||
128 | BUG_ON(!recs); | ||
129 | addr = kmap_atomic(page, KM_BIO_SRC_IRQ); | ||
130 | for (i = 0; i < recs; i++) | ||
131 | post_read_mst_fixup((NTFS_RECORD*)(addr + | ||
132 | i * rec_size), rec_size); | ||
133 | flush_dcache_page(page); | ||
134 | kunmap_atomic(addr, KM_BIO_SRC_IRQ); | ||
135 | if (likely(!PageError(page) && page_uptodate)) | ||
136 | SetPageUptodate(page); | ||
137 | } | ||
138 | unlock_page(page); | ||
139 | return; | ||
140 | still_busy: | ||
141 | spin_unlock_irqrestore(&page_uptodate_lock, flags); | ||
142 | return; | ||
143 | } | ||
144 | |||
145 | /** | ||
146 | * ntfs_read_block - fill a @page of an address space with data | ||
147 | * @page: page cache page to fill with data | ||
148 | * | ||
149 | * Fill the page @page of the address space belonging to the @page->host inode. | ||
150 | * We read each buffer asynchronously and when all buffers are read in, our io | ||
151 | * completion handler ntfs_end_buffer_async_read(), if required, automatically | ||
152 | * applies the mst fixups to the page before finally marking it uptodate and | ||
153 | * unlocking it. | ||
154 | * | ||
155 | * We only enforce allocated_size limit because i_size is checked for in | ||
156 | * generic_file_read(). | ||
157 | * | ||
158 | * Return 0 on success and -errno on error. | ||
159 | * | ||
160 | * Contains an adapted version of fs/buffer.c::block_read_full_page(). | ||
161 | */ | ||
162 | static int ntfs_read_block(struct page *page) | ||
163 | { | ||
164 | VCN vcn; | ||
165 | LCN lcn; | ||
166 | ntfs_inode *ni; | ||
167 | ntfs_volume *vol; | ||
168 | runlist_element *rl; | ||
169 | struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; | ||
170 | sector_t iblock, lblock, zblock; | ||
171 | unsigned int blocksize, vcn_ofs; | ||
172 | int i, nr; | ||
173 | unsigned char blocksize_bits; | ||
174 | |||
175 | ni = NTFS_I(page->mapping->host); | ||
176 | vol = ni->vol; | ||
177 | |||
178 | /* $MFT/$DATA must have its complete runlist in memory at all times. */ | ||
179 | BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni)); | ||
180 | |||
181 | blocksize_bits = VFS_I(ni)->i_blkbits; | ||
182 | blocksize = 1 << blocksize_bits; | ||
183 | |||
184 | if (!page_has_buffers(page)) | ||
185 | create_empty_buffers(page, blocksize, 0); | ||
186 | bh = head = page_buffers(page); | ||
187 | if (unlikely(!bh)) { | ||
188 | unlock_page(page); | ||
189 | return -ENOMEM; | ||
190 | } | ||
191 | |||
192 | iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); | ||
193 | lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits; | ||
194 | zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits; | ||
195 | |||
196 | /* Loop through all the buffers in the page. */ | ||
197 | rl = NULL; | ||
198 | nr = i = 0; | ||
199 | do { | ||
200 | u8 *kaddr; | ||
201 | |||
202 | if (unlikely(buffer_uptodate(bh))) | ||
203 | continue; | ||
204 | if (unlikely(buffer_mapped(bh))) { | ||
205 | arr[nr++] = bh; | ||
206 | continue; | ||
207 | } | ||
208 | bh->b_bdev = vol->sb->s_bdev; | ||
209 | /* Is the block within the allowed limits? */ | ||
210 | if (iblock < lblock) { | ||
211 | BOOL is_retry = FALSE; | ||
212 | |||
213 | /* Convert iblock into corresponding vcn and offset. */ | ||
214 | vcn = (VCN)iblock << blocksize_bits >> | ||
215 | vol->cluster_size_bits; | ||
216 | vcn_ofs = ((VCN)iblock << blocksize_bits) & | ||
217 | vol->cluster_size_mask; | ||
218 | if (!rl) { | ||
219 | lock_retry_remap: | ||
220 | down_read(&ni->runlist.lock); | ||
221 | rl = ni->runlist.rl; | ||
222 | } | ||
223 | if (likely(rl != NULL)) { | ||
224 | /* Seek to element containing target vcn. */ | ||
225 | while (rl->length && rl[1].vcn <= vcn) | ||
226 | rl++; | ||
227 | lcn = ntfs_rl_vcn_to_lcn(rl, vcn); | ||
228 | } else | ||
229 | lcn = LCN_RL_NOT_MAPPED; | ||
230 | /* Successful remap. */ | ||
231 | if (lcn >= 0) { | ||
232 | /* Setup buffer head to correct block. */ | ||
233 | bh->b_blocknr = ((lcn << vol->cluster_size_bits) | ||
234 | + vcn_ofs) >> blocksize_bits; | ||
235 | set_buffer_mapped(bh); | ||
236 | /* Only read initialized data blocks. */ | ||
237 | if (iblock < zblock) { | ||
238 | arr[nr++] = bh; | ||
239 | continue; | ||
240 | } | ||
241 | /* Fully non-initialized data block, zero it. */ | ||
242 | goto handle_zblock; | ||
243 | } | ||
244 | /* It is a hole, need to zero it. */ | ||
245 | if (lcn == LCN_HOLE) | ||
246 | goto handle_hole; | ||
247 | /* If first try and runlist unmapped, map and retry. */ | ||
248 | if (!is_retry && lcn == LCN_RL_NOT_MAPPED) { | ||
249 | int err; | ||
250 | is_retry = TRUE; | ||
251 | /* | ||
252 | * Attempt to map runlist, dropping lock for | ||
253 | * the duration. | ||
254 | */ | ||
255 | up_read(&ni->runlist.lock); | ||
256 | err = ntfs_map_runlist(ni, vcn); | ||
257 | if (likely(!err)) | ||
258 | goto lock_retry_remap; | ||
259 | rl = NULL; | ||
260 | lcn = err; | ||
261 | } | ||
262 | /* Hard error, zero out region. */ | ||
263 | bh->b_blocknr = -1; | ||
264 | SetPageError(page); | ||
265 | ntfs_error(vol->sb, "Failed to read from inode 0x%lx, " | ||
266 | "attribute type 0x%x, vcn 0x%llx, " | ||
267 | "offset 0x%x because its location on " | ||
268 | "disk could not be determined%s " | ||
269 | "(error code %lli).", ni->mft_no, | ||
270 | ni->type, (unsigned long long)vcn, | ||
271 | vcn_ofs, is_retry ? " even after " | ||
272 | "retrying" : "", (long long)lcn); | ||
273 | } | ||
274 | /* | ||
275 | * Either iblock was outside lblock limits or | ||
276 | * ntfs_rl_vcn_to_lcn() returned error. Just zero that portion | ||
277 | * of the page and set the buffer uptodate. | ||
278 | */ | ||
279 | handle_hole: | ||
280 | bh->b_blocknr = -1UL; | ||
281 | clear_buffer_mapped(bh); | ||
282 | handle_zblock: | ||
283 | kaddr = kmap_atomic(page, KM_USER0); | ||
284 | memset(kaddr + i * blocksize, 0, blocksize); | ||
285 | flush_dcache_page(page); | ||
286 | kunmap_atomic(kaddr, KM_USER0); | ||
287 | set_buffer_uptodate(bh); | ||
288 | } while (i++, iblock++, (bh = bh->b_this_page) != head); | ||
289 | |||
290 | /* Release the lock if we took it. */ | ||
291 | if (rl) | ||
292 | up_read(&ni->runlist.lock); | ||
293 | |||
294 | /* Check we have at least one buffer ready for i/o. */ | ||
295 | if (nr) { | ||
296 | struct buffer_head *tbh; | ||
297 | |||
298 | /* Lock the buffers. */ | ||
299 | for (i = 0; i < nr; i++) { | ||
300 | tbh = arr[i]; | ||
301 | lock_buffer(tbh); | ||
302 | tbh->b_end_io = ntfs_end_buffer_async_read; | ||
303 | set_buffer_async_read(tbh); | ||
304 | } | ||
305 | /* Finally, start i/o on the buffers. */ | ||
306 | for (i = 0; i < nr; i++) { | ||
307 | tbh = arr[i]; | ||
308 | if (likely(!buffer_uptodate(tbh))) | ||
309 | submit_bh(READ, tbh); | ||
310 | else | ||
311 | ntfs_end_buffer_async_read(tbh, 1); | ||
312 | } | ||
313 | return 0; | ||
314 | } | ||
315 | /* No i/o was scheduled on any of the buffers. */ | ||
316 | if (likely(!PageError(page))) | ||
317 | SetPageUptodate(page); | ||
318 | else /* Signal synchronous i/o error. */ | ||
319 | nr = -EIO; | ||
320 | unlock_page(page); | ||
321 | return nr; | ||
322 | } | ||
323 | |||
324 | /** | ||
325 | * ntfs_readpage - fill a @page of a @file with data from the device | ||
326 | * @file: open file to which the page @page belongs or NULL | ||
327 | * @page: page cache page to fill with data | ||
328 | * | ||
329 | * For non-resident attributes, ntfs_readpage() fills the @page of the open | ||
330 | * file @file by calling the ntfs version of the generic block_read_full_page() | ||
331 | * function, ntfs_read_block(), which in turn creates and reads in the buffers | ||
332 | * associated with the page asynchronously. | ||
333 | * | ||
334 | * For resident attributes, OTOH, ntfs_readpage() fills @page by copying the | ||
335 | * data from the mft record (which at this stage is most likely in memory) and | ||
336 | * fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as | ||
337 | * even if the mft record is not cached at this point in time, we need to wait | ||
338 | * for it to be read in before we can do the copy. | ||
339 | * | ||
340 | * Return 0 on success and -errno on error. | ||
341 | */ | ||
342 | static int ntfs_readpage(struct file *file, struct page *page) | ||
343 | { | ||
344 | loff_t i_size; | ||
345 | ntfs_inode *ni, *base_ni; | ||
346 | u8 *kaddr; | ||
347 | ntfs_attr_search_ctx *ctx; | ||
348 | MFT_RECORD *mrec; | ||
349 | u32 attr_len; | ||
350 | int err = 0; | ||
351 | |||
352 | BUG_ON(!PageLocked(page)); | ||
353 | /* | ||
354 | * This can potentially happen because we clear PageUptodate() during | ||
355 | * ntfs_writepage() of MstProtected() attributes. | ||
356 | */ | ||
357 | if (PageUptodate(page)) { | ||
358 | unlock_page(page); | ||
359 | return 0; | ||
360 | } | ||
361 | ni = NTFS_I(page->mapping->host); | ||
362 | |||
363 | /* NInoNonResident() == NInoIndexAllocPresent() */ | ||
364 | if (NInoNonResident(ni)) { | ||
365 | /* | ||
366 | * Only unnamed $DATA attributes can be compressed or | ||
367 | * encrypted. | ||
368 | */ | ||
369 | if (ni->type == AT_DATA && !ni->name_len) { | ||
370 | /* If file is encrypted, deny access, just like NT4. */ | ||
371 | if (NInoEncrypted(ni)) { | ||
372 | err = -EACCES; | ||
373 | goto err_out; | ||
374 | } | ||
375 | /* Compressed data streams are handled in compress.c. */ | ||
376 | if (NInoCompressed(ni)) | ||
377 | return ntfs_read_compressed_block(page); | ||
378 | } | ||
379 | /* Normal data stream. */ | ||
380 | return ntfs_read_block(page); | ||
381 | } | ||
382 | /* | ||
383 | * Attribute is resident, implying it is not compressed or encrypted. | ||
384 | * This also means the attribute is smaller than an mft record and | ||
385 | * hence smaller than a page, so can simply zero out any pages with | ||
386 | * index above 0. We can also do this if the file size is 0. | ||
387 | */ | ||
388 | if (unlikely(page->index > 0 || !i_size_read(VFS_I(ni)))) { | ||
389 | kaddr = kmap_atomic(page, KM_USER0); | ||
390 | memset(kaddr, 0, PAGE_CACHE_SIZE); | ||
391 | flush_dcache_page(page); | ||
392 | kunmap_atomic(kaddr, KM_USER0); | ||
393 | goto done; | ||
394 | } | ||
395 | if (!NInoAttr(ni)) | ||
396 | base_ni = ni; | ||
397 | else | ||
398 | base_ni = ni->ext.base_ntfs_ino; | ||
399 | /* Map, pin, and lock the mft record. */ | ||
400 | mrec = map_mft_record(base_ni); | ||
401 | if (IS_ERR(mrec)) { | ||
402 | err = PTR_ERR(mrec); | ||
403 | goto err_out; | ||
404 | } | ||
405 | ctx = ntfs_attr_get_search_ctx(base_ni, mrec); | ||
406 | if (unlikely(!ctx)) { | ||
407 | err = -ENOMEM; | ||
408 | goto unm_err_out; | ||
409 | } | ||
410 | err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, | ||
411 | CASE_SENSITIVE, 0, NULL, 0, ctx); | ||
412 | if (unlikely(err)) | ||
413 | goto put_unm_err_out; | ||
414 | attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); | ||
415 | i_size = i_size_read(VFS_I(ni)); | ||
416 | if (unlikely(attr_len > i_size)) | ||
417 | attr_len = i_size; | ||
418 | kaddr = kmap_atomic(page, KM_USER0); | ||
419 | /* Copy the data to the page. */ | ||
420 | memcpy(kaddr, (u8*)ctx->attr + | ||
421 | le16_to_cpu(ctx->attr->data.resident.value_offset), | ||
422 | attr_len); | ||
423 | /* Zero the remainder of the page. */ | ||
424 | memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); | ||
425 | flush_dcache_page(page); | ||
426 | kunmap_atomic(kaddr, KM_USER0); | ||
427 | put_unm_err_out: | ||
428 | ntfs_attr_put_search_ctx(ctx); | ||
429 | unm_err_out: | ||
430 | unmap_mft_record(base_ni); | ||
431 | done: | ||
432 | SetPageUptodate(page); | ||
433 | err_out: | ||
434 | unlock_page(page); | ||
435 | return err; | ||
436 | } | ||
437 | |||
438 | #ifdef NTFS_RW | ||
439 | |||
440 | /** | ||
441 | * ntfs_write_block - write a @page to the backing store | ||
442 | * @page: page cache page to write out | ||
443 | * @wbc: writeback control structure | ||
444 | * | ||
445 | * This function is for writing pages belonging to non-resident, non-mst | ||
446 | * protected attributes to their backing store. | ||
447 | * | ||
448 | * For a page with buffers, map and write the dirty buffers asynchronously | ||
449 | * under page writeback. For a page without buffers, create buffers for the | ||
450 | * page, then proceed as above. | ||
451 | * | ||
452 | * If a page doesn't have buffers the page dirty state is definitive. If a page | ||
453 | * does have buffers, the page dirty state is just a hint, and the buffer dirty | ||
454 | * state is definitive. (A hint which has rules: dirty buffers against a clean | ||
455 | * page is illegal. Other combinations are legal and need to be handled. In | ||
456 | * particular a dirty page containing clean buffers for example.) | ||
457 | * | ||
458 | * Return 0 on success and -errno on error. | ||
459 | * | ||
460 | * Based on ntfs_read_block() and __block_write_full_page(). | ||
461 | */ | ||
462 | static int ntfs_write_block(struct page *page, struct writeback_control *wbc) | ||
463 | { | ||
464 | VCN vcn; | ||
465 | LCN lcn; | ||
466 | sector_t block, dblock, iblock; | ||
467 | struct inode *vi; | ||
468 | ntfs_inode *ni; | ||
469 | ntfs_volume *vol; | ||
470 | runlist_element *rl; | ||
471 | struct buffer_head *bh, *head; | ||
472 | unsigned int blocksize, vcn_ofs; | ||
473 | int err; | ||
474 | BOOL need_end_writeback; | ||
475 | unsigned char blocksize_bits; | ||
476 | |||
477 | vi = page->mapping->host; | ||
478 | ni = NTFS_I(vi); | ||
479 | vol = ni->vol; | ||
480 | |||
481 | ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " | ||
482 | "0x%lx.", ni->mft_no, ni->type, page->index); | ||
483 | |||
484 | BUG_ON(!NInoNonResident(ni)); | ||
485 | BUG_ON(NInoMstProtected(ni)); | ||
486 | |||
487 | blocksize_bits = vi->i_blkbits; | ||
488 | blocksize = 1 << blocksize_bits; | ||
489 | |||
490 | if (!page_has_buffers(page)) { | ||
491 | BUG_ON(!PageUptodate(page)); | ||
492 | create_empty_buffers(page, blocksize, | ||
493 | (1 << BH_Uptodate) | (1 << BH_Dirty)); | ||
494 | } | ||
495 | bh = head = page_buffers(page); | ||
496 | if (unlikely(!bh)) { | ||
497 | ntfs_warning(vol->sb, "Error allocating page buffers. " | ||
498 | "Redirtying page so we try again later."); | ||
499 | /* | ||
500 | * Put the page back on mapping->dirty_pages, but leave its | ||
501 | * buffer's dirty state as-is. | ||
502 | */ | ||
503 | redirty_page_for_writepage(wbc, page); | ||
504 | unlock_page(page); | ||
505 | return 0; | ||
506 | } | ||
507 | |||
508 | /* NOTE: Different naming scheme to ntfs_read_block()! */ | ||
509 | |||
510 | /* The first block in the page. */ | ||
511 | block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); | ||
512 | |||
513 | /* The first out of bounds block for the data size. */ | ||
514 | dblock = (vi->i_size + blocksize - 1) >> blocksize_bits; | ||
515 | |||
516 | /* The last (fully or partially) initialized block. */ | ||
517 | iblock = ni->initialized_size >> blocksize_bits; | ||
518 | |||
519 | /* | ||
520 | * Be very careful. We have no exclusion from __set_page_dirty_buffers | ||
521 | * here, and the (potentially unmapped) buffers may become dirty at | ||
522 | * any time. If a buffer becomes dirty here after we've inspected it | ||
523 | * then we just miss that fact, and the page stays dirty. | ||
524 | * | ||
525 | * Buffers outside i_size may be dirtied by __set_page_dirty_buffers; | ||
526 | * handle that here by just cleaning them. | ||
527 | */ | ||
528 | |||
529 | /* | ||
530 | * Loop through all the buffers in the page, mapping all the dirty | ||
531 | * buffers to disk addresses and handling any aliases from the | ||
532 | * underlying block device's mapping. | ||
533 | */ | ||
534 | rl = NULL; | ||
535 | err = 0; | ||
536 | do { | ||
537 | BOOL is_retry = FALSE; | ||
538 | |||
539 | if (unlikely(block >= dblock)) { | ||
540 | /* | ||
541 | * Mapped buffers outside i_size will occur, because | ||
542 | * this page can be outside i_size when there is a | ||
543 | * truncate in progress. The contents of such buffers | ||
544 | * were zeroed by ntfs_writepage(). | ||
545 | * | ||
546 | * FIXME: What about the small race window where | ||
547 | * ntfs_writepage() has not done any clearing because | ||
548 | * the page was within i_size but before we get here, | ||
549 | * vmtruncate() modifies i_size? | ||
550 | */ | ||
551 | clear_buffer_dirty(bh); | ||
552 | set_buffer_uptodate(bh); | ||
553 | continue; | ||
554 | } | ||
555 | |||
556 | /* Clean buffers are not written out, so no need to map them. */ | ||
557 | if (!buffer_dirty(bh)) | ||
558 | continue; | ||
559 | |||
560 | /* Make sure we have enough initialized size. */ | ||
561 | if (unlikely((block >= iblock) && | ||
562 | (ni->initialized_size < vi->i_size))) { | ||
563 | /* | ||
564 | * If this page is fully outside initialized size, zero | ||
565 | * out all pages between the current initialized size | ||
566 | * and the current page. Just use ntfs_readpage() to do | ||
567 | * the zeroing transparently. | ||
568 | */ | ||
569 | if (block > iblock) { | ||
570 | // TODO: | ||
571 | // For each page do: | ||
572 | // - read_cache_page() | ||
573 | // Again for each page do: | ||
574 | // - wait_on_page_locked() | ||
575 | // - Check (PageUptodate(page) && | ||
576 | // !PageError(page)) | ||
577 | // Update initialized size in the attribute and | ||
578 | // in the inode. | ||
579 | // Again, for each page do: | ||
580 | // __set_page_dirty_buffers(); | ||
581 | // page_cache_release() | ||
582 | // We don't need to wait on the writes. | ||
583 | // Update iblock. | ||
584 | } | ||
585 | /* | ||
586 | * The current page straddles initialized size. Zero | ||
587 | * all non-uptodate buffers and set them uptodate (and | ||
588 | * dirty?). Note, there aren't any non-uptodate buffers | ||
589 | * if the page is uptodate. | ||
590 | * FIXME: For an uptodate page, the buffers may need to | ||
591 | * be written out because they were not initialized on | ||
592 | * disk before. | ||
593 | */ | ||
594 | if (!PageUptodate(page)) { | ||
595 | // TODO: | ||
596 | // Zero any non-uptodate buffers up to i_size. | ||
597 | // Set them uptodate and dirty. | ||
598 | } | ||
599 | // TODO: | ||
600 | // Update initialized size in the attribute and in the | ||
601 | // inode (up to i_size). | ||
602 | // Update iblock. | ||
603 | // FIXME: This is inefficient. Try to batch the two | ||
604 | // size changes to happen in one go. | ||
605 | ntfs_error(vol->sb, "Writing beyond initialized size " | ||
606 | "is not supported yet. Sorry."); | ||
607 | err = -EOPNOTSUPP; | ||
608 | break; | ||
609 | // Do NOT set_buffer_new() BUT DO clear buffer range | ||
610 | // outside write request range. | ||
611 | // set_buffer_uptodate() on complete buffers as well as | ||
612 | // set_buffer_dirty(). | ||
613 | } | ||
614 | |||
615 | /* No need to map buffers that are already mapped. */ | ||
616 | if (buffer_mapped(bh)) | ||
617 | continue; | ||
618 | |||
619 | /* Unmapped, dirty buffer. Need to map it. */ | ||
620 | bh->b_bdev = vol->sb->s_bdev; | ||
621 | |||
622 | /* Convert block into corresponding vcn and offset. */ | ||
623 | vcn = (VCN)block << blocksize_bits; | ||
624 | vcn_ofs = vcn & vol->cluster_size_mask; | ||
625 | vcn >>= vol->cluster_size_bits; | ||
626 | if (!rl) { | ||
627 | lock_retry_remap: | ||
628 | down_read(&ni->runlist.lock); | ||
629 | rl = ni->runlist.rl; | ||
630 | } | ||
631 | if (likely(rl != NULL)) { | ||
632 | /* Seek to element containing target vcn. */ | ||
633 | while (rl->length && rl[1].vcn <= vcn) | ||
634 | rl++; | ||
635 | lcn = ntfs_rl_vcn_to_lcn(rl, vcn); | ||
636 | } else | ||
637 | lcn = LCN_RL_NOT_MAPPED; | ||
638 | /* Successful remap. */ | ||
639 | if (lcn >= 0) { | ||
640 | /* Setup buffer head to point to correct block. */ | ||
641 | bh->b_blocknr = ((lcn << vol->cluster_size_bits) + | ||
642 | vcn_ofs) >> blocksize_bits; | ||
643 | set_buffer_mapped(bh); | ||
644 | continue; | ||
645 | } | ||
646 | /* It is a hole, need to instantiate it. */ | ||
647 | if (lcn == LCN_HOLE) { | ||
648 | // TODO: Instantiate the hole. | ||
649 | // clear_buffer_new(bh); | ||
650 | // unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); | ||
651 | ntfs_error(vol->sb, "Writing into sparse regions is " | ||
652 | "not supported yet. Sorry."); | ||
653 | err = -EOPNOTSUPP; | ||
654 | break; | ||
655 | } | ||
656 | /* If first try and runlist unmapped, map and retry. */ | ||
657 | if (!is_retry && lcn == LCN_RL_NOT_MAPPED) { | ||
658 | is_retry = TRUE; | ||
659 | /* | ||
660 | * Attempt to map runlist, dropping lock for | ||
661 | * the duration. | ||
662 | */ | ||
663 | up_read(&ni->runlist.lock); | ||
664 | err = ntfs_map_runlist(ni, vcn); | ||
665 | if (likely(!err)) | ||
666 | goto lock_retry_remap; | ||
667 | rl = NULL; | ||
668 | lcn = err; | ||
669 | } | ||
670 | /* Failed to map the buffer, even after retrying. */ | ||
671 | bh->b_blocknr = -1; | ||
672 | ntfs_error(vol->sb, "Failed to write to inode 0x%lx, " | ||
673 | "attribute type 0x%x, vcn 0x%llx, offset 0x%x " | ||
674 | "because its location on disk could not be " | ||
675 | "determined%s (error code %lli).", ni->mft_no, | ||
676 | ni->type, (unsigned long long)vcn, | ||
677 | vcn_ofs, is_retry ? " even after " | ||
678 | "retrying" : "", (long long)lcn); | ||
679 | if (!err) | ||
680 | err = -EIO; | ||
681 | break; | ||
682 | } while (block++, (bh = bh->b_this_page) != head); | ||
683 | |||
684 | /* Release the lock if we took it. */ | ||
685 | if (rl) | ||
686 | up_read(&ni->runlist.lock); | ||
687 | |||
688 | /* For the error case, need to reset bh to the beginning. */ | ||
689 | bh = head; | ||
690 | |||
691 | /* Just an optimization, so ->readpage() isn't called later. */ | ||
692 | if (unlikely(!PageUptodate(page))) { | ||
693 | int uptodate = 1; | ||
694 | do { | ||
695 | if (!buffer_uptodate(bh)) { | ||
696 | uptodate = 0; | ||
697 | bh = head; | ||
698 | break; | ||
699 | } | ||
700 | } while ((bh = bh->b_this_page) != head); | ||
701 | if (uptodate) | ||
702 | SetPageUptodate(page); | ||
703 | } | ||
704 | |||
705 | /* Setup all mapped, dirty buffers for async write i/o. */ | ||
706 | do { | ||
707 | get_bh(bh); | ||
708 | if (buffer_mapped(bh) && buffer_dirty(bh)) { | ||
709 | lock_buffer(bh); | ||
710 | if (test_clear_buffer_dirty(bh)) { | ||
711 | BUG_ON(!buffer_uptodate(bh)); | ||
712 | mark_buffer_async_write(bh); | ||
713 | } else | ||
714 | unlock_buffer(bh); | ||
715 | } else if (unlikely(err)) { | ||
716 | /* | ||
717 | * For the error case. The buffer may have been set | ||
718 | * dirty during attachment to a dirty page. | ||
719 | */ | ||
720 | if (err != -ENOMEM) | ||
721 | clear_buffer_dirty(bh); | ||
722 | } | ||
723 | } while ((bh = bh->b_this_page) != head); | ||
724 | |||
725 | if (unlikely(err)) { | ||
726 | // TODO: Remove the -EOPNOTSUPP check later on... | ||
727 | if (unlikely(err == -EOPNOTSUPP)) | ||
728 | err = 0; | ||
729 | else if (err == -ENOMEM) { | ||
730 | ntfs_warning(vol->sb, "Error allocating memory. " | ||
731 | "Redirtying page so we try again " | ||
732 | "later."); | ||
733 | /* | ||
734 | * Put the page back on mapping->dirty_pages, but | ||
735 | * leave its buffer's dirty state as-is. | ||
736 | */ | ||
737 | redirty_page_for_writepage(wbc, page); | ||
738 | err = 0; | ||
739 | } else | ||
740 | SetPageError(page); | ||
741 | } | ||
742 | |||
743 | BUG_ON(PageWriteback(page)); | ||
744 | set_page_writeback(page); /* Keeps try_to_free_buffers() away. */ | ||
745 | unlock_page(page); | ||
746 | |||
747 | /* | ||
748 | * Submit the prepared buffers for i/o. Note the page is unlocked, | ||
749 | * and the async write i/o completion handler can end_page_writeback() | ||
750 | * at any time after the *first* submit_bh(). So the buffers can then | ||
751 | * disappear... | ||
752 | */ | ||
753 | need_end_writeback = TRUE; | ||
754 | do { | ||
755 | struct buffer_head *next = bh->b_this_page; | ||
756 | if (buffer_async_write(bh)) { | ||
757 | submit_bh(WRITE, bh); | ||
758 | need_end_writeback = FALSE; | ||
759 | } | ||
760 | put_bh(bh); | ||
761 | bh = next; | ||
762 | } while (bh != head); | ||
763 | |||
764 | /* If no i/o was started, need to end_page_writeback(). */ | ||
765 | if (unlikely(need_end_writeback)) | ||
766 | end_page_writeback(page); | ||
767 | |||
768 | ntfs_debug("Done."); | ||
769 | return err; | ||
770 | } | ||
771 | |||
772 | /** | ||
773 | * ntfs_write_mst_block - write a @page to the backing store | ||
774 | * @page: page cache page to write out | ||
775 | * @wbc: writeback control structure | ||
776 | * | ||
777 | * This function is for writing pages belonging to non-resident, mst protected | ||
778 | * attributes to their backing store. The only supported attributes are index | ||
779 | * allocation and $MFT/$DATA. Both directory inodes and index inodes are | ||
780 | * supported for the index allocation case. | ||
781 | * | ||
782 | * The page must remain locked for the duration of the write because we apply | ||
783 | * the mst fixups, write, and then undo the fixups, so if we were to unlock the | ||
784 | * page before undoing the fixups, any other user of the page will see the | ||
785 | * page contents as corrupt. | ||
786 | * | ||
787 | * We clear the page uptodate flag for the duration of the function to ensure | ||
788 | * exclusion for the $MFT/$DATA case against someone mapping an mft record we | ||
789 | * are about to apply the mst fixups to. | ||
790 | * | ||
791 | * Return 0 on success and -errno on error. | ||
792 | * | ||
793 | * Based on ntfs_write_block(), ntfs_mft_writepage(), and | ||
794 | * write_mft_record_nolock(). | ||
795 | */ | ||
796 | static int ntfs_write_mst_block(struct page *page, | ||
797 | struct writeback_control *wbc) | ||
798 | { | ||
799 | sector_t block, dblock, rec_block; | ||
800 | struct inode *vi = page->mapping->host; | ||
801 | ntfs_inode *ni = NTFS_I(vi); | ||
802 | ntfs_volume *vol = ni->vol; | ||
803 | u8 *kaddr; | ||
804 | unsigned char bh_size_bits = vi->i_blkbits; | ||
805 | unsigned int bh_size = 1 << bh_size_bits; | ||
806 | unsigned int rec_size = ni->itype.index.block_size; | ||
807 | ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size]; | ||
808 | struct buffer_head *bh, *head, *tbh, *rec_start_bh; | ||
809 | int max_bhs = PAGE_CACHE_SIZE / bh_size; | ||
810 | struct buffer_head *bhs[max_bhs]; | ||
811 | runlist_element *rl; | ||
812 | int i, nr_locked_nis, nr_recs, nr_bhs, bhs_per_rec, err, err2; | ||
813 | unsigned rec_size_bits; | ||
814 | BOOL sync, is_mft, page_is_dirty, rec_is_dirty; | ||
815 | |||
816 | ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " | ||
817 | "0x%lx.", vi->i_ino, ni->type, page->index); | ||
818 | BUG_ON(!NInoNonResident(ni)); | ||
819 | BUG_ON(!NInoMstProtected(ni)); | ||
820 | is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino); | ||
821 | /* | ||
822 | * NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page | ||
823 | * in its page cache were to be marked dirty. However this should | ||
824 | * never happen with the current driver and considering we do not | ||
825 | * handle this case here we do want to BUG(), at least for now. | ||
826 | */ | ||
827 | BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) || | ||
828 | (NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION))); | ||
829 | BUG_ON(!max_bhs); | ||
830 | |||
831 | /* Were we called for sync purposes? */ | ||
832 | sync = (wbc->sync_mode == WB_SYNC_ALL); | ||
833 | |||
834 | /* Make sure we have mapped buffers. */ | ||
835 | BUG_ON(!page_has_buffers(page)); | ||
836 | bh = head = page_buffers(page); | ||
837 | BUG_ON(!bh); | ||
838 | |||
839 | rec_size_bits = ni->itype.index.block_size_bits; | ||
840 | BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits)); | ||
841 | bhs_per_rec = rec_size >> bh_size_bits; | ||
842 | BUG_ON(!bhs_per_rec); | ||
843 | |||
844 | /* The first block in the page. */ | ||
845 | rec_block = block = (sector_t)page->index << | ||
846 | (PAGE_CACHE_SHIFT - bh_size_bits); | ||
847 | |||
848 | /* The first out of bounds block for the data size. */ | ||
849 | dblock = (vi->i_size + bh_size - 1) >> bh_size_bits; | ||
850 | |||
851 | rl = NULL; | ||
852 | err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0; | ||
853 | page_is_dirty = rec_is_dirty = FALSE; | ||
854 | rec_start_bh = NULL; | ||
855 | do { | ||
856 | BOOL is_retry = FALSE; | ||
857 | |||
858 | if (likely(block < rec_block)) { | ||
859 | if (unlikely(block >= dblock)) { | ||
860 | clear_buffer_dirty(bh); | ||
861 | continue; | ||
862 | } | ||
863 | /* | ||
864 | * This block is not the first one in the record. We | ||
865 | * ignore the buffer's dirty state because we could | ||
866 | * have raced with a parallel mark_ntfs_record_dirty(). | ||
867 | */ | ||
868 | if (!rec_is_dirty) | ||
869 | continue; | ||
870 | if (unlikely(err2)) { | ||
871 | if (err2 != -ENOMEM) | ||
872 | clear_buffer_dirty(bh); | ||
873 | continue; | ||
874 | } | ||
875 | } else /* if (block == rec_block) */ { | ||
876 | BUG_ON(block > rec_block); | ||
877 | /* This block is the first one in the record. */ | ||
878 | rec_block += bhs_per_rec; | ||
879 | err2 = 0; | ||
880 | if (unlikely(block >= dblock)) { | ||
881 | clear_buffer_dirty(bh); | ||
882 | continue; | ||
883 | } | ||
884 | if (!buffer_dirty(bh)) { | ||
885 | /* Clean records are not written out. */ | ||
886 | rec_is_dirty = FALSE; | ||
887 | continue; | ||
888 | } | ||
889 | rec_is_dirty = TRUE; | ||
890 | rec_start_bh = bh; | ||
891 | } | ||
892 | /* Need to map the buffer if it is not mapped already. */ | ||
893 | if (unlikely(!buffer_mapped(bh))) { | ||
894 | VCN vcn; | ||
895 | LCN lcn; | ||
896 | unsigned int vcn_ofs; | ||
897 | |||
898 | /* Obtain the vcn and offset of the current block. */ | ||
899 | vcn = (VCN)block << bh_size_bits; | ||
900 | vcn_ofs = vcn & vol->cluster_size_mask; | ||
901 | vcn >>= vol->cluster_size_bits; | ||
902 | if (!rl) { | ||
903 | lock_retry_remap: | ||
904 | down_read(&ni->runlist.lock); | ||
905 | rl = ni->runlist.rl; | ||
906 | } | ||
907 | if (likely(rl != NULL)) { | ||
908 | /* Seek to element containing target vcn. */ | ||
909 | while (rl->length && rl[1].vcn <= vcn) | ||
910 | rl++; | ||
911 | lcn = ntfs_rl_vcn_to_lcn(rl, vcn); | ||
912 | } else | ||
913 | lcn = LCN_RL_NOT_MAPPED; | ||
914 | /* Successful remap. */ | ||
915 | if (likely(lcn >= 0)) { | ||
916 | /* Setup buffer head to correct block. */ | ||
917 | bh->b_blocknr = ((lcn << | ||
918 | vol->cluster_size_bits) + | ||
919 | vcn_ofs) >> bh_size_bits; | ||
920 | set_buffer_mapped(bh); | ||
921 | } else { | ||
922 | /* | ||
923 | * Remap failed. Retry to map the runlist once | ||
924 | * unless we are working on $MFT which always | ||
925 | * has the whole of its runlist in memory. | ||
926 | */ | ||
927 | if (!is_mft && !is_retry && | ||
928 | lcn == LCN_RL_NOT_MAPPED) { | ||
929 | is_retry = TRUE; | ||
930 | /* | ||
931 | * Attempt to map runlist, dropping | ||
932 | * lock for the duration. | ||
933 | */ | ||
934 | up_read(&ni->runlist.lock); | ||
935 | err2 = ntfs_map_runlist(ni, vcn); | ||
936 | if (likely(!err2)) | ||
937 | goto lock_retry_remap; | ||
938 | if (err2 == -ENOMEM) | ||
939 | page_is_dirty = TRUE; | ||
940 | lcn = err2; | ||
941 | } else | ||
942 | err2 = -EIO; | ||
943 | /* Hard error. Abort writing this record. */ | ||
944 | if (!err || err == -ENOMEM) | ||
945 | err = err2; | ||
946 | bh->b_blocknr = -1; | ||
947 | ntfs_error(vol->sb, "Cannot write ntfs record " | ||
948 | "0x%llx (inode 0x%lx, " | ||
949 | "attribute type 0x%x) because " | ||
950 | "its location on disk could " | ||
951 | "not be determined (error " | ||
952 | "code %lli).", (s64)block << | ||
953 | bh_size_bits >> | ||
954 | vol->mft_record_size_bits, | ||
955 | ni->mft_no, ni->type, | ||
956 | (long long)lcn); | ||
957 | /* | ||
958 | * If this is not the first buffer, remove the | ||
959 | * buffers in this record from the list of | ||
960 | * buffers to write and clear their dirty bit | ||
961 | * if not error -ENOMEM. | ||
962 | */ | ||
963 | if (rec_start_bh != bh) { | ||
964 | while (bhs[--nr_bhs] != rec_start_bh) | ||
965 | ; | ||
966 | if (err2 != -ENOMEM) { | ||
967 | do { | ||
968 | clear_buffer_dirty( | ||
969 | rec_start_bh); | ||
970 | } while ((rec_start_bh = | ||
971 | rec_start_bh-> | ||
972 | b_this_page) != | ||
973 | bh); | ||
974 | } | ||
975 | } | ||
976 | continue; | ||
977 | } | ||
978 | } | ||
979 | BUG_ON(!buffer_uptodate(bh)); | ||
980 | BUG_ON(nr_bhs >= max_bhs); | ||
981 | bhs[nr_bhs++] = bh; | ||
982 | } while (block++, (bh = bh->b_this_page) != head); | ||
983 | if (unlikely(rl)) | ||
984 | up_read(&ni->runlist.lock); | ||
985 | /* If there were no dirty buffers, we are done. */ | ||
986 | if (!nr_bhs) | ||
987 | goto done; | ||
988 | /* Map the page so we can access its contents. */ | ||
989 | kaddr = kmap(page); | ||
990 | /* Clear the page uptodate flag whilst the mst fixups are applied. */ | ||
991 | BUG_ON(!PageUptodate(page)); | ||
992 | ClearPageUptodate(page); | ||
993 | for (i = 0; i < nr_bhs; i++) { | ||
994 | unsigned int ofs; | ||
995 | |||
996 | /* Skip buffers which are not at the beginning of records. */ | ||
997 | if (i % bhs_per_rec) | ||
998 | continue; | ||
999 | tbh = bhs[i]; | ||
1000 | ofs = bh_offset(tbh); | ||
1001 | if (is_mft) { | ||
1002 | ntfs_inode *tni; | ||
1003 | unsigned long mft_no; | ||
1004 | |||
1005 | /* Get the mft record number. */ | ||
1006 | mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs) | ||
1007 | >> rec_size_bits; | ||
1008 | /* Check whether to write this mft record. */ | ||
1009 | tni = NULL; | ||
1010 | if (!ntfs_may_write_mft_record(vol, mft_no, | ||
1011 | (MFT_RECORD*)(kaddr + ofs), &tni)) { | ||
1012 | /* | ||
1013 | * The record should not be written. This | ||
1014 | * means we need to redirty the page before | ||
1015 | * returning. | ||
1016 | */ | ||
1017 | page_is_dirty = TRUE; | ||
1018 | /* | ||
1019 | * Remove the buffers in this mft record from | ||
1020 | * the list of buffers to write. | ||
1021 | */ | ||
1022 | do { | ||
1023 | bhs[i] = NULL; | ||
1024 | } while (++i % bhs_per_rec); | ||
1025 | continue; | ||
1026 | } | ||
1027 | /* | ||
1028 | * The record should be written. If a locked ntfs | ||
1029 | * inode was returned, add it to the array of locked | ||
1030 | * ntfs inodes. | ||
1031 | */ | ||
1032 | if (tni) | ||
1033 | locked_nis[nr_locked_nis++] = tni; | ||
1034 | } | ||
1035 | /* Apply the mst protection fixups. */ | ||
1036 | err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs), | ||
1037 | rec_size); | ||
1038 | if (unlikely(err2)) { | ||
1039 | if (!err || err == -ENOMEM) | ||
1040 | err = -EIO; | ||
1041 | ntfs_error(vol->sb, "Failed to apply mst fixups " | ||
1042 | "(inode 0x%lx, attribute type 0x%x, " | ||
1043 | "page index 0x%lx, page offset 0x%x)!" | ||
1044 | " Unmount and run chkdsk.", vi->i_ino, | ||
1045 | ni->type, page->index, ofs); | ||
1046 | /* | ||
1047 | * Mark all the buffers in this record clean as we do | ||
1048 | * not want to write corrupt data to disk. | ||
1049 | */ | ||
1050 | do { | ||
1051 | clear_buffer_dirty(bhs[i]); | ||
1052 | bhs[i] = NULL; | ||
1053 | } while (++i % bhs_per_rec); | ||
1054 | continue; | ||
1055 | } | ||
1056 | nr_recs++; | ||
1057 | } | ||
1058 | /* If no records are to be written out, we are done. */ | ||
1059 | if (!nr_recs) | ||
1060 | goto unm_done; | ||
1061 | flush_dcache_page(page); | ||
1062 | /* Lock buffers and start synchronous write i/o on them. */ | ||
1063 | for (i = 0; i < nr_bhs; i++) { | ||
1064 | tbh = bhs[i]; | ||
1065 | if (!tbh) | ||
1066 | continue; | ||
1067 | if (unlikely(test_set_buffer_locked(tbh))) | ||
1068 | BUG(); | ||
1069 | /* The buffer dirty state is now irrelevant, just clean it. */ | ||
1070 | clear_buffer_dirty(tbh); | ||
1071 | BUG_ON(!buffer_uptodate(tbh)); | ||
1072 | BUG_ON(!buffer_mapped(tbh)); | ||
1073 | get_bh(tbh); | ||
1074 | tbh->b_end_io = end_buffer_write_sync; | ||
1075 | submit_bh(WRITE, tbh); | ||
1076 | } | ||
1077 | /* Synchronize the mft mirror now if not @sync. */ | ||
1078 | if (is_mft && !sync) | ||
1079 | goto do_mirror; | ||
1080 | do_wait: | ||
1081 | /* Wait on i/o completion of buffers. */ | ||
1082 | for (i = 0; i < nr_bhs; i++) { | ||
1083 | tbh = bhs[i]; | ||
1084 | if (!tbh) | ||
1085 | continue; | ||
1086 | wait_on_buffer(tbh); | ||
1087 | if (unlikely(!buffer_uptodate(tbh))) { | ||
1088 | ntfs_error(vol->sb, "I/O error while writing ntfs " | ||
1089 | "record buffer (inode 0x%lx, " | ||
1090 | "attribute type 0x%x, page index " | ||
1091 | "0x%lx, page offset 0x%lx)! Unmount " | ||
1092 | "and run chkdsk.", vi->i_ino, ni->type, | ||
1093 | page->index, bh_offset(tbh)); | ||
1094 | if (!err || err == -ENOMEM) | ||
1095 | err = -EIO; | ||
1096 | /* | ||
1097 | * Set the buffer uptodate so the page and buffer | ||
1098 | * states do not become out of sync. | ||
1099 | */ | ||
1100 | set_buffer_uptodate(tbh); | ||
1101 | } | ||
1102 | } | ||
1103 | /* If @sync, now synchronize the mft mirror. */ | ||
1104 | if (is_mft && sync) { | ||
1105 | do_mirror: | ||
1106 | for (i = 0; i < nr_bhs; i++) { | ||
1107 | unsigned long mft_no; | ||
1108 | unsigned int ofs; | ||
1109 | |||
1110 | /* | ||
1111 | * Skip buffers which are not at the beginning of | ||
1112 | * records. | ||
1113 | */ | ||
1114 | if (i % bhs_per_rec) | ||
1115 | continue; | ||
1116 | tbh = bhs[i]; | ||
1117 | /* Skip removed buffers (and hence records). */ | ||
1118 | if (!tbh) | ||
1119 | continue; | ||
1120 | ofs = bh_offset(tbh); | ||
1121 | /* Get the mft record number. */ | ||
1122 | mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs) | ||
1123 | >> rec_size_bits; | ||
1124 | if (mft_no < vol->mftmirr_size) | ||
1125 | ntfs_sync_mft_mirror(vol, mft_no, | ||
1126 | (MFT_RECORD*)(kaddr + ofs), | ||
1127 | sync); | ||
1128 | } | ||
1129 | if (!sync) | ||
1130 | goto do_wait; | ||
1131 | } | ||
1132 | /* Remove the mst protection fixups again. */ | ||
1133 | for (i = 0; i < nr_bhs; i++) { | ||
1134 | if (!(i % bhs_per_rec)) { | ||
1135 | tbh = bhs[i]; | ||
1136 | if (!tbh) | ||
1137 | continue; | ||
1138 | post_write_mst_fixup((NTFS_RECORD*)(kaddr + | ||
1139 | bh_offset(tbh))); | ||
1140 | } | ||
1141 | } | ||
1142 | flush_dcache_page(page); | ||
1143 | unm_done: | ||
1144 | /* Unlock any locked inodes. */ | ||
1145 | while (nr_locked_nis-- > 0) { | ||
1146 | ntfs_inode *tni, *base_tni; | ||
1147 | |||
1148 | tni = locked_nis[nr_locked_nis]; | ||
1149 | /* Get the base inode. */ | ||
1150 | down(&tni->extent_lock); | ||
1151 | if (tni->nr_extents >= 0) | ||
1152 | base_tni = tni; | ||
1153 | else { | ||
1154 | base_tni = tni->ext.base_ntfs_ino; | ||
1155 | BUG_ON(!base_tni); | ||
1156 | } | ||
1157 | up(&tni->extent_lock); | ||
1158 | ntfs_debug("Unlocking %s inode 0x%lx.", | ||
1159 | tni == base_tni ? "base" : "extent", | ||
1160 | tni->mft_no); | ||
1161 | up(&tni->mrec_lock); | ||
1162 | atomic_dec(&tni->count); | ||
1163 | iput(VFS_I(base_tni)); | ||
1164 | } | ||
1165 | SetPageUptodate(page); | ||
1166 | kunmap(page); | ||
1167 | done: | ||
1168 | if (unlikely(err && err != -ENOMEM)) { | ||
1169 | /* | ||
1170 | * Set page error if there is only one ntfs record in the page. | ||
1171 | * Otherwise we would lose per-record granularity. | ||
1172 | */ | ||
1173 | if (ni->itype.index.block_size == PAGE_CACHE_SIZE) | ||
1174 | SetPageError(page); | ||
1175 | NVolSetErrors(vol); | ||
1176 | } | ||
1177 | if (page_is_dirty) { | ||
1178 | ntfs_debug("Page still contains one or more dirty ntfs " | ||
1179 | "records. Redirtying the page starting at " | ||
1180 | "record 0x%lx.", page->index << | ||
1181 | (PAGE_CACHE_SHIFT - rec_size_bits)); | ||
1182 | redirty_page_for_writepage(wbc, page); | ||
1183 | unlock_page(page); | ||
1184 | } else { | ||
1185 | /* | ||
1186 | * Keep the VM happy. This must be done otherwise the | ||
1187 | * radix-tree tag PAGECACHE_TAG_DIRTY remains set even though | ||
1188 | * the page is clean. | ||
1189 | */ | ||
1190 | BUG_ON(PageWriteback(page)); | ||
1191 | set_page_writeback(page); | ||
1192 | unlock_page(page); | ||
1193 | end_page_writeback(page); | ||
1194 | } | ||
1195 | if (likely(!err)) | ||
1196 | ntfs_debug("Done."); | ||
1197 | return err; | ||
1198 | } | ||
1199 | |||
1200 | /** | ||
1201 | * ntfs_writepage - write a @page to the backing store | ||
1202 | * @page: page cache page to write out | ||
1203 | * @wbc: writeback control structure (handed on to the block writers and | ||
     | *       used when the page has to be redirtied) | ||
1204 | * | ||
1205 | * This is called from the VM when it wants to have a dirty ntfs page cache | ||
1206 | * page cleaned. The VM has already locked the page and marked it clean. | ||
1207 | * | ||
1208 | * For non-resident attributes, ntfs_writepage() writes the @page by calling | ||
1209 | * the ntfs version of the generic block_write_full_page() function, | ||
1210 | * ntfs_write_block(), which in turn if necessary creates and writes the | ||
1211 | * buffers associated with the page asynchronously. | ||
1212 | * | ||
1213 | * For resident attributes, OTOH, ntfs_writepage() writes the @page by copying | ||
1214 | * the data to the mft record (which at this stage is most likely in memory). | ||
1215 | * The mft record is then marked dirty and written out asynchronously via the | ||
1216 | * vfs inode dirty code path for the inode the mft record belongs to or via the | ||
1217 | * vm page dirty code path for the page the mft record is in. | ||
1218 | * | ||
1219 | * Based on ntfs_readpage() and fs/buffer.c::block_write_full_page(). | ||
1220 | * | ||
1221 | * Return 0 on success and -errno on error. | ||
     | * | ||
     | * In detail, as far as this function itself is concerned: | ||
     | *   - 0 is also returned, without writing anything, when @page lies wholly | ||
     | *     beyond i_size (its buffers are invalidated), and when an -ENOMEM | ||
     | *     failure on the resident path was absorbed by redirtying @page. | ||
     | *   - -EACCES is returned for an encrypted, and -EOPNOTSUPP for a compressed | ||
     | *     or sparse, non-resident unnamed $DATA attribute. | ||
     | *   - -EIO is returned for a resident attribute page with an index above 0. | ||
     | *   - Any other error on the resident path (from map_mft_record() or | ||
     | *     ntfs_attr_lookup()) is returned as is after setting the page error | ||
     | *     flag. | ||
     | * | ||
     | * Every return path in this function unlocks @page, except for the two | ||
     | * hand-offs to ntfs_write_mst_block() and ntfs_write_block(), whose return | ||
     | * value is passed straight back to the caller. | ||
1222 | */ | ||
1223 | static int ntfs_writepage(struct page *page, struct writeback_control *wbc) | ||
1224 | { | ||
1225 | loff_t i_size; | ||
1226 | struct inode *vi; | ||
1227 | ntfs_inode *ni, *base_ni; | ||
1228 | char *kaddr; | ||
1229 | ntfs_attr_search_ctx *ctx; | ||
1230 | MFT_RECORD *m; | ||
1231 | u32 attr_len; | ||
1232 | int err; | ||
1233 | |||
1234 | BUG_ON(!PageLocked(page)); | ||
1235 | |||
1236 | vi = page->mapping->host; | ||
1237 | i_size = i_size_read(vi); | ||
1238 | |||
1239 | /* Is the page fully outside i_size? (truncate in progress) */ | ||
1240 | if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >> | ||
1241 | PAGE_CACHE_SHIFT)) { | ||
1242 | /* | ||
1243 | * The page may have dirty, unmapped buffers. Make them | ||
1244 | * freeable here, so the page does not leak. | ||
1245 | */ | ||
1246 | block_invalidatepage(page, 0); | ||
1247 | unlock_page(page); | ||
1248 | ntfs_debug("Write outside i_size - truncated?"); | ||
1249 | return 0; | ||
1250 | } | ||
1251 | ni = NTFS_I(vi); | ||
1252 | |||
1253 | /* NInoNonResident() == NInoIndexAllocPresent() */ | ||
1254 | if (NInoNonResident(ni)) { | ||
1255 | /* | ||
1256 | * Only unnamed $DATA attributes can be compressed, encrypted, | ||
1257 | * and/or sparse. | ||
1258 | */ | ||
1259 | if (ni->type == AT_DATA && !ni->name_len) { | ||
1260 | /* If file is encrypted, deny access, just like NT4. */ | ||
1261 | if (NInoEncrypted(ni)) { | ||
1262 | unlock_page(page); | ||
1263 | ntfs_debug("Denying write access to encrypted " | ||
1264 | "file."); | ||
1265 | return -EACCES; | ||
1266 | } | ||
1267 | /* Compressed data streams are handled in compress.c. */ | ||
1268 | if (NInoCompressed(ni)) { | ||
1269 | // TODO: Implement and replace this check with | ||
1270 | // return ntfs_write_compressed_block(page); | ||
1271 | unlock_page(page); | ||
1272 | ntfs_error(vi->i_sb, "Writing to compressed " | ||
1273 | "files is not supported yet. " | ||
1274 | "Sorry."); | ||
1275 | return -EOPNOTSUPP; | ||
1276 | } | ||
1277 | // TODO: Implement and remove this check. | ||
1278 | if (NInoSparse(ni)) { | ||
1279 | unlock_page(page); | ||
1280 | ntfs_error(vi->i_sb, "Writing to sparse files " | ||
1281 | "is not supported yet. Sorry."); | ||
1282 | return -EOPNOTSUPP; | ||
1283 | } | ||
1284 | } | ||
1285 | /* We have to zero every time due to mmap-at-end-of-file. */ | ||
1286 | if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) { | ||
1287 | /* The page straddles i_size. */ | ||
1288 | unsigned int ofs = i_size & ~PAGE_CACHE_MASK; /* Zeroing starts at this offset in the page. */ | ||
1289 | kaddr = kmap_atomic(page, KM_USER0); | ||
1290 | memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs); | ||
1291 | flush_dcache_page(page); | ||
1292 | kunmap_atomic(kaddr, KM_USER0); | ||
1293 | } | ||
1294 | /* Handle mst protected attributes. */ | ||
1295 | if (NInoMstProtected(ni)) | ||
1296 | return ntfs_write_mst_block(page, wbc); | ||
1297 | /* Normal data stream. */ | ||
1298 | return ntfs_write_block(page, wbc); | ||
1299 | } | ||
1300 | /* | ||
1301 | * Attribute is resident, implying it is not compressed, encrypted, | ||
1302 | * sparse, or mst protected. This also means the attribute is smaller | ||
1303 | * than an mft record and hence smaller than a page, so can simply | ||
1304 | * return error on any pages with index above 0. | ||
1305 | */ | ||
1306 | BUG_ON(page_has_buffers(page)); | ||
1307 | BUG_ON(!PageUptodate(page)); | ||
1308 | if (unlikely(page->index > 0)) { | ||
1309 | ntfs_error(vi->i_sb, "BUG()! page->index (0x%lx) > 0. " | ||
1310 | "Aborting write.", page->index); | ||
1311 | BUG_ON(PageWriteback(page)); | ||
1312 | set_page_writeback(page); | ||
1313 | unlock_page(page); | ||
1314 | end_page_writeback(page); | ||
1315 | return -EIO; | ||
1316 | } | ||
1317 | if (!NInoAttr(ni)) /* Pick the inode whose mft record is mapped below. */ | ||
1318 | base_ni = ni; | ||
1319 | else | ||
1320 | base_ni = ni->ext.base_ntfs_ino; | ||
1321 | /* Map, pin, and lock the mft record. */ | ||
1322 | m = map_mft_record(base_ni); | ||
1323 | if (IS_ERR(m)) { | ||
1324 | err = PTR_ERR(m); | ||
1325 | m = NULL; /* Nothing was mapped, so err_out must not unmap. */ | ||
1326 | ctx = NULL; /* No search context to release in err_out either. */ | ||
1327 | goto err_out; | ||
1328 | } | ||
1329 | ctx = ntfs_attr_get_search_ctx(base_ni, m); | ||
1330 | if (unlikely(!ctx)) { | ||
1331 | err = -ENOMEM; /* err_out redirties the page for a later retry. */ | ||
1332 | goto err_out; | ||
1333 | } | ||
1334 | err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, | ||
1335 | CASE_SENSITIVE, 0, NULL, 0, ctx); | ||
1336 | if (unlikely(err)) | ||
1337 | goto err_out; | ||
1338 | /* | ||
1339 | * Keep the VM happy. This must be done otherwise the radix-tree tag | ||
1340 | * PAGECACHE_TAG_DIRTY remains set even though the page is clean. | ||
1341 | */ | ||
1342 | BUG_ON(PageWriteback(page)); | ||
1343 | set_page_writeback(page); | ||
1344 | unlock_page(page); | ||
1345 | |||
1346 | /* | ||
1347 | * Here, we don't need to zero the out of bounds area every time because | ||
1348 | * the below memcpy() already takes care of the mmap-at-end-of-file | ||
1349 | * requirements. If the file is converted to a non-resident one, then | ||
1350 | * the code path use is switched to the non-resident one where the | ||
1351 | * zeroing happens on each ntfs_writepage() invocation. | ||
1352 | * | ||
1353 | * The above also applies nicely when i_size is decreased. | ||
1354 | * | ||
1355 | * When i_size is increased, the memory between the old and new i_size | ||
1356 | * _must_ be zeroed (or overwritten with new data). Otherwise we will | ||
1357 | * expose data to userspace/disk which should never have been exposed. | ||
1358 | * | ||
1359 | * FIXME: Ensure that i_size increases do the zeroing/overwriting and | ||
1360 | * if we cannot guarantee that, then enable the zeroing below. If the | ||
1361 | * zeroing below is enabled, we MUST move the unlock_page() from above | ||
1362 | * to after the kunmap_atomic(), i.e. just before the | ||
1363 | * end_page_writeback(). | ||
1364 | * UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size | ||
1365 | * increases for resident attributes so those are ok. | ||
1366 | * TODO: ntfs_truncate(), others? | ||
1367 | */ | ||
1368 | |||
1369 | attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); | ||
1370 | i_size = i_size_read(VFS_I(ni)); /* Re-read; the value from the top of the function may be stale. */ | ||
1371 | kaddr = kmap_atomic(page, KM_USER0); | ||
1372 | if (unlikely(attr_len > i_size)) { | ||
1373 | /* Zero out of bounds area in the mft record. */ | ||
1374 | memset((u8*)ctx->attr + le16_to_cpu( | ||
1375 | ctx->attr->data.resident.value_offset) + | ||
1376 | i_size, 0, attr_len - i_size); | ||
1377 | attr_len = i_size; /* Copy no more than i_size bytes below. */ | ||
1378 | } | ||
1379 | /* Copy the data from the page to the mft record. */ | ||
1380 | memcpy((u8*)ctx->attr + | ||
1381 | le16_to_cpu(ctx->attr->data.resident.value_offset), | ||
1382 | kaddr, attr_len); | ||
1383 | flush_dcache_mft_record_page(ctx->ntfs_ino); | ||
1384 | /* Zero out of bounds area in the page cache page. */ | ||
1385 | memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); | ||
1386 | flush_dcache_page(page); | ||
1387 | kunmap_atomic(kaddr, KM_USER0); | ||
1388 | |||
1389 | end_page_writeback(page); | ||
1390 | |||
1391 | /* Mark the mft record dirty, so it gets written back. */ | ||
1392 | mark_mft_record_dirty(ctx->ntfs_ino); | ||
1393 | ntfs_attr_put_search_ctx(ctx); | ||
1394 | unmap_mft_record(base_ni); | ||
1395 | return 0; | ||
1396 | err_out: | ||
1397 | if (err == -ENOMEM) { | ||
1398 | ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying " | ||
1399 | "page so we try again later."); | ||
1400 | /* | ||
1401 | * Put the page back on mapping->dirty_pages, but leave its | ||
1402 | * buffers' dirty state as-is. | ||
1403 | */ | ||
1404 | redirty_page_for_writepage(wbc, page); | ||
1405 | err = 0; /* The page was redirtied, so report success. */ | ||
1406 | } else { | ||
1407 | ntfs_error(vi->i_sb, "Resident attribute write failed with " | ||
1408 | "error %i. Setting page error flag.", err); | ||
1409 | SetPageError(page); | ||
1410 | } | ||
1411 | unlock_page(page); | ||
1412 | if (ctx) | ||
1413 | ntfs_attr_put_search_ctx(ctx); | ||
1414 | if (m) | ||
1415 | unmap_mft_record(base_ni); | ||
1416 | return err; | ||
1417 | } | ||
1418 | |||
1419 | /** | ||
1420 | * ntfs_prepare_nonresident_write - get the buffers of a page ready for a write | ||
     | * @page: page cache page of a non-resident attribute about to be written to | ||
     | * @from: offset in @page of the first byte that will be written | ||
     | * @to:   offset in @page of the first byte after the last one written | ||
1421 | * | ||
     | * Walk over all the buffers of @page, creating them first if the page has | ||
     | * none. Buffers lying outside the range [@from, @to) are only marked | ||
     | * uptodate if the page is uptodate. For each buffer overlapping the range: | ||
     | *   - fail with -EOPNOTSUPP if its block is at or beyond the allocated size, | ||
     | *     or at or beyond the initialized size block while initialized_size is | ||
     | *     below i_size (neither kind of extension is implemented yet); | ||
     | *   - if it is not mapped yet, look up its location on disk in the runlist | ||
     | *     (trying once to map the runlist if the needed part is not mapped) | ||
     | *     and map it; a hole fails with -EOPNOTSUPP; a freshly mapped buffer | ||
     | *     that is flagged new has the parts outside the range zeroed if the | ||
     | *     page is not uptodate; | ||
     | *   - if the page is not uptodate and the buffer is neither uptodate nor | ||
     | *     completely overwritten, start reading it in. | ||
     | * Finally wait for all the reads that were started. | ||
     | * | ||
     | * Locking: ni->runlist.lock is taken for reading the first time a buffer | ||
     | * needs mapping and is then kept until all buffers have been processed. The | ||
     | * local variable rl being non-NULL is what records that the lock is held. | ||
     | * NOTE(review): if ni->runlist.rl were still NULL after a successful | ||
     | * ntfs_map_runlist(), the failure path would be entered with the lock held | ||
     | * but rl == NULL, so err_out would not release it - confirm this cannot | ||
     | * happen. | ||
     | * | ||
     | * Return 0 on success or -errno on error: -ENOMEM if @page has no buffers, | ||
     | * -EOPNOTSUPP for the unimplemented cases above, and -EIO if a read failed | ||
     | * or the location on disk could not be determined (or the error returned by | ||
     | * ntfs_map_runlist() if that is what failed). On the err_out path, buffers | ||
     | * overlapping the range that are flagged new are zeroed, set uptodate and | ||
     | * marked dirty. | ||
1422 | */ | ||
1423 | static int ntfs_prepare_nonresident_write(struct page *page, | ||
1424 | unsigned from, unsigned to) | ||
1425 | { | ||
1426 | VCN vcn; | ||
1427 | LCN lcn; | ||
1428 | sector_t block, ablock, iblock; | ||
1429 | struct inode *vi; | ||
1430 | ntfs_inode *ni; | ||
1431 | ntfs_volume *vol; | ||
1432 | runlist_element *rl; | ||
1433 | struct buffer_head *bh, *head, *wait[2], **wait_bh = wait; /* wait[] collects the buffers being read in. */ | ||
1434 | unsigned int vcn_ofs, block_start, block_end, blocksize; | ||
1435 | int err; | ||
1436 | BOOL is_retry; | ||
1437 | unsigned char blocksize_bits; | ||
1438 | |||
1439 | vi = page->mapping->host; | ||
1440 | ni = NTFS_I(vi); | ||
1441 | vol = ni->vol; | ||
1442 | |||
1443 | ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " | ||
1444 | "0x%lx, from = %u, to = %u.", ni->mft_no, ni->type, | ||
1445 | page->index, from, to); | ||
1446 | |||
1447 | BUG_ON(!NInoNonResident(ni)); | ||
1448 | |||
1449 | blocksize_bits = vi->i_blkbits; | ||
1450 | blocksize = 1 << blocksize_bits; | ||
1451 | |||
1452 | /* | ||
1453 | * create_empty_buffers() will create uptodate/dirty buffers if the | ||
1454 | * page is uptodate/dirty. | ||
1455 | */ | ||
1456 | if (!page_has_buffers(page)) | ||
1457 | create_empty_buffers(page, blocksize, 0); | ||
1458 | bh = head = page_buffers(page); | ||
1459 | if (unlikely(!bh)) | ||
1460 | return -ENOMEM; | ||
1461 | |||
1462 | /* The first block in the page. */ | ||
1463 | block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); | ||
1464 | |||
1465 | /* | ||
1466 | * The first out of bounds block for the allocated size. No need to | ||
1467 | * round up as allocated_size is in multiples of cluster size and the | ||
1468 | * minimum cluster size is 512 bytes, which is equal to the smallest | ||
1469 | * blocksize. | ||
1470 | */ | ||
1471 | ablock = ni->allocated_size >> blocksize_bits; | ||
1472 | |||
1473 | /* The last (fully or partially) initialized block. */ | ||
1474 | iblock = ni->initialized_size >> blocksize_bits; | ||
1475 | |||
1476 | /* Loop through all the buffers in the page. */ | ||
1477 | block_start = 0; | ||
1478 | rl = NULL; /* NULL also means the runlist lock is not held yet. */ | ||
1479 | err = 0; | ||
1480 | do { | ||
1481 | block_end = block_start + blocksize; | ||
1482 | /* | ||
1483 | * If buffer @bh is outside the write, just mark it uptodate | ||
1484 | * if the page is uptodate and continue with the next buffer. | ||
1485 | */ | ||
1486 | if (block_end <= from || block_start >= to) { | ||
1487 | if (PageUptodate(page)) { | ||
1488 | if (!buffer_uptodate(bh)) | ||
1489 | set_buffer_uptodate(bh); | ||
1490 | } | ||
1491 | continue; /* Resumes at the while () below, which advances to the next buffer. */ | ||
1492 | } | ||
1493 | /* | ||
1494 | * @bh is at least partially being written to. | ||
1495 | * Make sure it is not marked as new. | ||
1496 | */ | ||
1497 | //if (buffer_new(bh)) | ||
1498 | // clear_buffer_new(bh); | ||
1499 | |||
1500 | if (block >= ablock) { | ||
1501 | // TODO: block is above allocated_size, need to | ||
1502 | // allocate it. Best done in one go to accommodate not | ||
1503 | // only block but all above blocks up to and including: | ||
1504 | // ((page->index << PAGE_CACHE_SHIFT) + to + blocksize | ||
1505 | // - 1) >> blocksize_bits. Obviously will need to round | ||
1506 | // up to next cluster boundary, too. This should be | ||
1507 | // done with a helper function, so it can be reused. | ||
1508 | ntfs_error(vol->sb, "Writing beyond allocated size " | ||
1509 | "is not supported yet. Sorry."); | ||
1510 | err = -EOPNOTSUPP; | ||
1511 | goto err_out; | ||
1512 | // Need to update ablock. | ||
1513 | // Need to set_buffer_new() on all block bhs that are | ||
1514 | // newly allocated. | ||
1515 | } | ||
1516 | /* | ||
1517 | * Now we have enough allocated size to fulfill the whole | ||
1518 | * request, i.e. block < ablock is true. | ||
1519 | */ | ||
1520 | if (unlikely((block >= iblock) && | ||
1521 | (ni->initialized_size < vi->i_size))) { | ||
1522 | /* | ||
1523 | * If this page is fully outside initialized size, zero | ||
1524 | * out all pages between the current initialized size | ||
1525 | * and the current page. Just use ntfs_readpage() to do | ||
1526 | * the zeroing transparently. | ||
1527 | */ | ||
1528 | if (block > iblock) { | ||
1529 | // TODO: | ||
1530 | // For each page do: | ||
1531 | // - read_cache_page() | ||
1532 | // Again for each page do: | ||
1533 | // - wait_on_page_locked() | ||
1534 | // - Check (PageUptodate(page) && | ||
1535 | // !PageError(page)) | ||
1536 | // Update initialized size in the attribute and | ||
1537 | // in the inode. | ||
1538 | // Again, for each page do: | ||
1539 | // __set_page_dirty_buffers(); | ||
1540 | // page_cache_release() | ||
1541 | // We don't need to wait on the writes. | ||
1542 | // Update iblock. | ||
1543 | } | ||
1544 | /* | ||
1545 | * The current page straddles initialized size. Zero | ||
1546 | * all non-uptodate buffers and set them uptodate (and | ||
1547 | * dirty?). Note, there aren't any non-uptodate buffers | ||
1548 | * if the page is uptodate. | ||
1549 | * FIXME: For an uptodate page, the buffers may need to | ||
1550 | * be written out because they were not initialized on | ||
1551 | * disk before. | ||
1552 | */ | ||
1553 | if (!PageUptodate(page)) { | ||
1554 | // TODO: | ||
1555 | // Zero any non-uptodate buffers up to i_size. | ||
1556 | // Set them uptodate and dirty. | ||
1557 | } | ||
1558 | // TODO: | ||
1559 | // Update initialized size in the attribute and in the | ||
1560 | // inode (up to i_size). | ||
1561 | // Update iblock. | ||
1562 | // FIXME: This is inefficient. Try to batch the two | ||
1563 | // size changes to happen in one go. | ||
1564 | ntfs_error(vol->sb, "Writing beyond initialized size " | ||
1565 | "is not supported yet. Sorry."); | ||
1566 | err = -EOPNOTSUPP; | ||
1567 | goto err_out; | ||
1568 | // Do NOT set_buffer_new() BUT DO clear buffer range | ||
1569 | // outside write request range. | ||
1570 | // set_buffer_uptodate() on complete buffers as well as | ||
1571 | // set_buffer_dirty(). | ||
1572 | } | ||
1573 | |||
1574 | /* Need to map unmapped buffers. */ | ||
1575 | if (!buffer_mapped(bh)) { | ||
1576 | /* Unmapped buffer. Need to map it. */ | ||
1577 | bh->b_bdev = vol->sb->s_bdev; | ||
1578 | |||
1579 | /* Convert block into corresponding vcn and offset. */ | ||
1580 | vcn = (VCN)block << blocksize_bits >> | ||
1581 | vol->cluster_size_bits; | ||
1582 | vcn_ofs = ((VCN)block << blocksize_bits) & | ||
1583 | vol->cluster_size_mask; | ||
1584 | |||
1585 | is_retry = FALSE; | ||
1586 | if (!rl) { /* Runlist lock not taken yet. */ | ||
1587 | lock_retry_remap: | ||
1588 | down_read(&ni->runlist.lock); | ||
1589 | rl = ni->runlist.rl; | ||
1590 | } | ||
1591 | if (likely(rl != NULL)) { | ||
1592 | /* Seek to element containing target vcn. */ | ||
1593 | while (rl->length && rl[1].vcn <= vcn) | ||
1594 | rl++; | ||
1595 | lcn = ntfs_rl_vcn_to_lcn(rl, vcn); | ||
1596 | } else | ||
1597 | lcn = LCN_RL_NOT_MAPPED; | ||
1598 | if (unlikely(lcn < 0)) { | ||
1599 | /* | ||
1600 | * We extended the attribute allocation above. | ||
1601 | * If we hit an ENOENT here it means that the | ||
1602 | * allocation was insufficient which is a bug. | ||
1603 | */ | ||
1604 | BUG_ON(lcn == LCN_ENOENT); | ||
1605 | |||
1606 | /* It is a hole, need to instantiate it. */ | ||
1607 | if (lcn == LCN_HOLE) { | ||
1608 | // TODO: Instantiate the hole. | ||
1609 | // clear_buffer_new(bh); | ||
1610 | // unmap_underlying_metadata(bh->b_bdev, | ||
1611 | // bh->b_blocknr); | ||
1612 | // For non-uptodate buffers, need to | ||
1613 | // zero out the region outside the | ||
1614 | // request in this bh or all bhs, | ||
1615 | // depending on what we implemented | ||
1616 | // above. | ||
1617 | // Need to flush_dcache_page(). | ||
1618 | // Or could use set_buffer_new() | ||
1619 | // instead? | ||
1620 | ntfs_error(vol->sb, "Writing into " | ||
1621 | "sparse regions is " | ||
1622 | "not supported yet. " | ||
1623 | "Sorry."); | ||
1624 | err = -EOPNOTSUPP; | ||
1625 | goto err_out; | ||
1626 | } else if (!is_retry && | ||
1627 | lcn == LCN_RL_NOT_MAPPED) { | ||
1628 | is_retry = TRUE; | ||
1629 | /* | ||
1630 | * Attempt to map runlist, dropping | ||
1631 | * lock for the duration. | ||
1632 | */ | ||
1633 | up_read(&ni->runlist.lock); | ||
1634 | err = ntfs_map_runlist(ni, vcn); | ||
1635 | if (likely(!err)) | ||
1636 | goto lock_retry_remap; | ||
1637 | rl = NULL; /* Lock was dropped above; err_out must not release it again. */ | ||
1638 | lcn = err; /* So the message below reports the mapping error. */ | ||
1639 | } | ||
1640 | /* | ||
1641 | * Failed to map the buffer, even after | ||
1642 | * retrying. | ||
1643 | */ | ||
1644 | bh->b_blocknr = -1; | ||
1645 | ntfs_error(vol->sb, "Failed to write to inode " | ||
1646 | "0x%lx, attribute type 0x%x, " | ||
1647 | "vcn 0x%llx, offset 0x%x " | ||
1648 | "because its location on disk " | ||
1649 | "could not be determined%s " | ||
1650 | "(error code %lli).", | ||
1651 | ni->mft_no, ni->type, | ||
1652 | (unsigned long long)vcn, | ||
1653 | vcn_ofs, is_retry ? " even " | ||
1654 | "after retrying" : "", | ||
1655 | (long long)lcn); | ||
1656 | if (!err) | ||
1657 | err = -EIO; | ||
1658 | goto err_out; | ||
1659 | } | ||
1660 | /* We now have a successful remap, i.e. lcn >= 0. */ | ||
1661 | |||
1662 | /* Setup buffer head to correct block. */ | ||
1663 | bh->b_blocknr = ((lcn << vol->cluster_size_bits) | ||
1664 | + vcn_ofs) >> blocksize_bits; | ||
1665 | set_buffer_mapped(bh); | ||
1666 | |||
1667 | // FIXME: Something analogous to this is needed for | ||
1668 | // each newly allocated block, i.e. BH_New. | ||
1669 | // FIXME: Might need to take this out of the | ||
1670 | // if (!buffer_mapped(bh)) {}, depending on how we | ||
1671 | // implement things during the allocated_size and | ||
1672 | // initialized_size extension code above. | ||
1673 | if (buffer_new(bh)) { | ||
1674 | clear_buffer_new(bh); | ||
1675 | unmap_underlying_metadata(bh->b_bdev, | ||
1676 | bh->b_blocknr); | ||
1677 | if (PageUptodate(page)) { | ||
1678 | set_buffer_uptodate(bh); | ||
1679 | continue; | ||
1680 | } | ||
1681 | /* | ||
1682 | * Page is _not_ uptodate, zero surrounding | ||
1683 | * region. NOTE: This is how we decide if to | ||
1684 | * zero or not! | ||
1685 | */ | ||
1686 | if (block_end > to || block_start < from) { | ||
1687 | void *kaddr; | ||
1688 | |||
1689 | kaddr = kmap_atomic(page, KM_USER0); | ||
1690 | if (block_end > to) | ||
1691 | memset(kaddr + to, 0, | ||
1692 | block_end - to); | ||
1693 | if (block_start < from) | ||
1694 | memset(kaddr + block_start, 0, | ||
1695 | from - | ||
1696 | block_start); | ||
1697 | flush_dcache_page(page); | ||
1698 | kunmap_atomic(kaddr, KM_USER0); | ||
1699 | } | ||
1700 | continue; | ||
1701 | } | ||
1702 | } | ||
1703 | /* @bh is mapped, set it uptodate if the page is uptodate. */ | ||
1704 | if (PageUptodate(page)) { | ||
1705 | if (!buffer_uptodate(bh)) | ||
1706 | set_buffer_uptodate(bh); | ||
1707 | continue; | ||
1708 | } | ||
1709 | /* | ||
1710 | * The page is not uptodate. The buffer is mapped. If it is not | ||
1711 | * uptodate, and it is only partially being written to, we need | ||
1712 | * to read the buffer in before the write, i.e. right now. | ||
1713 | */ | ||
1714 | if (!buffer_uptodate(bh) && | ||
1715 | (block_start < from || block_end > to)) { | ||
1716 | ll_rw_block(READ, 1, &bh); | ||
1717 | *wait_bh++ = bh; /* Remember to wait for this read below. */ | ||
1718 | } | ||
1719 | } while (block++, block_start = block_end, | ||
1720 | (bh = bh->b_this_page) != head); | ||
1721 | |||
1722 | /* Release the lock if we took it. */ | ||
1723 | if (rl) { | ||
1724 | up_read(&ni->runlist.lock); | ||
1725 | rl = NULL; | ||
1726 | } | ||
1727 | |||
1728 | /* If we issued read requests, let them complete. */ | ||
1729 | while (wait_bh > wait) { | ||
1730 | wait_on_buffer(*--wait_bh); | ||
1731 | if (!buffer_uptodate(*wait_bh)) | ||
1732 | return -EIO; | ||
1733 | } | ||
1734 | |||
1735 | ntfs_debug("Done."); | ||
1736 | return 0; | ||
1737 | err_out: | ||
1738 | /* | ||
1739 | * Zero out any newly allocated blocks to avoid exposing stale data. | ||
1740 | * If BH_New is set, we know that the block was newly allocated in the | ||
1741 | * above loop. | ||
1742 | * FIXME: What about initialized_size increments? Have we done all the | ||
1743 | * required zeroing above? If not this error handling is broken, and | ||
1744 | * in particular the if (block_end <= from) check is completely bogus. | ||
1745 | */ | ||
1746 | bh = head; | ||
1747 | block_start = 0; | ||
1748 | is_retry = FALSE; /* Reused from here on as "a buffer was zeroed" flag. */ | ||
1749 | do { | ||
1750 | block_end = block_start + blocksize; | ||
1751 | if (block_end <= from) | ||
1752 | continue; /* Buffer lies before the write; try the next one. */ | ||
1753 | if (block_start >= to) | ||
1754 | break; /* Past the end of the write; nothing more to do. */ | ||
1755 | if (buffer_new(bh)) { | ||
1756 | void *kaddr; | ||
1757 | |||
1758 | clear_buffer_new(bh); | ||
1759 | kaddr = kmap_atomic(page, KM_USER0); | ||
1760 | memset(kaddr + block_start, 0, bh->b_size); | ||
1761 | kunmap_atomic(kaddr, KM_USER0); | ||
1762 | set_buffer_uptodate(bh); | ||
1763 | mark_buffer_dirty(bh); | ||
1764 | is_retry = TRUE; | ||
1765 | } | ||
1766 | } while (block_start = block_end, (bh = bh->b_this_page) != head); | ||
1767 | if (is_retry) | ||
1768 | flush_dcache_page(page); | ||
1769 | if (rl) /* Non-NULL rl means the runlist lock is still held. */ | ||
1770 | up_read(&ni->runlist.lock); | ||
1771 | return err; | ||
1772 | } | ||
1773 | |||
1774 | /** | ||
1775 | * ntfs_prepare_write - prepare a page for receiving data | ||
1776 | * | ||
1777 | * This is called from generic_file_write() with i_sem held on the inode | ||
1778 | * (@page->mapping->host). The @page is locked but not kmap()ped. The source | ||
1779 | * data has not yet been copied into the @page. | ||
1780 | * | ||
1781 | * Need to extend the attribute/fill in holes if necessary, create blocks and | ||
1782 | * make partially overwritten blocks uptodate. | ||
1783 | * | ||
1784 | * i_size is not to be modified yet. | ||
1785 | * | ||
1786 | * Return 0 on success or -errno on error. | ||
1787 | * | ||
1788 | * Should be using block_prepare_write() [support for sparse files] or | ||
1789 | * cont_prepare_write() [no support for sparse files]. Cannot do that due to | ||
1790 | * ntfs specifics but can look at them for implementation guidance. | ||
1791 | * | ||
1792 | * Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is | ||
1793 | * the first byte in the page that will be written to and @to is the first byte | ||
1794 | * after the last byte that will be written to. | ||
1795 | */ | ||
1796 | static int ntfs_prepare_write(struct file *file, struct page *page, | ||
1797 | unsigned from, unsigned to) | ||
1798 | { | ||
1799 | s64 new_size; | ||
1800 | struct inode *vi = page->mapping->host; | ||
1801 | ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi); | ||
1802 | ntfs_volume *vol = ni->vol; | ||
1803 | ntfs_attr_search_ctx *ctx = NULL; | ||
1804 | MFT_RECORD *m = NULL; | ||
1805 | ATTR_RECORD *a; | ||
1806 | u8 *kaddr; | ||
1807 | u32 attr_len; | ||
1808 | int err; | ||
1809 | |||
1810 | ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " | ||
1811 | "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type, | ||
1812 | page->index, from, to); | ||
1813 | BUG_ON(!PageLocked(page)); | ||
1814 | BUG_ON(from > PAGE_CACHE_SIZE); | ||
1815 | BUG_ON(to > PAGE_CACHE_SIZE); | ||
1816 | BUG_ON(from > to); | ||
1817 | BUG_ON(NInoMstProtected(ni)); | ||
1818 | /* | ||
1819 | * If a previous ntfs_truncate() failed, repeat it and abort if it | ||
1820 | * fails again. | ||
1821 | */ | ||
1822 | if (unlikely(NInoTruncateFailed(ni))) { | ||
1823 | down_write(&vi->i_alloc_sem); | ||
1824 | err = ntfs_truncate(vi); | ||
1825 | up_write(&vi->i_alloc_sem); | ||
1826 | if (err || NInoTruncateFailed(ni)) { | ||
1827 | if (!err) | ||
1828 | err = -EIO; | ||
1829 | goto err_out; | ||
1830 | } | ||
1831 | } | ||
1832 | /* If the attribute is not resident, deal with it elsewhere. */ | ||
1833 | if (NInoNonResident(ni)) { | ||
1834 | /* | ||
1835 | * Only unnamed $DATA attributes can be compressed, encrypted, | ||
1836 | * and/or sparse. | ||
1837 | */ | ||
1838 | if (ni->type == AT_DATA && !ni->name_len) { | ||
1839 | /* If file is encrypted, deny access, just like NT4. */ | ||
1840 | if (NInoEncrypted(ni)) { | ||
1841 | ntfs_debug("Denying write access to encrypted " | ||
1842 | "file."); | ||
1843 | return -EACCES; | ||
1844 | } | ||
1845 | /* Compressed data streams are handled in compress.c. */ | ||
1846 | if (NInoCompressed(ni)) { | ||
1847 | // TODO: Implement and replace this check with | ||
1848 | // return ntfs_write_compressed_block(page); | ||
1849 | ntfs_error(vi->i_sb, "Writing to compressed " | ||
1850 | "files is not supported yet. " | ||
1851 | "Sorry."); | ||
1852 | return -EOPNOTSUPP; | ||
1853 | } | ||
1854 | // TODO: Implement and remove this check. | ||
1855 | if (NInoSparse(ni)) { | ||
1856 | ntfs_error(vi->i_sb, "Writing to sparse files " | ||
1857 | "is not supported yet. Sorry."); | ||
1858 | return -EOPNOTSUPP; | ||
1859 | } | ||
1860 | } | ||
1861 | /* Normal data stream. */ | ||
1862 | return ntfs_prepare_nonresident_write(page, from, to); | ||
1863 | } | ||
1864 | /* | ||
1865 | * Attribute is resident, implying it is not compressed, encrypted, or | ||
1866 | * sparse. | ||
1867 | */ | ||
1868 | BUG_ON(page_has_buffers(page)); | ||
1869 | new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to; | ||
1870 | /* If we do not need to resize the attribute allocation we are done. */ | ||
1871 | if (new_size <= vi->i_size) | ||
1872 | goto done; | ||
1873 | |||
1874 | // FIXME: We abort for now as this code is not safe. | ||
1875 | ntfs_error(vi->i_sb, "Changing the file size is not supported yet. " | ||
1876 | "Sorry."); | ||
1877 | return -EOPNOTSUPP; | ||
1878 | |||
1879 | /* Map, pin, and lock the (base) mft record. */ | ||
1880 | if (!NInoAttr(ni)) | ||
1881 | base_ni = ni; | ||
1882 | else | ||
1883 | base_ni = ni->ext.base_ntfs_ino; | ||
1884 | m = map_mft_record(base_ni); | ||
1885 | if (IS_ERR(m)) { | ||
1886 | err = PTR_ERR(m); | ||
1887 | m = NULL; | ||
1888 | ctx = NULL; | ||
1889 | goto err_out; | ||
1890 | } | ||
1891 | ctx = ntfs_attr_get_search_ctx(base_ni, m); | ||
1892 | if (unlikely(!ctx)) { | ||
1893 | err = -ENOMEM; | ||
1894 | goto err_out; | ||
1895 | } | ||
1896 | err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, | ||
1897 | CASE_SENSITIVE, 0, NULL, 0, ctx); | ||
1898 | if (unlikely(err)) { | ||
1899 | if (err == -ENOENT) | ||
1900 | err = -EIO; | ||
1901 | goto err_out; | ||
1902 | } | ||
1903 | m = ctx->mrec; | ||
1904 | a = ctx->attr; | ||
1905 | /* The total length of the attribute value. */ | ||
1906 | attr_len = le32_to_cpu(a->data.resident.value_length); | ||
1907 | BUG_ON(vi->i_size != attr_len); | ||
1908 | /* Check if new size is allowed in $AttrDef. */ | ||
1909 | err = ntfs_attr_size_bounds_check(vol, ni->type, new_size); | ||
1910 | if (unlikely(err)) { | ||
1911 | if (err == -ERANGE) { | ||
1912 | ntfs_error(vol->sb, "Write would cause the inode " | ||
1913 | "0x%lx to exceed the maximum size for " | ||
1914 | "its attribute type (0x%x). Aborting " | ||
1915 | "write.", vi->i_ino, | ||
1916 | le32_to_cpu(ni->type)); | ||
1917 | } else { | ||
1918 | ntfs_error(vol->sb, "Inode 0x%lx has unknown " | ||
1919 | "attribute type 0x%x. Aborting " | ||
1920 | "write.", vi->i_ino, | ||
1921 | le32_to_cpu(ni->type)); | ||
1922 | err = -EIO; | ||
1923 | } | ||
1924 | goto err_out2; | ||
1925 | } | ||
1926 | /* | ||
1927 | * Extend the attribute record to be able to store the new attribute | ||
1928 | * size. | ||
1929 | */ | ||
1930 | if (new_size >= vol->mft_record_size || ntfs_attr_record_resize(m, a, | ||
1931 | le16_to_cpu(a->data.resident.value_offset) + | ||
1932 | new_size)) { | ||
1933 | /* Not enough space in the mft record. */ | ||
1934 | ntfs_error(vol->sb, "Not enough space in the mft record for " | ||
1935 | "the resized attribute value. This is not " | ||
1936 | "supported yet. Aborting write."); | ||
1937 | err = -EOPNOTSUPP; | ||
1938 | goto err_out2; | ||
1939 | } | ||
1940 | /* | ||
1941 | * We have enough space in the mft record to fit the write. This | ||
1942 | * implies the attribute is smaller than the mft record and hence the | ||
1943 | * attribute must be in a single page and hence page->index must be 0. | ||
1944 | */ | ||
1945 | BUG_ON(page->index); | ||
1946 | /* | ||
1947 | * If the beginning of the write is past the old size, enlarge the | ||
1948 | * attribute value up to the beginning of the write and fill it with | ||
1949 | * zeroes. | ||
1950 | */ | ||
1951 | if (from > attr_len) { | ||
1952 | memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) + | ||
1953 | attr_len, 0, from - attr_len); | ||
1954 | a->data.resident.value_length = cpu_to_le32(from); | ||
1955 | /* Zero the corresponding area in the page as well. */ | ||
1956 | if (PageUptodate(page)) { | ||
1957 | kaddr = kmap_atomic(page, KM_USER0); | ||
1958 | memset(kaddr + attr_len, 0, from - attr_len); | ||
1959 | kunmap_atomic(kaddr, KM_USER0); | ||
1960 | flush_dcache_page(page); | ||
1961 | } | ||
1962 | } | ||
1963 | flush_dcache_mft_record_page(ctx->ntfs_ino); | ||
1964 | mark_mft_record_dirty(ctx->ntfs_ino); | ||
1965 | ntfs_attr_put_search_ctx(ctx); | ||
1966 | unmap_mft_record(base_ni); | ||
1967 | /* | ||
1968 | * Because resident attributes are handled by memcpy() to/from the | ||
1969 | * corresponding MFT record, and because this form of i/o is byte | ||
1970 | * aligned rather than block aligned, there is no need to bring the | ||
1971 | * page uptodate here as in the non-resident case where we need to | ||
1972 | * bring the buffers straddled by the write uptodate before | ||
1973 | * generic_file_write() does the copying from userspace. | ||
1974 | * | ||
1975 | * We thus defer the uptodate bringing of the page region outside the | ||
1976 | * region written to to ntfs_commit_write(), which makes the code | ||
1977 | * simpler and saves one atomic kmap which is good. | ||
1978 | */ | ||
1979 | done: | ||
1980 | ntfs_debug("Done."); | ||
1981 | return 0; | ||
1982 | err_out: | ||
1983 | if (err == -ENOMEM) | ||
1984 | ntfs_warning(vi->i_sb, "Error allocating memory required to " | ||
1985 | "prepare the write."); | ||
1986 | else { | ||
1987 | ntfs_error(vi->i_sb, "Resident attribute prepare write failed " | ||
1988 | "with error %i.", err); | ||
1989 | NVolSetErrors(vol); | ||
1990 | make_bad_inode(vi); | ||
1991 | } | ||
1992 | err_out2: | ||
1993 | if (ctx) | ||
1994 | ntfs_attr_put_search_ctx(ctx); | ||
1995 | if (m) | ||
1996 | unmap_mft_record(base_ni); | ||
1997 | return err; | ||
1998 | } | ||
1999 | |||
2000 | /** | ||
2001 | * ntfs_commit_nonresident_write - commit a write to a non-resident attribute | ||
2002 | * Mark the buffers of @page overlapping the range @from to @to uptodate and dirty. | ||
2003 | */ | ||
2004 | static int ntfs_commit_nonresident_write(struct page *page, | ||
2005 | unsigned from, unsigned to) | ||
2006 | { | ||
2007 | s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to; /* Byte offset of the end of the write. */ | ||
2008 | struct inode *vi = page->mapping->host; | ||
2009 | struct buffer_head *bh, *head; | ||
2010 | unsigned int block_start, block_end, blocksize; | ||
2011 | BOOL partial; | ||
2012 | |||
2013 | ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " | ||
2014 | "0x%lx, from = %u, to = %u.", vi->i_ino, | ||
2015 | NTFS_I(vi)->type, page->index, from, to); | ||
2016 | blocksize = 1 << vi->i_blkbits; | ||
2017 | |||
2018 | // FIXME: We need a whole slew of special cases in here for compressed | ||
2019 | // files for example... | ||
2020 | // For now, we know ntfs_prepare_write() would have failed so we can't | ||
2021 | // get here in any of the cases which we have to special case, so we | ||
2022 | // are just a ripped off, unrolled generic_commit_write(). | ||
2023 | |||
2024 | bh = head = page_buffers(page); | ||
2025 | block_start = 0; | ||
2026 | partial = FALSE; | ||
2027 | do { | ||
2028 | block_end = block_start + blocksize; | ||
2029 | if (block_end <= from || block_start >= to) { /* Buffer lies wholly outside the write. */ | ||
2030 | if (!buffer_uptodate(bh)) | ||
2031 | partial = TRUE; /* At least one buffer in the page is still not uptodate. */ | ||
2032 | } else { | ||
2033 | set_buffer_uptodate(bh); | ||
2034 | mark_buffer_dirty(bh); | ||
2035 | } | ||
2036 | } while (block_start = block_end, (bh = bh->b_this_page) != head); | ||
2037 | /* | ||
2038 | * If this is a partial write which happened to make all buffers | ||
2039 | * uptodate then we can optimize away a bogus ->readpage() for the next | ||
2040 | * read(). Here we 'discover' whether the page went uptodate as a | ||
2041 | * result of this (potentially partial) write. | ||
2042 | */ | ||
2043 | if (!partial) | ||
2044 | SetPageUptodate(page); | ||
2045 | /* | ||
2046 | * Not convinced about this at all. See disparity comment above. For | ||
2047 | * now we know ntfs_prepare_write() would have failed in the write | ||
2048 | * exceeds i_size case, so this will never trigger which is fine. | ||
2049 | */ | ||
2050 | if (pos > vi->i_size) { | ||
2051 | ntfs_error(vi->i_sb, "Writing beyond the existing file size is " | ||
2052 | "not supported yet. Sorry."); | ||
2053 | return -EOPNOTSUPP; | ||
2054 | // vi->i_size = pos; | ||
2055 | // mark_inode_dirty(vi); | ||
2056 | } | ||
2057 | ntfs_debug("Done."); | ||
2058 | return 0; | ||
2059 | } | ||
2060 | |||
2061 | /** | ||
2062 | * ntfs_commit_write - commit the received data | ||
2063 | * | ||
2064 | * This is called from generic_file_write() with i_sem held on the inode | ||
2065 | * (@page->mapping->host). The @page is locked but not kmap()ped. The source | ||
2066 | * data has already been copied into the @page. ntfs_prepare_write() has been | ||
2067 | * called before the data was copied and it returned success so we can take the | ||
2068 | * results of various BUG checks and some error handling for granted. | ||
2069 | * | ||
2070 | * Need to mark modified blocks dirty so they get written out later when | ||
2071 | * ntfs_writepage() is invoked by the VM. | ||
2072 | * | ||
2073 | * Return 0 on success or -errno on error. | ||
2074 | * | ||
2075 | * Should be using generic_commit_write(). This marks buffers uptodate and | ||
2076 | * dirty, sets the page uptodate if all buffers in the page are uptodate, and | ||
2077 | * updates i_size if the end of io is beyond i_size. In that case, it also | ||
2078 | * marks the inode dirty. | ||
2079 | * | ||
2080 | * Cannot use generic_commit_write() due to ntfs specialities but can look at | ||
2081 | * it for implementation guidance. | ||
2082 | * | ||
2083 | * If things have gone as outlined in ntfs_prepare_write(), then we do not | ||
2084 | * need to do any page content modifications here at all, except in the write | ||
2085 | * to resident attribute case, where we need to do the uptodate bringing here | ||
2086 | * which we combine with the copying into the mft record which means we save | ||
2087 | * one atomic kmap. | ||
2088 | */ | ||
2089 | static int ntfs_commit_write(struct file *file, struct page *page, | ||
2090 | unsigned from, unsigned to) | ||
2091 | { | ||
2092 | struct inode *vi = page->mapping->host; | ||
2093 | ntfs_inode *base_ni, *ni = NTFS_I(vi); | ||
2094 | char *kaddr, *kattr; | ||
2095 | ntfs_attr_search_ctx *ctx; | ||
2096 | MFT_RECORD *m; | ||
2097 | ATTR_RECORD *a; | ||
2098 | u32 attr_len; | ||
2099 | int err; | ||
2100 | |||
2101 | ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " | ||
2102 | "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type, | ||
2103 | page->index, from, to); | ||
2104 | /* If the attribute is not resident, deal with it elsewhere. */ | ||
2105 | if (NInoNonResident(ni)) { | ||
2106 | /* Only unnamed $DATA attributes can be compressed/encrypted. */ | ||
2107 | if (ni->type == AT_DATA && !ni->name_len) { | ||
2108 | /* Encrypted files need separate handling. */ | ||
2109 | if (NInoEncrypted(ni)) { | ||
2110 | // We never get here at present! | ||
2111 | BUG(); | ||
2112 | } | ||
2113 | /* Compressed data streams are handled in compress.c. */ | ||
2114 | if (NInoCompressed(ni)) { | ||
2115 | // TODO: Implement this! | ||
2116 | // return ntfs_write_compressed_block(page); | ||
2117 | // We never get here at present! | ||
2118 | BUG(); | ||
2119 | } | ||
2120 | } | ||
2121 | /* Normal data stream. */ | ||
2122 | return ntfs_commit_nonresident_write(page, from, to); | ||
2123 | } | ||
2124 | /* | ||
2125 | * Attribute is resident, implying it is not compressed, encrypted, or | ||
2126 | * sparse. | ||
2127 | */ | ||
2128 | if (!NInoAttr(ni)) | ||
2129 | base_ni = ni; | ||
2130 | else | ||
2131 | base_ni = ni->ext.base_ntfs_ino; | ||
2132 | /* Map, pin, and lock the mft record. */ | ||
2133 | m = map_mft_record(base_ni); | ||
2134 | if (IS_ERR(m)) { | ||
2135 | err = PTR_ERR(m); | ||
2136 | m = NULL; | ||
2137 | ctx = NULL; | ||
2138 | goto err_out; | ||
2139 | } | ||
2140 | ctx = ntfs_attr_get_search_ctx(base_ni, m); | ||
2141 | if (unlikely(!ctx)) { | ||
2142 | err = -ENOMEM; | ||
2143 | goto err_out; | ||
2144 | } | ||
2145 | err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, | ||
2146 | CASE_SENSITIVE, 0, NULL, 0, ctx); | ||
2147 | if (unlikely(err)) { | ||
2148 | if (err == -ENOENT) | ||
2149 | err = -EIO; | ||
2150 | goto err_out; | ||
2151 | } | ||
2152 | a = ctx->attr; | ||
2153 | /* The total length of the attribute value. */ | ||
2154 | attr_len = le32_to_cpu(a->data.resident.value_length); | ||
2155 | BUG_ON(from > attr_len); | ||
2156 | kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset); | ||
2157 | kaddr = kmap_atomic(page, KM_USER0); | ||
2158 | /* Copy the received data from the page to the mft record. */ | ||
2159 | memcpy(kattr + from, kaddr + from, to - from); | ||
2160 | /* Update the attribute length if necessary. */ | ||
2161 | if (to > attr_len) { | ||
2162 | attr_len = to; | ||
2163 | a->data.resident.value_length = cpu_to_le32(attr_len); | ||
2164 | } | ||
2165 | /* | ||
2166 | * If the page is not uptodate, bring the out of bounds area(s) | ||
2167 | * uptodate by copying data from the mft record to the page. | ||
2168 | */ | ||
2169 | if (!PageUptodate(page)) { | ||
2170 | if (from > 0) | ||
2171 | memcpy(kaddr, kattr, from); | ||
2172 | if (to < attr_len) | ||
2173 | memcpy(kaddr + to, kattr + to, attr_len - to); | ||
2174 | /* Zero the region outside the end of the attribute value. */ | ||
2175 | if (attr_len < PAGE_CACHE_SIZE) | ||
2176 | memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); | ||
2177 | /* | ||
2178 | * The probability of not having done any of the above is | ||
2179 | * extremely small, so we just flush unconditionally. | ||
2180 | */ | ||
2181 | flush_dcache_page(page); | ||
2182 | SetPageUptodate(page); | ||
2183 | } | ||
2184 | kunmap_atomic(kaddr, KM_USER0); | ||
2185 | /* Update i_size if necessary. */ | ||
2186 | if (vi->i_size < attr_len) { | ||
2187 | ni->allocated_size = ni->initialized_size = attr_len; | ||
2188 | i_size_write(vi, attr_len); | ||
2189 | } | ||
2190 | /* Mark the mft record dirty, so it gets written back. */ | ||
2191 | flush_dcache_mft_record_page(ctx->ntfs_ino); | ||
2192 | mark_mft_record_dirty(ctx->ntfs_ino); | ||
2193 | ntfs_attr_put_search_ctx(ctx); | ||
2194 | unmap_mft_record(base_ni); | ||
2195 | ntfs_debug("Done."); | ||
2196 | return 0; | ||
2197 | err_out: | ||
2198 | if (err == -ENOMEM) { | ||
2199 | ntfs_warning(vi->i_sb, "Error allocating memory required to " | ||
2200 | "commit the write."); | ||
2201 | if (PageUptodate(page)) { | ||
2202 | ntfs_warning(vi->i_sb, "Page is uptodate, setting " | ||
2203 | "dirty so the write will be retried " | ||
2204 | "later on by the VM."); | ||
2205 | /* | ||
2206 | * Put the page on mapping->dirty_pages, but leave its | ||
2207 | * buffers' dirty state as-is. | ||
2208 | */ | ||
2209 | __set_page_dirty_nobuffers(page); | ||
2210 | err = 0; /* Report success; the dirty page keeps the data for a later retry. */ | ||
2211 | } else | ||
2212 | ntfs_error(vi->i_sb, "Page is not uptodate. Written " | ||
2213 | "data has been lost."); | ||
2214 | } else { | ||
2215 | ntfs_error(vi->i_sb, "Resident attribute commit write failed " | ||
2216 | "with error %i.", err); | ||
2217 | NVolSetErrors(ni->vol); | ||
2218 | make_bad_inode(vi); | ||
2219 | } | ||
2220 | if (ctx) | ||
2221 | ntfs_attr_put_search_ctx(ctx); | ||
2222 | if (m) | ||
2223 | unmap_mft_record(base_ni); | ||
2224 | return err; | ||
2225 | } | ||
2226 | |||
2227 | #endif /* NTFS_RW */ | ||
2228 | |||
2229 | /** | ||
2230 | * ntfs_aops - general address space operations for inodes and attributes | ||
2231 | */ | ||
2232 | struct address_space_operations ntfs_aops = { | ||
2233 | .readpage = ntfs_readpage, /* Fill page with data. */ | ||
2234 | .sync_page = block_sync_page, /* Currently, just unplugs the | ||
2235 | disk request queue. */ | ||
2236 | #ifdef NTFS_RW /* The write methods below are only present if NTFS_RW is defined. */ | ||
2237 | .writepage = ntfs_writepage, /* Write dirty page to disk. */ | ||
2238 | .prepare_write = ntfs_prepare_write, /* Prepare page and buffers | ||
2239 | ready to receive data. */ | ||
2240 | .commit_write = ntfs_commit_write, /* Commit received data. */ | ||
2241 | #endif /* NTFS_RW */ | ||
2242 | }; | ||
2243 | |||
2244 | /** | ||
2245 | * ntfs_mst_aops - general address space operations for mst protected inodes | ||
2246 | * and attributes | ||
2247 | */ | ||
2248 | struct address_space_operations ntfs_mst_aops = { | ||
2249 | .readpage = ntfs_readpage, /* Fill page with data. */ | ||
2250 | .sync_page = block_sync_page, /* Currently, just unplugs the | ||
2251 | disk request queue. */ | ||
2252 | #ifdef NTFS_RW /* The write methods below are only present if NTFS_RW is defined. */ | ||
2253 | .writepage = ntfs_writepage, /* Write dirty page to disk. */ | ||
2254 | .set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty | ||
2255 | without touching the buffers | ||
2256 | belonging to the page. */ | ||
2257 | #endif /* NTFS_RW */ | ||
2258 | }; | ||
2259 | |||
2260 | #ifdef NTFS_RW | ||
2261 | |||
2262 | /** | ||
2263 | * mark_ntfs_record_dirty - mark an ntfs record dirty | ||
2264 | * @page: page containing the ntfs record to mark dirty | ||
2265 | * @ofs: byte offset within @page at which the ntfs record begins | ||
2266 | * | ||
2267 | * Set the buffers and the page in which the ntfs record is located dirty. | ||
2268 | * | ||
2269 | * The latter also marks the vfs inode the ntfs record belongs to dirty | ||
2270 | * (I_DIRTY_PAGES only). | ||
2271 | * | ||
2272 | * If the page does not have buffers, we create them and set them uptodate. | ||
2273 | * The page may not be locked which is why we need to handle the buffers under | ||
2274 | * the mapping->private_lock. Once the buffers are marked dirty we no longer | ||
2275 | * need the lock since try_to_free_buffers() does not free dirty buffers. | ||
2276 | */ | ||
2277 | void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) { | ||
2278 | struct address_space *mapping = page->mapping; | ||
2279 | ntfs_inode *ni = NTFS_I(mapping->host); | ||
2280 | struct buffer_head *bh, *head, *buffers_to_free = NULL; | ||
2281 | unsigned int end, bh_size, bh_ofs; | ||
2282 | |||
2283 | BUG_ON(!PageUptodate(page)); | ||
2284 | end = ofs + ni->itype.index.block_size; /* Offset in @page of the first byte past the record. */ | ||
2285 | bh_size = 1 << VFS_I(ni)->i_blkbits; | ||
2286 | spin_lock(&mapping->private_lock); | ||
2287 | if (unlikely(!page_has_buffers(page))) { | ||
2288 | spin_unlock(&mapping->private_lock); /* Lock is dropped around the buffer allocation. */ | ||
2289 | bh = head = alloc_page_buffers(page, bh_size, 1); | ||
2290 | spin_lock(&mapping->private_lock); | ||
2291 | if (likely(!page_has_buffers(page))) { /* Recheck, as the lock was dropped above. */ | ||
2292 | struct buffer_head *tail; | ||
2293 | |||
2294 | do { | ||
2295 | set_buffer_uptodate(bh); | ||
2296 | tail = bh; | ||
2297 | bh = bh->b_this_page; | ||
2298 | } while (bh); | ||
2299 | tail->b_this_page = head; /* Close the list into a ring. */ | ||
2300 | attach_page_buffers(page, head); | ||
2301 | } else | ||
2302 | buffers_to_free = bh; /* Page gained buffers meanwhile; ours are freed after unlocking. */ | ||
2303 | } | ||
2304 | bh = head = page_buffers(page); | ||
2305 | do { | ||
2306 | bh_ofs = bh_offset(bh); | ||
2307 | if (bh_ofs + bh_size <= ofs) | ||
2308 | continue; /* Buffer ends before the record begins. */ | ||
2309 | if (unlikely(bh_ofs >= end)) | ||
2310 | break; /* Buffer begins past the end of the record. */ | ||
2311 | set_buffer_dirty(bh); | ||
2312 | } while ((bh = bh->b_this_page) != head); | ||
2313 | spin_unlock(&mapping->private_lock); | ||
2314 | __set_page_dirty_nobuffers(page); | ||
2315 | if (unlikely(buffers_to_free)) { | ||
2316 | do { | ||
2317 | bh = buffers_to_free->b_this_page; | ||
2318 | free_buffer_head(buffers_to_free); | ||
2319 | buffers_to_free = bh; | ||
2320 | } while (buffers_to_free); | ||
2321 | } | ||
2322 | } | ||
2323 | |||
2324 | #endif /* NTFS_RW */ | ||
diff --git a/fs/ntfs/aops.h b/fs/ntfs/aops.h new file mode 100644 index 000000000000..3b74e66ca2ff --- /dev/null +++ b/fs/ntfs/aops.h | |||
@@ -0,0 +1,109 @@ | |||
1 | /** | ||
2 | * aops.h - Defines for NTFS kernel address space operations and page cache | ||
3 | * handling. Part of the Linux-NTFS project. | ||
4 | * | ||
5 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
6 | * Copyright (c) 2002 Richard Russon | ||
7 | * | ||
8 | * This program/include file is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as published | ||
10 | * by the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program/include file is distributed in the hope that it will be | ||
14 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
15 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program (in the main directory of the Linux-NTFS | ||
20 | * distribution in the file COPYING); if not, write to the Free Software | ||
21 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 | */ | ||
23 | |||
24 | #ifndef _LINUX_NTFS_AOPS_H | ||
25 | #define _LINUX_NTFS_AOPS_H | ||
26 | |||
27 | #include <linux/mm.h> | ||
28 | #include <linux/highmem.h> | ||
29 | #include <linux/pagemap.h> | ||
30 | #include <linux/fs.h> | ||
31 | |||
32 | #include "inode.h" | ||
33 | |||
34 | /** | ||
35 | * ntfs_unmap_page - release a page that was mapped using ntfs_map_page() | ||
36 | * @page: the page to release | ||
37 | * | ||
38 | * Unpin, unmap and release a page that was obtained from ntfs_map_page(). | ||
39 | */ | ||
40 | static inline void ntfs_unmap_page(struct page *page) | ||
41 | { | ||
42 | kunmap(page); /* Undo the kmap() done in ntfs_map_page(). */ | ||
43 | page_cache_release(page); /* Drop the reference that pinned the page. */ | ||
44 | } | ||
45 | |||
46 | /** | ||
47 | * ntfs_map_page - map a page into accessible memory, reading it if necessary | ||
48 | * @mapping: address space for which to obtain the page | ||
49 | * @index: index into the page cache for @mapping of the page to map | ||
50 | * | ||
51 | * Read a page from the page cache of the address space @mapping at position | ||
52 | * @index, where @index is in units of PAGE_CACHE_SIZE, and not in bytes. | ||
53 | * | ||
54 | * If the page is not in memory it is loaded from disk first using the readpage | ||
55 | * method defined in the address space operations of @mapping and the page is | ||
56 | * added to the page cache of @mapping in the process. | ||
57 | * | ||
58 | * If the page belongs to an mst protected attribute and it is marked as such | ||
59 | * in its ntfs inode (NInoMstProtected()) the mst fixups are applied but no | ||
60 | * error checking is performed. This means the caller has to verify whether | ||
61 | * the ntfs record(s) contained in the page are valid or not using one of the | ||
62 | * ntfs_is_XXXX_record{,p}() macros, where XXXX is the record type you are | ||
63 | * expecting to see. (For details of the macros, see fs/ntfs/layout.h.) | ||
64 | * | ||
65 | * If the page is in high memory it is mapped into memory directly addressable | ||
66 | * by the kernel. | ||
67 | * | ||
68 | * Finally the page count is incremented, thus pinning the page into place. | ||
69 | * | ||
70 | * The above means that page_address(page) can be used on all pages obtained | ||
71 | * with ntfs_map_page() to get the kernel virtual address of the page. | ||
72 | * | ||
73 | * When finished with the page, the caller has to call ntfs_unmap_page() to | ||
74 | * unpin, unmap and release the page. | ||
75 | * | ||
76 | * Note this does not grant exclusive access. If such is desired, the caller | ||
77 | * must provide it independently of the ntfs_{un}map_page() calls by using | ||
78 | * a {rw_}semaphore or other means of serialization. A spin lock cannot be | ||
79 | * used as ntfs_map_page() can block. | ||
80 | * | ||
81 | * The unlocked and uptodate page is returned on success or an encoded error | ||
82 | * on failure. Caller has to test for error using the IS_ERR() macro on the | ||
83 | * return value. If that evaluates to TRUE, the negative error code can be | ||
84 | * obtained using PTR_ERR() on the return value of ntfs_map_page(). | ||
85 | */ | ||
86 | static inline struct page *ntfs_map_page(struct address_space *mapping, | ||
87 | unsigned long index) | ||
88 | { | ||
89 | struct page *page = read_cache_page(mapping, index, | ||
90 | (filler_t*)mapping->a_ops->readpage, NULL); | ||
91 | |||
92 | if (!IS_ERR(page)) { | ||
93 | wait_on_page_locked(page); /* Wait until the page is no longer locked. */ | ||
94 | kmap(page); | ||
95 | if (PageUptodate(page) && !PageError(page)) | ||
96 | return page; | ||
97 | ntfs_unmap_page(page); /* Page is not usable: undo the kmap() and drop the reference. */ | ||
98 | return ERR_PTR(-EIO); | ||
99 | } | ||
100 | return page; /* Pass the encoded error from read_cache_page() on to the caller. */ | ||
101 | } | ||
102 | |||
103 | #ifdef NTFS_RW | ||
104 | |||
105 | extern void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs); | ||
106 | |||
107 | #endif /* NTFS_RW */ | ||
108 | |||
109 | #endif /* _LINUX_NTFS_AOPS_H */ | ||
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c new file mode 100644 index 000000000000..1ff7f90a18b0 --- /dev/null +++ b/fs/ntfs/attrib.c | |||
@@ -0,0 +1,1258 @@ | |||
1 | /** | ||
2 | * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
5 | * Copyright (c) 2002 Richard Russon | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #include <linux/buffer_head.h> | ||
24 | |||
25 | #include "attrib.h" | ||
26 | #include "debug.h" | ||
27 | #include "layout.h" | ||
28 | #include "mft.h" | ||
29 | #include "ntfs.h" | ||
30 | #include "types.h" | ||
31 | |||
32 | /** | ||
33 | * ntfs_map_runlist - map (a part of) a runlist of an ntfs inode | ||
34 | * @ni: ntfs inode for which to map (part of) a runlist | ||
35 | * @vcn: map runlist part containing this vcn | ||
36 | * | ||
37 | * Map the part of a runlist containing the @vcn of the ntfs inode @ni. | ||
38 | * | ||
39 | * Return 0 on success and -errno on error. | ||
40 | * | ||
41 | * Locking: - The runlist must be unlocked on entry and is unlocked on return. | ||
42 | * - This function takes the lock for writing and modifies the runlist. | ||
43 | */ | ||
44 | int ntfs_map_runlist(ntfs_inode *ni, VCN vcn) | ||
45 | { | ||
46 | ntfs_inode *base_ni; | ||
47 | ntfs_attr_search_ctx *ctx; | ||
48 | MFT_RECORD *mrec; | ||
49 | int err = 0; | ||
50 | |||
51 | ntfs_debug("Mapping runlist part containing vcn 0x%llx.", | ||
52 | (unsigned long long)vcn); | ||
53 | |||
54 | if (!NInoAttr(ni)) | ||
55 | base_ni = ni; | ||
56 | else | ||
57 | base_ni = ni->ext.base_ntfs_ino; | ||
58 | |||
59 | mrec = map_mft_record(base_ni); | ||
60 | if (IS_ERR(mrec)) | ||
61 | return PTR_ERR(mrec); | ||
62 | ctx = ntfs_attr_get_search_ctx(base_ni, mrec); | ||
63 | if (unlikely(!ctx)) { | ||
64 | err = -ENOMEM; | ||
65 | goto err_out; | ||
66 | } | ||
67 | err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, | ||
68 | CASE_SENSITIVE, vcn, NULL, 0, ctx); | ||
69 | if (unlikely(err)) | ||
70 | goto put_err_out; | ||
71 | |||
72 | down_write(&ni->runlist.lock); | ||
73 | /* Make sure someone else didn't do the work while we were sleeping. */ | ||
74 | if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <= | ||
75 | LCN_RL_NOT_MAPPED)) { | ||
76 | runlist_element *rl; | ||
77 | |||
78 | rl = ntfs_mapping_pairs_decompress(ni->vol, ctx->attr, | ||
79 | ni->runlist.rl); | ||
80 | if (IS_ERR(rl)) | ||
81 | err = PTR_ERR(rl); /* On failure ni->runlist.rl is not reassigned. */ | ||
82 | else | ||
83 | ni->runlist.rl = rl; | ||
84 | } | ||
85 | up_write(&ni->runlist.lock); | ||
86 | |||
87 | put_err_out: | ||
88 | ntfs_attr_put_search_ctx(ctx); | ||
89 | err_out: | ||
90 | unmap_mft_record(base_ni); | ||
91 | return err; | ||
92 | } | ||
93 | |||
94 | /** | ||
95 | * ntfs_find_vcn - find a vcn in the runlist described by an ntfs inode | ||
96 | * @ni: ntfs inode describing the runlist to search | ||
97 | * @vcn: vcn to find | ||
98 | * @need_write: if false, lock for reading and if true, lock for writing | ||
99 | * | ||
100 | * Find the virtual cluster number @vcn in the runlist described by the ntfs | ||
101 | * inode @ni and return the address of the runlist element containing the @vcn. | ||
102 | * The runlist is left locked and the caller has to unlock it. If @need_write | ||
103 | * is true, the runlist is locked for writing and if @need_write is false, the | ||
104 | * runlist is locked for reading. In the error case, the runlist is not left | ||
105 | * locked. | ||
106 | * | ||
107 | * Note you need to distinguish between the lcn of the returned runlist element | ||
108 | * being >= 0 and LCN_HOLE. In the latter case you have to return zeroes on | ||
109 | * read and allocate clusters on write. | ||
110 | * | ||
111 | * Return the runlist element containing the @vcn on success and | ||
112 | * ERR_PTR(-errno) on error. You need to test the return value with IS_ERR() | ||
113 | * to decide if the return is success or failure and PTR_ERR() to get to the | ||
114 | * error code if IS_ERR() is true. | ||
115 | * | ||
116 | * The possible error return codes are: | ||
117 | * -ENOENT - No such vcn in the runlist, i.e. @vcn is out of bounds. | ||
118 | * -ENOMEM - Not enough memory to map runlist. | ||
119 | * -EIO - Critical error (runlist/file is corrupt, i/o error, etc). | ||
120 | * | ||
121 | * Locking: - The runlist must be unlocked on entry. | ||
122 | * - On failing return, the runlist is unlocked. | ||
123 | * - On successful return, the runlist is locked. If @need_write is | ||
124 | * true, it is locked for writing. Otherwise it is locked for | ||
125 | * reading. | ||
126 | */ | ||
127 | runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn, | ||
128 | const BOOL need_write) | ||
129 | { | ||
130 | runlist_element *rl; | ||
131 | int err = 0; | ||
132 | BOOL is_retry = FALSE; | ||
133 | |||
134 | BUG_ON(!ni); /* Must come before the first dereference of @ni below. */ | ||
135 | ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, lock for %sing.", | ||
136 | ni->mft_no, (unsigned long long)vcn, | ||
137 | !need_write ? "read" : "writ"); | ||
138 | BUG_ON(!NInoNonResident(ni)); | ||
139 | BUG_ON(vcn < 0); | ||
140 | lock_retry_remap: | ||
141 | if (!need_write) | ||
142 | down_read(&ni->runlist.lock); | ||
143 | else | ||
144 | down_write(&ni->runlist.lock); | ||
145 | rl = ni->runlist.rl; | ||
146 | if (likely(rl && vcn >= rl[0].vcn)) { | ||
147 | while (likely(rl->length)) { | ||
148 | if (likely(vcn < rl[1].vcn)) { | ||
149 | if (likely(rl->lcn >= LCN_HOLE)) { | ||
150 | ntfs_debug("Done."); | ||
151 | return rl; /* Success: the runlist lock stays held for the caller. */ | ||
152 | } | ||
153 | break; | ||
154 | } | ||
155 | rl++; | ||
156 | } | ||
157 | if (likely(rl->lcn != LCN_RL_NOT_MAPPED)) { | ||
158 | if (likely(rl->lcn == LCN_ENOENT)) | ||
159 | err = -ENOENT; | ||
160 | else | ||
161 | err = -EIO; | ||
162 | } | ||
163 | } | ||
164 | if (!need_write) | ||
165 | up_read(&ni->runlist.lock); | ||
166 | else | ||
167 | up_write(&ni->runlist.lock); | ||
168 | if (!err && !is_retry) { | ||
169 | /* | ||
170 | * The @vcn is in an unmapped region, map the runlist and | ||
171 | * retry. | ||
172 | */ | ||
173 | err = ntfs_map_runlist(ni, vcn); | ||
174 | if (likely(!err)) { | ||
175 | is_retry = TRUE; | ||
176 | goto lock_retry_remap; | ||
177 | } | ||
178 | /* | ||
179 | * -EINVAL and -ENOENT coming from a failed mapping attempt are | ||
180 | * equivalent to i/o errors for us as they should not happen in | ||
181 | * our code paths. | ||
182 | */ | ||
183 | if (err == -EINVAL || err == -ENOENT) | ||
184 | err = -EIO; | ||
185 | } else if (!err) | ||
186 | err = -EIO; /* Still unmapped after one remap attempt: give up. */ | ||
187 | ntfs_error(ni->vol->sb, "Failed with error code %i.", err); | ||
188 | return ERR_PTR(err); | ||
189 | } | ||
190 | |||
191 | /** | ||
192 | * ntfs_attr_find - find (next) attribute in mft record | ||
193 | * @type: attribute type to find | ||
194 | * @name: attribute name to find (optional, i.e. NULL means don't care) | ||
195 | * @name_len: attribute name length (only needed if @name present) | ||
196 | * @ic: IGNORE_CASE or CASE_SENSITIVE (ignored if @name not present) | ||
197 | * @val: attribute value to find (optional, resident attributes only) | ||
198 | * @val_len: attribute value length | ||
199 | * @ctx: search context with mft record and attribute to search from | ||
200 | * | ||
201 | * You should not need to call this function directly. Use ntfs_attr_lookup() | ||
202 | * instead. | ||
203 | * | ||
204 | * ntfs_attr_find() takes a search context @ctx as parameter and searches the | ||
205 | * mft record specified by @ctx->mrec, beginning at @ctx->attr, for an | ||
206 | * attribute of @type, optionally @name and @val. | ||
207 | * | ||
208 | * If the attribute is found, ntfs_attr_find() returns 0 and @ctx->attr will | ||
209 | * point to the found attribute. | ||
210 | * | ||
211 | * If the attribute is not found, ntfs_attr_find() returns -ENOENT and | ||
212 | * @ctx->attr will point to the attribute before which the attribute being | ||
213 | * searched for would need to be inserted if such an action were to be desired. | ||
214 | * | ||
215 | * On actual error, ntfs_attr_find() returns -EIO. In this case @ctx->attr is | ||
216 | * undefined and in particular do not rely on it not changing. | ||
217 | * | ||
218 | * If @ctx->is_first is TRUE, the search begins with @ctx->attr itself. If it | ||
219 | * is FALSE, the search begins after @ctx->attr. | ||
220 | * | ||
221 | * If @ic is IGNORE_CASE, the @name comparisson is not case sensitive and | ||
222 | * @ctx->ntfs_ino must be set to the ntfs inode to which the mft record | ||
223 | * @ctx->mrec belongs. This is so we can get at the ntfs volume and hence at | ||
224 | * the upcase table. If @ic is CASE_SENSITIVE, the comparison is case | ||
225 | * sensitive. When @name is present, @name_len is the @name length in Unicode | ||
226 | * characters. | ||
227 | * | ||
228 | * If @name is not present (NULL), we assume that the unnamed attribute is | ||
229 | * being searched for. | ||
230 | * | ||
231 | * Finally, the resident attribute value @val is looked for, if present. If | ||
232 | * @val is not present (NULL), @val_len is ignored. | ||
233 | * | ||
234 | * ntfs_attr_find() only searches the specified mft record and it ignores the | ||
235 | * presence of an attribute list attribute (unless it is the one being searched | ||
236 | * for, obviously). If you need to take attribute lists into consideration, | ||
237 | * use ntfs_attr_lookup() instead (see below). This also means that you cannot | ||
238 | * use ntfs_attr_find() to search for extent records of non-resident | ||
239 | * attributes, as extents with lowest_vcn != 0 are usually described by the | ||
240 | * attribute list attribute only. - Note that it is possible that the first | ||
241 | * extent is only in the attribute list while the last extent is in the base | ||
242 | * mft record, so do not rely on being able to find the first extent in the | ||
243 | * base mft record. | ||
244 | * | ||
245 | * Warning: Never use @val when looking for attribute types which can be | ||
246 | * non-resident as this most likely will result in a crash! | ||
247 | */ | ||
248 | static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name, | ||
249 | const u32 name_len, const IGNORE_CASE_BOOL ic, | ||
250 | const u8 *val, const u32 val_len, ntfs_attr_search_ctx *ctx) | ||
251 | { | ||
252 | ATTR_RECORD *a; | ||
253 | ntfs_volume *vol = ctx->ntfs_ino->vol; | ||
254 | ntfschar *upcase = vol->upcase; | ||
255 | u32 upcase_len = vol->upcase_len; | ||
256 | |||
257 | /* | ||
258 | * Iterate over attributes in mft record starting at @ctx->attr, or the | ||
259 | * attribute following that, if @ctx->is_first is TRUE. | ||
260 | */ | ||
261 | if (ctx->is_first) { | ||
262 | a = ctx->attr; | ||
263 | ctx->is_first = FALSE; | ||
264 | } else | ||
265 | a = (ATTR_RECORD*)((u8*)ctx->attr + | ||
266 | le32_to_cpu(ctx->attr->length)); | ||
267 | for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) { | ||
268 | if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec + | ||
269 | le32_to_cpu(ctx->mrec->bytes_allocated)) | ||
270 | break; | ||
271 | ctx->attr = a; | ||
272 | if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) || | ||
273 | a->type == AT_END)) | ||
274 | return -ENOENT; | ||
275 | if (unlikely(!a->length)) | ||
276 | break; | ||
277 | if (a->type != type) | ||
278 | continue; | ||
279 | /* | ||
280 | * If @name is present, compare the two names. If @name is | ||
281 | * missing, assume we want an unnamed attribute. | ||
282 | */ | ||
283 | if (!name) { | ||
284 | /* The search failed if the found attribute is named. */ | ||
285 | if (a->name_length) | ||
286 | return -ENOENT; | ||
287 | } else if (!ntfs_are_names_equal(name, name_len, | ||
288 | (ntfschar*)((u8*)a + le16_to_cpu(a->name_offset)), | ||
289 | a->name_length, ic, upcase, upcase_len)) { | ||
290 | register int rc; | ||
291 | |||
292 | rc = ntfs_collate_names(name, name_len, | ||
293 | (ntfschar*)((u8*)a + | ||
294 | le16_to_cpu(a->name_offset)), | ||
295 | a->name_length, 1, IGNORE_CASE, | ||
296 | upcase, upcase_len); | ||
297 | /* | ||
298 | * If @name collates before a->name, there is no | ||
299 | * matching attribute. | ||
300 | */ | ||
301 | if (rc == -1) | ||
302 | return -ENOENT; | ||
303 | /* If the strings are not equal, continue search. */ | ||
304 | if (rc) | ||
305 | continue; | ||
306 | rc = ntfs_collate_names(name, name_len, | ||
307 | (ntfschar*)((u8*)a + | ||
308 | le16_to_cpu(a->name_offset)), | ||
309 | a->name_length, 1, CASE_SENSITIVE, | ||
310 | upcase, upcase_len); | ||
311 | if (rc == -1) | ||
312 | return -ENOENT; | ||
313 | if (rc) | ||
314 | continue; | ||
315 | } | ||
316 | /* | ||
317 | * The names match or @name not present and attribute is | ||
318 | * unnamed. If no @val specified, we have found the attribute | ||
319 | * and are done. | ||
320 | */ | ||
321 | if (!val) | ||
322 | return 0; | ||
323 | /* @val is present; compare values. */ | ||
324 | else { | ||
325 | register int rc; | ||
326 | |||
327 | rc = memcmp(val, (u8*)a + le16_to_cpu( | ||
328 | a->data.resident.value_offset), | ||
329 | min_t(u32, val_len, le32_to_cpu( | ||
330 | a->data.resident.value_length))); | ||
331 | /* | ||
332 | * If @val collates before the current attribute's | ||
333 | * value, there is no matching attribute. | ||
334 | */ | ||
335 | if (!rc) { | ||
336 | register u32 avl; | ||
337 | |||
338 | avl = le32_to_cpu( | ||
339 | a->data.resident.value_length); | ||
340 | if (val_len == avl) | ||
341 | return 0; | ||
342 | if (val_len < avl) | ||
343 | return -ENOENT; | ||
344 | } else if (rc < 0) | ||
345 | return -ENOENT; | ||
346 | } | ||
347 | } | ||
348 | ntfs_error(vol->sb, "Inode is corrupt. Run chkdsk."); | ||
349 | NVolSetErrors(vol); | ||
350 | return -EIO; | ||
351 | } | ||
352 | |||
353 | /** | ||
354 | * load_attribute_list - load an attribute list into memory | ||
355 | * @vol: ntfs volume from which to read | ||
356 | * @runlist: runlist of the attribute list | ||
357 | * @al_start: destination buffer | ||
358 | * @size: size of the destination buffer in bytes | ||
359 | * @initialized_size: initialized size of the attribute list | ||
360 | * | ||
361 | * Walk the runlist @runlist and load all clusters from it copying them into | ||
362 | * the linear buffer @al. The maximum number of bytes copied to @al is @size | ||
363 | * bytes. Note, @size does not need to be a multiple of the cluster size. If | ||
364 | * @initialized_size is less than @size, the region in @al between | ||
365 | * @initialized_size and @size will be zeroed and not read from disk. | ||
366 | * | ||
367 | * Return 0 on success or -errno on error. | ||
368 | */ | ||
369 | int load_attribute_list(ntfs_volume *vol, runlist *runlist, u8 *al_start, | ||
370 | const s64 size, const s64 initialized_size) | ||
371 | { | ||
372 | LCN lcn; | ||
373 | u8 *al = al_start; | ||
374 | u8 *al_end = al + initialized_size; | ||
375 | runlist_element *rl; | ||
376 | struct buffer_head *bh; | ||
377 | struct super_block *sb; | ||
378 | unsigned long block_size; | ||
379 | unsigned long block, max_block; | ||
380 | int err = 0; | ||
381 | unsigned char block_size_bits; | ||
382 | |||
383 | ntfs_debug("Entering."); | ||
384 | if (!vol || !runlist || !al || size <= 0 || initialized_size < 0 || | ||
385 | initialized_size > size) | ||
386 | return -EINVAL; | ||
387 | if (!initialized_size) { | ||
388 | memset(al, 0, size); | ||
389 | return 0; | ||
390 | } | ||
391 | sb = vol->sb; | ||
392 | block_size = sb->s_blocksize; | ||
393 | block_size_bits = sb->s_blocksize_bits; | ||
394 | down_read(&runlist->lock); | ||
395 | rl = runlist->rl; | ||
396 | /* Read all clusters specified by the runlist one run at a time. */ | ||
397 | while (rl->length) { | ||
398 | lcn = ntfs_rl_vcn_to_lcn(rl, rl->vcn); | ||
399 | ntfs_debug("Reading vcn = 0x%llx, lcn = 0x%llx.", | ||
400 | (unsigned long long)rl->vcn, | ||
401 | (unsigned long long)lcn); | ||
402 | /* The attribute list cannot be sparse. */ | ||
403 | if (lcn < 0) { | ||
404 | ntfs_error(sb, "ntfs_rl_vcn_to_lcn() failed. Cannot " | ||
405 | "read attribute list."); | ||
406 | goto err_out; | ||
407 | } | ||
408 | block = lcn << vol->cluster_size_bits >> block_size_bits; | ||
409 | /* Read the run from device in chunks of block_size bytes. */ | ||
410 | max_block = block + (rl->length << vol->cluster_size_bits >> | ||
411 | block_size_bits); | ||
412 | ntfs_debug("max_block = 0x%lx.", max_block); | ||
413 | do { | ||
414 | ntfs_debug("Reading block = 0x%lx.", block); | ||
415 | bh = sb_bread(sb, block); | ||
416 | if (!bh) { | ||
417 | ntfs_error(sb, "sb_bread() failed. Cannot " | ||
418 | "read attribute list."); | ||
419 | goto err_out; | ||
420 | } | ||
421 | if (al + block_size >= al_end) | ||
422 | goto do_final; | ||
423 | memcpy(al, bh->b_data, block_size); | ||
424 | brelse(bh); | ||
425 | al += block_size; | ||
426 | } while (++block < max_block); | ||
427 | rl++; | ||
428 | } | ||
429 | if (initialized_size < size) { | ||
430 | initialize: | ||
431 | memset(al_start + initialized_size, 0, size - initialized_size); | ||
432 | } | ||
433 | done: | ||
434 | up_read(&runlist->lock); | ||
435 | return err; | ||
436 | do_final: | ||
437 | if (al < al_end) { | ||
438 | /* | ||
439 | * Partial block. | ||
440 | * | ||
441 | * Note: The attribute list can be smaller than its allocation | ||
442 | * by multiple clusters. This has been encountered by at least | ||
443 | * two people running Windows XP, thus we cannot do any | ||
444 | * truncation sanity checking here. (AIA) | ||
445 | */ | ||
446 | memcpy(al, bh->b_data, al_end - al); | ||
447 | brelse(bh); | ||
448 | if (initialized_size < size) | ||
449 | goto initialize; | ||
450 | goto done; | ||
451 | } | ||
452 | brelse(bh); | ||
453 | /* Real overflow! */ | ||
454 | ntfs_error(sb, "Attribute list buffer overflow. Read attribute list " | ||
455 | "is truncated."); | ||
456 | err_out: | ||
457 | err = -EIO; | ||
458 | goto done; | ||
459 | } | ||
460 | |||
461 | /** | ||
462 | * ntfs_external_attr_find - find an attribute in the attribute list of an inode | ||
463 | * @type: attribute type to find | ||
464 | * @name: attribute name to find (optional, NULL means unnamed attribute) | ||
465 | * @name_len: attribute name length (only needed if @name present) | ||
466 | * @ic: IGNORE_CASE or CASE_SENSITIVE (ignored if @name not present) | ||
467 | * @lowest_vcn: lowest vcn to find (optional, non-resident attributes only) | ||
468 | * @val: attribute value to find (optional, resident attributes only) | ||
469 | * @val_len: attribute value length | ||
470 | * @ctx: search context with mft record and attribute to search from | ||
471 | * | ||
472 | * You should not need to call this function directly. Use ntfs_attr_lookup() | ||
473 | * instead. | ||
474 | * | ||
475 | * Find an attribute by searching the attribute list for the corresponding | ||
476 | * attribute list entry. Having found the entry, map the mft record if the | ||
477 | * attribute is in a different mft record/inode, locate the attribute in | ||
478 | * there (matching on the attribute instance number) and return it. | ||
479 | * | ||
480 | * On first search @ctx->ntfs_ino must be the base mft record and @ctx must | ||
481 | * have been obtained from a call to ntfs_attr_get_search_ctx(). On subsequent | ||
482 | * calls @ctx->ntfs_ino can be any extent inode, too (@ctx->base_ntfs_ino is | ||
483 | * then the base inode). | ||
484 | * | ||
485 | * After finishing with the attribute/mft record you need to call | ||
486 | * ntfs_attr_put_search_ctx() to cleanup the search context (unmapping any | ||
487 | * mapped inodes, etc). | ||
488 | * | ||
489 | * If the attribute is found, ntfs_external_attr_find() returns 0 and | ||
490 | * @ctx->attr will point to the found attribute. @ctx->mrec will point to the | ||
491 | * mft record in which @ctx->attr is located and @ctx->al_entry will point to | ||
492 | * the attribute list entry for the attribute. | ||
493 | * | ||
494 | * If the attribute is not found, ntfs_external_attr_find() returns -ENOENT and | ||
495 | * @ctx->attr will point to the attribute in the base mft record before which | ||
496 | * the attribute being searched for would need to be inserted if such an action | ||
497 | * were to be desired. @ctx->mrec will point to the mft record in which | ||
498 | * @ctx->attr is located and @ctx->al_entry will point to the attribute list | ||
499 | * entry of the attribute before which the attribute being searched for would | ||
500 | * need to be inserted if such an action were to be desired. | ||
501 | * | ||
502 | * Thus to insert the not found attribute, one wants to add the attribute to | ||
503 | * @ctx->mrec (the base mft record) and if there is not enough space, the | ||
504 | * attribute should be placed in a newly allocated extent mft record. The | ||
505 | * attribute list entry for the inserted attribute should be inserted in the | ||
506 | * attribute list attribute at @ctx->al_entry. | ||
507 | * | ||
508 | * On actual error, ntfs_external_attr_find() returns -EIO or the error from a | ||
509 | * failed extent mft record mapping. In this case @ctx->attr is undefined. | ||
510 | */ | ||
511 | static int ntfs_external_attr_find(const ATTR_TYPE type, | ||
512 | const ntfschar *name, const u32 name_len, | ||
513 | const IGNORE_CASE_BOOL ic, const VCN lowest_vcn, | ||
514 | const u8 *val, const u32 val_len, ntfs_attr_search_ctx *ctx) | ||
515 | { | ||
516 | ntfs_inode *base_ni, *ni; | ||
517 | ntfs_volume *vol; | ||
518 | ATTR_LIST_ENTRY *al_entry, *next_al_entry; | ||
519 | u8 *al_start, *al_end; | ||
520 | ATTR_RECORD *a; | ||
521 | ntfschar *al_name; | ||
522 | u32 al_name_len; | ||
523 | int err = 0; | ||
524 | static const char *es = " Unmount and run chkdsk."; | ||
525 | |||
526 | ni = ctx->ntfs_ino; | ||
527 | base_ni = ctx->base_ntfs_ino; | ||
528 | ntfs_debug("Entering for inode 0x%lx, type 0x%x.", ni->mft_no, type); | ||
529 | if (!base_ni) { | ||
530 | /* First call happens with the base mft record. */ | ||
531 | base_ni = ctx->base_ntfs_ino = ctx->ntfs_ino; | ||
532 | ctx->base_mrec = ctx->mrec; | ||
533 | } | ||
534 | if (ni == base_ni) | ||
535 | ctx->base_attr = ctx->attr; | ||
536 | if (type == AT_END) | ||
537 | goto not_found; | ||
538 | vol = base_ni->vol; | ||
539 | al_start = base_ni->attr_list; | ||
540 | al_end = al_start + base_ni->attr_list_size; | ||
541 | if (!ctx->al_entry) | ||
542 | ctx->al_entry = (ATTR_LIST_ENTRY*)al_start; | ||
543 | /* | ||
544 | * Iterate over entries in attribute list starting at @ctx->al_entry | ||
545 | * if @ctx->is_first is TRUE, or at the entry following it otherwise. | ||
546 | */ | ||
547 | if (ctx->is_first) { | ||
548 | al_entry = ctx->al_entry; | ||
549 | ctx->is_first = FALSE; | ||
550 | } else | ||
551 | al_entry = (ATTR_LIST_ENTRY*)((u8*)ctx->al_entry + | ||
552 | le16_to_cpu(ctx->al_entry->length)); | ||
553 | for (;; al_entry = next_al_entry) { | ||
554 | /* Out of bounds check. */ | ||
555 | if ((u8*)al_entry < base_ni->attr_list || | ||
556 | (u8*)al_entry > al_end) | ||
557 | break; /* Inode is corrupt. */ | ||
558 | ctx->al_entry = al_entry; | ||
559 | /* Catch the end of the attribute list. */ | ||
560 | if ((u8*)al_entry == al_end) | ||
561 | goto not_found; | ||
562 | if (!al_entry->length) | ||
563 | break; | ||
564 | if ((u8*)al_entry + 6 > al_end || (u8*)al_entry + | ||
565 | le16_to_cpu(al_entry->length) > al_end) | ||
566 | break; | ||
567 | next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry + | ||
568 | le16_to_cpu(al_entry->length)); | ||
569 | if (le32_to_cpu(al_entry->type) > le32_to_cpu(type)) | ||
570 | goto not_found; | ||
571 | if (type != al_entry->type) | ||
572 | continue; | ||
573 | /* | ||
574 | * If @name is present, compare the two names. If @name is | ||
575 | * missing, assume we want an unnamed attribute. | ||
576 | */ | ||
577 | al_name_len = al_entry->name_length; | ||
578 | al_name = (ntfschar*)((u8*)al_entry + al_entry->name_offset); | ||
579 | if (!name) { | ||
580 | if (al_name_len) | ||
581 | goto not_found; | ||
582 | } else if (!ntfs_are_names_equal(al_name, al_name_len, name, | ||
583 | name_len, ic, vol->upcase, vol->upcase_len)) { | ||
584 | register int rc; | ||
585 | |||
586 | rc = ntfs_collate_names(name, name_len, al_name, | ||
587 | al_name_len, 1, IGNORE_CASE, | ||
588 | vol->upcase, vol->upcase_len); | ||
589 | /* | ||
590 | * If @name collates before al_name, there is no | ||
591 | * matching attribute. | ||
592 | */ | ||
593 | if (rc == -1) | ||
594 | goto not_found; | ||
595 | /* If the strings are not equal, continue search. */ | ||
596 | if (rc) | ||
597 | continue; | ||
598 | /* | ||
599 | * FIXME: Reverse engineering showed 0, IGNORE_CASE but | ||
600 | * that is inconsistent with ntfs_attr_find(). The | ||
601 | * subsequent rc checks were also different. Perhaps I | ||
602 | * made a mistake in one of the two. Need to recheck | ||
603 | * which is correct or at least see what is going on... | ||
604 | * (AIA) | ||
605 | */ | ||
606 | rc = ntfs_collate_names(name, name_len, al_name, | ||
607 | al_name_len, 1, CASE_SENSITIVE, | ||
608 | vol->upcase, vol->upcase_len); | ||
609 | if (rc == -1) | ||
610 | goto not_found; | ||
611 | if (rc) | ||
612 | continue; | ||
613 | } | ||
614 | /* | ||
615 | * The names match or @name not present and attribute is | ||
616 | * unnamed. Now check @lowest_vcn. Continue search if the | ||
617 | * next attribute list entry still fits @lowest_vcn. Otherwise | ||
618 | * we have reached the right one or the search has failed. | ||
619 | */ | ||
620 | if (lowest_vcn && (u8*)next_al_entry >= al_start && | ||
621 | (u8*)next_al_entry + 6 < al_end && | ||
622 | (u8*)next_al_entry + le16_to_cpu( | ||
623 | next_al_entry->length) <= al_end && | ||
624 | sle64_to_cpu(next_al_entry->lowest_vcn) <= | ||
625 | lowest_vcn && | ||
626 | next_al_entry->type == al_entry->type && | ||
627 | next_al_entry->name_length == al_name_len && | ||
628 | ntfs_are_names_equal((ntfschar*)((u8*) | ||
629 | next_al_entry + | ||
630 | next_al_entry->name_offset), | ||
631 | next_al_entry->name_length, | ||
632 | al_name, al_name_len, CASE_SENSITIVE, | ||
633 | vol->upcase, vol->upcase_len)) | ||
634 | continue; | ||
635 | if (MREF_LE(al_entry->mft_reference) == ni->mft_no) { | ||
636 | if (MSEQNO_LE(al_entry->mft_reference) != ni->seq_no) { | ||
637 | ntfs_error(vol->sb, "Found stale mft " | ||
638 | "reference in attribute list " | ||
639 | "of base inode 0x%lx.%s", | ||
640 | base_ni->mft_no, es); | ||
641 | err = -EIO; | ||
642 | break; | ||
643 | } | ||
644 | } else { /* Mft references do not match. */ | ||
645 | /* If there is a mapped record unmap it first. */ | ||
646 | if (ni != base_ni) | ||
647 | unmap_extent_mft_record(ni); | ||
648 | /* Do we want the base record back? */ | ||
649 | if (MREF_LE(al_entry->mft_reference) == | ||
650 | base_ni->mft_no) { | ||
651 | ni = ctx->ntfs_ino = base_ni; | ||
652 | ctx->mrec = ctx->base_mrec; | ||
653 | } else { | ||
654 | /* We want an extent record. */ | ||
655 | ctx->mrec = map_extent_mft_record(base_ni, | ||
656 | le64_to_cpu( | ||
657 | al_entry->mft_reference), &ni); | ||
658 | if (IS_ERR(ctx->mrec)) { | ||
659 | ntfs_error(vol->sb, "Failed to map " | ||
660 | "extent mft record " | ||
661 | "0x%lx of base inode " | ||
662 | "0x%lx.%s", | ||
663 | MREF_LE(al_entry-> | ||
664 | mft_reference), | ||
665 | base_ni->mft_no, es); | ||
666 | err = PTR_ERR(ctx->mrec); | ||
667 | if (err == -ENOENT) | ||
668 | err = -EIO; | ||
669 | /* Cause @ctx to be sanitized below. */ | ||
670 | ni = NULL; | ||
671 | break; | ||
672 | } | ||
673 | ctx->ntfs_ino = ni; | ||
674 | } | ||
675 | ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec + | ||
676 | le16_to_cpu(ctx->mrec->attrs_offset)); | ||
677 | } | ||
678 | /* | ||
679 | * ctx->ntfs_ino, ctx->mrec, and ctx->attr now point to the | ||
680 | * mft record containing the attribute represented by the | ||
681 | * current al_entry. | ||
682 | */ | ||
683 | /* | ||
684 | * We could call into ntfs_attr_find() to find the right | ||
685 | * attribute in this mft record but this would be less | ||
686 | * efficient and not quite accurate as ntfs_attr_find() ignores | ||
687 | * the attribute instance numbers for example which become | ||
688 | * important when one plays with attribute lists. Also, | ||
689 | * because a proper match has been found in the attribute list | ||
690 | * entry above, the comparison can now be optimized. So it is | ||
691 | * worth re-implementing a simplified ntfs_attr_find() here. | ||
692 | */ | ||
693 | a = ctx->attr; | ||
694 | /* | ||
695 | * Use a manual loop so we can still use break and continue | ||
696 | * with the same meanings as above, i.e. they act on the outer | ||
697 | * attribute list loop: break means corruption/error and | ||
698 | * continue means move on to the next attribute list entry. | ||
699 | */ | ||
698 | do_next_attr_loop: | ||
699 | if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec + | ||
700 | le32_to_cpu(ctx->mrec->bytes_allocated)) | ||
701 | break; | ||
702 | if (a->type == AT_END) | ||
703 | continue; | ||
704 | if (!a->length) | ||
705 | break; | ||
706 | if (al_entry->instance != a->instance) | ||
707 | goto do_next_attr; | ||
708 | /* | ||
709 | * If the type and/or the name are mismatched between the | ||
710 | * attribute list entry and the attribute record, there is | ||
711 | * corruption so we break and return error EIO. | ||
712 | */ | ||
713 | if (al_entry->type != a->type) | ||
714 | break; | ||
715 | if (!ntfs_are_names_equal((ntfschar*)((u8*)a + | ||
716 | le16_to_cpu(a->name_offset)), a->name_length, | ||
717 | al_name, al_name_len, CASE_SENSITIVE, | ||
718 | vol->upcase, vol->upcase_len)) | ||
719 | break; | ||
720 | ctx->attr = a; | ||
721 | /* | ||
722 | * If no @val specified or @val specified and it matches, we | ||
723 | * have found it! | ||
724 | */ | ||
725 | if (!val || (!a->non_resident && le32_to_cpu( | ||
726 | a->data.resident.value_length) == val_len && | ||
727 | !memcmp((u8*)a + | ||
728 | le16_to_cpu(a->data.resident.value_offset), | ||
729 | val, val_len))) { | ||
730 | ntfs_debug("Done, found."); | ||
731 | return 0; | ||
732 | } | ||
733 | do_next_attr: | ||
734 | /* Proceed to the next attribute in the current mft record. */ | ||
735 | a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length)); | ||
736 | goto do_next_attr_loop; | ||
737 | } | ||
738 | if (!err) { | ||
739 | ntfs_error(vol->sb, "Base inode 0x%lx contains corrupt " | ||
740 | "attribute list attribute.%s", base_ni->mft_no, | ||
741 | es); | ||
742 | err = -EIO; | ||
743 | } | ||
744 | if (ni != base_ni) { | ||
745 | if (ni) | ||
746 | unmap_extent_mft_record(ni); | ||
747 | ctx->ntfs_ino = base_ni; | ||
748 | ctx->mrec = ctx->base_mrec; | ||
749 | ctx->attr = ctx->base_attr; | ||
750 | } | ||
751 | if (err != -ENOMEM) | ||
752 | NVolSetErrors(vol); | ||
753 | return err; | ||
754 | not_found: | ||
755 | /* | ||
756 | * If we were looking for AT_END, we reset the search context @ctx and | ||
757 | * use ntfs_attr_find() to seek to the end of the base mft record. | ||
758 | */ | ||
759 | if (type == AT_END) { | ||
760 | ntfs_attr_reinit_search_ctx(ctx); | ||
761 | return ntfs_attr_find(AT_END, name, name_len, ic, val, val_len, | ||
762 | ctx); | ||
763 | } | ||
764 | /* | ||
765 | * The attribute was not found. Before we return, we want to ensure | ||
766 | * @ctx->mrec and @ctx->attr indicate the position at which the | ||
767 | * attribute should be inserted in the base mft record. Since we also | ||
768 | * want to preserve @ctx->al_entry we cannot reinitialize the search | ||
769 | * context using ntfs_attr_reinit_search_ctx() as this would set | ||
770 | * @ctx->al_entry to NULL. Thus we do the necessary bits manually (see | ||
771 | * ntfs_attr_init_search_ctx() below). Note, we _only_ preserve | ||
772 | * @ctx->al_entry as the remaining fields (base_*) are identical to | ||
773 | * their non base_ counterparts and we cannot set @ctx->base_attr | ||
774 | * correctly yet as we do not know what @ctx->attr will be set to by | ||
775 | * the call to ntfs_attr_find() below. | ||
776 | */ | ||
777 | if (ni != base_ni) | ||
778 | unmap_extent_mft_record(ni); | ||
779 | ctx->mrec = ctx->base_mrec; | ||
780 | ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec + | ||
781 | le16_to_cpu(ctx->mrec->attrs_offset)); | ||
782 | ctx->is_first = TRUE; | ||
783 | ctx->ntfs_ino = base_ni; | ||
784 | ctx->base_ntfs_ino = NULL; | ||
785 | ctx->base_mrec = NULL; | ||
786 | ctx->base_attr = NULL; | ||
787 | /* | ||
788 | * In case there are multiple matches in the base mft record, need to | ||
789 | * keep enumerating until we get an attribute not found response (or | ||
790 | * another error), otherwise we would keep returning the same attribute | ||
791 | * over and over again and all programs using us for enumeration would | ||
792 | * lock up in a tight loop. | ||
793 | */ | ||
794 | do { | ||
795 | err = ntfs_attr_find(type, name, name_len, ic, val, val_len, | ||
796 | ctx); | ||
797 | } while (!err); | ||
798 | ntfs_debug("Done, not found."); | ||
799 | return err; | ||
800 | } | ||
801 | |||
802 | /** | ||
803 | * ntfs_attr_lookup - find an attribute in an ntfs inode | ||
804 | * @type: attribute type to find | ||
805 | * @name: attribute name to find (optional, i.e. NULL means don't care) | ||
806 | * @name_len: attribute name length (only needed if @name present) | ||
807 | * @ic: IGNORE_CASE or CASE_SENSITIVE (ignored if @name not present) | ||
808 | * @lowest_vcn: lowest vcn to find (optional, non-resident attributes only) | ||
809 | * @val: attribute value to find (optional, resident attributes only) | ||
810 | * @val_len: attribute value length | ||
811 | * @ctx: search context with mft record and attribute to search from | ||
812 | * | ||
813 | * Find an attribute in an ntfs inode. On first search @ctx->ntfs_ino must | ||
814 | * be the base mft record and @ctx must have been obtained from a call to | ||
815 | * ntfs_attr_get_search_ctx(). | ||
816 | * | ||
817 | * This function transparently handles attribute lists and @ctx is used to | ||
818 | * continue searches where they were left off at. | ||
819 | * | ||
820 | * After finishing with the attribute/mft record you need to call | ||
821 | * ntfs_attr_put_search_ctx() to cleanup the search context (unmapping any | ||
822 | * mapped inodes, etc). | ||
823 | * | ||
824 | * Return 0 if the search was successful and -errno if not. | ||
825 | * | ||
826 | * When 0, @ctx->attr is the found attribute and it is in mft record | ||
827 | * @ctx->mrec. If an attribute list attribute is present, @ctx->al_entry is | ||
828 | * the attribute list entry of the found attribute. | ||
829 | * | ||
830 | * When -ENOENT, @ctx->attr is the attribute which collates just after the | ||
831 | * attribute being searched for, i.e. if one wants to add the attribute to the | ||
832 | * mft record this is the correct place to insert it into. If an attribute | ||
833 | * list attribute is present, @ctx->al_entry is the attribute list entry which | ||
834 | * collates just after the attribute list entry of the attribute being searched | ||
835 | * for, i.e. if one wants to add the attribute to the mft record this is the | ||
836 | * correct place to insert its attribute list entry into. | ||
837 | * | ||
838 | * When -errno != -ENOENT, an error occured during the lookup. @ctx->attr is | ||
839 | * then undefined and in particular you should not rely on it not changing. | ||
840 | */ | ||
841 | int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name, | ||
842 | const u32 name_len, const IGNORE_CASE_BOOL ic, | ||
843 | const VCN lowest_vcn, const u8 *val, const u32 val_len, | ||
844 | ntfs_attr_search_ctx *ctx) | ||
845 | { | ||
846 | ntfs_inode *base_ni; | ||
847 | |||
848 | ntfs_debug("Entering."); | ||
849 | if (ctx->base_ntfs_ino) | ||
850 | base_ni = ctx->base_ntfs_ino; | ||
851 | else | ||
852 | base_ni = ctx->ntfs_ino; | ||
853 | /* Sanity check, just for debugging really. */ | ||
854 | BUG_ON(!base_ni); | ||
855 | if (!NInoAttrList(base_ni) || type == AT_ATTRIBUTE_LIST) | ||
856 | return ntfs_attr_find(type, name, name_len, ic, val, val_len, | ||
857 | ctx); | ||
858 | return ntfs_external_attr_find(type, name, name_len, ic, lowest_vcn, | ||
859 | val, val_len, ctx); | ||
860 | } | ||
861 | |||
862 | /** | ||
863 | * ntfs_attr_init_search_ctx - initialize an attribute search context | ||
864 | * @ctx: attribute search context to initialize | ||
865 | * @ni: ntfs inode with which to initialize the search context | ||
866 | * @mrec: mft record with which to initialize the search context | ||
867 | * | ||
868 | * Initialize the attribute search context @ctx with @ni and @mrec. | ||
869 | */ | ||
870 | static inline void ntfs_attr_init_search_ctx(ntfs_attr_search_ctx *ctx, | ||
871 | ntfs_inode *ni, MFT_RECORD *mrec) | ||
872 | { | ||
873 | ctx->mrec = mrec; | ||
874 | /* Sanity checks are performed elsewhere. */ | ||
875 | ctx->attr = (ATTR_RECORD*)((u8*)mrec + le16_to_cpu(mrec->attrs_offset)); | ||
876 | ctx->is_first = TRUE; | ||
877 | ctx->ntfs_ino = ni; | ||
878 | ctx->al_entry = NULL; | ||
879 | ctx->base_ntfs_ino = NULL; | ||
880 | ctx->base_mrec = NULL; | ||
881 | ctx->base_attr = NULL; | ||
882 | } | ||
883 | |||
884 | /** | ||
885 | * ntfs_attr_reinit_search_ctx - reinitialize an attribute search context | ||
886 | * @ctx: attribute search context to reinitialize | ||
887 | * | ||
888 | * Reinitialize the attribute search context @ctx, unmapping an associated | ||
889 | * extent mft record if present, and initialize the search context again. | ||
890 | * | ||
891 | * This is used when a search for a new attribute is being started to reset | ||
892 | * the search context to the beginning. | ||
893 | */ | ||
894 | void ntfs_attr_reinit_search_ctx(ntfs_attr_search_ctx *ctx) | ||
895 | { | ||
896 | if (likely(!ctx->base_ntfs_ino)) { | ||
897 | /* No attribute list. */ | ||
898 | ctx->is_first = TRUE; | ||
899 | /* Sanity checks are performed elsewhere. */ | ||
900 | ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec + | ||
901 | le16_to_cpu(ctx->mrec->attrs_offset)); | ||
902 | /* | ||
903 | * This needs resetting due to ntfs_external_attr_find() which | ||
904 | * can leave it set despite having zeroed ctx->base_ntfs_ino. | ||
905 | */ | ||
906 | ctx->al_entry = NULL; | ||
907 | return; | ||
908 | } /* Attribute list. */ | ||
909 | if (ctx->ntfs_ino != ctx->base_ntfs_ino) | ||
910 | unmap_extent_mft_record(ctx->ntfs_ino); | ||
911 | ntfs_attr_init_search_ctx(ctx, ctx->base_ntfs_ino, ctx->base_mrec); | ||
912 | return; | ||
913 | } | ||
914 | |||
915 | /** | ||
916 | * ntfs_attr_get_search_ctx - allocate/initialize a new attribute search context | ||
917 | * @ni: ntfs inode with which to initialize the search context | ||
918 | * @mrec: mft record with which to initialize the search context | ||
919 | * | ||
920 | * Allocate a new attribute search context, initialize it with @ni and @mrec, | ||
921 | * and return it. Return NULL if allocation failed. | ||
922 | */ | ||
923 | ntfs_attr_search_ctx *ntfs_attr_get_search_ctx(ntfs_inode *ni, MFT_RECORD *mrec) | ||
924 | { | ||
925 | ntfs_attr_search_ctx *ctx; | ||
926 | |||
927 | ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, SLAB_NOFS); | ||
928 | if (ctx) | ||
929 | ntfs_attr_init_search_ctx(ctx, ni, mrec); | ||
930 | return ctx; | ||
931 | } | ||
932 | |||
933 | /** | ||
934 | * ntfs_attr_put_search_ctx - release an attribute search context | ||
935 | * @ctx: attribute search context to free | ||
936 | * | ||
937 | * Release the attribute search context @ctx, unmapping an associated extent | ||
938 | * mft record if present. | ||
939 | */ | ||
940 | void ntfs_attr_put_search_ctx(ntfs_attr_search_ctx *ctx) | ||
941 | { | ||
942 | if (ctx->base_ntfs_ino && ctx->ntfs_ino != ctx->base_ntfs_ino) | ||
943 | unmap_extent_mft_record(ctx->ntfs_ino); | ||
944 | kmem_cache_free(ntfs_attr_ctx_cache, ctx); | ||
945 | return; | ||
946 | } | ||
947 | |||
948 | /** | ||
949 | * ntfs_attr_find_in_attrdef - find an attribute in the $AttrDef system file | ||
950 | * @vol: ntfs volume to which the attribute belongs | ||
951 | * @type: attribute type which to find | ||
952 | * | ||
953 | * Search for the attribute definition record corresponding to the attribute | ||
954 | * @type in the $AttrDef system file. | ||
955 | * | ||
956 | * Return the attribute type definition record if found and NULL if not found. | ||
957 | */ | ||
958 | static ATTR_DEF *ntfs_attr_find_in_attrdef(const ntfs_volume *vol, | ||
959 | const ATTR_TYPE type) | ||
960 | { | ||
961 | ATTR_DEF *ad; | ||
962 | |||
963 | BUG_ON(!vol->attrdef); | ||
964 | BUG_ON(!type); | ||
965 | for (ad = vol->attrdef; (u8*)ad - (u8*)vol->attrdef < | ||
966 | vol->attrdef_size && ad->type; ++ad) { | ||
967 | /* We have not found it yet, carry on searching. */ | ||
968 | if (likely(le32_to_cpu(ad->type) < le32_to_cpu(type))) | ||
969 | continue; | ||
970 | /* We found the attribute; return it. */ | ||
971 | if (likely(ad->type == type)) | ||
972 | return ad; | ||
973 | /* We have gone too far already. No point in continuing. */ | ||
974 | break; | ||
975 | } | ||
976 | /* Attribute not found. */ | ||
977 | ntfs_debug("Attribute type 0x%x not found in $AttrDef.", | ||
978 | le32_to_cpu(type)); | ||
979 | return NULL; | ||
980 | } | ||
981 | |||
982 | /** | ||
983 | * ntfs_attr_size_bounds_check - check a size of an attribute type for validity | ||
984 | * @vol: ntfs volume to which the attribute belongs | ||
985 | * @type: attribute type which to check | ||
986 | * @size: size which to check | ||
987 | * | ||
988 | * Check whether the @size in bytes is valid for an attribute of @type on the | ||
989 | * ntfs volume @vol. This information is obtained from $AttrDef system file. | ||
990 | * | ||
991 | * Return 0 if valid, -ERANGE if not valid, or -ENOENT if the attribute is not | ||
992 | * listed in $AttrDef. | ||
993 | */ | ||
994 | int ntfs_attr_size_bounds_check(const ntfs_volume *vol, const ATTR_TYPE type, | ||
995 | const s64 size) | ||
996 | { | ||
997 | ATTR_DEF *ad; | ||
998 | |||
999 | BUG_ON(size < 0); | ||
1000 | /* | ||
1001 | * $ATTRIBUTE_LIST has a maximum size of 256kiB, but this is not | ||
1002 | * listed in $AttrDef. | ||
1003 | */ | ||
1004 | if (unlikely(type == AT_ATTRIBUTE_LIST && size > 256 * 1024)) | ||
1005 | return -ERANGE; | ||
1006 | /* Get the $AttrDef entry for the attribute @type. */ | ||
1007 | ad = ntfs_attr_find_in_attrdef(vol, type); | ||
1008 | if (unlikely(!ad)) | ||
1009 | return -ENOENT; | ||
1010 | /* Do the bounds check. */ | ||
1011 | if (((sle64_to_cpu(ad->min_size) > 0) && | ||
1012 | size < sle64_to_cpu(ad->min_size)) || | ||
1013 | ((sle64_to_cpu(ad->max_size) > 0) && size > | ||
1014 | sle64_to_cpu(ad->max_size))) | ||
1015 | return -ERANGE; | ||
1016 | return 0; | ||
1017 | } | ||
1018 | |||
1019 | /** | ||
1020 | * ntfs_attr_can_be_non_resident - check if an attribute can be non-resident | ||
1021 | * @vol: ntfs volume to which the attribute belongs | ||
1022 | * @type: attribute type which to check | ||
1023 | * | ||
1024 | * Check whether the attribute of @type on the ntfs volume @vol is allowed to | ||
1025 | * be non-resident. This information is obtained from $AttrDef system file. | ||
1026 | * | ||
1027 | * Return 0 if the attribute is allowed to be non-resident, -EPERM if not, or | ||
1028 | * -ENOENT if the attribute is not listed in $AttrDef. | ||
1029 | */ | ||
1030 | int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, const ATTR_TYPE type) | ||
1031 | { | ||
1032 | ATTR_DEF *ad; | ||
1033 | |||
1034 | /* | ||
1035 | * $DATA is always allowed to be non-resident even if $AttrDef does not | ||
1036 | * specify this in the flags of the $DATA attribute definition record. | ||
1037 | */ | ||
1038 | if (type == AT_DATA) | ||
1039 | return 0; | ||
1040 | /* Find the attribute definition record in $AttrDef. */ | ||
1041 | ad = ntfs_attr_find_in_attrdef(vol, type); | ||
1042 | if (unlikely(!ad)) | ||
1043 | return -ENOENT; | ||
1044 | /* Check the flags and return the result. */ | ||
1045 | if (ad->flags & CAN_BE_NON_RESIDENT) | ||
1046 | return 0; | ||
1047 | return -EPERM; | ||
1048 | } | ||
1049 | |||
1050 | /** | ||
1051 | * ntfs_attr_can_be_resident - check if an attribute can be resident | ||
1052 | * @vol: ntfs volume to which the attribute belongs | ||
1053 | * @type: attribute type which to check | ||
1054 | * | ||
1055 | * Check whether the attribute of @type on the ntfs volume @vol is allowed to | ||
1056 | * be resident. This information is derived from our ntfs knowledge and may | ||
1057 | * not be completely accurate, especially when user defined attributes are | ||
1058 | * present. Basically we allow everything to be resident except for index | ||
1059 | * allocation and $EA attributes. | ||
1060 | * | ||
1061 | * Return 0 if the attribute is allowed to be non-resident and -EPERM if not. | ||
1062 | * | ||
1063 | * Warning: In the system file $MFT the attribute $Bitmap must be non-resident | ||
1064 | * otherwise windows will not boot (blue screen of death)! We cannot | ||
1065 | * check for this here as we do not know which inode's $Bitmap is | ||
1066 | * being asked about so the caller needs to special case this. | ||
1067 | */ | ||
1068 | int ntfs_attr_can_be_resident(const ntfs_volume *vol, const ATTR_TYPE type) | ||
1069 | { | ||
1070 | if (type != AT_INDEX_ALLOCATION && type != AT_EA) | ||
1071 | return 0; | ||
1072 | return -EPERM; | ||
1073 | } | ||
1074 | |||
1075 | /** | ||
1076 | * ntfs_attr_record_resize - resize an attribute record | ||
1077 | * @m: mft record containing attribute record | ||
1078 | * @a: attribute record to resize | ||
1079 | * @new_size: new size in bytes to which to resize the attribute record @a | ||
1080 | * | ||
1081 | * Resize the attribute record @a, i.e. the resident part of the attribute, in | ||
1082 | * the mft record @m to @new_size bytes. | ||
1083 | * | ||
1084 | * Return 0 on success and -errno on error. The following error codes are | ||
1085 | * defined: | ||
1086 | * -ENOSPC - Not enough space in the mft record @m to perform the resize. | ||
1087 | * | ||
1088 | * Note: On error, no modifications have been performed whatsoever. | ||
1089 | * | ||
1090 | * Warning: If you make a record smaller without having copied all the data you | ||
1091 | * are interested in the data may be overwritten. | ||
1092 | */ | ||
1093 | int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size) | ||
1094 | { | ||
1095 | ntfs_debug("Entering for new_size %u.", new_size); | ||
1096 | /* Align to 8 bytes if it is not already done. */ | ||
1097 | if (new_size & 7) | ||
1098 | new_size = (new_size + 7) & ~7; | ||
1099 | /* If the actual attribute length has changed, move things around. */ | ||
1100 | if (new_size != le32_to_cpu(a->length)) { | ||
1101 | u32 new_muse = le32_to_cpu(m->bytes_in_use) - | ||
1102 | le32_to_cpu(a->length) + new_size; | ||
1103 | /* Not enough space in this mft record. */ | ||
1104 | if (new_muse > le32_to_cpu(m->bytes_allocated)) | ||
1105 | return -ENOSPC; | ||
1106 | /* Move attributes following @a to their new location. */ | ||
1107 | memmove((u8*)a + new_size, (u8*)a + le32_to_cpu(a->length), | ||
1108 | le32_to_cpu(m->bytes_in_use) - ((u8*)a - | ||
1109 | (u8*)m) - le32_to_cpu(a->length)); | ||
1110 | /* Adjust @m to reflect the change in used space. */ | ||
1111 | m->bytes_in_use = cpu_to_le32(new_muse); | ||
1112 | /* Adjust @a to reflect the new size. */ | ||
1113 | if (new_size >= offsetof(ATTR_REC, length) + sizeof(a->length)) | ||
1114 | a->length = cpu_to_le32(new_size); | ||
1115 | } | ||
1116 | return 0; | ||
1117 | } | ||
1118 | |||
1119 | /** | ||
1120 | * ntfs_attr_set - fill (a part of) an attribute with a byte | ||
1121 | * @ni: ntfs inode describing the attribute to fill | ||
1122 | * @ofs: offset inside the attribute at which to start to fill | ||
1123 | * @cnt: number of bytes to fill | ||
1124 | * @val: the unsigned 8-bit value with which to fill the attribute | ||
1125 | * | ||
1126 | * Fill @cnt bytes of the attribute described by the ntfs inode @ni starting at | ||
1127 | * byte offset @ofs inside the attribute with the constant byte @val. | ||
1128 | * | ||
1129 | * This function is effectively like memset() applied to an ntfs attribute. | ||
1130 | * | ||
1131 | * Return 0 on success and -errno on error. An error code of -ESPIPE means | ||
1132 | * that @ofs + @cnt were outside the end of the attribute and no write was | ||
1133 | * performed. | ||
1134 | */ | ||
1135 | int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) | ||
1136 | { | ||
1137 | ntfs_volume *vol = ni->vol; | ||
1138 | struct address_space *mapping; | ||
1139 | struct page *page; | ||
1140 | u8 *kaddr; | ||
1141 | pgoff_t idx, end; | ||
1142 | unsigned int start_ofs, end_ofs, size; | ||
1143 | |||
1144 | ntfs_debug("Entering for ofs 0x%llx, cnt 0x%llx, val 0x%hx.", | ||
1145 | (long long)ofs, (long long)cnt, val); | ||
1146 | BUG_ON(ofs < 0); | ||
1147 | BUG_ON(cnt < 0); | ||
1148 | if (!cnt) | ||
1149 | goto done; | ||
1150 | mapping = VFS_I(ni)->i_mapping; | ||
1151 | /* Work out the starting index and page offset. */ | ||
1152 | idx = ofs >> PAGE_CACHE_SHIFT; | ||
1153 | start_ofs = ofs & ~PAGE_CACHE_MASK; | ||
1154 | /* Work out the ending index and page offset. */ | ||
1155 | end = ofs + cnt; | ||
1156 | end_ofs = end & ~PAGE_CACHE_MASK; | ||
1157 | /* If the end is outside the inode size return -ESPIPE. */ | ||
1158 | if (unlikely(end > VFS_I(ni)->i_size)) { | ||
1159 | ntfs_error(vol->sb, "Request exceeds end of attribute."); | ||
1160 | return -ESPIPE; | ||
1161 | } | ||
1162 | end >>= PAGE_CACHE_SHIFT; | ||
1163 | /* If there is a first partial page, need to do it the slow way. */ | ||
1164 | if (start_ofs) { | ||
1165 | page = read_cache_page(mapping, idx, | ||
1166 | (filler_t*)mapping->a_ops->readpage, NULL); | ||
1167 | if (IS_ERR(page)) { | ||
1168 | ntfs_error(vol->sb, "Failed to read first partial " | ||
1169 | "page (sync error, index 0x%lx).", idx); | ||
1170 | return PTR_ERR(page); | ||
1171 | } | ||
1172 | wait_on_page_locked(page); | ||
1173 | if (unlikely(!PageUptodate(page))) { | ||
1174 | ntfs_error(vol->sb, "Failed to read first partial page " | ||
1175 | "(async error, index 0x%lx).", idx); | ||
1176 | page_cache_release(page); | ||
1177 | return PTR_ERR(page); | ||
1178 | } | ||
1179 | /* | ||
1180 | * If the last page is the same as the first page, need to | ||
1181 | * limit the write to the end offset. | ||
1182 | */ | ||
1183 | size = PAGE_CACHE_SIZE; | ||
1184 | if (idx == end) | ||
1185 | size = end_ofs; | ||
1186 | kaddr = kmap_atomic(page, KM_USER0); | ||
1187 | memset(kaddr + start_ofs, val, size - start_ofs); | ||
1188 | flush_dcache_page(page); | ||
1189 | kunmap_atomic(kaddr, KM_USER0); | ||
1190 | set_page_dirty(page); | ||
1191 | page_cache_release(page); | ||
1192 | if (idx == end) | ||
1193 | goto done; | ||
1194 | idx++; | ||
1195 | } | ||
1196 | /* Do the whole pages the fast way. */ | ||
1197 | for (; idx < end; idx++) { | ||
1198 | /* Find or create the current page. (The page is locked.) */ | ||
1199 | page = grab_cache_page(mapping, idx); | ||
1200 | if (unlikely(!page)) { | ||
1201 | ntfs_error(vol->sb, "Insufficient memory to grab " | ||
1202 | "page (index 0x%lx).", idx); | ||
1203 | return -ENOMEM; | ||
1204 | } | ||
1205 | kaddr = kmap_atomic(page, KM_USER0); | ||
1206 | memset(kaddr, val, PAGE_CACHE_SIZE); | ||
1207 | flush_dcache_page(page); | ||
1208 | kunmap_atomic(kaddr, KM_USER0); | ||
1209 | /* | ||
1210 | * If the page has buffers, mark them uptodate since buffer | ||
1211 | * state and not page state is definitive in 2.6 kernels. | ||
1212 | */ | ||
1213 | if (page_has_buffers(page)) { | ||
1214 | struct buffer_head *bh, *head; | ||
1215 | |||
1216 | bh = head = page_buffers(page); | ||
1217 | do { | ||
1218 | set_buffer_uptodate(bh); | ||
1219 | } while ((bh = bh->b_this_page) != head); | ||
1220 | } | ||
1221 | /* Now that buffers are uptodate, set the page uptodate, too. */ | ||
1222 | SetPageUptodate(page); | ||
1223 | /* | ||
1224 | * Set the page and all its buffers dirty and mark the inode | ||
1225 | * dirty, too. The VM will write the page later on. | ||
1226 | */ | ||
1227 | set_page_dirty(page); | ||
1228 | /* Finally unlock and release the page. */ | ||
1229 | unlock_page(page); | ||
1230 | page_cache_release(page); | ||
1231 | } | ||
1232 | /* If there is a last partial page, need to do it the slow way. */ | ||
1233 | if (end_ofs) { | ||
1234 | page = read_cache_page(mapping, idx, | ||
1235 | (filler_t*)mapping->a_ops->readpage, NULL); | ||
1236 | if (IS_ERR(page)) { | ||
1237 | ntfs_error(vol->sb, "Failed to read last partial page " | ||
1238 | "(sync error, index 0x%lx).", idx); | ||
1239 | return PTR_ERR(page); | ||
1240 | } | ||
1241 | wait_on_page_locked(page); | ||
1242 | if (unlikely(!PageUptodate(page))) { | ||
1243 | ntfs_error(vol->sb, "Failed to read last partial page " | ||
1244 | "(async error, index 0x%lx).", idx); | ||
1245 | page_cache_release(page); | ||
1246 | return PTR_ERR(page); | ||
1247 | } | ||
1248 | kaddr = kmap_atomic(page, KM_USER0); | ||
1249 | memset(kaddr, val, end_ofs); | ||
1250 | flush_dcache_page(page); | ||
1251 | kunmap_atomic(kaddr, KM_USER0); | ||
1252 | set_page_dirty(page); | ||
1253 | page_cache_release(page); | ||
1254 | } | ||
1255 | done: | ||
1256 | ntfs_debug("Done."); | ||
1257 | return 0; | ||
1258 | } | ||
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h new file mode 100644 index 000000000000..e0c2c6c81bc0 --- /dev/null +++ b/fs/ntfs/attrib.h | |||
@@ -0,0 +1,100 @@ | |||
1 | /* | ||
2 | * attrib.h - Defines for attribute handling in NTFS Linux kernel driver. | ||
3 | * Part of the Linux-NTFS project. | ||
4 | * | ||
5 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
6 | * Copyright (c) 2002 Richard Russon | ||
7 | * | ||
8 | * This program/include file is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as published | ||
10 | * by the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program/include file is distributed in the hope that it will be | ||
14 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
15 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program (in the main directory of the Linux-NTFS | ||
20 | * distribution in the file COPYING); if not, write to the Free Software | ||
21 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 | */ | ||
23 | |||
24 | #ifndef _LINUX_NTFS_ATTRIB_H | ||
25 | #define _LINUX_NTFS_ATTRIB_H | ||
26 | |||
27 | #include "endian.h" | ||
28 | #include "types.h" | ||
29 | #include "layout.h" | ||
30 | #include "inode.h" | ||
31 | #include "runlist.h" | ||
32 | #include "volume.h" | ||
33 | |||
34 | /** | ||
35 | * ntfs_attr_search_ctx - used in attribute search functions | ||
36 | * @mrec: buffer containing mft record to search | ||
37 | * @attr: attribute record in @mrec where to begin/continue search | ||
38 | * @is_first: if true ntfs_attr_lookup() begins search with @attr, else after | ||
39 | * | ||
40 | * Structure must be initialized to zero before the first call to one of the | ||
41 | * attribute search functions. Initialize @mrec to point to the mft record to | ||
42 | * search, and @attr to point to the first attribute within @mrec (not necessary | ||
43 | * if calling the _first() functions), and set @is_first to TRUE (not necessary | ||
44 | * if calling the _first() functions). | ||
45 | * | ||
46 | * If @is_first is TRUE, the search begins with @attr. If @is_first is FALSE, | ||
47 | * the search begins after @attr. This is so that, after the first call to one | ||
48 | * of the search attribute functions, we can call the function again, without | ||
49 | * any modification of the search context, to automagically get the next | ||
50 | * matching attribute. | ||
51 | */ | ||
52 | typedef struct { | ||
53 | MFT_RECORD *mrec; /* mft record to search */ | ||
54 | ATTR_RECORD *attr; /* attribute record in @mrec at which the search begins/continues */ | ||
55 | BOOL is_first; /* TRUE: search begins with @attr; FALSE: search begins after @attr */ | ||
56 | ntfs_inode *ntfs_ino; /* ntfs inode the context was initialized with; when it differs from a non-NULL @base_ntfs_ino its extent mft record is unmapped on reinit/put */ | ||
57 | ATTR_LIST_ENTRY *al_entry; /* NULL after (re)initialization; presumably the current attribute list entry -- TODO confirm */ | ||
58 | ntfs_inode *base_ntfs_ino; /* NULL after initialization and while no attribute list is in use */ | ||
59 | MFT_RECORD *base_mrec; /* NULL after initialization; mft record passed along with @base_ntfs_ino when the context is reinitialized */ | ||
60 | ATTR_RECORD *base_attr; /* NULL after initialization; presumably an attribute record in @base_mrec -- TODO confirm */ | ||
61 | } ntfs_attr_search_ctx; | ||
62 | |||
63 | extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn); | ||
64 | |||
65 | extern runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn, | ||
66 | const BOOL need_write); | ||
67 | |||
68 | int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name, | ||
69 | const u32 name_len, const IGNORE_CASE_BOOL ic, | ||
70 | const VCN lowest_vcn, const u8 *val, const u32 val_len, | ||
71 | ntfs_attr_search_ctx *ctx); | ||
72 | |||
73 | extern int load_attribute_list(ntfs_volume *vol, runlist *rl, u8 *al_start, | ||
74 | const s64 size, const s64 initialized_size); | ||
75 | |||
76 | static inline s64 ntfs_attr_size(const ATTR_RECORD *a) | ||
77 | { | ||
78 | if (!a->non_resident) | ||
79 | return (s64)le32_to_cpu(a->data.resident.value_length); | ||
80 | return sle64_to_cpu(a->data.non_resident.data_size); | ||
81 | } | ||
82 | |||
83 | extern void ntfs_attr_reinit_search_ctx(ntfs_attr_search_ctx *ctx); | ||
84 | extern ntfs_attr_search_ctx *ntfs_attr_get_search_ctx(ntfs_inode *ni, | ||
85 | MFT_RECORD *mrec); | ||
86 | extern void ntfs_attr_put_search_ctx(ntfs_attr_search_ctx *ctx); | ||
87 | |||
88 | extern int ntfs_attr_size_bounds_check(const ntfs_volume *vol, | ||
89 | const ATTR_TYPE type, const s64 size); | ||
90 | extern int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, | ||
91 | const ATTR_TYPE type); | ||
92 | extern int ntfs_attr_can_be_resident(const ntfs_volume *vol, | ||
93 | const ATTR_TYPE type); | ||
94 | |||
95 | extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size); | ||
96 | |||
97 | extern int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, | ||
98 | const u8 val); | ||
99 | |||
100 | #endif /* _LINUX_NTFS_ATTRIB_H */ | ||
diff --git a/fs/ntfs/bitmap.c b/fs/ntfs/bitmap.c new file mode 100644 index 000000000000..12cf2e30c7dd --- /dev/null +++ b/fs/ntfs/bitmap.c | |||
@@ -0,0 +1,192 @@ | |||
1 | /* | ||
2 | * bitmap.c - NTFS kernel bitmap handling. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2004 Anton Altaparmakov | ||
5 | * | ||
6 | * This program/include file is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as published | ||
8 | * by the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program/include file is distributed in the hope that it will be | ||
12 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
13 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program (in the main directory of the Linux-NTFS | ||
18 | * distribution in the file COPYING); if not, write to the Free Software | ||
19 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | |||
22 | #ifdef NTFS_RW | ||
23 | |||
24 | #include <linux/pagemap.h> | ||
25 | |||
26 | #include "bitmap.h" | ||
27 | #include "debug.h" | ||
28 | #include "aops.h" | ||
29 | #include "ntfs.h" | ||
30 | |||
31 | /** | ||
32 | * __ntfs_bitmap_set_bits_in_run - set a run of bits in a bitmap to a value | ||
33 | * @vi: vfs inode describing the bitmap | ||
34 | * @start_bit: first bit to set | ||
35 | * @count: number of bits to set | ||
36 | * @value: value to set the bits to (i.e. 0 or 1) | ||
37 | * @is_rollback: if TRUE this is a rollback operation | ||
38 | * | ||
39 | * Set @count bits starting at bit @start_bit in the bitmap described by the | ||
40 | * vfs inode @vi to @value, where @value is either 0 or 1. | ||
41 | * | ||
42 | * @is_rollback should always be FALSE, it is for internal use to rollback | ||
43 | * errors. You probably want to use ntfs_bitmap_set_bits_in_run() instead. | ||
44 | * | ||
45 | * Return 0 on success and -errno on error. | ||
46 | */ | ||
47 | int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit, | ||
48 | const s64 count, const u8 value, const BOOL is_rollback) | ||
49 | { | ||
50 | s64 cnt = count; | ||
51 | pgoff_t index, end_index; | ||
52 | struct address_space *mapping; | ||
53 | struct page *page; | ||
54 | u8 *kaddr; | ||
55 | int pos, len; | ||
56 | u8 bit; | ||
57 | |||
58 | BUG_ON(!vi); | ||
59 | ntfs_debug("Entering for i_ino 0x%lx, start_bit 0x%llx, count 0x%llx, " | ||
60 | "value %u.%s", vi->i_ino, (unsigned long long)start_bit, | ||
61 | (unsigned long long)cnt, (unsigned int)value, | ||
62 | is_rollback ? " (rollback)" : ""); | ||
63 | BUG_ON(start_bit < 0); | ||
64 | BUG_ON(cnt < 0); | ||
65 | BUG_ON(value > 1); | ||
66 | /* | ||
67 | * Calculate the indices for the pages containing the first and last | ||
68 | * bits, i.e. @start_bit and @start_bit + @cnt - 1, respectively. | ||
69 | */ | ||
70 | index = start_bit >> (3 + PAGE_CACHE_SHIFT); | ||
71 | end_index = (start_bit + cnt - 1) >> (3 + PAGE_CACHE_SHIFT); | ||
72 | |||
73 | /* Get the page containing the first bit (@start_bit). */ | ||
74 | mapping = vi->i_mapping; | ||
75 | page = ntfs_map_page(mapping, index); | ||
76 | if (IS_ERR(page)) { | ||
77 | if (!is_rollback) | ||
78 | ntfs_error(vi->i_sb, "Failed to map first page (error " | ||
79 | "%li), aborting.", PTR_ERR(page)); | ||
80 | return PTR_ERR(page); | ||
81 | } | ||
82 | kaddr = page_address(page); | ||
83 | |||
84 | /* Set @pos to the position of the byte containing @start_bit. */ | ||
85 | pos = (start_bit >> 3) & ~PAGE_CACHE_MASK; | ||
86 | |||
87 | /* Calculate the position of @start_bit in the first byte. */ | ||
88 | bit = start_bit & 7; | ||
89 | |||
90 | /* If the first byte is partial, modify the appropriate bits in it. */ | ||
91 | if (bit) { | ||
92 | u8 *byte = kaddr + pos; | ||
93 | while ((bit & 7) && cnt--) { | ||
94 | if (value) | ||
95 | *byte |= 1 << bit++; | ||
96 | else | ||
97 | *byte &= ~(1 << bit++); | ||
98 | } | ||
99 | /* If we are done, unmap the page and return success. */ | ||
100 | if (!cnt) | ||
101 | goto done; | ||
102 | |||
103 | /* Update @pos to the new position. */ | ||
104 | pos++; | ||
105 | } | ||
106 | /* | ||
107 | * Depending on @value, modify all remaining whole bytes in the page up | ||
108 | * to @cnt. | ||
109 | */ | ||
110 | len = min_t(s64, cnt >> 3, PAGE_CACHE_SIZE - pos); | ||
111 | memset(kaddr + pos, value ? 0xff : 0, len); | ||
112 | cnt -= len << 3; | ||
113 | |||
114 | /* Update @len to point to the first not-done byte in the page. */ | ||
115 | if (cnt < 8) | ||
116 | len += pos; | ||
117 | |||
118 | /* If we are not in the last page, deal with all subsequent pages. */ | ||
119 | while (index < end_index) { | ||
120 | BUG_ON(cnt <= 0); | ||
121 | |||
122 | /* Update @index and get the next page. */ | ||
123 | flush_dcache_page(page); | ||
124 | set_page_dirty(page); | ||
125 | ntfs_unmap_page(page); | ||
126 | page = ntfs_map_page(mapping, ++index); | ||
127 | if (IS_ERR(page)) | ||
128 | goto rollback; | ||
129 | kaddr = page_address(page); | ||
130 | /* | ||
131 | * Depending on @value, modify all remaining whole bytes in the | ||
132 | * page up to @cnt. | ||
133 | */ | ||
134 | len = min_t(s64, cnt >> 3, PAGE_CACHE_SIZE); | ||
135 | memset(kaddr, value ? 0xff : 0, len); | ||
136 | cnt -= len << 3; | ||
137 | } | ||
138 | /* | ||
139 | * The currently mapped page is the last one. If the last byte is | ||
140 | * partial, modify the appropriate bits in it. Note, @len is the | ||
141 | * position of the last byte inside the page. | ||
142 | */ | ||
143 | if (cnt) { | ||
144 | u8 *byte; | ||
145 | |||
146 | BUG_ON(cnt > 7); | ||
147 | |||
148 | bit = cnt; | ||
149 | byte = kaddr + len; | ||
150 | while (bit--) { | ||
151 | if (value) | ||
152 | *byte |= 1 << bit; | ||
153 | else | ||
154 | *byte &= ~(1 << bit); | ||
155 | } | ||
156 | } | ||
157 | done: | ||
158 | /* We are done. Unmap the page and return success. */ | ||
159 | flush_dcache_page(page); | ||
160 | set_page_dirty(page); | ||
161 | ntfs_unmap_page(page); | ||
162 | ntfs_debug("Done."); | ||
163 | return 0; | ||
164 | rollback: | ||
165 | /* | ||
166 | * Current state: | ||
167 | * - no pages are mapped | ||
168 | * - @count - @cnt is the number of bits that have been modified | ||
169 | */ | ||
170 | if (is_rollback) | ||
171 | return PTR_ERR(page); | ||
172 | if (count != cnt) | ||
173 | pos = __ntfs_bitmap_set_bits_in_run(vi, start_bit, count - cnt, | ||
174 | value ? 0 : 1, TRUE); | ||
175 | else | ||
176 | pos = 0; | ||
177 | if (!pos) { | ||
178 | /* Rollback was successful. */ | ||
179 | ntfs_error(vi->i_sb, "Failed to map subsequent page (error " | ||
180 | "%li), aborting.", PTR_ERR(page)); | ||
181 | } else { | ||
182 | /* Rollback failed. */ | ||
183 | ntfs_error(vi->i_sb, "Failed to map subsequent page (error " | ||
184 | "%li) and rollback failed (error %i). " | ||
185 | "Aborting and leaving inconsistent metadata. " | ||
186 | "Unmount and run chkdsk.", PTR_ERR(page), pos); | ||
187 | NVolSetErrors(NTFS_SB(vi->i_sb)); | ||
188 | } | ||
189 | return PTR_ERR(page); | ||
190 | } | ||
191 | |||
192 | #endif /* NTFS_RW */ | ||
diff --git a/fs/ntfs/bitmap.h b/fs/ntfs/bitmap.h new file mode 100644 index 000000000000..bb50d6bc9212 --- /dev/null +++ b/fs/ntfs/bitmap.h | |||
@@ -0,0 +1,118 @@ | |||
1 | /* | ||
2 | * bitmap.h - Defines for NTFS kernel bitmap handling. Part of the Linux-NTFS | ||
3 | * project. | ||
4 | * | ||
5 | * Copyright (c) 2004 Anton Altaparmakov | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #ifndef _LINUX_NTFS_BITMAP_H | ||
24 | #define _LINUX_NTFS_BITMAP_H | ||
25 | |||
26 | #ifdef NTFS_RW | ||
27 | |||
28 | #include <linux/fs.h> | ||
29 | |||
30 | #include "types.h" | ||
31 | |||
32 | extern int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit, | ||
33 | const s64 count, const u8 value, const BOOL is_rollback); | ||
34 | |||
35 | /** | ||
36 | * ntfs_bitmap_set_bits_in_run - set a run of bits in a bitmap to a value | ||
37 | * @vi: vfs inode describing the bitmap | ||
38 | * @start_bit: first bit to set | ||
39 | * @count: number of bits to set | ||
40 | * @value: value to set the bits to (i.e. 0 or 1) | ||
41 | * | ||
42 | * Set @count bits starting at bit @start_bit in the bitmap described by the | ||
43 | * vfs inode @vi to @value, where @value is either 0 or 1. | ||
44 | * | ||
45 | * Return 0 on success and -errno on error. | ||
46 | */ | ||
47 | static inline int ntfs_bitmap_set_bits_in_run(struct inode *vi, | ||
48 | const s64 start_bit, const s64 count, const u8 value) | ||
49 | { | ||
50 | return __ntfs_bitmap_set_bits_in_run(vi, start_bit, count, value, | ||
51 | FALSE); | ||
52 | } | ||
53 | |||
54 | /** | ||
55 | * ntfs_bitmap_set_run - set a run of bits in a bitmap | ||
56 | * @vi: vfs inode describing the bitmap | ||
57 | * @start_bit: first bit to set | ||
58 | * @count: number of bits to set | ||
59 | * | ||
60 | * Set @count bits starting at bit @start_bit in the bitmap described by the | ||
61 | * vfs inode @vi. | ||
62 | * | ||
63 | * Return 0 on success and -errno on error. | ||
64 | */ | ||
65 | static inline int ntfs_bitmap_set_run(struct inode *vi, const s64 start_bit, | ||
66 | const s64 count) | ||
67 | { | ||
68 | return ntfs_bitmap_set_bits_in_run(vi, start_bit, count, 1); | ||
69 | } | ||
70 | |||
71 | /** | ||
72 | * ntfs_bitmap_clear_run - clear a run of bits in a bitmap | ||
73 | * @vi: vfs inode describing the bitmap | ||
74 | * @start_bit: first bit to clear | ||
75 | * @count: number of bits to clear | ||
76 | * | ||
77 | * Clear @count bits starting at bit @start_bit in the bitmap described by the | ||
78 | * vfs inode @vi. | ||
79 | * | ||
80 | * Return 0 on success and -errno on error. | ||
81 | */ | ||
82 | static inline int ntfs_bitmap_clear_run(struct inode *vi, const s64 start_bit, | ||
83 | const s64 count) | ||
84 | { | ||
85 | return ntfs_bitmap_set_bits_in_run(vi, start_bit, count, 0); | ||
86 | } | ||
87 | |||
88 | /** | ||
89 | * ntfs_bitmap_set_bit - set a bit in a bitmap | ||
90 | * @vi: vfs inode describing the bitmap | ||
91 | * @bit: bit to set | ||
92 | * | ||
93 | * Set bit @bit in the bitmap described by the vfs inode @vi. | ||
94 | * | ||
95 | * Return 0 on success and -errno on error. | ||
96 | */ | ||
97 | static inline int ntfs_bitmap_set_bit(struct inode *vi, const s64 bit) | ||
98 | { | ||
99 | return ntfs_bitmap_set_run(vi, bit, 1); | ||
100 | } | ||
101 | |||
102 | /** | ||
103 | * ntfs_bitmap_clear_bit - clear a bit in a bitmap | ||
104 | * @vi: vfs inode describing the bitmap | ||
105 | * @bit: bit to clear | ||
106 | * | ||
107 | * Clear bit @bit in the bitmap described by the vfs inode @vi. | ||
108 | * | ||
109 | * Return 0 on success and -errno on error. | ||
110 | */ | ||
111 | static inline int ntfs_bitmap_clear_bit(struct inode *vi, const s64 bit) | ||
112 | { | ||
113 | return ntfs_bitmap_clear_run(vi, bit, 1); | ||
114 | } | ||
115 | |||
116 | #endif /* NTFS_RW */ | ||
117 | |||
118 | #endif /* defined _LINUX_NTFS_BITMAP_H */ | ||
diff --git a/fs/ntfs/collate.c b/fs/ntfs/collate.c new file mode 100644 index 000000000000..4a28ab3898ef --- /dev/null +++ b/fs/ntfs/collate.c | |||
@@ -0,0 +1,124 @@ | |||
1 | /* | ||
2 | * collate.c - NTFS kernel collation handling. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2004 Anton Altaparmakov | ||
5 | * | ||
6 | * This program/include file is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as published | ||
8 | * by the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program/include file is distributed in the hope that it will be | ||
12 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
13 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program (in the main directory of the Linux-NTFS | ||
18 | * distribution in the file COPYING); if not, write to the Free Software | ||
19 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | |||
22 | #include "collate.h" | ||
23 | #include "debug.h" | ||
24 | #include "ntfs.h" | ||
25 | |||
26 | static int ntfs_collate_binary(ntfs_volume *vol, | ||
27 | const void *data1, const int data1_len, | ||
28 | const void *data2, const int data2_len) | ||
29 | { | ||
30 | int rc; | ||
31 | |||
32 | ntfs_debug("Entering."); | ||
33 | rc = memcmp(data1, data2, min(data1_len, data2_len)); | ||
34 | if (!rc && (data1_len != data2_len)) { | ||
35 | if (data1_len < data2_len) | ||
36 | rc = -1; | ||
37 | else | ||
38 | rc = 1; | ||
39 | } | ||
40 | ntfs_debug("Done, returning %i", rc); | ||
41 | return rc; | ||
42 | } | ||
43 | |||
44 | static int ntfs_collate_ntofs_ulong(ntfs_volume *vol, | ||
45 | const void *data1, const int data1_len, | ||
46 | const void *data2, const int data2_len) | ||
47 | { | ||
48 | int rc; | ||
49 | u32 d1, d2; | ||
50 | |||
51 | ntfs_debug("Entering."); | ||
52 | // FIXME: We don't really want to bug here. | ||
53 | BUG_ON(data1_len != data2_len); | ||
54 | BUG_ON(data1_len != 4); | ||
55 | d1 = le32_to_cpup(data1); | ||
56 | d2 = le32_to_cpup(data2); | ||
57 | if (d1 < d2) | ||
58 | rc = -1; | ||
59 | else { | ||
60 | if (d1 == d2) | ||
61 | rc = 0; | ||
62 | else | ||
63 | rc = 1; | ||
64 | } | ||
65 | ntfs_debug("Done, returning %i", rc); | ||
66 | return rc; | ||
67 | } | ||
68 | |||
69 | typedef int (*ntfs_collate_func_t)(ntfs_volume *, const void *, const int, | ||
70 | const void *, const int); | ||
71 | |||
72 | static ntfs_collate_func_t ntfs_do_collate0x0[3] = { | ||
73 | ntfs_collate_binary, | ||
74 | NULL/*ntfs_collate_file_name*/, | ||
75 | NULL/*ntfs_collate_unicode_string*/, | ||
76 | }; | ||
77 | |||
78 | static ntfs_collate_func_t ntfs_do_collate0x1[4] = { | ||
79 | ntfs_collate_ntofs_ulong, | ||
80 | NULL/*ntfs_collate_ntofs_sid*/, | ||
81 | NULL/*ntfs_collate_ntofs_security_hash*/, | ||
82 | NULL/*ntfs_collate_ntofs_ulongs*/, | ||
83 | }; | ||
84 | |||
85 | /** | ||
86 | * ntfs_collate - collate two data items using a specified collation rule | ||
87 | * @vol: ntfs volume to which the data items belong | ||
88 | * @cr: collation rule to use when comparing the items | ||
89 | * @data1: first data item to collate | ||
90 | * @data1_len: length in bytes of @data1 | ||
91 | * @data2: second data item to collate | ||
92 | * @data2_len: length in bytes of @data2 | ||
93 | * | ||
94 | * Collate the two data items @data1 and @data2 using the collation rule @cr | ||
95 | * and return -1, 0, ir 1 if @data1 is found, respectively, to collate before, | ||
96 | * to match, or to collate after @data2. | ||
97 | * | ||
98 | * For speed we use the collation rule @cr as an index into two tables of | ||
99 | * function pointers to call the appropriate collation function. | ||
100 | */ | ||
101 | int ntfs_collate(ntfs_volume *vol, COLLATION_RULE cr, | ||
102 | const void *data1, const int data1_len, | ||
103 | const void *data2, const int data2_len) { | ||
104 | int i; | ||
105 | |||
106 | ntfs_debug("Entering."); | ||
107 | /* | ||
108 | * FIXME: At the moment we only support COLLATION_BINARY and | ||
109 | * COLLATION_NTOFS_ULONG, so we BUG() for everything else for now. | ||
110 | */ | ||
111 | BUG_ON(cr != COLLATION_BINARY && cr != COLLATION_NTOFS_ULONG); | ||
112 | i = le32_to_cpu(cr); | ||
113 | BUG_ON(i < 0); | ||
114 | if (i <= 0x02) | ||
115 | return ntfs_do_collate0x0[i](vol, data1, data1_len, | ||
116 | data2, data2_len); | ||
117 | BUG_ON(i < 0x10); | ||
118 | i -= 0x10; | ||
119 | if (likely(i <= 3)) | ||
120 | return ntfs_do_collate0x1[i](vol, data1, data1_len, | ||
121 | data2, data2_len); | ||
122 | BUG(); | ||
123 | return 0; | ||
124 | } | ||
diff --git a/fs/ntfs/collate.h b/fs/ntfs/collate.h new file mode 100644 index 000000000000..e027f36fcc2f --- /dev/null +++ b/fs/ntfs/collate.h | |||
@@ -0,0 +1,50 @@ | |||
1 | /* | ||
2 | * collate.h - Defines for NTFS kernel collation handling. Part of the | ||
3 | * Linux-NTFS project. | ||
4 | * | ||
5 | * Copyright (c) 2004 Anton Altaparmakov | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #ifndef _LINUX_NTFS_COLLATE_H | ||
24 | #define _LINUX_NTFS_COLLATE_H | ||
25 | |||
26 | #include "types.h" | ||
27 | #include "volume.h" | ||
28 | |||
29 | static inline BOOL ntfs_is_collation_rule_supported(COLLATION_RULE cr) { | ||
30 | int i; | ||
31 | |||
32 | /* | ||
33 | * FIXME: At the moment we only support COLLATION_BINARY and | ||
34 | * COLLATION_NTOFS_ULONG, so we return false for everything else for | ||
35 | * now. | ||
36 | */ | ||
37 | if (unlikely(cr != COLLATION_BINARY && cr != COLLATION_NTOFS_ULONG)) | ||
38 | return FALSE; | ||
39 | i = le32_to_cpu(cr); | ||
40 | if (likely(((i >= 0) && (i <= 0x02)) || | ||
41 | ((i >= 0x10) && (i <= 0x13)))) | ||
42 | return TRUE; | ||
43 | return FALSE; | ||
44 | } | ||
45 | |||
46 | extern int ntfs_collate(ntfs_volume *vol, COLLATION_RULE cr, | ||
47 | const void *data1, const int data1_len, | ||
48 | const void *data2, const int data2_len); | ||
49 | |||
50 | #endif /* _LINUX_NTFS_COLLATE_H */ | ||
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c new file mode 100644 index 000000000000..ee5ae706f861 --- /dev/null +++ b/fs/ntfs/compress.c | |||
@@ -0,0 +1,957 @@ | |||
1 | /** | ||
2 | * compress.c - NTFS kernel compressed attributes handling. | ||
3 | * Part of the Linux-NTFS project. | ||
4 | * | ||
5 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
6 | * Copyright (c) 2002 Richard Russon | ||
7 | * | ||
8 | * This program/include file is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as published | ||
10 | * by the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program/include file is distributed in the hope that it will be | ||
14 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
15 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program (in the main directory of the Linux-NTFS | ||
20 | * distribution in the file COPYING); if not, write to the Free Software | ||
21 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 | */ | ||
23 | |||
24 | #include <linux/fs.h> | ||
25 | #include <linux/buffer_head.h> | ||
26 | #include <linux/blkdev.h> | ||
27 | #include <linux/vmalloc.h> | ||
28 | |||
29 | #include "attrib.h" | ||
30 | #include "inode.h" | ||
31 | #include "debug.h" | ||
32 | #include "ntfs.h" | ||
33 | |||
/**
 * ntfs_compression_constants - enum of constants used in the compression code
 */
typedef enum {
	/*
	 * Token types as encoded by one bit of a tag byte, and the mask used
	 * to extract that bit.
	 */
	NTFS_SYMBOL_TOKEN	= 0,
	NTFS_PHRASE_TOKEN	= 1,
	NTFS_TOKEN_MASK		= 1,

	/*
	 * Compression sub-block constants: the mask extracting the length
	 * field from the 16-bit sub-block header, the size in bytes of a
	 * decompressed sub-block, and the header flag marking a sub-block as
	 * compressed.
	 */
	NTFS_SB_SIZE_MASK	= 0xfff,
	NTFS_SB_SIZE		= 4096,
	NTFS_SB_IS_COMPRESSED	= 1 << 15,

	/*
	 * The maximum compression block size is by definition 16 * the cluster
	 * size, with the maximum supported cluster size being 4kiB.  Thus the
	 * maximum compression buffer size is 64kiB, so we use this when
	 * initializing the compression buffer.
	 */
	NTFS_MAX_CB_SIZE	= 16 * 4096,
} ntfs_compression_constants;
56 | |||
57 | /** | ||
58 | * ntfs_compression_buffer - one buffer for the decompression engine | ||
59 | */ | ||
60 | static u8 *ntfs_compression_buffer = NULL; | ||
61 | |||
62 | /** | ||
63 | * ntfs_cb_lock - spinlock which protects ntfs_compression_buffer | ||
64 | */ | ||
65 | static DEFINE_SPINLOCK(ntfs_cb_lock); | ||
66 | |||
67 | /** | ||
68 | * allocate_compression_buffers - allocate the decompression buffers | ||
69 | * | ||
70 | * Caller has to hold the ntfs_lock semaphore. | ||
71 | * | ||
72 | * Return 0 on success or -ENOMEM if the allocations failed. | ||
73 | */ | ||
74 | int allocate_compression_buffers(void) | ||
75 | { | ||
76 | BUG_ON(ntfs_compression_buffer); | ||
77 | |||
78 | ntfs_compression_buffer = vmalloc(NTFS_MAX_CB_SIZE); | ||
79 | if (!ntfs_compression_buffer) | ||
80 | return -ENOMEM; | ||
81 | return 0; | ||
82 | } | ||
83 | |||
84 | /** | ||
85 | * free_compression_buffers - free the decompression buffers | ||
86 | * | ||
87 | * Caller has to hold the ntfs_lock semaphore. | ||
88 | */ | ||
89 | void free_compression_buffers(void) | ||
90 | { | ||
91 | BUG_ON(!ntfs_compression_buffer); | ||
92 | vfree(ntfs_compression_buffer); | ||
93 | ntfs_compression_buffer = NULL; | ||
94 | } | ||
95 | |||
96 | /** | ||
97 | * zero_partial_compressed_page - zero out of bounds compressed page region | ||
98 | */ | ||
99 | static void zero_partial_compressed_page(ntfs_inode *ni, struct page *page) | ||
100 | { | ||
101 | u8 *kp = page_address(page); | ||
102 | unsigned int kp_ofs; | ||
103 | |||
104 | ntfs_debug("Zeroing page region outside initialized size."); | ||
105 | if (((s64)page->index << PAGE_CACHE_SHIFT) >= ni->initialized_size) { | ||
106 | /* | ||
107 | * FIXME: Using clear_page() will become wrong when we get | ||
108 | * PAGE_CACHE_SIZE != PAGE_SIZE but for now there is no problem. | ||
109 | */ | ||
110 | clear_page(kp); | ||
111 | return; | ||
112 | } | ||
113 | kp_ofs = ni->initialized_size & ~PAGE_CACHE_MASK; | ||
114 | memset(kp + kp_ofs, 0, PAGE_CACHE_SIZE - kp_ofs); | ||
115 | return; | ||
116 | } | ||
117 | |||
118 | /** | ||
119 | * handle_bounds_compressed_page - test for&handle out of bounds compressed page | ||
120 | */ | ||
121 | static inline void handle_bounds_compressed_page(ntfs_inode *ni, | ||
122 | struct page *page) | ||
123 | { | ||
124 | if ((page->index >= (ni->initialized_size >> PAGE_CACHE_SHIFT)) && | ||
125 | (ni->initialized_size < VFS_I(ni)->i_size)) | ||
126 | zero_partial_compressed_page(ni, page); | ||
127 | return; | ||
128 | } | ||
129 | |||
130 | /** | ||
131 | * ntfs_decompress - decompress a compression block into an array of pages | ||
132 | * @dest_pages: destination array of pages | ||
133 | * @dest_index: current index into @dest_pages (IN/OUT) | ||
134 | * @dest_ofs: current offset within @dest_pages[@dest_index] (IN/OUT) | ||
135 | * @dest_max_index: maximum index into @dest_pages (IN) | ||
136 | * @dest_max_ofs: maximum offset within @dest_pages[@dest_max_index] (IN) | ||
137 | * @xpage: the target page (-1 if none) (IN) | ||
138 | * @xpage_done: set to 1 if xpage was completed successfully (IN/OUT) | ||
139 | * @cb_start: compression block to decompress (IN) | ||
140 | * @cb_size: size of compression block @cb_start in bytes (IN) | ||
141 | * | ||
142 | * The caller must have disabled preemption. ntfs_decompress() reenables it when | ||
143 | * the critical section is finished. | ||
144 | * | ||
145 | * This decompresses the compression block @cb_start into the array of | ||
146 | * destination pages @dest_pages starting at index @dest_index into @dest_pages | ||
147 | * and at offset @dest_pos into the page @dest_pages[@dest_index]. | ||
148 | * | ||
149 | * When the page @dest_pages[@xpage] is completed, @xpage_done is set to 1. | ||
150 | * If xpage is -1 or @xpage has not been completed, @xpage_done is not modified. | ||
151 | * | ||
152 | * @cb_start is a pointer to the compression block which needs decompressing | ||
153 | * and @cb_size is the size of @cb_start in bytes (8-64kiB). | ||
154 | * | ||
155 | * Return 0 if success or -EOVERFLOW on error in the compressed stream. | ||
156 | * @xpage_done indicates whether the target page (@dest_pages[@xpage]) was | ||
157 | * completed during the decompression of the compression block (@cb_start). | ||
158 | * | ||
159 | * Warning: This function *REQUIRES* PAGE_CACHE_SIZE >= 4096 or it will blow up | ||
160 | * unpredicatbly! You have been warned! | ||
161 | * | ||
162 | * Note to hackers: This function may not sleep until it has finished accessing | ||
163 | * the compression block @cb_start as it is a per-CPU buffer. | ||
164 | */ | ||
165 | static int ntfs_decompress(struct page *dest_pages[], int *dest_index, | ||
166 | int *dest_ofs, const int dest_max_index, const int dest_max_ofs, | ||
167 | const int xpage, char *xpage_done, u8 *const cb_start, | ||
168 | const u32 cb_size) | ||
169 | { | ||
170 | /* | ||
171 | * Pointers into the compressed data, i.e. the compression block (cb), | ||
172 | * and the therein contained sub-blocks (sb). | ||
173 | */ | ||
174 | u8 *cb_end = cb_start + cb_size; /* End of cb. */ | ||
175 | u8 *cb = cb_start; /* Current position in cb. */ | ||
176 | u8 *cb_sb_start = cb; /* Beginning of the current sb in the cb. */ | ||
177 | u8 *cb_sb_end; /* End of current sb / beginning of next sb. */ | ||
178 | |||
179 | /* Variables for uncompressed data / destination. */ | ||
180 | struct page *dp; /* Current destination page being worked on. */ | ||
181 | u8 *dp_addr; /* Current pointer into dp. */ | ||
182 | u8 *dp_sb_start; /* Start of current sub-block in dp. */ | ||
183 | u8 *dp_sb_end; /* End of current sb in dp (dp_sb_start + | ||
184 | NTFS_SB_SIZE). */ | ||
185 | u16 do_sb_start; /* @dest_ofs when starting this sub-block. */ | ||
186 | u16 do_sb_end; /* @dest_ofs of end of this sb (do_sb_start + | ||
187 | NTFS_SB_SIZE). */ | ||
188 | |||
189 | /* Variables for tag and token parsing. */ | ||
190 | u8 tag; /* Current tag. */ | ||
191 | int token; /* Loop counter for the eight tokens in tag. */ | ||
192 | |||
193 | /* Need this because we can't sleep, so need two stages. */ | ||
194 | int completed_pages[dest_max_index - *dest_index + 1]; | ||
195 | int nr_completed_pages = 0; | ||
196 | |||
197 | /* Default error code. */ | ||
198 | int err = -EOVERFLOW; | ||
199 | |||
200 | ntfs_debug("Entering, cb_size = 0x%x.", cb_size); | ||
201 | do_next_sb: | ||
202 | ntfs_debug("Beginning sub-block at offset = 0x%zx in the cb.", | ||
203 | cb - cb_start); | ||
204 | /* | ||
205 | * Have we reached the end of the compression block or the end of the | ||
206 | * decompressed data? The latter can happen for example if the current | ||
207 | * position in the compression block is one byte before its end so the | ||
208 | * first two checks do not detect it. | ||
209 | */ | ||
210 | if (cb == cb_end || !le16_to_cpup((le16*)cb) || | ||
211 | (*dest_index == dest_max_index && | ||
212 | *dest_ofs == dest_max_ofs)) { | ||
213 | int i; | ||
214 | |||
215 | ntfs_debug("Completed. Returning success (0)."); | ||
216 | err = 0; | ||
217 | return_error: | ||
218 | /* We can sleep from now on, so we drop lock. */ | ||
219 | spin_unlock(&ntfs_cb_lock); | ||
220 | /* Second stage: finalize completed pages. */ | ||
221 | if (nr_completed_pages > 0) { | ||
222 | struct page *page = dest_pages[completed_pages[0]]; | ||
223 | ntfs_inode *ni = NTFS_I(page->mapping->host); | ||
224 | |||
225 | for (i = 0; i < nr_completed_pages; i++) { | ||
226 | int di = completed_pages[i]; | ||
227 | |||
228 | dp = dest_pages[di]; | ||
229 | /* | ||
230 | * If we are outside the initialized size, zero | ||
231 | * the out of bounds page range. | ||
232 | */ | ||
233 | handle_bounds_compressed_page(ni, dp); | ||
234 | flush_dcache_page(dp); | ||
235 | kunmap(dp); | ||
236 | SetPageUptodate(dp); | ||
237 | unlock_page(dp); | ||
238 | if (di == xpage) | ||
239 | *xpage_done = 1; | ||
240 | else | ||
241 | page_cache_release(dp); | ||
242 | dest_pages[di] = NULL; | ||
243 | } | ||
244 | } | ||
245 | return err; | ||
246 | } | ||
247 | |||
248 | /* Setup offsets for the current sub-block destination. */ | ||
249 | do_sb_start = *dest_ofs; | ||
250 | do_sb_end = do_sb_start + NTFS_SB_SIZE; | ||
251 | |||
252 | /* Check that we are still within allowed boundaries. */ | ||
253 | if (*dest_index == dest_max_index && do_sb_end > dest_max_ofs) | ||
254 | goto return_overflow; | ||
255 | |||
256 | /* Does the minimum size of a compressed sb overflow valid range? */ | ||
257 | if (cb + 6 > cb_end) | ||
258 | goto return_overflow; | ||
259 | |||
260 | /* Setup the current sub-block source pointers and validate range. */ | ||
261 | cb_sb_start = cb; | ||
262 | cb_sb_end = cb_sb_start + (le16_to_cpup((le16*)cb) & NTFS_SB_SIZE_MASK) | ||
263 | + 3; | ||
264 | if (cb_sb_end > cb_end) | ||
265 | goto return_overflow; | ||
266 | |||
267 | /* Get the current destination page. */ | ||
268 | dp = dest_pages[*dest_index]; | ||
269 | if (!dp) { | ||
270 | /* No page present. Skip decompression of this sub-block. */ | ||
271 | cb = cb_sb_end; | ||
272 | |||
273 | /* Advance destination position to next sub-block. */ | ||
274 | *dest_ofs = (*dest_ofs + NTFS_SB_SIZE) & ~PAGE_CACHE_MASK; | ||
275 | if (!*dest_ofs && (++*dest_index > dest_max_index)) | ||
276 | goto return_overflow; | ||
277 | goto do_next_sb; | ||
278 | } | ||
279 | |||
280 | /* We have a valid destination page. Setup the destination pointers. */ | ||
281 | dp_addr = (u8*)page_address(dp) + do_sb_start; | ||
282 | |||
283 | /* Now, we are ready to process the current sub-block (sb). */ | ||
284 | if (!(le16_to_cpup((le16*)cb) & NTFS_SB_IS_COMPRESSED)) { | ||
285 | ntfs_debug("Found uncompressed sub-block."); | ||
286 | /* This sb is not compressed, just copy it into destination. */ | ||
287 | |||
288 | /* Advance source position to first data byte. */ | ||
289 | cb += 2; | ||
290 | |||
291 | /* An uncompressed sb must be full size. */ | ||
292 | if (cb_sb_end - cb != NTFS_SB_SIZE) | ||
293 | goto return_overflow; | ||
294 | |||
295 | /* Copy the block and advance the source position. */ | ||
296 | memcpy(dp_addr, cb, NTFS_SB_SIZE); | ||
297 | cb += NTFS_SB_SIZE; | ||
298 | |||
299 | /* Advance destination position to next sub-block. */ | ||
300 | *dest_ofs += NTFS_SB_SIZE; | ||
301 | if (!(*dest_ofs &= ~PAGE_CACHE_MASK)) { | ||
302 | finalize_page: | ||
303 | /* | ||
304 | * First stage: add current page index to array of | ||
305 | * completed pages. | ||
306 | */ | ||
307 | completed_pages[nr_completed_pages++] = *dest_index; | ||
308 | if (++*dest_index > dest_max_index) | ||
309 | goto return_overflow; | ||
310 | } | ||
311 | goto do_next_sb; | ||
312 | } | ||
313 | ntfs_debug("Found compressed sub-block."); | ||
314 | /* This sb is compressed, decompress it into destination. */ | ||
315 | |||
316 | /* Setup destination pointers. */ | ||
317 | dp_sb_start = dp_addr; | ||
318 | dp_sb_end = dp_sb_start + NTFS_SB_SIZE; | ||
319 | |||
320 | /* Forward to the first tag in the sub-block. */ | ||
321 | cb += 2; | ||
322 | do_next_tag: | ||
323 | if (cb == cb_sb_end) { | ||
324 | /* Check if the decompressed sub-block was not full-length. */ | ||
325 | if (dp_addr < dp_sb_end) { | ||
326 | int nr_bytes = do_sb_end - *dest_ofs; | ||
327 | |||
328 | ntfs_debug("Filling incomplete sub-block with " | ||
329 | "zeroes."); | ||
330 | /* Zero remainder and update destination position. */ | ||
331 | memset(dp_addr, 0, nr_bytes); | ||
332 | *dest_ofs += nr_bytes; | ||
333 | } | ||
334 | /* We have finished the current sub-block. */ | ||
335 | if (!(*dest_ofs &= ~PAGE_CACHE_MASK)) | ||
336 | goto finalize_page; | ||
337 | goto do_next_sb; | ||
338 | } | ||
339 | |||
340 | /* Check we are still in range. */ | ||
341 | if (cb > cb_sb_end || dp_addr > dp_sb_end) | ||
342 | goto return_overflow; | ||
343 | |||
344 | /* Get the next tag and advance to first token. */ | ||
345 | tag = *cb++; | ||
346 | |||
347 | /* Parse the eight tokens described by the tag. */ | ||
348 | for (token = 0; token < 8; token++, tag >>= 1) { | ||
349 | u16 lg, pt, length, max_non_overlap; | ||
350 | register u16 i; | ||
351 | u8 *dp_back_addr; | ||
352 | |||
353 | /* Check if we are done / still in range. */ | ||
354 | if (cb >= cb_sb_end || dp_addr > dp_sb_end) | ||
355 | break; | ||
356 | |||
357 | /* Determine token type and parse appropriately.*/ | ||
358 | if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) { | ||
359 | /* | ||
360 | * We have a symbol token, copy the symbol across, and | ||
361 | * advance the source and destination positions. | ||
362 | */ | ||
363 | *dp_addr++ = *cb++; | ||
364 | ++*dest_ofs; | ||
365 | |||
366 | /* Continue with the next token. */ | ||
367 | continue; | ||
368 | } | ||
369 | |||
370 | /* | ||
371 | * We have a phrase token. Make sure it is not the first tag in | ||
372 | * the sb as this is illegal and would confuse the code below. | ||
373 | */ | ||
374 | if (dp_addr == dp_sb_start) | ||
375 | goto return_overflow; | ||
376 | |||
377 | /* | ||
378 | * Determine the number of bytes to go back (p) and the number | ||
379 | * of bytes to copy (l). We use an optimized algorithm in which | ||
380 | * we first calculate log2(current destination position in sb), | ||
381 | * which allows determination of l and p in O(1) rather than | ||
382 | * O(n). We just need an arch-optimized log2() function now. | ||
383 | */ | ||
384 | lg = 0; | ||
385 | for (i = *dest_ofs - do_sb_start - 1; i >= 0x10; i >>= 1) | ||
386 | lg++; | ||
387 | |||
388 | /* Get the phrase token into i. */ | ||
389 | pt = le16_to_cpup((le16*)cb); | ||
390 | |||
391 | /* | ||
392 | * Calculate starting position of the byte sequence in | ||
393 | * the destination using the fact that p = (pt >> (12 - lg)) + 1 | ||
394 | * and make sure we don't go too far back. | ||
395 | */ | ||
396 | dp_back_addr = dp_addr - (pt >> (12 - lg)) - 1; | ||
397 | if (dp_back_addr < dp_sb_start) | ||
398 | goto return_overflow; | ||
399 | |||
400 | /* Now calculate the length of the byte sequence. */ | ||
401 | length = (pt & (0xfff >> lg)) + 3; | ||
402 | |||
403 | /* Advance destination position and verify it is in range. */ | ||
404 | *dest_ofs += length; | ||
405 | if (*dest_ofs > do_sb_end) | ||
406 | goto return_overflow; | ||
407 | |||
408 | /* The number of non-overlapping bytes. */ | ||
409 | max_non_overlap = dp_addr - dp_back_addr; | ||
410 | |||
411 | if (length <= max_non_overlap) { | ||
412 | /* The byte sequence doesn't overlap, just copy it. */ | ||
413 | memcpy(dp_addr, dp_back_addr, length); | ||
414 | |||
415 | /* Advance destination pointer. */ | ||
416 | dp_addr += length; | ||
417 | } else { | ||
418 | /* | ||
419 | * The byte sequence does overlap, copy non-overlapping | ||
420 | * part and then do a slow byte by byte copy for the | ||
421 | * overlapping part. Also, advance the destination | ||
422 | * pointer. | ||
423 | */ | ||
424 | memcpy(dp_addr, dp_back_addr, max_non_overlap); | ||
425 | dp_addr += max_non_overlap; | ||
426 | dp_back_addr += max_non_overlap; | ||
427 | length -= max_non_overlap; | ||
428 | while (length--) | ||
429 | *dp_addr++ = *dp_back_addr++; | ||
430 | } | ||
431 | |||
432 | /* Advance source position and continue with the next token. */ | ||
433 | cb += 2; | ||
434 | } | ||
435 | |||
436 | /* No tokens left in the current tag. Continue with the next tag. */ | ||
437 | goto do_next_tag; | ||
438 | |||
439 | return_overflow: | ||
440 | ntfs_error(NULL, "Failed. Returning -EOVERFLOW."); | ||
441 | goto return_error; | ||
442 | } | ||
443 | |||
444 | /** | ||
445 | * ntfs_read_compressed_block - read a compressed block into the page cache | ||
446 | * @page: locked page in the compression block(s) we need to read | ||
447 | * | ||
448 | * When we are called the page has already been verified to be locked and the | ||
449 | * attribute is known to be non-resident, not encrypted, but compressed. | ||
450 | * | ||
451 | * 1. Determine which compression block(s) @page is in. | ||
452 | * 2. Get hold of all pages corresponding to this/these compression block(s). | ||
453 | * 3. Read the (first) compression block. | ||
454 | * 4. Decompress it into the corresponding pages. | ||
455 | * 5. Throw the compressed data away and proceed to 3. for the next compression | ||
456 | * block or return success if no more compression blocks left. | ||
457 | * | ||
458 | * Warning: We have to be careful what we do about existing pages. They might | ||
459 | * have been written to so that we would lose data if we were to just overwrite | ||
460 | * them with the out-of-date uncompressed data. | ||
461 | * | ||
462 | * FIXME: For PAGE_CACHE_SIZE > cb_size we are not doing the Right Thing(TM) at | ||
463 | * the end of the file I think. We need to detect this case and zero the out | ||
464 | * of bounds remainder of the page in question and mark it as handled. At the | ||
465 | * moment we would just return -EIO on such a page. This bug will only become | ||
466 | * apparent if pages are above 8kiB and the NTFS volume only uses 512 byte | ||
467 | * clusters so is probably not going to be seen by anyone. Still this should | ||
468 | * be fixed. (AIA) | ||
469 | * | ||
470 | * FIXME: Again for PAGE_CACHE_SIZE > cb_size we are screwing up both in | ||
471 | * handling sparse and compressed cbs. (AIA) | ||
472 | * | ||
473 | * FIXME: At the moment we don't do any zeroing out in the case that | ||
474 | * initialized_size is less than data_size. This should be safe because of the | ||
475 | * nature of the compression algorithm used. Just in case we check and output | ||
476 | * an error message in read inode if the two sizes are not equal for a | ||
477 | * compressed file. (AIA) | ||
478 | */ | ||
479 | int ntfs_read_compressed_block(struct page *page) | ||
480 | { | ||
481 | struct address_space *mapping = page->mapping; | ||
482 | ntfs_inode *ni = NTFS_I(mapping->host); | ||
483 | ntfs_volume *vol = ni->vol; | ||
484 | struct super_block *sb = vol->sb; | ||
485 | runlist_element *rl; | ||
486 | unsigned long block_size = sb->s_blocksize; | ||
487 | unsigned char block_size_bits = sb->s_blocksize_bits; | ||
488 | u8 *cb, *cb_pos, *cb_end; | ||
489 | struct buffer_head **bhs; | ||
490 | unsigned long offset, index = page->index; | ||
491 | u32 cb_size = ni->itype.compressed.block_size; | ||
492 | u64 cb_size_mask = cb_size - 1UL; | ||
493 | VCN vcn; | ||
494 | LCN lcn; | ||
495 | /* The first wanted vcn (minimum alignment is PAGE_CACHE_SIZE). */ | ||
496 | VCN start_vcn = (((s64)index << PAGE_CACHE_SHIFT) & ~cb_size_mask) >> | ||
497 | vol->cluster_size_bits; | ||
498 | /* | ||
499 | * The first vcn after the last wanted vcn (minimum alignment is again | ||
500 | * PAGE_CACHE_SIZE). | ||
501 | */ | ||
502 | VCN end_vcn = ((((s64)(index + 1UL) << PAGE_CACHE_SHIFT) + cb_size - 1) | ||
503 | & ~cb_size_mask) >> vol->cluster_size_bits; | ||
504 | /* Number of compression blocks (cbs) in the wanted vcn range. */ | ||
505 | unsigned int nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits | ||
506 | >> ni->itype.compressed.block_size_bits; | ||
507 | /* | ||
508 | * Number of pages required to store the uncompressed data from all | ||
509 | * compression blocks (cbs) overlapping @page. Due to alignment | ||
510 | * guarantees of start_vcn and end_vcn, no need to round up here. | ||
511 | */ | ||
512 | unsigned int nr_pages = (end_vcn - start_vcn) << | ||
513 | vol->cluster_size_bits >> PAGE_CACHE_SHIFT; | ||
514 | unsigned int xpage, max_page, cur_page, cur_ofs, i; | ||
515 | unsigned int cb_clusters, cb_max_ofs; | ||
516 | int block, max_block, cb_max_page, bhs_size, nr_bhs, err = 0; | ||
517 | struct page **pages; | ||
518 | unsigned char xpage_done = 0; | ||
519 | |||
520 | ntfs_debug("Entering, page->index = 0x%lx, cb_size = 0x%x, nr_pages = " | ||
521 | "%i.", index, cb_size, nr_pages); | ||
522 | /* | ||
523 | * Bad things happen if we get here for anything that is not an | ||
524 | * unnamed $DATA attribute. | ||
525 | */ | ||
526 | BUG_ON(ni->type != AT_DATA); | ||
527 | BUG_ON(ni->name_len); | ||
528 | |||
529 | pages = kmalloc(nr_pages * sizeof(struct page *), GFP_NOFS); | ||
530 | |||
531 | /* Allocate memory to store the buffer heads we need. */ | ||
532 | bhs_size = cb_size / block_size * sizeof(struct buffer_head *); | ||
533 | bhs = kmalloc(bhs_size, GFP_NOFS); | ||
534 | |||
535 | if (unlikely(!pages || !bhs)) { | ||
536 | kfree(bhs); | ||
537 | kfree(pages); | ||
538 | SetPageError(page); | ||
539 | unlock_page(page); | ||
540 | ntfs_error(vol->sb, "Failed to allocate internal buffers."); | ||
541 | return -ENOMEM; | ||
542 | } | ||
543 | |||
544 | /* | ||
545 | * We have already been given one page, this is the one we must do. | ||
546 | * Once again, the alignment guarantees keep it simple. | ||
547 | */ | ||
548 | offset = start_vcn << vol->cluster_size_bits >> PAGE_CACHE_SHIFT; | ||
549 | xpage = index - offset; | ||
550 | pages[xpage] = page; | ||
551 | /* | ||
552 | * The remaining pages need to be allocated and inserted into the page | ||
553 | * cache, alignment guarantees keep all the below much simpler. (-8 | ||
554 | */ | ||
555 | max_page = ((VFS_I(ni)->i_size + PAGE_CACHE_SIZE - 1) >> | ||
556 | PAGE_CACHE_SHIFT) - offset; | ||
557 | if (nr_pages < max_page) | ||
558 | max_page = nr_pages; | ||
559 | for (i = 0; i < max_page; i++, offset++) { | ||
560 | if (i != xpage) | ||
561 | pages[i] = grab_cache_page_nowait(mapping, offset); | ||
562 | page = pages[i]; | ||
563 | if (page) { | ||
564 | /* | ||
565 | * We only (re)read the page if it isn't already read | ||
566 | * in and/or dirty or we would be losing data or at | ||
567 | * least wasting our time. | ||
568 | */ | ||
569 | if (!PageDirty(page) && (!PageUptodate(page) || | ||
570 | PageError(page))) { | ||
571 | ClearPageError(page); | ||
572 | kmap(page); | ||
573 | continue; | ||
574 | } | ||
575 | unlock_page(page); | ||
576 | page_cache_release(page); | ||
577 | pages[i] = NULL; | ||
578 | } | ||
579 | } | ||
580 | |||
581 | /* | ||
582 | * We have the runlist, and all the destination pages we need to fill. | ||
583 | * Now read the first compression block. | ||
584 | */ | ||
585 | cur_page = 0; | ||
586 | cur_ofs = 0; | ||
587 | cb_clusters = ni->itype.compressed.block_clusters; | ||
588 | do_next_cb: | ||
589 | nr_cbs--; | ||
590 | nr_bhs = 0; | ||
591 | |||
592 | /* Read all cb buffer heads one cluster at a time. */ | ||
593 | rl = NULL; | ||
594 | for (vcn = start_vcn, start_vcn += cb_clusters; vcn < start_vcn; | ||
595 | vcn++) { | ||
596 | BOOL is_retry = FALSE; | ||
597 | |||
598 | if (!rl) { | ||
599 | lock_retry_remap: | ||
600 | down_read(&ni->runlist.lock); | ||
601 | rl = ni->runlist.rl; | ||
602 | } | ||
603 | if (likely(rl != NULL)) { | ||
604 | /* Seek to element containing target vcn. */ | ||
605 | while (rl->length && rl[1].vcn <= vcn) | ||
606 | rl++; | ||
607 | lcn = ntfs_rl_vcn_to_lcn(rl, vcn); | ||
608 | } else | ||
609 | lcn = LCN_RL_NOT_MAPPED; | ||
610 | ntfs_debug("Reading vcn = 0x%llx, lcn = 0x%llx.", | ||
611 | (unsigned long long)vcn, | ||
612 | (unsigned long long)lcn); | ||
613 | if (lcn < 0) { | ||
614 | /* | ||
615 | * When we reach the first sparse cluster we have | ||
616 | * finished with the cb. | ||
617 | */ | ||
618 | if (lcn == LCN_HOLE) | ||
619 | break; | ||
620 | if (is_retry || lcn != LCN_RL_NOT_MAPPED) | ||
621 | goto rl_err; | ||
622 | is_retry = TRUE; | ||
623 | /* | ||
624 | * Attempt to map runlist, dropping lock for the | ||
625 | * duration. | ||
626 | */ | ||
627 | up_read(&ni->runlist.lock); | ||
628 | if (!ntfs_map_runlist(ni, vcn)) | ||
629 | goto lock_retry_remap; | ||
630 | goto map_rl_err; | ||
631 | } | ||
632 | block = lcn << vol->cluster_size_bits >> block_size_bits; | ||
633 | /* Read the lcn from device in chunks of block_size bytes. */ | ||
634 | max_block = block + (vol->cluster_size >> block_size_bits); | ||
635 | do { | ||
636 | ntfs_debug("block = 0x%x.", block); | ||
637 | if (unlikely(!(bhs[nr_bhs] = sb_getblk(sb, block)))) | ||
638 | goto getblk_err; | ||
639 | nr_bhs++; | ||
640 | } while (++block < max_block); | ||
641 | } | ||
642 | |||
643 | /* Release the lock if we took it. */ | ||
644 | if (rl) | ||
645 | up_read(&ni->runlist.lock); | ||
646 | |||
647 | /* Setup and initiate io on all buffer heads. */ | ||
648 | for (i = 0; i < nr_bhs; i++) { | ||
649 | struct buffer_head *tbh = bhs[i]; | ||
650 | |||
651 | if (unlikely(test_set_buffer_locked(tbh))) | ||
652 | continue; | ||
653 | if (unlikely(buffer_uptodate(tbh))) { | ||
654 | unlock_buffer(tbh); | ||
655 | continue; | ||
656 | } | ||
657 | get_bh(tbh); | ||
658 | tbh->b_end_io = end_buffer_read_sync; | ||
659 | submit_bh(READ, tbh); | ||
660 | } | ||
661 | |||
662 | /* Wait for io completion on all buffer heads. */ | ||
663 | for (i = 0; i < nr_bhs; i++) { | ||
664 | struct buffer_head *tbh = bhs[i]; | ||
665 | |||
666 | if (buffer_uptodate(tbh)) | ||
667 | continue; | ||
668 | wait_on_buffer(tbh); | ||
669 | /* | ||
670 | * We need an optimization barrier here, otherwise we start | ||
671 | * hitting the below fixup code when accessing a loopback | ||
672 | * mounted ntfs partition. This indicates either there is a | ||
673 | * race condition in the loop driver or, more likely, gcc | ||
674 | * overoptimises the code without the barrier and it doesn't | ||
675 | * do the Right Thing(TM). | ||
676 | */ | ||
677 | barrier(); | ||
678 | if (unlikely(!buffer_uptodate(tbh))) { | ||
679 | ntfs_warning(vol->sb, "Buffer is unlocked but not " | ||
680 | "uptodate! Unplugging the disk queue " | ||
681 | "and rescheduling."); | ||
682 | get_bh(tbh); | ||
683 | blk_run_address_space(mapping); | ||
684 | schedule(); | ||
685 | put_bh(tbh); | ||
686 | if (unlikely(!buffer_uptodate(tbh))) | ||
687 | goto read_err; | ||
688 | ntfs_warning(vol->sb, "Buffer is now uptodate. Good."); | ||
689 | } | ||
690 | } | ||
691 | |||
692 | /* | ||
693 | * Get the compression buffer. We must not sleep any more | ||
694 | * until we are finished with it. | ||
695 | */ | ||
696 | spin_lock(&ntfs_cb_lock); | ||
697 | cb = ntfs_compression_buffer; | ||
698 | |||
699 | BUG_ON(!cb); | ||
700 | |||
701 | cb_pos = cb; | ||
702 | cb_end = cb + cb_size; | ||
703 | |||
704 | /* Copy the buffer heads into the contiguous buffer. */ | ||
705 | for (i = 0; i < nr_bhs; i++) { | ||
706 | memcpy(cb_pos, bhs[i]->b_data, block_size); | ||
707 | cb_pos += block_size; | ||
708 | } | ||
709 | |||
710 | /* Just a precaution. */ | ||
711 | if (cb_pos + 2 <= cb + cb_size) | ||
712 | *(u16*)cb_pos = 0; | ||
713 | |||
714 | /* Reset cb_pos back to the beginning. */ | ||
715 | cb_pos = cb; | ||
716 | |||
717 | /* We now have both source (if present) and destination. */ | ||
718 | ntfs_debug("Successfully read the compression block."); | ||
719 | |||
720 | /* The last page and maximum offset within it for the current cb. */ | ||
721 | cb_max_page = (cur_page << PAGE_CACHE_SHIFT) + cur_ofs + cb_size; | ||
722 | cb_max_ofs = cb_max_page & ~PAGE_CACHE_MASK; | ||
723 | cb_max_page >>= PAGE_CACHE_SHIFT; | ||
724 | |||
725 | /* Catch end of file inside a compression block. */ | ||
726 | if (cb_max_page > max_page) | ||
727 | cb_max_page = max_page; | ||
728 | |||
729 | if (vcn == start_vcn - cb_clusters) { | ||
730 | /* Sparse cb, zero out page range overlapping the cb. */ | ||
731 | ntfs_debug("Found sparse compression block."); | ||
732 | /* We can sleep from now on, so we drop lock. */ | ||
733 | spin_unlock(&ntfs_cb_lock); | ||
734 | if (cb_max_ofs) | ||
735 | cb_max_page--; | ||
736 | for (; cur_page < cb_max_page; cur_page++) { | ||
737 | page = pages[cur_page]; | ||
738 | if (page) { | ||
739 | /* | ||
740 | * FIXME: Using clear_page() will become wrong | ||
741 | * when we get PAGE_CACHE_SIZE != PAGE_SIZE but | ||
742 | * for now there is no problem. | ||
743 | */ | ||
744 | if (likely(!cur_ofs)) | ||
745 | clear_page(page_address(page)); | ||
746 | else | ||
747 | memset(page_address(page) + cur_ofs, 0, | ||
748 | PAGE_CACHE_SIZE - | ||
749 | cur_ofs); | ||
750 | flush_dcache_page(page); | ||
751 | kunmap(page); | ||
752 | SetPageUptodate(page); | ||
753 | unlock_page(page); | ||
754 | if (cur_page == xpage) | ||
755 | xpage_done = 1; | ||
756 | else | ||
757 | page_cache_release(page); | ||
758 | pages[cur_page] = NULL; | ||
759 | } | ||
760 | cb_pos += PAGE_CACHE_SIZE - cur_ofs; | ||
761 | cur_ofs = 0; | ||
762 | if (cb_pos >= cb_end) | ||
763 | break; | ||
764 | } | ||
765 | /* If we have a partial final page, deal with it now. */ | ||
766 | if (cb_max_ofs && cb_pos < cb_end) { | ||
767 | page = pages[cur_page]; | ||
768 | if (page) | ||
769 | memset(page_address(page) + cur_ofs, 0, | ||
770 | cb_max_ofs - cur_ofs); | ||
771 | /* | ||
772 | * No need to update cb_pos at this stage: | ||
773 | * cb_pos += cb_max_ofs - cur_ofs; | ||
774 | */ | ||
775 | cur_ofs = cb_max_ofs; | ||
776 | } | ||
777 | } else if (vcn == start_vcn) { | ||
778 | /* We can't sleep so we need two stages. */ | ||
779 | unsigned int cur2_page = cur_page; | ||
780 | unsigned int cur_ofs2 = cur_ofs; | ||
781 | u8 *cb_pos2 = cb_pos; | ||
782 | |||
783 | ntfs_debug("Found uncompressed compression block."); | ||
784 | /* Uncompressed cb, copy it to the destination pages. */ | ||
785 | /* | ||
786 | * TODO: As a big optimization, we could detect this case | ||
787 | * before we read all the pages and use block_read_full_page() | ||
788 | * on all full pages instead (we still have to treat partial | ||
789 | * pages especially but at least we are getting rid of the | ||
790 | * synchronous io for the majority of pages. | ||
791 | * Or if we choose not to do the read-ahead/-behind stuff, we | ||
792 | * could just return block_read_full_page(pages[xpage]) as long | ||
793 | * as PAGE_CACHE_SIZE <= cb_size. | ||
794 | */ | ||
795 | if (cb_max_ofs) | ||
796 | cb_max_page--; | ||
797 | /* First stage: copy data into destination pages. */ | ||
798 | for (; cur_page < cb_max_page; cur_page++) { | ||
799 | page = pages[cur_page]; | ||
800 | if (page) | ||
801 | memcpy(page_address(page) + cur_ofs, cb_pos, | ||
802 | PAGE_CACHE_SIZE - cur_ofs); | ||
803 | cb_pos += PAGE_CACHE_SIZE - cur_ofs; | ||
804 | cur_ofs = 0; | ||
805 | if (cb_pos >= cb_end) | ||
806 | break; | ||
807 | } | ||
808 | /* If we have a partial final page, deal with it now. */ | ||
809 | if (cb_max_ofs && cb_pos < cb_end) { | ||
810 | page = pages[cur_page]; | ||
811 | if (page) | ||
812 | memcpy(page_address(page) + cur_ofs, cb_pos, | ||
813 | cb_max_ofs - cur_ofs); | ||
814 | cb_pos += cb_max_ofs - cur_ofs; | ||
815 | cur_ofs = cb_max_ofs; | ||
816 | } | ||
817 | /* We can sleep from now on, so drop lock. */ | ||
818 | spin_unlock(&ntfs_cb_lock); | ||
819 | /* Second stage: finalize pages. */ | ||
820 | for (; cur2_page < cb_max_page; cur2_page++) { | ||
821 | page = pages[cur2_page]; | ||
822 | if (page) { | ||
823 | /* | ||
824 | * If we are outside the initialized size, zero | ||
825 | * the out of bounds page range. | ||
826 | */ | ||
827 | handle_bounds_compressed_page(ni, page); | ||
828 | flush_dcache_page(page); | ||
829 | kunmap(page); | ||
830 | SetPageUptodate(page); | ||
831 | unlock_page(page); | ||
832 | if (cur2_page == xpage) | ||
833 | xpage_done = 1; | ||
834 | else | ||
835 | page_cache_release(page); | ||
836 | pages[cur2_page] = NULL; | ||
837 | } | ||
838 | cb_pos2 += PAGE_CACHE_SIZE - cur_ofs2; | ||
839 | cur_ofs2 = 0; | ||
840 | if (cb_pos2 >= cb_end) | ||
841 | break; | ||
842 | } | ||
843 | } else { | ||
844 | /* Compressed cb, decompress it into the destination page(s). */ | ||
845 | unsigned int prev_cur_page = cur_page; | ||
846 | |||
847 | ntfs_debug("Found compressed compression block."); | ||
848 | err = ntfs_decompress(pages, &cur_page, &cur_ofs, | ||
849 | cb_max_page, cb_max_ofs, xpage, &xpage_done, | ||
850 | cb_pos, cb_size - (cb_pos - cb)); | ||
851 | /* | ||
852 | * We can sleep from now on, lock already dropped by | ||
853 | * ntfs_decompress(). | ||
854 | */ | ||
855 | if (err) { | ||
856 | ntfs_error(vol->sb, "ntfs_decompress() failed in inode " | ||
857 | "0x%lx with error code %i. Skipping " | ||
858 | "this compression block.", | ||
859 | ni->mft_no, -err); | ||
860 | /* Release the unfinished pages. */ | ||
861 | for (; prev_cur_page < cur_page; prev_cur_page++) { | ||
862 | page = pages[prev_cur_page]; | ||
863 | if (page) { | ||
864 | if (prev_cur_page == xpage && | ||
865 | !xpage_done) | ||
866 | SetPageError(page); | ||
867 | flush_dcache_page(page); | ||
868 | kunmap(page); | ||
869 | unlock_page(page); | ||
870 | if (prev_cur_page != xpage) | ||
871 | page_cache_release(page); | ||
872 | pages[prev_cur_page] = NULL; | ||
873 | } | ||
874 | } | ||
875 | } | ||
876 | } | ||
877 | |||
878 | /* Release the buffer heads. */ | ||
879 | for (i = 0; i < nr_bhs; i++) | ||
880 | brelse(bhs[i]); | ||
881 | |||
882 | /* Do we have more work to do? */ | ||
883 | if (nr_cbs) | ||
884 | goto do_next_cb; | ||
885 | |||
886 | /* We no longer need the list of buffer heads. */ | ||
887 | kfree(bhs); | ||
888 | |||
889 | /* Clean up if we have any pages left. Should never happen. */ | ||
890 | for (cur_page = 0; cur_page < max_page; cur_page++) { | ||
891 | page = pages[cur_page]; | ||
892 | if (page) { | ||
893 | ntfs_error(vol->sb, "Still have pages left! " | ||
894 | "Terminating them with extreme " | ||
895 | "prejudice. Inode 0x%lx, page index " | ||
896 | "0x%lx.", ni->mft_no, page->index); | ||
897 | if (cur_page == xpage && !xpage_done) | ||
898 | SetPageError(page); | ||
899 | flush_dcache_page(page); | ||
900 | kunmap(page); | ||
901 | unlock_page(page); | ||
902 | if (cur_page != xpage) | ||
903 | page_cache_release(page); | ||
904 | pages[cur_page] = NULL; | ||
905 | } | ||
906 | } | ||
907 | |||
908 | /* We no longer need the list of pages. */ | ||
909 | kfree(pages); | ||
910 | |||
911 | /* If we have completed the requested page, we return success. */ | ||
912 | if (likely(xpage_done)) | ||
913 | return 0; | ||
914 | |||
915 | ntfs_debug("Failed. Returning error code %s.", err == -EOVERFLOW ? | ||
916 | "EOVERFLOW" : (!err ? "EIO" : "unkown error")); | ||
917 | return err < 0 ? err : -EIO; | ||
918 | |||
919 | read_err: | ||
920 | ntfs_error(vol->sb, "IO error while reading compressed data."); | ||
921 | /* Release the buffer heads. */ | ||
922 | for (i = 0; i < nr_bhs; i++) | ||
923 | brelse(bhs[i]); | ||
924 | goto err_out; | ||
925 | |||
926 | map_rl_err: | ||
927 | ntfs_error(vol->sb, "ntfs_map_runlist() failed. Cannot read " | ||
928 | "compression block."); | ||
929 | goto err_out; | ||
930 | |||
931 | rl_err: | ||
932 | up_read(&ni->runlist.lock); | ||
933 | ntfs_error(vol->sb, "ntfs_rl_vcn_to_lcn() failed. Cannot read " | ||
934 | "compression block."); | ||
935 | goto err_out; | ||
936 | |||
937 | getblk_err: | ||
938 | up_read(&ni->runlist.lock); | ||
939 | ntfs_error(vol->sb, "getblk() failed. Cannot read compression block."); | ||
940 | |||
941 | err_out: | ||
942 | kfree(bhs); | ||
943 | for (i = cur_page; i < max_page; i++) { | ||
944 | page = pages[i]; | ||
945 | if (page) { | ||
946 | if (i == xpage && !xpage_done) | ||
947 | SetPageError(page); | ||
948 | flush_dcache_page(page); | ||
949 | kunmap(page); | ||
950 | unlock_page(page); | ||
951 | if (i != xpage) | ||
952 | page_cache_release(page); | ||
953 | } | ||
954 | } | ||
955 | kfree(pages); | ||
956 | return -EIO; | ||
957 | } | ||
diff --git a/fs/ntfs/debug.c b/fs/ntfs/debug.c new file mode 100644 index 000000000000..6fb6bb5e3723 --- /dev/null +++ b/fs/ntfs/debug.c | |||
@@ -0,0 +1,180 @@ | |||
1 | /* | ||
2 | * debug.c - NTFS kernel debug support. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
5 | * | ||
6 | * This program/include file is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as published | ||
8 | * by the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program/include file is distributed in the hope that it will be | ||
12 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
13 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program (in the main directory of the Linux-NTFS | ||
18 | * distribution in the file COPYING); if not, write to the Free Software | ||
19 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | |||
22 | #include "debug.h" | ||
23 | |||
/*
 * Scratch buffer into which each message is formatted before it is handed to
 * printk().  The buffer is shared by all the logging functions in this file,
 * so every access to it is serialised by err_buf_lock.
 */
static DEFINE_SPINLOCK(err_buf_lock);
static char err_buf[1024];
30 | |||
31 | /** | ||
32 | * __ntfs_warning - output a warning to the syslog | ||
33 | * @function: name of function outputting the warning | ||
34 | * @sb: super block of mounted ntfs filesystem | ||
35 | * @fmt: warning string containing format specifications | ||
36 | * @...: a variable number of arguments specified in @fmt | ||
37 | * | ||
38 | * Outputs a warning to the syslog for the mounted ntfs filesystem described | ||
39 | * by @sb. | ||
40 | * | ||
41 | * @fmt and the corresponding @... is printf style format string containing | ||
42 | * the warning string and the corresponding format arguments, respectively. | ||
43 | * | ||
44 | * @function is the name of the function from which __ntfs_warning is being | ||
45 | * called. | ||
46 | * | ||
47 | * Note, you should be using debug.h::ntfs_warning(@sb, @fmt, @...) instead | ||
48 | * as this provides the @function parameter automatically. | ||
49 | */ | ||
50 | void __ntfs_warning(const char *function, const struct super_block *sb, | ||
51 | const char *fmt, ...) | ||
52 | { | ||
53 | va_list args; | ||
54 | int flen = 0; | ||
55 | |||
56 | #ifndef DEBUG | ||
57 | if (!printk_ratelimit()) | ||
58 | return; | ||
59 | #endif | ||
60 | if (function) | ||
61 | flen = strlen(function); | ||
62 | spin_lock(&err_buf_lock); | ||
63 | va_start(args, fmt); | ||
64 | vsnprintf(err_buf, sizeof(err_buf), fmt, args); | ||
65 | va_end(args); | ||
66 | if (sb) | ||
67 | printk(KERN_ERR "NTFS-fs warning (device %s): %s(): %s\n", | ||
68 | sb->s_id, flen ? function : "", err_buf); | ||
69 | else | ||
70 | printk(KERN_ERR "NTFS-fs warning: %s(): %s\n", | ||
71 | flen ? function : "", err_buf); | ||
72 | spin_unlock(&err_buf_lock); | ||
73 | } | ||
74 | |||
75 | /** | ||
76 | * __ntfs_error - output an error to the syslog | ||
77 | * @function: name of function outputting the error | ||
78 | * @sb: super block of mounted ntfs filesystem | ||
79 | * @fmt: error string containing format specifications | ||
80 | * @...: a variable number of arguments specified in @fmt | ||
81 | * | ||
82 | * Outputs an error to the syslog for the mounted ntfs filesystem described | ||
83 | * by @sb. | ||
84 | * | ||
85 | * @fmt and the corresponding @... is printf style format string containing | ||
86 | * the error string and the corresponding format arguments, respectively. | ||
87 | * | ||
88 | * @function is the name of the function from which __ntfs_error is being | ||
89 | * called. | ||
90 | * | ||
91 | * Note, you should be using debug.h::ntfs_error(@sb, @fmt, @...) instead | ||
92 | * as this provides the @function parameter automatically. | ||
93 | */ | ||
94 | void __ntfs_error(const char *function, const struct super_block *sb, | ||
95 | const char *fmt, ...) | ||
96 | { | ||
97 | va_list args; | ||
98 | int flen = 0; | ||
99 | |||
100 | #ifndef DEBUG | ||
101 | if (!printk_ratelimit()) | ||
102 | return; | ||
103 | #endif | ||
104 | if (function) | ||
105 | flen = strlen(function); | ||
106 | spin_lock(&err_buf_lock); | ||
107 | va_start(args, fmt); | ||
108 | vsnprintf(err_buf, sizeof(err_buf), fmt, args); | ||
109 | va_end(args); | ||
110 | if (sb) | ||
111 | printk(KERN_ERR "NTFS-fs error (device %s): %s(): %s\n", | ||
112 | sb->s_id, flen ? function : "", err_buf); | ||
113 | else | ||
114 | printk(KERN_ERR "NTFS-fs error: %s(): %s\n", | ||
115 | flen ? function : "", err_buf); | ||
116 | spin_unlock(&err_buf_lock); | ||
117 | } | ||
118 | |||
119 | #ifdef DEBUG | ||
120 | |||
121 | /* If 1, output debug messages, and if 0, don't. */ | ||
122 | int debug_msgs = 0; | ||
123 | |||
124 | void __ntfs_debug (const char *file, int line, const char *function, | ||
125 | const char *fmt, ...) | ||
126 | { | ||
127 | va_list args; | ||
128 | int flen = 0; | ||
129 | |||
130 | if (!debug_msgs) | ||
131 | return; | ||
132 | if (function) | ||
133 | flen = strlen(function); | ||
134 | spin_lock(&err_buf_lock); | ||
135 | va_start(args, fmt); | ||
136 | vsnprintf(err_buf, sizeof(err_buf), fmt, args); | ||
137 | va_end(args); | ||
138 | printk(KERN_DEBUG "NTFS-fs DEBUG (%s, %d): %s(): %s\n", file, line, | ||
139 | flen ? function : "", err_buf); | ||
140 | spin_unlock(&err_buf_lock); | ||
141 | } | ||
142 | |||
143 | /* Dump a runlist. Caller has to provide synchronisation for @rl. */ | ||
144 | void ntfs_debug_dump_runlist(const runlist_element *rl) | ||
145 | { | ||
146 | int i; | ||
147 | const char *lcn_str[5] = { "LCN_HOLE ", "LCN_RL_NOT_MAPPED", | ||
148 | "LCN_ENOENT ", "LCN_unknown " }; | ||
149 | |||
150 | if (!debug_msgs) | ||
151 | return; | ||
152 | printk(KERN_DEBUG "NTFS-fs DEBUG: Dumping runlist (values in hex):\n"); | ||
153 | if (!rl) { | ||
154 | printk(KERN_DEBUG "Run list not present.\n"); | ||
155 | return; | ||
156 | } | ||
157 | printk(KERN_DEBUG "VCN LCN Run length\n"); | ||
158 | for (i = 0; ; i++) { | ||
159 | LCN lcn = (rl + i)->lcn; | ||
160 | |||
161 | if (lcn < (LCN)0) { | ||
162 | int index = -lcn - 1; | ||
163 | |||
164 | if (index > -LCN_ENOENT - 1) | ||
165 | index = 3; | ||
166 | printk(KERN_DEBUG "%-16Lx %s %-16Lx%s\n", | ||
167 | (rl + i)->vcn, lcn_str[index], | ||
168 | (rl + i)->length, (rl + i)->length ? | ||
169 | "" : " (runlist end)"); | ||
170 | } else | ||
171 | printk(KERN_DEBUG "%-16Lx %-16Lx %-16Lx%s\n", | ||
172 | (rl + i)->vcn, (rl + i)->lcn, | ||
173 | (rl + i)->length, (rl + i)->length ? | ||
174 | "" : " (runlist end)"); | ||
175 | if (!(rl + i)->length) | ||
176 | break; | ||
177 | } | ||
178 | } | ||
179 | |||
180 | #endif | ||
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h new file mode 100644 index 000000000000..8ac37c33d127 --- /dev/null +++ b/fs/ntfs/debug.h | |||
@@ -0,0 +1,67 @@ | |||
1 | /* | ||
2 | * debug.h - NTFS kernel debug support. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
5 | * | ||
6 | * This program/include file is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as published | ||
8 | * by the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program/include file is distributed in the hope that it will be | ||
12 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
13 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program (in the main directory of the Linux-NTFS | ||
18 | * distribution in the file COPYING); if not, write to the Free Software | ||
19 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | |||
22 | #ifndef _LINUX_NTFS_DEBUG_H | ||
23 | #define _LINUX_NTFS_DEBUG_H | ||
24 | |||
25 | #include <linux/fs.h> | ||
26 | |||
27 | #include "runlist.h" | ||
28 | |||
29 | #ifdef DEBUG | ||
30 | |||
31 | extern int debug_msgs; | ||
32 | |||
33 | #if 0 /* Fool kernel-doc since it doesn't do macros yet */ | ||
34 | /** | ||
35 | * ntfs_debug - write a debug level message to syslog | ||
36 | * @f: a printf format string containing the message | ||
37 | * @...: the variables to substitute into @f | ||
38 | * | ||
39 | * ntfs_debug() writes a DEBUG level message to the syslog but only if the | ||
40 | * driver was compiled with -DDEBUG. Otherwise, the call turns into a NOP. | ||
41 | */ | ||
42 | static void ntfs_debug(const char *f, ...); | ||
43 | #endif | ||
44 | |||
45 | extern void __ntfs_debug (const char *file, int line, const char *function, | ||
46 | const char *format, ...) __attribute__ ((format (printf, 4, 5))); | ||
47 | #define ntfs_debug(f, a...) \ | ||
48 | __ntfs_debug(__FILE__, __LINE__, __FUNCTION__, f, ##a) | ||
49 | |||
50 | extern void ntfs_debug_dump_runlist(const runlist_element *rl); | ||
51 | |||
52 | #else /* !DEBUG */ | ||
53 | |||
54 | #define ntfs_debug(f, a...) do {} while (0) | ||
55 | #define ntfs_debug_dump_runlist(rl) do {} while (0) | ||
56 | |||
57 | #endif /* !DEBUG */ | ||
58 | |||
59 | extern void __ntfs_warning(const char *function, const struct super_block *sb, | ||
60 | const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); | ||
61 | #define ntfs_warning(sb, f, a...) __ntfs_warning(__FUNCTION__, sb, f, ##a) | ||
62 | |||
63 | extern void __ntfs_error(const char *function, const struct super_block *sb, | ||
64 | const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); | ||
65 | #define ntfs_error(sb, f, a...) __ntfs_error(__FUNCTION__, sb, f, ##a) | ||
66 | |||
67 | #endif /* _LINUX_NTFS_DEBUG_H */ | ||
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c new file mode 100644 index 000000000000..93577561cdbe --- /dev/null +++ b/fs/ntfs/dir.c | |||
@@ -0,0 +1,1569 @@ | |||
1 | /** | ||
2 | * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
5 | * Copyright (c) 2002 Richard Russon | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #include <linux/smp_lock.h> | ||
24 | #include <linux/buffer_head.h> | ||
25 | |||
26 | #include "dir.h" | ||
27 | #include "aops.h" | ||
28 | #include "attrib.h" | ||
29 | #include "mft.h" | ||
30 | #include "debug.h" | ||
31 | #include "ntfs.h" | ||
32 | |||
33 | /** | ||
34 | * The little endian Unicode string $I30 as a global constant. | ||
35 | */ | ||
36 | ntfschar I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'), | ||
37 | const_cpu_to_le16('3'), const_cpu_to_le16('0'), 0 }; | ||
38 | |||
39 | /** | ||
40 | * ntfs_lookup_inode_by_name - find an inode in a directory given its name | ||
41 | * @dir_ni: ntfs inode of the directory in which to search for the name | ||
42 | * @uname: Unicode name for which to search in the directory | ||
43 | * @uname_len: length of the name @uname in Unicode characters | ||
44 | * @res: return the found file name if necessary (see below) | ||
45 | * | ||
46 | * Look for an inode with name @uname in the directory with inode @dir_ni. | ||
47 | * ntfs_lookup_inode_by_name() walks the contents of the directory looking for | ||
48 | * the Unicode name. If the name is found in the directory, the corresponding | ||
49 | * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it | ||
50 | * is a 64-bit number containing the sequence number. | ||
51 | * | ||
52 | * On error, a negative value is returned corresponding to the error code. In | ||
53 | * particular if the inode is not found -ENOENT is returned. Note that you | ||
54 | * can't just check the return value for being negative, you have to check the | ||
55 | * inode number for being negative which you can extract using MREC(return | ||
56 | * value). | ||
57 | * | ||
58 | * Note, @uname_len does not include the (optional) terminating NULL character. | ||
59 | * | ||
60 | * Note, we look for a case sensitive match first but we also look for a case | ||
61 | * insensitive match at the same time. If we find a case insensitive match, we | ||
62 | * save that for the case that we don't find an exact match, where we return | ||
63 | * the case insensitive match and setup @res (which we allocate!) with the mft | ||
64 | * reference, the file name type, length and with a copy of the little endian | ||
65 | * Unicode file name itself. If we match a file name which is in the DOS name | ||
66 | * space, we only return the mft reference and file name type in @res. | ||
67 | * ntfs_lookup() then uses this to find the long file name in the inode itself. | ||
68 | * This is to avoid polluting the dcache with short file names. We want them to | ||
69 | * work but we don't care for how quickly one can access them. This also fixes | ||
70 | * the dcache aliasing issues. | ||
71 | * | ||
72 | * Locking: - Caller must hold i_sem on the directory. | ||
73 | * - Each page cache page in the index allocation mapping must be | ||
74 | * locked whilst being accessed otherwise we may find a corrupt | ||
75 | * page due to it being under ->writepage at the moment which | ||
76 | * applies the mst protection fixups before writing out and then | ||
77 | * removes them again after the write is complete after which it | ||
78 | * unlocks the page. | ||
79 | */ | ||
80 | MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname, | ||
81 | const int uname_len, ntfs_name **res) | ||
82 | { | ||
83 | ntfs_volume *vol = dir_ni->vol; | ||
84 | struct super_block *sb = vol->sb; | ||
85 | MFT_RECORD *m; | ||
86 | INDEX_ROOT *ir; | ||
87 | INDEX_ENTRY *ie; | ||
88 | INDEX_ALLOCATION *ia; | ||
89 | u8 *index_end; | ||
90 | u64 mref; | ||
91 | ntfs_attr_search_ctx *ctx; | ||
92 | int err, rc; | ||
93 | VCN vcn, old_vcn; | ||
94 | struct address_space *ia_mapping; | ||
95 | struct page *page; | ||
96 | u8 *kaddr; | ||
97 | ntfs_name *name = NULL; | ||
98 | |||
99 | BUG_ON(!S_ISDIR(VFS_I(dir_ni)->i_mode)); | ||
100 | BUG_ON(NInoAttr(dir_ni)); | ||
101 | /* Get hold of the mft record for the directory. */ | ||
102 | m = map_mft_record(dir_ni); | ||
103 | if (IS_ERR(m)) { | ||
104 | ntfs_error(sb, "map_mft_record() failed with error code %ld.", | ||
105 | -PTR_ERR(m)); | ||
106 | return ERR_MREF(PTR_ERR(m)); | ||
107 | } | ||
108 | ctx = ntfs_attr_get_search_ctx(dir_ni, m); | ||
109 | if (unlikely(!ctx)) { | ||
110 | err = -ENOMEM; | ||
111 | goto err_out; | ||
112 | } | ||
113 | /* Find the index root attribute in the mft record. */ | ||
114 | err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, | ||
115 | 0, ctx); | ||
116 | if (unlikely(err)) { | ||
117 | if (err == -ENOENT) { | ||
118 | ntfs_error(sb, "Index root attribute missing in " | ||
119 | "directory inode 0x%lx.", | ||
120 | dir_ni->mft_no); | ||
121 | err = -EIO; | ||
122 | } | ||
123 | goto err_out; | ||
124 | } | ||
125 | /* Get to the index root value (it's been verified in read_inode). */ | ||
126 | ir = (INDEX_ROOT*)((u8*)ctx->attr + | ||
127 | le16_to_cpu(ctx->attr->data.resident.value_offset)); | ||
128 | index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length); | ||
129 | /* The first index entry. */ | ||
130 | ie = (INDEX_ENTRY*)((u8*)&ir->index + | ||
131 | le32_to_cpu(ir->index.entries_offset)); | ||
132 | /* | ||
133 | * Loop until we exceed valid memory (corruption case) or until we | ||
134 | * reach the last entry. | ||
135 | */ | ||
136 | for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { | ||
137 | /* Bounds checks. */ | ||
138 | if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie + | ||
139 | sizeof(INDEX_ENTRY_HEADER) > index_end || | ||
140 | (u8*)ie + le16_to_cpu(ie->key_length) > | ||
141 | index_end) | ||
142 | goto dir_err_out; | ||
143 | /* | ||
144 | * The last entry cannot contain a name. It can however contain | ||
145 | * a pointer to a child node in the B+tree so we just break out. | ||
146 | */ | ||
147 | if (ie->flags & INDEX_ENTRY_END) | ||
148 | break; | ||
149 | /* | ||
150 | * We perform a case sensitive comparison and if that matches | ||
151 | * we are done and return the mft reference of the inode (i.e. | ||
152 | * the inode number together with the sequence number for | ||
153 | * consistency checking). We convert it to cpu format before | ||
154 | * returning. | ||
155 | */ | ||
156 | if (ntfs_are_names_equal(uname, uname_len, | ||
157 | (ntfschar*)&ie->key.file_name.file_name, | ||
158 | ie->key.file_name.file_name_length, | ||
159 | CASE_SENSITIVE, vol->upcase, vol->upcase_len)) { | ||
160 | found_it: | ||
161 | /* | ||
162 | * We have a perfect match, so we don't need to care | ||
163 | * about having matched imperfectly before, so we can | ||
164 | * free name and set *res to NULL. | ||
165 | * However, if the perfect match is a short file name, | ||
166 | * we need to signal this through *res, so that | ||
167 | * ntfs_lookup() can fix dcache aliasing issues. | ||
168 | * As an optimization we just reuse an existing | ||
169 | * allocation of *res. | ||
170 | */ | ||
171 | if (ie->key.file_name.file_name_type == FILE_NAME_DOS) { | ||
172 | if (!name) { | ||
173 | name = kmalloc(sizeof(ntfs_name), | ||
174 | GFP_NOFS); | ||
175 | if (!name) { | ||
176 | err = -ENOMEM; | ||
177 | goto err_out; | ||
178 | } | ||
179 | } | ||
180 | name->mref = le64_to_cpu( | ||
181 | ie->data.dir.indexed_file); | ||
182 | name->type = FILE_NAME_DOS; | ||
183 | name->len = 0; | ||
184 | *res = name; | ||
185 | } else { | ||
186 | if (name) | ||
187 | kfree(name); | ||
188 | *res = NULL; | ||
189 | } | ||
190 | mref = le64_to_cpu(ie->data.dir.indexed_file); | ||
191 | ntfs_attr_put_search_ctx(ctx); | ||
192 | unmap_mft_record(dir_ni); | ||
193 | return mref; | ||
194 | } | ||
195 | /* | ||
196 | * For a case insensitive mount, we also perform a case | ||
197 | * insensitive comparison (provided the file name is not in the | ||
198 | * POSIX namespace). If the comparison matches, and the name is | ||
199 | * in the WIN32 namespace, we cache the filename in *res so | ||
200 | * that the caller, ntfs_lookup(), can work on it. If the | ||
201 | * comparison matches, and the name is in the DOS namespace, we | ||
202 | * only cache the mft reference and the file name type (we set | ||
203 | * the name length to zero for simplicity). | ||
204 | */ | ||
205 | if (!NVolCaseSensitive(vol) && | ||
206 | ie->key.file_name.file_name_type && | ||
207 | ntfs_are_names_equal(uname, uname_len, | ||
208 | (ntfschar*)&ie->key.file_name.file_name, | ||
209 | ie->key.file_name.file_name_length, | ||
210 | IGNORE_CASE, vol->upcase, vol->upcase_len)) { | ||
211 | int name_size = sizeof(ntfs_name); | ||
212 | u8 type = ie->key.file_name.file_name_type; | ||
213 | u8 len = ie->key.file_name.file_name_length; | ||
214 | |||
215 | /* Only one case insensitive matching name allowed. */ | ||
216 | if (name) { | ||
217 | ntfs_error(sb, "Found already allocated name " | ||
218 | "in phase 1. Please run chkdsk " | ||
219 | "and if that doesn't find any " | ||
220 | "errors please report you saw " | ||
221 | "this message to " | ||
222 | "linux-ntfs-dev@lists." | ||
223 | "sourceforge.net."); | ||
224 | goto dir_err_out; | ||
225 | } | ||
226 | |||
227 | if (type != FILE_NAME_DOS) | ||
228 | name_size += len * sizeof(ntfschar); | ||
229 | name = kmalloc(name_size, GFP_NOFS); | ||
230 | if (!name) { | ||
231 | err = -ENOMEM; | ||
232 | goto err_out; | ||
233 | } | ||
234 | name->mref = le64_to_cpu(ie->data.dir.indexed_file); | ||
235 | name->type = type; | ||
236 | if (type != FILE_NAME_DOS) { | ||
237 | name->len = len; | ||
238 | memcpy(name->name, ie->key.file_name.file_name, | ||
239 | len * sizeof(ntfschar)); | ||
240 | } else | ||
241 | name->len = 0; | ||
242 | *res = name; | ||
243 | } | ||
244 | /* | ||
245 | * Not a perfect match, need to do full blown collation so we | ||
246 | * know which way in the B+tree we have to go. | ||
247 | */ | ||
248 | rc = ntfs_collate_names(uname, uname_len, | ||
249 | (ntfschar*)&ie->key.file_name.file_name, | ||
250 | ie->key.file_name.file_name_length, 1, | ||
251 | IGNORE_CASE, vol->upcase, vol->upcase_len); | ||
252 | /* | ||
253 | * If uname collates before the name of the current entry, there | ||
254 | * is definitely no such name in this index but we might need to | ||
255 | * descend into the B+tree so we just break out of the loop. | ||
256 | */ | ||
257 | if (rc == -1) | ||
258 | break; | ||
259 | /* The names are not equal, continue the search. */ | ||
260 | if (rc) | ||
261 | continue; | ||
262 | /* | ||
263 | * Names match with case insensitive comparison, now try the | ||
264 | * case sensitive comparison, which is required for proper | ||
265 | * collation. | ||
266 | */ | ||
267 | rc = ntfs_collate_names(uname, uname_len, | ||
268 | (ntfschar*)&ie->key.file_name.file_name, | ||
269 | ie->key.file_name.file_name_length, 1, | ||
270 | CASE_SENSITIVE, vol->upcase, vol->upcase_len); | ||
271 | if (rc == -1) | ||
272 | break; | ||
273 | if (rc) | ||
274 | continue; | ||
275 | /* | ||
276 | * Perfect match, this will never happen as the | ||
277 | * ntfs_are_names_equal() call will have gotten a match but we | ||
278 | * still treat it correctly. | ||
279 | */ | ||
280 | goto found_it; | ||
281 | } | ||
282 | /* | ||
283 | * We have finished with this index without success. Check for the | ||
284 | * presence of a child node and if not present return -ENOENT, unless | ||
285 | * we have got a matching name cached in name in which case return the | ||
286 | * mft reference associated with it. | ||
287 | */ | ||
288 | if (!(ie->flags & INDEX_ENTRY_NODE)) { | ||
289 | if (name) { | ||
290 | ntfs_attr_put_search_ctx(ctx); | ||
291 | unmap_mft_record(dir_ni); | ||
292 | return name->mref; | ||
293 | } | ||
294 | ntfs_debug("Entry not found."); | ||
295 | err = -ENOENT; | ||
296 | goto err_out; | ||
297 | } /* Child node present, descend into it. */ | ||
298 | /* Consistency check: Verify that an index allocation exists. */ | ||
299 | if (!NInoIndexAllocPresent(dir_ni)) { | ||
300 | ntfs_error(sb, "No index allocation attribute but index entry " | ||
301 | "requires one. Directory inode 0x%lx is " | ||
302 | "corrupt or driver bug.", dir_ni->mft_no); | ||
303 | goto err_out; | ||
304 | } | ||
305 | /* Get the starting vcn of the index_block holding the child node. */ | ||
306 | vcn = sle64_to_cpup((sle64*)((u8*)ie + le16_to_cpu(ie->length) - 8)); | ||
307 | ia_mapping = VFS_I(dir_ni)->i_mapping; | ||
308 | /* | ||
309 | * We are done with the index root and the mft record. Release them, | ||
310 | * otherwise we deadlock with ntfs_map_page(). | ||
311 | */ | ||
312 | ntfs_attr_put_search_ctx(ctx); | ||
313 | unmap_mft_record(dir_ni); | ||
314 | m = NULL; | ||
315 | ctx = NULL; | ||
316 | descend_into_child_node: | ||
317 | /* | ||
318 | * Convert vcn to index into the index allocation attribute in units | ||
319 | * of PAGE_CACHE_SIZE and map the page cache page, reading it from | ||
320 | * disk if necessary. | ||
321 | */ | ||
322 | page = ntfs_map_page(ia_mapping, vcn << | ||
323 | dir_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT); | ||
324 | if (IS_ERR(page)) { | ||
325 | ntfs_error(sb, "Failed to map directory index page, error %ld.", | ||
326 | -PTR_ERR(page)); | ||
327 | err = PTR_ERR(page); | ||
328 | goto err_out; | ||
329 | } | ||
330 | lock_page(page); | ||
331 | kaddr = (u8*)page_address(page); | ||
332 | fast_descend_into_child_node: | ||
333 | /* Get to the index allocation block. */ | ||
334 | ia = (INDEX_ALLOCATION*)(kaddr + ((vcn << | ||
335 | dir_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK)); | ||
336 | /* Bounds checks. */ | ||
337 | if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) { | ||
338 | ntfs_error(sb, "Out of bounds check failed. Corrupt directory " | ||
339 | "inode 0x%lx or driver bug.", dir_ni->mft_no); | ||
340 | goto unm_err_out; | ||
341 | } | ||
342 | /* Catch multi sector transfer fixup errors. */ | ||
343 | if (unlikely(!ntfs_is_indx_record(ia->magic))) { | ||
344 | ntfs_error(sb, "Directory index record with vcn 0x%llx is " | ||
345 | "corrupt. Corrupt inode 0x%lx. Run chkdsk.", | ||
346 | (unsigned long long)vcn, dir_ni->mft_no); | ||
347 | goto unm_err_out; | ||
348 | } | ||
349 | if (sle64_to_cpu(ia->index_block_vcn) != vcn) { | ||
350 | ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is " | ||
351 | "different from expected VCN (0x%llx). " | ||
352 | "Directory inode 0x%lx is corrupt or driver " | ||
353 | "bug.", (unsigned long long) | ||
354 | sle64_to_cpu(ia->index_block_vcn), | ||
355 | (unsigned long long)vcn, dir_ni->mft_no); | ||
356 | goto unm_err_out; | ||
357 | } | ||
358 | if (le32_to_cpu(ia->index.allocated_size) + 0x18 != | ||
359 | dir_ni->itype.index.block_size) { | ||
360 | ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode " | ||
361 | "0x%lx has a size (%u) differing from the " | ||
362 | "directory specified size (%u). Directory " | ||
363 | "inode is corrupt or driver bug.", | ||
364 | (unsigned long long)vcn, dir_ni->mft_no, | ||
365 | le32_to_cpu(ia->index.allocated_size) + 0x18, | ||
366 | dir_ni->itype.index.block_size); | ||
367 | goto unm_err_out; | ||
368 | } | ||
369 | index_end = (u8*)ia + dir_ni->itype.index.block_size; | ||
370 | if (index_end > kaddr + PAGE_CACHE_SIZE) { | ||
371 | ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode " | ||
372 | "0x%lx crosses page boundary. Impossible! " | ||
373 | "Cannot access! This is probably a bug in the " | ||
374 | "driver.", (unsigned long long)vcn, | ||
375 | dir_ni->mft_no); | ||
376 | goto unm_err_out; | ||
377 | } | ||
378 | index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length); | ||
379 | if (index_end > (u8*)ia + dir_ni->itype.index.block_size) { | ||
380 | ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory " | ||
381 | "inode 0x%lx exceeds maximum size.", | ||
382 | (unsigned long long)vcn, dir_ni->mft_no); | ||
383 | goto unm_err_out; | ||
384 | } | ||
385 | /* The first index entry. */ | ||
386 | ie = (INDEX_ENTRY*)((u8*)&ia->index + | ||
387 | le32_to_cpu(ia->index.entries_offset)); | ||
388 | /* | ||
389 | * Iterate similar to above big loop but applied to index buffer, thus | ||
390 | * loop until we exceed valid memory (corruption case) or until we | ||
391 | * reach the last entry. | ||
392 | */ | ||
393 | for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { | ||
394 | /* Bounds check. */ | ||
395 | if ((u8*)ie < (u8*)ia || (u8*)ie + | ||
396 | sizeof(INDEX_ENTRY_HEADER) > index_end || | ||
397 | (u8*)ie + le16_to_cpu(ie->key_length) > | ||
398 | index_end) { | ||
399 | ntfs_error(sb, "Index entry out of bounds in " | ||
400 | "directory inode 0x%lx.", | ||
401 | dir_ni->mft_no); | ||
402 | goto unm_err_out; | ||
403 | } | ||
404 | /* | ||
405 | * The last entry cannot contain a name. It can however contain | ||
406 | * a pointer to a child node in the B+tree so we just break out. | ||
407 | */ | ||
408 | if (ie->flags & INDEX_ENTRY_END) | ||
409 | break; | ||
410 | /* | ||
411 | * We perform a case sensitive comparison and if that matches | ||
412 | * we are done and return the mft reference of the inode (i.e. | ||
413 | * the inode number together with the sequence number for | ||
414 | * consistency checking). We convert it to cpu format before | ||
415 | * returning. | ||
416 | */ | ||
417 | if (ntfs_are_names_equal(uname, uname_len, | ||
418 | (ntfschar*)&ie->key.file_name.file_name, | ||
419 | ie->key.file_name.file_name_length, | ||
420 | CASE_SENSITIVE, vol->upcase, vol->upcase_len)) { | ||
421 | found_it2: | ||
422 | /* | ||
423 | * We have a perfect match, so we don't need to care | ||
424 | * about having matched imperfectly before, so we can | ||
425 | * free name and set *res to NULL. | ||
426 | * However, if the perfect match is a short file name, | ||
427 | * we need to signal this through *res, so that | ||
428 | * ntfs_lookup() can fix dcache aliasing issues. | ||
429 | * As an optimization we just reuse an existing | ||
430 | * allocation of *res. | ||
431 | */ | ||
432 | if (ie->key.file_name.file_name_type == FILE_NAME_DOS) { | ||
433 | if (!name) { | ||
434 | name = kmalloc(sizeof(ntfs_name), | ||
435 | GFP_NOFS); | ||
436 | if (!name) { | ||
437 | err = -ENOMEM; | ||
438 | goto unm_err_out; | ||
439 | } | ||
440 | } | ||
441 | name->mref = le64_to_cpu( | ||
442 | ie->data.dir.indexed_file); | ||
443 | name->type = FILE_NAME_DOS; | ||
444 | name->len = 0; | ||
445 | *res = name; | ||
446 | } else { | ||
447 | if (name) | ||
448 | kfree(name); | ||
449 | *res = NULL; | ||
450 | } | ||
451 | mref = le64_to_cpu(ie->data.dir.indexed_file); | ||
452 | unlock_page(page); | ||
453 | ntfs_unmap_page(page); | ||
454 | return mref; | ||
455 | } | ||
456 | /* | ||
457 | * For a case insensitive mount, we also perform a case | ||
458 | * insensitive comparison (provided the file name is not in the | ||
459 | * POSIX namespace). If the comparison matches, and the name is | ||
460 | * in the WIN32 namespace, we cache the filename in *res so | ||
461 | * that the caller, ntfs_lookup(), can work on it. If the | ||
462 | * comparison matches, and the name is in the DOS namespace, we | ||
463 | * only cache the mft reference and the file name type (we set | ||
464 | * the name length to zero for simplicity). | ||
465 | */ | ||
466 | if (!NVolCaseSensitive(vol) && | ||
467 | ie->key.file_name.file_name_type && | ||
468 | ntfs_are_names_equal(uname, uname_len, | ||
469 | (ntfschar*)&ie->key.file_name.file_name, | ||
470 | ie->key.file_name.file_name_length, | ||
471 | IGNORE_CASE, vol->upcase, vol->upcase_len)) { | ||
472 | int name_size = sizeof(ntfs_name); | ||
473 | u8 type = ie->key.file_name.file_name_type; | ||
474 | u8 len = ie->key.file_name.file_name_length; | ||
475 | |||
476 | /* Only one case insensitive matching name allowed. */ | ||
477 | if (name) { | ||
478 | ntfs_error(sb, "Found already allocated name " | ||
479 | "in phase 2. Please run chkdsk " | ||
480 | "and if that doesn't find any " | ||
481 | "errors please report you saw " | ||
482 | "this message to " | ||
483 | "linux-ntfs-dev@lists." | ||
484 | "sourceforge.net."); | ||
485 | unlock_page(page); | ||
486 | ntfs_unmap_page(page); | ||
487 | goto dir_err_out; | ||
488 | } | ||
489 | |||
490 | if (type != FILE_NAME_DOS) | ||
491 | name_size += len * sizeof(ntfschar); | ||
492 | name = kmalloc(name_size, GFP_NOFS); | ||
493 | if (!name) { | ||
494 | err = -ENOMEM; | ||
495 | goto unm_err_out; | ||
496 | } | ||
497 | name->mref = le64_to_cpu(ie->data.dir.indexed_file); | ||
498 | name->type = type; | ||
499 | if (type != FILE_NAME_DOS) { | ||
500 | name->len = len; | ||
501 | memcpy(name->name, ie->key.file_name.file_name, | ||
502 | len * sizeof(ntfschar)); | ||
503 | } else | ||
504 | name->len = 0; | ||
505 | *res = name; | ||
506 | } | ||
507 | /* | ||
508 | * Not a perfect match, need to do full blown collation so we | ||
509 | * know which way in the B+tree we have to go. | ||
510 | */ | ||
511 | rc = ntfs_collate_names(uname, uname_len, | ||
512 | (ntfschar*)&ie->key.file_name.file_name, | ||
513 | ie->key.file_name.file_name_length, 1, | ||
514 | IGNORE_CASE, vol->upcase, vol->upcase_len); | ||
515 | /* | ||
516 | * If uname collates before the name of the current entry, there | ||
517 | * is definitely no such name in this index but we might need to | ||
518 | * descend into the B+tree so we just break out of the loop. | ||
519 | */ | ||
520 | if (rc == -1) | ||
521 | break; | ||
522 | /* The names are not equal, continue the search. */ | ||
523 | if (rc) | ||
524 | continue; | ||
525 | /* | ||
526 | * Names match with case insensitive comparison, now try the | ||
527 | * case sensitive comparison, which is required for proper | ||
528 | * collation. | ||
529 | */ | ||
530 | rc = ntfs_collate_names(uname, uname_len, | ||
531 | (ntfschar*)&ie->key.file_name.file_name, | ||
532 | ie->key.file_name.file_name_length, 1, | ||
533 | CASE_SENSITIVE, vol->upcase, vol->upcase_len); | ||
534 | if (rc == -1) | ||
535 | break; | ||
536 | if (rc) | ||
537 | continue; | ||
538 | /* | ||
539 | * Perfect match, this will never happen as the | ||
540 | * ntfs_are_names_equal() call will have gotten a match but we | ||
541 | * still treat it correctly. | ||
542 | */ | ||
543 | goto found_it2; | ||
544 | } | ||
545 | /* | ||
546 | * We have finished with this index buffer without success. Check for | ||
547 | * the presence of a child node. | ||
548 | */ | ||
549 | if (ie->flags & INDEX_ENTRY_NODE) { | ||
550 | if ((ia->index.flags & NODE_MASK) == LEAF_NODE) { | ||
551 | ntfs_error(sb, "Index entry with child node found in " | ||
552 | "a leaf node in directory inode 0x%lx.", | ||
553 | dir_ni->mft_no); | ||
554 | goto unm_err_out; | ||
555 | } | ||
556 | /* Child node present, descend into it. */ | ||
557 | old_vcn = vcn; | ||
558 | vcn = sle64_to_cpup((sle64*)((u8*)ie + | ||
559 | le16_to_cpu(ie->length) - 8)); | ||
560 | if (vcn >= 0) { | ||
561 | /* If vcn is in the same page cache page as old_vcn we | ||
562 | * recycle the mapped page. */ | ||
563 | if (old_vcn << vol->cluster_size_bits >> | ||
564 | PAGE_CACHE_SHIFT == vcn << | ||
565 | vol->cluster_size_bits >> | ||
566 | PAGE_CACHE_SHIFT) | ||
567 | goto fast_descend_into_child_node; | ||
568 | unlock_page(page); | ||
569 | ntfs_unmap_page(page); | ||
570 | goto descend_into_child_node; | ||
571 | } | ||
572 | ntfs_error(sb, "Negative child node vcn in directory inode " | ||
573 | "0x%lx.", dir_ni->mft_no); | ||
574 | goto unm_err_out; | ||
575 | } | ||
576 | /* | ||
577 | * No child node present, return -ENOENT, unless we have got a matching | ||
578 | * name cached in name in which case return the mft reference | ||
579 | * associated with it. | ||
580 | */ | ||
581 | if (name) { | ||
582 | unlock_page(page); | ||
583 | ntfs_unmap_page(page); | ||
584 | return name->mref; | ||
585 | } | ||
586 | ntfs_debug("Entry not found."); | ||
587 | err = -ENOENT; | ||
588 | unm_err_out: | ||
589 | unlock_page(page); | ||
590 | ntfs_unmap_page(page); | ||
591 | err_out: | ||
592 | if (!err) | ||
593 | err = -EIO; | ||
594 | if (ctx) | ||
595 | ntfs_attr_put_search_ctx(ctx); | ||
596 | if (m) | ||
597 | unmap_mft_record(dir_ni); | ||
598 | if (name) { | ||
599 | kfree(name); | ||
600 | *res = NULL; | ||
601 | } | ||
602 | return ERR_MREF(err); | ||
603 | dir_err_out: | ||
604 | ntfs_error(sb, "Corrupt directory. Aborting lookup."); | ||
605 | goto err_out; | ||
606 | } | ||
607 | |||
608 | #if 0 | ||
609 | |||
610 | // TODO: (AIA) | ||
611 | // The algorithm embedded in this code will be required once we support | ||
612 | // adding entries to directories, where file names must be collated correctly | ||
613 | // in order not to cause corruption of the file system. | ||
614 | |||
615 | /** | ||
616 | * ntfs_lookup_inode_by_name - find an inode in a directory given its name | ||
617 | * @dir_ni: ntfs inode of the directory in which to search for the name | ||
618 | * @uname: Unicode name for which to search in the directory | ||
619 | * @uname_len: length of the name @uname in Unicode characters | ||
620 | * | ||
621 | * Look for an inode with name @uname in the directory with inode @dir_ni. | ||
622 | * ntfs_lookup_inode_by_name() walks the contents of the directory looking for | ||
623 | * the Unicode name. If the name is found in the directory, the corresponding | ||
624 | * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it | ||
625 | * is a 64-bit number containing the sequence number. | ||
626 | * | ||
627 | * On error, a negative value is returned corresponding to the error code. In | ||
628 | * particular if the inode is not found -ENOENT is returned. Note that you | ||
629 | * can't just check the return value for being negative, you have to check the | ||
630 | * inode number for being negative which you can extract using MREF(return | ||
631 | * value). | ||
632 | * | ||
633 | * Note, @uname_len does not include the (optional) terminating NUL character. | ||
634 | */ | ||
635 | u64 ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname, | ||
636 | const int uname_len) | ||
637 | { | ||
638 | ntfs_volume *vol = dir_ni->vol; | ||
639 | struct super_block *sb = vol->sb; | ||
640 | MFT_RECORD *m; | ||
641 | INDEX_ROOT *ir; | ||
642 | INDEX_ENTRY *ie; | ||
643 | INDEX_ALLOCATION *ia; | ||
644 | u8 *index_end; | ||
645 | u64 mref; | ||
646 | ntfs_attr_search_ctx *ctx; | ||
647 | int err, rc; | ||
648 | IGNORE_CASE_BOOL ic; | ||
649 | VCN vcn, old_vcn; | ||
650 | struct address_space *ia_mapping; | ||
651 | struct page *page; | ||
652 | u8 *kaddr; | ||
653 | |||
654 | /* Get hold of the mft record for the directory. */ | ||
655 | m = map_mft_record(dir_ni); | ||
656 | if (IS_ERR(m)) { | ||
657 | ntfs_error(sb, "map_mft_record() failed with error code %ld.", | ||
658 | -PTR_ERR(m)); | ||
659 | return ERR_MREF(PTR_ERR(m)); | ||
660 | } | ||
661 | ctx = ntfs_attr_get_search_ctx(dir_ni, m); | ||
662 | if (!ctx) { | ||
663 | err = -ENOMEM; | ||
664 | goto err_out; | ||
665 | } | ||
666 | /* Find the index root attribute in the mft record. */ | ||
667 | err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, | ||
668 | 0, ctx); | ||
669 | if (unlikely(err)) { | ||
670 | if (err == -ENOENT) { | ||
671 | ntfs_error(sb, "Index root attribute missing in " | ||
672 | "directory inode 0x%lx.", | ||
673 | dir_ni->mft_no); | ||
674 | err = -EIO; | ||
675 | } | ||
676 | goto err_out; | ||
677 | } | ||
678 | /* Get to the index root value (it's been verified in read_inode). */ | ||
679 | ir = (INDEX_ROOT*)((u8*)ctx->attr + | ||
680 | le16_to_cpu(ctx->attr->data.resident.value_offset)); | ||
681 | index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length); | ||
682 | /* The first index entry. */ | ||
683 | ie = (INDEX_ENTRY*)((u8*)&ir->index + | ||
684 | le32_to_cpu(ir->index.entries_offset)); | ||
685 | /* | ||
686 | * Loop until we exceed valid memory (corruption case) or until we | ||
687 | * reach the last entry. | ||
688 | */ | ||
689 | for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { | ||
690 | /* Bounds checks. */ | ||
691 | if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie + | ||
692 | sizeof(INDEX_ENTRY_HEADER) > index_end || | ||
693 | (u8*)ie + le16_to_cpu(ie->key_length) > | ||
694 | index_end) | ||
695 | goto dir_err_out; | ||
696 | /* | ||
697 | * The last entry cannot contain a name. It can however contain | ||
698 | * a pointer to a child node in the B+tree so we just break out. | ||
699 | */ | ||
700 | if (ie->flags & INDEX_ENTRY_END) | ||
701 | break; | ||
702 | /* | ||
703 | * If the current entry has a name type of POSIX, the name is | ||
704 | * case sensitive and not otherwise. This has the effect of us | ||
705 | * not being able to access any POSIX file names which collate | ||
706 | * after the non-POSIX one when they only differ in case, but | ||
707 | * anyone doing screwy stuff like that deserves to burn in | ||
708 | * hell... Doing that kind of stuff on NT4 actually causes | ||
709 | * corruption on the partition even when using SP6a and Linux | ||
710 | * is not involved at all. | ||
711 | */ | ||
712 | ic = ie->key.file_name.file_name_type ? IGNORE_CASE : | ||
713 | CASE_SENSITIVE; | ||
714 | /* | ||
715 | * If the names match perfectly, we are done and return the | ||
716 | * mft reference of the inode (i.e. the inode number together | ||
717 | * with the sequence number for consistency checking. We | ||
718 | * convert it to cpu format before returning. | ||
719 | */ | ||
720 | if (ntfs_are_names_equal(uname, uname_len, | ||
721 | (ntfschar*)&ie->key.file_name.file_name, | ||
722 | ie->key.file_name.file_name_length, ic, | ||
723 | vol->upcase, vol->upcase_len)) { | ||
724 | found_it: | ||
725 | mref = le64_to_cpu(ie->data.dir.indexed_file); | ||
726 | ntfs_attr_put_search_ctx(ctx); | ||
727 | unmap_mft_record(dir_ni); | ||
728 | return mref; | ||
729 | } | ||
730 | /* | ||
731 | * Not a perfect match, need to do full blown collation so we | ||
732 | * know which way in the B+tree we have to go. | ||
733 | */ | ||
734 | rc = ntfs_collate_names(uname, uname_len, | ||
735 | (ntfschar*)&ie->key.file_name.file_name, | ||
736 | ie->key.file_name.file_name_length, 1, | ||
737 | IGNORE_CASE, vol->upcase, vol->upcase_len); | ||
738 | /* | ||
739 | * If uname collates before the name of the current entry, there | ||
740 | * is definitely no such name in this index but we might need to | ||
741 | * descend into the B+tree so we just break out of the loop. | ||
742 | */ | ||
743 | if (rc == -1) | ||
744 | break; | ||
745 | /* The names are not equal, continue the search. */ | ||
746 | if (rc) | ||
747 | continue; | ||
748 | /* | ||
749 | * Names match with case insensitive comparison, now try the | ||
750 | * case sensitive comparison, which is required for proper | ||
751 | * collation. | ||
752 | */ | ||
753 | rc = ntfs_collate_names(uname, uname_len, | ||
754 | (ntfschar*)&ie->key.file_name.file_name, | ||
755 | ie->key.file_name.file_name_length, 1, | ||
756 | CASE_SENSITIVE, vol->upcase, vol->upcase_len); | ||
757 | if (rc == -1) | ||
758 | break; | ||
759 | if (rc) | ||
760 | continue; | ||
761 | /* | ||
762 | * Perfect match, this will never happen as the | ||
763 | * ntfs_are_names_equal() call will have gotten a match but we | ||
764 | * still treat it correctly. | ||
765 | */ | ||
766 | goto found_it; | ||
767 | } | ||
768 | /* | ||
769 | * We have finished with this index without success. Check for the | ||
770 | * presence of a child node. | ||
771 | */ | ||
772 | if (!(ie->flags & INDEX_ENTRY_NODE)) { | ||
773 | /* No child node, return -ENOENT. */ | ||
774 | err = -ENOENT; | ||
775 | goto err_out; | ||
776 | } /* Child node present, descend into it. */ | ||
777 | /* Consistency check: Verify that an index allocation exists. */ | ||
778 | if (!NInoIndexAllocPresent(dir_ni)) { | ||
779 | ntfs_error(sb, "No index allocation attribute but index entry " | ||
780 | "requires one. Directory inode 0x%lx is " | ||
781 | "corrupt or driver bug.", dir_ni->mft_no); | ||
782 | goto err_out; | ||
783 | } | ||
784 | /* Get the starting vcn of the index_block holding the child node. */ | ||
785 | vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8); | ||
786 | ia_mapping = VFS_I(dir_ni)->i_mapping; | ||
787 | /* | ||
788 | * We are done with the index root and the mft record. Release them, | ||
789 | * otherwise we deadlock with ntfs_map_page(). | ||
790 | */ | ||
791 | ntfs_attr_put_search_ctx(ctx); | ||
792 | unmap_mft_record(dir_ni); | ||
793 | m = NULL; | ||
794 | ctx = NULL; | ||
795 | descend_into_child_node: | ||
796 | /* | ||
797 | * Convert vcn to index into the index allocation attribute in units | ||
798 | * of PAGE_CACHE_SIZE and map the page cache page, reading it from | ||
799 | * disk if necessary. | ||
800 | */ | ||
801 | page = ntfs_map_page(ia_mapping, vcn << | ||
802 | dir_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT); | ||
803 | if (IS_ERR(page)) { | ||
804 | ntfs_error(sb, "Failed to map directory index page, error %ld.", | ||
805 | -PTR_ERR(page)); | ||
806 | err = PTR_ERR(page); | ||
807 | goto err_out; | ||
808 | } | ||
809 | lock_page(page); | ||
810 | kaddr = (u8*)page_address(page); | ||
811 | fast_descend_into_child_node: | ||
812 | /* Get to the index allocation block. */ | ||
813 | ia = (INDEX_ALLOCATION*)(kaddr + ((vcn << | ||
814 | dir_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK)); | ||
815 | /* Bounds checks. */ | ||
816 | if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) { | ||
817 | ntfs_error(sb, "Out of bounds check failed. Corrupt directory " | ||
818 | "inode 0x%lx or driver bug.", dir_ni->mft_no); | ||
819 | goto unm_err_out; | ||
820 | } | ||
821 | /* Catch multi sector transfer fixup errors. */ | ||
822 | if (unlikely(!ntfs_is_indx_record(ia->magic))) { | ||
823 | ntfs_error(sb, "Directory index record with vcn 0x%llx is " | ||
824 | "corrupt. Corrupt inode 0x%lx. Run chkdsk.", | ||
825 | (unsigned long long)vcn, dir_ni->mft_no); | ||
826 | goto unm_err_out; | ||
827 | } | ||
828 | if (sle64_to_cpu(ia->index_block_vcn) != vcn) { | ||
829 | ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is " | ||
830 | "different from expected VCN (0x%llx). " | ||
831 | "Directory inode 0x%lx is corrupt or driver " | ||
832 | "bug.", (unsigned long long) | ||
833 | sle64_to_cpu(ia->index_block_vcn), | ||
834 | (unsigned long long)vcn, dir_ni->mft_no); | ||
835 | goto unm_err_out; | ||
836 | } | ||
837 | if (le32_to_cpu(ia->index.allocated_size) + 0x18 != | ||
838 | dir_ni->itype.index.block_size) { | ||
839 | ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode " | ||
840 | "0x%lx has a size (%u) differing from the " | ||
841 | "directory specified size (%u). Directory " | ||
842 | "inode is corrupt or driver bug.", | ||
843 | (unsigned long long)vcn, dir_ni->mft_no, | ||
844 | le32_to_cpu(ia->index.allocated_size) + 0x18, | ||
845 | dir_ni->itype.index.block_size); | ||
846 | goto unm_err_out; | ||
847 | } | ||
848 | index_end = (u8*)ia + dir_ni->itype.index.block_size; | ||
849 | if (index_end > kaddr + PAGE_CACHE_SIZE) { | ||
850 | ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode " | ||
851 | "0x%lx crosses page boundary. Impossible! " | ||
852 | "Cannot access! This is probably a bug in the " | ||
853 | "driver.", (unsigned long long)vcn, | ||
854 | dir_ni->mft_no); | ||
855 | goto unm_err_out; | ||
856 | } | ||
857 | index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length); | ||
858 | if (index_end > (u8*)ia + dir_ni->itype.index.block_size) { | ||
859 | ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory " | ||
860 | "inode 0x%lx exceeds maximum size.", | ||
861 | (unsigned long long)vcn, dir_ni->mft_no); | ||
862 | goto unm_err_out; | ||
863 | } | ||
864 | /* The first index entry. */ | ||
865 | ie = (INDEX_ENTRY*)((u8*)&ia->index + | ||
866 | le32_to_cpu(ia->index.entries_offset)); | ||
867 | /* | ||
868 | * Iterate similar to above big loop but applied to index buffer, thus | ||
869 | * loop until we exceed valid memory (corruption case) or until we | ||
870 | * reach the last entry. | ||
871 | */ | ||
872 | for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { | ||
873 | /* Bounds check. */ | ||
874 | if ((u8*)ie < (u8*)ia || (u8*)ie + | ||
875 | sizeof(INDEX_ENTRY_HEADER) > index_end || | ||
876 | (u8*)ie + le16_to_cpu(ie->key_length) > | ||
877 | index_end) { | ||
878 | ntfs_error(sb, "Index entry out of bounds in " | ||
879 | "directory inode 0x%lx.", | ||
880 | dir_ni->mft_no); | ||
881 | goto unm_err_out; | ||
882 | } | ||
883 | /* | ||
884 | * The last entry cannot contain a name. It can however contain | ||
885 | * a pointer to a child node in the B+tree so we just break out. | ||
886 | */ | ||
887 | if (ie->flags & INDEX_ENTRY_END) | ||
888 | break; | ||
889 | /* | ||
890 | * If the current entry has a name type of POSIX, the name is | ||
891 | * case sensitive and not otherwise. This has the effect of us | ||
892 | * not being able to access any POSIX file names which collate | ||
893 | * after the non-POSIX one when they only differ in case, but | ||
894 | * anyone doing screwy stuff like that deserves to burn in | ||
895 | * hell... Doing that kind of stuff on NT4 actually causes | ||
896 | * corruption on the partition even when using SP6a and Linux | ||
897 | * is not involved at all. | ||
898 | */ | ||
899 | ic = ie->key.file_name.file_name_type ? IGNORE_CASE : | ||
900 | CASE_SENSITIVE; | ||
901 | /* | ||
902 | * If the names match perfectly, we are done and return the | ||
903 | * mft reference of the inode (i.e. the inode number together | ||
904 | * with the sequence number for consistency checking. We | ||
905 | * convert it to cpu format before returning. | ||
906 | */ | ||
907 | if (ntfs_are_names_equal(uname, uname_len, | ||
908 | (ntfschar*)&ie->key.file_name.file_name, | ||
909 | ie->key.file_name.file_name_length, ic, | ||
910 | vol->upcase, vol->upcase_len)) { | ||
911 | found_it2: | ||
912 | mref = le64_to_cpu(ie->data.dir.indexed_file); | ||
913 | unlock_page(page); | ||
914 | ntfs_unmap_page(page); | ||
915 | return mref; | ||
916 | } | ||
917 | /* | ||
918 | * Not a perfect match, need to do full blown collation so we | ||
919 | * know which way in the B+tree we have to go. | ||
920 | */ | ||
921 | rc = ntfs_collate_names(uname, uname_len, | ||
922 | (ntfschar*)&ie->key.file_name.file_name, | ||
923 | ie->key.file_name.file_name_length, 1, | ||
924 | IGNORE_CASE, vol->upcase, vol->upcase_len); | ||
925 | /* | ||
926 | * If uname collates before the name of the current entry, there | ||
927 | * is definitely no such name in this index but we might need to | ||
928 | * descend into the B+tree so we just break out of the loop. | ||
929 | */ | ||
930 | if (rc == -1) | ||
931 | break; | ||
932 | /* The names are not equal, continue the search. */ | ||
933 | if (rc) | ||
934 | continue; | ||
935 | /* | ||
936 | * Names match with case insensitive comparison, now try the | ||
937 | * case sensitive comparison, which is required for proper | ||
938 | * collation. | ||
939 | */ | ||
940 | rc = ntfs_collate_names(uname, uname_len, | ||
941 | (ntfschar*)&ie->key.file_name.file_name, | ||
942 | ie->key.file_name.file_name_length, 1, | ||
943 | CASE_SENSITIVE, vol->upcase, vol->upcase_len); | ||
944 | if (rc == -1) | ||
945 | break; | ||
946 | if (rc) | ||
947 | continue; | ||
948 | /* | ||
949 | * Perfect match, this will never happen as the | ||
950 | * ntfs_are_names_equal() call will have gotten a match but we | ||
951 | * still treat it correctly. | ||
952 | */ | ||
953 | goto found_it2; | ||
954 | } | ||
955 | /* | ||
956 | * We have finished with this index buffer without success. Check for | ||
957 | * the presence of a child node. | ||
958 | */ | ||
959 | if (ie->flags & INDEX_ENTRY_NODE) { | ||
960 | if ((ia->index.flags & NODE_MASK) == LEAF_NODE) { | ||
961 | ntfs_error(sb, "Index entry with child node found in " | ||
962 | "a leaf node in directory inode 0x%lx.", | ||
963 | dir_ni->mft_no); | ||
964 | goto unm_err_out; | ||
965 | } | ||
966 | /* Child node present, descend into it. */ | ||
967 | old_vcn = vcn; | ||
968 | vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8); | ||
969 | if (vcn >= 0) { | ||
970 | /* If vcn is in the same page cache page as old_vcn we | ||
971 | * recycle the mapped page. */ | ||
972 | if (old_vcn << vol->cluster_size_bits >> | ||
973 | PAGE_CACHE_SHIFT == vcn << | ||
974 | vol->cluster_size_bits >> | ||
975 | PAGE_CACHE_SHIFT) | ||
976 | goto fast_descend_into_child_node; | ||
977 | unlock_page(page); | ||
978 | ntfs_unmap_page(page); | ||
979 | goto descend_into_child_node; | ||
980 | } | ||
981 | ntfs_error(sb, "Negative child node vcn in directory inode " | ||
982 | "0x%lx.", dir_ni->mft_no); | ||
983 | goto unm_err_out; | ||
984 | } | ||
985 | /* No child node, return -ENOENT. */ | ||
986 | ntfs_debug("Entry not found."); | ||
987 | err = -ENOENT; | ||
988 | unm_err_out: | ||
989 | unlock_page(page); | ||
990 | ntfs_unmap_page(page); | ||
991 | err_out: | ||
992 | if (!err) | ||
993 | err = -EIO; | ||
994 | if (ctx) | ||
995 | ntfs_attr_put_search_ctx(ctx); | ||
996 | if (m) | ||
997 | unmap_mft_record(dir_ni); | ||
998 | return ERR_MREF(err); | ||
999 | dir_err_out: | ||
1000 | ntfs_error(sb, "Corrupt directory. Aborting lookup."); | ||
1001 | goto err_out; | ||
1002 | } | ||
1003 | |||
1004 | #endif | ||
1005 | |||
1006 | /** | ||
1007 | * ntfs_filldir - ntfs specific filldir method | ||
1008 | * @vol: current ntfs volume | ||
1009 | * @fpos: position in the directory | ||
1010 | * @ndir: ntfs inode of current directory | ||
1011 | * @ia_page: page in which the index allocation buffer @ie is in resides | ||
1012 | * @ie: current index entry | ||
1013 | * @name: buffer to use for the converted name | ||
1014 | * @dirent: vfs filldir callback context | ||
1015 | * @filldir: vfs filldir callback | ||
1016 | * | ||
1017 | * Convert the Unicode @name to the loaded NLS and pass it to the @filldir | ||
1018 | * callback. | ||
1019 | * | ||
1020 | * If @ia_page is not NULL it is the locked page containing the index | ||
1021 | * allocation block containing the index entry @ie. | ||
1022 | * | ||
1023 | * Note, we drop (and then reacquire) the page lock on @ia_page across the | ||
1024 | * @filldir() call otherwise we would deadlock with NFSd when it calls ->lookup | ||
1025 | * since ntfs_lookup() will lock the same page. As an optimization, we do not | ||
1026 | * retake the lock if we are returning a non-zero value as ntfs_readdir() | ||
1027 | * would need to drop the lock immediately anyway. | ||
1028 | */ | ||
1029 | static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos, | ||
1030 | ntfs_inode *ndir, struct page *ia_page, INDEX_ENTRY *ie, | ||
1031 | u8 *name, void *dirent, filldir_t filldir) | ||
1032 | { | ||
1033 | unsigned long mref; | ||
1034 | int name_len, rc; | ||
1035 | unsigned dt_type; | ||
1036 | FILE_NAME_TYPE_FLAGS name_type; | ||
1037 | |||
1038 | name_type = ie->key.file_name.file_name_type; | ||
1039 | if (name_type == FILE_NAME_DOS) { | ||
1040 | ntfs_debug("Skipping DOS name space entry."); | ||
1041 | return 0; | ||
1042 | } | ||
1043 | if (MREF_LE(ie->data.dir.indexed_file) == FILE_root) { | ||
1044 | ntfs_debug("Skipping root directory self reference entry."); | ||
1045 | return 0; | ||
1046 | } | ||
1047 | if (MREF_LE(ie->data.dir.indexed_file) < FILE_first_user && | ||
1048 | !NVolShowSystemFiles(vol)) { | ||
1049 | ntfs_debug("Skipping system file."); | ||
1050 | return 0; | ||
1051 | } | ||
1052 | name_len = ntfs_ucstonls(vol, (ntfschar*)&ie->key.file_name.file_name, | ||
1053 | ie->key.file_name.file_name_length, &name, | ||
1054 | NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1); | ||
1055 | if (name_len <= 0) { | ||
1056 | ntfs_debug("Skipping unrepresentable file."); | ||
1057 | return 0; | ||
1058 | } | ||
1059 | if (ie->key.file_name.file_attributes & | ||
1060 | FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT) | ||
1061 | dt_type = DT_DIR; | ||
1062 | else | ||
1063 | dt_type = DT_REG; | ||
1064 | mref = MREF_LE(ie->data.dir.indexed_file); | ||
1065 | /* | ||
1066 | * Drop the page lock otherwise we deadlock with NFS when it calls | ||
1067 | * ->lookup since ntfs_lookup() will lock the same page. | ||
1068 | */ | ||
1069 | if (ia_page) | ||
1070 | unlock_page(ia_page); | ||
1071 | ntfs_debug("Calling filldir for %s with len %i, fpos 0x%llx, inode " | ||
1072 | "0x%lx, DT_%s.", name, name_len, fpos, mref, | ||
1073 | dt_type == DT_DIR ? "DIR" : "REG"); | ||
1074 | rc = filldir(dirent, name, name_len, fpos, mref, dt_type); | ||
1075 | /* Relock the page but not if we are aborting ->readdir. */ | ||
1076 | if (!rc && ia_page) | ||
1077 | lock_page(ia_page); | ||
1078 | return rc; | ||
1079 | } | ||
1080 | |||
1081 | /* | ||
1082 | * We use the same basic approach as the old NTFS driver, i.e. we parse the | ||
1083 | * index root entries and then the index allocation entries that are marked | ||
1084 | * as in use in the index bitmap. | ||
1085 | * | ||
1086 | * While this will return the names in random order this doesn't matter for | ||
1087 | * ->readdir but OTOH results in a faster ->readdir. | ||
1088 | * | ||
1089 | * VFS calls ->readdir without BKL but with i_sem held. This protects the VFS | ||
1090 | * parts (e.g. ->f_pos and ->i_size, and it also protects against directory | ||
1091 | * modifications). | ||
1092 | * | ||
1093 | * Locking: - Caller must hold i_sem on the directory. | ||
1094 | * - Each page cache page in the index allocation mapping must be | ||
1095 | * locked whilst being accessed otherwise we may find a corrupt | ||
1096 | * page due to it being under ->writepage at the moment which | ||
1097 | * applies the mst protection fixups before writing out and then | ||
1098 | * removes them again after the write is complete after which it | ||
1099 | * unlocks the page. | ||
1100 | */ | ||
1101 | static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | ||
1102 | { | ||
1103 | s64 ia_pos, ia_start, prev_ia_pos, bmp_pos; | ||
1104 | loff_t fpos; | ||
1105 | struct inode *bmp_vi, *vdir = filp->f_dentry->d_inode; | ||
1106 | struct super_block *sb = vdir->i_sb; | ||
1107 | ntfs_inode *ndir = NTFS_I(vdir); | ||
1108 | ntfs_volume *vol = NTFS_SB(sb); | ||
1109 | MFT_RECORD *m; | ||
1110 | INDEX_ROOT *ir = NULL; | ||
1111 | INDEX_ENTRY *ie; | ||
1112 | INDEX_ALLOCATION *ia; | ||
1113 | u8 *name = NULL; | ||
1114 | int rc, err, ir_pos, cur_bmp_pos; | ||
1115 | struct address_space *ia_mapping, *bmp_mapping; | ||
1116 | struct page *bmp_page = NULL, *ia_page = NULL; | ||
1117 | u8 *kaddr, *bmp, *index_end; | ||
1118 | ntfs_attr_search_ctx *ctx; | ||
1119 | |||
1120 | fpos = filp->f_pos; | ||
1121 | ntfs_debug("Entering for inode 0x%lx, fpos 0x%llx.", | ||
1122 | vdir->i_ino, fpos); | ||
1123 | rc = err = 0; | ||
1124 | /* Are we at end of dir yet? */ | ||
1125 | if (fpos >= vdir->i_size + vol->mft_record_size) | ||
1126 | goto done; | ||
1127 | /* Emulate . and .. for all directories. */ | ||
1128 | if (!fpos) { | ||
1129 | ntfs_debug("Calling filldir for . with len 1, fpos 0x0, " | ||
1130 | "inode 0x%lx, DT_DIR.", vdir->i_ino); | ||
1131 | rc = filldir(dirent, ".", 1, fpos, vdir->i_ino, DT_DIR); | ||
1132 | if (rc) | ||
1133 | goto done; | ||
1134 | fpos++; | ||
1135 | } | ||
1136 | if (fpos == 1) { | ||
1137 | ntfs_debug("Calling filldir for .. with len 2, fpos 0x1, " | ||
1138 | "inode 0x%lx, DT_DIR.", | ||
1139 | parent_ino(filp->f_dentry)); | ||
1140 | rc = filldir(dirent, "..", 2, fpos, | ||
1141 | parent_ino(filp->f_dentry), DT_DIR); | ||
1142 | if (rc) | ||
1143 | goto done; | ||
1144 | fpos++; | ||
1145 | } | ||
1146 | m = NULL; | ||
1147 | ctx = NULL; | ||
1148 | /* | ||
1149 | * Allocate a buffer to store the current name being processed | ||
1150 | * converted to format determined by current NLS. | ||
1151 | */ | ||
1152 | name = (u8*)kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, | ||
1153 | GFP_NOFS); | ||
1154 | if (unlikely(!name)) { | ||
1155 | err = -ENOMEM; | ||
1156 | goto err_out; | ||
1157 | } | ||
1158 | /* Are we jumping straight into the index allocation attribute? */ | ||
1159 | if (fpos >= vol->mft_record_size) | ||
1160 | goto skip_index_root; | ||
1161 | /* Get hold of the mft record for the directory. */ | ||
1162 | m = map_mft_record(ndir); | ||
1163 | if (IS_ERR(m)) { | ||
1164 | err = PTR_ERR(m); | ||
1165 | m = NULL; | ||
1166 | goto err_out; | ||
1167 | } | ||
1168 | ctx = ntfs_attr_get_search_ctx(ndir, m); | ||
1169 | if (unlikely(!ctx)) { | ||
1170 | err = -ENOMEM; | ||
1171 | goto err_out; | ||
1172 | } | ||
1173 | /* Get the offset into the index root attribute. */ | ||
1174 | ir_pos = (s64)fpos; | ||
1175 | /* Find the index root attribute in the mft record. */ | ||
1176 | err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, | ||
1177 | 0, ctx); | ||
1178 | if (unlikely(err)) { | ||
1179 | ntfs_error(sb, "Index root attribute missing in directory " | ||
1180 | "inode 0x%lx.", vdir->i_ino); | ||
1181 | goto err_out; | ||
1182 | } | ||
1183 | /* | ||
1184 | * Copy the index root attribute value to a buffer so that we can put | ||
1185 | * the search context and unmap the mft record before calling the | ||
1186 | * filldir() callback. We need to do this because of NFSd which calls | ||
1187 | * ->lookup() from its filldir callback() and this causes NTFS to | ||
1188 | * deadlock as ntfs_lookup() maps the mft record of the directory and | ||
1189 | * we have got it mapped here already. The only solution is for us to | ||
1190 | * unmap the mft record here so that a call to ntfs_lookup() is able to | ||
1191 | * map the mft record without deadlocking. | ||
1192 | */ | ||
1193 | rc = le32_to_cpu(ctx->attr->data.resident.value_length); | ||
1194 | ir = (INDEX_ROOT*)kmalloc(rc, GFP_NOFS); | ||
1195 | if (unlikely(!ir)) { | ||
1196 | err = -ENOMEM; | ||
1197 | goto err_out; | ||
1198 | } | ||
1199 | /* Copy the index root value (it has been verified in read_inode). */ | ||
1200 | memcpy(ir, (u8*)ctx->attr + | ||
1201 | le16_to_cpu(ctx->attr->data.resident.value_offset), rc); | ||
1202 | ntfs_attr_put_search_ctx(ctx); | ||
1203 | unmap_mft_record(ndir); | ||
1204 | ctx = NULL; | ||
1205 | m = NULL; | ||
1206 | index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length); | ||
1207 | /* The first index entry. */ | ||
1208 | ie = (INDEX_ENTRY*)((u8*)&ir->index + | ||
1209 | le32_to_cpu(ir->index.entries_offset)); | ||
1210 | /* | ||
1211 | * Loop until we exceed valid memory (corruption case) or until we | ||
1212 | * reach the last entry or until filldir tells us it has had enough | ||
1213 | * or signals an error (both covered by the rc test). | ||
1214 | */ | ||
1215 | for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { | ||
1216 | ntfs_debug("In index root, offset 0x%zx.", (u8*)ie - (u8*)ir); | ||
1217 | /* Bounds checks. */ | ||
1218 | if (unlikely((u8*)ie < (u8*)ir || (u8*)ie + | ||
1219 | sizeof(INDEX_ENTRY_HEADER) > index_end || | ||
1220 | (u8*)ie + le16_to_cpu(ie->key_length) > | ||
1221 | index_end)) | ||
1222 | goto err_out; | ||
1223 | /* The last entry cannot contain a name. */ | ||
1224 | if (ie->flags & INDEX_ENTRY_END) | ||
1225 | break; | ||
1226 | /* Skip index root entry if continuing previous readdir. */ | ||
1227 | if (ir_pos > (u8*)ie - (u8*)ir) | ||
1228 | continue; | ||
1229 | /* Advance the position even if going to skip the entry. */ | ||
1230 | fpos = (u8*)ie - (u8*)ir; | ||
1231 | /* Submit the name to the filldir callback. */ | ||
1232 | rc = ntfs_filldir(vol, fpos, ndir, NULL, ie, name, dirent, | ||
1233 | filldir); | ||
1234 | if (rc) { | ||
1235 | kfree(ir); | ||
1236 | goto abort; | ||
1237 | } | ||
1238 | } | ||
1239 | /* We are done with the index root and can free the buffer. */ | ||
1240 | kfree(ir); | ||
1241 | ir = NULL; | ||
1242 | /* If there is no index allocation attribute we are finished. */ | ||
1243 | if (!NInoIndexAllocPresent(ndir)) | ||
1244 | goto EOD; | ||
1245 | /* Advance fpos to the beginning of the index allocation. */ | ||
1246 | fpos = vol->mft_record_size; | ||
1247 | skip_index_root: | ||
1248 | kaddr = NULL; | ||
1249 | prev_ia_pos = -1LL; | ||
1250 | /* Get the offset into the index allocation attribute. */ | ||
1251 | ia_pos = (s64)fpos - vol->mft_record_size; | ||
1252 | ia_mapping = vdir->i_mapping; | ||
1253 | bmp_vi = ndir->itype.index.bmp_ino; | ||
1254 | if (unlikely(!bmp_vi)) { | ||
1255 | ntfs_debug("Inode 0x%lx, regetting index bitmap.", vdir->i_ino); | ||
1256 | bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4); | ||
1257 | if (IS_ERR(bmp_vi)) { | ||
1258 | ntfs_error(sb, "Failed to get bitmap attribute."); | ||
1259 | err = PTR_ERR(bmp_vi); | ||
1260 | goto err_out; | ||
1261 | } | ||
1262 | ndir->itype.index.bmp_ino = bmp_vi; | ||
1263 | } | ||
1264 | bmp_mapping = bmp_vi->i_mapping; | ||
1265 | /* Get the starting bitmap bit position and sanity check it. */ | ||
1266 | bmp_pos = ia_pos >> ndir->itype.index.block_size_bits; | ||
1267 | if (unlikely(bmp_pos >> 3 >= bmp_vi->i_size)) { | ||
1268 | ntfs_error(sb, "Current index allocation position exceeds " | ||
1269 | "index bitmap size."); | ||
1270 | goto err_out; | ||
1271 | } | ||
1272 | /* Get the starting bit position in the current bitmap page. */ | ||
1273 | cur_bmp_pos = bmp_pos & ((PAGE_CACHE_SIZE * 8) - 1); | ||
1274 | bmp_pos &= ~(u64)((PAGE_CACHE_SIZE * 8) - 1); | ||
1275 | get_next_bmp_page: | ||
1276 | ntfs_debug("Reading bitmap with page index 0x%llx, bit ofs 0x%llx", | ||
1277 | (unsigned long long)bmp_pos >> (3 + PAGE_CACHE_SHIFT), | ||
1278 | (unsigned long long)bmp_pos & | ||
1279 | (unsigned long long)((PAGE_CACHE_SIZE * 8) - 1)); | ||
1280 | bmp_page = ntfs_map_page(bmp_mapping, | ||
1281 | bmp_pos >> (3 + PAGE_CACHE_SHIFT)); | ||
1282 | if (IS_ERR(bmp_page)) { | ||
1283 | ntfs_error(sb, "Reading index bitmap failed."); | ||
1284 | err = PTR_ERR(bmp_page); | ||
1285 | bmp_page = NULL; | ||
1286 | goto err_out; | ||
1287 | } | ||
1288 | bmp = (u8*)page_address(bmp_page); | ||
1289 | /* Find next index block in use. */ | ||
1290 | while (!(bmp[cur_bmp_pos >> 3] & (1 << (cur_bmp_pos & 7)))) { | ||
1291 | find_next_index_buffer: | ||
1292 | cur_bmp_pos++; | ||
1293 | /* | ||
1294 | * If we have reached the end of the bitmap page, get the next | ||
1295 | * page, and put away the old one. | ||
1296 | */ | ||
1297 | if (unlikely((cur_bmp_pos >> 3) >= PAGE_CACHE_SIZE)) { | ||
1298 | ntfs_unmap_page(bmp_page); | ||
1299 | bmp_pos += PAGE_CACHE_SIZE * 8; | ||
1300 | cur_bmp_pos = 0; | ||
1301 | goto get_next_bmp_page; | ||
1302 | } | ||
1303 | /* If we have reached the end of the bitmap, we are done. */ | ||
1304 | if (unlikely(((bmp_pos + cur_bmp_pos) >> 3) >= vdir->i_size)) | ||
1305 | goto unm_EOD; | ||
1306 | ia_pos = (bmp_pos + cur_bmp_pos) << | ||
1307 | ndir->itype.index.block_size_bits; | ||
1308 | } | ||
1309 | ntfs_debug("Handling index buffer 0x%llx.", | ||
1310 | (unsigned long long)bmp_pos + cur_bmp_pos); | ||
1311 | /* If the current index buffer is in the same page we reuse the page. */ | ||
1312 | if ((prev_ia_pos & PAGE_CACHE_MASK) != (ia_pos & PAGE_CACHE_MASK)) { | ||
1313 | prev_ia_pos = ia_pos; | ||
1314 | if (likely(ia_page != NULL)) { | ||
1315 | unlock_page(ia_page); | ||
1316 | ntfs_unmap_page(ia_page); | ||
1317 | } | ||
1318 | /* | ||
1319 | * Map the page cache page containing the current ia_pos, | ||
1320 | * reading it from disk if necessary. | ||
1321 | */ | ||
1322 | ia_page = ntfs_map_page(ia_mapping, ia_pos >> PAGE_CACHE_SHIFT); | ||
1323 | if (IS_ERR(ia_page)) { | ||
1324 | ntfs_error(sb, "Reading index allocation data failed."); | ||
1325 | err = PTR_ERR(ia_page); | ||
1326 | ia_page = NULL; | ||
1327 | goto err_out; | ||
1328 | } | ||
1329 | lock_page(ia_page); | ||
1330 | kaddr = (u8*)page_address(ia_page); | ||
1331 | } | ||
1332 | /* Get the current index buffer. */ | ||
1333 | ia = (INDEX_ALLOCATION*)(kaddr + (ia_pos & ~PAGE_CACHE_MASK & | ||
1334 | ~(s64)(ndir->itype.index.block_size - 1))); | ||
1335 | /* Bounds checks. */ | ||
1336 | if (unlikely((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE)) { | ||
1337 | ntfs_error(sb, "Out of bounds check failed. Corrupt directory " | ||
1338 | "inode 0x%lx or driver bug.", vdir->i_ino); | ||
1339 | goto err_out; | ||
1340 | } | ||
1341 | /* Catch multi sector transfer fixup errors. */ | ||
1342 | if (unlikely(!ntfs_is_indx_record(ia->magic))) { | ||
1343 | ntfs_error(sb, "Directory index record with vcn 0x%llx is " | ||
1344 | "corrupt. Corrupt inode 0x%lx. Run chkdsk.", | ||
1345 | (unsigned long long)ia_pos >> | ||
1346 | ndir->itype.index.vcn_size_bits, vdir->i_ino); | ||
1347 | goto err_out; | ||
1348 | } | ||
1349 | if (unlikely(sle64_to_cpu(ia->index_block_vcn) != (ia_pos & | ||
1350 | ~(s64)(ndir->itype.index.block_size - 1)) >> | ||
1351 | ndir->itype.index.vcn_size_bits)) { | ||
1352 | ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is " | ||
1353 | "different from expected VCN (0x%llx). " | ||
1354 | "Directory inode 0x%lx is corrupt or driver " | ||
1355 | "bug. ", (unsigned long long) | ||
1356 | sle64_to_cpu(ia->index_block_vcn), | ||
1357 | (unsigned long long)ia_pos >> | ||
1358 | ndir->itype.index.vcn_size_bits, vdir->i_ino); | ||
1359 | goto err_out; | ||
1360 | } | ||
1361 | if (unlikely(le32_to_cpu(ia->index.allocated_size) + 0x18 != | ||
1362 | ndir->itype.index.block_size)) { | ||
1363 | ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode " | ||
1364 | "0x%lx has a size (%u) differing from the " | ||
1365 | "directory specified size (%u). Directory " | ||
1366 | "inode is corrupt or driver bug.", | ||
1367 | (unsigned long long)ia_pos >> | ||
1368 | ndir->itype.index.vcn_size_bits, vdir->i_ino, | ||
1369 | le32_to_cpu(ia->index.allocated_size) + 0x18, | ||
1370 | ndir->itype.index.block_size); | ||
1371 | goto err_out; | ||
1372 | } | ||
1373 | index_end = (u8*)ia + ndir->itype.index.block_size; | ||
1374 | if (unlikely(index_end > kaddr + PAGE_CACHE_SIZE)) { | ||
1375 | ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode " | ||
1376 | "0x%lx crosses page boundary. Impossible! " | ||
1377 | "Cannot access! This is probably a bug in the " | ||
1378 | "driver.", (unsigned long long)ia_pos >> | ||
1379 | ndir->itype.index.vcn_size_bits, vdir->i_ino); | ||
1380 | goto err_out; | ||
1381 | } | ||
1382 | ia_start = ia_pos & ~(s64)(ndir->itype.index.block_size - 1); | ||
1383 | index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length); | ||
1384 | if (unlikely(index_end > (u8*)ia + ndir->itype.index.block_size)) { | ||
1385 | ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory " | ||
1386 | "inode 0x%lx exceeds maximum size.", | ||
1387 | (unsigned long long)ia_pos >> | ||
1388 | ndir->itype.index.vcn_size_bits, vdir->i_ino); | ||
1389 | goto err_out; | ||
1390 | } | ||
1391 | /* The first index entry in this index buffer. */ | ||
1392 | ie = (INDEX_ENTRY*)((u8*)&ia->index + | ||
1393 | le32_to_cpu(ia->index.entries_offset)); | ||
1394 | /* | ||
1395 | * Loop until we exceed valid memory (corruption case) or until we | ||
1396 | * reach the last entry or until filldir tells us it has had enough | ||
1397 | * or signals an error (both covered by the rc test). | ||
1398 | */ | ||
1399 | for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { | ||
1400 | ntfs_debug("In index allocation, offset 0x%llx.", | ||
1401 | (unsigned long long)ia_start + | ||
1402 | (unsigned long long)((u8*)ie - (u8*)ia)); | ||
1403 | /* Bounds checks. */ | ||
1404 | if (unlikely((u8*)ie < (u8*)ia || (u8*)ie + | ||
1405 | sizeof(INDEX_ENTRY_HEADER) > index_end || | ||
1406 | (u8*)ie + le16_to_cpu(ie->key_length) > | ||
1407 | index_end)) | ||
1408 | goto err_out; | ||
1409 | /* The last entry cannot contain a name. */ | ||
1410 | if (ie->flags & INDEX_ENTRY_END) | ||
1411 | break; | ||
1412 | /* Skip index block entry if continuing previous readdir. */ | ||
1413 | if (ia_pos - ia_start > (u8*)ie - (u8*)ia) | ||
1414 | continue; | ||
1415 | /* Advance the position even if going to skip the entry. */ | ||
1416 | fpos = (u8*)ie - (u8*)ia + | ||
1417 | (sle64_to_cpu(ia->index_block_vcn) << | ||
1418 | ndir->itype.index.vcn_size_bits) + | ||
1419 | vol->mft_record_size; | ||
1420 | /* | ||
1421 | * Submit the name to the @filldir callback. Note, | ||
1422 | * ntfs_filldir() drops the lock on @ia_page but it retakes it | ||
1423 | * before returning, unless a non-zero value is returned in | ||
1424 | * which case the page is left unlocked. | ||
1425 | */ | ||
1426 | rc = ntfs_filldir(vol, fpos, ndir, ia_page, ie, name, dirent, | ||
1427 | filldir); | ||
1428 | if (rc) { | ||
1429 | /* @ia_page is already unlocked in this case. */ | ||
1430 | ntfs_unmap_page(ia_page); | ||
1431 | ntfs_unmap_page(bmp_page); | ||
1432 | goto abort; | ||
1433 | } | ||
1434 | } | ||
1435 | goto find_next_index_buffer; | ||
1436 | unm_EOD: | ||
1437 | if (ia_page) { | ||
1438 | unlock_page(ia_page); | ||
1439 | ntfs_unmap_page(ia_page); | ||
1440 | } | ||
1441 | ntfs_unmap_page(bmp_page); | ||
1442 | EOD: | ||
1443 | /* We are finished, set fpos to EOD. */ | ||
1444 | fpos = vdir->i_size + vol->mft_record_size; | ||
1445 | abort: | ||
1446 | kfree(name); | ||
1447 | done: | ||
1448 | #ifdef DEBUG | ||
1449 | if (!rc) | ||
1450 | ntfs_debug("EOD, fpos 0x%llx, returning 0.", fpos); | ||
1451 | else | ||
1452 | ntfs_debug("filldir returned %i, fpos 0x%llx, returning 0.", | ||
1453 | rc, fpos); | ||
1454 | #endif | ||
1455 | filp->f_pos = fpos; | ||
1456 | return 0; | ||
1457 | err_out: | ||
1458 | if (bmp_page) | ||
1459 | ntfs_unmap_page(bmp_page); | ||
1460 | if (ia_page) { | ||
1461 | unlock_page(ia_page); | ||
1462 | ntfs_unmap_page(ia_page); | ||
1463 | } | ||
1464 | if (ir) | ||
1465 | kfree(ir); | ||
1466 | if (name) | ||
1467 | kfree(name); | ||
1468 | if (ctx) | ||
1469 | ntfs_attr_put_search_ctx(ctx); | ||
1470 | if (m) | ||
1471 | unmap_mft_record(ndir); | ||
1472 | if (!err) | ||
1473 | err = -EIO; | ||
1474 | ntfs_debug("Failed. Returning error code %i.", -err); | ||
1475 | filp->f_pos = fpos; | ||
1476 | return err; | ||
1477 | } | ||
1478 | |||
1479 | /** | ||
1480 | * ntfs_dir_open - called when an inode is about to be opened | ||
1481 | * @vi: inode to be opened | ||
1482 | * @filp: file structure describing the inode | ||
1483 | * | ||
1484 | * Limit directory size to the page cache limit on architectures where unsigned | ||
1485 | * long is 32-bits. This is the most we can do for now without overflowing the | ||
1486 | * page cache page index. Doing it this way means we don't run into problems | ||
1487 | * because of existing too large directories. It would be better to allow the | ||
1488 | * user to read the accessible part of the directory but I doubt very much | ||
1489 | * anyone is going to hit this check on a 32-bit architecture, so there is no | ||
1490 | * point in adding the extra complexity required to support this. | ||
1491 | * | ||
1492 | * On 64-bit architectures, the check is hopefully optimized away by the | ||
1493 | * compiler. | ||
1494 | */ | ||
1495 | static int ntfs_dir_open(struct inode *vi, struct file *filp) | ||
1496 | { | ||
1497 | if (sizeof(unsigned long) < 8) { | ||
1498 | if (vi->i_size > MAX_LFS_FILESIZE) | ||
1499 | return -EFBIG; | ||
1500 | } | ||
1501 | return 0; | ||
1502 | } | ||
1503 | |||
1504 | #ifdef NTFS_RW | ||
1505 | |||
1506 | /** | ||
1507 | * ntfs_dir_fsync - sync a directory to disk | ||
1508 | * @filp: directory to be synced | ||
1509 | * @dentry: dentry describing the directory to sync | ||
1510 | * @datasync: if non-zero only flush user data and not metadata | ||
1511 | * | ||
1512 | * Data integrity sync of a directory to disk. Used for fsync, fdatasync, and | ||
1513 | * msync system calls. This function is based on file.c::ntfs_file_fsync(). | ||
1514 | * | ||
1515 | * Write the mft record and all associated extent mft records as well as the | ||
1516 | * $INDEX_ALLOCATION and $BITMAP attributes and then sync the block device. | ||
1517 | * | ||
1518 | * If @datasync is true, we do not wait on the inode(s) to be written out | ||
1519 | * but we always wait on the page cache pages to be written out. | ||
1520 | * | ||
1521 | * Note: In the past @filp could be NULL so we ignore it as we don't need it | ||
1522 | * anyway. | ||
1523 | * | ||
1524 | * Locking: Caller must hold i_sem on the inode. | ||
1525 | * | ||
1526 | * TODO: We should probably also write all attribute/index inodes associated | ||
1527 | * with this inode but since we have no simple way of getting to them we ignore | ||
1528 | * this problem for now. We do write the $BITMAP attribute if it is present | ||
1529 | * which is the important one for a directory so things are not too bad. | ||
1530 | */ | ||
1531 | static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry, | ||
1532 | int datasync) | ||
1533 | { | ||
1534 | struct inode *vi = dentry->d_inode; | ||
1535 | ntfs_inode *ni = NTFS_I(vi); | ||
1536 | int err, ret; | ||
1537 | |||
1538 | ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); | ||
1539 | BUG_ON(!S_ISDIR(vi->i_mode)); | ||
1540 | if (NInoIndexAllocPresent(ni) && ni->itype.index.bmp_ino) | ||
1541 | write_inode_now(ni->itype.index.bmp_ino, !datasync); | ||
1542 | ret = ntfs_write_inode(vi, 1); | ||
1543 | write_inode_now(vi, !datasync); | ||
1544 | err = sync_blockdev(vi->i_sb->s_bdev); | ||
1545 | if (unlikely(err && !ret)) | ||
1546 | ret = err; | ||
1547 | if (likely(!ret)) | ||
1548 | ntfs_debug("Done."); | ||
1549 | else | ||
1550 | ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error " | ||
1551 | "%u.", datasync ? "data" : "", vi->i_ino, -ret); | ||
1552 | return ret; | ||
1553 | } | ||
1554 | |||
1555 | #endif /* NTFS_RW */ | ||
1556 | |||
1557 | struct file_operations ntfs_dir_ops = { | ||
1558 | .llseek = generic_file_llseek, /* Seek inside directory. */ | ||
1559 | .read = generic_read_dir, /* Return -EISDIR. */ | ||
1560 | .readdir = ntfs_readdir, /* Read directory contents. */ | ||
1561 | #ifdef NTFS_RW | ||
1562 | .fsync = ntfs_dir_fsync, /* Sync a directory to disk. */ | ||
1563 | /*.aio_fsync = ,*/ /* Sync all outstanding async | ||
1564 | i/o operations on a kiocb. */ | ||
1565 | #endif /* NTFS_RW */ | ||
1566 | /*.ioctl = ,*/ /* Perform function on the | ||
1567 | mounted filesystem. */ | ||
1568 | .open = ntfs_dir_open, /* Open directory. */ | ||
1569 | }; | ||
diff --git a/fs/ntfs/dir.h b/fs/ntfs/dir.h new file mode 100644 index 000000000000..aea7582d561f --- /dev/null +++ b/fs/ntfs/dir.h | |||
@@ -0,0 +1,48 @@ | |||
1 | /* | ||
2 | * dir.h - Defines for directory handling in NTFS Linux kernel driver. Part of | ||
3 | * the Linux-NTFS project. | ||
4 | * | ||
5 | * Copyright (c) 2002-2004 Anton Altaparmakov | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #ifndef _LINUX_NTFS_DIR_H | ||
24 | #define _LINUX_NTFS_DIR_H | ||
25 | |||
26 | #include "layout.h" | ||
27 | #include "inode.h" | ||
28 | #include "types.h" | ||
29 | |||
30 | /* | ||
31 | * ntfs_name is used to return the file name to the caller of | ||
32 | * ntfs_lookup_inode_by_name() so that the caller (namei.c::ntfs_lookup()) | ||
33 | * is able to deal with dcache aliasing issues. The structure is packed. | ||
34 | */ | ||
35 | typedef struct { | ||
36 | MFT_REF mref; /* NOTE(review): presumably the mft reference of the name's inode - confirm */ | ||
37 | FILE_NAME_TYPE_FLAGS type; /* NOTE(review): presumably the file name type (name space) of @name - confirm */ | ||
38 | u8 len; /* NOTE(review): presumably the length of @name in ntfschars - confirm */ | ||
39 | ntfschar name[0]; /* zero length trailing array, the name follows the header */ | ||
40 | } __attribute__ ((__packed__)) ntfs_name; | ||
41 | |||
42 | /* The little endian Unicode string $I30 as a global constant. */ | ||
43 | extern ntfschar I30[5]; | ||
44 | |||
45 | extern MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, | ||
46 | const ntfschar *uname, const int uname_len, ntfs_name **res); | ||
47 | |||
48 | #endif /* _LINUX_NTFS_DIR_H */ | ||
diff --git a/fs/ntfs/endian.h b/fs/ntfs/endian.h new file mode 100644 index 000000000000..927b5bf04b4f --- /dev/null +++ b/fs/ntfs/endian.h | |||
@@ -0,0 +1,93 @@ | |||
1 | /* | ||
2 | * endian.h - Defines for endianness handling in NTFS Linux kernel driver. | ||
3 | * Part of the Linux-NTFS project. | ||
4 | * | ||
5 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #ifndef _LINUX_NTFS_ENDIAN_H | ||
24 | #define _LINUX_NTFS_ENDIAN_H | ||
25 | |||
26 | #include <asm/byteorder.h> | ||
27 | #include "types.h" | ||
28 | |||
29 | /* | ||
30 | * Signed endianness conversion functions. | ||
31 | */ | ||
32 | |||
33 | static inline s16 sle16_to_cpu(sle16 x) | ||
34 | { | ||
35 | return le16_to_cpu((__force le16)x); | ||
36 | } | ||
37 | |||
38 | static inline s32 sle32_to_cpu(sle32 x) | ||
39 | { | ||
40 | return le32_to_cpu((__force le32)x); | ||
41 | } | ||
42 | |||
43 | static inline s64 sle64_to_cpu(sle64 x) | ||
44 | { | ||
45 | return le64_to_cpu((__force le64)x); | ||
46 | } | ||
47 | |||
48 | static inline s16 sle16_to_cpup(sle16 *x) | ||
49 | { | ||
50 | return le16_to_cpu(*(__force le16*)x); | ||
51 | } | ||
52 | |||
53 | static inline s32 sle32_to_cpup(sle32 *x) | ||
54 | { | ||
55 | return le32_to_cpu(*(__force le32*)x); | ||
56 | } | ||
57 | |||
58 | static inline s64 sle64_to_cpup(sle64 *x) | ||
59 | { | ||
60 | return le64_to_cpu(*(__force le64*)x); | ||
61 | } | ||
62 | |||
63 | static inline sle16 cpu_to_sle16(s16 x) | ||
64 | { | ||
65 | return (__force sle16)cpu_to_le16(x); | ||
66 | } | ||
67 | |||
68 | static inline sle32 cpu_to_sle32(s32 x) | ||
69 | { | ||
70 | return (__force sle32)cpu_to_le32(x); | ||
71 | } | ||
72 | |||
73 | static inline sle64 cpu_to_sle64(s64 x) | ||
74 | { | ||
75 | return (__force sle64)cpu_to_le64(x); | ||
76 | } | ||
77 | |||
78 | static inline sle16 cpu_to_sle16p(s16 *x) | ||
79 | { | ||
80 | return (__force sle16)cpu_to_le16(*x); | ||
81 | } | ||
82 | |||
83 | static inline sle32 cpu_to_sle32p(s32 *x) | ||
84 | { | ||
85 | return (__force sle32)cpu_to_le32(*x); | ||
86 | } | ||
87 | |||
88 | static inline sle64 cpu_to_sle64p(s64 *x) | ||
89 | { | ||
90 | return (__force sle64)cpu_to_le64(*x); | ||
91 | } | ||
92 | |||
93 | #endif /* _LINUX_NTFS_ENDIAN_H */ | ||
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c new file mode 100644 index 000000000000..db8713ea0d27 --- /dev/null +++ b/fs/ntfs/file.c | |||
@@ -0,0 +1,155 @@ | |||
1 | /* | ||
2 | * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
5 | * | ||
6 | * This program/include file is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as published | ||
8 | * by the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program/include file is distributed in the hope that it will be | ||
12 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
13 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program (in the main directory of the Linux-NTFS | ||
18 | * distribution in the file COPYING); if not, write to the Free Software | ||
19 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | |||
22 | #include <linux/pagemap.h> | ||
23 | #include <linux/buffer_head.h> | ||
24 | |||
25 | #include "inode.h" | ||
26 | #include "debug.h" | ||
27 | #include "ntfs.h" | ||
28 | |||
29 | /** | ||
30 | * ntfs_file_open - called when an inode is about to be opened | ||
31 | * @vi: inode to be opened | ||
32 | * @filp: file structure describing the inode | ||
33 | * | ||
34 | * Limit file size to the page cache limit on architectures where unsigned long | ||
35 | * is 32-bits. This is the most we can do for now without overflowing the page | ||
36 | * cache page index. Doing it this way means we don't run into problems because | ||
37 | * of existing too large files. It would be better to allow the user to read | ||
38 | * the beginning of the file but I doubt very much anyone is going to hit this | ||
39 | * check on a 32-bit architecture, so there is no point in adding the extra | ||
40 | * complexity required to support this. | ||
41 | * | ||
42 | * On 64-bit architectures, the check is hopefully optimized away by the | ||
43 | * compiler. | ||
44 | * | ||
45 | * After the check passes, just call generic_file_open() to do its work. | ||
46 | */ | ||
47 | static int ntfs_file_open(struct inode *vi, struct file *filp) | ||
48 | { | ||
49 | if (sizeof(unsigned long) < 8) { | ||
50 | if (vi->i_size > MAX_LFS_FILESIZE) | ||
51 | return -EFBIG; | ||
52 | } | ||
53 | return generic_file_open(vi, filp); | ||
54 | } | ||
55 | |||
56 | #ifdef NTFS_RW | ||
57 | |||
58 | /** | ||
59 | * ntfs_file_fsync - sync a file to disk | ||
60 | * @filp: file to be synced | ||
61 | * @dentry: dentry describing the file to sync | ||
62 | * @datasync: if non-zero only flush user data and not metadata | ||
63 | * | ||
64 | * Data integrity sync of a file to disk. Used for fsync, fdatasync, and msync | ||
65 | * system calls. This function is inspired by fs/buffer.c::file_fsync(). | ||
66 | * | ||
67 | * If @datasync is false, write the mft record and all associated extent mft | ||
68 | * records as well as the $DATA attribute and then sync the block device. | ||
69 | * | ||
70 | * If @datasync is true and the attribute is non-resident, we skip the writing | ||
71 | * of the mft record and all associated extent mft records (this might still | ||
72 | * happen due to the write_inode_now() call). | ||
73 | * | ||
74 | * Also, if @datasync is true, we do not wait on the inode to be written out | ||
75 | * but we always wait on the page cache pages to be written out. | ||
76 | * | ||
77 | * Note: In the past @filp could be NULL so we ignore it as we don't need it | ||
78 | * anyway. | ||
79 | * | ||
80 | * Locking: Caller must hold i_sem on the inode. | ||
81 | * | ||
82 | * TODO: We should probably also write all attribute/index inodes associated | ||
83 | * with this inode but since we have no simple way of getting to them we ignore | ||
84 | * this problem for now. | ||
85 | */ | ||
86 | static int ntfs_file_fsync(struct file *filp, struct dentry *dentry, | ||
87 | int datasync) | ||
88 | { | ||
89 | struct inode *vi = dentry->d_inode; | ||
90 | int err, ret = 0; | ||
91 | |||
92 | ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); | ||
93 | BUG_ON(S_ISDIR(vi->i_mode)); | ||
94 | if (!datasync || !NInoNonResident(NTFS_I(vi))) | ||
95 | ret = ntfs_write_inode(vi, 1); | ||
96 | write_inode_now(vi, !datasync); | ||
97 | err = sync_blockdev(vi->i_sb->s_bdev); | ||
98 | if (unlikely(err && !ret)) | ||
99 | ret = err; | ||
100 | if (likely(!ret)) | ||
101 | ntfs_debug("Done."); | ||
102 | else | ||
103 | ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error " | ||
104 | "%u.", datasync ? "data" : "", vi->i_ino, -ret); | ||
105 | return ret; | ||
106 | } | ||
107 | |||
108 | #endif /* NTFS_RW */ | ||
109 | |||
110 | struct file_operations ntfs_file_ops = { | ||
111 | .llseek = generic_file_llseek, /* Seek inside file. */ | ||
112 | .read = generic_file_read, /* Read from file. */ | ||
113 | .aio_read = generic_file_aio_read, /* Async read from file. */ | ||
114 | .readv = generic_file_readv, /* Read from file. */ | ||
115 | #ifdef NTFS_RW | ||
116 | .write = generic_file_write, /* Write to file. */ | ||
117 | .aio_write = generic_file_aio_write, /* Async write to file. */ | ||
118 | .writev = generic_file_writev, /* Write to file. */ | ||
119 | /*.release = ,*/ /* Last file is closed. See | ||
120 | fs/ext2/file.c:: | ||
121 | ext2_release_file() for | ||
122 | how to use this to discard | ||
123 | preallocated space for | ||
124 | write opened files. */ | ||
125 | .fsync = ntfs_file_fsync, /* Sync a file to disk. */ | ||
126 | /*.aio_fsync = ,*/ /* Sync all outstanding async | ||
127 | i/o operations on a | ||
128 | kiocb. */ | ||
129 | #endif /* NTFS_RW */ | ||
130 | /*.ioctl = ,*/ /* Perform function on the | ||
131 | mounted filesystem. */ | ||
132 | .mmap = generic_file_mmap, /* Mmap file. */ | ||
133 | .open = ntfs_file_open, /* Open file. */ | ||
134 | .sendfile = generic_file_sendfile, /* Zero-copy data send with | ||
135 | the data source being on | ||
136 | the ntfs partition. We | ||
137 | do not need to care about | ||
138 | the data destination. */ | ||
139 | /*.sendpage = ,*/ /* Zero-copy data send with | ||
140 | the data destination being | ||
141 | on the ntfs partition. We | ||
142 | do not need to care about | ||
143 | the data source. */ | ||
144 | }; | ||
145 | |||
146 | struct inode_operations ntfs_file_inode_ops = { | ||
147 | #ifdef NTFS_RW | ||
148 | .truncate = ntfs_truncate_vfs, | ||
149 | .setattr = ntfs_setattr, | ||
150 | #endif /* NTFS_RW */ | ||
151 | }; | ||
152 | |||
153 | struct file_operations ntfs_empty_file_ops = {}; | ||
154 | |||
155 | struct inode_operations ntfs_empty_inode_ops = {}; | ||
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c new file mode 100644 index 000000000000..71bd2cd7a4d9 --- /dev/null +++ b/fs/ntfs/index.c | |||
@@ -0,0 +1,461 @@ | |||
1 | /* | ||
2 | * index.c - NTFS kernel index handling. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2004 Anton Altaparmakov | ||
5 | * | ||
6 | * This program/include file is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as published | ||
8 | * by the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program/include file is distributed in the hope that it will be | ||
12 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
13 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program (in the main directory of the Linux-NTFS | ||
18 | * distribution in the file COPYING); if not, write to the Free Software | ||
19 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | |||
22 | #include "aops.h" | ||
23 | #include "collate.h" | ||
24 | #include "debug.h" | ||
25 | #include "index.h" | ||
26 | #include "ntfs.h" | ||
27 | |||
28 | /** | ||
29 | * ntfs_index_ctx_get - allocate and initialize a new index context | ||
30 | * @idx_ni: ntfs index inode with which to initialize the context | ||
31 | * | ||
32 | * Allocate a new index context, initialize it with @idx_ni and return it. | ||
33 | * Return NULL if allocation failed. | ||
34 | * | ||
35 | * Locking: Caller must hold i_sem on the index inode. | ||
36 | */ | ||
37 | ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni) | ||
38 | { | ||
39 | ntfs_index_context *ictx; | ||
40 | |||
41 | ictx = kmem_cache_alloc(ntfs_index_ctx_cache, SLAB_NOFS); | ||
42 | if (ictx) { | ||
43 | ictx->idx_ni = idx_ni; | ||
44 | ictx->entry = NULL; | ||
45 | ictx->data = NULL; | ||
46 | ictx->data_len = 0; | ||
47 | ictx->is_in_root = 0; | ||
48 | ictx->ir = NULL; | ||
49 | ictx->actx = NULL; | ||
50 | ictx->base_ni = NULL; | ||
51 | ictx->ia = NULL; | ||
52 | ictx->page = NULL; | ||
53 | } | ||
54 | return ictx; | ||
55 | } | ||
56 | |||
57 | /** | ||
58 | * ntfs_index_ctx_put - release an index context | ||
59 | * @ictx: index context to free | ||
60 | * | ||
61 | * Release the index context @ictx, releasing all associated resources. | ||
62 | * | ||
63 | * Locking: Caller must hold i_sem on the index inode. | ||
64 | */ | ||
65 | void ntfs_index_ctx_put(ntfs_index_context *ictx) | ||
66 | { | ||
67 | if (ictx->entry) { | ||
68 | if (ictx->is_in_root) { | ||
69 | if (ictx->actx) | ||
70 | ntfs_attr_put_search_ctx(ictx->actx); | ||
71 | if (ictx->base_ni) | ||
72 | unmap_mft_record(ictx->base_ni); | ||
73 | } else { | ||
74 | struct page *page = ictx->page; | ||
75 | if (page) { | ||
76 | BUG_ON(!PageLocked(page)); | ||
77 | unlock_page(page); | ||
78 | ntfs_unmap_page(page); | ||
79 | } | ||
80 | } | ||
81 | } | ||
82 | kmem_cache_free(ntfs_index_ctx_cache, ictx); | ||
83 | return; | ||
84 | } | ||
85 | |||
86 | /** | ||
87 | * ntfs_index_lookup - find a key in an index and return its index entry | ||
88 | * @key: [IN] key for which to search in the index | ||
89 | * @key_len: [IN] length of @key in bytes | ||
90 | * @ictx: [IN/OUT] context describing the index and the returned entry | ||
91 | * | ||
92 | * Before calling ntfs_index_lookup(), @ictx must have been obtained from a | ||
93 | * call to ntfs_index_ctx_get(). | ||
94 | * | ||
95 | * Look for the @key in the index specified by the index lookup context @ictx. | ||
96 | * ntfs_index_lookup() walks the contents of the index looking for the @key. | ||
97 | * | ||
98 | * If the @key is found in the index, 0 is returned and @ictx is setup to | ||
99 | * describe the index entry containing the matching @key. @ictx->entry is the | ||
100 | * index entry and @ictx->data and @ictx->data_len are the index entry data and | ||
101 | * its length in bytes, respectively. | ||
102 | * | ||
103 | * If the @key is not found in the index, -ENOENT is returned and @ictx is | ||
104 | * setup to describe the index entry whose key collates immediately after the | ||
105 | * search @key, i.e. this is the position in the index at which an index entry | ||
106 | * with a key of @key would need to be inserted. | ||
107 | * | ||
108 | * If an error occurs return the negative error code and @ictx is left | ||
109 | * untouched. | ||
110 | * | ||
111 | * When finished with the entry and its data, call ntfs_index_ctx_put() to free | ||
112 | * the context and other associated resources. | ||
113 | * | ||
114 | * If the index entry was modified, call flush_dcache_index_entry_page() | ||
115 | * immediately after the modification and either ntfs_index_entry_mark_dirty() | ||
116 | * or ntfs_index_entry_write() before the call to ntfs_index_ctx_put() to | ||
117 | * ensure that the changes are written to disk. | ||
118 | * | ||
119 | * Locking: - Caller must hold i_sem on the index inode. | ||
120 | * - Each page cache page in the index allocation mapping must be | ||
121 | * locked whilst being accessed otherwise we may find a corrupt | ||
122 | * page due to it being under ->writepage at the moment which | ||
123 | * applies the mst protection fixups before writing out and then | ||
124 | * removes them again after the write is complete after which it | ||
125 | * unlocks the page. | ||
126 | */ | ||
127 | int ntfs_index_lookup(const void *key, const int key_len, | ||
128 | ntfs_index_context *ictx) | ||
129 | { | ||
130 | VCN vcn, old_vcn; | ||
131 | ntfs_inode *idx_ni = ictx->idx_ni; | ||
132 | ntfs_volume *vol = idx_ni->vol; | ||
133 | struct super_block *sb = vol->sb; | ||
134 | ntfs_inode *base_ni = idx_ni->ext.base_ntfs_ino; | ||
135 | MFT_RECORD *m; | ||
136 | INDEX_ROOT *ir; | ||
137 | INDEX_ENTRY *ie; | ||
138 | INDEX_ALLOCATION *ia; | ||
139 | u8 *index_end, *kaddr; | ||
140 | ntfs_attr_search_ctx *actx; | ||
141 | struct address_space *ia_mapping; | ||
142 | struct page *page; | ||
143 | int rc, err = 0; | ||
144 | |||
145 | ntfs_debug("Entering."); | ||
146 | BUG_ON(!NInoAttr(idx_ni)); | ||
147 | BUG_ON(idx_ni->type != AT_INDEX_ALLOCATION); | ||
148 | BUG_ON(idx_ni->nr_extents != -1); | ||
149 | BUG_ON(!base_ni); | ||
150 | BUG_ON(!key); | ||
151 | BUG_ON(key_len <= 0); | ||
152 | if (!ntfs_is_collation_rule_supported( | ||
153 | idx_ni->itype.index.collation_rule)) { | ||
154 | ntfs_error(sb, "Index uses unsupported collation rule 0x%x. " | ||
155 | "Aborting lookup.", le32_to_cpu( | ||
156 | idx_ni->itype.index.collation_rule)); | ||
157 | return -EOPNOTSUPP; | ||
158 | } | ||
159 | /* Get hold of the mft record for the index inode. */ | ||
160 | m = map_mft_record(base_ni); | ||
161 | if (IS_ERR(m)) { | ||
162 | ntfs_error(sb, "map_mft_record() failed with error code %ld.", | ||
163 | -PTR_ERR(m)); | ||
164 | return PTR_ERR(m); | ||
165 | } | ||
166 | actx = ntfs_attr_get_search_ctx(base_ni, m); | ||
167 | if (unlikely(!actx)) { | ||
168 | err = -ENOMEM; | ||
169 | goto err_out; | ||
170 | } | ||
171 | /* Find the index root attribute in the mft record. */ | ||
172 | err = ntfs_attr_lookup(AT_INDEX_ROOT, idx_ni->name, idx_ni->name_len, | ||
173 | CASE_SENSITIVE, 0, NULL, 0, actx); | ||
174 | if (unlikely(err)) { | ||
175 | if (err == -ENOENT) { | ||
176 | ntfs_error(sb, "Index root attribute missing in inode " | ||
177 | "0x%lx.", idx_ni->mft_no); | ||
178 | err = -EIO; | ||
179 | } | ||
180 | goto err_out; | ||
181 | } | ||
182 | /* Get to the index root value (it has been verified in read_inode). */ | ||
183 | ir = (INDEX_ROOT*)((u8*)actx->attr + | ||
184 | le16_to_cpu(actx->attr->data.resident.value_offset)); | ||
185 | index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length); | ||
186 | /* The first index entry. */ | ||
187 | ie = (INDEX_ENTRY*)((u8*)&ir->index + | ||
188 | le32_to_cpu(ir->index.entries_offset)); | ||
189 | /* | ||
190 | * Loop until we exceed valid memory (corruption case) or until we | ||
191 | * reach the last entry. | ||
192 | */ | ||
193 | for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { | ||
194 | /* Bounds checks. */ | ||
195 | if ((u8*)ie < (u8*)actx->mrec || (u8*)ie + | ||
196 | sizeof(INDEX_ENTRY_HEADER) > index_end || | ||
197 | (u8*)ie + le16_to_cpu(ie->length) > index_end) | ||
198 | goto idx_err_out; | ||
199 | /* | ||
200 | * The last entry cannot contain a key. It can however contain | ||
201 | * a pointer to a child node in the B+tree so we just break out. | ||
202 | */ | ||
203 | if (ie->flags & INDEX_ENTRY_END) | ||
204 | break; | ||
205 | /* Further bounds checks. */ | ||
206 | if ((u32)sizeof(INDEX_ENTRY_HEADER) + | ||
207 | le16_to_cpu(ie->key_length) > | ||
208 | le16_to_cpu(ie->data.vi.data_offset) || | ||
209 | (u32)le16_to_cpu(ie->data.vi.data_offset) + | ||
210 | le16_to_cpu(ie->data.vi.data_length) > | ||
211 | le16_to_cpu(ie->length)) | ||
212 | goto idx_err_out; | ||
213 | /* If the keys match perfectly, we setup @ictx and return 0. */ | ||
214 | if ((key_len == le16_to_cpu(ie->key_length)) && !memcmp(key, | ||
215 | &ie->key, key_len)) { | ||
216 | ir_done: | ||
217 | ictx->is_in_root = TRUE; | ||
218 | ictx->actx = actx; | ||
219 | ictx->base_ni = base_ni; | ||
220 | ictx->ia = NULL; | ||
221 | ictx->page = NULL; | ||
222 | done: | ||
223 | ictx->entry = ie; | ||
224 | ictx->data = (u8*)ie + | ||
225 | le16_to_cpu(ie->data.vi.data_offset); | ||
226 | ictx->data_len = le16_to_cpu(ie->data.vi.data_length); | ||
227 | ntfs_debug("Done."); | ||
228 | return err; | ||
229 | } | ||
230 | /* | ||
231 | * Not a perfect match, need to do full blown collation so we | ||
232 | * know which way in the B+tree we have to go. | ||
233 | */ | ||
234 | rc = ntfs_collate(vol, idx_ni->itype.index.collation_rule, key, | ||
235 | key_len, &ie->key, le16_to_cpu(ie->key_length)); | ||
236 | /* | ||
237 | * If @key collates before the key of the current entry, there | ||
238 | * is definitely no such key in this index but we might need to | ||
239 | * descend into the B+tree so we just break out of the loop. | ||
240 | */ | ||
241 | if (rc == -1) | ||
242 | break; | ||
243 | /* | ||
244 | * A match should never happen as the memcmp() call should have | ||
245 | * cought it, but we still treat it correctly. | ||
246 | */ | ||
247 | if (!rc) | ||
248 | goto ir_done; | ||
249 | /* The keys are not equal, continue the search. */ | ||
250 | } | ||
251 | /* | ||
252 | * We have finished with this index without success. Check for the | ||
253 | * presence of a child node and if not present setup @ictx and return | ||
254 | * -ENOENT. | ||
255 | */ | ||
256 | if (!(ie->flags & INDEX_ENTRY_NODE)) { | ||
257 | ntfs_debug("Entry not found."); | ||
258 | err = -ENOENT; | ||
259 | goto ir_done; | ||
260 | } /* Child node present, descend into it. */ | ||
261 | /* Consistency check: Verify that an index allocation exists. */ | ||
262 | if (!NInoIndexAllocPresent(idx_ni)) { | ||
263 | ntfs_error(sb, "No index allocation attribute but index entry " | ||
264 | "requires one. Inode 0x%lx is corrupt or " | ||
265 | "driver bug.", idx_ni->mft_no); | ||
266 | goto err_out; | ||
267 | } | ||
268 | /* Get the starting vcn of the index_block holding the child node. */ | ||
269 | vcn = sle64_to_cpup((sle64*)((u8*)ie + le16_to_cpu(ie->length) - 8)); | ||
270 | ia_mapping = VFS_I(idx_ni)->i_mapping; | ||
271 | /* | ||
272 | * We are done with the index root and the mft record. Release them, | ||
273 | * otherwise we deadlock with ntfs_map_page(). | ||
274 | */ | ||
275 | ntfs_attr_put_search_ctx(actx); | ||
276 | unmap_mft_record(base_ni); | ||
277 | m = NULL; | ||
278 | actx = NULL; | ||
279 | descend_into_child_node: | ||
280 | /* | ||
281 | * Convert vcn to index into the index allocation attribute in units | ||
282 | * of PAGE_CACHE_SIZE and map the page cache page, reading it from | ||
283 | * disk if necessary. | ||
284 | */ | ||
285 | page = ntfs_map_page(ia_mapping, vcn << | ||
286 | idx_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT); | ||
287 | if (IS_ERR(page)) { | ||
288 | ntfs_error(sb, "Failed to map index page, error %ld.", | ||
289 | -PTR_ERR(page)); | ||
290 | err = PTR_ERR(page); | ||
291 | goto err_out; | ||
292 | } | ||
293 | lock_page(page); | ||
294 | kaddr = (u8*)page_address(page); | ||
295 | fast_descend_into_child_node: | ||
296 | /* Get to the index allocation block. */ | ||
297 | ia = (INDEX_ALLOCATION*)(kaddr + ((vcn << | ||
298 | idx_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK)); | ||
299 | /* Bounds checks. */ | ||
300 | if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) { | ||
301 | ntfs_error(sb, "Out of bounds check failed. Corrupt inode " | ||
302 | "0x%lx or driver bug.", idx_ni->mft_no); | ||
303 | goto unm_err_out; | ||
304 | } | ||
305 | /* Catch multi sector transfer fixup errors. */ | ||
306 | if (unlikely(!ntfs_is_indx_record(ia->magic))) { | ||
307 | ntfs_error(sb, "Index record with vcn 0x%llx is corrupt. " | ||
308 | "Corrupt inode 0x%lx. Run chkdsk.", | ||
309 | (long long)vcn, idx_ni->mft_no); | ||
310 | goto unm_err_out; | ||
311 | } | ||
312 | if (sle64_to_cpu(ia->index_block_vcn) != vcn) { | ||
313 | ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is " | ||
314 | "different from expected VCN (0x%llx). Inode " | ||
315 | "0x%lx is corrupt or driver bug.", | ||
316 | (unsigned long long) | ||
317 | sle64_to_cpu(ia->index_block_vcn), | ||
318 | (unsigned long long)vcn, idx_ni->mft_no); | ||
319 | goto unm_err_out; | ||
320 | } | ||
321 | if (le32_to_cpu(ia->index.allocated_size) + 0x18 != | ||
322 | idx_ni->itype.index.block_size) { | ||
323 | ntfs_error(sb, "Index buffer (VCN 0x%llx) of inode 0x%lx has " | ||
324 | "a size (%u) differing from the index " | ||
325 | "specified size (%u). Inode is corrupt or " | ||
326 | "driver bug.", (unsigned long long)vcn, | ||
327 | idx_ni->mft_no, | ||
328 | le32_to_cpu(ia->index.allocated_size) + 0x18, | ||
329 | idx_ni->itype.index.block_size); | ||
330 | goto unm_err_out; | ||
331 | } | ||
332 | index_end = (u8*)ia + idx_ni->itype.index.block_size; | ||
333 | if (index_end > kaddr + PAGE_CACHE_SIZE) { | ||
334 | ntfs_error(sb, "Index buffer (VCN 0x%llx) of inode 0x%lx " | ||
335 | "crosses page boundary. Impossible! Cannot " | ||
336 | "access! This is probably a bug in the " | ||
337 | "driver.", (unsigned long long)vcn, | ||
338 | idx_ni->mft_no); | ||
339 | goto unm_err_out; | ||
340 | } | ||
341 | index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length); | ||
342 | if (index_end > (u8*)ia + idx_ni->itype.index.block_size) { | ||
343 | ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of inode " | ||
344 | "0x%lx exceeds maximum size.", | ||
345 | (unsigned long long)vcn, idx_ni->mft_no); | ||
346 | goto unm_err_out; | ||
347 | } | ||
348 | /* The first index entry. */ | ||
349 | ie = (INDEX_ENTRY*)((u8*)&ia->index + | ||
350 | le32_to_cpu(ia->index.entries_offset)); | ||
351 | /* | ||
352 | * Iterate similar to above big loop but applied to index buffer, thus | ||
353 | * loop until we exceed valid memory (corruption case) or until we | ||
354 | * reach the last entry. | ||
355 | */ | ||
356 | for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { | ||
357 | /* Bounds checks. */ | ||
358 | if ((u8*)ie < (u8*)ia || (u8*)ie + | ||
359 | sizeof(INDEX_ENTRY_HEADER) > index_end || | ||
360 | (u8*)ie + le16_to_cpu(ie->length) > index_end) { | ||
361 | ntfs_error(sb, "Index entry out of bounds in inode " | ||
362 | "0x%lx.", idx_ni->mft_no); | ||
363 | goto unm_err_out; | ||
364 | } | ||
365 | /* | ||
366 | * The last entry cannot contain a key. It can however contain | ||
367 | * a pointer to a child node in the B+tree so we just break out. | ||
368 | */ | ||
369 | if (ie->flags & INDEX_ENTRY_END) | ||
370 | break; | ||
371 | /* Further bounds checks. */ | ||
372 | if ((u32)sizeof(INDEX_ENTRY_HEADER) + | ||
373 | le16_to_cpu(ie->key_length) > | ||
374 | le16_to_cpu(ie->data.vi.data_offset) || | ||
375 | (u32)le16_to_cpu(ie->data.vi.data_offset) + | ||
376 | le16_to_cpu(ie->data.vi.data_length) > | ||
377 | le16_to_cpu(ie->length)) { | ||
378 | ntfs_error(sb, "Index entry out of bounds in inode " | ||
379 | "0x%lx.", idx_ni->mft_no); | ||
380 | goto unm_err_out; | ||
381 | } | ||
382 | /* If the keys match perfectly, we setup @ictx and return 0. */ | ||
383 | if ((key_len == le16_to_cpu(ie->key_length)) && !memcmp(key, | ||
384 | &ie->key, key_len)) { | ||
385 | ia_done: | ||
386 | ictx->is_in_root = FALSE; | ||
387 | ictx->actx = NULL; | ||
388 | ictx->base_ni = NULL; | ||
389 | ictx->ia = ia; | ||
390 | ictx->page = page; | ||
391 | goto done; | ||
392 | } | ||
393 | /* | ||
394 | * Not a perfect match, need to do full blown collation so we | ||
395 | * know which way in the B+tree we have to go. | ||
396 | */ | ||
397 | rc = ntfs_collate(vol, idx_ni->itype.index.collation_rule, key, | ||
398 | key_len, &ie->key, le16_to_cpu(ie->key_length)); | ||
399 | /* | ||
400 | * If @key collates before the key of the current entry, there | ||
401 | * is definitely no such key in this index but we might need to | ||
402 | * descend into the B+tree so we just break out of the loop. | ||
403 | */ | ||
404 | if (rc == -1) | ||
405 | break; | ||
406 | /* | ||
407 | * A match should never happen as the memcmp() call should have | ||
408 | * cought it, but we still treat it correctly. | ||
409 | */ | ||
410 | if (!rc) | ||
411 | goto ia_done; | ||
412 | /* The keys are not equal, continue the search. */ | ||
413 | } | ||
414 | /* | ||
415 | * We have finished with this index buffer without success. Check for | ||
416 | * the presence of a child node and if not present return -ENOENT. | ||
417 | */ | ||
418 | if (!(ie->flags & INDEX_ENTRY_NODE)) { | ||
419 | ntfs_debug("Entry not found."); | ||
420 | err = -ENOENT; | ||
421 | goto ia_done; | ||
422 | } | ||
423 | if ((ia->index.flags & NODE_MASK) == LEAF_NODE) { | ||
424 | ntfs_error(sb, "Index entry with child node found in a leaf " | ||
425 | "node in inode 0x%lx.", idx_ni->mft_no); | ||
426 | goto unm_err_out; | ||
427 | } | ||
428 | /* Child node present, descend into it. */ | ||
429 | old_vcn = vcn; | ||
430 | vcn = sle64_to_cpup((sle64*)((u8*)ie + le16_to_cpu(ie->length) - 8)); | ||
431 | if (vcn >= 0) { | ||
432 | /* | ||
433 | * If vcn is in the same page cache page as old_vcn we recycle | ||
434 | * the mapped page. | ||
435 | */ | ||
436 | if (old_vcn << vol->cluster_size_bits >> | ||
437 | PAGE_CACHE_SHIFT == vcn << | ||
438 | vol->cluster_size_bits >> | ||
439 | PAGE_CACHE_SHIFT) | ||
440 | goto fast_descend_into_child_node; | ||
441 | unlock_page(page); | ||
442 | ntfs_unmap_page(page); | ||
443 | goto descend_into_child_node; | ||
444 | } | ||
445 | ntfs_error(sb, "Negative child node vcn in inode 0x%lx.", | ||
446 | idx_ni->mft_no); | ||
447 | unm_err_out: | ||
448 | unlock_page(page); | ||
449 | ntfs_unmap_page(page); | ||
450 | err_out: | ||
451 | if (!err) | ||
452 | err = -EIO; | ||
453 | if (actx) | ||
454 | ntfs_attr_put_search_ctx(actx); | ||
455 | if (m) | ||
456 | unmap_mft_record(base_ni); | ||
457 | return err; | ||
458 | idx_err_out: | ||
459 | ntfs_error(sb, "Corrupt index. Aborting lookup."); | ||
460 | goto err_out; | ||
461 | } | ||
diff --git a/fs/ntfs/index.h b/fs/ntfs/index.h new file mode 100644 index 000000000000..846a489e8692 --- /dev/null +++ b/fs/ntfs/index.h | |||
@@ -0,0 +1,148 @@ | |||
1 | /* | ||
2 | * index.h - Defines for NTFS kernel index handling. Part of the Linux-NTFS | ||
3 | * project. | ||
4 | * | ||
5 | * Copyright (c) 2004 Anton Altaparmakov | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #ifndef _LINUX_NTFS_INDEX_H | ||
24 | #define _LINUX_NTFS_INDEX_H | ||
25 | |||
26 | #include <linux/fs.h> | ||
27 | |||
28 | #include "types.h" | ||
29 | #include "layout.h" | ||
30 | #include "inode.h" | ||
31 | #include "attrib.h" | ||
32 | #include "mft.h" | ||
33 | #include "aops.h" | ||
34 | |||
35 | /** | ||
36 | * @idx_ni: index inode containing the @entry described by this context | ||
37 | * @entry: index entry (points into @ir or @ia) | ||
38 | * @data: index entry data (points into @entry) | ||
39 | * @data_len: length in bytes of @data | ||
40 | * @is_in_root: TRUE if @entry is in @ir and FALSE if it is in @ia | ||
41 | * @ir: index root if @is_in_root and NULL otherwise | ||
42 | * @actx: attribute search context if @is_in_root and NULL otherwise | ||
43 | * @base_ni: base inode if @is_in_root and NULL otherwise | ||
44 | * @ia: index block if @is_in_root is FALSE and NULL otherwise | ||
45 | * @page: page if @is_in_root is FALSE and NULL otherwise | ||
46 | * | ||
47 | * @idx_ni is the index inode this context belongs to. | ||
48 | * | ||
49 | * @entry is the index entry described by this context. @data and @data_len | ||
50 | * are the index entry data and its length in bytes, respectively. @data | ||
51 | * simply points into @entry. This is probably what the user is interested in. | ||
52 | * | ||
53 | * If @is_in_root is TRUE, @entry is in the index root attribute @ir described | ||
54 | * by the attribute search context @actx and the base inode @base_ni. @ia and | ||
55 | * @page are NULL in this case. | ||
56 | * | ||
57 | * If @is_in_root is FALSE, @entry is in the index allocation attribute and @ia | ||
58 | * and @page point to the index allocation block and the mapped, locked page it | ||
59 | * is in, respectively. @ir, @actx and @base_ni are NULL in this case. | ||
60 | * | ||
61 | * To obtain a context call ntfs_index_ctx_get(). | ||
62 | * | ||
63 | * We use this context to allow ntfs_index_lookup() to return the found index | ||
64 | * @entry and its @data without having to allocate a buffer and copy the @entry | ||
65 | * and/or its @data into it. | ||
66 | * | ||
67 | * When finished with the @entry and its @data, call ntfs_index_ctx_put() to | ||
68 | * free the context and other associated resources. | ||
69 | * | ||
70 | * If the index entry was modified, call flush_dcache_index_entry_page() | ||
71 | * immediately after the modification and either ntfs_index_entry_mark_dirty() | ||
72 | * or ntfs_index_entry_write() before the call to ntfs_index_ctx_put() to | ||
73 | * ensure that the changes are written to disk. | ||
74 | */ | ||
75 | typedef struct { | ||
76 | ntfs_inode *idx_ni; | ||
77 | INDEX_ENTRY *entry; | ||
78 | void *data; | ||
79 | u16 data_len; | ||
80 | BOOL is_in_root; | ||
81 | INDEX_ROOT *ir; | ||
82 | ntfs_attr_search_ctx *actx; | ||
83 | ntfs_inode *base_ni; | ||
84 | INDEX_ALLOCATION *ia; | ||
85 | struct page *page; | ||
86 | } ntfs_index_context; | ||
87 | |||
88 | extern ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni); | ||
89 | extern void ntfs_index_ctx_put(ntfs_index_context *ictx); | ||
90 | |||
91 | extern int ntfs_index_lookup(const void *key, const int key_len, | ||
92 | ntfs_index_context *ictx); | ||
93 | |||
94 | #ifdef NTFS_RW | ||
95 | |||
96 | /** | ||
97 | * ntfs_index_entry_flush_dcache_page - flush_dcache_page() for index entries | ||
98 | * @ictx: ntfs index context describing the index entry | ||
99 | * | ||
100 | * Call flush_dcache_page() for the page in which an index entry resides. | ||
101 | * | ||
102 | * This must be called every time an index entry is modified, just after the | ||
103 | * modification. | ||
104 | * | ||
105 | * If the index entry is in the index root attribute, simply flush the page | ||
106 | * containing the mft record containing the index root attribute. | ||
107 | * | ||
108 | * If the index entry is in an index block belonging to the index allocation | ||
109 | * attribute, simply flush the page cache page containing the index block. | ||
110 | */ | ||
111 | static inline void ntfs_index_entry_flush_dcache_page(ntfs_index_context *ictx) | ||
112 | { | ||
113 | if (ictx->is_in_root) | ||
114 | flush_dcache_mft_record_page(ictx->actx->ntfs_ino); | ||
115 | else | ||
116 | flush_dcache_page(ictx->page); | ||
117 | } | ||
118 | |||
119 | /** | ||
120 | * ntfs_index_entry_mark_dirty - mark an index entry dirty | ||
121 | * @ictx: ntfs index context describing the index entry | ||
122 | * | ||
123 | * Mark the index entry described by the index entry context @ictx dirty. | ||
124 | * | ||
125 | * If the index entry is in the index root attribute, simply mark the mft | ||
126 | * record containing the index root attribute dirty. This ensures the mft | ||
127 | * record, and hence the index root attribute, will be written out to disk | ||
128 | * later. | ||
129 | * | ||
130 | * If the index entry is in an index block belonging to the index allocation | ||
131 | * attribute, mark the buffers belonging to the index record as well as the | ||
132 | * page cache page the index block is in dirty. This automatically marks the | ||
133 | * VFS inode of the ntfs index inode to which the index entry belongs dirty, | ||
134 | * too (I_DIRTY_PAGES) and this in turn ensures the page buffers, and hence the | ||
135 | * dirty index block, will be written out to disk later. | ||
136 | */ | ||
137 | static inline void ntfs_index_entry_mark_dirty(ntfs_index_context *ictx) | ||
138 | { | ||
139 | if (ictx->is_in_root) | ||
140 | mark_mft_record_dirty(ictx->actx->ntfs_ino); | ||
141 | else | ||
142 | mark_ntfs_record_dirty(ictx->page, | ||
143 | (u8*)ictx->ia - (u8*)page_address(ictx->page)); | ||
144 | } | ||
145 | |||
146 | #endif /* NTFS_RW */ | ||
147 | |||
148 | #endif /* _LINUX_NTFS_INDEX_H */ | ||
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c new file mode 100644 index 000000000000..31840ba0b38c --- /dev/null +++ b/fs/ntfs/inode.c | |||
@@ -0,0 +1,2616 @@ | |||
1 | /** | ||
2 | * inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
5 | * | ||
6 | * This program/include file is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as published | ||
8 | * by the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program/include file is distributed in the hope that it will be | ||
12 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
13 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program (in the main directory of the Linux-NTFS | ||
18 | * distribution in the file COPYING); if not, write to the Free Software | ||
19 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | |||
22 | #include <linux/pagemap.h> | ||
23 | #include <linux/buffer_head.h> | ||
24 | #include <linux/smp_lock.h> | ||
25 | #include <linux/quotaops.h> | ||
26 | #include <linux/mount.h> | ||
27 | |||
28 | #include "aops.h" | ||
29 | #include "dir.h" | ||
30 | #include "debug.h" | ||
31 | #include "inode.h" | ||
32 | #include "attrib.h" | ||
33 | #include "malloc.h" | ||
34 | #include "mft.h" | ||
35 | #include "time.h" | ||
36 | #include "ntfs.h" | ||
37 | |||
38 | /** | ||
39 | * ntfs_test_inode - compare two (possibly fake) inodes for equality | ||
40 | * @vi: vfs inode which to test | ||
41 | * @na: ntfs attribute which is being tested with | ||
42 | * | ||
43 | * Compare the ntfs attribute embedded in the ntfs specific part of the vfs | ||
44 | * inode @vi for equality with the ntfs attribute @na. | ||
45 | * | ||
46 | * If searching for the normal file/directory inode, set @na->type to AT_UNUSED. | ||
47 | * @na->name and @na->name_len are then ignored. | ||
48 | * | ||
49 | * Return 1 if the attributes match and 0 if not. | ||
50 | * | ||
51 | * NOTE: This function runs with the inode_lock spin lock held so it is not | ||
52 | * allowed to sleep. | ||
53 | */ | ||
54 | int ntfs_test_inode(struct inode *vi, ntfs_attr *na) | ||
55 | { | ||
56 | ntfs_inode *ni; | ||
57 | |||
58 | if (vi->i_ino != na->mft_no) | ||
59 | return 0; | ||
60 | ni = NTFS_I(vi); | ||
61 | /* If !NInoAttr(ni), @vi is a normal file or directory inode. */ | ||
62 | if (likely(!NInoAttr(ni))) { | ||
63 | /* If not looking for a normal inode this is a mismatch. */ | ||
64 | if (unlikely(na->type != AT_UNUSED)) | ||
65 | return 0; | ||
66 | } else { | ||
67 | /* A fake inode describing an attribute. */ | ||
68 | if (ni->type != na->type) | ||
69 | return 0; | ||
70 | if (ni->name_len != na->name_len) | ||
71 | return 0; | ||
72 | if (na->name_len && memcmp(ni->name, na->name, | ||
73 | na->name_len * sizeof(ntfschar))) | ||
74 | return 0; | ||
75 | } | ||
76 | /* Match! */ | ||
77 | return 1; | ||
78 | } | ||
79 | |||
80 | /** | ||
81 | * ntfs_init_locked_inode - initialize an inode | ||
82 | * @vi: vfs inode to initialize | ||
83 | * @na: ntfs attribute which to initialize @vi to | ||
84 | * | ||
85 | * Initialize the vfs inode @vi with the values from the ntfs attribute @na in | ||
86 | * order to enable ntfs_test_inode() to do its work. | ||
87 | * | ||
88 | * If initializing the normal file/directory inode, set @na->type to AT_UNUSED. | ||
89 | * In that case, @na->name and @na->name_len should be set to NULL and 0, | ||
90 | * respectively. Although that is not strictly necessary as | ||
91 | * ntfs_read_inode_locked() will fill them in later. | ||
92 | * | ||
93 | * Return 0 on success and -errno on error. | ||
94 | * | ||
95 | * NOTE: This function runs with the inode_lock spin lock held so it is not | ||
96 | * allowed to sleep. (Hence the GFP_ATOMIC allocation.) | ||
97 | */ | ||
98 | static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na) | ||
99 | { | ||
100 | ntfs_inode *ni = NTFS_I(vi); | ||
101 | |||
102 | vi->i_ino = na->mft_no; | ||
103 | |||
104 | ni->type = na->type; | ||
105 | if (na->type == AT_INDEX_ALLOCATION) | ||
106 | NInoSetMstProtected(ni); | ||
107 | |||
108 | ni->name = na->name; | ||
109 | ni->name_len = na->name_len; | ||
110 | |||
111 | /* If initializing a normal inode, we are done. */ | ||
112 | if (likely(na->type == AT_UNUSED)) { | ||
113 | BUG_ON(na->name); | ||
114 | BUG_ON(na->name_len); | ||
115 | return 0; | ||
116 | } | ||
117 | |||
118 | /* It is a fake inode. */ | ||
119 | NInoSetAttr(ni); | ||
120 | |||
121 | /* | ||
122 | * We have I30 global constant as an optimization as it is the name | ||
123 | * in >99.9% of named attributes! The other <0.1% incur a GFP_ATOMIC | ||
124 | * allocation but that is ok. And most attributes are unnamed anyway, | ||
125 | * thus the fraction of named attributes with name != I30 is actually | ||
126 | * absolutely tiny. | ||
127 | */ | ||
128 | if (na->name_len && na->name != I30) { | ||
129 | unsigned int i; | ||
130 | |||
131 | BUG_ON(!na->name); | ||
132 | i = na->name_len * sizeof(ntfschar); | ||
133 | ni->name = (ntfschar*)kmalloc(i + sizeof(ntfschar), GFP_ATOMIC); | ||
134 | if (!ni->name) | ||
135 | return -ENOMEM; | ||
136 | memcpy(ni->name, na->name, i); | ||
137 | ni->name[i] = 0; | ||
138 | } | ||
139 | return 0; | ||
140 | } | ||
141 | |||
/* Type of the "set" callback that the *_iget() functions pass to iget5_locked(). */
typedef int (*set_t)(struct inode *, void *);
/*
 * Forward declarations of the inode readers, which the *_iget() functions
 * call before the readers are defined.
 */
static int ntfs_read_locked_inode(struct inode *vi);
static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi);
static int ntfs_read_locked_index_inode(struct inode *base_vi,
		struct inode *vi);
147 | |||
148 | /** | ||
149 | * ntfs_iget - obtain a struct inode corresponding to a specific normal inode | ||
150 | * @sb: super block of mounted volume | ||
151 | * @mft_no: mft record number / inode number to obtain | ||
152 | * | ||
153 | * Obtain the struct inode corresponding to a specific normal inode (i.e. a | ||
154 | * file or directory). | ||
155 | * | ||
156 | * If the inode is in the cache, it is just returned with an increased | ||
157 | * reference count. Otherwise, a new struct inode is allocated and initialized, | ||
158 | * and finally ntfs_read_locked_inode() is called to read in the inode and | ||
159 | * fill in the remainder of the inode structure. | ||
160 | * | ||
161 | * Return the struct inode on success. Check the return value with IS_ERR() and | ||
162 | * if true, the function failed and the error code is obtained from PTR_ERR(). | ||
163 | */ | ||
164 | struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no) | ||
165 | { | ||
166 | struct inode *vi; | ||
167 | ntfs_attr na; | ||
168 | int err; | ||
169 | |||
170 | na.mft_no = mft_no; | ||
171 | na.type = AT_UNUSED; | ||
172 | na.name = NULL; | ||
173 | na.name_len = 0; | ||
174 | |||
175 | vi = iget5_locked(sb, mft_no, (test_t)ntfs_test_inode, | ||
176 | (set_t)ntfs_init_locked_inode, &na); | ||
177 | if (!vi) | ||
178 | return ERR_PTR(-ENOMEM); | ||
179 | |||
180 | err = 0; | ||
181 | |||
182 | /* If this is a freshly allocated inode, need to read it now. */ | ||
183 | if (vi->i_state & I_NEW) { | ||
184 | err = ntfs_read_locked_inode(vi); | ||
185 | unlock_new_inode(vi); | ||
186 | } | ||
187 | /* | ||
188 | * There is no point in keeping bad inodes around if the failure was | ||
189 | * due to ENOMEM. We want to be able to retry again later. | ||
190 | */ | ||
191 | if (err == -ENOMEM) { | ||
192 | iput(vi); | ||
193 | vi = ERR_PTR(err); | ||
194 | } | ||
195 | return vi; | ||
196 | } | ||
197 | |||
198 | /** | ||
199 | * ntfs_attr_iget - obtain a struct inode corresponding to an attribute | ||
200 | * @base_vi: vfs base inode containing the attribute | ||
201 | * @type: attribute type | ||
202 | * @name: Unicode name of the attribute (NULL if unnamed) | ||
203 | * @name_len: length of @name in Unicode characters (0 if unnamed) | ||
204 | * | ||
205 | * Obtain the (fake) struct inode corresponding to the attribute specified by | ||
206 | * @type, @name, and @name_len, which is present in the base mft record | ||
207 | * specified by the vfs inode @base_vi. | ||
208 | * | ||
209 | * If the attribute inode is in the cache, it is just returned with an | ||
210 | * increased reference count. Otherwise, a new struct inode is allocated and | ||
211 | * initialized, and finally ntfs_read_locked_attr_inode() is called to read the | ||
212 | * attribute and fill in the inode structure. | ||
213 | * | ||
214 | * Note, for index allocation attributes, you need to use ntfs_index_iget() | ||
215 | * instead of ntfs_attr_iget() as working with indices is a lot more complex. | ||
216 | * | ||
217 | * Return the struct inode of the attribute inode on success. Check the return | ||
218 | * value with IS_ERR() and if true, the function failed and the error code is | ||
219 | * obtained from PTR_ERR(). | ||
220 | */ | ||
221 | struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type, | ||
222 | ntfschar *name, u32 name_len) | ||
223 | { | ||
224 | struct inode *vi; | ||
225 | ntfs_attr na; | ||
226 | int err; | ||
227 | |||
228 | /* Make sure no one calls ntfs_attr_iget() for indices. */ | ||
229 | BUG_ON(type == AT_INDEX_ALLOCATION); | ||
230 | |||
231 | na.mft_no = base_vi->i_ino; | ||
232 | na.type = type; | ||
233 | na.name = name; | ||
234 | na.name_len = name_len; | ||
235 | |||
236 | vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode, | ||
237 | (set_t)ntfs_init_locked_inode, &na); | ||
238 | if (!vi) | ||
239 | return ERR_PTR(-ENOMEM); | ||
240 | |||
241 | err = 0; | ||
242 | |||
243 | /* If this is a freshly allocated inode, need to read it now. */ | ||
244 | if (vi->i_state & I_NEW) { | ||
245 | err = ntfs_read_locked_attr_inode(base_vi, vi); | ||
246 | unlock_new_inode(vi); | ||
247 | } | ||
248 | /* | ||
249 | * There is no point in keeping bad attribute inodes around. This also | ||
250 | * simplifies things in that we never need to check for bad attribute | ||
251 | * inodes elsewhere. | ||
252 | */ | ||
253 | if (err) { | ||
254 | iput(vi); | ||
255 | vi = ERR_PTR(err); | ||
256 | } | ||
257 | return vi; | ||
258 | } | ||
259 | |||
260 | /** | ||
261 | * ntfs_index_iget - obtain a struct inode corresponding to an index | ||
262 | * @base_vi: vfs base inode containing the index related attributes | ||
263 | * @name: Unicode name of the index | ||
264 | * @name_len: length of @name in Unicode characters | ||
265 | * | ||
266 | * Obtain the (fake) struct inode corresponding to the index specified by @name | ||
267 | * and @name_len, which is present in the base mft record specified by the vfs | ||
268 | * inode @base_vi. | ||
269 | * | ||
270 | * If the index inode is in the cache, it is just returned with an increased | ||
271 | * reference count. Otherwise, a new struct inode is allocated and | ||
272 | * initialized, and finally ntfs_read_locked_index_inode() is called to read | ||
273 | * the index related attributes and fill in the inode structure. | ||
274 | * | ||
275 | * Return the struct inode of the index inode on success. Check the return | ||
276 | * value with IS_ERR() and if true, the function failed and the error code is | ||
277 | * obtained from PTR_ERR(). | ||
278 | */ | ||
279 | struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name, | ||
280 | u32 name_len) | ||
281 | { | ||
282 | struct inode *vi; | ||
283 | ntfs_attr na; | ||
284 | int err; | ||
285 | |||
286 | na.mft_no = base_vi->i_ino; | ||
287 | na.type = AT_INDEX_ALLOCATION; | ||
288 | na.name = name; | ||
289 | na.name_len = name_len; | ||
290 | |||
291 | vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode, | ||
292 | (set_t)ntfs_init_locked_inode, &na); | ||
293 | if (!vi) | ||
294 | return ERR_PTR(-ENOMEM); | ||
295 | |||
296 | err = 0; | ||
297 | |||
298 | /* If this is a freshly allocated inode, need to read it now. */ | ||
299 | if (vi->i_state & I_NEW) { | ||
300 | err = ntfs_read_locked_index_inode(base_vi, vi); | ||
301 | unlock_new_inode(vi); | ||
302 | } | ||
303 | /* | ||
304 | * There is no point in keeping bad index inodes around. This also | ||
305 | * simplifies things in that we never need to check for bad index | ||
306 | * inodes elsewhere. | ||
307 | */ | ||
308 | if (err) { | ||
309 | iput(vi); | ||
310 | vi = ERR_PTR(err); | ||
311 | } | ||
312 | return vi; | ||
313 | } | ||
314 | |||
315 | struct inode *ntfs_alloc_big_inode(struct super_block *sb) | ||
316 | { | ||
317 | ntfs_inode *ni; | ||
318 | |||
319 | ntfs_debug("Entering."); | ||
320 | ni = (ntfs_inode *)kmem_cache_alloc(ntfs_big_inode_cache, | ||
321 | SLAB_NOFS); | ||
322 | if (likely(ni != NULL)) { | ||
323 | ni->state = 0; | ||
324 | return VFS_I(ni); | ||
325 | } | ||
326 | ntfs_error(sb, "Allocation of NTFS big inode structure failed."); | ||
327 | return NULL; | ||
328 | } | ||
329 | |||
330 | void ntfs_destroy_big_inode(struct inode *inode) | ||
331 | { | ||
332 | ntfs_inode *ni = NTFS_I(inode); | ||
333 | |||
334 | ntfs_debug("Entering."); | ||
335 | BUG_ON(ni->page); | ||
336 | if (!atomic_dec_and_test(&ni->count)) | ||
337 | BUG(); | ||
338 | kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode)); | ||
339 | } | ||
340 | |||
341 | static inline ntfs_inode *ntfs_alloc_extent_inode(void) | ||
342 | { | ||
343 | ntfs_inode *ni; | ||
344 | |||
345 | ntfs_debug("Entering."); | ||
346 | ni = (ntfs_inode *)kmem_cache_alloc(ntfs_inode_cache, SLAB_NOFS); | ||
347 | if (likely(ni != NULL)) { | ||
348 | ni->state = 0; | ||
349 | return ni; | ||
350 | } | ||
351 | ntfs_error(NULL, "Allocation of NTFS inode structure failed."); | ||
352 | return NULL; | ||
353 | } | ||
354 | |||
355 | static void ntfs_destroy_extent_inode(ntfs_inode *ni) | ||
356 | { | ||
357 | ntfs_debug("Entering."); | ||
358 | BUG_ON(ni->page); | ||
359 | if (!atomic_dec_and_test(&ni->count)) | ||
360 | BUG(); | ||
361 | kmem_cache_free(ntfs_inode_cache, ni); | ||
362 | } | ||
363 | |||
364 | /** | ||
365 | * __ntfs_init_inode - initialize ntfs specific part of an inode | ||
366 | * @sb: super block of mounted volume | ||
367 | * @ni: freshly allocated ntfs inode which to initialize | ||
368 | * | ||
369 | * Initialize an ntfs inode to defaults. | ||
370 | * | ||
371 | * NOTE: ni->mft_no, ni->state, ni->type, ni->name, and ni->name_len are left | ||
372 | * untouched. Make sure to initialize them elsewhere. | ||
373 | * | ||
374 | * Return zero on success and -ENOMEM on error. | ||
375 | */ | ||
376 | void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) | ||
377 | { | ||
378 | ntfs_debug("Entering."); | ||
379 | ni->initialized_size = ni->allocated_size = 0; | ||
380 | ni->seq_no = 0; | ||
381 | atomic_set(&ni->count, 1); | ||
382 | ni->vol = NTFS_SB(sb); | ||
383 | ntfs_init_runlist(&ni->runlist); | ||
384 | init_MUTEX(&ni->mrec_lock); | ||
385 | ni->page = NULL; | ||
386 | ni->page_ofs = 0; | ||
387 | ni->attr_list_size = 0; | ||
388 | ni->attr_list = NULL; | ||
389 | ntfs_init_runlist(&ni->attr_list_rl); | ||
390 | ni->itype.index.bmp_ino = NULL; | ||
391 | ni->itype.index.block_size = 0; | ||
392 | ni->itype.index.vcn_size = 0; | ||
393 | ni->itype.index.collation_rule = 0; | ||
394 | ni->itype.index.block_size_bits = 0; | ||
395 | ni->itype.index.vcn_size_bits = 0; | ||
396 | init_MUTEX(&ni->extent_lock); | ||
397 | ni->nr_extents = 0; | ||
398 | ni->ext.base_ntfs_ino = NULL; | ||
399 | } | ||
400 | |||
401 | inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, | ||
402 | unsigned long mft_no) | ||
403 | { | ||
404 | ntfs_inode *ni = ntfs_alloc_extent_inode(); | ||
405 | |||
406 | ntfs_debug("Entering."); | ||
407 | if (likely(ni != NULL)) { | ||
408 | __ntfs_init_inode(sb, ni); | ||
409 | ni->mft_no = mft_no; | ||
410 | ni->type = AT_UNUSED; | ||
411 | ni->name = NULL; | ||
412 | ni->name_len = 0; | ||
413 | } | ||
414 | return ni; | ||
415 | } | ||
416 | |||
417 | /** | ||
418 | * ntfs_is_extended_system_file - check if a file is in the $Extend directory | ||
419 | * @ctx: initialized attribute search context | ||
420 | * | ||
421 | * Search all file name attributes in the inode described by the attribute | ||
422 | * search context @ctx and check if any of the names are in the $Extend system | ||
423 | * directory. | ||
424 | * | ||
425 | * Return values: | ||
426 | * 1: file is in $Extend directory | ||
427 | * 0: file is not in $Extend directory | ||
428 | * -errno: failed to determine if the file is in the $Extend directory | ||
429 | */ | ||
430 | static int ntfs_is_extended_system_file(ntfs_attr_search_ctx *ctx) | ||
431 | { | ||
432 | int nr_links, err; | ||
433 | |||
434 | /* Restart search. */ | ||
435 | ntfs_attr_reinit_search_ctx(ctx); | ||
436 | |||
437 | /* Get number of hard links. */ | ||
438 | nr_links = le16_to_cpu(ctx->mrec->link_count); | ||
439 | |||
440 | /* Loop through all hard links. */ | ||
441 | while (!(err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0, | ||
442 | ctx))) { | ||
443 | FILE_NAME_ATTR *file_name_attr; | ||
444 | ATTR_RECORD *attr = ctx->attr; | ||
445 | u8 *p, *p2; | ||
446 | |||
447 | nr_links--; | ||
448 | /* | ||
449 | * Maximum sanity checking as we are called on an inode that | ||
450 | * we suspect might be corrupt. | ||
451 | */ | ||
452 | p = (u8*)attr + le32_to_cpu(attr->length); | ||
453 | if (p < (u8*)ctx->mrec || (u8*)p > (u8*)ctx->mrec + | ||
454 | le32_to_cpu(ctx->mrec->bytes_in_use)) { | ||
455 | err_corrupt_attr: | ||
456 | ntfs_error(ctx->ntfs_ino->vol->sb, "Corrupt file name " | ||
457 | "attribute. You should run chkdsk."); | ||
458 | return -EIO; | ||
459 | } | ||
460 | if (attr->non_resident) { | ||
461 | ntfs_error(ctx->ntfs_ino->vol->sb, "Non-resident file " | ||
462 | "name. You should run chkdsk."); | ||
463 | return -EIO; | ||
464 | } | ||
465 | if (attr->flags) { | ||
466 | ntfs_error(ctx->ntfs_ino->vol->sb, "File name with " | ||
467 | "invalid flags. You should run " | ||
468 | "chkdsk."); | ||
469 | return -EIO; | ||
470 | } | ||
471 | if (!(attr->data.resident.flags & RESIDENT_ATTR_IS_INDEXED)) { | ||
472 | ntfs_error(ctx->ntfs_ino->vol->sb, "Unindexed file " | ||
473 | "name. You should run chkdsk."); | ||
474 | return -EIO; | ||
475 | } | ||
476 | file_name_attr = (FILE_NAME_ATTR*)((u8*)attr + | ||
477 | le16_to_cpu(attr->data.resident.value_offset)); | ||
478 | p2 = (u8*)attr + le32_to_cpu(attr->data.resident.value_length); | ||
479 | if (p2 < (u8*)attr || p2 > p) | ||
480 | goto err_corrupt_attr; | ||
481 | /* This attribute is ok, but is it in the $Extend directory? */ | ||
482 | if (MREF_LE(file_name_attr->parent_directory) == FILE_Extend) | ||
483 | return 1; /* YES, it's an extended system file. */ | ||
484 | } | ||
485 | if (unlikely(err != -ENOENT)) | ||
486 | return err; | ||
487 | if (unlikely(nr_links)) { | ||
488 | ntfs_error(ctx->ntfs_ino->vol->sb, "Inode hard link count " | ||
489 | "doesn't match number of name attributes. You " | ||
490 | "should run chkdsk."); | ||
491 | return -EIO; | ||
492 | } | ||
493 | return 0; /* NO, it is not an extended system file. */ | ||
494 | } | ||
495 | |||
496 | /** | ||
497 | * ntfs_read_locked_inode - read an inode from its device | ||
498 | * @vi: inode to read | ||
499 | * | ||
500 | * ntfs_read_locked_inode() is called from ntfs_iget() to read the inode | ||
501 | * described by @vi into memory from the device. | ||
502 | * | ||
503 | * The only fields in @vi that we need to/can look at when the function is | ||
504 | * called are i_sb, pointing to the mounted device's super block, and i_ino, | ||
505 | * the number of the inode to load. | ||
506 | * | ||
507 | * ntfs_read_locked_inode() maps, pins and locks the mft record number i_ino | ||
508 | * for reading and sets up the necessary @vi fields as well as initializing | ||
509 | * the ntfs inode. | ||
510 | * | ||
511 | * Q: What locks are held when the function is called? | ||
512 | * A: i_state has I_LOCK set, hence the inode is locked, also | ||
513 | * i_count is set to 1, so it is not going to go away | ||
514 | * i_flags is set to 0 and we have no business touching it. Only an ioctl() | ||
515 | * is allowed to write to them. We should of course be honouring them but | ||
516 | * we need to do that using the IS_* macros defined in include/linux/fs.h. | ||
517 | * In any case ntfs_read_locked_inode() has nothing to do with i_flags. | ||
518 | * | ||
519 | * Return 0 on success and -errno on error. In the error case, the inode will | ||
520 | * have had make_bad_inode() executed on it. | ||
521 | */ | ||
522 | static int ntfs_read_locked_inode(struct inode *vi) | ||
523 | { | ||
524 | ntfs_volume *vol = NTFS_SB(vi->i_sb); | ||
525 | ntfs_inode *ni; | ||
526 | MFT_RECORD *m; | ||
527 | STANDARD_INFORMATION *si; | ||
528 | ntfs_attr_search_ctx *ctx; | ||
529 | int err = 0; | ||
530 | |||
531 | ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino); | ||
532 | |||
533 | /* Setup the generic vfs inode parts now. */ | ||
534 | |||
535 | /* This is the optimal IO size (for stat), not the fs block size. */ | ||
536 | vi->i_blksize = PAGE_CACHE_SIZE; | ||
537 | /* | ||
538 | * This is for checking whether an inode has changed w.r.t. a file so | ||
539 | * that the file can be updated if necessary (compare with f_version). | ||
540 | */ | ||
541 | vi->i_version = 1; | ||
542 | |||
543 | vi->i_uid = vol->uid; | ||
544 | vi->i_gid = vol->gid; | ||
545 | vi->i_mode = 0; | ||
546 | |||
547 | /* | ||
548 | * Initialize the ntfs specific part of @vi special casing | ||
549 | * FILE_MFT which we need to do at mount time. | ||
550 | */ | ||
551 | if (vi->i_ino != FILE_MFT) | ||
552 | ntfs_init_big_inode(vi); | ||
553 | ni = NTFS_I(vi); | ||
554 | |||
555 | m = map_mft_record(ni); | ||
556 | if (IS_ERR(m)) { | ||
557 | err = PTR_ERR(m); | ||
558 | goto err_out; | ||
559 | } | ||
560 | ctx = ntfs_attr_get_search_ctx(ni, m); | ||
561 | if (!ctx) { | ||
562 | err = -ENOMEM; | ||
563 | goto unm_err_out; | ||
564 | } | ||
565 | |||
566 | if (!(m->flags & MFT_RECORD_IN_USE)) { | ||
567 | ntfs_error(vi->i_sb, "Inode is not in use!"); | ||
568 | goto unm_err_out; | ||
569 | } | ||
570 | if (m->base_mft_record) { | ||
571 | ntfs_error(vi->i_sb, "Inode is an extent inode!"); | ||
572 | goto unm_err_out; | ||
573 | } | ||
574 | |||
575 | /* Transfer information from mft record into vfs and ntfs inodes. */ | ||
576 | vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number); | ||
577 | |||
578 | /* | ||
579 | * FIXME: Keep in mind that link_count is two for files which have both | ||
580 | * a long file name and a short file name as separate entries, so if | ||
581 | * we are hiding short file names this will be too high. Either we need | ||
582 | * to account for the short file names by subtracting them or we need | ||
583 | * to make sure we delete files even though i_nlink is not zero which | ||
584 | * might be tricky due to vfs interactions. Need to think about this | ||
585 | * some more when implementing the unlink command. | ||
586 | */ | ||
587 | vi->i_nlink = le16_to_cpu(m->link_count); | ||
588 | /* | ||
589 | * FIXME: Reparse points can have the directory bit set even though | ||
590 | * they would be S_IFLNK. Need to deal with this further below when we | ||
591 | * implement reparse points / symbolic links but it will do for now. | ||
592 | * Also if not a directory, it could be something else, rather than | ||
593 | * a regular file. But again, will do for now. | ||
594 | */ | ||
595 | /* Everyone gets all permissions. */ | ||
596 | vi->i_mode |= S_IRWXUGO; | ||
597 | /* If read-only, noone gets write permissions. */ | ||
598 | if (IS_RDONLY(vi)) | ||
599 | vi->i_mode &= ~S_IWUGO; | ||
600 | if (m->flags & MFT_RECORD_IS_DIRECTORY) { | ||
601 | vi->i_mode |= S_IFDIR; | ||
602 | /* | ||
603 | * Apply the directory permissions mask set in the mount | ||
604 | * options. | ||
605 | */ | ||
606 | vi->i_mode &= ~vol->dmask; | ||
607 | /* Things break without this kludge! */ | ||
608 | if (vi->i_nlink > 1) | ||
609 | vi->i_nlink = 1; | ||
610 | } else { | ||
611 | vi->i_mode |= S_IFREG; | ||
612 | /* Apply the file permissions mask set in the mount options. */ | ||
613 | vi->i_mode &= ~vol->fmask; | ||
614 | } | ||
615 | /* | ||
616 | * Find the standard information attribute in the mft record. At this | ||
617 | * stage we haven't setup the attribute list stuff yet, so this could | ||
618 | * in fact fail if the standard information is in an extent record, but | ||
619 | * I don't think this actually ever happens. | ||
620 | */ | ||
621 | err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, NULL, 0, 0, 0, NULL, 0, | ||
622 | ctx); | ||
623 | if (unlikely(err)) { | ||
624 | if (err == -ENOENT) { | ||
625 | /* | ||
626 | * TODO: We should be performing a hot fix here (if the | ||
627 | * recover mount option is set) by creating a new | ||
628 | * attribute. | ||
629 | */ | ||
630 | ntfs_error(vi->i_sb, "$STANDARD_INFORMATION attribute " | ||
631 | "is missing."); | ||
632 | } | ||
633 | goto unm_err_out; | ||
634 | } | ||
635 | /* Get the standard information attribute value. */ | ||
636 | si = (STANDARD_INFORMATION*)((char*)ctx->attr + | ||
637 | le16_to_cpu(ctx->attr->data.resident.value_offset)); | ||
638 | |||
639 | /* Transfer information from the standard information into vi. */ | ||
640 | /* | ||
641 | * Note: The i_?times do not quite map perfectly onto the NTFS times, | ||
642 | * but they are close enough, and in the end it doesn't really matter | ||
643 | * that much... | ||
644 | */ | ||
645 | /* | ||
646 | * mtime is the last change of the data within the file. Not changed | ||
647 | * when only metadata is changed, e.g. a rename doesn't affect mtime. | ||
648 | */ | ||
649 | vi->i_mtime = ntfs2utc(si->last_data_change_time); | ||
650 | /* | ||
651 | * ctime is the last change of the metadata of the file. This obviously | ||
652 | * always changes, when mtime is changed. ctime can be changed on its | ||
653 | * own, mtime is then not changed, e.g. when a file is renamed. | ||
654 | */ | ||
655 | vi->i_ctime = ntfs2utc(si->last_mft_change_time); | ||
656 | /* | ||
657 | * Last access to the data within the file. Not changed during a rename | ||
658 | * for example but changed whenever the file is written to. | ||
659 | */ | ||
660 | vi->i_atime = ntfs2utc(si->last_access_time); | ||
661 | |||
662 | /* Find the attribute list attribute if present. */ | ||
663 | ntfs_attr_reinit_search_ctx(ctx); | ||
664 | err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx); | ||
665 | if (err) { | ||
666 | if (unlikely(err != -ENOENT)) { | ||
667 | ntfs_error(vi->i_sb, "Failed to lookup attribute list " | ||
668 | "attribute."); | ||
669 | goto unm_err_out; | ||
670 | } | ||
671 | } else /* if (!err) */ { | ||
672 | if (vi->i_ino == FILE_MFT) | ||
673 | goto skip_attr_list_load; | ||
674 | ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino); | ||
675 | NInoSetAttrList(ni); | ||
676 | if (ctx->attr->flags & ATTR_IS_ENCRYPTED || | ||
677 | ctx->attr->flags & ATTR_COMPRESSION_MASK || | ||
678 | ctx->attr->flags & ATTR_IS_SPARSE) { | ||
679 | ntfs_error(vi->i_sb, "Attribute list attribute is " | ||
680 | "compressed/encrypted/sparse."); | ||
681 | goto unm_err_out; | ||
682 | } | ||
683 | /* Now allocate memory for the attribute list. */ | ||
684 | ni->attr_list_size = (u32)ntfs_attr_size(ctx->attr); | ||
685 | ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); | ||
686 | if (!ni->attr_list) { | ||
687 | ntfs_error(vi->i_sb, "Not enough memory to allocate " | ||
688 | "buffer for attribute list."); | ||
689 | err = -ENOMEM; | ||
690 | goto unm_err_out; | ||
691 | } | ||
692 | if (ctx->attr->non_resident) { | ||
693 | NInoSetAttrListNonResident(ni); | ||
694 | if (ctx->attr->data.non_resident.lowest_vcn) { | ||
695 | ntfs_error(vi->i_sb, "Attribute list has non " | ||
696 | "zero lowest_vcn."); | ||
697 | goto unm_err_out; | ||
698 | } | ||
699 | /* | ||
700 | * Setup the runlist. No need for locking as we have | ||
701 | * exclusive access to the inode at this time. | ||
702 | */ | ||
703 | ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol, | ||
704 | ctx->attr, NULL); | ||
705 | if (IS_ERR(ni->attr_list_rl.rl)) { | ||
706 | err = PTR_ERR(ni->attr_list_rl.rl); | ||
707 | ni->attr_list_rl.rl = NULL; | ||
708 | ntfs_error(vi->i_sb, "Mapping pairs " | ||
709 | "decompression failed."); | ||
710 | goto unm_err_out; | ||
711 | } | ||
712 | /* Now load the attribute list. */ | ||
713 | if ((err = load_attribute_list(vol, &ni->attr_list_rl, | ||
714 | ni->attr_list, ni->attr_list_size, | ||
715 | sle64_to_cpu(ctx->attr->data. | ||
716 | non_resident.initialized_size)))) { | ||
717 | ntfs_error(vi->i_sb, "Failed to load " | ||
718 | "attribute list attribute."); | ||
719 | goto unm_err_out; | ||
720 | } | ||
721 | } else /* if (!ctx.attr->non_resident) */ { | ||
722 | if ((u8*)ctx->attr + le16_to_cpu( | ||
723 | ctx->attr->data.resident.value_offset) + | ||
724 | le32_to_cpu( | ||
725 | ctx->attr->data.resident.value_length) > | ||
726 | (u8*)ctx->mrec + vol->mft_record_size) { | ||
727 | ntfs_error(vi->i_sb, "Corrupt attribute list " | ||
728 | "in inode."); | ||
729 | goto unm_err_out; | ||
730 | } | ||
731 | /* Now copy the attribute list. */ | ||
732 | memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu( | ||
733 | ctx->attr->data.resident.value_offset), | ||
734 | le32_to_cpu( | ||
735 | ctx->attr->data.resident.value_length)); | ||
736 | } | ||
737 | } | ||
738 | skip_attr_list_load: | ||
739 | /* | ||
740 | * If an attribute list is present we now have the attribute list value | ||
741 | * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes. | ||
742 | */ | ||
743 | if (S_ISDIR(vi->i_mode)) { | ||
744 | struct inode *bvi; | ||
745 | ntfs_inode *bni; | ||
746 | INDEX_ROOT *ir; | ||
747 | char *ir_end, *index_end; | ||
748 | |||
749 | /* It is a directory, find index root attribute. */ | ||
750 | ntfs_attr_reinit_search_ctx(ctx); | ||
751 | err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, | ||
752 | 0, NULL, 0, ctx); | ||
753 | if (unlikely(err)) { | ||
754 | if (err == -ENOENT) { | ||
755 | // FIXME: File is corrupt! Hot-fix with empty | ||
756 | // index root attribute if recovery option is | ||
757 | // set. | ||
758 | ntfs_error(vi->i_sb, "$INDEX_ROOT attribute " | ||
759 | "is missing."); | ||
760 | } | ||
761 | goto unm_err_out; | ||
762 | } | ||
763 | /* Set up the state. */ | ||
764 | if (unlikely(ctx->attr->non_resident)) { | ||
765 | ntfs_error(vol->sb, "$INDEX_ROOT attribute is not " | ||
766 | "resident."); | ||
767 | goto unm_err_out; | ||
768 | } | ||
769 | /* Ensure the attribute name is placed before the value. */ | ||
770 | if (unlikely(ctx->attr->name_length && | ||
771 | (le16_to_cpu(ctx->attr->name_offset) >= | ||
772 | le16_to_cpu(ctx->attr->data.resident. | ||
773 | value_offset)))) { | ||
774 | ntfs_error(vol->sb, "$INDEX_ROOT attribute name is " | ||
775 | "placed after the attribute value."); | ||
776 | goto unm_err_out; | ||
777 | } | ||
778 | /* | ||
779 | * Compressed/encrypted index root just means that the newly | ||
780 | * created files in that directory should be created compressed/ | ||
781 | * encrypted. However index root cannot be both compressed and | ||
782 | * encrypted. | ||
783 | */ | ||
784 | if (ctx->attr->flags & ATTR_COMPRESSION_MASK) | ||
785 | NInoSetCompressed(ni); | ||
786 | if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { | ||
787 | if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { | ||
788 | ntfs_error(vi->i_sb, "Found encrypted and " | ||
789 | "compressed attribute."); | ||
790 | goto unm_err_out; | ||
791 | } | ||
792 | NInoSetEncrypted(ni); | ||
793 | } | ||
794 | if (ctx->attr->flags & ATTR_IS_SPARSE) | ||
795 | NInoSetSparse(ni); | ||
796 | ir = (INDEX_ROOT*)((char*)ctx->attr + le16_to_cpu( | ||
797 | ctx->attr->data.resident.value_offset)); | ||
798 | ir_end = (char*)ir + le32_to_cpu( | ||
799 | ctx->attr->data.resident.value_length); | ||
800 | if (ir_end > (char*)ctx->mrec + vol->mft_record_size) { | ||
801 | ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is " | ||
802 | "corrupt."); | ||
803 | goto unm_err_out; | ||
804 | } | ||
805 | index_end = (char*)&ir->index + | ||
806 | le32_to_cpu(ir->index.index_length); | ||
807 | if (index_end > ir_end) { | ||
808 | ntfs_error(vi->i_sb, "Directory index is corrupt."); | ||
809 | goto unm_err_out; | ||
810 | } | ||
811 | if (ir->type != AT_FILE_NAME) { | ||
812 | ntfs_error(vi->i_sb, "Indexed attribute is not " | ||
813 | "$FILE_NAME."); | ||
814 | goto unm_err_out; | ||
815 | } | ||
816 | if (ir->collation_rule != COLLATION_FILE_NAME) { | ||
817 | ntfs_error(vi->i_sb, "Index collation rule is not " | ||
818 | "COLLATION_FILE_NAME."); | ||
819 | goto unm_err_out; | ||
820 | } | ||
821 | ni->itype.index.collation_rule = ir->collation_rule; | ||
822 | ni->itype.index.block_size = le32_to_cpu(ir->index_block_size); | ||
823 | if (ni->itype.index.block_size & | ||
824 | (ni->itype.index.block_size - 1)) { | ||
825 | ntfs_error(vi->i_sb, "Index block size (%u) is not a " | ||
826 | "power of two.", | ||
827 | ni->itype.index.block_size); | ||
828 | goto unm_err_out; | ||
829 | } | ||
830 | if (ni->itype.index.block_size > PAGE_CACHE_SIZE) { | ||
831 | ntfs_error(vi->i_sb, "Index block size (%u) > " | ||
832 | "PAGE_CACHE_SIZE (%ld) is not " | ||
833 | "supported. Sorry.", | ||
834 | ni->itype.index.block_size, | ||
835 | PAGE_CACHE_SIZE); | ||
836 | err = -EOPNOTSUPP; | ||
837 | goto unm_err_out; | ||
838 | } | ||
839 | if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) { | ||
840 | ntfs_error(vi->i_sb, "Index block size (%u) < " | ||
841 | "NTFS_BLOCK_SIZE (%i) is not " | ||
842 | "supported. Sorry.", | ||
843 | ni->itype.index.block_size, | ||
844 | NTFS_BLOCK_SIZE); | ||
845 | err = -EOPNOTSUPP; | ||
846 | goto unm_err_out; | ||
847 | } | ||
848 | ni->itype.index.block_size_bits = | ||
849 | ffs(ni->itype.index.block_size) - 1; | ||
850 | /* Determine the size of a vcn in the directory index. */ | ||
851 | if (vol->cluster_size <= ni->itype.index.block_size) { | ||
852 | ni->itype.index.vcn_size = vol->cluster_size; | ||
853 | ni->itype.index.vcn_size_bits = vol->cluster_size_bits; | ||
854 | } else { | ||
855 | ni->itype.index.vcn_size = vol->sector_size; | ||
856 | ni->itype.index.vcn_size_bits = vol->sector_size_bits; | ||
857 | } | ||
858 | |||
859 | /* Setup the index allocation attribute, even if not present. */ | ||
860 | NInoSetMstProtected(ni); | ||
861 | ni->type = AT_INDEX_ALLOCATION; | ||
862 | ni->name = I30; | ||
863 | ni->name_len = 4; | ||
864 | |||
865 | if (!(ir->index.flags & LARGE_INDEX)) { | ||
866 | /* No index allocation. */ | ||
867 | vi->i_size = ni->initialized_size = | ||
868 | ni->allocated_size = 0; | ||
869 | /* We are done with the mft record, so we release it. */ | ||
870 | ntfs_attr_put_search_ctx(ctx); | ||
871 | unmap_mft_record(ni); | ||
872 | m = NULL; | ||
873 | ctx = NULL; | ||
874 | goto skip_large_dir_stuff; | ||
875 | } /* LARGE_INDEX: Index allocation present. Setup state. */ | ||
876 | NInoSetIndexAllocPresent(ni); | ||
877 | /* Find index allocation attribute. */ | ||
878 | ntfs_attr_reinit_search_ctx(ctx); | ||
879 | err = ntfs_attr_lookup(AT_INDEX_ALLOCATION, I30, 4, | ||
880 | CASE_SENSITIVE, 0, NULL, 0, ctx); | ||
881 | if (unlikely(err)) { | ||
882 | if (err == -ENOENT) | ||
883 | ntfs_error(vi->i_sb, "$INDEX_ALLOCATION " | ||
884 | "attribute is not present but " | ||
885 | "$INDEX_ROOT indicated it is."); | ||
886 | else | ||
887 | ntfs_error(vi->i_sb, "Failed to lookup " | ||
888 | "$INDEX_ALLOCATION " | ||
889 | "attribute."); | ||
890 | goto unm_err_out; | ||
891 | } | ||
892 | if (!ctx->attr->non_resident) { | ||
893 | ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " | ||
894 | "is resident."); | ||
895 | goto unm_err_out; | ||
896 | } | ||
897 | /* | ||
898 | * Ensure the attribute name is placed before the mapping pairs | ||
899 | * array. | ||
900 | */ | ||
901 | if (unlikely(ctx->attr->name_length && | ||
902 | (le16_to_cpu(ctx->attr->name_offset) >= | ||
903 | le16_to_cpu(ctx->attr->data.non_resident. | ||
904 | mapping_pairs_offset)))) { | ||
905 | ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name " | ||
906 | "is placed after the mapping pairs " | ||
907 | "array."); | ||
908 | goto unm_err_out; | ||
909 | } | ||
910 | if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { | ||
911 | ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " | ||
912 | "is encrypted."); | ||
913 | goto unm_err_out; | ||
914 | } | ||
915 | if (ctx->attr->flags & ATTR_IS_SPARSE) { | ||
916 | ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " | ||
917 | "is sparse."); | ||
918 | goto unm_err_out; | ||
919 | } | ||
920 | if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { | ||
921 | ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " | ||
922 | "is compressed."); | ||
923 | goto unm_err_out; | ||
924 | } | ||
925 | if (ctx->attr->data.non_resident.lowest_vcn) { | ||
926 | ntfs_error(vi->i_sb, "First extent of " | ||
927 | "$INDEX_ALLOCATION attribute has non " | ||
928 | "zero lowest_vcn."); | ||
929 | goto unm_err_out; | ||
930 | } | ||
931 | vi->i_size = sle64_to_cpu( | ||
932 | ctx->attr->data.non_resident.data_size); | ||
933 | ni->initialized_size = sle64_to_cpu( | ||
934 | ctx->attr->data.non_resident.initialized_size); | ||
935 | ni->allocated_size = sle64_to_cpu( | ||
936 | ctx->attr->data.non_resident.allocated_size); | ||
937 | /* | ||
938 | * We are done with the mft record, so we release it. Otherwise | ||
939 | * we would deadlock in ntfs_attr_iget(). | ||
940 | */ | ||
941 | ntfs_attr_put_search_ctx(ctx); | ||
942 | unmap_mft_record(ni); | ||
943 | m = NULL; | ||
944 | ctx = NULL; | ||
945 | /* Get the index bitmap attribute inode. */ | ||
946 | bvi = ntfs_attr_iget(vi, AT_BITMAP, I30, 4); | ||
947 | if (IS_ERR(bvi)) { | ||
948 | ntfs_error(vi->i_sb, "Failed to get bitmap attribute."); | ||
949 | err = PTR_ERR(bvi); | ||
950 | goto unm_err_out; | ||
951 | } | ||
952 | ni->itype.index.bmp_ino = bvi; | ||
953 | bni = NTFS_I(bvi); | ||
954 | if (NInoCompressed(bni) || NInoEncrypted(bni) || | ||
955 | NInoSparse(bni)) { | ||
956 | ntfs_error(vi->i_sb, "$BITMAP attribute is compressed " | ||
957 | "and/or encrypted and/or sparse."); | ||
958 | goto unm_err_out; | ||
959 | } | ||
960 | /* Consistency check bitmap size vs. index allocation size. */ | ||
961 | if ((bvi->i_size << 3) < (vi->i_size >> | ||
962 | ni->itype.index.block_size_bits)) { | ||
963 | ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) " | ||
964 | "for index allocation (0x%llx).", | ||
965 | bvi->i_size << 3, vi->i_size); | ||
966 | goto unm_err_out; | ||
967 | } | ||
968 | skip_large_dir_stuff: | ||
969 | /* Setup the operations for this inode. */ | ||
970 | vi->i_op = &ntfs_dir_inode_ops; | ||
971 | vi->i_fop = &ntfs_dir_ops; | ||
972 | } else { | ||
973 | /* It is a file. */ | ||
974 | ntfs_attr_reinit_search_ctx(ctx); | ||
975 | |||
976 | /* Setup the data attribute, even if not present. */ | ||
977 | ni->type = AT_DATA; | ||
978 | ni->name = NULL; | ||
979 | ni->name_len = 0; | ||
980 | |||
981 | /* Find first extent of the unnamed data attribute. */ | ||
982 | err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, 0, NULL, 0, ctx); | ||
983 | if (unlikely(err)) { | ||
984 | vi->i_size = ni->initialized_size = | ||
985 | ni->allocated_size = 0; | ||
986 | if (err != -ENOENT) { | ||
987 | ntfs_error(vi->i_sb, "Failed to lookup $DATA " | ||
988 | "attribute."); | ||
989 | goto unm_err_out; | ||
990 | } | ||
991 | /* | ||
992 | * FILE_Secure does not have an unnamed $DATA | ||
993 | * attribute, so we special case it here. | ||
994 | */ | ||
995 | if (vi->i_ino == FILE_Secure) | ||
996 | goto no_data_attr_special_case; | ||
997 | /* | ||
998 | * Most if not all the system files in the $Extend | ||
999 | * system directory do not have unnamed data | ||
1000 | * attributes so we need to check if the parent | ||
1001 | * directory of the file is FILE_Extend and if it is | ||
1002 | * ignore this error. To do this we need to get the | ||
1003 | * name of this inode from the mft record as the name | ||
1004 | * contains the back reference to the parent directory. | ||
1005 | */ | ||
1006 | if (ntfs_is_extended_system_file(ctx) > 0) | ||
1007 | goto no_data_attr_special_case; | ||
1008 | // FIXME: File is corrupt! Hot-fix with empty data | ||
1009 | // attribute if recovery option is set. | ||
1010 | ntfs_error(vi->i_sb, "$DATA attribute is missing."); | ||
1011 | goto unm_err_out; | ||
1012 | } | ||
1013 | /* Setup the state. */ | ||
1014 | if (ctx->attr->non_resident) { | ||
1015 | NInoSetNonResident(ni); | ||
1016 | if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { | ||
1017 | NInoSetCompressed(ni); | ||
1018 | if (vol->cluster_size > 4096) { | ||
1019 | ntfs_error(vi->i_sb, "Found " | ||
1020 | "compressed data but " | ||
1021 | "compression is disabled due " | ||
1022 | "to cluster size (%i) > 4kiB.", | ||
1023 | vol->cluster_size); | ||
1024 | goto unm_err_out; | ||
1025 | } | ||
1026 | if ((ctx->attr->flags & ATTR_COMPRESSION_MASK) | ||
1027 | != ATTR_IS_COMPRESSED) { | ||
1028 | ntfs_error(vi->i_sb, "Found " | ||
1029 | "unknown compression method or " | ||
1030 | "corrupt file."); | ||
1031 | goto unm_err_out; | ||
1032 | } | ||
1033 | ni->itype.compressed.block_clusters = 1U << | ||
1034 | ctx->attr->data.non_resident. | ||
1035 | compression_unit; | ||
1036 | if (ctx->attr->data.non_resident. | ||
1037 | compression_unit != 4) { | ||
1038 | ntfs_error(vi->i_sb, "Found " | ||
1039 | "nonstandard compression unit " | ||
1040 | "(%u instead of 4). Cannot " | ||
1041 | "handle this.", | ||
1042 | ctx->attr->data.non_resident. | ||
1043 | compression_unit); | ||
1044 | err = -EOPNOTSUPP; | ||
1045 | goto unm_err_out; | ||
1046 | } | ||
1047 | ni->itype.compressed.block_size = 1U << ( | ||
1048 | ctx->attr->data.non_resident. | ||
1049 | compression_unit + | ||
1050 | vol->cluster_size_bits); | ||
1051 | ni->itype.compressed.block_size_bits = ffs( | ||
1052 | ni->itype.compressed.block_size) - 1; | ||
1053 | } | ||
1054 | if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { | ||
1055 | if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { | ||
1056 | ntfs_error(vi->i_sb, "Found encrypted " | ||
1057 | "and compressed data."); | ||
1058 | goto unm_err_out; | ||
1059 | } | ||
1060 | NInoSetEncrypted(ni); | ||
1061 | } | ||
1062 | if (ctx->attr->flags & ATTR_IS_SPARSE) | ||
1063 | NInoSetSparse(ni); | ||
1064 | if (ctx->attr->data.non_resident.lowest_vcn) { | ||
1065 | ntfs_error(vi->i_sb, "First extent of $DATA " | ||
1066 | "attribute has non zero " | ||
1067 | "lowest_vcn."); | ||
1068 | goto unm_err_out; | ||
1069 | } | ||
1070 | /* Setup all the sizes. */ | ||
1071 | vi->i_size = sle64_to_cpu( | ||
1072 | ctx->attr->data.non_resident.data_size); | ||
1073 | ni->initialized_size = sle64_to_cpu( | ||
1074 | ctx->attr->data.non_resident. | ||
1075 | initialized_size); | ||
1076 | ni->allocated_size = sle64_to_cpu( | ||
1077 | ctx->attr->data.non_resident. | ||
1078 | allocated_size); | ||
1079 | if (NInoCompressed(ni)) { | ||
1080 | ni->itype.compressed.size = sle64_to_cpu( | ||
1081 | ctx->attr->data.non_resident. | ||
1082 | compressed_size); | ||
1083 | } | ||
1084 | } else { /* Resident attribute. */ | ||
1085 | /* | ||
1086 | * Make all sizes equal for simplicity in read code | ||
1087 | * paths. FIXME: Need to keep this in mind when | ||
1088 | * converting to non-resident attribute in write code | ||
1089 | * path. (Probably only affects truncate().) | ||
1090 | */ | ||
1091 | vi->i_size = ni->initialized_size = ni->allocated_size = | ||
1092 | le32_to_cpu( | ||
1093 | ctx->attr->data.resident.value_length); | ||
1094 | } | ||
1095 | no_data_attr_special_case: | ||
1096 | /* We are done with the mft record, so we release it. */ | ||
1097 | ntfs_attr_put_search_ctx(ctx); | ||
1098 | unmap_mft_record(ni); | ||
1099 | m = NULL; | ||
1100 | ctx = NULL; | ||
1101 | /* Setup the operations for this inode. */ | ||
1102 | vi->i_op = &ntfs_file_inode_ops; | ||
1103 | vi->i_fop = &ntfs_file_ops; | ||
1104 | } | ||
1105 | if (NInoMstProtected(ni)) | ||
1106 | vi->i_mapping->a_ops = &ntfs_mst_aops; | ||
1107 | else | ||
1108 | vi->i_mapping->a_ops = &ntfs_aops; | ||
1109 | /* | ||
1110 | * The number of 512-byte blocks used on disk (for stat). This is in so | ||
1111 | * far inaccurate as it doesn't account for any named streams or other | ||
1112 | * special non-resident attributes, but that is how Windows works, too, | ||
1113 | * so we are at least consistent with Windows, if not entirely | ||
1114 | * consistent with the Linux Way. Doing it the Linux Way would cause a | ||
1115 | * significant slowdown as it would involve iterating over all | ||
1116 | * attributes in the mft record and adding the allocated/compressed | ||
1117 | * sizes of all non-resident attributes present to give us the Linux | ||
1118 | * correct size that should go into i_blocks (after division by 512). | ||
1119 | */ | ||
1120 | if (S_ISDIR(vi->i_mode) || !NInoCompressed(ni)) | ||
1121 | vi->i_blocks = ni->allocated_size >> 9; | ||
1122 | else | ||
1123 | vi->i_blocks = ni->itype.compressed.size >> 9; | ||
1124 | |||
1125 | ntfs_debug("Done."); | ||
1126 | return 0; | ||
1127 | |||
1128 | unm_err_out: | ||
1129 | if (!err) | ||
1130 | err = -EIO; | ||
1131 | if (ctx) | ||
1132 | ntfs_attr_put_search_ctx(ctx); | ||
1133 | if (m) | ||
1134 | unmap_mft_record(ni); | ||
1135 | err_out: | ||
1136 | ntfs_error(vol->sb, "Failed with error code %i. Marking corrupt " | ||
1137 | "inode 0x%lx as bad. Run chkdsk.", err, vi->i_ino); | ||
1138 | make_bad_inode(vi); | ||
1139 | if (err != -EOPNOTSUPP && err != -ENOMEM) | ||
1140 | NVolSetErrors(vol); | ||
1141 | return err; | ||
1142 | } | ||
1143 | |||
1144 | /** | ||
1145 | * ntfs_read_locked_attr_inode - read an attribute inode from its base inode | ||
1146 | * @base_vi: base inode | ||
1147 | * @vi: attribute inode to read | ||
1148 | * | ||
1149 | * ntfs_read_locked_attr_inode() is called from ntfs_attr_iget() to read the | ||
1150 | * attribute inode described by @vi into memory from the base mft record | ||
1151 | * described by @base_ni. | ||
1152 | * | ||
1153 | * ntfs_read_locked_attr_inode() maps, pins and locks the base inode for | ||
1154 | * reading and looks up the attribute described by @vi before setting up the | ||
1155 | * necessary fields in @vi as well as initializing the ntfs inode. | ||
1156 | * | ||
1157 | * Q: What locks are held when the function is called? | ||
1158 | * A: i_state has I_LOCK set, hence the inode is locked, also | ||
1159 | * i_count is set to 1, so it is not going to go away | ||
1160 | * | ||
1161 | * Return 0 on success and -errno on error. In the error case, the inode will | ||
1162 | * have had make_bad_inode() executed on it. | ||
1163 | */ | ||
1164 | static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) | ||
1165 | { | ||
1166 | ntfs_volume *vol = NTFS_SB(vi->i_sb); | ||
1167 | ntfs_inode *ni, *base_ni; | ||
1168 | MFT_RECORD *m; | ||
1169 | ntfs_attr_search_ctx *ctx; | ||
1170 | int err = 0; | ||
1171 | |||
1172 | ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino); | ||
1173 | |||
1174 | ntfs_init_big_inode(vi); | ||
1175 | |||
1176 | ni = NTFS_I(vi); | ||
1177 | base_ni = NTFS_I(base_vi); | ||
1178 | |||
1179 | /* Just mirror the values from the base inode. */ | ||
1180 | vi->i_blksize = base_vi->i_blksize; | ||
1181 | vi->i_version = base_vi->i_version; | ||
1182 | vi->i_uid = base_vi->i_uid; | ||
1183 | vi->i_gid = base_vi->i_gid; | ||
1184 | vi->i_nlink = base_vi->i_nlink; | ||
1185 | vi->i_mtime = base_vi->i_mtime; | ||
1186 | vi->i_ctime = base_vi->i_ctime; | ||
1187 | vi->i_atime = base_vi->i_atime; | ||
1188 | vi->i_generation = ni->seq_no = base_ni->seq_no; | ||
1189 | |||
1190 | /* Set inode type to zero but preserve permissions. */ | ||
1191 | vi->i_mode = base_vi->i_mode & ~S_IFMT; | ||
1192 | |||
1193 | m = map_mft_record(base_ni); | ||
1194 | if (IS_ERR(m)) { | ||
1195 | err = PTR_ERR(m); | ||
1196 | goto err_out; | ||
1197 | } | ||
1198 | ctx = ntfs_attr_get_search_ctx(base_ni, m); | ||
1199 | if (!ctx) { | ||
1200 | err = -ENOMEM; | ||
1201 | goto unm_err_out; | ||
1202 | } | ||
1203 | |||
1204 | /* Find the attribute. */ | ||
1205 | err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, | ||
1206 | CASE_SENSITIVE, 0, NULL, 0, ctx); | ||
1207 | if (unlikely(err)) | ||
1208 | goto unm_err_out; | ||
1209 | |||
1210 | if (!ctx->attr->non_resident) { | ||
1211 | /* Ensure the attribute name is placed before the value. */ | ||
1212 | if (unlikely(ctx->attr->name_length && | ||
1213 | (le16_to_cpu(ctx->attr->name_offset) >= | ||
1214 | le16_to_cpu(ctx->attr->data.resident. | ||
1215 | value_offset)))) { | ||
1216 | ntfs_error(vol->sb, "Attribute name is placed after " | ||
1217 | "the attribute value."); | ||
1218 | goto unm_err_out; | ||
1219 | } | ||
1220 | if (NInoMstProtected(ni) || ctx->attr->flags) { | ||
1221 | ntfs_error(vi->i_sb, "Found mst protected attribute " | ||
1222 | "or attribute with non-zero flags but " | ||
1223 | "the attribute is resident. Please " | ||
1224 | "report you saw this message to " | ||
1225 | "linux-ntfs-dev@lists.sourceforge.net"); | ||
1226 | goto unm_err_out; | ||
1227 | } | ||
1228 | /* | ||
1229 | * Resident attribute. Make all sizes equal for simplicity in | ||
1230 | * read code paths. | ||
1231 | */ | ||
1232 | vi->i_size = ni->initialized_size = ni->allocated_size = | ||
1233 | le32_to_cpu(ctx->attr->data.resident.value_length); | ||
1234 | } else { | ||
1235 | NInoSetNonResident(ni); | ||
1236 | /* | ||
1237 | * Ensure the attribute name is placed before the mapping pairs | ||
1238 | * array. | ||
1239 | */ | ||
1240 | if (unlikely(ctx->attr->name_length && | ||
1241 | (le16_to_cpu(ctx->attr->name_offset) >= | ||
1242 | le16_to_cpu(ctx->attr->data.non_resident. | ||
1243 | mapping_pairs_offset)))) { | ||
1244 | ntfs_error(vol->sb, "Attribute name is placed after " | ||
1245 | "the mapping pairs array."); | ||
1246 | goto unm_err_out; | ||
1247 | } | ||
1248 | if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { | ||
1249 | if (NInoMstProtected(ni)) { | ||
1250 | ntfs_error(vi->i_sb, "Found mst protected " | ||
1251 | "attribute but the attribute " | ||
1252 | "is compressed. Please report " | ||
1253 | "you saw this message to " | ||
1254 | "linux-ntfs-dev@lists." | ||
1255 | "sourceforge.net"); | ||
1256 | goto unm_err_out; | ||
1257 | } | ||
1258 | NInoSetCompressed(ni); | ||
1259 | if ((ni->type != AT_DATA) || (ni->type == AT_DATA && | ||
1260 | ni->name_len)) { | ||
1261 | ntfs_error(vi->i_sb, "Found compressed " | ||
1262 | "non-data or named data " | ||
1263 | "attribute. Please report " | ||
1264 | "you saw this message to " | ||
1265 | "linux-ntfs-dev@lists." | ||
1266 | "sourceforge.net"); | ||
1267 | goto unm_err_out; | ||
1268 | } | ||
1269 | if (vol->cluster_size > 4096) { | ||
1270 | ntfs_error(vi->i_sb, "Found compressed " | ||
1271 | "attribute but compression is " | ||
1272 | "disabled due to cluster size " | ||
1273 | "(%i) > 4kiB.", | ||
1274 | vol->cluster_size); | ||
1275 | goto unm_err_out; | ||
1276 | } | ||
1277 | if ((ctx->attr->flags & ATTR_COMPRESSION_MASK) | ||
1278 | != ATTR_IS_COMPRESSED) { | ||
1279 | ntfs_error(vi->i_sb, "Found unknown " | ||
1280 | "compression method."); | ||
1281 | goto unm_err_out; | ||
1282 | } | ||
1283 | ni->itype.compressed.block_clusters = 1U << | ||
1284 | ctx->attr->data.non_resident. | ||
1285 | compression_unit; | ||
1286 | if (ctx->attr->data.non_resident.compression_unit != | ||
1287 | 4) { | ||
1288 | ntfs_error(vi->i_sb, "Found nonstandard " | ||
1289 | "compression unit (%u instead " | ||
1290 | "of 4). Cannot handle this.", | ||
1291 | ctx->attr->data.non_resident. | ||
1292 | compression_unit); | ||
1293 | err = -EOPNOTSUPP; | ||
1294 | goto unm_err_out; | ||
1295 | } | ||
1296 | ni->itype.compressed.block_size = 1U << ( | ||
1297 | ctx->attr->data.non_resident. | ||
1298 | compression_unit + | ||
1299 | vol->cluster_size_bits); | ||
1300 | ni->itype.compressed.block_size_bits = ffs( | ||
1301 | ni->itype.compressed.block_size) - 1; | ||
1302 | } | ||
1303 | if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { | ||
1304 | if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { | ||
1305 | ntfs_error(vi->i_sb, "Found encrypted " | ||
1306 | "and compressed data."); | ||
1307 | goto unm_err_out; | ||
1308 | } | ||
1309 | if (NInoMstProtected(ni)) { | ||
1310 | ntfs_error(vi->i_sb, "Found mst protected " | ||
1311 | "attribute but the attribute " | ||
1312 | "is encrypted. Please report " | ||
1313 | "you saw this message to " | ||
1314 | "linux-ntfs-dev@lists." | ||
1315 | "sourceforge.net"); | ||
1316 | goto unm_err_out; | ||
1317 | } | ||
1318 | NInoSetEncrypted(ni); | ||
1319 | } | ||
1320 | if (ctx->attr->flags & ATTR_IS_SPARSE) { | ||
1321 | if (NInoMstProtected(ni)) { | ||
1322 | ntfs_error(vi->i_sb, "Found mst protected " | ||
1323 | "attribute but the attribute " | ||
1324 | "is sparse. Please report " | ||
1325 | "you saw this message to " | ||
1326 | "linux-ntfs-dev@lists." | ||
1327 | "sourceforge.net"); | ||
1328 | goto unm_err_out; | ||
1329 | } | ||
1330 | NInoSetSparse(ni); | ||
1331 | } | ||
1332 | if (ctx->attr->data.non_resident.lowest_vcn) { | ||
1333 | ntfs_error(vi->i_sb, "First extent of attribute has " | ||
1334 | "non-zero lowest_vcn."); | ||
1335 | goto unm_err_out; | ||
1336 | } | ||
1337 | /* Setup all the sizes. */ | ||
1338 | vi->i_size = sle64_to_cpu( | ||
1339 | ctx->attr->data.non_resident.data_size); | ||
1340 | ni->initialized_size = sle64_to_cpu( | ||
1341 | ctx->attr->data.non_resident.initialized_size); | ||
1342 | ni->allocated_size = sle64_to_cpu( | ||
1343 | ctx->attr->data.non_resident.allocated_size); | ||
1344 | if (NInoCompressed(ni)) { | ||
1345 | ni->itype.compressed.size = sle64_to_cpu( | ||
1346 | ctx->attr->data.non_resident. | ||
1347 | compressed_size); | ||
1348 | } | ||
1349 | } | ||
1350 | |||
1351 | /* Setup the operations for this attribute inode. */ | ||
1352 | vi->i_op = NULL; | ||
1353 | vi->i_fop = NULL; | ||
1354 | if (NInoMstProtected(ni)) | ||
1355 | vi->i_mapping->a_ops = &ntfs_mst_aops; | ||
1356 | else | ||
1357 | vi->i_mapping->a_ops = &ntfs_aops; | ||
1358 | |||
1359 | if (!NInoCompressed(ni)) | ||
1360 | vi->i_blocks = ni->allocated_size >> 9; | ||
1361 | else | ||
1362 | vi->i_blocks = ni->itype.compressed.size >> 9; | ||
1363 | |||
1364 | /* | ||
1365 | * Make sure the base inode doesn't go away and attach it to the | ||
1366 | * attribute inode. | ||
1367 | */ | ||
1368 | igrab(base_vi); | ||
1369 | ni->ext.base_ntfs_ino = base_ni; | ||
1370 | ni->nr_extents = -1; | ||
1371 | |||
1372 | ntfs_attr_put_search_ctx(ctx); | ||
1373 | unmap_mft_record(base_ni); | ||
1374 | |||
1375 | ntfs_debug("Done."); | ||
1376 | return 0; | ||
1377 | |||
1378 | unm_err_out: | ||
1379 | if (!err) | ||
1380 | err = -EIO; | ||
1381 | if (ctx) | ||
1382 | ntfs_attr_put_search_ctx(ctx); | ||
1383 | unmap_mft_record(base_ni); | ||
1384 | err_out: | ||
1385 | ntfs_error(vol->sb, "Failed with error code %i while reading attribute " | ||
1386 | "inode (mft_no 0x%lx, type 0x%x, name_len %i). " | ||
1387 | "Marking corrupt inode and base inode 0x%lx as bad. " | ||
1388 | "Run chkdsk.", err, vi->i_ino, ni->type, ni->name_len, | ||
1389 | base_vi->i_ino); | ||
1390 | make_bad_inode(vi); | ||
1391 | make_bad_inode(base_vi); | ||
1392 | if (err != -ENOMEM) | ||
1393 | NVolSetErrors(vol); | ||
1394 | return err; | ||
1395 | } | ||
1396 | |||
1397 | /** | ||
1398 | * ntfs_read_locked_index_inode - read an index inode from its base inode | ||
1399 | * @base_vi: base inode | ||
1400 | * @vi: index inode to read | ||
1401 | * | ||
1402 | * ntfs_read_locked_index_inode() is called from ntfs_index_iget() to read the | ||
1403 | * index inode described by @vi into memory from the base mft record described | ||
1404 | * by @base_vi. | ||
1405 | * | ||
1406 | * ntfs_read_locked_index_inode() maps, pins and locks the base inode for | ||
1407 | * reading and looks up the attributes relating to the index described by @vi | ||
1408 | * before setting up the necessary fields in @vi as well as initializing the | ||
1409 | * ntfs inode. | ||
1410 | * | ||
1411 | * Note, index inodes are essentially attribute inodes (NInoAttr() is true) | ||
1412 | * with the attribute type set to AT_INDEX_ALLOCATION. Apart from that, they | ||
1413 | * are setup like directory inodes since directories are a special case of | ||
1414 | * indices so they need to be treated in much the same way. Most importantly, | ||
1415 | * for small indices the index allocation attribute might not actually exist. | ||
1416 | * However, the index root attribute always exists but this does not need to | ||
1417 | * have an inode associated with it and this is why we define a new inode type | ||
1418 | * index. Also, like for directories, we need to have an attribute inode for | ||
1419 | * the bitmap attribute corresponding to the index allocation attribute and we | ||
1420 | * can store this in the appropriate field of the inode, just like we do for | ||
1421 | * normal directory inodes. | ||
1422 | * | ||
1423 | * Q: What locks are held when the function is called? | ||
1424 | * A: i_state has I_LOCK set, hence the inode is locked, also | ||
1425 | * i_count is set to 1, so it is not going to go away | ||
1426 | * | ||
1427 | * Return 0 on success and -errno on error. In the error case, the inode will | ||
1428 | * have had make_bad_inode() executed on it. | ||
1429 | */ | ||
1430 | static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) | ||
1431 | { | ||
1432 | ntfs_volume *vol = NTFS_SB(vi->i_sb); | ||
1433 | ntfs_inode *ni, *base_ni, *bni; | ||
1434 | struct inode *bvi; | ||
1435 | MFT_RECORD *m; | ||
1436 | ntfs_attr_search_ctx *ctx; | ||
1437 | INDEX_ROOT *ir; | ||
1438 | u8 *ir_end, *index_end; | ||
1439 | int err = 0; | ||
1440 | |||
1441 | ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino); | ||
1442 | ntfs_init_big_inode(vi); | ||
1443 | ni = NTFS_I(vi); | ||
1444 | base_ni = NTFS_I(base_vi); | ||
1445 | /* Just mirror the values from the base inode. */ | ||
1446 | vi->i_blksize = base_vi->i_blksize; | ||
1447 | vi->i_version = base_vi->i_version; | ||
1448 | vi->i_uid = base_vi->i_uid; | ||
1449 | vi->i_gid = base_vi->i_gid; | ||
1450 | vi->i_nlink = base_vi->i_nlink; | ||
1451 | vi->i_mtime = base_vi->i_mtime; | ||
1452 | vi->i_ctime = base_vi->i_ctime; | ||
1453 | vi->i_atime = base_vi->i_atime; | ||
1454 | vi->i_generation = ni->seq_no = base_ni->seq_no; | ||
1455 | /* Set inode type to zero but preserve permissions. */ | ||
1456 | vi->i_mode = base_vi->i_mode & ~S_IFMT; | ||
1457 | /* Map the mft record for the base inode. */ | ||
1458 | m = map_mft_record(base_ni); | ||
1459 | if (IS_ERR(m)) { | ||
1460 | err = PTR_ERR(m); | ||
1461 | goto err_out; | ||
1462 | } | ||
1463 | ctx = ntfs_attr_get_search_ctx(base_ni, m); | ||
1464 | if (!ctx) { | ||
1465 | err = -ENOMEM; | ||
1466 | goto unm_err_out; | ||
1467 | } | ||
1468 | /* Find the index root attribute. */ | ||
1469 | err = ntfs_attr_lookup(AT_INDEX_ROOT, ni->name, ni->name_len, | ||
1470 | CASE_SENSITIVE, 0, NULL, 0, ctx); | ||
1471 | if (unlikely(err)) { | ||
1472 | if (err == -ENOENT) | ||
1473 | ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is " | ||
1474 | "missing."); | ||
1475 | goto unm_err_out; | ||
1476 | } | ||
1477 | /* Set up the state. */ | ||
1478 | if (unlikely(ctx->attr->non_resident)) { | ||
1479 | ntfs_error(vol->sb, "$INDEX_ROOT attribute is not resident."); | ||
1480 | goto unm_err_out; | ||
1481 | } | ||
1482 | /* Ensure the attribute name is placed before the value. */ | ||
1483 | if (unlikely(ctx->attr->name_length && | ||
1484 | (le16_to_cpu(ctx->attr->name_offset) >= | ||
1485 | le16_to_cpu(ctx->attr->data.resident. | ||
1486 | value_offset)))) { | ||
1487 | ntfs_error(vol->sb, "$INDEX_ROOT attribute name is placed " | ||
1488 | "after the attribute value."); | ||
1489 | goto unm_err_out; | ||
1490 | } | ||
1491 | /* Compressed/encrypted/sparse index root is not allowed. */ | ||
1492 | if (ctx->attr->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED | | ||
1493 | ATTR_IS_SPARSE)) { | ||
1494 | ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index " | ||
1495 | "root attribute."); | ||
1496 | goto unm_err_out; | ||
1497 | } | ||
1498 | ir = (INDEX_ROOT*)((u8*)ctx->attr + | ||
1499 | le16_to_cpu(ctx->attr->data.resident.value_offset)); | ||
1500 | ir_end = (u8*)ir + le32_to_cpu(ctx->attr->data.resident.value_length); | ||
1501 | if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) { | ||
1502 | ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is corrupt."); | ||
1503 | goto unm_err_out; | ||
1504 | } | ||
1505 | index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length); | ||
1506 | if (index_end > ir_end) { | ||
1507 | ntfs_error(vi->i_sb, "Index is corrupt."); | ||
1508 | goto unm_err_out; | ||
1509 | } | ||
1510 | if (ir->type) { | ||
1511 | ntfs_error(vi->i_sb, "Index type is not 0 (type is 0x%x).", | ||
1512 | le32_to_cpu(ir->type)); | ||
1513 | goto unm_err_out; | ||
1514 | } | ||
1515 | ni->itype.index.collation_rule = ir->collation_rule; | ||
1516 | ntfs_debug("Index collation rule is 0x%x.", | ||
1517 | le32_to_cpu(ir->collation_rule)); | ||
1518 | ni->itype.index.block_size = le32_to_cpu(ir->index_block_size); | ||
1519 | if (ni->itype.index.block_size & (ni->itype.index.block_size - 1)) { | ||
1520 | ntfs_error(vi->i_sb, "Index block size (%u) is not a power of " | ||
1521 | "two.", ni->itype.index.block_size); | ||
1522 | goto unm_err_out; | ||
1523 | } | ||
1524 | if (ni->itype.index.block_size > PAGE_CACHE_SIZE) { | ||
1525 | ntfs_error(vi->i_sb, "Index block size (%u) > PAGE_CACHE_SIZE " | ||
1526 | "(%ld) is not supported. Sorry.", | ||
1527 | ni->itype.index.block_size, PAGE_CACHE_SIZE); | ||
1528 | err = -EOPNOTSUPP; | ||
1529 | goto unm_err_out; | ||
1530 | } | ||
1531 | if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) { | ||
1532 | ntfs_error(vi->i_sb, "Index block size (%u) < NTFS_BLOCK_SIZE " | ||
1533 | "(%i) is not supported. Sorry.", | ||
1534 | ni->itype.index.block_size, NTFS_BLOCK_SIZE); | ||
1535 | err = -EOPNOTSUPP; | ||
1536 | goto unm_err_out; | ||
1537 | } | ||
1538 | ni->itype.index.block_size_bits = ffs(ni->itype.index.block_size) - 1; | ||
1539 | /* Determine the size of a vcn in the index. */ | ||
1540 | if (vol->cluster_size <= ni->itype.index.block_size) { | ||
1541 | ni->itype.index.vcn_size = vol->cluster_size; | ||
1542 | ni->itype.index.vcn_size_bits = vol->cluster_size_bits; | ||
1543 | } else { | ||
1544 | ni->itype.index.vcn_size = vol->sector_size; | ||
1545 | ni->itype.index.vcn_size_bits = vol->sector_size_bits; | ||
1546 | } | ||
1547 | /* Check for presence of index allocation attribute. */ | ||
1548 | if (!(ir->index.flags & LARGE_INDEX)) { | ||
1549 | /* No index allocation. */ | ||
1550 | vi->i_size = ni->initialized_size = ni->allocated_size = 0; | ||
1551 | /* We are done with the mft record, so we release it. */ | ||
1552 | ntfs_attr_put_search_ctx(ctx); | ||
1553 | unmap_mft_record(base_ni); | ||
1554 | m = NULL; | ||
1555 | ctx = NULL; | ||
1556 | goto skip_large_index_stuff; | ||
1557 | } /* LARGE_INDEX: Index allocation present. Setup state. */ | ||
1558 | NInoSetIndexAllocPresent(ni); | ||
1559 | /* Find index allocation attribute. */ | ||
1560 | ntfs_attr_reinit_search_ctx(ctx); | ||
1561 | err = ntfs_attr_lookup(AT_INDEX_ALLOCATION, ni->name, ni->name_len, | ||
1562 | CASE_SENSITIVE, 0, NULL, 0, ctx); | ||
1563 | if (unlikely(err)) { | ||
1564 | if (err == -ENOENT) | ||
1565 | ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is " | ||
1566 | "not present but $INDEX_ROOT " | ||
1567 | "indicated it is."); | ||
1568 | else | ||
1569 | ntfs_error(vi->i_sb, "Failed to lookup " | ||
1570 | "$INDEX_ALLOCATION attribute."); | ||
1571 | goto unm_err_out; | ||
1572 | } | ||
1573 | if (!ctx->attr->non_resident) { | ||
1574 | ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is " | ||
1575 | "resident."); | ||
1576 | goto unm_err_out; | ||
1577 | } | ||
1578 | /* | ||
1579 | * Ensure the attribute name is placed before the mapping pairs array. | ||
1580 | */ | ||
1581 | if (unlikely(ctx->attr->name_length && (le16_to_cpu( | ||
1582 | ctx->attr->name_offset) >= le16_to_cpu( | ||
1583 | ctx->attr->data.non_resident.mapping_pairs_offset)))) { | ||
1584 | ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name is " | ||
1585 | "placed after the mapping pairs array."); | ||
1586 | goto unm_err_out; | ||
1587 | } | ||
1588 | if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { | ||
1589 | ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is " | ||
1590 | "encrypted."); | ||
1591 | goto unm_err_out; | ||
1592 | } | ||
1593 | if (ctx->attr->flags & ATTR_IS_SPARSE) { | ||
1594 | ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is sparse."); | ||
1595 | goto unm_err_out; | ||
1596 | } | ||
1597 | if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { | ||
1598 | ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is " | ||
1599 | "compressed."); | ||
1600 | goto unm_err_out; | ||
1601 | } | ||
1602 | if (ctx->attr->data.non_resident.lowest_vcn) { | ||
1603 | ntfs_error(vi->i_sb, "First extent of $INDEX_ALLOCATION " | ||
1604 | "attribute has non zero lowest_vcn."); | ||
1605 | goto unm_err_out; | ||
1606 | } | ||
1607 | vi->i_size = sle64_to_cpu(ctx->attr->data.non_resident.data_size); | ||
1608 | ni->initialized_size = sle64_to_cpu( | ||
1609 | ctx->attr->data.non_resident.initialized_size); | ||
1610 | ni->allocated_size = sle64_to_cpu( | ||
1611 | ctx->attr->data.non_resident.allocated_size); | ||
1612 | /* | ||
1613 | * We are done with the mft record, so we release it. Otherwise | ||
1614 | * we would deadlock in ntfs_attr_iget(). | ||
1615 | */ | ||
1616 | ntfs_attr_put_search_ctx(ctx); | ||
1617 | unmap_mft_record(base_ni); | ||
1618 | m = NULL; | ||
1619 | ctx = NULL; | ||
1620 | /* Get the index bitmap attribute inode. */ | ||
1621 | bvi = ntfs_attr_iget(base_vi, AT_BITMAP, ni->name, ni->name_len); | ||
1622 | if (IS_ERR(bvi)) { | ||
1623 | ntfs_error(vi->i_sb, "Failed to get bitmap attribute."); | ||
1624 | err = PTR_ERR(bvi); | ||
1625 | goto unm_err_out; | ||
1626 | } | ||
1627 | bni = NTFS_I(bvi); | ||
1628 | if (NInoCompressed(bni) || NInoEncrypted(bni) || | ||
1629 | NInoSparse(bni)) { | ||
1630 | ntfs_error(vi->i_sb, "$BITMAP attribute is compressed and/or " | ||
1631 | "encrypted and/or sparse."); | ||
1632 | goto iput_unm_err_out; | ||
1633 | } | ||
1634 | /* Consistency check bitmap size vs. index allocation size. */ | ||
1635 | if ((bvi->i_size << 3) < (vi->i_size >> | ||
1636 | ni->itype.index.block_size_bits)) { | ||
1637 | ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) for " | ||
1638 | "index allocation (0x%llx).", bvi->i_size << 3, | ||
1639 | vi->i_size); | ||
1640 | goto iput_unm_err_out; | ||
1641 | } | ||
1642 | ni->itype.index.bmp_ino = bvi; | ||
1643 | skip_large_index_stuff: | ||
1644 | /* Setup the operations for this index inode. */ | ||
1645 | vi->i_op = NULL; | ||
1646 | vi->i_fop = NULL; | ||
1647 | vi->i_mapping->a_ops = &ntfs_mst_aops; | ||
1648 | vi->i_blocks = ni->allocated_size >> 9; | ||
1649 | |||
1650 | /* | ||
1651 | * Make sure the base inode doesn't go away and attach it to the | ||
1652 | * index inode. | ||
1653 | */ | ||
1654 | igrab(base_vi); | ||
1655 | ni->ext.base_ntfs_ino = base_ni; | ||
1656 | ni->nr_extents = -1; | ||
1657 | |||
1658 | ntfs_debug("Done."); | ||
1659 | return 0; | ||
1660 | |||
1661 | iput_unm_err_out: | ||
1662 | iput(bvi); | ||
1663 | unm_err_out: | ||
1664 | if (!err) | ||
1665 | err = -EIO; | ||
1666 | if (ctx) | ||
1667 | ntfs_attr_put_search_ctx(ctx); | ||
1668 | if (m) | ||
1669 | unmap_mft_record(base_ni); | ||
1670 | err_out: | ||
1671 | ntfs_error(vi->i_sb, "Failed with error code %i while reading index " | ||
1672 | "inode (mft_no 0x%lx, name_len %i.", err, vi->i_ino, | ||
1673 | ni->name_len); | ||
1674 | make_bad_inode(vi); | ||
1675 | if (err != -EOPNOTSUPP && err != -ENOMEM) | ||
1676 | NVolSetErrors(vol); | ||
1677 | return err; | ||
1678 | } | ||
1679 | |||
1680 | /** | ||
1681 | * ntfs_read_inode_mount - special read_inode for mount time use only | ||
1682 | * @vi: inode to read | ||
1683 | * | ||
1684 | * Read inode FILE_MFT at mount time, only called with super_block lock | ||
1685 | * held from within the read_super() code path. | ||
1686 | * | ||
1687 | * This function exists because when it is called the page cache for $MFT/$DATA | ||
1688 | * is not initialized and hence we cannot get at the contents of mft records | ||
1689 | * by calling map_mft_record*(). | ||
1690 | * | ||
1691 | * Further it needs to cope with the circular references problem, i.e. cannot | ||
1692 | * load any attributes other than $ATTRIBUTE_LIST until $DATA is loaded, because | ||
1693 | * we do not know where the other extent mft records are yet and again, because | ||
1694 | * we cannot call map_mft_record*() yet. Obviously this applies only when an | ||
1695 | * attribute list is actually present in $MFT inode. | ||
1696 | * | ||
1697 | * We solve these problems by starting with the $DATA attribute before anything | ||
1698 | * else and iterating using ntfs_attr_lookup($DATA) over all extents. As each | ||
1699 | * extent is found, we ntfs_mapping_pairs_decompress() including the implied | ||
1700 | * ntfs_runlists_merge(). Each step of the iteration necessarily provides | ||
1701 | * sufficient information for the next step to complete. | ||
1702 | * | ||
1703 | * This should work but there are two possible pit falls (see inline comments | ||
1704 | * below), but only time will tell if they are real pits or just smoke... | ||
1705 | */ | ||
1706 | int ntfs_read_inode_mount(struct inode *vi) | ||
1707 | { | ||
1708 | VCN next_vcn, last_vcn, highest_vcn; | ||
1709 | s64 block; | ||
1710 | struct super_block *sb = vi->i_sb; | ||
1711 | ntfs_volume *vol = NTFS_SB(sb); | ||
1712 | struct buffer_head *bh; | ||
1713 | ntfs_inode *ni; | ||
1714 | MFT_RECORD *m = NULL; | ||
1715 | ATTR_RECORD *attr; | ||
1716 | ntfs_attr_search_ctx *ctx; | ||
1717 | unsigned int i, nr_blocks; | ||
1718 | int err; | ||
1719 | |||
1720 | ntfs_debug("Entering."); | ||
1721 | |||
1722 | /* Initialize the ntfs specific part of @vi. */ | ||
1723 | ntfs_init_big_inode(vi); | ||
1724 | |||
1725 | ni = NTFS_I(vi); | ||
1726 | |||
1727 | /* Setup the data attribute. It is special as it is mst protected. */ | ||
1728 | NInoSetNonResident(ni); | ||
1729 | NInoSetMstProtected(ni); | ||
1730 | ni->type = AT_DATA; | ||
1731 | ni->name = NULL; | ||
1732 | ni->name_len = 0; | ||
1733 | |||
1734 | /* | ||
1735 | * This sets up our little cheat allowing us to reuse the async read io | ||
1736 | * completion handler for directories. | ||
1737 | */ | ||
1738 | ni->itype.index.block_size = vol->mft_record_size; | ||
1739 | ni->itype.index.block_size_bits = vol->mft_record_size_bits; | ||
1740 | |||
1741 | /* Very important! Needed to be able to call map_mft_record*(). */ | ||
1742 | vol->mft_ino = vi; | ||
1743 | |||
1744 | /* Allocate enough memory to read the first mft record. */ | ||
1745 | if (vol->mft_record_size > 64 * 1024) { | ||
1746 | ntfs_error(sb, "Unsupported mft record size %i (max 64kiB).", | ||
1747 | vol->mft_record_size); | ||
1748 | goto err_out; | ||
1749 | } | ||
1750 | i = vol->mft_record_size; | ||
1751 | if (i < sb->s_blocksize) | ||
1752 | i = sb->s_blocksize; | ||
1753 | m = (MFT_RECORD*)ntfs_malloc_nofs(i); | ||
1754 | if (!m) { | ||
1755 | ntfs_error(sb, "Failed to allocate buffer for $MFT record 0."); | ||
1756 | goto err_out; | ||
1757 | } | ||
1758 | |||
1759 | /* Determine the first block of the $MFT/$DATA attribute. */ | ||
1760 | block = vol->mft_lcn << vol->cluster_size_bits >> | ||
1761 | sb->s_blocksize_bits; | ||
1762 | nr_blocks = vol->mft_record_size >> sb->s_blocksize_bits; | ||
1763 | if (!nr_blocks) | ||
1764 | nr_blocks = 1; | ||
1765 | |||
1766 | /* Load $MFT/$DATA's first mft record. */ | ||
1767 | for (i = 0; i < nr_blocks; i++) { | ||
1768 | bh = sb_bread(sb, block++); | ||
1769 | if (!bh) { | ||
1770 | ntfs_error(sb, "Device read failed."); | ||
1771 | goto err_out; | ||
1772 | } | ||
1773 | memcpy((char*)m + (i << sb->s_blocksize_bits), bh->b_data, | ||
1774 | sb->s_blocksize); | ||
1775 | brelse(bh); | ||
1776 | } | ||
1777 | |||
1778 | /* Apply the mst fixups. */ | ||
1779 | if (post_read_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size)) { | ||
1780 | /* FIXME: Try to use the $MFTMirr now. */ | ||
1781 | ntfs_error(sb, "MST fixup failed. $MFT is corrupt."); | ||
1782 | goto err_out; | ||
1783 | } | ||
1784 | |||
1785 | /* Need this to sanity check attribute list references to $MFT. */ | ||
1786 | vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number); | ||
1787 | |||
1788 | /* Provides readpage() and sync_page() for map_mft_record(). */ | ||
1789 | vi->i_mapping->a_ops = &ntfs_mst_aops; | ||
1790 | |||
1791 | ctx = ntfs_attr_get_search_ctx(ni, m); | ||
1792 | if (!ctx) { | ||
1793 | err = -ENOMEM; | ||
1794 | goto err_out; | ||
1795 | } | ||
1796 | |||
1797 | /* Find the attribute list attribute if present. */ | ||
1798 | err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx); | ||
1799 | if (err) { | ||
1800 | if (unlikely(err != -ENOENT)) { | ||
1801 | ntfs_error(sb, "Failed to lookup attribute list " | ||
1802 | "attribute. You should run chkdsk."); | ||
1803 | goto put_err_out; | ||
1804 | } | ||
1805 | } else /* if (!err) */ { | ||
1806 | ATTR_LIST_ENTRY *al_entry, *next_al_entry; | ||
1807 | u8 *al_end; | ||
1808 | |||
1809 | ntfs_debug("Attribute list attribute found in $MFT."); | ||
1810 | NInoSetAttrList(ni); | ||
1811 | if (ctx->attr->flags & ATTR_IS_ENCRYPTED || | ||
1812 | ctx->attr->flags & ATTR_COMPRESSION_MASK || | ||
1813 | ctx->attr->flags & ATTR_IS_SPARSE) { | ||
1814 | ntfs_error(sb, "Attribute list attribute is " | ||
1815 | "compressed/encrypted/sparse. Not " | ||
1816 | "allowed. $MFT is corrupt. You should " | ||
1817 | "run chkdsk."); | ||
1818 | goto put_err_out; | ||
1819 | } | ||
1820 | /* Now allocate memory for the attribute list. */ | ||
1821 | ni->attr_list_size = (u32)ntfs_attr_size(ctx->attr); | ||
1822 | ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); | ||
1823 | if (!ni->attr_list) { | ||
1824 | ntfs_error(sb, "Not enough memory to allocate buffer " | ||
1825 | "for attribute list."); | ||
1826 | goto put_err_out; | ||
1827 | } | ||
1828 | if (ctx->attr->non_resident) { | ||
1829 | NInoSetAttrListNonResident(ni); | ||
1830 | if (ctx->attr->data.non_resident.lowest_vcn) { | ||
1831 | ntfs_error(sb, "Attribute list has non zero " | ||
1832 | "lowest_vcn. $MFT is corrupt. " | ||
1833 | "You should run chkdsk."); | ||
1834 | goto put_err_out; | ||
1835 | } | ||
1836 | /* Setup the runlist. */ | ||
1837 | ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol, | ||
1838 | ctx->attr, NULL); | ||
1839 | if (IS_ERR(ni->attr_list_rl.rl)) { | ||
1840 | err = PTR_ERR(ni->attr_list_rl.rl); | ||
1841 | ni->attr_list_rl.rl = NULL; | ||
1842 | ntfs_error(sb, "Mapping pairs decompression " | ||
1843 | "failed with error code %i.", | ||
1844 | -err); | ||
1845 | goto put_err_out; | ||
1846 | } | ||
1847 | /* Now load the attribute list. */ | ||
1848 | if ((err = load_attribute_list(vol, &ni->attr_list_rl, | ||
1849 | ni->attr_list, ni->attr_list_size, | ||
1850 | sle64_to_cpu(ctx->attr->data. | ||
1851 | non_resident.initialized_size)))) { | ||
1852 | ntfs_error(sb, "Failed to load attribute list " | ||
1853 | "attribute with error code %i.", | ||
1854 | -err); | ||
1855 | goto put_err_out; | ||
1856 | } | ||
1857 | } else /* if (!ctx.attr->non_resident) */ { | ||
1858 | if ((u8*)ctx->attr + le16_to_cpu( | ||
1859 | ctx->attr->data.resident.value_offset) + | ||
1860 | le32_to_cpu( | ||
1861 | ctx->attr->data.resident.value_length) > | ||
1862 | (u8*)ctx->mrec + vol->mft_record_size) { | ||
1863 | ntfs_error(sb, "Corrupt attribute list " | ||
1864 | "attribute."); | ||
1865 | goto put_err_out; | ||
1866 | } | ||
1867 | /* Now copy the attribute list. */ | ||
1868 | memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu( | ||
1869 | ctx->attr->data.resident.value_offset), | ||
1870 | le32_to_cpu( | ||
1871 | ctx->attr->data.resident.value_length)); | ||
1872 | } | ||
1873 | /* The attribute list is now setup in memory. */ | ||
1874 | /* | ||
1875 | * FIXME: I don't know if this case is actually possible. | ||
1876 | * According to logic it is not possible but I have seen too | ||
1877 | * many weird things in MS software to rely on logic... Thus we | ||
1878 | * perform a manual search and make sure the first $MFT/$DATA | ||
1879 | * extent is in the base inode. If it is not we abort with an | ||
1880 | * error and if we ever see a report of this error we will need | ||
1881 | * to do some magic in order to have the necessary mft record | ||
1882 | * loaded and in the right place in the page cache. But | ||
1883 | * hopefully logic will prevail and this never happens... | ||
1884 | */ | ||
1885 | al_entry = (ATTR_LIST_ENTRY*)ni->attr_list; | ||
1886 | al_end = (u8*)al_entry + ni->attr_list_size; | ||
1887 | for (;; al_entry = next_al_entry) { | ||
1888 | /* Out of bounds check. */ | ||
1889 | if ((u8*)al_entry < ni->attr_list || | ||
1890 | (u8*)al_entry > al_end) | ||
1891 | goto em_put_err_out; | ||
1892 | /* Catch the end of the attribute list. */ | ||
1893 | if ((u8*)al_entry == al_end) | ||
1894 | goto em_put_err_out; | ||
1895 | if (!al_entry->length) | ||
1896 | goto em_put_err_out; | ||
1897 | if ((u8*)al_entry + 6 > al_end || (u8*)al_entry + | ||
1898 | le16_to_cpu(al_entry->length) > al_end) | ||
1899 | goto em_put_err_out; | ||
1900 | next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry + | ||
1901 | le16_to_cpu(al_entry->length)); | ||
1902 | if (le32_to_cpu(al_entry->type) > | ||
1903 | const_le32_to_cpu(AT_DATA)) | ||
1904 | goto em_put_err_out; | ||
1905 | if (AT_DATA != al_entry->type) | ||
1906 | continue; | ||
1907 | /* We want an unnamed attribute. */ | ||
1908 | if (al_entry->name_length) | ||
1909 | goto em_put_err_out; | ||
1910 | /* Want the first entry, i.e. lowest_vcn == 0. */ | ||
1911 | if (al_entry->lowest_vcn) | ||
1912 | goto em_put_err_out; | ||
1913 | /* First entry has to be in the base mft record. */ | ||
1914 | if (MREF_LE(al_entry->mft_reference) != vi->i_ino) { | ||
1915 | /* MFT references do not match, logic fails. */ | ||
1916 | ntfs_error(sb, "BUG: The first $DATA extent " | ||
1917 | "of $MFT is not in the base " | ||
1918 | "mft record. Please report " | ||
1919 | "you saw this message to " | ||
1920 | "linux-ntfs-dev@lists." | ||
1921 | "sourceforge.net"); | ||
1922 | goto put_err_out; | ||
1923 | } else { | ||
1924 | /* Sequence numbers must match. */ | ||
1925 | if (MSEQNO_LE(al_entry->mft_reference) != | ||
1926 | ni->seq_no) | ||
1927 | goto em_put_err_out; | ||
1928 | /* Got it. All is ok. We can stop now. */ | ||
1929 | break; | ||
1930 | } | ||
1931 | } | ||
1932 | } | ||
1933 | |||
1934 | ntfs_attr_reinit_search_ctx(ctx); | ||
1935 | |||
1936 | /* Now load all attribute extents. */ | ||
1937 | attr = NULL; | ||
1938 | next_vcn = last_vcn = highest_vcn = 0; | ||
1939 | while (!(err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, next_vcn, NULL, 0, | ||
1940 | ctx))) { | ||
1941 | runlist_element *nrl; | ||
1942 | |||
1943 | /* Cache the current attribute. */ | ||
1944 | attr = ctx->attr; | ||
1945 | /* $MFT must be non-resident. */ | ||
1946 | if (!attr->non_resident) { | ||
1947 | ntfs_error(sb, "$MFT must be non-resident but a " | ||
1948 | "resident extent was found. $MFT is " | ||
1949 | "corrupt. Run chkdsk."); | ||
1950 | goto put_err_out; | ||
1951 | } | ||
1952 | /* $MFT must be uncompressed and unencrypted. */ | ||
1953 | if (attr->flags & ATTR_COMPRESSION_MASK || | ||
1954 | attr->flags & ATTR_IS_ENCRYPTED || | ||
1955 | attr->flags & ATTR_IS_SPARSE) { | ||
1956 | ntfs_error(sb, "$MFT must be uncompressed, " | ||
1957 | "non-sparse, and unencrypted but a " | ||
1958 | "compressed/sparse/encrypted extent " | ||
1959 | "was found. $MFT is corrupt. Run " | ||
1960 | "chkdsk."); | ||
1961 | goto put_err_out; | ||
1962 | } | ||
1963 | /* | ||
1964 | * Decompress the mapping pairs array of this extent and merge | ||
1965 | * the result into the existing runlist. No need for locking | ||
1966 | * as we have exclusive access to the inode at this time and we | ||
1967 | * are a mount in progress task, too. | ||
1968 | */ | ||
1969 | nrl = ntfs_mapping_pairs_decompress(vol, attr, ni->runlist.rl); | ||
1970 | if (IS_ERR(nrl)) { | ||
1971 | ntfs_error(sb, "ntfs_mapping_pairs_decompress() " | ||
1972 | "failed with error code %ld. $MFT is " | ||
1973 | "corrupt.", PTR_ERR(nrl)); | ||
1974 | goto put_err_out; | ||
1975 | } | ||
1976 | ni->runlist.rl = nrl; | ||
1977 | |||
1978 | /* Are we in the first extent? */ | ||
1979 | if (!next_vcn) { | ||
1980 | if (attr->data.non_resident.lowest_vcn) { | ||
1981 | ntfs_error(sb, "First extent of $DATA " | ||
1982 | "attribute has non zero " | ||
1983 | "lowest_vcn. $MFT is corrupt. " | ||
1984 | "You should run chkdsk."); | ||
1985 | goto put_err_out; | ||
1986 | } | ||
1987 | /* Get the last vcn in the $DATA attribute. */ | ||
1988 | last_vcn = sle64_to_cpu( | ||
1989 | attr->data.non_resident.allocated_size) | ||
1990 | >> vol->cluster_size_bits; | ||
1991 | /* Fill in the inode size. */ | ||
1992 | vi->i_size = sle64_to_cpu( | ||
1993 | attr->data.non_resident.data_size); | ||
1994 | ni->initialized_size = sle64_to_cpu(attr->data. | ||
1995 | non_resident.initialized_size); | ||
1996 | ni->allocated_size = sle64_to_cpu( | ||
1997 | attr->data.non_resident.allocated_size); | ||
1998 | /* | ||
1999 | * Verify the number of mft records does not exceed | ||
2000 | * 2^32 - 1. | ||
2001 | */ | ||
2002 | if ((vi->i_size >> vol->mft_record_size_bits) >= | ||
2003 | (1ULL << 32)) { | ||
2004 | ntfs_error(sb, "$MFT is too big! Aborting."); | ||
2005 | goto put_err_out; | ||
2006 | } | ||
2007 | /* | ||
2008 | * We have got the first extent of the runlist for | ||
2009 | * $MFT which means it is now relatively safe to call | ||
2010 | * the normal ntfs_read_inode() function. | ||
2011 | * Complete reading the inode, this will actually | ||
2012 | * re-read the mft record for $MFT, this time entering | ||
2013 | * it into the page cache with which we complete the | ||
2014 | * kick start of the volume. It should be safe to do | ||
2015 | * this now as the first extent of $MFT/$DATA is | ||
2016 | * already known and we would hope that we don't need | ||
2017 | * further extents in order to find the other | ||
2018 | * attributes belonging to $MFT. Only time will tell if | ||
2019 | * this is really the case. If not we will have to play | ||
2020 | * magic at this point, possibly duplicating a lot of | ||
2021 | * ntfs_read_inode() at this point. We will need to | ||
2022 | * ensure we do enough of its work to be able to call | ||
2023 | * ntfs_read_inode() on extents of $MFT/$DATA. But lets | ||
2024 | * hope this never happens... | ||
2025 | */ | ||
2026 | ntfs_read_locked_inode(vi); | ||
2027 | if (is_bad_inode(vi)) { | ||
2028 | ntfs_error(sb, "ntfs_read_inode() of $MFT " | ||
2029 | "failed. BUG or corrupt $MFT. " | ||
2030 | "Run chkdsk and if no errors " | ||
2031 | "are found, please report you " | ||
2032 | "saw this message to " | ||
2033 | "linux-ntfs-dev@lists." | ||
2034 | "sourceforge.net"); | ||
2035 | ntfs_attr_put_search_ctx(ctx); | ||
2036 | /* Revert to the safe super operations. */ | ||
2037 | ntfs_free(m); | ||
2038 | return -1; | ||
2039 | } | ||
2040 | /* | ||
2041 | * Re-initialize some specifics about $MFT's inode as | ||
2042 | * ntfs_read_inode() will have set up the default ones. | ||
2043 | */ | ||
2044 | /* Set uid and gid to root. */ | ||
2045 | vi->i_uid = vi->i_gid = 0; | ||
2046 | /* Regular file. No access for anyone. */ | ||
2047 | vi->i_mode = S_IFREG; | ||
2048 | /* No VFS initiated operations allowed for $MFT. */ | ||
2049 | vi->i_op = &ntfs_empty_inode_ops; | ||
2050 | vi->i_fop = &ntfs_empty_file_ops; | ||
2051 | } | ||
2052 | |||
2053 | /* Get the lowest vcn for the next extent. */ | ||
2054 | highest_vcn = sle64_to_cpu(attr->data.non_resident.highest_vcn); | ||
2055 | next_vcn = highest_vcn + 1; | ||
2056 | |||
2057 | /* Only one extent or error, which we catch below. */ | ||
2058 | if (next_vcn <= 0) | ||
2059 | break; | ||
2060 | |||
2061 | /* Avoid endless loops due to corruption. */ | ||
2062 | if (next_vcn < sle64_to_cpu( | ||
2063 | attr->data.non_resident.lowest_vcn)) { | ||
2064 | ntfs_error(sb, "$MFT has corrupt attribute list " | ||
2065 | "attribute. Run chkdsk."); | ||
2066 | goto put_err_out; | ||
2067 | } | ||
2068 | } | ||
2069 | if (err != -ENOENT) { | ||
2070 | ntfs_error(sb, "Failed to lookup $MFT/$DATA attribute extent. " | ||
2071 | "$MFT is corrupt. Run chkdsk."); | ||
2072 | goto put_err_out; | ||
2073 | } | ||
2074 | if (!attr) { | ||
2075 | ntfs_error(sb, "$MFT/$DATA attribute not found. $MFT is " | ||
2076 | "corrupt. Run chkdsk."); | ||
2077 | goto put_err_out; | ||
2078 | } | ||
2079 | if (highest_vcn && highest_vcn != last_vcn - 1) { | ||
2080 | ntfs_error(sb, "Failed to load the complete runlist for " | ||
2081 | "$MFT/$DATA. Driver bug or corrupt $MFT. " | ||
2082 | "Run chkdsk."); | ||
2083 | ntfs_debug("highest_vcn = 0x%llx, last_vcn - 1 = 0x%llx", | ||
2084 | (unsigned long long)highest_vcn, | ||
2085 | (unsigned long long)last_vcn - 1); | ||
2086 | goto put_err_out; | ||
2087 | } | ||
2088 | ntfs_attr_put_search_ctx(ctx); | ||
2089 | ntfs_debug("Done."); | ||
2090 | ntfs_free(m); | ||
2091 | return 0; | ||
2092 | |||
2093 | em_put_err_out: | ||
2094 | ntfs_error(sb, "Couldn't find first extent of $DATA attribute in " | ||
2095 | "attribute list. $MFT is corrupt. Run chkdsk."); | ||
2096 | put_err_out: | ||
2097 | ntfs_attr_put_search_ctx(ctx); | ||
2098 | err_out: | ||
2099 | ntfs_error(sb, "Failed. Marking inode as bad."); | ||
2100 | make_bad_inode(vi); | ||
2101 | ntfs_free(m); | ||
2102 | return -1; | ||
2103 | } | ||
2104 | |||
2105 | /** | ||
2106 | * ntfs_put_inode - handler for when the inode reference count is decremented | ||
2107 | * @vi: vfs inode | ||
2108 | * | ||
2109 | * The VFS calls ntfs_put_inode() every time the inode reference count (i_count) | ||
2110 | * is about to be decremented (but before the decrement itself. | ||
2111 | * | ||
2112 | * If the inode @vi is a directory with two references, one of which is being | ||
2113 | * dropped, we need to put the attribute inode for the directory index bitmap, | ||
2114 | * if it is present, otherwise the directory inode would remain pinned for | ||
2115 | * ever. | ||
2116 | */ | ||
2117 | void ntfs_put_inode(struct inode *vi) | ||
2118 | { | ||
2119 | if (S_ISDIR(vi->i_mode) && atomic_read(&vi->i_count) == 2) { | ||
2120 | ntfs_inode *ni = NTFS_I(vi); | ||
2121 | if (NInoIndexAllocPresent(ni)) { | ||
2122 | struct inode *bvi = NULL; | ||
2123 | down(&vi->i_sem); | ||
2124 | if (atomic_read(&vi->i_count) == 2) { | ||
2125 | bvi = ni->itype.index.bmp_ino; | ||
2126 | if (bvi) | ||
2127 | ni->itype.index.bmp_ino = NULL; | ||
2128 | } | ||
2129 | up(&vi->i_sem); | ||
2130 | if (bvi) | ||
2131 | iput(bvi); | ||
2132 | } | ||
2133 | } | ||
2134 | } | ||
2135 | |||
2136 | static void __ntfs_clear_inode(ntfs_inode *ni) | ||
2137 | { | ||
2138 | /* Free all alocated memory. */ | ||
2139 | down_write(&ni->runlist.lock); | ||
2140 | if (ni->runlist.rl) { | ||
2141 | ntfs_free(ni->runlist.rl); | ||
2142 | ni->runlist.rl = NULL; | ||
2143 | } | ||
2144 | up_write(&ni->runlist.lock); | ||
2145 | |||
2146 | if (ni->attr_list) { | ||
2147 | ntfs_free(ni->attr_list); | ||
2148 | ni->attr_list = NULL; | ||
2149 | } | ||
2150 | |||
2151 | down_write(&ni->attr_list_rl.lock); | ||
2152 | if (ni->attr_list_rl.rl) { | ||
2153 | ntfs_free(ni->attr_list_rl.rl); | ||
2154 | ni->attr_list_rl.rl = NULL; | ||
2155 | } | ||
2156 | up_write(&ni->attr_list_rl.lock); | ||
2157 | |||
2158 | if (ni->name_len && ni->name != I30) { | ||
2159 | /* Catch bugs... */ | ||
2160 | BUG_ON(!ni->name); | ||
2161 | kfree(ni->name); | ||
2162 | } | ||
2163 | } | ||
2164 | |||
2165 | void ntfs_clear_extent_inode(ntfs_inode *ni) | ||
2166 | { | ||
2167 | ntfs_debug("Entering for inode 0x%lx.", ni->mft_no); | ||
2168 | |||
2169 | BUG_ON(NInoAttr(ni)); | ||
2170 | BUG_ON(ni->nr_extents != -1); | ||
2171 | |||
2172 | #ifdef NTFS_RW | ||
2173 | if (NInoDirty(ni)) { | ||
2174 | if (!is_bad_inode(VFS_I(ni->ext.base_ntfs_ino))) | ||
2175 | ntfs_error(ni->vol->sb, "Clearing dirty extent inode! " | ||
2176 | "Losing data! This is a BUG!!!"); | ||
2177 | // FIXME: Do something!!! | ||
2178 | } | ||
2179 | #endif /* NTFS_RW */ | ||
2180 | |||
2181 | __ntfs_clear_inode(ni); | ||
2182 | |||
2183 | /* Bye, bye... */ | ||
2184 | ntfs_destroy_extent_inode(ni); | ||
2185 | } | ||
2186 | |||
2187 | /** | ||
2188 | * ntfs_clear_big_inode - clean up the ntfs specific part of an inode | ||
2189 | * @vi: vfs inode pending annihilation | ||
2190 | * | ||
2191 | * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode() | ||
2192 | * is called, which deallocates all memory belonging to the NTFS specific part | ||
2193 | * of the inode and returns. | ||
2194 | * | ||
2195 | * If the MFT record is dirty, we commit it before doing anything else. | ||
2196 | */ | ||
2197 | void ntfs_clear_big_inode(struct inode *vi) | ||
2198 | { | ||
2199 | ntfs_inode *ni = NTFS_I(vi); | ||
2200 | |||
2201 | /* | ||
2202 | * If the inode @vi is an index inode we need to put the attribute | ||
2203 | * inode for the index bitmap, if it is present, otherwise the index | ||
2204 | * inode would disappear and the attribute inode for the index bitmap | ||
2205 | * would no longer be referenced from anywhere and thus it would remain | ||
2206 | * pinned for ever. | ||
2207 | */ | ||
2208 | if (NInoAttr(ni) && (ni->type == AT_INDEX_ALLOCATION) && | ||
2209 | NInoIndexAllocPresent(ni) && ni->itype.index.bmp_ino) { | ||
2210 | iput(ni->itype.index.bmp_ino); | ||
2211 | ni->itype.index.bmp_ino = NULL; | ||
2212 | } | ||
2213 | #ifdef NTFS_RW | ||
2214 | if (NInoDirty(ni)) { | ||
2215 | BOOL was_bad = (is_bad_inode(vi)); | ||
2216 | |||
2217 | /* Committing the inode also commits all extent inodes. */ | ||
2218 | ntfs_commit_inode(vi); | ||
2219 | |||
2220 | if (!was_bad && (is_bad_inode(vi) || NInoDirty(ni))) { | ||
2221 | ntfs_error(vi->i_sb, "Failed to commit dirty inode " | ||
2222 | "0x%lx. Losing data!", vi->i_ino); | ||
2223 | // FIXME: Do something!!! | ||
2224 | } | ||
2225 | } | ||
2226 | #endif /* NTFS_RW */ | ||
2227 | |||
2228 | /* No need to lock at this stage as no one else has a reference. */ | ||
2229 | if (ni->nr_extents > 0) { | ||
2230 | int i; | ||
2231 | |||
2232 | for (i = 0; i < ni->nr_extents; i++) | ||
2233 | ntfs_clear_extent_inode(ni->ext.extent_ntfs_inos[i]); | ||
2234 | kfree(ni->ext.extent_ntfs_inos); | ||
2235 | } | ||
2236 | |||
2237 | __ntfs_clear_inode(ni); | ||
2238 | |||
2239 | if (NInoAttr(ni)) { | ||
2240 | /* Release the base inode if we are holding it. */ | ||
2241 | if (ni->nr_extents == -1) { | ||
2242 | iput(VFS_I(ni->ext.base_ntfs_ino)); | ||
2243 | ni->nr_extents = 0; | ||
2244 | ni->ext.base_ntfs_ino = NULL; | ||
2245 | } | ||
2246 | } | ||
2247 | return; | ||
2248 | } | ||
2249 | |||
2250 | /** | ||
2251 | * ntfs_show_options - show mount options in /proc/mounts | ||
2252 | * @sf: seq_file in which to write our mount options | ||
2253 | * @mnt: vfs mount whose mount options to display | ||
2254 | * | ||
2255 | * Called by the VFS once for each mounted ntfs volume when someone reads | ||
2256 | * /proc/mounts in order to display the NTFS specific mount options of each | ||
2257 | * mount. The mount options of the vfs mount @mnt are written to the seq file | ||
2258 | * @sf and success is returned. | ||
2259 | */ | ||
2260 | int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt) | ||
2261 | { | ||
2262 | ntfs_volume *vol = NTFS_SB(mnt->mnt_sb); | ||
2263 | int i; | ||
2264 | |||
2265 | seq_printf(sf, ",uid=%i", vol->uid); | ||
2266 | seq_printf(sf, ",gid=%i", vol->gid); | ||
2267 | if (vol->fmask == vol->dmask) | ||
2268 | seq_printf(sf, ",umask=0%o", vol->fmask); | ||
2269 | else { | ||
2270 | seq_printf(sf, ",fmask=0%o", vol->fmask); | ||
2271 | seq_printf(sf, ",dmask=0%o", vol->dmask); | ||
2272 | } | ||
2273 | seq_printf(sf, ",nls=%s", vol->nls_map->charset); | ||
2274 | if (NVolCaseSensitive(vol)) | ||
2275 | seq_printf(sf, ",case_sensitive"); | ||
2276 | if (NVolShowSystemFiles(vol)) | ||
2277 | seq_printf(sf, ",show_sys_files"); | ||
2278 | for (i = 0; on_errors_arr[i].val; i++) { | ||
2279 | if (on_errors_arr[i].val & vol->on_errors) | ||
2280 | seq_printf(sf, ",errors=%s", on_errors_arr[i].str); | ||
2281 | } | ||
2282 | seq_printf(sf, ",mft_zone_multiplier=%i", vol->mft_zone_multiplier); | ||
2283 | return 0; | ||
2284 | } | ||
2285 | |||
2286 | #ifdef NTFS_RW | ||
2287 | |||
2288 | /** | ||
2289 | * ntfs_truncate - called when the i_size of an ntfs inode is changed | ||
2290 | * @vi: inode for which the i_size was changed | ||
2291 | * | ||
2292 | * We do not support i_size changes yet. | ||
2293 | * | ||
2294 | * The kernel guarantees that @vi is a regular file (S_ISREG() is true) and | ||
2295 | * that the change is allowed. | ||
2296 | * | ||
2297 | * This implies for us that @vi is a file inode rather than a directory, index, | ||
2298 | * or attribute inode as well as that @vi is a base inode. | ||
2299 | * | ||
2300 | * Returns 0 on success or -errno on error. | ||
2301 | * | ||
2302 | * Called with ->i_sem held. In all but one case ->i_alloc_sem is held for | ||
2303 | * writing. The only case where ->i_alloc_sem is not held is | ||
2304 | * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called | ||
2305 | * with the current i_size as the offset which means that it is a noop as far | ||
2306 | * as ntfs_truncate() is concerned. | ||
2307 | */ | ||
2308 | int ntfs_truncate(struct inode *vi) | ||
2309 | { | ||
2310 | ntfs_inode *ni = NTFS_I(vi); | ||
2311 | ntfs_volume *vol = ni->vol; | ||
2312 | ntfs_attr_search_ctx *ctx; | ||
2313 | MFT_RECORD *m; | ||
2314 | const char *te = " Leaving file length out of sync with i_size."; | ||
2315 | int err; | ||
2316 | |||
2317 | ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); | ||
2318 | BUG_ON(NInoAttr(ni)); | ||
2319 | BUG_ON(ni->nr_extents < 0); | ||
2320 | m = map_mft_record(ni); | ||
2321 | if (IS_ERR(m)) { | ||
2322 | err = PTR_ERR(m); | ||
2323 | ntfs_error(vi->i_sb, "Failed to map mft record for inode 0x%lx " | ||
2324 | "(error code %d).%s", vi->i_ino, err, te); | ||
2325 | ctx = NULL; | ||
2326 | m = NULL; | ||
2327 | goto err_out; | ||
2328 | } | ||
2329 | ctx = ntfs_attr_get_search_ctx(ni, m); | ||
2330 | if (unlikely(!ctx)) { | ||
2331 | ntfs_error(vi->i_sb, "Failed to allocate a search context for " | ||
2332 | "inode 0x%lx (not enough memory).%s", | ||
2333 | vi->i_ino, te); | ||
2334 | err = -ENOMEM; | ||
2335 | goto err_out; | ||
2336 | } | ||
2337 | err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, | ||
2338 | CASE_SENSITIVE, 0, NULL, 0, ctx); | ||
2339 | if (unlikely(err)) { | ||
2340 | if (err == -ENOENT) | ||
2341 | ntfs_error(vi->i_sb, "Open attribute is missing from " | ||
2342 | "mft record. Inode 0x%lx is corrupt. " | ||
2343 | "Run chkdsk.", vi->i_ino); | ||
2344 | else | ||
2345 | ntfs_error(vi->i_sb, "Failed to lookup attribute in " | ||
2346 | "inode 0x%lx (error code %d).", | ||
2347 | vi->i_ino, err); | ||
2348 | goto err_out; | ||
2349 | } | ||
2350 | /* If the size has not changed there is nothing to do. */ | ||
2351 | if (ntfs_attr_size(ctx->attr) == i_size_read(vi)) | ||
2352 | goto done; | ||
2353 | // TODO: Implement the truncate... | ||
2354 | ntfs_error(vi->i_sb, "Inode size has changed but this is not " | ||
2355 | "implemented yet. Resetting inode size to old value. " | ||
2356 | " This is most likely a bug in the ntfs driver!"); | ||
2357 | i_size_write(vi, ntfs_attr_size(ctx->attr)); | ||
2358 | done: | ||
2359 | ntfs_attr_put_search_ctx(ctx); | ||
2360 | unmap_mft_record(ni); | ||
2361 | NInoClearTruncateFailed(ni); | ||
2362 | ntfs_debug("Done."); | ||
2363 | return 0; | ||
2364 | err_out: | ||
2365 | if (err != -ENOMEM) { | ||
2366 | NVolSetErrors(vol); | ||
2367 | make_bad_inode(vi); | ||
2368 | } | ||
2369 | if (ctx) | ||
2370 | ntfs_attr_put_search_ctx(ctx); | ||
2371 | if (m) | ||
2372 | unmap_mft_record(ni); | ||
2373 | NInoSetTruncateFailed(ni); | ||
2374 | return err; | ||
2375 | } | ||
2376 | |||
2377 | /** | ||
2378 | * ntfs_truncate_vfs - wrapper for ntfs_truncate() that has no return value | ||
2379 | * @vi: inode for which the i_size was changed | ||
2380 | * | ||
2381 | * Wrapper for ntfs_truncate() that has no return value. | ||
2382 | * | ||
2383 | * See ntfs_truncate() description above for details. | ||
2384 | */ | ||
void ntfs_truncate_vfs(struct inode *vi)
{
	/* This wrapper has no return value, so the result is dropped. */
	(void)ntfs_truncate(vi);
}
2388 | |||
2389 | /** | ||
2390 | * ntfs_setattr - called from notify_change() when an attribute is being changed | ||
2391 | * @dentry: dentry whose attributes to change | ||
2392 | * @attr: structure describing the attributes and the changes | ||
2393 | * | ||
2394 | * We have to trap VFS attempts to truncate the file described by @dentry as | ||
2395 | * soon as possible, because we do not implement changes in i_size yet. So we | ||
2396 | * abort all i_size changes here. | ||
2397 | * | ||
2398 | * We also abort all changes of user, group, and mode as we do not implement | ||
2399 | * the NTFS ACLs yet. | ||
2400 | * | ||
2401 | * Called with ->i_sem held. For the ATTR_SIZE (i.e. ->truncate) case, also | ||
2402 | * called with ->i_alloc_sem held for writing. | ||
2403 | * | ||
2404 | * Basically this is a copy of generic notify_change() and inode_setattr() | ||
2405 | * functionality, except we intercept and abort changes in i_size. | ||
2406 | */ | ||
2407 | int ntfs_setattr(struct dentry *dentry, struct iattr *attr) | ||
2408 | { | ||
2409 | struct inode *vi = dentry->d_inode; | ||
2410 | int err; | ||
2411 | unsigned int ia_valid = attr->ia_valid; | ||
2412 | |||
2413 | err = inode_change_ok(vi, attr); | ||
2414 | if (err) | ||
2415 | return err; | ||
2416 | |||
2417 | /* We do not support NTFS ACLs yet. */ | ||
2418 | if (ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE)) { | ||
2419 | ntfs_warning(vi->i_sb, "Changes in user/group/mode are not " | ||
2420 | "supported yet, ignoring."); | ||
2421 | err = -EOPNOTSUPP; | ||
2422 | goto out; | ||
2423 | } | ||
2424 | |||
2425 | if (ia_valid & ATTR_SIZE) { | ||
2426 | if (attr->ia_size != i_size_read(vi)) { | ||
2427 | ntfs_warning(vi->i_sb, "Changes in inode size are not " | ||
2428 | "supported yet, ignoring."); | ||
2429 | err = -EOPNOTSUPP; | ||
2430 | // TODO: Implement... | ||
2431 | // err = vmtruncate(vi, attr->ia_size); | ||
2432 | if (err || ia_valid == ATTR_SIZE) | ||
2433 | goto out; | ||
2434 | } else { | ||
2435 | /* | ||
2436 | * We skipped the truncate but must still update | ||
2437 | * timestamps. | ||
2438 | */ | ||
2439 | ia_valid |= ATTR_MTIME|ATTR_CTIME; | ||
2440 | } | ||
2441 | } | ||
2442 | |||
2443 | if (ia_valid & ATTR_ATIME) | ||
2444 | vi->i_atime = attr->ia_atime; | ||
2445 | if (ia_valid & ATTR_MTIME) | ||
2446 | vi->i_mtime = attr->ia_mtime; | ||
2447 | if (ia_valid & ATTR_CTIME) | ||
2448 | vi->i_ctime = attr->ia_ctime; | ||
2449 | mark_inode_dirty(vi); | ||
2450 | out: | ||
2451 | return err; | ||
2452 | } | ||
2453 | |||
2454 | /** | ||
2455 | * ntfs_write_inode - write out a dirty inode | ||
2456 | * @vi: inode to write out | ||
2457 | * @sync: if true, write out synchronously | ||
2458 | * | ||
2459 | * Write out a dirty inode to disk including any extent inodes if present. | ||
2460 | * | ||
2461 | * If @sync is true, commit the inode to disk and wait for io completion. This | ||
2462 | * is done using write_mft_record(). | ||
2463 | * | ||
2464 | * If @sync is false, just schedule the write to happen but do not wait for i/o | ||
2465 | * completion. In 2.6 kernels, scheduling usually happens just by virtue of | ||
2466 | * marking the page (and in this case mft record) dirty but we do not implement | ||
2467 | * this yet as write_mft_record() largely ignores the @sync parameter and | ||
2468 | * always performs synchronous writes. | ||
2469 | * | ||
2470 | * Return 0 on success and -errno on error. | ||
2471 | */ | ||
2472 | int ntfs_write_inode(struct inode *vi, int sync) | ||
2473 | { | ||
2474 | sle64 nt; | ||
2475 | ntfs_inode *ni = NTFS_I(vi); | ||
2476 | ntfs_attr_search_ctx *ctx; | ||
2477 | MFT_RECORD *m; | ||
2478 | STANDARD_INFORMATION *si; | ||
2479 | int err = 0; | ||
2480 | BOOL modified = FALSE; | ||
2481 | |||
2482 | ntfs_debug("Entering for %sinode 0x%lx.", NInoAttr(ni) ? "attr " : "", | ||
2483 | vi->i_ino); | ||
2484 | /* | ||
2485 | * Dirty attribute inodes are written via their real inodes so just | ||
2486 | * clean them here. Access time updates are taken care off when the | ||
2487 | * real inode is written. | ||
2488 | */ | ||
2489 | if (NInoAttr(ni)) { | ||
2490 | NInoClearDirty(ni); | ||
2491 | ntfs_debug("Done."); | ||
2492 | return 0; | ||
2493 | } | ||
2494 | /* Map, pin, and lock the mft record belonging to the inode. */ | ||
2495 | m = map_mft_record(ni); | ||
2496 | if (IS_ERR(m)) { | ||
2497 | err = PTR_ERR(m); | ||
2498 | goto err_out; | ||
2499 | } | ||
2500 | /* Update the access times in the standard information attribute. */ | ||
2501 | ctx = ntfs_attr_get_search_ctx(ni, m); | ||
2502 | if (unlikely(!ctx)) { | ||
2503 | err = -ENOMEM; | ||
2504 | goto unm_err_out; | ||
2505 | } | ||
2506 | err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, NULL, 0, | ||
2507 | CASE_SENSITIVE, 0, NULL, 0, ctx); | ||
2508 | if (unlikely(err)) { | ||
2509 | ntfs_attr_put_search_ctx(ctx); | ||
2510 | goto unm_err_out; | ||
2511 | } | ||
2512 | si = (STANDARD_INFORMATION*)((u8*)ctx->attr + | ||
2513 | le16_to_cpu(ctx->attr->data.resident.value_offset)); | ||
2514 | /* Update the access times if they have changed. */ | ||
2515 | nt = utc2ntfs(vi->i_mtime); | ||
2516 | if (si->last_data_change_time != nt) { | ||
2517 | ntfs_debug("Updating mtime for inode 0x%lx: old = 0x%llx, " | ||
2518 | "new = 0x%llx", vi->i_ino, | ||
2519 | sle64_to_cpu(si->last_data_change_time), | ||
2520 | sle64_to_cpu(nt)); | ||
2521 | si->last_data_change_time = nt; | ||
2522 | modified = TRUE; | ||
2523 | } | ||
2524 | nt = utc2ntfs(vi->i_ctime); | ||
2525 | if (si->last_mft_change_time != nt) { | ||
2526 | ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, " | ||
2527 | "new = 0x%llx", vi->i_ino, | ||
2528 | sle64_to_cpu(si->last_mft_change_time), | ||
2529 | sle64_to_cpu(nt)); | ||
2530 | si->last_mft_change_time = nt; | ||
2531 | modified = TRUE; | ||
2532 | } | ||
2533 | nt = utc2ntfs(vi->i_atime); | ||
2534 | if (si->last_access_time != nt) { | ||
2535 | ntfs_debug("Updating atime for inode 0x%lx: old = 0x%llx, " | ||
2536 | "new = 0x%llx", vi->i_ino, | ||
2537 | sle64_to_cpu(si->last_access_time), | ||
2538 | sle64_to_cpu(nt)); | ||
2539 | si->last_access_time = nt; | ||
2540 | modified = TRUE; | ||
2541 | } | ||
2542 | /* | ||
2543 | * If we just modified the standard information attribute we need to | ||
2544 | * mark the mft record it is in dirty. We do this manually so that | ||
2545 | * mark_inode_dirty() is not called which would redirty the inode and | ||
2546 | * hence result in an infinite loop of trying to write the inode. | ||
2547 | * There is no need to mark the base inode nor the base mft record | ||
2548 | * dirty, since we are going to write this mft record below in any case | ||
2549 | * and the base mft record may actually not have been modified so it | ||
2550 | * might not need to be written out. | ||
2551 | * NOTE: It is not a problem when the inode for $MFT itself is being | ||
2552 | * written out as mark_ntfs_record_dirty() will only set I_DIRTY_PAGES | ||
2553 | * on the $MFT inode and hence ntfs_write_inode() will not be | ||
2554 | * re-invoked because of it which in turn is ok since the dirtied mft | ||
2555 | * record will be cleaned and written out to disk below, i.e. before | ||
2556 | * this function returns. | ||
2557 | */ | ||
2558 | if (modified && !NInoTestSetDirty(ctx->ntfs_ino)) | ||
2559 | mark_ntfs_record_dirty(ctx->ntfs_ino->page, | ||
2560 | ctx->ntfs_ino->page_ofs); | ||
2561 | ntfs_attr_put_search_ctx(ctx); | ||
2562 | /* Now the access times are updated, write the base mft record. */ | ||
2563 | if (NInoDirty(ni)) | ||
2564 | err = write_mft_record(ni, m, sync); | ||
2565 | /* Write all attached extent mft records. */ | ||
2566 | down(&ni->extent_lock); | ||
2567 | if (ni->nr_extents > 0) { | ||
2568 | ntfs_inode **extent_nis = ni->ext.extent_ntfs_inos; | ||
2569 | int i; | ||
2570 | |||
2571 | ntfs_debug("Writing %i extent inodes.", ni->nr_extents); | ||
2572 | for (i = 0; i < ni->nr_extents; i++) { | ||
2573 | ntfs_inode *tni = extent_nis[i]; | ||
2574 | |||
2575 | if (NInoDirty(tni)) { | ||
2576 | MFT_RECORD *tm = map_mft_record(tni); | ||
2577 | int ret; | ||
2578 | |||
2579 | if (IS_ERR(tm)) { | ||
2580 | if (!err || err == -ENOMEM) | ||
2581 | err = PTR_ERR(tm); | ||
2582 | continue; | ||
2583 | } | ||
2584 | ret = write_mft_record(tni, tm, sync); | ||
2585 | unmap_mft_record(tni); | ||
2586 | if (unlikely(ret)) { | ||
2587 | if (!err || err == -ENOMEM) | ||
2588 | err = ret; | ||
2589 | } | ||
2590 | } | ||
2591 | } | ||
2592 | } | ||
2593 | up(&ni->extent_lock); | ||
2594 | unmap_mft_record(ni); | ||
2595 | if (unlikely(err)) | ||
2596 | goto err_out; | ||
2597 | ntfs_debug("Done."); | ||
2598 | return 0; | ||
2599 | unm_err_out: | ||
2600 | unmap_mft_record(ni); | ||
2601 | err_out: | ||
2602 | if (err == -ENOMEM) { | ||
2603 | ntfs_warning(vi->i_sb, "Not enough memory to write inode. " | ||
2604 | "Marking the inode dirty again, so the VFS " | ||
2605 | "retries later."); | ||
2606 | mark_inode_dirty(vi); | ||
2607 | } else { | ||
2608 | ntfs_error(vi->i_sb, "Failed (error code %i): Marking inode " | ||
2609 | "as bad. You should run chkdsk.", -err); | ||
2610 | make_bad_inode(vi); | ||
2611 | NVolSetErrors(ni->vol); | ||
2612 | } | ||
2613 | return err; | ||
2614 | } | ||
2615 | |||
2616 | #endif /* NTFS_RW */ | ||
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h new file mode 100644 index 000000000000..99580455f2ed --- /dev/null +++ b/fs/ntfs/inode.h | |||
@@ -0,0 +1,321 @@ | |||
1 | /* | ||
2 | * inode.h - Defines for inode structures NTFS Linux kernel driver. Part of | ||
3 | * the Linux-NTFS project. | ||
4 | * | ||
5 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
6 | * Copyright (c) 2002 Richard Russon | ||
7 | * | ||
8 | * This program/include file is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as published | ||
10 | * by the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program/include file is distributed in the hope that it will be | ||
14 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
15 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program (in the main directory of the Linux-NTFS | ||
20 | * distribution in the file COPYING); if not, write to the Free Software | ||
21 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 | */ | ||
23 | |||
24 | #ifndef _LINUX_NTFS_INODE_H | ||
25 | #define _LINUX_NTFS_INODE_H | ||
26 | |||
27 | #include <linux/mm.h> | ||
28 | #include <linux/fs.h> | ||
29 | #include <linux/seq_file.h> | ||
30 | #include <linux/list.h> | ||
31 | #include <asm/atomic.h> | ||
32 | #include <asm/semaphore.h> | ||
33 | |||
34 | #include "layout.h" | ||
35 | #include "volume.h" | ||
36 | #include "types.h" | ||
37 | #include "runlist.h" | ||
38 | #include "debug.h" | ||
39 | |||
40 | typedef struct _ntfs_inode ntfs_inode; | ||
41 | |||
42 | /* | ||
43 | * The NTFS in-memory inode structure. It is just used as an extension to the | ||
44 | * fields already provided in the VFS inode. | ||
45 | */ | ||
46 | struct _ntfs_inode { | ||
47 | s64 initialized_size; /* Copy from the attribute record. */ | ||
48 | s64 allocated_size; /* Copy from the attribute record. */ | ||
49 | unsigned long state; /* NTFS specific flags describing this inode. | ||
50 | See ntfs_inode_state_bits below. */ | ||
51 | unsigned long mft_no; /* Number of the mft record / inode. */ | ||
52 | u16 seq_no; /* Sequence number of the mft record. */ | ||
53 | atomic_t count; /* Inode reference count for book keeping. */ | ||
54 | ntfs_volume *vol; /* Pointer to the ntfs volume of this inode. */ | ||
55 | /* | ||
56 | * If NInoAttr() is true, the below fields describe the attribute which | ||
57 | * this fake inode belongs to. The actual inode of this attribute is | ||
58 | * pointed to by base_ntfs_ino and nr_extents is always set to -1 (see | ||
59 | * below). For real inodes, we also set the type (AT_DATA for files and | ||
60 | * AT_INDEX_ALLOCATION for directories), with the name = NULL and | ||
61 | * name_len = 0 for files and name = I30 (global constant) and | ||
62 | * name_len = 4 for directories. | ||
63 | */ | ||
64 | ATTR_TYPE type; /* Attribute type of this fake inode. */ | ||
65 | ntfschar *name; /* Attribute name of this fake inode. */ | ||
66 | u32 name_len; /* Attribute name length of this fake inode. */ | ||
67 | runlist runlist; /* If state has the NI_NonResident bit set, | ||
68 | the runlist of the unnamed data attribute | ||
69 | (if a file) or of the index allocation | ||
70 | attribute (directory) or of the attribute | ||
71 | described by the fake inode (if NInoAttr()). | ||
72 | If runlist.rl is NULL, the runlist has not | ||
73 | been read in yet or has been unmapped. If | ||
74 | NI_NonResident is clear, the attribute is | ||
75 | resident (file and fake inode) or there is | ||
76 | no $I30 index allocation attribute | ||
77 | (small directory). In the latter case | ||
78 | runlist.rl is always NULL.*/ | ||
79 | /* | ||
80 | * The following fields are only valid for real inodes and extent | ||
81 | * inodes. | ||
82 | */ | ||
83 | struct semaphore mrec_lock; /* Lock for serializing access to the | ||
84 | mft record belonging to this inode. */ | ||
85 | struct page *page; /* The page containing the mft record of the | ||
86 | inode. This should only be touched by the | ||
87 | (un)map_mft_record*() functions. */ | ||
88 | int page_ofs; /* Offset into the page at which the mft record | ||
89 | begins. This should only be touched by the | ||
90 | (un)map_mft_record*() functions. */ | ||
91 | /* | ||
92 | * Attribute list support (only for use by the attribute lookup | ||
93 | * functions). Setup during read_inode for all inodes with attribute | ||
94 | * lists. Only valid if NI_AttrList is set in state, and attr_list_rl is | ||
95 | * further only valid if NI_AttrListNonResident is set. | ||
96 | */ | ||
97 | u32 attr_list_size; /* Length of attribute list value in bytes. */ | ||
98 | u8 *attr_list; /* Attribute list value itself. */ | ||
99 | runlist attr_list_rl; /* Run list for the attribute list value. */ | ||
100 | union { | ||
101 | struct { /* It is a directory, $MFT, or an index inode. */ | ||
102 | struct inode *bmp_ino; /* Attribute inode for the | ||
103 | index $BITMAP. */ | ||
104 | u32 block_size; /* Size of an index block. */ | ||
105 | u32 vcn_size; /* Size of a vcn in this | ||
106 | index. */ | ||
107 | COLLATION_RULE collation_rule; /* The collation rule | ||
108 | for the index. */ | ||
109 | u8 block_size_bits; /* Log2 of the above. */ | ||
110 | u8 vcn_size_bits; /* Log2 of the above. */ | ||
111 | } index; | ||
112 | struct { /* It is a compressed file or an attribute inode. */ | ||
113 | s64 size; /* Copy of compressed_size from | ||
114 | $DATA. */ | ||
115 | u32 block_size; /* Size of a compression block | ||
116 | (cb). */ | ||
117 | u8 block_size_bits; /* Log2 of the size of a cb. */ | ||
118 | u8 block_clusters; /* Number of clusters per cb. */ | ||
119 | } compressed; | ||
120 | } itype; | ||
121 | struct semaphore extent_lock; /* Lock for accessing/modifying the | ||
122 | below . */ | ||
123 | s32 nr_extents; /* For a base mft record, the number of attached extent | ||
124 | inodes (0 if none), for extent records and for fake | ||
125 | inodes describing an attribute this is -1. */ | ||
126 | union { /* This union is only used if nr_extents != 0. */ | ||
127 | ntfs_inode **extent_ntfs_inos; /* For nr_extents > 0, array of | ||
128 | the ntfs inodes of the extent | ||
129 | mft records belonging to | ||
130 | this base inode which have | ||
131 | been loaded. */ | ||
132 | ntfs_inode *base_ntfs_ino; /* For nr_extents == -1, the | ||
133 | ntfs inode of the base mft | ||
134 | record. For fake inodes, the | ||
135 | real (base) inode to which | ||
136 | the attribute belongs. */ | ||
137 | } ext; | ||
138 | }; | ||
139 | |||
140 | /* | ||
141 | * Defined bits for the state field in the ntfs_inode structure. | ||
142 | * (f) = files only, (d) = directories only, (a) = attributes/fake inodes only | ||
143 | */ | ||
typedef enum {
	/* 1: Mft record needs to be written to disk. */
	NI_Dirty,
	/* 1: Mft record contains an attribute list. */
	NI_AttrList,
	/* 1: Attribute list is non-resident.  Implies NI_AttrList is set. */
	NI_AttrListNonResident,

	/*
	 * 1: Fake inode for attribute i/o.
	 * 0: Real inode or extent inode.
	 */
	NI_Attr,

	/*
	 * 1: Attribute is protected by MST fixups.
	 * 0: Attribute is not protected by fixups.
	 */
	NI_MstProtected,
	/*
	 * 1: Unnamed data attr is non-resident (f).
	 * 1: Attribute is non-resident (a).
	 */
	NI_NonResident,
	/* 1: $I30 index alloc attr is present (d).  Same bit as above. */
	NI_IndexAllocPresent = NI_NonResident,
	/*
	 * 1: Unnamed data attr is compressed (f).
	 * 1: Create compressed files by default (d).
	 * 1: Attribute is compressed (a).
	 */
	NI_Compressed,
	/*
	 * 1: Unnamed data attr is encrypted (f).
	 * 1: Create encrypted files by default (d).
	 * 1: Attribute is encrypted (a).
	 */
	NI_Encrypted,
	/*
	 * 1: Unnamed data attr is sparse (f).
	 * 1: Create sparse files by default (d).
	 * 1: Attribute is sparse (a).
	 */
	NI_Sparse,
	/* 1: Last ntfs_truncate() call failed. */
	NI_TruncateFailed,
} ntfs_inode_state_bits;
170 | |||
171 | /* | ||
172 | * NOTE: We should be adding dirty mft records to a list somewhere and they | ||
173 | * should be independent of the (ntfs/vfs) inode structure so that an inode can | ||
174 | * be removed but the record can be left dirty for syncing later. | ||
175 | */ | ||
176 | |||
177 | /* | ||
178 | * Macro tricks to expand the NInoFoo(), NInoSetFoo(), and NInoClearFoo() | ||
179 | * functions. | ||
180 | */ | ||
/*
 * NINO_FNS(Foo) emits NInoFoo(), NInoSetFoo() and NInoClearFoo(), which
 * test, set and clear bit NI_Foo in ni->state respectively.
 */
#define NINO_FNS(flag)						\
static inline int NIno##flag(ntfs_inode *ni)			\
{								\
	return test_bit(NI_##flag, &ni->state);			\
}								\
static inline void NInoSet##flag(ntfs_inode *ni)		\
{								\
	set_bit(NI_##flag, &ni->state);				\
}								\
static inline void NInoClear##flag(ntfs_inode *ni)		\
{								\
	clear_bit(NI_##flag, &ni->state);			\
}
194 | |||
195 | /* | ||
196 | * As above for NInoTestSetFoo() and NInoTestClearFoo(). | ||
197 | */ | ||
/*
 * TAS_NINO_FNS(Foo) emits NInoTestSetFoo() and NInoTestClearFoo(), which
 * return the result of test_and_set_bit() / test_and_clear_bit() on bit
 * NI_Foo in ni->state.
 */
#define TAS_NINO_FNS(flag)					\
static inline int NInoTestSet##flag(ntfs_inode *ni)		\
{								\
	return test_and_set_bit(NI_##flag, &ni->state);		\
}								\
static inline int NInoTestClear##flag(ntfs_inode *ni)		\
{								\
	return test_and_clear_bit(NI_##flag, &ni->state);	\
}
207 | |||
208 | /* Emit the ntfs inode bitops functions. */ | ||
209 | NINO_FNS(Dirty) | ||
210 | TAS_NINO_FNS(Dirty) | ||
211 | NINO_FNS(AttrList) | ||
212 | NINO_FNS(AttrListNonResident) | ||
213 | NINO_FNS(Attr) | ||
214 | NINO_FNS(MstProtected) | ||
215 | NINO_FNS(NonResident) | ||
216 | NINO_FNS(IndexAllocPresent) | ||
217 | NINO_FNS(Compressed) | ||
218 | NINO_FNS(Encrypted) | ||
219 | NINO_FNS(Sparse) | ||
220 | NINO_FNS(TruncateFailed) | ||
221 | |||
222 | /* | ||
223 | * The full structure containing a ntfs_inode and a vfs struct inode. Used for | ||
224 | * all real and fake inodes but not for extent inodes which lack the vfs struct | ||
225 | * inode. | ||
226 | */ | ||
227 | typedef struct { | ||
228 | ntfs_inode ntfs_inode; | ||
229 | struct inode vfs_inode; /* The vfs inode structure. */ | ||
230 | } big_ntfs_inode; | ||
231 | |||
232 | /** | ||
233 | * NTFS_I - return the ntfs inode given a vfs inode | ||
234 | * @inode: VFS inode | ||
235 | * | ||
236 | * NTFS_I() returns the ntfs inode associated with the VFS @inode. | ||
237 | */ | ||
238 | static inline ntfs_inode *NTFS_I(struct inode *inode) | ||
239 | { | ||
240 | return (ntfs_inode *)list_entry(inode, big_ntfs_inode, vfs_inode); | ||
241 | } | ||
242 | |||
243 | static inline struct inode *VFS_I(ntfs_inode *ni) | ||
244 | { | ||
245 | return &((big_ntfs_inode *)ni)->vfs_inode; | ||
246 | } | ||
247 | |||
248 | /** | ||
249 | * ntfs_attr - ntfs in memory attribute structure | ||
250 | * @mft_no: mft record number of the base mft record of this attribute | ||
251 | * @name: Unicode name of the attribute (NULL if unnamed) | ||
252 | * @name_len: length of @name in Unicode characters (0 if unnamed) | ||
253 | * @type: attribute type (see layout.h) | ||
254 | * | ||
255 | * This structure exists only to provide a small structure for the | ||
256 | * ntfs_{attr_}iget()/ntfs_test_inode()/ntfs_init_locked_inode() mechanism. | ||
257 | * | ||
258 | * NOTE: Elements are ordered by size to make the structure as compact as | ||
259 | * possible on all architectures. | ||
260 | */ | ||
261 | typedef struct { | ||
262 | unsigned long mft_no; | ||
263 | ntfschar *name; | ||
264 | u32 name_len; | ||
265 | ATTR_TYPE type; | ||
266 | } ntfs_attr; | ||
267 | |||
268 | typedef int (*test_t)(struct inode *, void *); | ||
269 | |||
270 | extern int ntfs_test_inode(struct inode *vi, ntfs_attr *na); | ||
271 | |||
272 | extern struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no); | ||
273 | extern struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type, | ||
274 | ntfschar *name, u32 name_len); | ||
275 | extern struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name, | ||
276 | u32 name_len); | ||
277 | |||
278 | extern struct inode *ntfs_alloc_big_inode(struct super_block *sb); | ||
279 | extern void ntfs_destroy_big_inode(struct inode *inode); | ||
280 | extern void ntfs_clear_big_inode(struct inode *vi); | ||
281 | |||
282 | extern void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni); | ||
283 | |||
284 | static inline void ntfs_init_big_inode(struct inode *vi) | ||
285 | { | ||
286 | ntfs_inode *ni = NTFS_I(vi); | ||
287 | |||
288 | ntfs_debug("Entering."); | ||
289 | __ntfs_init_inode(vi->i_sb, ni); | ||
290 | ni->mft_no = vi->i_ino; | ||
291 | } | ||
292 | |||
293 | extern ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, | ||
294 | unsigned long mft_no); | ||
295 | extern void ntfs_clear_extent_inode(ntfs_inode *ni); | ||
296 | |||
297 | extern int ntfs_read_inode_mount(struct inode *vi); | ||
298 | |||
299 | extern void ntfs_put_inode(struct inode *vi); | ||
300 | |||
301 | extern int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt); | ||
302 | |||
303 | #ifdef NTFS_RW | ||
304 | |||
305 | extern int ntfs_truncate(struct inode *vi); | ||
306 | extern void ntfs_truncate_vfs(struct inode *vi); | ||
307 | |||
308 | extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr); | ||
309 | |||
310 | extern int ntfs_write_inode(struct inode *vi, int sync); | ||
311 | |||
/*
 * Write @vi out via ntfs_write_inode() with sync set, unless it is a bad
 * inode.  The result of the write is not reported.
 */
static inline void ntfs_commit_inode(struct inode *vi)
{
	if (is_bad_inode(vi))
		return;
	ntfs_write_inode(vi, 1);
}
318 | |||
319 | #endif /* NTFS_RW */ | ||
320 | |||
321 | #endif /* _LINUX_NTFS_INODE_H */ | ||
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h new file mode 100644 index 000000000000..47b338999921 --- /dev/null +++ b/fs/ntfs/layout.h | |||
@@ -0,0 +1,2413 @@ | |||
1 | /* | ||
2 | * layout.h - All NTFS associated on-disk structures. Part of the Linux-NTFS | ||
3 | * project. | ||
4 | * | ||
5 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
6 | * Copyright (c) 2002 Richard Russon | ||
7 | * | ||
8 | * This program/include file is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as published | ||
10 | * by the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program/include file is distributed in the hope that it will be | ||
14 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
15 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program (in the main directory of the Linux-NTFS | ||
20 | * distribution in the file COPYING); if not, write to the Free Software | ||
21 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 | */ | ||
23 | |||
24 | #ifndef _LINUX_NTFS_LAYOUT_H | ||
25 | #define _LINUX_NTFS_LAYOUT_H | ||
26 | |||
27 | #include <linux/types.h> | ||
28 | #include <linux/bitops.h> | ||
29 | #include <linux/list.h> | ||
30 | #include <asm/byteorder.h> | ||
31 | |||
32 | #include "types.h" | ||
33 | |||
34 | /* | ||
35 | * Constant endianness conversion defines. | ||
36 | */ | ||
/* Little endian to cpu, for constants. */
#define const_le16_to_cpu(x)	__constant_le16_to_cpu(x)
#define const_le32_to_cpu(x)	__constant_le32_to_cpu(x)
#define const_le64_to_cpu(x)	__constant_le64_to_cpu(x)

/* Cpu to little endian, for constants. */
#define const_cpu_to_le16(x)	__constant_cpu_to_le16(x)
#define const_cpu_to_le32(x)	__constant_cpu_to_le32(x)
#define const_cpu_to_le64(x)	__constant_cpu_to_le64(x)
44 | |||
45 | /* The NTFS oem_id: "NTFS" followed by four spaces (8 bytes in total). */ | ||
46 | #define magicNTFS const_cpu_to_le64(0x202020205346544eULL) | ||
47 | |||
48 | /* | ||
49 | * Location of bootsector on partition: | ||
50 | * The standard NTFS_BOOT_SECTOR is on sector 0 of the partition. | ||
51 | * On NT4 and above there is one backup copy of the boot sector to | ||
52 | * be found on the last sector of the partition (not normally accessible | ||
53 | * from within Windows as the bootsector contained number of sectors | ||
54 | * value is one less than the actual value!). | ||
55 | * On versions of NT 3.51 and earlier, the backup copy was located at | ||
56 | * number of sectors/2 (integer divide), i.e. in the middle of the volume. | ||
57 | */ | ||
58 | |||
59 | /* | ||
60 | * BIOS parameter block (bpb) structure. | ||
61 | */ | ||
62 | typedef struct { | ||
63 | le16 bytes_per_sector; /* Size of a sector in bytes. */ | ||
64 | u8 sectors_per_cluster; /* Size of a cluster in sectors. */ | ||
65 | le16 reserved_sectors; /* zero */ | ||
66 | u8 fats; /* zero */ | ||
67 | le16 root_entries; /* zero */ | ||
68 | le16 sectors; /* zero */ | ||
69 | u8 media_type; /* 0xf8 = hard disk */ | ||
70 | le16 sectors_per_fat; /* zero */ | ||
71 | le16 sectors_per_track; /* irrelevant */ | ||
72 | le16 heads; /* irrelevant */ | ||
73 | le32 hidden_sectors; /* zero */ | ||
74 | le32 large_sectors; /* zero */ | ||
75 | } __attribute__ ((__packed__)) BIOS_PARAMETER_BLOCK; | ||
76 | |||
77 | /* | ||
78 | * NTFS boot sector structure. | ||
79 | */ | ||
80 | typedef struct { | ||
81 | u8 jump[3]; /* Irrelevant (jump to boot up code).*/ | ||
82 | le64 oem_id; /* Magic "NTFS ". */ | ||
83 | BIOS_PARAMETER_BLOCK bpb; /* See BIOS_PARAMETER_BLOCK. */ | ||
84 | u8 unused[4]; /* zero, NTFS diskedit.exe states that | ||
85 | this is actually: | ||
86 | __u8 physical_drive; // 0x80 | ||
87 | __u8 current_head; // zero | ||
88 | __u8 extended_boot_signature; | ||
89 | // 0x80 | ||
90 | __u8 unused; // zero | ||
91 | */ | ||
92 | /*0x28*/sle64 number_of_sectors; /* Number of sectors in volume. Gives | ||
93 | maximum volume size of 2^63 sectors. | ||
94 | Assuming standard sector size of 512 | ||
95 | bytes, the maximum byte size is | ||
96 | approx. 4.7x10^21 bytes. (-; */ | ||
97 | sle64 mft_lcn; /* Cluster location of mft data. */ | ||
98 | sle64 mftmirr_lcn; /* Cluster location of copy of mft. */ | ||
99 | s8 clusters_per_mft_record; /* Mft record size in clusters. */ | ||
100 | u8 reserved0[3]; /* zero */ | ||
101 | s8 clusters_per_index_record; /* Index block size in clusters. */ | ||
102 | u8 reserved1[3]; /* zero */ | ||
103 | le64 volume_serial_number; /* Irrelevant (serial number). */ | ||
104 | le32 checksum; /* Boot sector checksum. */ | ||
105 | /*0x54*/u8 bootstrap[426]; /* Irrelevant (boot up code). */ | ||
106 | le16 end_of_sector_marker; /* End of bootsector magic. Always is | ||
107 | 0xaa55 in little endian. */ | ||
108 | /* sizeof() = 512 (0x200) bytes */ | ||
109 | } __attribute__ ((__packed__)) NTFS_BOOT_SECTOR; | ||
110 | |||
111 | /* | ||
112 | * Magic identifiers present at the beginning of all ntfs record containing | ||
113 | * records (like mft records for example). | ||
114 | */ | ||
115 | enum { | ||
116 | /* Found in $MFT/$DATA. */ | ||
117 | magic_FILE = const_cpu_to_le32(0x454c4946), /* Mft entry. */ | ||
118 | magic_INDX = const_cpu_to_le32(0x58444e49), /* Index buffer. */ | ||
119 | magic_HOLE = const_cpu_to_le32(0x454c4f48), /* ? (NTFS 3.0+?) */ | ||
120 | |||
121 | /* Found in $LogFile/$DATA. */ | ||
122 | magic_RSTR = const_cpu_to_le32(0x52545352), /* Restart page. */ | ||
123 | magic_RCRD = const_cpu_to_le32(0x44524352), /* Log record page. */ | ||
124 | |||
125 | /* Found in $LogFile/$DATA. (May be found in $MFT/$DATA, also?) */ | ||
126 | magic_CHKD = const_cpu_to_le32(0x424b4843), /* Modified by chkdsk. */ | ||
127 | |||
128 | /* Found in all ntfs record containing records. */ | ||
129 | magic_BAAD = const_cpu_to_le32(0x44414142), /* Failed multi sector | ||
130 | transfer was detected. */ | ||
131 | /* | ||
132 | * Found in $LogFile/$DATA when a page is full of 0xff bytes and is | ||
133 | * thus not initialized. Page must be initialized before using it. | ||
134 | */ | ||
135 | magic_empty = const_cpu_to_le32(0xffffffff) /* Record is empty. */ | ||
136 | }; | ||
137 | |||
138 | typedef le32 NTFS_RECORD_TYPE; | ||
139 | |||
140 | /* | ||
141 | * Generic magic comparison macros. Finally found a use for the ## preprocessor | ||
142 | * operator! (-8 | ||
143 | */ | ||
144 | |||
145 | static inline BOOL __ntfs_is_magic(le32 x, NTFS_RECORD_TYPE r) | ||
146 | { | ||
147 | return (x == r); | ||
148 | } | ||
149 | #define ntfs_is_magic(x, m) __ntfs_is_magic(x, magic_##m) | ||
150 | |||
151 | static inline BOOL __ntfs_is_magicp(le32 *p, NTFS_RECORD_TYPE r) | ||
152 | { | ||
153 | return (*p == r); | ||
154 | } | ||
155 | #define ntfs_is_magicp(p, m) __ntfs_is_magicp(p, magic_##m) | ||
156 | |||
157 | /* | ||
158 | * Specialised magic comparison macros for the NTFS_RECORD_TYPEs defined above. | ||
159 | */ | ||
160 | #define ntfs_is_file_record(x) ( ntfs_is_magic (x, FILE) ) | ||
161 | #define ntfs_is_file_recordp(p) ( ntfs_is_magicp(p, FILE) ) | ||
162 | #define ntfs_is_mft_record(x) ( ntfs_is_file_record (x) ) | ||
163 | #define ntfs_is_mft_recordp(p) ( ntfs_is_file_recordp(p) ) | ||
164 | #define ntfs_is_indx_record(x) ( ntfs_is_magic (x, INDX) ) | ||
165 | #define ntfs_is_indx_recordp(p) ( ntfs_is_magicp(p, INDX) ) | ||
166 | #define ntfs_is_hole_record(x) ( ntfs_is_magic (x, HOLE) ) | ||
167 | #define ntfs_is_hole_recordp(p) ( ntfs_is_magicp(p, HOLE) ) | ||
168 | |||
169 | #define ntfs_is_rstr_record(x) ( ntfs_is_magic (x, RSTR) ) | ||
170 | #define ntfs_is_rstr_recordp(p) ( ntfs_is_magicp(p, RSTR) ) | ||
171 | #define ntfs_is_rcrd_record(x) ( ntfs_is_magic (x, RCRD) ) | ||
172 | #define ntfs_is_rcrd_recordp(p) ( ntfs_is_magicp(p, RCRD) ) | ||
173 | |||
174 | #define ntfs_is_chkd_record(x) ( ntfs_is_magic (x, CHKD) ) | ||
175 | #define ntfs_is_chkd_recordp(p) ( ntfs_is_magicp(p, CHKD) ) | ||
176 | |||
177 | #define ntfs_is_baad_record(x) ( ntfs_is_magic (x, BAAD) ) | ||
178 | #define ntfs_is_baad_recordp(p) ( ntfs_is_magicp(p, BAAD) ) | ||
179 | |||
180 | #define ntfs_is_empty_record(x) ( ntfs_is_magic (x, empty) ) | ||
181 | #define ntfs_is_empty_recordp(p) ( ntfs_is_magicp(p, empty) ) | ||
182 | |||
183 | /* | ||
184 | * The Update Sequence Array (usa) is an array of the le16 values which belong | ||
185 | * to the end of each sector protected by the update sequence record in which | ||
186 | * this array is contained. Note that the first entry is the Update Sequence | ||
187 | * Number (usn), a cyclic counter of how many times the protected record has | ||
188 | * been written to disk. The values 0 and -1 (ie. 0xffff) are not used. All | ||
189 | * last le16's of each sector have to be equal to the usn (during reading) or | ||
190 | * are set to it (during writing). If they are not, an incomplete multi sector | ||
191 | * transfer has occurred when the data was written. | ||
192 | * The maximum size for the update sequence array is fixed to: | ||
193 | * maximum size = usa_ofs + (usa_count * 2) = 510 bytes | ||
194 | * The 510 bytes comes from the fact that the last le16 in the array has to | ||
195 | * (obviously) finish before the last le16 of the first 512-byte sector. | ||
196 | * This formula can be used as a consistency check in that usa_ofs + | ||
197 | * (usa_count * 2) has to be less than or equal to 510. | ||
198 | */ | ||
199 | typedef struct { | ||
200 | NTFS_RECORD_TYPE magic; /* A four-byte magic identifying the record | ||
201 | type and/or status. */ | ||
202 | le16 usa_ofs; /* Offset to the Update Sequence Array (usa) | ||
203 | from the start of the ntfs record. */ | ||
204 | le16 usa_count; /* Number of le16 sized entries in the usa | ||
205 | including the Update Sequence Number (usn), | ||
206 | thus the number of fixups is the usa_count | ||
207 | minus 1. */ | ||
208 | } __attribute__ ((__packed__)) NTFS_RECORD; | ||
209 | |||
210 | /* | ||
211 | * System files mft record numbers. All these files are always marked as used | ||
212 | * in the bitmap attribute of the mft; presumably in order to avoid accidental | ||
213 | * allocation for random other mft records. Also, the sequence number for each | ||
214 | * of the system files is always equal to their mft record number and it is | ||
215 | * never modified. | ||
216 | */ | ||
217 | typedef enum { | ||
218 | FILE_MFT = 0, /* Master file table (mft). Data attribute | ||
219 | contains the entries and bitmap attribute | ||
220 | records which ones are in use (bit==1). */ | ||
221 | FILE_MFTMirr = 1, /* Mft mirror: copy of first four mft records | ||
222 | in data attribute. If cluster size > 4kiB, | ||
223 | copy of first N mft records, with | ||
224 | N = cluster_size / mft_record_size. */ | ||
225 | FILE_LogFile = 2, /* Journalling log in data attribute. */ | ||
226 | FILE_Volume = 3, /* Volume name attribute and volume information | ||
227 | attribute (flags and ntfs version). Windows | ||
228 | refers to this file as volume DASD (Direct | ||
229 | Access Storage Device). */ | ||
230 | FILE_AttrDef = 4, /* Array of attribute definitions in data | ||
231 | attribute. */ | ||
232 | FILE_root = 5, /* Root directory. */ | ||
233 | FILE_Bitmap = 6, /* Allocation bitmap of all clusters (lcns) in | ||
234 | data attribute. */ | ||
235 | FILE_Boot = 7, /* Boot sector (always at cluster 0) in data | ||
236 | attribute. */ | ||
237 | FILE_BadClus = 8, /* Contains all bad clusters in the non-resident | ||
238 | data attribute. */ | ||
239 | FILE_Secure = 9, /* Shared security descriptors in data attribute | ||
240 | and two indexes into the descriptors. | ||
241 | Appeared in Windows 2000. Before that, this | ||
242 | file was named $Quota but was unused. */ | ||
243 | FILE_UpCase = 10, /* Uppercase equivalents of all 65536 Unicode | ||
244 | characters in data attribute. */ | ||
245 | FILE_Extend = 11, /* Directory containing other system files (eg. | ||
246 | $ObjId, $Quota, $Reparse and $UsnJrnl). This | ||
247 | is new to NTFS3.0. */ | ||
248 | FILE_reserved12 = 12, /* Reserved for future use (records 12-15). */ | ||
249 | FILE_reserved13 = 13, | ||
250 | FILE_reserved14 = 14, | ||
251 | FILE_reserved15 = 15, | ||
252 | FILE_first_user = 16, /* First user file, used as test limit for | ||
253 | whether to allow opening a file or not. */ | ||
254 | } NTFS_SYSTEM_FILES; | ||
255 | |||
256 | /* | ||
257 | * These are the so far known MFT_RECORD_* flags (16-bit) which contain | ||
258 | * information about the mft record in which they are present. | ||
259 | */ | ||
260 | enum { | ||
261 | MFT_RECORD_IN_USE = const_cpu_to_le16(0x0001), | ||
262 | MFT_RECORD_IS_DIRECTORY = const_cpu_to_le16(0x0002), | ||
263 | } __attribute__ ((__packed__)); | ||
264 | |||
265 | typedef le16 MFT_RECORD_FLAGS; | ||
266 | |||
267 | /* | ||
268 | * mft references (aka file references or file record segment references) are | ||
269 | * used whenever a structure needs to refer to a record in the mft. | ||
270 | * | ||
271 | * A reference consists of a 48-bit index into the mft and a 16-bit sequence | ||
272 | * number used to detect stale references. | ||
273 | * | ||
274 | * For error reporting purposes we treat the 48-bit index as a signed quantity. | ||
275 | * | ||
276 | * The sequence number is a circular counter (skipping 0) describing how many | ||
277 | * times the referenced mft record has been (re)used. This has to match the | ||
278 | * sequence number of the mft record being referenced, otherwise the reference | ||
279 | * is considered stale and removed (FIXME: only ntfsck or the driver itself?). | ||
280 | * | ||
281 | * If the sequence number is zero it is assumed that no sequence number | ||
282 | * consistency checking should be performed. | ||
283 | * | ||
284 | * FIXME: Since inodes are 32-bit as of now, the driver needs to always check | ||
285 | * for high_part being 0 and if not either BUG(), cause a panic() or handle | ||
286 | * the situation in some other way. This shouldn't be a problem as a volume has | ||
287 | * to become HUGE in order to need more than 32-bits worth of mft records. | ||
288 | * Assuming the standard mft record size of 1kiB only the records (never mind | ||
289 | * the non-resident attributes, etc.) would require 4TiB of space on their own | ||
290 | * for the first 32 bits worth of records. This is only if some strange person | ||
291 | * doesn't decide to foul play and make the mft sparse which would be a really | ||
292 | * horrible thing to do as it would trash our current driver implementation. )-: | ||
293 | * Do I hear screams "we want 64-bit inodes!" ?!? (-; | ||
294 | * | ||
295 | * FIXME: The mft zone is defined as the first 12% of the volume. This space is | ||
296 | * reserved so that the mft can grow contiguously and hence doesn't become | ||
297 | * fragmented. Volume free space includes the empty part of the mft zone and | ||
298 | * when the volume's free 88% are used up, the mft zone is shrunk by a factor | ||
299 | * of 2, thus making more space available for more files/data. This process is | ||
300 | * repeated every time there is no more free space except for the mft zone until | ||
301 | * there really is no more free space. | ||
302 | */ | ||
303 | |||
304 | /* | ||
305 | * Typedef the MFT_REF as a 64-bit value for easier handling. | ||
306 | * Also define two unpacking macros to get to the reference (MREF) and | ||
307 | * sequence number (MSEQNO) respectively. | ||
308 | * The _LE versions are to be applied on little endian MFT_REFs. | ||
309 | * Note: The _LE versions will return a CPU endian formatted value! | ||
310 | */ | ||
311 | typedef enum { | ||
312 | MFT_REF_MASK_CPU = 0x0000ffffffffffffULL, | ||
313 | MFT_REF_MASK_LE = const_cpu_to_le64(0x0000ffffffffffffULL), | ||
314 | } MFT_REF_CONSTS; | ||
315 | |||
316 | typedef u64 MFT_REF; | ||
317 | typedef le64 leMFT_REF; | ||
318 | |||
319 | #define MK_MREF(m, s) ((MFT_REF)(((MFT_REF)(s) << 48) | \ | ||
320 | ((MFT_REF)(m) & MFT_REF_MASK_CPU))) | ||
321 | #define MK_LE_MREF(m, s) cpu_to_le64(MK_MREF(m, s)) | ||
322 | |||
323 | #define MREF(x) ((unsigned long)((x) & MFT_REF_MASK_CPU)) | ||
324 | #define MSEQNO(x) ((u16)(((x) >> 48) & 0xffff)) | ||
325 | #define MREF_LE(x) ((unsigned long)(le64_to_cpu(x) & MFT_REF_MASK_CPU)) | ||
326 | #define MSEQNO_LE(x) ((u16)((le64_to_cpu(x) >> 48) & 0xffff)) | ||
327 | |||
328 | #define IS_ERR_MREF(x) (((x) & 0x0000800000000000ULL) ? 1 : 0) | ||
329 | #define ERR_MREF(x) ((u64)((s64)(x))) | ||
330 | #define MREF_ERR(x) ((int)((s64)(x))) | ||
331 | |||
332 | /* | ||
333 | * The mft record header present at the beginning of every record in the mft. | ||
334 | * This is followed by a sequence of variable length attribute records which | ||
335 | * is terminated by an attribute of type AT_END which is a truncated attribute | ||
336 | * in that it only consists of the attribute type code AT_END and none of the | ||
337 | * other members of the attribute structure are present. | ||
338 | */ | ||
339 | typedef struct { | ||
340 | /*Ofs*/ | ||
341 | /* 0 NTFS_RECORD; -- Unfolded here as gcc doesn't like unnamed structs. */ | ||
342 | NTFS_RECORD_TYPE magic; /* Usually the magic is "FILE". */ | ||
343 | le16 usa_ofs; /* See NTFS_RECORD definition above. */ | ||
344 | le16 usa_count; /* See NTFS_RECORD definition above. */ | ||
345 | |||
346 | /* 8*/ le64 lsn; /* $LogFile sequence number for this record. | ||
347 | Changed every time the record is modified. */ | ||
348 | /* 16*/ le16 sequence_number; /* Number of times this mft record has been | ||
349 | reused. (See description for MFT_REF | ||
350 | above.) NOTE: The increment (skipping zero) | ||
351 | is done when the file is deleted. NOTE: If | ||
352 | this is zero it is left zero. */ | ||
353 | /* 18*/ le16 link_count; /* Number of hard links, i.e. the number of | ||
354 | directory entries referencing this record. | ||
355 | NOTE: Only used in mft base records. | ||
356 | NOTE: When deleting a directory entry we | ||
357 | check the link_count and if it is 1 we | ||
358 | delete the file. Otherwise we delete the | ||
359 | FILE_NAME_ATTR being referenced by the | ||
360 | directory entry from the mft record and | ||
361 | decrement the link_count. | ||
362 | FIXME: Careful with Win32 + DOS names! */ | ||
363 | /* 20*/ le16 attrs_offset; /* Byte offset to the first attribute in this | ||
364 | mft record from the start of the mft record. | ||
365 | NOTE: Must be aligned to 8-byte boundary. */ | ||
366 | /* 22*/ MFT_RECORD_FLAGS flags; /* Bit array of MFT_RECORD_FLAGS. When a file | ||
367 | is deleted, the MFT_RECORD_IN_USE flag is | ||
368 | set to zero. */ | ||
369 | /* 24*/ le32 bytes_in_use; /* Number of bytes used in this mft record. | ||
370 | NOTE: Must be aligned to 8-byte boundary. */ | ||
371 | /* 28*/ le32 bytes_allocated; /* Number of bytes allocated for this mft | ||
372 | record. This should be equal to the mft | ||
373 | record size. */ | ||
374 | /* 32*/ leMFT_REF base_mft_record;/* This is zero for base mft records. | ||
375 | When it is not zero it is a mft reference | ||
376 | pointing to the base mft record to which | ||
377 | this record belongs (this is then used to | ||
378 | locate the attribute list attribute present | ||
379 | in the base record which describes this | ||
380 | extension record and hence might need | ||
381 | modification when the extension record | ||
382 | itself is modified, also locating the | ||
383 | attribute list also means finding the other | ||
384 | potential extents, belonging to the non-base | ||
385 | mft record). */ | ||
386 | /* 40*/ le16 next_attr_instance;/* The instance number that will be assigned to | ||
387 | the next attribute added to this mft record. | ||
388 | NOTE: Incremented each time after it is used. | ||
389 | NOTE: Every time the mft record is reused | ||
390 | this number is set to zero. NOTE: The first | ||
391 | instance number is always 0. */ | ||
392 | /* The below fields are specific to NTFS 3.1+ (Windows XP and above): */ | ||
393 | /* 42*/ le16 reserved; /* Reserved/alignment. */ | ||
394 | /* 44*/ le32 mft_record_number; /* Number of this mft record. */ | ||
395 | /* sizeof() = 48 bytes */ | ||
396 | /* | ||
397 | * When (re)using the mft record, we place the update sequence array at this | ||
398 | * offset, i.e. before we start with the attributes. This also makes sense, | ||
399 | * otherwise we could run into problems with the update sequence array | ||
400 | * containing in itself the last two bytes of a sector which would mean that | ||
401 | * multi sector transfer protection wouldn't work. As you can't protect data | ||
402 | * by overwriting it since you then can't get it back... | ||
403 | * When reading we obviously use the data from the ntfs record header. | ||
404 | */ | ||
405 | } __attribute__ ((__packed__)) MFT_RECORD; | ||
406 | |||
407 | /* This is the version without the NTFS 3.1+ specific fields. */ | ||
408 | typedef struct { | ||
409 | /*Ofs*/ | ||
410 | /* 0 NTFS_RECORD; -- Unfolded here as gcc doesn't like unnamed structs. */ | ||
411 | NTFS_RECORD_TYPE magic; /* Usually the magic is "FILE". */ | ||
412 | le16 usa_ofs; /* See NTFS_RECORD definition above. */ | ||
413 | le16 usa_count; /* See NTFS_RECORD definition above. */ | ||
414 | |||
415 | /* 8*/ le64 lsn; /* $LogFile sequence number for this record. | ||
416 | Changed every time the record is modified. */ | ||
417 | /* 16*/ le16 sequence_number; /* Number of times this mft record has been | ||
418 | reused. (See description for MFT_REF | ||
419 | above.) NOTE: The increment (skipping zero) | ||
420 | is done when the file is deleted. NOTE: If | ||
421 | this is zero it is left zero. */ | ||
422 | /* 18*/ le16 link_count; /* Number of hard links, i.e. the number of | ||
423 | directory entries referencing this record. | ||
424 | NOTE: Only used in mft base records. | ||
425 | NOTE: When deleting a directory entry we | ||
426 | check the link_count and if it is 1 we | ||
427 | delete the file. Otherwise we delete the | ||
428 | FILE_NAME_ATTR being referenced by the | ||
429 | directory entry from the mft record and | ||
430 | decrement the link_count. | ||
431 | FIXME: Careful with Win32 + DOS names! */ | ||
432 | /* 20*/ le16 attrs_offset; /* Byte offset to the first attribute in this | ||
433 | mft record from the start of the mft record. | ||
434 | NOTE: Must be aligned to 8-byte boundary. */ | ||
435 | /* 22*/ MFT_RECORD_FLAGS flags; /* Bit array of MFT_RECORD_FLAGS. When a file | ||
436 | is deleted, the MFT_RECORD_IN_USE flag is | ||
437 | set to zero. */ | ||
438 | /* 24*/ le32 bytes_in_use; /* Number of bytes used in this mft record. | ||
439 | NOTE: Must be aligned to 8-byte boundary. */ | ||
440 | /* 28*/ le32 bytes_allocated; /* Number of bytes allocated for this mft | ||
441 | record. This should be equal to the mft | ||
442 | record size. */ | ||
443 | /* 32*/ leMFT_REF base_mft_record;/* This is zero for base mft records. | ||
444 | When it is not zero it is a mft reference | ||
445 | pointing to the base mft record to which | ||
446 | this record belongs (this is then used to | ||
447 | locate the attribute list attribute present | ||
448 | in the base record which describes this | ||
449 | extension record and hence might need | ||
450 | modification when the extension record | ||
451 | itself is modified, also locating the | ||
452 | attribute list also means finding the other | ||
453 | potential extents, belonging to the non-base | ||
454 | mft record). */ | ||
455 | /* 40*/ le16 next_attr_instance;/* The instance number that will be assigned to | ||
456 | the next attribute added to this mft record. | ||
457 | NOTE: Incremented each time after it is used. | ||
458 | NOTE: Every time the mft record is reused | ||
459 | this number is set to zero. NOTE: The first | ||
460 | instance number is always 0. */ | ||
461 | /* sizeof() = 42 bytes */ | ||
462 | /* | ||
463 | * When (re)using the mft record, we place the update sequence array at this | ||
464 | * offset, i.e. before we start with the attributes. This also makes sense, | ||
465 | * otherwise we could run into problems with the update sequence array | ||
466 | * containing in itself the last two bytes of a sector which would mean that | ||
467 | * multi sector transfer protection wouldn't work. As you can't protect data | ||
468 | * by overwriting it since you then can't get it back... | ||
469 | * When reading we obviously use the data from the ntfs record header. | ||
470 | */ | ||
471 | } __attribute__ ((__packed__)) MFT_RECORD_OLD; | ||
472 | |||
473 | /* | ||
474 | * System defined attributes (32-bit). Each attribute type has a corresponding | ||
475 | * attribute name (Unicode string of maximum 64 character length) as described | ||
476 | * by the attribute definitions present in the data attribute of the $AttrDef | ||
477 | * system file. On NTFS 3.0 volumes the names are just as the types are named | ||
478 | * in the below defines exchanging AT_ for the dollar sign ($). If that is not | ||
479 | * a revealing choice of symbol I do not know what is... (-; | ||
480 | */ | ||
481 | enum { | ||
482 | AT_UNUSED = const_cpu_to_le32( 0), | ||
483 | AT_STANDARD_INFORMATION = const_cpu_to_le32( 0x10), | ||
484 | AT_ATTRIBUTE_LIST = const_cpu_to_le32( 0x20), | ||
485 | AT_FILE_NAME = const_cpu_to_le32( 0x30), | ||
486 | AT_OBJECT_ID = const_cpu_to_le32( 0x40), | ||
487 | AT_SECURITY_DESCRIPTOR = const_cpu_to_le32( 0x50), | ||
488 | AT_VOLUME_NAME = const_cpu_to_le32( 0x60), | ||
489 | AT_VOLUME_INFORMATION = const_cpu_to_le32( 0x70), | ||
490 | AT_DATA = const_cpu_to_le32( 0x80), | ||
491 | AT_INDEX_ROOT = const_cpu_to_le32( 0x90), | ||
492 | AT_INDEX_ALLOCATION = const_cpu_to_le32( 0xa0), | ||
493 | AT_BITMAP = const_cpu_to_le32( 0xb0), | ||
494 | AT_REPARSE_POINT = const_cpu_to_le32( 0xc0), | ||
495 | AT_EA_INFORMATION = const_cpu_to_le32( 0xd0), | ||
496 | AT_EA = const_cpu_to_le32( 0xe0), | ||
497 | AT_PROPERTY_SET = const_cpu_to_le32( 0xf0), | ||
498 | AT_LOGGED_UTILITY_STREAM = const_cpu_to_le32( 0x100), | ||
499 | AT_FIRST_USER_DEFINED_ATTRIBUTE = const_cpu_to_le32( 0x1000), | ||
500 | AT_END = const_cpu_to_le32(0xffffffff) | ||
501 | }; | ||
502 | |||
503 | typedef le32 ATTR_TYPE; | ||
504 | |||
505 | /* | ||
506 | * The collation rules for sorting views/indexes/etc (32-bit). | ||
507 | * | ||
508 | * COLLATION_BINARY - Collate by binary compare where the first byte is most | ||
509 | * significant. | ||
510 | * COLLATION_UNICODE_STRING - Collate Unicode strings by comparing their binary | ||
511 | * Unicode values, except that when a character can be uppercased, the | ||
512 | * upper case value collates before the lower case one. | ||
513 | * COLLATION_FILE_NAME - Collate file names as Unicode strings. The collation | ||
514 | * is done very much like COLLATION_UNICODE_STRING. In fact I have no idea | ||
515 | * what the difference is. Perhaps the difference is that file names | ||
516 | * would treat some special characters in an odd way (see | ||
517 | * unistr.c::ntfs_collate_names() and unistr.c::legal_ansi_char_array[] | ||
518 | * for what I mean) but COLLATION_UNICODE_STRING would not give any special | ||
519 | * treatment to any characters at all, but this is speculation. | ||
520 | * COLLATION_NTOFS_ULONG - Sorting is done according to ascending le32 key | ||
521 | * values. E.g. used for $SII index in FILE_Secure, which sorts by | ||
522 | * security_id (le32). | ||
523 | * COLLATION_NTOFS_SID - Sorting is done according to ascending SID values. | ||
524 | * E.g. used for $O index in FILE_Extend/$Quota. | ||
525 | * COLLATION_NTOFS_SECURITY_HASH - Sorting is done first by ascending hash | ||
526 | * values and second by ascending security_id values. E.g. used for $SDH | ||
527 | * index in FILE_Secure. | ||
528 | * COLLATION_NTOFS_ULONGS - Sorting is done according to a sequence of ascending | ||
529 | * le32 key values. E.g. used for $O index in FILE_Extend/$ObjId, which | ||
530 | * sorts by object_id (16-byte), by splitting up the object_id in four | ||
531 | * le32 values and using them as individual keys. E.g. take the following | ||
532 | * two object_ids, stored as follows on disk: | ||
533 | * 1st: a1 61 65 b7 65 7b d4 11 9e 3d 00 e0 81 10 42 59 | ||
534 | * 2nd: 38 14 37 d2 d2 f3 d4 11 a5 21 c8 6b 79 b1 97 45 | ||
535 | * To compare them, they are split into four le32 values each, like so: | ||
536 | * 1st: 0xb76561a1 0x11d47b65 0xe0003d9e 0x59421081 | ||
537 | * 2nd: 0xd2371438 0x11d4f3d2 0x6bc821a5 0x4597b179 | ||
538 | * Now, it is apparent why the 2nd object_id collates after the 1st: the | ||
539 | * first le32 value of the 1st object_id is less than the first le32 of | ||
540 | * the 2nd object_id. If the first le32 values of both object_ids were | ||
541 | * equal then the second le32 values would be compared, etc. | ||
542 | */ | ||
543 | enum { | ||
544 | COLLATION_BINARY = const_cpu_to_le32(0x00), | ||
545 | COLLATION_FILE_NAME = const_cpu_to_le32(0x01), | ||
546 | COLLATION_UNICODE_STRING = const_cpu_to_le32(0x02), | ||
547 | COLLATION_NTOFS_ULONG = const_cpu_to_le32(0x10), | ||
548 | COLLATION_NTOFS_SID = const_cpu_to_le32(0x11), | ||
549 | COLLATION_NTOFS_SECURITY_HASH = const_cpu_to_le32(0x12), | ||
550 | COLLATION_NTOFS_ULONGS = const_cpu_to_le32(0x13) | ||
551 | }; | ||
552 | |||
553 | typedef le32 COLLATION_RULE; | ||
554 | |||
555 | /* | ||
556 | * The flags (32-bit) describing attribute properties in the attribute | ||
557 | * definition structure. FIXME: This information is from Regis's information | ||
558 | * and, according to him, it is not certain and probably incomplete. | ||
559 | * The INDEXABLE flag is fairly certainly correct as only the file name | ||
560 | * attribute has this flag set and this is the only attribute indexed in NT4. | ||
561 | */ | ||
562 | enum { | ||
563 | INDEXABLE = const_cpu_to_le32(0x02), /* Attribute can be | ||
564 | indexed. */ | ||
565 | NEED_TO_REGENERATE = const_cpu_to_le32(0x40), /* Need to regenerate | ||
566 | during regeneration | ||
567 | phase. */ | ||
568 | CAN_BE_NON_RESIDENT = const_cpu_to_le32(0x80), /* Attribute can be | ||
569 | non-resident. */ | ||
570 | }; | ||
571 | |||
572 | typedef le32 ATTR_DEF_FLAGS; | ||
573 | |||
574 | /* | ||
575 | * The data attribute of FILE_AttrDef contains a sequence of attribute | ||
576 | * definitions for the NTFS volume. With this, it is supposed to be safe for an | ||
577 | * older NTFS driver to mount a volume containing a newer NTFS version without | ||
578 | * damaging it (that's the theory. In practice it's: not damaging it too much). | ||
579 | * Entries are sorted by attribute type. The flags describe whether the | ||
580 | * attribute can be resident/non-resident and possibly other things, but the | ||
581 | * actual bits are unknown. | ||
582 | */ | ||
583 | typedef struct { | ||
584 | /*hex ofs*/ | ||
585 | /* 0*/ ntfschar name[0x40]; /* Unicode name of the attribute. Zero | ||
586 | terminated. */ | ||
587 | /* 80*/ ATTR_TYPE type; /* Type of the attribute. */ | ||
588 | /* 84*/ le32 display_rule; /* Default display rule. | ||
589 | FIXME: What does it mean? (AIA) */ | ||
590 | /* 88*/ COLLATION_RULE collation_rule; /* Default collation rule. */ | ||
591 | /* 8c*/ ATTR_DEF_FLAGS flags; /* Flags describing the attribute. */ | ||
592 | /* 90*/ sle64 min_size; /* Optional minimum attribute size. */ | ||
593 | /* 98*/ sle64 max_size; /* Maximum size of attribute. */ | ||
594 | /* sizeof() = 0xa0 or 160 bytes */ | ||
595 | } __attribute__ ((__packed__)) ATTR_DEF; | ||
596 | |||
597 | /* | ||
598 | * Attribute flags (16-bit). | ||
599 | */ | ||
600 | enum { | ||
601 | ATTR_IS_COMPRESSED = const_cpu_to_le16(0x0001), | ||
602 | ATTR_COMPRESSION_MASK = const_cpu_to_le16(0x00ff), /* Compression method | ||
603 | mask. Also, first | ||
604 | illegal value. */ | ||
605 | ATTR_IS_ENCRYPTED = const_cpu_to_le16(0x4000), | ||
606 | ATTR_IS_SPARSE = const_cpu_to_le16(0x8000), | ||
607 | } __attribute__ ((__packed__)); | ||
608 | |||
609 | typedef le16 ATTR_FLAGS; | ||
610 | |||
611 | /* | ||
612 | * Attribute compression. | ||
613 | * | ||
614 | * Only the data attribute is ever compressed in the current ntfs driver in | ||
615 | * Windows. Further, compression is only applied when the data attribute is | ||
616 | * non-resident. Finally, to use compression, the maximum allowed cluster size | ||
617 | * on a volume is 4kiB. | ||
618 | * | ||
619 | * The compression method is based on independently compressing blocks of X | ||
620 | * clusters, where X is determined from the compression_unit value found in the | ||
621 | * non-resident attribute record header (more precisely: X = 2^compression_unit | ||
622 | * clusters). On Windows NT/2k, X always is 16 clusters (compression_unit = 4). | ||
623 | * | ||
624 | * There are three different cases of how a compression block of X clusters | ||
625 | * can be stored: | ||
626 | * | ||
627 | * 1) The data in the block is all zero (a sparse block): | ||
628 | * This is stored as a sparse block in the runlist, i.e. the runlist | ||
629 | * entry has length = X and lcn = -1. The mapping pairs array actually | ||
630 | * uses a delta_lcn value length of 0, i.e. delta_lcn is not present at | ||
631 | * all, which is then interpreted by the driver as lcn = -1. | ||
632 | * NOTE: Even uncompressed files can be sparse on NTFS 3.0 volumes, then | ||
633 | * the same principles apply as above, except that the length is not | ||
634 | * restricted to being any particular value. | ||
635 | * | ||
636 | * 2) The data in the block is not compressed: | ||
637 | * This happens when compression doesn't reduce the size of the block | ||
638 | * in clusters. I.e. if compression has a small effect so that the | ||
639 | * compressed data still occupies X clusters, then the uncompressed data | ||
640 | * is stored in the block. | ||
641 | * This case is recognised by the fact that the runlist entry has | ||
642 | * length = X and lcn >= 0. The mapping pairs array stores this as | ||
643 | * normal with a run length of X and some specific delta_lcn, i.e. | ||
644 | * delta_lcn has to be present. | ||
645 | * | ||
646 | * 3) The data in the block is compressed: | ||
647 | * The common case. This case is recognised by the fact that the run | ||
648 | * list entry has length L < X and lcn >= 0. The mapping pairs array | ||
649 | * stores this as normal with a run length of L and some specific | ||
650 | * delta_lcn, i.e. delta_lcn has to be present. This runlist entry is | ||
651 | * immediately followed by a sparse entry with length = X - L and | ||
652 | * lcn = -1. The latter entry is to make up the vcn counting to the | ||
653 | * full compression block size X. | ||
654 | * | ||
655 | * In fact, life is more complicated because adjacent entries of the same type | ||
656 | * can be coalesced. This means that one has to keep track of the number of | ||
657 | * clusters handled and work on a basis of X clusters at a time being one | ||
658 | * block. An example: if length L > X this means that this particular runlist | ||
659 | * entry contains a block of length X and part of one or more blocks of length | ||
660 | * L - X. Another example: if length L < X, this does not necessarily mean that | ||
661 | * the block is compressed as it might be that the lcn changes inside the block | ||
662 | * and hence the following runlist entry describes the continuation of the | ||
663 | * potentially compressed block. The block would be compressed if the | ||
664 | * following runlist entry describes at least X - L sparse clusters, thus | ||
665 | * making up the compression block length as described in point 3 above. (Of | ||
666 | * course, there can be several runlist entries with small lengths so that the | ||
667 | * sparse entry does not follow the first data containing entry with | ||
668 | * length < X.) | ||
669 | * | ||
670 | * NOTE: At the end of the compressed attribute value, there most likely is not | ||
671 | * just the right amount of data to make up a compression block, thus this data | ||
672 | * is not even attempted to be compressed. It is just stored as is, unless | ||
673 | * the number of clusters it occupies is reduced when compressed in which case | ||
674 | * it is stored as a compressed compression block, complete with sparse | ||
675 | * clusters at the end. | ||
676 | */ | ||
677 | |||
678 | /* | ||
679 | * Flags of resident attributes (8-bit). | ||
680 | */ | ||
681 | enum { | ||
682 | RESIDENT_ATTR_IS_INDEXED = 0x01, /* Attribute is referenced in an index | ||
683 | (has implications for deleting and | ||
684 | modifying the attribute). */ | ||
685 | } __attribute__ ((__packed__)); | ||
686 | |||
687 | typedef u8 RESIDENT_ATTR_FLAGS; | ||
688 | |||
689 | /* | ||
690 | * Attribute record header. Always aligned to 8-byte boundary. | ||
691 | */ | ||
692 | typedef struct { | ||
693 | /*Ofs*/ | ||
694 | /* 0*/ ATTR_TYPE type; /* The (32-bit) type of the attribute. */ | ||
695 | /* 4*/ le32 length; /* Byte size of the resident part of the | ||
696 | attribute (aligned to 8-byte boundary). | ||
697 | Used to get to the next attribute. */ | ||
698 | /* 8*/ u8 non_resident; /* If 0, attribute is resident. | ||
699 | If 1, attribute is non-resident. */ | ||
700 | /* 9*/ u8 name_length; /* Unicode character size of name of attribute. | ||
701 | 0 if unnamed. */ | ||
702 | /* 10*/ le16 name_offset; /* If name_length != 0, the byte offset to the | ||
703 | beginning of the name from the attribute | ||
704 | record. Note that the name is stored as a | ||
705 | Unicode string. When creating, place offset | ||
706 | just at the end of the record header. Then, | ||
707 | follow with attribute value or mapping pairs | ||
708 | array, resident and non-resident attributes | ||
709 | respectively, aligning to an 8-byte | ||
710 | boundary. */ | ||
711 | /* 12*/ ATTR_FLAGS flags; /* Flags describing the attribute. */ | ||
712 | /* 14*/ le16 instance; /* The instance of this attribute record. This | ||
713 | number is unique within this mft record (see | ||
714 | MFT_RECORD/next_attr_instance notes in | ||
715 | mft.h for more details). | ||
716 | /* 16*/ union { | ||
717 | /* Resident attributes. */ | ||
718 | struct { | ||
719 | /* 16 */ le32 value_length;/* Byte size of attribute value. */ | ||
720 | /* 20 */ le16 value_offset;/* Byte offset of the attribute | ||
721 | value from the start of the | ||
722 | attribute record. When creating, | ||
723 | align to 8-byte boundary if we | ||
724 | have a name present as this might | ||
725 | not have a length of a multiple | ||
726 | of 8-bytes. */ | ||
727 | /* 22 */ RESIDENT_ATTR_FLAGS flags; /* See above. */ | ||
728 | /* 23 */ s8 reserved; /* Reserved/alignment to 8-byte | ||
729 | boundary. */ | ||
730 | } __attribute__ ((__packed__)) resident; | ||
731 | /* Non-resident attributes. */ | ||
732 | struct { | ||
733 | /* 16*/ leVCN lowest_vcn;/* Lowest valid virtual cluster number | ||
734 | for this portion of the attribute value or | ||
735 | 0 if this is the only extent (usually the | ||
736 | case). - Only when an attribute list is used | ||
737 | does lowest_vcn != 0 ever occur. */ | ||
738 | /* 24*/ leVCN highest_vcn;/* Highest valid vcn of this extent of | ||
739 | the attribute value. - Usually there is only one | ||
740 | portion, so this usually equals the attribute | ||
741 | value size in clusters minus 1. Can be -1 for | ||
742 | zero length files. Can be 0 for "single extent" | ||
743 | attributes. */ | ||
744 | /* 32*/ le16 mapping_pairs_offset; /* Byte offset from the | ||
745 | beginning of the structure to the mapping pairs | ||
746 | array which contains the mappings between the | ||
747 | vcns and the logical cluster numbers (lcns). | ||
748 | When creating, place this at the end of this | ||
749 | record header aligned to 8-byte boundary. */ | ||
750 | /* 34*/ u8 compression_unit; /* The compression unit expressed | ||
751 | as the log to the base 2 of the number of | ||
752 | clusters in a compression unit. 0 means not | ||
753 | compressed. (This effectively limits the | ||
754 | compression unit size to be a power of two | ||
755 | clusters.) WinNT4 only uses a value of 4. */ | ||
756 | /* 35*/ u8 reserved[5]; /* Align to 8-byte boundary. */ | ||
757 | /* The sizes below are only used when lowest_vcn is zero, as otherwise it would | ||
758 | be difficult to keep them up-to-date.*/ | ||
759 | /* 40*/ sle64 allocated_size; /* Byte size of disk space | ||
760 | allocated to hold the attribute value. Always | ||
761 | is a multiple of the cluster size. When a file | ||
762 | is compressed, this field is a multiple of the | ||
763 | compression block size (2^compression_unit) and | ||
764 | it represents the logically allocated space | ||
765 | rather than the actual on disk usage. For this | ||
766 | use the compressed_size (see below). */ | ||
767 | /* 48*/ sle64 data_size; /* Byte size of the attribute | ||
768 | value. Can be larger than allocated_size if | ||
769 | attribute value is compressed or sparse. */ | ||
770 | /* 56*/ sle64 initialized_size; /* Byte size of initialized | ||
771 | portion of the attribute value. Usually equals | ||
772 | data_size. */ | ||
773 | /* sizeof(uncompressed attr) = 64*/ | ||
774 | /* 64*/ sle64 compressed_size; /* Byte size of the attribute | ||
775 | value after compression. Only present when | ||
776 | compressed. Always is a multiple of the | ||
777 | cluster size. Represents the actual amount of | ||
778 | disk space being used on the disk. */ | ||
779 | /* sizeof(compressed attr) = 72*/ | ||
780 | } __attribute__ ((__packed__)) non_resident; | ||
781 | } __attribute__ ((__packed__)) data; | ||
782 | } __attribute__ ((__packed__)) ATTR_RECORD; | ||
783 | |||
784 | typedef ATTR_RECORD ATTR_REC; | ||
785 | |||
786 | /* | ||
787 | * File attribute flags (32-bit). | ||
788 | */ | ||
789 | enum { | ||
790 | /* | ||
791 | * The following flags are only present in the STANDARD_INFORMATION | ||
792 | * attribute (in the field file_attributes). | ||
793 | */ | ||
794 | FILE_ATTR_READONLY = const_cpu_to_le32(0x00000001), | ||
795 | FILE_ATTR_HIDDEN = const_cpu_to_le32(0x00000002), | ||
796 | FILE_ATTR_SYSTEM = const_cpu_to_le32(0x00000004), | ||
797 | /* Old DOS volid. Unused in NT. = const_cpu_to_le32(0x00000008), */ | ||
798 | |||
799 | FILE_ATTR_DIRECTORY = const_cpu_to_le32(0x00000010), | ||
800 | /* Note, FILE_ATTR_DIRECTORY is not considered valid in NT. It is | ||
801 | reserved for the DOS SUBDIRECTORY flag. */ | ||
802 | FILE_ATTR_ARCHIVE = const_cpu_to_le32(0x00000020), | ||
803 | FILE_ATTR_DEVICE = const_cpu_to_le32(0x00000040), | ||
804 | FILE_ATTR_NORMAL = const_cpu_to_le32(0x00000080), | ||
805 | |||
806 | FILE_ATTR_TEMPORARY = const_cpu_to_le32(0x00000100), | ||
807 | FILE_ATTR_SPARSE_FILE = const_cpu_to_le32(0x00000200), | ||
808 | FILE_ATTR_REPARSE_POINT = const_cpu_to_le32(0x00000400), | ||
809 | FILE_ATTR_COMPRESSED = const_cpu_to_le32(0x00000800), | ||
810 | |||
811 | FILE_ATTR_OFFLINE = const_cpu_to_le32(0x00001000), | ||
812 | FILE_ATTR_NOT_CONTENT_INDEXED = const_cpu_to_le32(0x00002000), | ||
813 | FILE_ATTR_ENCRYPTED = const_cpu_to_le32(0x00004000), | ||
814 | |||
815 | FILE_ATTR_VALID_FLAGS = const_cpu_to_le32(0x00007fb7), | ||
816 | /* Note, FILE_ATTR_VALID_FLAGS masks out the old DOS VolId and the | ||
817 | FILE_ATTR_DEVICE and preserves everything else. This mask is used | ||
818 | to obtain all flags that are valid for reading. */ | ||
819 | FILE_ATTR_VALID_SET_FLAGS = const_cpu_to_le32(0x000031a7), | ||
820 | /* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the | ||
821 | F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT, | ||
822 | F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask | ||
823 | is used to obtain all flags that are valid for setting. */ | ||
824 | |||
825 | /* | ||
826 | * The following flags are only present in the FILE_NAME attribute (in | ||
827 | * the field file_attributes). | ||
828 | */ | ||
829 | FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = const_cpu_to_le32(0x10000000), | ||
830 | /* Note, this is a copy of the corresponding bit from the mft record, | ||
831 | telling us whether this is a directory or not, i.e. whether it has | ||
832 | an index root attribute or not. */ | ||
833 | FILE_ATTR_DUP_VIEW_INDEX_PRESENT = const_cpu_to_le32(0x20000000), | ||
834 | /* Note, this is a copy of the corresponding bit from the mft record, | ||
835 | telling us whether this file has a view index present (eg. object id | ||
836 | index, quota index, one of the security indexes or the encrypting | ||
837 | file system related indexes). */ | ||
838 | }; | ||
839 | |||
840 | typedef le32 FILE_ATTR_FLAGS; | ||
841 | |||
842 | /* | ||
843 | * NOTE on times in NTFS: All times are in MS standard time format, i.e. they | ||
844 | * are the number of 100-nanosecond intervals since 1st January 1601, 00:00:00 | ||
845 | * universal coordinated time (UTC). (In Linux time starts 1st January 1970, | ||
846 | * 00:00:00 UTC and is stored as the number of 1-second intervals since then.) | ||
847 | */ | ||
848 | |||
849 | /* | ||
850 | * Attribute: Standard information (0x10). | ||
851 | * | ||
852 | * NOTE: Always resident. | ||
853 | * NOTE: Present in all base file records on a volume. | ||
854 | * NOTE: There is conflicting information about the meaning of each of the time | ||
855 | * fields but the meaning as defined below has been verified to be | ||
856 | * correct by practical experimentation on Windows NT4 SP6a and is hence | ||
857 | * assumed to be the one and only correct interpretation. | ||
858 | */ | ||
859 | typedef struct { | ||
860 | /*Ofs*/ | ||
861 | /* 0*/ sle64 creation_time; /* Time file was created. Updated when | ||
862 | a filename is changed(?). */ | ||
863 | /* 8*/ sle64 last_data_change_time; /* Time the data attribute was last | ||
864 | modified. */ | ||
865 | /* 16*/ sle64 last_mft_change_time; /* Time this mft record was last | ||
866 | modified. */ | ||
867 | /* 24*/ sle64 last_access_time; /* Approximate time when the file was | ||
868 | last accessed (obviously this is not | ||
869 | updated on read-only volumes). In | ||
870 | Windows this is only updated when | ||
871 | accessed if some time delta has | ||
872 | passed since the last update. Also, | ||
873 | last access time updates can be | ||
874 | disabled altogether for speed. */ | ||
875 | /* 32*/ FILE_ATTR_FLAGS file_attributes; /* Flags describing the file. */ | ||
876 | /* 36*/ union { | ||
877 | /* NTFS 1.2 */ | ||
878 | struct { | ||
879 | /* 36*/ u8 reserved12[12]; /* Reserved/alignment to 8-byte | ||
880 | boundary. */ | ||
881 | } __attribute__ ((__packed__)) v1; | ||
882 | /* sizeof() = 48 bytes */ | ||
883 | /* NTFS 3.x */ | ||
884 | struct { | ||
885 | /* | ||
886 | * If a volume has been upgraded from a previous NTFS version, then these | ||
887 | * fields are present only if the file has been accessed since the upgrade. | ||
888 | * Recognize the difference by comparing the length of the resident attribute | ||
889 | * value. If it is 48, then the following fields are missing. If it is 72 then | ||
890 | * the fields are present. Maybe just check like this: | ||
891 | * if (resident.ValueLength < sizeof(STANDARD_INFORMATION)) { | ||
892 | * Assume NTFS 1.2- format. | ||
893 | * If (volume version is 3.x) | ||
894 | * Upgrade attribute to NTFS 3.x format. | ||
895 | * else | ||
896 | * Use NTFS 1.2- format for access. | ||
897 | * } else | ||
898 | * Use NTFS 3.x format for access. | ||
899 | * Only problem is that it might be legal to set the length of the value to | ||
900 | * arbitrarily large values thus spoiling this check. - But chkdsk probably | ||
901 | * views that as a corruption, assuming that it behaves like this for all | ||
902 | * attributes. | ||
903 | */ | ||
904 | /* 36*/ le32 maximum_versions; /* Maximum allowed versions for | ||
905 | file. Zero if version numbering is disabled. */ | ||
906 | /* 40*/ le32 version_number; /* This file's version (if any). | ||
907 | Set to zero if maximum_versions is zero. */ | ||
908 | /* 44*/ le32 class_id; /* Class id from bidirectional | ||
909 | class id index (?). */ | ||
910 | /* 48*/ le32 owner_id; /* Owner_id of the user owning | ||
911 | the file. Translate via $Q index in FILE_Extend | ||
912 | /$Quota to the quota control entry for the user | ||
913 | owning the file. Zero if quotas are disabled. */ | ||
914 | /* 52*/ le32 security_id; /* Security_id for the file. | ||
915 | Translate via $SII index and $SDS data stream | ||
916 | in FILE_Secure to the security descriptor. */ | ||
917 | /* 56*/ le64 quota_charged; /* Byte size of the charge to | ||
918 | the quota for all streams of the file. Note: Is | ||
919 | zero if quotas are disabled. */ | ||
920 | /* 64*/ le64 usn; /* Last update sequence number | ||
921 | of the file. This is a direct index into the | ||
922 | change (aka usn) journal file. It is zero if | ||
923 | the usn journal is disabled. | ||
924 | NOTE: To disable the journal need to delete | ||
925 | the journal file itself and to then walk the | ||
926 | whole mft and set all Usn entries in all mft | ||
927 | records to zero! (This can take a while!) | ||
928 | The journal is FILE_Extend/$UsnJrnl. Win2k | ||
929 | will recreate the journal and initiate | ||
930 | logging if necessary when mounting the | ||
931 | partition. This, in contrast to disabling the | ||
932 | journal is a very fast process, so the user | ||
933 | won't even notice it. */ | ||
934 | } __attribute__ ((__packed__)) v3; | ||
935 | /* sizeof() = 72 bytes (NTFS 3.x) */ | ||
936 | } __attribute__ ((__packed__)) ver; | ||
937 | } __attribute__ ((__packed__)) STANDARD_INFORMATION; | ||
938 | |||
939 | /* | ||
940 | * Attribute: Attribute list (0x20). | ||
941 | * | ||
942 | * - Can be either resident or non-resident. | ||
943 | * - Value consists of a sequence of variable length, 8-byte aligned, | ||
944 | * ATTR_LIST_ENTRY records. | ||
945 | * - The list is not terminated by anything at all! The only way to know when | ||
946 | * the end is reached is to keep track of the current offset and compare it to | ||
947 | * the attribute value size. | ||
948 | * - The attribute list attribute contains one entry for each attribute of | ||
949 | * the file in which the list is located, except for the list attribute | ||
950 | * itself. The list is sorted: first by attribute type, second by attribute | ||
951 | * name (if present), third by instance number. The extents of one | ||
952 | * non-resident attribute (if present) immediately follow after the initial | ||
953 | * extent. They are ordered by lowest_vcn and have their instance set to zero. | ||
954 | * It is not allowed to have two attributes with all sorting keys equal. | ||
955 | * - Further restrictions: | ||
956 | * - If not resident, the vcn to lcn mapping array has to fit inside the | ||
957 | * base mft record. | ||
958 | * - The attribute list attribute value has a maximum size of 256kb. This | ||
959 | * is imposed by the Windows cache manager. | ||
960 | * - Attribute lists are only used when the attributes of mft record do not | ||
961 | * fit inside the mft record despite all attributes (that can be made | ||
962 | * non-resident) having been made non-resident. This can happen e.g. when: | ||
963 | * - File has a large number of hard links (lots of file name | ||
964 | * attributes present). | ||
965 | * - The mapping pairs array of some non-resident attribute becomes so | ||
966 | * large due to fragmentation that it overflows the mft record. | ||
967 | * - The security descriptor is very complex (not applicable to | ||
968 | * NTFS 3.0 volumes). | ||
969 | * - There are many named streams. | ||
970 | */ | ||
971 | typedef struct { | ||
972 | /*Ofs*/ | ||
973 | /* 0*/ ATTR_TYPE type; /* Type of referenced attribute. */ | ||
974 | /* 4*/ le16 length; /* Byte size of this entry (8-byte aligned). */ | ||
975 | /* 6*/ u8 name_length; /* Size in Unicode chars of the name of the | ||
976 | attribute or 0 if unnamed. */ | ||
977 | /* 7*/ u8 name_offset; /* Byte offset to beginning of attribute name | ||
978 | (always set this to where the name would | ||
979 | start even if unnamed). */ | ||
980 | /* 8*/ leVCN lowest_vcn; /* Lowest virtual cluster number of this portion | ||
981 | of the attribute value. This is usually 0. It | ||
982 | is non-zero for the case where one attribute | ||
983 | does not fit into one mft record and thus | ||
984 | several mft records are allocated to hold | ||
985 | this attribute. In the latter case, each mft | ||
986 | record holds one extent of the attribute and | ||
987 | there is one attribute list entry for each | ||
988 | extent. NOTE: This is DEFINITELY a signed | ||
989 | value! The windows driver uses cmp, followed | ||
990 | by jg when comparing this, thus it treats it | ||
991 | as signed. */ | ||
992 | /* 16*/ leMFT_REF mft_reference;/* The reference of the mft record holding | ||
993 | the ATTR_RECORD for this portion of the | ||
994 | attribute value. */ | ||
995 | /* 24*/ le16 instance; /* If lowest_vcn = 0, the instance of the | ||
996 | attribute being referenced; otherwise 0. */ | ||
997 | /* 26*/ ntfschar name[0]; /* Use when creating only. When reading use | ||
998 | name_offset to determine the location of the | ||
999 | name. */ | ||
1000 | /* sizeof() = 26 + (attribute_name_length * 2) bytes */ | ||
1001 | } __attribute__ ((__packed__)) ATTR_LIST_ENTRY; | ||
1002 | |||
1003 | /* | ||
1004 | * The maximum allowed length for a file name. | ||
1005 | */ | ||
1006 | #define MAXIMUM_FILE_NAME_LENGTH 255 | ||
1007 | |||
1008 | /* | ||
1009 | * Possible namespaces for filenames in ntfs (8-bit). | ||
1010 | */ | ||
1011 | enum { | ||
1012 | FILE_NAME_POSIX = 0x00, | ||
1013 | /* This is the largest namespace. It is case sensitive and allows all | ||
1014 | Unicode characters except for: '\0' and '/'. Beware that in | ||
1015 | WinNT/2k files which eg have the same name except for their case | ||
1016 | will not be distinguished by the standard utilities and thus a "del | ||
1017 | filename" will delete both "filename" and "fileName" without | ||
1018 | warning. */ | ||
1019 | FILE_NAME_WIN32 = 0x01, | ||
1020 | /* The standard WinNT/2k NTFS long filenames. Case insensitive. All | ||
1021 | Unicode chars except: '\0', '"', '*', '/', ':', '<', '>', '?', '\', | ||
1022 | and '|'. Further, names cannot end with a '.' or a space. */ | ||
1023 | FILE_NAME_DOS = 0x02, | ||
1024 | /* The standard DOS filenames (8.3 format). Uppercase only. All 8-bit | ||
1025 | characters greater than space, except: '"', '*', '+', ',', '/', ':', ';', | ||
1026 | '<', '=', '>', '?', and '\'. */ | ||
1027 | FILE_NAME_WIN32_AND_DOS = 0x03, | ||
1028 | /* 3 means that both the Win32 and the DOS filenames are identical and | ||
1029 | hence have been saved in this single filename record. */ | ||
1030 | } __attribute__ ((__packed__)); | ||
1031 | |||
1032 | typedef u8 FILE_NAME_TYPE_FLAGS; | ||
1033 | |||
1034 | /* | ||
1035 | * Attribute: Filename (0x30). | ||
1036 | * | ||
1037 | * NOTE: Always resident. | ||
1038 | * NOTE: All fields, except the parent_directory, are only updated when the | ||
1039 | * filename is changed. Until then, they just become out of sync with | ||
1040 | * reality and the more up to date values are present in the standard | ||
1041 | * information attribute. | ||
1042 | * NOTE: There is conflicting information about the meaning of each of the time | ||
1043 | * fields but the meaning as defined below has been verified to be | ||
1044 | * correct by practical experimentation on Windows NT4 SP6a and is hence | ||
1045 | * assumed to be the one and only correct interpretation. | ||
1046 | */ | ||
1047 | typedef struct { | ||
1048 | /*hex ofs*/ | ||
1049 | /* 0*/ leMFT_REF parent_directory; /* Directory this filename is | ||
1050 | referenced from. */ | ||
1051 | /* 8*/ sle64 creation_time; /* Time file was created. */ | ||
1052 | /* 10*/ sle64 last_data_change_time; /* Time the data attribute was last | ||
1053 | modified. */ | ||
1054 | /* 18*/ sle64 last_mft_change_time; /* Time this mft record was last | ||
1055 | modified. */ | ||
1056 | /* 20*/ sle64 last_access_time; /* Time this mft record was last | ||
1057 | accessed. */ | ||
1058 | /* 28*/ sle64 allocated_size; /* Byte size of allocated space for the | ||
1059 | data attribute. NOTE: Is a multiple | ||
1060 | of the cluster size. */ | ||
1061 | /* 30*/ sle64 data_size; /* Byte size of actual data in data | ||
1062 | attribute. */ | ||
1063 | /* 38*/ FILE_ATTR_FLAGS file_attributes; /* Flags describing the file. */ | ||
1064 | /* 3c*/ union { | ||
1065 | /* 3c*/ struct { | ||
1066 | /* 3c*/ le16 packed_ea_size; /* Size of the buffer needed to | ||
1067 | pack the extended attributes | ||
1068 | (EAs), if such are present.*/ | ||
1069 | /* 3e*/ le16 reserved; /* Reserved for alignment. */ | ||
1070 | } __attribute__ ((__packed__)) ea; | ||
1071 | /* 3c*/ struct { | ||
1072 | /* 3c*/ le32 reparse_point_tag; /* Type of reparse point, | ||
1073 | present only in reparse | ||
1074 | points and only if there are | ||
1075 | no EAs. */ | ||
1076 | } __attribute__ ((__packed__)) rp; | ||
1077 | } __attribute__ ((__packed__)) type; | ||
1078 | /* 40*/ u8 file_name_length; /* Length of file name in | ||
1079 | (Unicode) characters. */ | ||
1080 | /* 41*/ FILE_NAME_TYPE_FLAGS file_name_type; /* Namespace of the file name.*/ | ||
1081 | /* 42*/ ntfschar file_name[0]; /* File name in Unicode. */ | ||
1082 | } __attribute__ ((__packed__)) FILE_NAME_ATTR; | ||
1083 | |||
1084 | /* | ||
1085 | * GUID structures store globally unique identifiers (GUID). A GUID is a | ||
1086 | * 128-bit value consisting of one group of eight hexadecimal digits, followed | ||
1087 | * by three groups of four hexadecimal digits each, followed by one group of | ||
1088 | * twelve hexadecimal digits. GUIDs are Microsoft's implementation of the | ||
1089 | * distributed computing environment (DCE) universally unique identifier (UUID). | ||
1090 | * Example of a GUID: | ||
1091 | * 1F010768-5A73-BC91-0010A52216A7 | ||
1092 | */ | ||
1093 | typedef struct { | ||
1094 | le32 data1; /* The first eight hexadecimal digits of the GUID. */ | ||
1095 | le16 data2; /* The first group of four hexadecimal digits. */ | ||
1096 | le16 data3; /* The second group of four hexadecimal digits. */ | ||
1097 | u8 data4[8]; /* The first two bytes are the third group of four | ||
1098 | hexadecimal digits. The remaining six bytes are the | ||
1099 | final 12 hexadecimal digits. */ | ||
1100 | } __attribute__ ((__packed__)) GUID; | ||
1101 | |||
1102 | /* | ||
1103 | * FILE_Extend/$ObjId contains an index named $O. This index contains all | ||
1104 | * object_ids present on the volume as the index keys and the corresponding | ||
1105 | * mft_record numbers as the index entry data parts. The data part (defined | ||
1106 | * below) also contains three other object_ids: | ||
1107 | * birth_volume_id - object_id of FILE_Volume on which the file was first | ||
1108 | * created. Optional (i.e. can be zero). | ||
1109 | * birth_object_id - object_id of file when it was first created. Usually | ||
1110 | * equals the object_id. Optional (i.e. can be zero). | ||
1111 | * domain_id - Reserved (always zero). | ||
1112 | */ | ||
1113 | typedef struct { | ||
1114 | leMFT_REF mft_reference;/* Mft record containing the object_id in | ||
1115 | the index entry key. */ | ||
1116 | union { | ||
1117 | struct { | ||
1118 | GUID birth_volume_id; | ||
1119 | GUID birth_object_id; | ||
1120 | GUID domain_id; | ||
1121 | } __attribute__ ((__packed__)) origin; | ||
1122 | u8 extended_info[48]; | ||
1123 | } __attribute__ ((__packed__)) opt; | ||
1124 | } __attribute__ ((__packed__)) OBJ_ID_INDEX_DATA; | ||
1125 | |||
1126 | /* | ||
1127 | * Attribute: Object id (NTFS 3.0+) (0x40). | ||
1128 | * | ||
1129 | * NOTE: Always resident. | ||
1130 | */ | ||
1131 | typedef struct { | ||
1132 | GUID object_id; /* Unique id assigned to the | ||
1133 | file.*/ | ||
1134 | /* The following fields are optional. The attribute value size is 16 | ||
1135 | bytes, i.e. sizeof(GUID), if these are not present at all. Note, | ||
1136 | the entries can be present but one or more (or all) can be zero | ||
1137 | meaning that that particular value(s) is(are) not defined. */ | ||
1138 | union { | ||
1139 | struct { | ||
1140 | GUID birth_volume_id; /* Unique id of volume on which | ||
1141 | the file was first created.*/ | ||
1142 | GUID birth_object_id; /* Unique id of file when it was | ||
1143 | first created. */ | ||
1144 | GUID domain_id; /* Reserved, zero. */ | ||
1145 | } __attribute__ ((__packed__)) origin; | ||
1146 | u8 extended_info[48]; | ||
1147 | } __attribute__ ((__packed__)) opt; | ||
1148 | } __attribute__ ((__packed__)) OBJECT_ID_ATTR; | ||
1149 | |||
1150 | /* | ||
1151 | * The pre-defined IDENTIFIER_AUTHORITIES used as SID_IDENTIFIER_AUTHORITY in | ||
1152 | * the SID structure (see below). | ||
1153 | */ | ||
1154 | //typedef enum { /* SID string prefix. */ | ||
1155 | // SECURITY_NULL_SID_AUTHORITY = {0, 0, 0, 0, 0, 0}, /* S-1-0 */ | ||
1156 | // SECURITY_WORLD_SID_AUTHORITY = {0, 0, 0, 0, 0, 1}, /* S-1-1 */ | ||
1157 | // SECURITY_LOCAL_SID_AUTHORITY = {0, 0, 0, 0, 0, 2}, /* S-1-2 */ | ||
1158 | // SECURITY_CREATOR_SID_AUTHORITY = {0, 0, 0, 0, 0, 3}, /* S-1-3 */ | ||
1159 | // SECURITY_NON_UNIQUE_AUTHORITY = {0, 0, 0, 0, 0, 4}, /* S-1-4 */ | ||
1160 | // SECURITY_NT_SID_AUTHORITY = {0, 0, 0, 0, 0, 5}, /* S-1-5 */ | ||
1161 | //} IDENTIFIER_AUTHORITIES; | ||
1162 | |||
1163 | /* | ||
1164 | * These relative identifiers (RIDs) are used with the above identifier | ||
1165 | * authorities to make up universal well-known SIDs. | ||
1166 | * | ||
1167 | * Note: The relative identifier (RID) refers to the portion of a SID, which | ||
1168 | * identifies a user or group in relation to the authority that issued the SID. | ||
1169 | * For example, the universal well-known SID Creator Owner ID (S-1-3-0) is | ||
1170 | * made up of the identifier authority SECURITY_CREATOR_SID_AUTHORITY (3) and | ||
1171 | * the relative identifier SECURITY_CREATOR_OWNER_RID (0). | ||
1172 | */ | ||
1173 | typedef enum { /* Identifier authority. */ | ||
1174 | SECURITY_NULL_RID = 0, /* S-1-0 */ | ||
1175 | SECURITY_WORLD_RID = 0, /* S-1-1 */ | ||
1176 | SECURITY_LOCAL_RID = 0, /* S-1-2 */ | ||
1177 | |||
1178 | SECURITY_CREATOR_OWNER_RID = 0, /* S-1-3 */ | ||
1179 | SECURITY_CREATOR_GROUP_RID = 1, /* S-1-3 */ | ||
1180 | |||
1181 | SECURITY_CREATOR_OWNER_SERVER_RID = 2, /* S-1-3 */ | ||
1182 | SECURITY_CREATOR_GROUP_SERVER_RID = 3, /* S-1-3 */ | ||
1183 | |||
1184 | SECURITY_DIALUP_RID = 1, | ||
1185 | SECURITY_NETWORK_RID = 2, | ||
1186 | SECURITY_BATCH_RID = 3, | ||
1187 | SECURITY_INTERACTIVE_RID = 4, | ||
1188 | SECURITY_SERVICE_RID = 6, | ||
1189 | SECURITY_ANONYMOUS_LOGON_RID = 7, | ||
1190 | SECURITY_PROXY_RID = 8, | ||
1191 | SECURITY_ENTERPRISE_CONTROLLERS_RID=9, | ||
1192 | SECURITY_SERVER_LOGON_RID = 9, | ||
1193 | SECURITY_PRINCIPAL_SELF_RID = 0xa, | ||
1194 | SECURITY_AUTHENTICATED_USER_RID = 0xb, | ||
1195 | SECURITY_RESTRICTED_CODE_RID = 0xc, | ||
1196 | SECURITY_TERMINAL_SERVER_RID = 0xd, | ||
1197 | |||
1198 | SECURITY_LOGON_IDS_RID = 5, | ||
1199 | SECURITY_LOGON_IDS_RID_COUNT = 3, | ||
1200 | |||
1201 | SECURITY_LOCAL_SYSTEM_RID = 0x12, | ||
1202 | |||
1203 | SECURITY_NT_NON_UNIQUE = 0x15, | ||
1204 | |||
1205 | SECURITY_BUILTIN_DOMAIN_RID = 0x20, | ||
1206 | |||
1207 | /* | ||
1208 | * Well-known domain relative sub-authority values (RIDs). | ||
1209 | */ | ||
1210 | |||
1211 | /* Users. */ | ||
1212 | DOMAIN_USER_RID_ADMIN = 0x1f4, | ||
1213 | DOMAIN_USER_RID_GUEST = 0x1f5, | ||
1214 | DOMAIN_USER_RID_KRBTGT = 0x1f6, | ||
1215 | |||
1216 | /* Groups. */ | ||
1217 | DOMAIN_GROUP_RID_ADMINS = 0x200, | ||
1218 | DOMAIN_GROUP_RID_USERS = 0x201, | ||
1219 | DOMAIN_GROUP_RID_GUESTS = 0x202, | ||
1220 | DOMAIN_GROUP_RID_COMPUTERS = 0x203, | ||
1221 | DOMAIN_GROUP_RID_CONTROLLERS = 0x204, | ||
1222 | DOMAIN_GROUP_RID_CERT_ADMINS = 0x205, | ||
1223 | DOMAIN_GROUP_RID_SCHEMA_ADMINS = 0x206, | ||
1224 | DOMAIN_GROUP_RID_ENTERPRISE_ADMINS= 0x207, | ||
1225 | DOMAIN_GROUP_RID_POLICY_ADMINS = 0x208, | ||
1226 | |||
1227 | /* Aliases. */ | ||
1228 | DOMAIN_ALIAS_RID_ADMINS = 0x220, | ||
1229 | DOMAIN_ALIAS_RID_USERS = 0x221, | ||
1230 | DOMAIN_ALIAS_RID_GUESTS = 0x222, | ||
1231 | DOMAIN_ALIAS_RID_POWER_USERS = 0x223, | ||
1232 | |||
1233 | DOMAIN_ALIAS_RID_ACCOUNT_OPS = 0x224, | ||
1234 | DOMAIN_ALIAS_RID_SYSTEM_OPS = 0x225, | ||
1235 | DOMAIN_ALIAS_RID_PRINT_OPS = 0x226, | ||
1236 | DOMAIN_ALIAS_RID_BACKUP_OPS = 0x227, | ||
1237 | |||
1238 | DOMAIN_ALIAS_RID_REPLICATOR = 0x228, | ||
1239 | DOMAIN_ALIAS_RID_RAS_SERVERS = 0x229, | ||
1240 | DOMAIN_ALIAS_RID_PREW2KCOMPACCESS = 0x22a, | ||
1241 | } RELATIVE_IDENTIFIERS; | ||
1242 | |||
1243 | /* | ||
1244 | * The universal well-known SIDs: | ||
1245 | * | ||
1246 | * NULL_SID S-1-0-0 | ||
1247 | * WORLD_SID S-1-1-0 | ||
1248 | * LOCAL_SID S-1-2-0 | ||
1249 | * CREATOR_OWNER_SID S-1-3-0 | ||
1250 | * CREATOR_GROUP_SID S-1-3-1 | ||
1251 | * CREATOR_OWNER_SERVER_SID S-1-3-2 | ||
1252 | * CREATOR_GROUP_SERVER_SID S-1-3-3 | ||
1253 | * | ||
1254 | * (Non-unique IDs) S-1-4 | ||
1255 | * | ||
1256 | * NT well-known SIDs: | ||
1257 | * | ||
1258 | * NT_AUTHORITY_SID S-1-5 | ||
1259 | * DIALUP_SID S-1-5-1 | ||
1260 | * | ||
1261 | * NETWORK_SID S-1-5-2 | ||
1262 | * BATCH_SID S-1-5-3 | ||
1263 | * INTERACTIVE_SID S-1-5-4 | ||
1264 | * SERVICE_SID S-1-5-6 | ||
1265 | * ANONYMOUS_LOGON_SID S-1-5-7 (aka null logon session) | ||
1266 | * PROXY_SID S-1-5-8 | ||
1267 | * SERVER_LOGON_SID S-1-5-9 (aka domain controller account) | ||
1268 | * SELF_SID S-1-5-10 (self RID) | ||
1269 | * AUTHENTICATED_USER_SID S-1-5-11 | ||
1270 | * RESTRICTED_CODE_SID S-1-5-12 (running restricted code) | ||
1271 | * TERMINAL_SERVER_SID S-1-5-13 (running on terminal server) | ||
1272 | * | ||
1273 | * (Logon IDs) S-1-5-5-X-Y | ||
1274 | * | ||
1275 | * (NT non-unique IDs) S-1-5-0x15-... | ||
1276 | * | ||
1277 | * (Built-in domain) S-1-5-0x20 | ||
1278 | */ | ||
1279 | |||
1280 | /* | ||
1281 | * The SID_IDENTIFIER_AUTHORITY is a 48-bit value used in the SID structure. | ||
1282 | * | ||
1283 | * NOTE: This is stored as a big endian number, hence the high_part comes | ||
1284 | * before the low_part. | ||
1285 | */ | ||
1286 | typedef union { | ||
1287 | struct { | ||
1288 | u16 high_part; /* High 16-bits. */ | ||
1289 | u32 low_part; /* Low 32-bits. */ | ||
1290 | } __attribute__ ((__packed__)) parts; | ||
1291 | u8 value[6]; /* Value as individual bytes. */ | ||
1292 | } __attribute__ ((__packed__)) SID_IDENTIFIER_AUTHORITY; | ||
1293 | |||
1294 | /* | ||
1295 | * The SID structure is a variable-length structure used to uniquely identify | ||
1296 | * users or groups. SID stands for security identifier. | ||
1297 | * | ||
1298 | * The standard textual representation of the SID is of the form: | ||
1299 | * S-R-I-S-S... | ||
1300 | * Where: | ||
1301 | * - The first "S" is the literal character 'S' identifying the following | ||
1302 | * digits as a SID. | ||
1303 | * - R is the revision level of the SID expressed as a sequence of digits | ||
1304 | * either in decimal or hexadecimal (if the latter, prefixed by "0x"). | ||
1305 | * - I is the 48-bit identifier_authority, expressed as digits as R above. | ||
1306 | * - S... is one or more sub_authority values, expressed as digits as above. | ||
1307 | * | ||
1308 | * Example SID; the domain-relative SID of the local Administrators group on | ||
1309 | * Windows NT/2k: | ||
1310 | * S-1-5-32-544 | ||
1311 | * This translates to a SID with: | ||
1312 | * revision = 1, | ||
1313 | * sub_authority_count = 2, | ||
1314 | * identifier_authority = {0,0,0,0,0,5}, // SECURITY_NT_AUTHORITY | ||
1315 | * sub_authority[0] = 32, // SECURITY_BUILTIN_DOMAIN_RID | ||
1316 | * sub_authority[1] = 544 // DOMAIN_ALIAS_RID_ADMINS | ||
1317 | */ | ||
1318 | typedef struct { | ||
1319 | u8 revision; | ||
1320 | u8 sub_authority_count; | ||
1321 | SID_IDENTIFIER_AUTHORITY identifier_authority; | ||
1322 | le32 sub_authority[1]; /* At least one sub_authority. */ | ||
1323 | } __attribute__ ((__packed__)) SID; | ||
1324 | |||
1325 | /* | ||
1326 | * Current constants for SIDs. | ||
1327 | */ | ||
1328 | typedef enum { | ||
1329 | SID_REVISION = 1, /* Current revision level. */ | ||
1330 | SID_MAX_SUB_AUTHORITIES = 15, /* Maximum number of those. */ | ||
1331 | SID_RECOMMENDED_SUB_AUTHORITIES = 1, /* Will change to around 6 in | ||
1332 | a future revision. */ | ||
1333 | } SID_CONSTANTS; | ||
1334 | |||
1335 | /* | ||
1336 | * The predefined ACE types (8-bit, see below). | ||
1337 | */ | ||
1338 | enum { | ||
1339 | ACCESS_MIN_MS_ACE_TYPE = 0, | ||
1340 | ACCESS_ALLOWED_ACE_TYPE = 0, | ||
1341 | ACCESS_DENIED_ACE_TYPE = 1, | ||
1342 | SYSTEM_AUDIT_ACE_TYPE = 2, | ||
1343 | SYSTEM_ALARM_ACE_TYPE = 3, /* Not implemented as of Win2k. */ | ||
1344 | ACCESS_MAX_MS_V2_ACE_TYPE = 3, | ||
1345 | |||
1346 | ACCESS_ALLOWED_COMPOUND_ACE_TYPE= 4, | ||
1347 | ACCESS_MAX_MS_V3_ACE_TYPE = 4, | ||
1348 | |||
1349 | /* The following are Win2k only. */ | ||
1350 | ACCESS_MIN_MS_OBJECT_ACE_TYPE = 5, | ||
1351 | ACCESS_ALLOWED_OBJECT_ACE_TYPE = 5, | ||
1352 | ACCESS_DENIED_OBJECT_ACE_TYPE = 6, | ||
1353 | SYSTEM_AUDIT_OBJECT_ACE_TYPE = 7, | ||
1354 | SYSTEM_ALARM_OBJECT_ACE_TYPE = 8, | ||
1355 | ACCESS_MAX_MS_OBJECT_ACE_TYPE = 8, | ||
1356 | |||
1357 | ACCESS_MAX_MS_V4_ACE_TYPE = 8, | ||
1358 | |||
1359 | /* This one is for WinNT/2k. */ | ||
1360 | ACCESS_MAX_MS_ACE_TYPE = 8, | ||
1361 | } __attribute__ ((__packed__)); | ||
1362 | |||
1363 | typedef u8 ACE_TYPES; | ||
1364 | |||
1365 | /* | ||
1366 | * The ACE flags (8-bit) for audit and inheritance (see below). | ||
1367 | * | ||
1368 | * SUCCESSFUL_ACCESS_ACE_FLAG is only used with system audit and alarm ACE | ||
1369 | * types to indicate that a message is generated (in Windows!) for successful | ||
1370 | * accesses. | ||
1371 | * | ||
1372 | * FAILED_ACCESS_ACE_FLAG is only used with system audit and alarm ACE types | ||
1373 | * to indicate that a message is generated (in Windows!) for failed accesses. | ||
1374 | */ | ||
1375 | enum { | ||
1376 | /* The inheritance flags. */ | ||
1377 | OBJECT_INHERIT_ACE = 0x01, | ||
1378 | CONTAINER_INHERIT_ACE = 0x02, | ||
1379 | NO_PROPAGATE_INHERIT_ACE = 0x04, | ||
1380 | INHERIT_ONLY_ACE = 0x08, | ||
1381 | INHERITED_ACE = 0x10, /* Win2k only. */ | ||
1382 | VALID_INHERIT_FLAGS = 0x1f, | ||
1383 | |||
1384 | /* The audit flags. */ | ||
1385 | SUCCESSFUL_ACCESS_ACE_FLAG = 0x40, | ||
1386 | FAILED_ACCESS_ACE_FLAG = 0x80, | ||
1387 | } __attribute__ ((__packed__)); | ||
1388 | |||
1389 | typedef u8 ACE_FLAGS; | ||
1390 | |||
1391 | /* | ||
1392 | * An ACE is an access-control entry in an access-control list (ACL). | ||
1393 | * An ACE defines access to an object for a specific user or group or defines | ||
1394 | * the types of access that generate system-administration messages or alarms | ||
1395 | * for a specific user or group. The user or group is identified by a security | ||
1396 | * identifier (SID). | ||
1397 | * | ||
1398 | * Each ACE starts with an ACE_HEADER structure (aligned on 4-byte boundary), | ||
1399 | * which specifies the type and size of the ACE. The format of the subsequent | ||
1400 | * data depends on the ACE type. | ||
1401 | */ | ||
1402 | typedef struct { | ||
1403 | /*Ofs*/ /* Byte offset of each member from the start of the header. */ | ||
1404 | /* 0*/ ACE_TYPES type; /* Type of the ACE. */ | ||
1405 | /* 1*/ ACE_FLAGS flags; /* Flags describing the ACE. */ | ||
1406 | /* 2*/ le16 size; /* Size in bytes of the ACE. */ | ||
1407 | } __attribute__ ((__packed__)) ACE_HEADER; /* sizeof() = 4 bytes */ | ||
1408 | |||
1409 | /* | ||
1410 | * The access mask (32-bit). Defines the access rights. | ||
1411 | * | ||
1412 | * The specific rights (bits 0 to 15). These depend on the type of the object | ||
1413 | * being secured by the ACE. | ||
1414 | */ | ||
1415 | enum { | ||
1416 | /* Specific rights for files and directories are as follows: */ | ||
1417 | |||
1418 | /* Right to read data from the file. (FILE) */ | ||
1419 | FILE_READ_DATA = const_cpu_to_le32(0x00000001), | ||
1420 | /* Right to list contents of a directory. (DIRECTORY) */ | ||
1421 | FILE_LIST_DIRECTORY = const_cpu_to_le32(0x00000001), | ||
1422 | |||
1423 | /* Right to write data to the file. (FILE) */ | ||
1424 | FILE_WRITE_DATA = const_cpu_to_le32(0x00000002), | ||
1425 | /* Right to create a file in the directory. (DIRECTORY) */ | ||
1426 | FILE_ADD_FILE = const_cpu_to_le32(0x00000002), | ||
1427 | |||
1428 | /* Right to append data to the file. (FILE) */ | ||
1429 | FILE_APPEND_DATA = const_cpu_to_le32(0x00000004), | ||
1430 | /* Right to create a subdirectory. (DIRECTORY) */ | ||
1431 | FILE_ADD_SUBDIRECTORY = const_cpu_to_le32(0x00000004), | ||
1432 | |||
1433 | /* Right to read extended attributes. (FILE/DIRECTORY) */ | ||
1434 | FILE_READ_EA = const_cpu_to_le32(0x00000008), | ||
1435 | |||
1436 | /* Right to write extended attributes. (FILE/DIRECTORY) */ | ||
1437 | FILE_WRITE_EA = const_cpu_to_le32(0x00000010), | ||
1438 | |||
1439 | /* Right to execute a file. (FILE) */ | ||
1440 | FILE_EXECUTE = const_cpu_to_le32(0x00000020), | ||
1441 | /* Right to traverse the directory. (DIRECTORY) */ | ||
1442 | FILE_TRAVERSE = const_cpu_to_le32(0x00000020), | ||
1443 | |||
1444 | /* | ||
1445 | * Right to delete a directory and all the files it contains (its | ||
1446 | * children), even if the files are read-only. (DIRECTORY) | ||
1447 | */ | ||
1448 | FILE_DELETE_CHILD = const_cpu_to_le32(0x00000040), | ||
1449 | |||
1450 | /* Right to read file attributes. (FILE/DIRECTORY) */ | ||
1451 | FILE_READ_ATTRIBUTES = const_cpu_to_le32(0x00000080), | ||
1452 | |||
1453 | /* Right to change file attributes. (FILE/DIRECTORY) */ | ||
1454 | FILE_WRITE_ATTRIBUTES = const_cpu_to_le32(0x00000100), | ||
1455 | |||
1456 | /* | ||
1457 | * The standard rights (bits 16 to 23). These are independent of the | ||
1458 | * type of object being secured. | ||
1459 | */ | ||
1460 | |||
1461 | /* Right to delete the object. */ | ||
1462 | DELETE = const_cpu_to_le32(0x00010000), | ||
1463 | |||
1464 | /* | ||
1465 | * Right to read the information in the object's security descriptor, | ||
1466 | * not including the information in the SACL, i.e. right to read the | ||
1467 | * security descriptor and owner. | ||
1468 | */ | ||
1469 | READ_CONTROL = const_cpu_to_le32(0x00020000), | ||
1470 | |||
1471 | /* Right to modify the DACL in the object's security descriptor. */ | ||
1472 | WRITE_DAC = const_cpu_to_le32(0x00040000), | ||
1473 | |||
1474 | /* Right to change the owner in the object's security descriptor. */ | ||
1475 | WRITE_OWNER = const_cpu_to_le32(0x00080000), | ||
1476 | |||
1477 | /* | ||
1478 | * Right to use the object for synchronization. Enables a process to | ||
1479 | * wait until the object is in the signalled state. Some object types | ||
1480 | * do not support this access right. | ||
1481 | */ | ||
1482 | SYNCHRONIZE = const_cpu_to_le32(0x00100000), | ||
1483 | |||
1484 | /* | ||
1485 | * The following STANDARD_RIGHTS_* are combinations of the above for | ||
1486 | * convenience and are defined by the Win32 API. | ||
1487 | */ | ||
1488 | |||
1489 | /* These are currently defined to READ_CONTROL. */ | ||
1490 | STANDARD_RIGHTS_READ = const_cpu_to_le32(0x00020000), | ||
1491 | STANDARD_RIGHTS_WRITE = const_cpu_to_le32(0x00020000), | ||
1492 | STANDARD_RIGHTS_EXECUTE = const_cpu_to_le32(0x00020000), | ||
1493 | |||
1494 | /* Combines DELETE, READ_CONTROL, WRITE_DAC, and WRITE_OWNER access. */ | ||
1495 | STANDARD_RIGHTS_REQUIRED = const_cpu_to_le32(0x000f0000), | ||
1496 | |||
1497 | /* | ||
1498 | * Combines DELETE, READ_CONTROL, WRITE_DAC, WRITE_OWNER, and | ||
1499 | * SYNCHRONIZE access. | ||
1500 | */ | ||
1501 | STANDARD_RIGHTS_ALL = const_cpu_to_le32(0x001f0000), | ||
1502 | |||
1503 | /* | ||
1504 | * The access system ACL and maximum allowed access types (bits 24 to | ||
1505 | * 25, bits 26 to 27 are reserved). | ||
1506 | */ | ||
1507 | ACCESS_SYSTEM_SECURITY = const_cpu_to_le32(0x01000000), | ||
1508 | MAXIMUM_ALLOWED = const_cpu_to_le32(0x02000000), | ||
1509 | |||
1510 | /* | ||
1511 | * The generic rights (bits 28 to 31). These map onto the standard and | ||
1512 | * specific rights. | ||
1513 | */ | ||
1514 | |||
1515 | /* Read, write, and execute access. */ | ||
1516 | GENERIC_ALL = const_cpu_to_le32(0x10000000), | ||
1517 | |||
1518 | /* Execute access. */ | ||
1519 | GENERIC_EXECUTE = const_cpu_to_le32(0x20000000), | ||
1520 | |||
1521 | /* | ||
1522 | * Write access. For files, this maps onto: | ||
1523 | * FILE_APPEND_DATA | FILE_WRITE_ATTRIBUTES | FILE_WRITE_DATA | | ||
1524 | * FILE_WRITE_EA | STANDARD_RIGHTS_WRITE | SYNCHRONIZE | ||
1525 | * For directories, the mapping has the same numerical value. See | ||
1526 | * above for the descriptions of the rights granted. | ||
1527 | */ | ||
1528 | GENERIC_WRITE = const_cpu_to_le32(0x40000000), | ||
1529 | |||
1530 | /* | ||
1531 | * Read access. For files, this maps onto: | ||
1532 | * FILE_READ_ATTRIBUTES | FILE_READ_DATA | FILE_READ_EA | | ||
1533 | * STANDARD_RIGHTS_READ | SYNCHRONIZE | ||
1534 | * For directories, the mapping has the same numerical value. See | ||
1535 | * above for the descriptions of the rights granted. | ||
1536 | */ | ||
1537 | GENERIC_READ = const_cpu_to_le32(0x80000000), | ||
1538 | }; | ||
1539 | |||
1540 | typedef le32 ACCESS_MASK; /* The enum values above are already passed through const_cpu_to_le32(). */ | ||
1541 | |||
1542 | /* | ||
1543 | * The generic mapping array. Used to denote the mapping of each generic | ||
1544 | * access right to a specific access mask. | ||
1545 | * | ||
1546 | * FIXME: What exactly is this and what is it for? (AIA) | ||
1547 | */ | ||
1548 | typedef struct { | ||
1549 | ACCESS_MASK generic_read; /* Presumably the mask GENERIC_READ maps to -- TODO confirm. */ | ||
1550 | ACCESS_MASK generic_write; /* Presumably the mask GENERIC_WRITE maps to -- TODO confirm. */ | ||
1551 | ACCESS_MASK generic_execute; /* Presumably the mask GENERIC_EXECUTE maps to -- TODO confirm. */ | ||
1552 | ACCESS_MASK generic_all; /* Presumably the mask GENERIC_ALL maps to -- TODO confirm. */ | ||
1553 | } __attribute__ ((__packed__)) GENERIC_MAPPING; /* Four ACCESS_MASK (le32) members. */ | ||
1554 | |||
1555 | /* | ||
1556 | * The predefined ACE type structures are as defined below. | ||
1557 | */ | ||
1558 | |||
1559 | /* | ||
1560 | * ACCESS_ALLOWED_ACE, ACCESS_DENIED_ACE, SYSTEM_AUDIT_ACE, SYSTEM_ALARM_ACE | ||
1561 | */ | ||
1562 | typedef struct { | ||
1563 | /* 0 ACE_HEADER; -- Unfolded here as gcc doesn't like unnamed structs. The next three members mirror ACE_HEADER. */ | ||
1564 | ACE_TYPES type; /* Type of the ACE. */ | ||
1565 | ACE_FLAGS flags; /* Flags describing the ACE. */ | ||
1566 | le16 size; /* Size in bytes of the ACE. */ | ||
1567 | /* 4*/ ACCESS_MASK mask; /* Access mask associated with the ACE. */ | ||
1568 | |||
1569 | /* 8*/ SID sid; /* The SID associated with the ACE. */ | ||
1570 | } __attribute__ ((__packed__)) ACCESS_ALLOWED_ACE, ACCESS_DENIED_ACE, | ||
1571 | SYSTEM_AUDIT_ACE, SYSTEM_ALARM_ACE; /* All four names are typedefs of this one layout. */ | ||
1572 | |||
1573 | /* | ||
1574 | * The object ACE flags (32-bit). | ||
1575 | */ | ||
1576 | enum { | ||
1577 | ACE_OBJECT_TYPE_PRESENT = const_cpu_to_le32(1), /* Presumably: object_type of the object ACE is present -- TODO confirm. */ | ||
1578 | ACE_INHERITED_OBJECT_TYPE_PRESENT = const_cpu_to_le32(2), /* Presumably: inherited_object_type is present -- TODO confirm. */ | ||
1579 | }; | ||
1580 | |||
1581 | typedef le32 OBJECT_ACE_FLAGS; /* 32-bit little endian storage type for the flags above. */ | ||
1582 | |||
1583 | typedef struct { /* Object ACE layout shared by the four *_OBJECT_ACE typedef names below. */ | ||
1584 | /* 0 ACE_HEADER; -- Unfolded here as gcc doesn't like unnamed structs. The next three members mirror ACE_HEADER. */ | ||
1585 | ACE_TYPES type; /* Type of the ACE. */ | ||
1586 | ACE_FLAGS flags; /* Flags describing the ACE. */ | ||
1587 | le16 size; /* Size in bytes of the ACE. */ | ||
1588 | /* 4*/ ACCESS_MASK mask; /* Access mask associated with the ACE. */ | ||
1589 | |||
1590 | /* 8*/ OBJECT_ACE_FLAGS object_flags; /* Flags describing the object ACE. */ | ||
1591 | /* 12*/ GUID object_type; /* NOTE(review): presumably tied to ACE_OBJECT_TYPE_PRESENT -- confirm. */ | ||
1592 | /* 28*/ GUID inherited_object_type; /* NOTE(review): presumably tied to ACE_INHERITED_OBJECT_TYPE_PRESENT -- confirm. */ | ||
1593 | |||
1594 | /* 44*/ SID sid; /* The SID associated with the ACE. */ | ||
1595 | } __attribute__ ((__packed__)) ACCESS_ALLOWED_OBJECT_ACE, | ||
1596 | ACCESS_DENIED_OBJECT_ACE, | ||
1597 | SYSTEM_AUDIT_OBJECT_ACE, | ||
1598 | SYSTEM_ALARM_OBJECT_ACE; | ||
1599 | |||
1600 | /* | ||
1601 | * An ACL is an access-control list (ACL). | ||
1602 | * An ACL starts with an ACL header structure, which specifies the size of | ||
1603 | * the ACL and the number of ACEs it contains. The ACL header is followed by | ||
1604 | * zero or more access control entries (ACEs). The ACL as well as each ACE | ||
1605 | * are aligned on 4-byte boundaries. | ||
1606 | */ | ||
1607 | typedef struct { | ||
1608 | u8 revision; /* Revision of this ACL. */ | ||
1609 | u8 alignment1; /* Presumably padding only (going by the name) -- TODO confirm. */ | ||
1610 | le16 size; /* Allocated space in bytes for ACL. Includes this | ||
1611 | header, the ACEs and the remaining free space. */ | ||
1612 | le16 ace_count; /* Number of ACEs in the ACL. */ | ||
1613 | le16 alignment2; /* Presumably padding only (going by the name) -- TODO confirm. */ | ||
1614 | /* sizeof() = 8 bytes */ | ||
1615 | } __attribute__ ((__packed__)) ACL; | ||
1616 | |||
1617 | /* | ||
1618 | * Current constants for ACLs. | ||
1619 | */ | ||
1620 | typedef enum { | ||
1621 | /* Current revision. */ | ||
1622 | ACL_REVISION = 2, /* Same value as ACL_REVISION2. */ | ||
1623 | ACL_REVISION_DS = 4, /* Same value as ACL_REVISION4. */ | ||
1624 | |||
1625 | /* History of revisions. */ | ||
1626 | ACL_REVISION1 = 1, | ||
1627 | MIN_ACL_REVISION = 2, /* Same value as ACL_REVISION2. */ | ||
1628 | ACL_REVISION2 = 2, | ||
1629 | ACL_REVISION3 = 3, | ||
1630 | ACL_REVISION4 = 4, | ||
1631 | MAX_ACL_REVISION = 4, /* Same value as ACL_REVISION4. */ | ||
1632 | } ACL_CONSTANTS; | ||
1633 | |||
1634 | /* | ||
1635 | * The security descriptor control flags (16-bit). | ||
1636 | * | ||
1637 | * SE_OWNER_DEFAULTED - This boolean flag, when set, indicates that the SID | ||
1638 | * pointed to by the Owner field was provided by a defaulting mechanism | ||
1639 | * rather than explicitly provided by the original provider of the | ||
1640 | * security descriptor. This may affect the treatment of the SID with | ||
1641 | * respect to inheritance of an owner. | ||
1642 | * | ||
1643 | * SE_GROUP_DEFAULTED - This boolean flag, when set, indicates that the SID in | ||
1644 | * the Group field was provided by a defaulting mechanism rather than | ||
1645 | * explicitly provided by the original provider of the security | ||
1646 | * descriptor. This may affect the treatment of the SID with respect to | ||
1647 | * inheritance of a primary group. | ||
1648 | * | ||
1649 | * SE_DACL_PRESENT - This boolean flag, when set, indicates that the security | ||
1650 | * descriptor contains a discretionary ACL. If this flag is set and the | ||
1651 | * Dacl field of the SECURITY_DESCRIPTOR is null, then a null ACL is | ||
1652 | * explicitly being specified. | ||
1653 | * | ||
1654 | * SE_DACL_DEFAULTED - This boolean flag, when set, indicates that the ACL | ||
1655 | * pointed to by the Dacl field was provided by a defaulting mechanism | ||
1656 | * rather than explicitly provided by the original provider of the | ||
1657 | * security descriptor. This may affect the treatment of the ACL with | ||
1658 | * respect to inheritance of an ACL. This flag is ignored if the | ||
1659 | * DaclPresent flag is not set. | ||
1660 | * | ||
1661 | * SE_SACL_PRESENT - This boolean flag, when set, indicates that the security | ||
1662 | * descriptor contains a system ACL pointed to by the Sacl field. If this | ||
1663 | * flag is set and the Sacl field of the SECURITY_DESCRIPTOR is null, then | ||
1664 | * an empty (but present) ACL is being specified. | ||
1665 | * | ||
1666 | * SE_SACL_DEFAULTED - This boolean flag, when set, indicates that the ACL | ||
1667 | * pointed to by the Sacl field was provided by a defaulting mechanism | ||
1668 | * rather than explicitly provided by the original provider of the | ||
1669 | * security descriptor. This may affect the treatment of the ACL with | ||
1670 | * respect to inheritance of an ACL. This flag is ignored if the | ||
1671 | * SaclPresent flag is not set. | ||
1672 | * | ||
1673 | * SE_SELF_RELATIVE - This boolean flag, when set, indicates that the security | ||
1674 | * descriptor is in self-relative form. In this form, all fields of the | ||
1675 | * security descriptor are contiguous in memory and all pointer fields are | ||
1676 | * expressed as offsets from the beginning of the security descriptor. | ||
1677 | */ | ||
1678 | enum { | ||
1679 | SE_OWNER_DEFAULTED = const_cpu_to_le16(0x0001), | ||
1680 | SE_GROUP_DEFAULTED = const_cpu_to_le16(0x0002), | ||
1681 | SE_DACL_PRESENT = const_cpu_to_le16(0x0004), | ||
1682 | SE_DACL_DEFAULTED = const_cpu_to_le16(0x0008), | ||
1683 | |||
1684 | SE_SACL_PRESENT = const_cpu_to_le16(0x0010), | ||
1685 | SE_SACL_DEFAULTED = const_cpu_to_le16(0x0020), | ||
1686 | |||
1687 | SE_DACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0100), /* Bits 0x0040 and 0x0080 are not assigned in this enum. */ | ||
1688 | SE_SACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0200), | ||
1689 | SE_DACL_AUTO_INHERITED = const_cpu_to_le16(0x0400), | ||
1690 | SE_SACL_AUTO_INHERITED = const_cpu_to_le16(0x0800), | ||
1691 | |||
1692 | SE_DACL_PROTECTED = const_cpu_to_le16(0x1000), | ||
1693 | SE_SACL_PROTECTED = const_cpu_to_le16(0x2000), | ||
1694 | SE_RM_CONTROL_VALID = const_cpu_to_le16(0x4000), | ||
1695 | SE_SELF_RELATIVE = const_cpu_to_le16(0x8000) /* Set for the self-relative (offset based) form. */ | ||
1696 | } __attribute__ ((__packed__)); | ||
1697 | |||
1698 | typedef le16 SECURITY_DESCRIPTOR_CONTROL; /* The enum values above are already passed through const_cpu_to_le16(). */ | ||
1699 | |||
1700 | /* | ||
1701 | * Self-relative security descriptor. Contains the owner and group SIDs as well | ||
1702 | * as the sacl and dacl ACLs inside the security descriptor itself. | ||
1703 | */ | ||
1704 | typedef struct { | ||
1705 | u8 revision; /* Revision level of the security descriptor. */ | ||
1706 | u8 alignment; | ||
1707 | SECURITY_DESCRIPTOR_CONTROL control; /* Flags qualifying the type of | ||
1708 | the descriptor as well as the following fields. */ | ||
1709 | le32 owner; /* Byte offset to a SID representing an object's | ||
1710 | owner. If this is zero, no owner SID is present in | ||
1711 | the descriptor. */ | ||
1712 | le32 group; /* Byte offset to a SID representing an object's | ||
1713 | primary group. If this is zero, no primary group | ||
1714 | SID is present in the descriptor. */ | ||
1715 | le32 sacl; /* Byte offset to a system ACL. Only valid, if | ||
1716 | SE_SACL_PRESENT is set in the control field. If | ||
1717 | SE_SACL_PRESENT is set but sacl is zero, a NULL ACL | ||
1718 | is specified. */ | ||
1719 | le32 dacl; /* Byte offset to a discretionary ACL. Only valid, if | ||
1720 | SE_DACL_PRESENT is set in the control field. If | ||
1721 | SE_DACL_PRESENT is set but dacl is zero, a NULL ACL | ||
1722 | (unconditionally granting access) is specified. */ | ||
1723 | /* sizeof() = 0x14 bytes */ | ||
1724 | } __attribute__ ((__packed__)) SECURITY_DESCRIPTOR_RELATIVE; | ||
1725 | |||
1726 | /* | ||
1727 | * Absolute security descriptor. Does not contain the owner and group SIDs, nor | ||
1728 | * the sacl and dacl ACLs inside the security descriptor. Instead, it contains | ||
1729 | * pointers to these structures in memory. Obviously, absolute security | ||
1730 | * descriptors are only useful for in memory representations of security | ||
1731 | * descriptors. On disk, a self-relative security descriptor is used. | ||
1732 | */ | ||
1733 | typedef struct { | ||
1734 | u8 revision; /* Revision level of the security descriptor. */ | ||
1735 | u8 alignment; | ||
1736 | SECURITY_DESCRIPTOR_CONTROL control; /* Flags qualifying the type of | ||
1737 | the descriptor as well as the following fields. */ | ||
1738 | SID *owner; /* Points to a SID representing an object's owner. If | ||
1739 | this is NULL, no owner SID is present in the | ||
1740 | descriptor. */ | ||
1741 | SID *group; /* Points to a SID representing an object's primary | ||
1742 | group. If this is NULL, no primary group SID is | ||
1743 | present in the descriptor. */ | ||
1744 | ACL *sacl; /* Points to a system ACL. Only valid, if | ||
1745 | SE_SACL_PRESENT is set in the control field. If | ||
1746 | SE_SACL_PRESENT is set but sacl is NULL, a NULL ACL | ||
1747 | is specified. */ | ||
1748 | ACL *dacl; /* Points to a discretionary ACL. Only valid, if | ||
1749 | SE_DACL_PRESENT is set in the control field. If | ||
1750 | SE_DACL_PRESENT is set but dacl is NULL, a NULL ACL | ||
1751 | (unconditionally granting access) is specified. */ | ||
1752 | } __attribute__ ((__packed__)) SECURITY_DESCRIPTOR; /* Holds four pointers, so sizeof() depends on the pointer width. */ | ||
1753 | |||
1754 | /* | ||
1755 | * Current constants for security descriptors. | ||
1756 | */ | ||
1757 | typedef enum { | ||
1758 | /* Current revision. */ | ||
1759 | SECURITY_DESCRIPTOR_REVISION = 1, | ||
1760 | SECURITY_DESCRIPTOR_REVISION1 = 1, | ||
1761 | |||
1762 | /* Size of the absolute security descriptor. This equals the size of the | ||
1763 | relative one only where pointers are 32-bit, e.g. on ia32. */ | ||
1764 | SECURITY_DESCRIPTOR_MIN_LENGTH = sizeof(SECURITY_DESCRIPTOR), | ||
1765 | } SECURITY_DESCRIPTOR_CONSTANTS; | ||
1766 | |||
1767 | /* | ||
1768 | * Attribute: Security descriptor (0x50). A standard self-relative security | ||
1769 | * descriptor. | ||
1770 | * | ||
1771 | * NOTE: Can be resident or non-resident. | ||
1772 | * NOTE: Not used in NTFS 3.0+, as security descriptors are stored centrally | ||
1773 | * in FILE_Secure and the correct descriptor is found using the security_id | ||
1774 | * from the standard information attribute. | ||
1775 | */ | ||
1776 | typedef SECURITY_DESCRIPTOR_RELATIVE SECURITY_DESCRIPTOR_ATTR; /* Alias: same layout as the self-relative descriptor. */ | ||
1777 | |||
1778 | /* | ||
1779 | * On NTFS 3.0+, all security descriptors are stored in FILE_Secure. Only one | ||
1780 | * referenced instance of each unique security descriptor is stored. | ||
1781 | * | ||
1782 | * FILE_Secure contains no unnamed data attribute, i.e. it has zero length. It | ||
1783 | * does, however, contain two indexes ($SDH and $SII) as well as a named data | ||
1784 | * stream ($SDS). | ||
1785 | * | ||
1786 | * Every unique security descriptor is assigned a unique security identifier | ||
1787 | * (security_id, not to be confused with a SID). The security_id is unique for | ||
1788 | * the NTFS volume and is used as an index into the $SII index, which maps | ||
1789 | * security_ids to the security descriptor's storage location within the $SDS | ||
1790 | * data attribute. The $SII index is sorted by ascending security_id. | ||
1791 | * | ||
1792 | * A simple hash is computed from each security descriptor. This hash is used | ||
1793 | * as an index into the $SDH index, which maps security descriptor hashes to | ||
1794 | * the security descriptor's storage location within the $SDS data attribute. | ||
1795 | * The $SDH index is sorted by security descriptor hash and is stored in a B+ | ||
1796 | * tree. When searching $SDH (with the intent of determining whether or not a | ||
1797 | * new security descriptor is already present in the $SDS data stream), if a | ||
1798 | * matching hash is found, but the security descriptors do not match, the | ||
1799 | * search in the $SDH index is continued, searching for a next matching hash. | ||
1800 | * | ||
1801 | * When a precise match is found, the security_id corresponding to the security | ||
1802 | * descriptor in the $SDS attribute is read from the found $SDH index entry and | ||
1803 | * is stored in the $STANDARD_INFORMATION attribute of the file/directory to | ||
1804 | * which the security descriptor is being applied. The $STANDARD_INFORMATION | ||
1805 | * attribute is present in all base mft records (i.e. in all files and | ||
1806 | * directories). | ||
1807 | * | ||
1808 | * If a match is not found, the security descriptor is assigned a new unique | ||
1809 | * security_id and is added to the $SDS data attribute. Then, entries | ||
1810 | * referencing this security descriptor in the $SDS data attribute are | ||
1811 | * added to the $SDH and $SII indexes. | ||
1812 | * | ||
1813 | * Note: Entries are never deleted from FILE_Secure, even if nothing | ||
1814 | * references an entry any more. | ||
1815 | */ | ||
1816 | |||
1817 | /* | ||
1818 | * This header precedes each security descriptor in the $SDS data stream. | ||
1819 | * This is also the index entry data part of both the $SII and $SDH indexes. | ||
1820 | */ | ||
1821 | typedef struct { | ||
1822 | le32 hash; /* Hash of the security descriptor. */ | ||
1823 | le32 security_id; /* The security_id assigned to the descriptor. */ | ||
1824 | le64 offset; /* Byte offset of this entry in the $SDS stream. */ | ||
1825 | le32 length; /* Size in bytes of this entry in $SDS stream. */ | ||
1826 | } __attribute__ ((__packed__)) SECURITY_DESCRIPTOR_HEADER; /* sizeof() = 20 bytes (packed: 4 + 4 + 8 + 4) */ | ||
1827 | |||
1828 | /* | ||
1829 | * The $SDS data stream contains the security descriptors, aligned on 16-byte | ||
1830 | * boundaries, sorted by security_id in a B+ tree. Security descriptors cannot | ||
1831 | * cross 256kib boundaries (this restriction is imposed by the Windows cache | ||
1832 | * manager). Each security descriptor is contained in a SDS_ENTRY structure. | ||
1833 | * Also, each security descriptor is stored twice in the $SDS stream with a | ||
1834 | * fixed offset of 0x40000 bytes (256kib, the Windows cache manager's max size) | ||
1835 | * between them; i.e. if a SDS_ENTRY specifies an offset of 0x51d0, then the | ||
1836 | * first copy of the security descriptor will be at offset 0x51d0 in the | ||
1837 | * $SDS data stream and the second copy will be at offset 0x451d0. | ||
1838 | */ | ||
1839 | typedef struct { | ||
1840 | /*Ofs*/ | ||
1841 | /* 0 SECURITY_DESCRIPTOR_HEADER; -- Unfolded here as gcc doesn't like | ||
1842 | unnamed structs. The next four members mirror that header. */ | ||
1843 | le32 hash; /* Hash of the security descriptor. */ | ||
1844 | le32 security_id; /* The security_id assigned to the descriptor. */ | ||
1845 | le64 offset; /* Byte offset of this entry in the $SDS stream. */ | ||
1846 | le32 length; /* Size in bytes of this entry in $SDS stream. */ | ||
1847 | /* 20*/ SECURITY_DESCRIPTOR_RELATIVE sid; /* The self-relative security | ||
1848 | descriptor (despite the member name, not a SID). */ | ||
1849 | } __attribute__ ((__packed__)) SDS_ENTRY; | ||
1850 | |||
1851 | /* | ||
1852 | * The index entry key used in the $SII index. The collation type is | ||
1853 | * COLLATION_NTOFS_ULONG. | ||
1854 | */ | ||
1855 | typedef struct { | ||
1856 | le32 security_id; /* The security_id assigned to the descriptor. */ | ||
1857 | } __attribute__ ((__packed__)) SII_INDEX_KEY; /* sizeof() = 4 bytes */ | ||
1858 | |||
1859 | /* | ||
1860 | * The index entry key used in the $SDH index. The keys are sorted first by | ||
1861 | * hash and then by security_id. The collation rule is | ||
1862 | * COLLATION_NTOFS_SECURITY_HASH. | ||
1863 | */ | ||
1864 | typedef struct { | ||
1865 | le32 hash; /* Hash of the security descriptor. Primary sort key. */ | ||
1866 | le32 security_id; /* The security_id assigned to the descriptor. Secondary sort key. */ | ||
1867 | } __attribute__ ((__packed__)) SDH_INDEX_KEY; /* sizeof() = 8 bytes */ | ||
1868 | |||
1869 | /* | ||
1870 | * Attribute: Volume name (0x60). | ||
1871 | * | ||
1872 | * NOTE: Always resident. | ||
1873 | * NOTE: Present only in FILE_Volume. | ||
1874 | */ | ||
1875 | typedef struct { | ||
1876 | ntfschar name[0]; /* The name of the volume in Unicode. Zero-length array: the struct itself carries no length, so it must come from elsewhere. */ | ||
1877 | } __attribute__ ((__packed__)) VOLUME_NAME; | ||
1878 | |||
1879 | /* | ||
1880 | * Possible flags for the volume (16-bit). | ||
1881 | */ | ||
1882 | enum { | ||
1883 | VOLUME_IS_DIRTY = const_cpu_to_le16(0x0001), | ||
1884 | VOLUME_RESIZE_LOG_FILE = const_cpu_to_le16(0x0002), | ||
1885 | VOLUME_UPGRADE_ON_MOUNT = const_cpu_to_le16(0x0004), | ||
1886 | VOLUME_MOUNTED_ON_NT4 = const_cpu_to_le16(0x0008), | ||
1887 | |||
1888 | VOLUME_DELETE_USN_UNDERWAY = const_cpu_to_le16(0x0010), | ||
1889 | VOLUME_REPAIR_OBJECT_ID = const_cpu_to_le16(0x0020), | ||
1890 | |||
1891 | VOLUME_MODIFIED_BY_CHKDSK = const_cpu_to_le16(0x8000), | ||
1892 | |||
1893 | VOLUME_FLAGS_MASK = const_cpu_to_le16(0x803f), /* OR of all seven flags above. */ | ||
1894 | |||
1895 | /* To make our life easier when checking if we must mount read-only. | ||
1896 | This is VOLUME_FLAGS_MASK without VOLUME_MOUNTED_ON_NT4 (0x0008). */ VOLUME_MUST_MOUNT_RO_MASK = const_cpu_to_le16(0x8037), | ||
1897 | } __attribute__ ((__packed__)); | ||
1898 | |||
1899 | typedef le16 VOLUME_FLAGS; | ||
1900 | |||
1901 | /* | ||
1902 | * Attribute: Volume information (0x70). | ||
1903 | * | ||
1904 | * NOTE: Always resident. | ||
1905 | * NOTE: Present only in FILE_Volume. | ||
1906 | * NOTE: Windows 2000 uses NTFS 3.0 while Windows NT4 service pack 6a uses | ||
1907 | * NTFS 1.2. I haven't personally seen other values yet. | ||
1908 | */ | ||
1909 | typedef struct { | ||
1910 | le64 reserved; /* Not used (yet?). */ | ||
1911 | u8 major_ver; /* Major version of the ntfs format. */ | ||
1912 | u8 minor_ver; /* Minor version of the ntfs format. */ | ||
1913 | VOLUME_FLAGS flags; /* Bit array of VOLUME_* flags. */ | ||
1914 | } __attribute__ ((__packed__)) VOLUME_INFORMATION; /* sizeof() = 12 bytes (packed: 8 + 1 + 1 + 2) */ | ||
1915 | |||
1916 | /* | ||
1917 | * Attribute: Data attribute (0x80). | ||
1918 | * | ||
1919 | * NOTE: Can be resident or non-resident. | ||
1920 | * | ||
1921 | * Data contents of a file (i.e. the unnamed stream) or of a named stream. | ||
1922 | */ | ||
1923 | typedef struct { | ||
1924 | u8 data[0]; /* The file's data contents. Zero-length array: the struct itself carries no length, so it must come from elsewhere. */ | ||
1925 | } __attribute__ ((__packed__)) DATA_ATTR; | ||
1926 | |||
1927 | /* | ||
1928 | * Index header flags (8-bit). | ||
1929 | */ | ||
1930 | enum { | ||
1931 | /* | ||
1932 | * When index header is in an index root attribute: | ||
1933 | */ | ||
1934 | SMALL_INDEX = 0, /* The index is small enough to fit inside the index | ||
1935 | root attribute and there is no index allocation | ||
1936 | attribute present. */ | ||
1937 | LARGE_INDEX = 1, /* The index is too large to fit in the index root | ||
1938 | attribute and/or an index allocation attribute is | ||
1939 | present. */ | ||
1940 | /* | ||
1941 | * When index header is in an index block, i.e. is part of index | ||
1942 | * allocation attribute (same two values, different meaning): | ||
1943 | */ | ||
1944 | LEAF_NODE = 0, /* This is a leaf node, i.e. there are no more nodes | ||
1945 | branching off it. */ | ||
1946 | INDEX_NODE = 1, /* This node indexes other nodes, i.e. it is not a leaf | ||
1947 | node. */ | ||
1948 | NODE_MASK = 1, /* Mask for accessing the *_NODE bits (bit 0). */ | ||
1949 | } __attribute__ ((__packed__)); | ||
1950 | |||
1951 | typedef u8 INDEX_HEADER_FLAGS; | ||
1952 | |||
1953 | /* | ||
1954 | * This is the header for indexes, describing the INDEX_ENTRY records, which | ||
1955 | * follow the INDEX_HEADER. Together the index header and the index entries | ||
1956 | * make up a complete index. | ||
1957 | * | ||
1958 | * IMPORTANT NOTE: The offset, length and size structure members are counted | ||
1959 | * relative to the start of the index header structure and not relative to the | ||
1960 | * start of the index root or index allocation structures themselves. | ||
1961 | */ | ||
1962 | typedef struct { | ||
1963 | le32 entries_offset; /* Byte offset to first INDEX_ENTRY | ||
1964 | aligned to 8-byte boundary. */ | ||
1965 | le32 index_length; /* Data size of the index in bytes, | ||
1966 | i.e. bytes used from allocated | ||
1967 | size, aligned to 8-byte boundary. */ | ||
1968 | le32 allocated_size; /* Byte size of this index (block), | ||
1969 | multiple of 8 bytes. */ | ||
1970 | /* NOTE: For the index root attribute, index_length and allocated_size are always | ||
1971 | equal, as the attribute is resident and it is resized as needed. In | ||
1972 | the case of the index allocation attribute the attribute is not | ||
1973 | resident and hence the allocated_size is a fixed value and must | ||
1974 | equal the index_block_size specified by the INDEX_ROOT attribute | ||
1975 | corresponding to the INDEX_ALLOCATION attribute this INDEX_BLOCK | ||
1976 | belongs to. */ | ||
1977 | INDEX_HEADER_FLAGS flags; /* Bit field of INDEX_HEADER_FLAGS. */ | ||
1978 | u8 reserved[3]; /* Reserved/align to 8-byte boundary. */ | ||
1979 | } __attribute__ ((__packed__)) INDEX_HEADER; /* sizeof() = 16 bytes (4 + 4 + 4 + 1 + 3) */ | ||
1980 | |||
1981 | /* | ||
1982 | * Attribute: Index root (0x90). | ||
1983 | * | ||
1984 | * NOTE: Always resident. | ||
1985 | * | ||
1986 | * This is followed by a sequence of index entries (INDEX_ENTRY structures) | ||
1987 | * as described by the index header. | ||
1988 | * | ||
1989 | * When a directory is small enough to fit inside the index root then this | ||
1990 | * is the only attribute describing the directory. When the directory is too | ||
1991 | * large to fit in the index root, on the other hand, two additional attributes | ||
1992 | * are present: an index allocation attribute, containing sub-nodes of the B+ | ||
1993 | * directory tree (see below), and a bitmap attribute, describing which virtual | ||
1994 | * cluster numbers (vcns) in the index allocation attribute are in use by an | ||
1995 | * index block. | ||
1996 | * | ||
1997 | * NOTE: The root directory (FILE_root) contains an entry for itself. Other | ||
1998 | * directories do not contain entries for themselves, though. | ||
1999 | */ | ||
2000 | typedef struct { | ||
2001 | ATTR_TYPE type; /* Type of the indexed attribute. Is | ||
2002 | $FILE_NAME for directories, zero | ||
2003 | for view indexes. No other values | ||
2004 | allowed. */ | ||
2005 | COLLATION_RULE collation_rule; /* Collation rule used to sort the | ||
2006 | index entries. If type is $FILE_NAME, | ||
2007 | this must be COLLATION_FILE_NAME. */ | ||
2008 | le32 index_block_size; /* Size of each index block in bytes (in | ||
2009 | the index allocation attribute). */ | ||
2010 | u8 clusters_per_index_block; /* Cluster size of each index block (in | ||
2011 | the index allocation attribute), when | ||
2012 | an index block is >= a cluster, | ||
2013 | otherwise this will be the log of | ||
2014 | the size (like how the encoding of | ||
2015 | the mft record size and the index | ||
2016 | record size found in the boot sector | ||
2017 | work). Has to be a power of 2. */ | ||
2018 | u8 reserved[3]; /* Reserved/align to 8-byte boundary. */ | ||
2019 | INDEX_HEADER index; /* Index header describing the | ||
2020 | following index entries. */ | ||
2021 | } __attribute__ ((__packed__)) INDEX_ROOT; | ||
2022 | |||
2023 | /* | ||
2024 | * Attribute: Index allocation (0xa0). | ||
2025 | * | ||
2026 | * NOTE: Always non-resident (doesn't make sense to be resident anyway!). | ||
2027 | * | ||
2028 | * This is an array of index blocks. Each index block starts with an | ||
2029 | * INDEX_BLOCK structure containing an index header, followed by a sequence of | ||
2030 | * index entries (INDEX_ENTRY structures), as described by the INDEX_HEADER. | ||
2031 | */ | ||
2032 | typedef struct { | ||
2033 | /* 0 NTFS_RECORD; -- Unfolded here as gcc doesn't like unnamed structs. */ | ||
2034 | NTFS_RECORD_TYPE magic; /* Magic is "INDX". */ | ||
2035 | le16 usa_ofs; /* See NTFS_RECORD definition. */ | ||
2036 | le16 usa_count; /* See NTFS_RECORD definition. */ | ||
2037 | |||
2038 | /* 8*/ sle64 lsn; /* $LogFile sequence number of the last | ||
2039 | modification of this index block. */ | ||
2040 | /* 16*/ leVCN index_block_vcn; /* Virtual cluster number of the index block. | ||
2041 | If the cluster_size on the volume is <= the | ||
2042 | index_block_size of the directory, | ||
2043 | index_block_vcn counts in units of clusters, | ||
2044 | and in units of sectors otherwise. */ | ||
2045 | /* 24*/ INDEX_HEADER index; /* Describes the following index entries. */ | ||
2046 | /* sizeof()= 40 (0x28) bytes */ | ||
2047 | /* | ||
2048 | * When creating the index block, we place the update sequence array at this | ||
2049 | * offset, i.e. before we start with the index entries. This also makes sense, | ||
2050 | * otherwise we could run into problems with the update sequence array | ||
2051 | * containing in itself the last two bytes of a sector which would mean that | ||
2052 | * multi sector transfer protection wouldn't work. As you can't protect data | ||
2053 | * by overwriting it since you then can't get it back... | ||
2054 | * When reading use the data from the ntfs record header (presumably usa_ofs and usa_count above -- TODO confirm). | ||
2055 | */ | ||
2056 | } __attribute__ ((__packed__)) INDEX_BLOCK; | ||
2057 | |||
2058 | typedef INDEX_BLOCK INDEX_ALLOCATION; /* Alias for one block; the attribute itself is an array of these. */ | ||
2059 | |||
2060 | /* | ||
2061 | * The system file FILE_Extend/$Reparse contains an index named $R listing | ||
2062 | * all reparse points on the volume. The index entry keys are as defined | ||
2063 | * below. Note, that there is no index data associated with the index entries. | ||
2064 | * | ||
2065 | * The index entries are sorted by the index key file_id. The collation rule is | ||
2066 | * COLLATION_NTOFS_ULONGS. FIXME: Verify whether the reparse_tag is not the | ||
2067 | * primary key / is not a key at all. (AIA) | ||
2068 | */ | ||
2069 | typedef struct { | ||
2070 | le32 reparse_tag; /* Reparse point type (inc. flags). */ | ||
2071 | leMFT_REF file_id; /* Mft record of the file containing the | ||
2072 | reparse point attribute. */ | ||
2073 | } __attribute__ ((__packed__)) REPARSE_INDEX_KEY; | ||
2074 | |||
2075 | /* | ||
2076 | * Quota flags (32-bit). | ||
2077 | * | ||
2078 | * The user quota flags. Names explain meaning. | ||
2079 | */ | ||
2080 | enum { | ||
2081 | QUOTA_FLAG_DEFAULT_LIMITS = const_cpu_to_le32(0x00000001), | ||
2082 | QUOTA_FLAG_LIMIT_REACHED = const_cpu_to_le32(0x00000002), | ||
2083 | QUOTA_FLAG_ID_DELETED = const_cpu_to_le32(0x00000004), | ||
2084 | |||
2085 | QUOTA_FLAG_USER_MASK = const_cpu_to_le32(0x00000007), | ||
2086 | /* This is a bit mask for the user quota flags. */ | ||
2087 | |||
2088 | /* | ||
2089 | * These flags are only present in the quota defaults index entry, i.e. | ||
2090 | * in the entry where owner_id = QUOTA_DEFAULTS_ID. | ||
2091 | */ | ||
2092 | QUOTA_FLAG_TRACKING_ENABLED = const_cpu_to_le32(0x00000010), | ||
2093 | QUOTA_FLAG_ENFORCEMENT_ENABLED = const_cpu_to_le32(0x00000020), | ||
2094 | QUOTA_FLAG_TRACKING_REQUESTED = const_cpu_to_le32(0x00000040), | ||
2095 | QUOTA_FLAG_LOG_THRESHOLD = const_cpu_to_le32(0x00000080), | ||
2096 | |||
2097 | QUOTA_FLAG_LOG_LIMIT = const_cpu_to_le32(0x00000100), | ||
2098 | QUOTA_FLAG_OUT_OF_DATE = const_cpu_to_le32(0x00000200), | ||
2099 | QUOTA_FLAG_CORRUPT = const_cpu_to_le32(0x00000400), | ||
2100 | QUOTA_FLAG_PENDING_DELETES = const_cpu_to_le32(0x00000800), | ||
2101 | }; | ||
2102 | |||
2103 | typedef le32 QUOTA_FLAGS; | ||
2104 | |||
2105 | /* | ||
2106 | * The system file FILE_Extend/$Quota contains two indexes $O and $Q. Quotas | ||
2107 | * are on a per volume and per user basis. | ||
2108 | * | ||
2109 | * The $Q index contains one entry for each existing user_id on the volume. The | ||
2110 | * index key is the user_id of the user/group owning this quota control entry, | ||
2111 | * i.e. the key is the owner_id. The user_id of the owner of a file, i.e. the | ||
2112 | * owner_id, is found in the standard information attribute. The collation rule | ||
2113 | * for $Q is COLLATION_NTOFS_ULONG. | ||
2114 | * | ||
2115 | * The $O index contains one entry for each user/group who has been assigned | ||
2116 | * a quota on that volume. The index key holds the SID of the user_id the | ||
2117 | * entry belongs to, i.e. the owner_id. The collation rule for $O is | ||
2118 | * COLLATION_NTOFS_SID. | ||
2119 | * | ||
2120 | * The $O index entry data is the user_id of the user corresponding to the SID. | ||
2121 | * This user_id is used as an index into $Q to find the quota control entry | ||
2122 | * associated with the SID. | ||
2123 | * | ||
2124 | * The $Q index entry data is the quota control entry and is defined below. | ||
2125 | */ | ||
2126 | typedef struct { | ||
2127 | le32 version; /* Currently equals 2. */ | ||
2128 | QUOTA_FLAGS flags; /* Flags describing this quota entry. */ | ||
2129 | le64 bytes_used; /* How many bytes of the quota are in use. */ | ||
2130 | sle64 change_time; /* Last time this quota entry was changed. */ | ||
2131 | sle64 threshold; /* Soft quota (-1 if not limited). */ | ||
2132 | sle64 limit; /* Hard quota (-1 if not limited). */ | ||
2133 | sle64 exceeded_time; /* How long the soft quota has been exceeded. */ | ||
2134 | SID sid; /* The SID of the user/object associated with | ||
2135 | this quota entry. Equals zero for the quota | ||
2136 | defaults entry (and in fact on a WinXP | ||
2137 | volume, it is not present at all). */ | ||
2138 | } __attribute__ ((__packed__)) QUOTA_CONTROL_ENTRY; | ||
2139 | |||
2140 | /* | ||
2141 | * Predefined owner_id values (32-bit). | ||
2142 | */ | ||
2143 | enum { | ||
2144 | QUOTA_INVALID_ID = const_cpu_to_le32(0x00000000), | ||
2145 | QUOTA_DEFAULTS_ID = const_cpu_to_le32(0x00000001), | ||
2146 | QUOTA_FIRST_USER_ID = const_cpu_to_le32(0x00000100), | ||
2147 | }; | ||
2148 | |||
2149 | /* | ||
2150 | * Current constants for quota control entries. | ||
2151 | */ | ||
2152 | typedef enum { | ||
2153 | /* Current version, cf. QUOTA_CONTROL_ENTRY.version ("Currently equals 2"). Unlike the QUOTA_FLAG_* values this is a plain cpu byte order constant (no const_cpu_to_le32()), while the version field is le32. */ | ||
2154 | QUOTA_VERSION = 2, | ||
2155 | } QUOTA_CONTROL_ENTRY_CONSTANTS; | ||
2156 | |||
2157 | /* | ||
2158 | * Index entry flags (16-bit). | ||
2159 | */ | ||
2160 | enum { | ||
2161 | INDEX_ENTRY_NODE = const_cpu_to_le16(1), /* This entry contains a | ||
2162 | sub-node, i.e. a reference to an index block in form of | ||
2163 | a virtual cluster number (see below). */ | ||
2164 | INDEX_ENTRY_END = const_cpu_to_le16(2), /* This signifies the last | ||
2165 | entry in an index block. The index entry does not | ||
2166 | represent a file but it can point to a sub-node. */ | ||
2167 | |||
2168 | INDEX_ENTRY_SPACE_FILLER = const_cpu_to_le16(0xffff), /* gcc: Force | ||
2169 | enum bit width to 16-bit. */ | ||
2170 | } __attribute__ ((__packed__)); | ||
2171 | |||
2172 | typedef le16 INDEX_ENTRY_FLAGS; | ||
2173 | |||
2174 | /* | ||
2175 | * This is the index entry header. INDEX_ENTRY (see below) begins with these same fields, unfolded in place. | ||
2176 | */ | ||
2177 | typedef struct { | ||
2178 | /* 0*/ union { | ||
2179 | struct { /* Only valid when INDEX_ENTRY_END is not set. */ | ||
2180 | leMFT_REF indexed_file; /* The mft reference of the file | ||
2181 | described by this index | ||
2182 | entry. Used for directory | ||
2183 | indexes. */ | ||
2184 | } __attribute__ ((__packed__)) dir; | ||
2185 | struct { /* Used for views/indexes to find the entry's data. */ | ||
2186 | le16 data_offset; /* Data byte offset from this | ||
2187 | INDEX_ENTRY. Follows the | ||
2188 | index key. */ | ||
2189 | le16 data_length; /* Data length in bytes. */ | ||
2190 | le32 reservedV; /* Reserved (zero). */ | ||
2191 | } __attribute__ ((__packed__)) vi; | ||
2192 | } __attribute__ ((__packed__)) data; | ||
2193 | /* 8*/ le16 length; /* Byte size of this index entry, multiple of | ||
2194 | 8-bytes. */ | ||
2195 | /* 10*/ le16 key_length; /* Byte size of the key value, which is in the | ||
2196 | index entry. The key follows the reserved field. Not | ||
2197 | multiple of 8-bytes. */ | ||
2198 | /* 12*/ INDEX_ENTRY_FLAGS flags; /* Bit field of INDEX_ENTRY_* flags. */ | ||
2199 | /* 14*/ le16 reserved; /* Reserved/align to 8-byte boundary. */ | ||
2200 | /* sizeof() = 16 bytes */ | ||
2201 | } __attribute__ ((__packed__)) INDEX_ENTRY_HEADER; | ||
2202 | |||
2203 | /* | ||
2204 | * This is an index entry. A sequence of such entries follows each INDEX_HEADER | ||
2205 | * structure. Together they make up a complete index. The index follows either | ||
2206 | * an index root attribute or an index allocation attribute. | ||
2207 | * | ||
2208 | * NOTE: Before NTFS 3.0 only filename attributes were indexed. | ||
2209 | */ | ||
2210 | typedef struct { | ||
2211 | /*Ofs*/ | ||
2212 | /* 0 INDEX_ENTRY_HEADER; -- Unfolded here as gcc dislikes unnamed structs. */ | ||
2213 | union { | ||
2214 | struct { /* Only valid when INDEX_ENTRY_END is not set. */ | ||
2215 | leMFT_REF indexed_file; /* The mft reference of the file | ||
2216 | described by this index | ||
2217 | entry. Used for directory | ||
2218 | indexes. */ | ||
2219 | } __attribute__ ((__packed__)) dir; | ||
2220 | struct { /* Used for views/indexes to find the entry's data. */ | ||
2221 | le16 data_offset; /* Data byte offset from this | ||
2222 | INDEX_ENTRY. Follows the | ||
2223 | index key. */ | ||
2224 | le16 data_length; /* Data length in bytes. */ | ||
2225 | le32 reservedV; /* Reserved (zero). */ | ||
2226 | } __attribute__ ((__packed__)) vi; | ||
2227 | } __attribute__ ((__packed__)) data; | ||
2228 | le16 length; /* Byte size of this index entry, multiple of | ||
2229 | 8-bytes. */ | ||
2230 | le16 key_length; /* Byte size of the key value, which is in the | ||
2231 | index entry. It follows field reserved. Not | ||
2232 | multiple of 8-bytes. */ | ||
2233 | INDEX_ENTRY_FLAGS flags; /* Bit field of INDEX_ENTRY_* flags. */ | ||
2234 | le16 reserved; /* Reserved/align to 8-byte boundary. */ | ||
2235 | |||
2236 | /* 16*/ union { /* The key of the indexed attribute. NOTE: Only present | ||
2237 | if INDEX_ENTRY_END bit in flags is not set. NOTE: On | ||
2238 | NTFS versions before 3.0 the only valid key is the | ||
2239 | FILE_NAME_ATTR. On NTFS 3.0+ the following | ||
2240 | additional index keys are defined: */ | ||
2241 | FILE_NAME_ATTR file_name;/* $I30 index in directories. */ | ||
2242 | SII_INDEX_KEY sii; /* $SII index in $Secure. */ | ||
2243 | SDH_INDEX_KEY sdh; /* $SDH index in $Secure. */ | ||
2244 | GUID object_id; /* $O index in FILE_Extend/$ObjId: The | ||
2245 | object_id of the mft record found in | ||
2246 | the data part of the index. */ | ||
2247 | REPARSE_INDEX_KEY reparse; /* $R index in | ||
2248 | FILE_Extend/$Reparse. */ | ||
2249 | SID sid; /* $O index in FILE_Extend/$Quota: | ||
2250 | SID of the owner of the user_id. */ | ||
2251 | le32 owner_id; /* $Q index in FILE_Extend/$Quota: | ||
2252 | user_id of the owner of the quota | ||
2253 | control entry in the data part of | ||
2254 | the index. */ | ||
2255 | } __attribute__ ((__packed__)) key; | ||
2256 | /* The (optional) index data is inserted here when creating. */ | ||
2257 | // leVCN vcn; /* If INDEX_ENTRY_NODE bit in flags is set, the last | ||
2258 | // eight bytes of this index entry contain the virtual | ||
2259 | // cluster number of the index block that holds the | ||
2260 | // entries immediately preceding the current entry (the | ||
2261 | // vcn references the corresponding cluster in the data | ||
2262 | // of the non-resident index allocation attribute). If | ||
2263 | // the key_length is zero, then the vcn immediately | ||
2264 | // follows the INDEX_ENTRY_HEADER. Regardless of | ||
2265 | // key_length, the address of the 8-byte boundary | ||
2266 | // aligned vcn of INDEX_ENTRY{_HEADER} *ie is given by | ||
2267 | // (char*)ie + le16_to_cpu(ie->length) - sizeof(VCN), | ||
2268 | // where sizeof(VCN) can be hardcoded as 8 if wanted. */ | ||
2269 | } __attribute__ ((__packed__)) INDEX_ENTRY; | ||
2270 | |||
2271 | /* | ||
2272 | * Attribute: Bitmap (0xb0). | ||
2273 | * | ||
2274 | * Contains an array of bits (aka a bitfield). | ||
2275 | * | ||
2276 | * When used in conjunction with the index allocation attribute, each bit | ||
2277 | * corresponds to one index block within the index allocation attribute. Thus | ||
2278 | * the number of bits in the bitmap * index block size / cluster size is the | ||
2279 | * number of clusters in the index allocation attribute. | ||
2280 | */ | ||
2281 | typedef struct { | ||
2282 | u8 bitmap[0]; /* Array of bits. */ | ||
2283 | } __attribute__ ((__packed__)) BITMAP_ATTR; | ||
2284 | |||
2285 | /* | ||
2286 | * The reparse point tag defines the type of the reparse point. It also | ||
2287 | * includes several flags, which further describe the reparse point. | ||
2288 | * | ||
2289 | * The reparse point tag is an unsigned 32-bit value divided in three parts: | ||
2290 | * | ||
2291 | * 1. The least significant 16 bits (i.e. bits 0 to 15) specify the type of | ||
2292 | * the reparse point. | ||
2293 | * 2. The 13 bits after this (i.e. bits 16 to 28) are reserved for future use. | ||
2294 | * 3. The most significant three bits are flags describing the reparse point. | ||
2295 | * They are defined as follows: | ||
2296 | * bit 29: Name surrogate bit. If set, the filename is an alias for | ||
2297 | * another object in the system. | ||
2298 | * bit 30: High-latency bit. If set, accessing the first byte of data will | ||
2299 | * be slow. (E.g. the data is stored on a tape drive.) | ||
2300 | * bit 31: Microsoft bit. If set, the tag is owned by Microsoft. User | ||
2301 | * defined tags have to use zero here. | ||
2302 | * | ||
2303 | * These are the predefined reparse point tags: | ||
2304 | */ | ||
2305 | enum { | ||
2306 | IO_REPARSE_TAG_IS_ALIAS = const_cpu_to_le32(0x20000000), /* Bit 29. */ | ||
2307 | IO_REPARSE_TAG_IS_HIGH_LATENCY = const_cpu_to_le32(0x40000000), /* Bit 30. */ | ||
2308 | IO_REPARSE_TAG_IS_MICROSOFT = const_cpu_to_le32(0x80000000), /* Bit 31. */ | ||
2309 | |||
2310 | IO_REPARSE_TAG_RESERVED_ZERO = const_cpu_to_le32(0x00000000), | ||
2311 | IO_REPARSE_TAG_RESERVED_ONE = const_cpu_to_le32(0x00000001), | ||
2312 | IO_REPARSE_TAG_RESERVED_RANGE = const_cpu_to_le32(0x00000001), | ||
2313 | |||
2314 | IO_REPARSE_TAG_NSS = const_cpu_to_le32(0x68000005), | ||
2315 | IO_REPARSE_TAG_NSS_RECOVER = const_cpu_to_le32(0x68000006), | ||
2316 | IO_REPARSE_TAG_SIS = const_cpu_to_le32(0x68000007), | ||
2317 | IO_REPARSE_TAG_DFS = const_cpu_to_le32(0x68000008), | ||
2318 | |||
2319 | IO_REPARSE_TAG_MOUNT_POINT = const_cpu_to_le32(0x88000003), | ||
2320 | |||
2321 | IO_REPARSE_TAG_HSM = const_cpu_to_le32(0xa8000004), | ||
2322 | |||
2323 | IO_REPARSE_TAG_SYMBOLIC_LINK = const_cpu_to_le32(0xe8000000), | ||
2324 | |||
2325 | IO_REPARSE_TAG_VALID_VALUES = const_cpu_to_le32(0xe000ffff), /* Mask of the three flag bits (29-31) plus the 16 type bits (0-15). */ | ||
2326 | }; | ||
2327 | |||
2328 | /* | ||
2329 | * Attribute: Reparse point (0xc0). | ||
2330 | * | ||
2331 | * NOTE: Can be resident or non-resident. | ||
2332 | */ | ||
2333 | typedef struct { | ||
2334 | le32 reparse_tag; /* Reparse point type (inc. flags). */ | ||
2335 | le16 reparse_data_length; /* Byte size of reparse data. */ | ||
2336 | le16 reserved; /* Align to 8-byte boundary. */ | ||
2337 | u8 reparse_data[0]; /* Meaning depends on reparse_tag. */ | ||
2338 | } __attribute__ ((__packed__)) REPARSE_POINT; | ||
2339 | |||
2340 | /* | ||
2341 | * Attribute: Extended attribute (EA) information (0xd0). | ||
2342 | * | ||
2343 | * NOTE: Always resident. (Is this true???) | ||
2344 | */ | ||
2345 | typedef struct { | ||
2346 | le16 ea_length; /* Byte size of the packed extended | ||
2347 | attributes. */ | ||
2348 | le16 need_ea_count; /* The number of extended attributes which have | ||
2349 | the NEED_EA bit set. */ | ||
2350 | le32 ea_query_length; /* Byte size of the buffer required to query | ||
2351 | the extended attributes when calling | ||
2352 | ZwQueryEaFile() in Windows NT/2k. I.e. the | ||
2353 | byte size of the unpacked extended | ||
2354 | attributes. */ | ||
2355 | } __attribute__ ((__packed__)) EA_INFORMATION; | ||
2356 | |||
2357 | /* | ||
2358 | * Extended attribute flags (8-bit). This is the type of the flags field of EA_ATTR. | ||
2359 | */ | ||
2360 | enum { | ||
2361 | NEED_EA = 0x80 /* EAs with this bit set are the ones counted in EA_INFORMATION.need_ea_count. */ | ||
2362 | } __attribute__ ((__packed__)); | ||
2363 | |||
2364 | typedef u8 EA_FLAGS; | ||
2365 | |||
2366 | /* | ||
2367 | * Attribute: Extended attribute (EA) (0xe0). | ||
2368 | * | ||
2369 | * NOTE: Always non-resident. (Is this true?) | ||
2370 | * | ||
2371 | * Like the attribute list and the index buffer list, the EA attribute value is | ||
2372 | * a sequence of EA_ATTR variable length records. | ||
2373 | * | ||
2374 | * FIXME: It appears weird that the EA name is not unicode. Is it true? | ||
2375 | */ | ||
2376 | typedef struct { | ||
2377 | le32 next_entry_offset; /* Offset to the next EA_ATTR. */ | ||
2378 | EA_FLAGS flags; /* Flags describing the EA. */ | ||
2379 | u8 ea_name_length; /* Length of the name of the EA in bytes. */ | ||
2380 | le16 ea_value_length; /* Byte size of the EA's value. */ | ||
2381 | u8 ea_name[0]; /* Name of the EA; its length is given by ea_name_length. */ | ||
2382 | u8 ea_value[0]; /* The value of the EA. Immediately follows | ||
2383 | the name. As ea_name is zero-length, this member is declared at the same offset as ea_name, so the value has to be located via the name length. NOTE(review): confirm whether a terminator separates name and value. */ | ||
2384 | } __attribute__ ((__packed__)) EA_ATTR; | ||
2385 | |||
2386 | /* | ||
2387 | * Attribute: Property set (0xf0). | ||
2388 | * | ||
2389 | * Intended to support Native Structure Storage (NSS) - a feature removed from | ||
2390 | * NTFS 3.0 during beta testing. | ||
2391 | */ | ||
2392 | typedef struct { | ||
2393 | /* Irrelevant as feature unused. */ | ||
2394 | } __attribute__ ((__packed__)) PROPERTY_SET; | ||
2395 | |||
2396 | /* | ||
2397 | * Attribute: Logged utility stream (0x100). | ||
2398 | * | ||
2399 | * NOTE: Can be resident or non-resident. | ||
2400 | * | ||
2401 | * Operations on this attribute are logged to the journal ($LogFile) like | ||
2402 | * normal metadata changes. | ||
2403 | * | ||
2404 | * Used by the Encrypting File System (EFS). All encrypted files have this | ||
2405 | * attribute with the name $EFS. | ||
2406 | */ | ||
2407 | typedef struct { | ||
2408 | /* Can be anything the creator chooses. */ | ||
2409 | /* EFS uses it as follows: */ | ||
2410 | // FIXME: Type this info, verifying it along the way. (AIA) | ||
2411 | } __attribute__ ((__packed__)) LOGGED_UTILITY_STREAM, EFS_ATTR; | ||
2412 | |||
2413 | #endif /* _LINUX_NTFS_LAYOUT_H */ | ||
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c new file mode 100644 index 000000000000..23fd911078b1 --- /dev/null +++ b/fs/ntfs/lcnalloc.c | |||
@@ -0,0 +1,1002 @@ | |||
1 | /* | ||
2 | * lcnalloc.c - Cluster (de)allocation code. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2004 Anton Altaparmakov | ||
5 | * | ||
6 | * This program/include file is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as published | ||
8 | * by the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program/include file is distributed in the hope that it will be | ||
12 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
13 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program (in the main directory of the Linux-NTFS | ||
18 | * distribution in the file COPYING); if not, write to the Free Software | ||
19 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | |||
22 | #ifdef NTFS_RW | ||
23 | |||
24 | #include <linux/pagemap.h> | ||
25 | |||
26 | #include "lcnalloc.h" | ||
27 | #include "debug.h" | ||
28 | #include "bitmap.h" | ||
29 | #include "inode.h" | ||
30 | #include "volume.h" | ||
31 | #include "attrib.h" | ||
32 | #include "malloc.h" | ||
33 | #include "aops.h" | ||
34 | #include "ntfs.h" | ||
35 | |||
36 | /** | ||
37 | * ntfs_cluster_free_from_rl_nolock - free clusters from runlist | ||
38 | * @vol: mounted ntfs volume on which to free the clusters | ||
39 | * @rl: runlist describing the clusters to free | ||
40 | * | ||
41 | * Free all the clusters described by the runlist @rl on the volume @vol. Runs | ||
42 | * with a negative lcn are skipped. Freeing continues after a failed run, so in | ||
43 | * the case of an error being returned, at least some of the clusters were not freed. | ||
44 | * | ||
45 | * Return 0 on success and -errno on error. The first error is returned, except that a recorded -ENOMEM is superseded by a later, different error. | ||
46 | * | ||
47 | * Locking: - The volume lcn bitmap must be locked for writing on entry and is | ||
48 | * left locked on return. | ||
49 | */ | ||
50 | int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, | ||
51 | const runlist_element *rl) | ||
52 | { | ||
53 | struct inode *lcnbmp_vi = vol->lcnbmp_ino; | ||
54 | int ret = 0; | ||
55 | |||
56 | ntfs_debug("Entering."); | ||
57 | for (; rl->length; rl++) { /* A zero length element terminates the runlist. */ | ||
58 | int err; | ||
59 | |||
60 | if (rl->lcn < 0) /* Negative lcn, e.g. LCN_RL_NOT_MAPPED: no bitmap bits to clear for this run. */ | ||
61 | continue; | ||
62 | err = ntfs_bitmap_clear_run(lcnbmp_vi, rl->lcn, rl->length); | ||
63 | if (unlikely(err && (!ret || ret == -ENOMEM) && ret != err)) /* Errors are negative, so compare against -ENOMEM. */ | ||
64 | ret = err; /* Keep the first error, but let a different error replace -ENOMEM. */ | ||
65 | } | ||
66 | ntfs_debug("Done."); | ||
67 | return ret; | ||
68 | } | ||
69 | |||
70 | /** | ||
71 | * ntfs_cluster_alloc - allocate clusters on an ntfs volume | ||
72 | * @vol: mounted ntfs volume on which to allocate the clusters | ||
73 | * @start_vcn: vcn to use for the first allocated cluster | ||
74 | * @count: number of clusters to allocate | ||
75 | * @start_lcn: starting lcn at which to allocate the clusters (or -1 if none) | ||
76 | * @zone: zone from which to allocate the clusters | ||
77 | * | ||
78 | * Allocate @count clusters preferably starting at cluster @start_lcn or at the | ||
79 | * current allocator position if @start_lcn is -1, on the mounted ntfs volume | ||
80 | * @vol. @zone is either DATA_ZONE for allocation of normal clusters or | ||
81 | * MFT_ZONE for allocation of clusters for the master file table, i.e. the | ||
82 | * $MFT/$DATA attribute. | ||
83 | * | ||
84 | * @start_vcn specifies the vcn of the first allocated cluster. This makes | ||
85 | * merging the resulting runlist with the old runlist easier. | ||
86 | * | ||
87 | * You need to check the return value with IS_ERR(). If this is false, the | ||
88 | * function was successful and the return value is a runlist describing the | ||
89 | * allocated cluster(s). If IS_ERR() is true, the function failed and | ||
90 | * PTR_ERR() gives you the error code. | ||
91 | * | ||
92 | * Notes on the allocation algorithm | ||
93 | * ================================= | ||
94 | * | ||
95 | * There are two data zones. First is the area between the end of the mft zone | ||
96 | * and the end of the volume, and second is the area between the start of the | ||
97 | * volume and the start of the mft zone. On unmodified/standard NTFS 1.x | ||
98 | * volumes, the second data zone does not exist due to the mft zone being | ||
99 | * expanded to cover the start of the volume in order to reserve space for the | ||
100 | * mft bitmap attribute. | ||
101 | * | ||
102 | * This is not the prettiest function but the complexity stems from the need of | ||
103 | * implementing the mft vs data zoned approach and from the fact that we have | ||
104 | * access to the lcn bitmap in portions of up to 8192 bytes at a time, so we | ||
105 | * need to cope with crossing over boundaries of two buffers. Further, the | ||
106 | * fact that the allocator allows for caller supplied hints as to the location | ||
107 | * of where allocation should begin and the fact that the allocator keeps track | ||
108 | * of where in the data zones the next natural allocation should occur, | ||
109 | * contribute to the complexity of the function. But it should all be | ||
110 | * worthwhile, because this allocator should: 1) be a full implementation of | ||
111 | * the MFT zone approach used by Windows NT, 2) cause reduction in | ||
112 | * fragmentation, and 3) be speedy in allocations (the code is not optimized | ||
113 | * for speed, but the algorithm is, so further speed improvements are probably | ||
114 | * possible). | ||
115 | * | ||
116 | * FIXME: We should be monitoring cluster allocation and increment the MFT zone | ||
117 | * size dynamically but this is something for the future. We will just cause | ||
118 | * heavier fragmentation by not doing it and I am not even sure Windows would | ||
119 | * grow the MFT zone dynamically, so it might even be correct not to do this. | ||
120 | * The overhead in doing dynamic MFT zone expansion would be very large and | ||
121 | * unlikely worth the effort. (AIA) | ||
122 | * | ||
123 | * TODO: I have added in double the required zone position pointer wrap around | ||
124 | * logic which can be optimized to having only one of the two logic sets. | ||
125 | * However, having the double logic will work fine, but if we have only one of | ||
126 | * the sets and we get it wrong somewhere, then we get into trouble, so | ||
127 | * removing the duplicate logic requires _very_ careful consideration of _all_ | ||
128 | * possible code paths. So at least for now, I am leaving the double logic - | ||
129 | * better safe than sorry... (AIA) | ||
130 | * | ||
131 | * Locking: - The volume lcn bitmap must be unlocked on entry and is unlocked | ||
132 | * on return. | ||
133 | * - This function takes the volume lcn bitmap lock for writing and | ||
134 | * modifies the bitmap contents. | ||
135 | */ | ||
136 | runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn, | ||
137 | const s64 count, const LCN start_lcn, | ||
138 | const NTFS_CLUSTER_ALLOCATION_ZONES zone) | ||
139 | { | ||
140 | LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn; | ||
141 | LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size; | ||
142 | s64 clusters; | ||
143 | struct inode *lcnbmp_vi; | ||
144 | runlist_element *rl = NULL; | ||
145 | struct address_space *mapping; | ||
146 | struct page *page = NULL; | ||
147 | u8 *buf, *byte; | ||
148 | int err = 0, rlpos, rlsize, buf_size; | ||
149 | u8 pass, done_zones, search_zone, need_writeback = 0, bit; | ||
150 | |||
151 | ntfs_debug("Entering for start_vcn 0x%llx, count 0x%llx, start_lcn " | ||
152 | "0x%llx, zone %s_ZONE.", (unsigned long long)start_vcn, | ||
153 | (unsigned long long)count, | ||
154 | (unsigned long long)start_lcn, | ||
155 | zone == MFT_ZONE ? "MFT" : "DATA"); | ||
156 | BUG_ON(!vol); | ||
157 | lcnbmp_vi = vol->lcnbmp_ino; | ||
158 | BUG_ON(!lcnbmp_vi); | ||
159 | BUG_ON(start_vcn < 0); | ||
160 | BUG_ON(count < 0); | ||
161 | BUG_ON(start_lcn < -1); | ||
162 | BUG_ON(zone < FIRST_ZONE); | ||
163 | BUG_ON(zone > LAST_ZONE); | ||
164 | |||
165 | /* Return empty runlist if @count == 0 */ | ||
166 | // FIXME: Do we want to just return NULL instead? (AIA) | ||
167 | if (!count) { | ||
168 | rl = ntfs_malloc_nofs(PAGE_SIZE); | ||
169 | if (!rl) | ||
170 | return ERR_PTR(-ENOMEM); | ||
171 | rl[0].vcn = start_vcn; | ||
172 | rl[0].lcn = LCN_RL_NOT_MAPPED; | ||
173 | rl[0].length = 0; | ||
174 | return rl; | ||
175 | } | ||
176 | /* Take the lcnbmp lock for writing. */ | ||
177 | down_write(&vol->lcnbmp_lock); | ||
178 | /* | ||
179 | * If no specific @start_lcn was requested, use the current data zone | ||
180 | * position, otherwise use the requested @start_lcn but make sure it | ||
181 | * lies outside the mft zone. Also set done_zones to 0 (no zones done) | ||
182 | * and pass depending on whether we are starting inside a zone (1) or | ||
183 | * at the beginning of a zone (2). If requesting from the MFT_ZONE, | ||
184 | * we either start at the current position within the mft zone or at | ||
185 | * the specified position. If the latter is out of bounds then we start | ||
186 | * at the beginning of the MFT_ZONE. | ||
187 | */ | ||
188 | done_zones = 0; | ||
189 | pass = 1; | ||
190 | /* | ||
191 | * zone_start and zone_end are the current search range. search_zone | ||
192 | * is 1 for mft zone, 2 for data zone 1 (end of mft zone till end of | ||
193 | * volume) and 4 for data zone 2 (start of volume till start of mft | ||
194 | * zone). | ||
195 | */ | ||
196 | zone_start = start_lcn; | ||
197 | if (zone_start < 0) { | ||
198 | if (zone == DATA_ZONE) | ||
199 | zone_start = vol->data1_zone_pos; | ||
200 | else | ||
201 | zone_start = vol->mft_zone_pos; | ||
202 | if (!zone_start) { | ||
203 | /* | ||
204 | * Zone starts at beginning of volume which means a | ||
205 | * single pass is sufficient. | ||
206 | */ | ||
207 | pass = 2; | ||
208 | } | ||
209 | } else if (zone == DATA_ZONE && zone_start >= vol->mft_zone_start && | ||
210 | zone_start < vol->mft_zone_end) { | ||
211 | zone_start = vol->mft_zone_end; | ||
212 | /* | ||
213 | * Starting at beginning of data1_zone which means a single | ||
214 | * pass in this zone is sufficient. | ||
215 | */ | ||
216 | pass = 2; | ||
217 | } else if (zone == MFT_ZONE && (zone_start < vol->mft_zone_start || | ||
218 | zone_start >= vol->mft_zone_end)) { | ||
219 | zone_start = vol->mft_lcn; | ||
220 | if (!vol->mft_zone_end) | ||
221 | zone_start = 0; | ||
222 | /* | ||
223 | * Starting at beginning of volume which means a single pass | ||
224 | * is sufficient. | ||
225 | */ | ||
226 | pass = 2; | ||
227 | } | ||
228 | if (zone == MFT_ZONE) { | ||
229 | zone_end = vol->mft_zone_end; | ||
230 | search_zone = 1; | ||
231 | } else /* if (zone == DATA_ZONE) */ { | ||
232 | /* Skip searching the mft zone. */ | ||
233 | done_zones |= 1; | ||
234 | if (zone_start >= vol->mft_zone_end) { | ||
235 | zone_end = vol->nr_clusters; | ||
236 | search_zone = 2; | ||
237 | } else { | ||
238 | zone_end = vol->mft_zone_start; | ||
239 | search_zone = 4; | ||
240 | } | ||
241 | } | ||
242 | /* | ||
243 | * bmp_pos is the current bit position inside the bitmap. We use | ||
244 | * bmp_initial_pos to determine whether or not to do a zone switch. | ||
245 | */ | ||
246 | bmp_pos = bmp_initial_pos = zone_start; | ||
247 | |||
248 | /* Loop until all clusters are allocated, i.e. clusters == 0. */ | ||
249 | clusters = count; | ||
250 | rlpos = rlsize = 0; | ||
251 | mapping = lcnbmp_vi->i_mapping; | ||
252 | while (1) { | ||
253 | ntfs_debug("Start of outer while loop: done_zones 0x%x, " | ||
254 | "search_zone %i, pass %i, zone_start 0x%llx, " | ||
255 | "zone_end 0x%llx, bmp_initial_pos 0x%llx, " | ||
256 | "bmp_pos 0x%llx, rlpos %i, rlsize %i.", | ||
257 | done_zones, search_zone, pass, | ||
258 | (unsigned long long)zone_start, | ||
259 | (unsigned long long)zone_end, | ||
260 | (unsigned long long)bmp_initial_pos, | ||
261 | (unsigned long long)bmp_pos, rlpos, rlsize); | ||
262 | /* Loop until we run out of free clusters. */ | ||
263 | last_read_pos = bmp_pos >> 3; | ||
264 | ntfs_debug("last_read_pos 0x%llx.", | ||
265 | (unsigned long long)last_read_pos); | ||
266 | if (last_read_pos > lcnbmp_vi->i_size) { | ||
267 | ntfs_debug("End of attribute reached. " | ||
268 | "Skipping to zone_pass_done."); | ||
269 | goto zone_pass_done; | ||
270 | } | ||
271 | if (likely(page)) { | ||
272 | if (need_writeback) { | ||
273 | ntfs_debug("Marking page dirty."); | ||
274 | flush_dcache_page(page); | ||
275 | set_page_dirty(page); | ||
276 | need_writeback = 0; | ||
277 | } | ||
278 | ntfs_unmap_page(page); | ||
279 | } | ||
280 | page = ntfs_map_page(mapping, last_read_pos >> | ||
281 | PAGE_CACHE_SHIFT); | ||
282 | if (IS_ERR(page)) { | ||
283 | err = PTR_ERR(page); | ||
284 | ntfs_error(vol->sb, "Failed to map page."); | ||
285 | goto out; | ||
286 | } | ||
287 | buf_size = last_read_pos & ~PAGE_CACHE_MASK; | ||
288 | buf = page_address(page) + buf_size; | ||
289 | buf_size = PAGE_CACHE_SIZE - buf_size; | ||
290 | if (unlikely(last_read_pos + buf_size > lcnbmp_vi->i_size)) | ||
291 | buf_size = lcnbmp_vi->i_size - last_read_pos; | ||
292 | buf_size <<= 3; | ||
293 | lcn = bmp_pos & 7; | ||
294 | bmp_pos &= ~7; | ||
295 | ntfs_debug("Before inner while loop: buf_size %i, lcn 0x%llx, " | ||
296 | "bmp_pos 0x%llx, need_writeback %i.", buf_size, | ||
297 | (unsigned long long)lcn, | ||
298 | (unsigned long long)bmp_pos, need_writeback); | ||
299 | while (lcn < buf_size && lcn + bmp_pos < zone_end) { | ||
300 | byte = buf + (lcn >> 3); | ||
301 | ntfs_debug("In inner while loop: buf_size %i, " | ||
302 | "lcn 0x%llx, bmp_pos 0x%llx, " | ||
303 | "need_writeback %i, byte ofs 0x%x, " | ||
304 | "*byte 0x%x.", buf_size, | ||
305 | (unsigned long long)lcn, | ||
306 | (unsigned long long)bmp_pos, | ||
307 | need_writeback, | ||
308 | (unsigned int)(lcn >> 3), | ||
309 | (unsigned int)*byte); | ||
310 | /* Skip full bytes. */ | ||
311 | if (*byte == 0xff) { | ||
312 | lcn = (lcn + 8) & ~7; | ||
313 | ntfs_debug("Continuing while loop 1."); | ||
314 | continue; | ||
315 | } | ||
316 | bit = 1 << (lcn & 7); | ||
317 | ntfs_debug("bit %i.", bit); | ||
318 | /* If the bit is already set, go onto the next one. */ | ||
319 | if (*byte & bit) { | ||
320 | lcn++; | ||
321 | ntfs_debug("Continuing while loop 2."); | ||
322 | continue; | ||
323 | } | ||
324 | /* | ||
325 | * Allocate more memory if needed, including space for | ||
326 | * the terminator element. | ||
327 | * ntfs_malloc_nofs() operates on whole pages only. | ||
328 | */ | ||
329 | if ((rlpos + 2) * sizeof(*rl) > rlsize) { | ||
330 | runlist_element *rl2; | ||
331 | |||
332 | ntfs_debug("Reallocating memory."); | ||
333 | if (!rl) | ||
334 | ntfs_debug("First free bit is at LCN " | ||
335 | "0x%llx.", | ||
336 | (unsigned long long) | ||
337 | (lcn + bmp_pos)); | ||
338 | rl2 = ntfs_malloc_nofs(rlsize + (int)PAGE_SIZE); | ||
339 | if (unlikely(!rl2)) { | ||
340 | err = -ENOMEM; | ||
341 | ntfs_error(vol->sb, "Failed to " | ||
342 | "allocate memory."); | ||
343 | goto out; | ||
344 | } | ||
345 | memcpy(rl2, rl, rlsize); | ||
346 | ntfs_free(rl); | ||
347 | rl = rl2; | ||
348 | rlsize += PAGE_SIZE; | ||
349 | ntfs_debug("Reallocated memory, rlsize 0x%x.", | ||
350 | rlsize); | ||
351 | } | ||
352 | /* Allocate the bitmap bit. */ | ||
353 | *byte |= bit; | ||
354 | /* We need to write this bitmap page to disk. */ | ||
355 | need_writeback = 1; | ||
356 | ntfs_debug("*byte 0x%x, need_writeback is set.", | ||
357 | (unsigned int)*byte); | ||
358 | /* | ||
359 | * Coalesce with previous run if adjacent LCNs. | ||
360 | * Otherwise, append a new run. | ||
361 | */ | ||
362 | ntfs_debug("Adding run (lcn 0x%llx, len 0x%llx), " | ||
363 | "prev_lcn 0x%llx, lcn 0x%llx, " | ||
364 | "bmp_pos 0x%llx, prev_run_len 0x%llx, " | ||
365 | "rlpos %i.", | ||
366 | (unsigned long long)(lcn + bmp_pos), | ||
367 | 1ULL, (unsigned long long)prev_lcn, | ||
368 | (unsigned long long)lcn, | ||
369 | (unsigned long long)bmp_pos, | ||
370 | (unsigned long long)prev_run_len, | ||
371 | rlpos); | ||
372 | if (prev_lcn == lcn + bmp_pos - prev_run_len && rlpos) { | ||
373 | ntfs_debug("Coalescing to run (lcn 0x%llx, " | ||
374 | "len 0x%llx).", | ||
375 | (unsigned long long) | ||
376 | rl[rlpos - 1].lcn, | ||
377 | (unsigned long long) | ||
378 | rl[rlpos - 1].length); | ||
379 | rl[rlpos - 1].length = ++prev_run_len; | ||
380 | ntfs_debug("Run now (lcn 0x%llx, len 0x%llx), " | ||
381 | "prev_run_len 0x%llx.", | ||
382 | (unsigned long long) | ||
383 | rl[rlpos - 1].lcn, | ||
384 | (unsigned long long) | ||
385 | rl[rlpos - 1].length, | ||
386 | (unsigned long long) | ||
387 | prev_run_len); | ||
388 | } else { | ||
389 | if (likely(rlpos)) { | ||
390 | ntfs_debug("Adding new run, (previous " | ||
391 | "run lcn 0x%llx, " | ||
392 | "len 0x%llx).", | ||
393 | (unsigned long long) | ||
394 | rl[rlpos - 1].lcn, | ||
395 | (unsigned long long) | ||
396 | rl[rlpos - 1].length); | ||
397 | rl[rlpos].vcn = rl[rlpos - 1].vcn + | ||
398 | prev_run_len; | ||
399 | } else { | ||
400 | ntfs_debug("Adding new run, is first " | ||
401 | "run."); | ||
402 | rl[rlpos].vcn = start_vcn; | ||
403 | } | ||
404 | rl[rlpos].lcn = prev_lcn = lcn + bmp_pos; | ||
405 | rl[rlpos].length = prev_run_len = 1; | ||
406 | rlpos++; | ||
407 | } | ||
408 | /* Done? */ | ||
409 | if (!--clusters) { | ||
410 | LCN tc; | ||
411 | /* | ||
412 | * Update the current zone position. Positions | ||
413 | * of already scanned zones have been updated | ||
414 | * during the respective zone switches. | ||
415 | */ | ||
416 | tc = lcn + bmp_pos + 1; | ||
417 | ntfs_debug("Done. Updating current zone " | ||
418 | "position, tc 0x%llx, " | ||
419 | "search_zone %i.", | ||
420 | (unsigned long long)tc, | ||
421 | search_zone); | ||
422 | switch (search_zone) { | ||
423 | case 1: | ||
424 | ntfs_debug("Before checks, " | ||
425 | "vol->mft_zone_pos " | ||
426 | "0x%llx.", | ||
427 | (unsigned long long) | ||
428 | vol->mft_zone_pos); | ||
429 | if (tc >= vol->mft_zone_end) { | ||
430 | vol->mft_zone_pos = | ||
431 | vol->mft_lcn; | ||
432 | if (!vol->mft_zone_end) | ||
433 | vol->mft_zone_pos = 0; | ||
434 | } else if ((bmp_initial_pos >= | ||
435 | vol->mft_zone_pos || | ||
436 | tc > vol->mft_zone_pos) | ||
437 | && tc >= vol->mft_lcn) | ||
438 | vol->mft_zone_pos = tc; | ||
439 | ntfs_debug("After checks, " | ||
440 | "vol->mft_zone_pos " | ||
441 | "0x%llx.", | ||
442 | (unsigned long long) | ||
443 | vol->mft_zone_pos); | ||
444 | break; | ||
445 | case 2: | ||
446 | ntfs_debug("Before checks, " | ||
447 | "vol->data1_zone_pos " | ||
448 | "0x%llx.", | ||
449 | (unsigned long long) | ||
450 | vol->data1_zone_pos); | ||
451 | if (tc >= vol->nr_clusters) | ||
452 | vol->data1_zone_pos = | ||
453 | vol->mft_zone_end; | ||
454 | else if ((bmp_initial_pos >= | ||
455 | vol->data1_zone_pos || | ||
456 | tc > vol->data1_zone_pos) | ||
457 | && tc >= vol->mft_zone_end) | ||
458 | vol->data1_zone_pos = tc; | ||
459 | ntfs_debug("After checks, " | ||
460 | "vol->data1_zone_pos " | ||
461 | "0x%llx.", | ||
462 | (unsigned long long) | ||
463 | vol->data1_zone_pos); | ||
464 | break; | ||
465 | case 4: | ||
466 | ntfs_debug("Before checks, " | ||
467 | "vol->data2_zone_pos " | ||
468 | "0x%llx.", | ||
469 | (unsigned long long) | ||
470 | vol->data2_zone_pos); | ||
471 | if (tc >= vol->mft_zone_start) | ||
472 | vol->data2_zone_pos = 0; | ||
473 | else if (bmp_initial_pos >= | ||
474 | vol->data2_zone_pos || | ||
475 | tc > vol->data2_zone_pos) | ||
476 | vol->data2_zone_pos = tc; | ||
477 | ntfs_debug("After checks, " | ||
478 | "vol->data2_zone_pos " | ||
479 | "0x%llx.", | ||
480 | (unsigned long long) | ||
481 | vol->data2_zone_pos); | ||
482 | break; | ||
483 | default: | ||
484 | BUG(); | ||
485 | } | ||
486 | ntfs_debug("Finished. Going to out."); | ||
487 | goto out; | ||
488 | } | ||
489 | lcn++; | ||
490 | } | ||
491 | bmp_pos += buf_size; | ||
492 | ntfs_debug("After inner while loop: buf_size 0x%x, lcn " | ||
493 | "0x%llx, bmp_pos 0x%llx, need_writeback %i.", | ||
494 | buf_size, (unsigned long long)lcn, | ||
495 | (unsigned long long)bmp_pos, need_writeback); | ||
496 | if (bmp_pos < zone_end) { | ||
497 | ntfs_debug("Continuing outer while loop, " | ||
498 | "bmp_pos 0x%llx, zone_end 0x%llx.", | ||
499 | (unsigned long long)bmp_pos, | ||
500 | (unsigned long long)zone_end); | ||
501 | continue; | ||
502 | } | ||
503 | zone_pass_done: /* Finished with the current zone pass. */ | ||
504 | ntfs_debug("At zone_pass_done, pass %i.", pass); | ||
505 | if (pass == 1) { | ||
506 | /* | ||
507 | * Now do pass 2, scanning the first part of the zone | ||
508 | * we omitted in pass 1. | ||
509 | */ | ||
510 | pass = 2; | ||
511 | zone_end = zone_start; | ||
512 | switch (search_zone) { | ||
513 | case 1: /* mft_zone */ | ||
514 | zone_start = vol->mft_zone_start; | ||
515 | break; | ||
516 | case 2: /* data1_zone */ | ||
517 | zone_start = vol->mft_zone_end; | ||
518 | break; | ||
519 | case 4: /* data2_zone */ | ||
520 | zone_start = 0; | ||
521 | break; | ||
522 | default: | ||
523 | BUG(); | ||
524 | } | ||
525 | /* Sanity check. */ | ||
526 | if (zone_end < zone_start) | ||
527 | zone_end = zone_start; | ||
528 | bmp_pos = zone_start; | ||
529 | ntfs_debug("Continuing outer while loop, pass 2, " | ||
530 | "zone_start 0x%llx, zone_end 0x%llx, " | ||
531 | "bmp_pos 0x%llx.", | ||
532 | (unsigned long long)zone_start, | ||
533 | (unsigned long long)zone_end, | ||
534 | (unsigned long long)bmp_pos); | ||
535 | continue; | ||
536 | } /* pass == 2 */ | ||
537 | done_zones_check: | ||
538 | ntfs_debug("At done_zones_check, search_zone %i, done_zones " | ||
539 | "before 0x%x, done_zones after 0x%x.", | ||
540 | search_zone, done_zones, | ||
541 | done_zones | search_zone); | ||
542 | done_zones |= search_zone; | ||
543 | if (done_zones < 7) { | ||
544 | ntfs_debug("Switching zone."); | ||
545 | /* Now switch to the next zone we haven't done yet. */ | ||
546 | pass = 1; | ||
547 | switch (search_zone) { | ||
548 | case 1: | ||
549 | ntfs_debug("Switching from mft zone to data1 " | ||
550 | "zone."); | ||
551 | /* Update mft zone position. */ | ||
552 | if (rlpos) { | ||
553 | LCN tc; | ||
554 | |||
555 | ntfs_debug("Before checks, " | ||
556 | "vol->mft_zone_pos " | ||
557 | "0x%llx.", | ||
558 | (unsigned long long) | ||
559 | vol->mft_zone_pos); | ||
560 | tc = rl[rlpos - 1].lcn + | ||
561 | rl[rlpos - 1].length; | ||
562 | if (tc >= vol->mft_zone_end) { | ||
563 | vol->mft_zone_pos = | ||
564 | vol->mft_lcn; | ||
565 | if (!vol->mft_zone_end) | ||
566 | vol->mft_zone_pos = 0; | ||
567 | } else if ((bmp_initial_pos >= | ||
568 | vol->mft_zone_pos || | ||
569 | tc > vol->mft_zone_pos) | ||
570 | && tc >= vol->mft_lcn) | ||
571 | vol->mft_zone_pos = tc; | ||
572 | ntfs_debug("After checks, " | ||
573 | "vol->mft_zone_pos " | ||
574 | "0x%llx.", | ||
575 | (unsigned long long) | ||
576 | vol->mft_zone_pos); | ||
577 | } | ||
578 | /* Switch from mft zone to data1 zone. */ | ||
579 | switch_to_data1_zone: search_zone = 2; | ||
580 | zone_start = bmp_initial_pos = | ||
581 | vol->data1_zone_pos; | ||
582 | zone_end = vol->nr_clusters; | ||
583 | if (zone_start == vol->mft_zone_end) | ||
584 | pass = 2; | ||
585 | if (zone_start >= zone_end) { | ||
586 | vol->data1_zone_pos = zone_start = | ||
587 | vol->mft_zone_end; | ||
588 | pass = 2; | ||
589 | } | ||
590 | break; | ||
591 | case 2: | ||
592 | ntfs_debug("Switching from data1 zone to " | ||
593 | "data2 zone."); | ||
594 | /* Update data1 zone position. */ | ||
595 | if (rlpos) { | ||
596 | LCN tc; | ||
597 | |||
598 | ntfs_debug("Before checks, " | ||
599 | "vol->data1_zone_pos " | ||
600 | "0x%llx.", | ||
601 | (unsigned long long) | ||
602 | vol->data1_zone_pos); | ||
603 | tc = rl[rlpos - 1].lcn + | ||
604 | rl[rlpos - 1].length; | ||
605 | if (tc >= vol->nr_clusters) | ||
606 | vol->data1_zone_pos = | ||
607 | vol->mft_zone_end; | ||
608 | else if ((bmp_initial_pos >= | ||
609 | vol->data1_zone_pos || | ||
610 | tc > vol->data1_zone_pos) | ||
611 | && tc >= vol->mft_zone_end) | ||
612 | vol->data1_zone_pos = tc; | ||
613 | ntfs_debug("After checks, " | ||
614 | "vol->data1_zone_pos " | ||
615 | "0x%llx.", | ||
616 | (unsigned long long) | ||
617 | vol->data1_zone_pos); | ||
618 | } | ||
619 | /* Switch from data1 zone to data2 zone. */ | ||
620 | search_zone = 4; | ||
621 | zone_start = bmp_initial_pos = | ||
622 | vol->data2_zone_pos; | ||
623 | zone_end = vol->mft_zone_start; | ||
624 | if (!zone_start) | ||
625 | pass = 2; | ||
626 | if (zone_start >= zone_end) { | ||
627 | vol->data2_zone_pos = zone_start = | ||
628 | bmp_initial_pos = 0; | ||
629 | pass = 2; | ||
630 | } | ||
631 | break; | ||
632 | case 4: | ||
633 | ntfs_debug("Switching from data2 zone to " | ||
634 | "data1 zone."); | ||
635 | /* Update data2 zone position. */ | ||
636 | if (rlpos) { | ||
637 | LCN tc; | ||
638 | |||
639 | ntfs_debug("Before checks, " | ||
640 | "vol->data2_zone_pos " | ||
641 | "0x%llx.", | ||
642 | (unsigned long long) | ||
643 | vol->data2_zone_pos); | ||
644 | tc = rl[rlpos - 1].lcn + | ||
645 | rl[rlpos - 1].length; | ||
646 | if (tc >= vol->mft_zone_start) | ||
647 | vol->data2_zone_pos = 0; | ||
648 | else if (bmp_initial_pos >= | ||
649 | vol->data2_zone_pos || | ||
650 | tc > vol->data2_zone_pos) | ||
651 | vol->data2_zone_pos = tc; | ||
652 | ntfs_debug("After checks, " | ||
653 | "vol->data2_zone_pos " | ||
654 | "0x%llx.", | ||
655 | (unsigned long long) | ||
656 | vol->data2_zone_pos); | ||
657 | } | ||
658 | /* Switch from data2 zone to data1 zone. */ | ||
659 | goto switch_to_data1_zone; | ||
660 | default: | ||
661 | BUG(); | ||
662 | } | ||
663 | ntfs_debug("After zone switch, search_zone %i, " | ||
664 | "pass %i, bmp_initial_pos 0x%llx, " | ||
665 | "zone_start 0x%llx, zone_end 0x%llx.", | ||
666 | search_zone, pass, | ||
667 | (unsigned long long)bmp_initial_pos, | ||
668 | (unsigned long long)zone_start, | ||
669 | (unsigned long long)zone_end); | ||
670 | bmp_pos = zone_start; | ||
671 | if (zone_start == zone_end) { | ||
672 | ntfs_debug("Empty zone, going to " | ||
673 | "done_zones_check."); | ||
674 | /* Empty zone. Don't bother searching it. */ | ||
675 | goto done_zones_check; | ||
676 | } | ||
677 | ntfs_debug("Continuing outer while loop."); | ||
678 | continue; | ||
679 | } /* done_zones == 7 */ | ||
680 | ntfs_debug("All zones are finished."); | ||
681 | /* | ||
682 | * All zones are finished! If DATA_ZONE, shrink mft zone. If | ||
683 | * MFT_ZONE, we have really run out of space. | ||
684 | */ | ||
685 | mft_zone_size = vol->mft_zone_end - vol->mft_zone_start; | ||
686 | ntfs_debug("vol->mft_zone_start 0x%llx, vol->mft_zone_end " | ||
687 | "0x%llx, mft_zone_size 0x%llx.", | ||
688 | (unsigned long long)vol->mft_zone_start, | ||
689 | (unsigned long long)vol->mft_zone_end, | ||
690 | (unsigned long long)mft_zone_size); | ||
691 | if (zone == MFT_ZONE || mft_zone_size <= 0) { | ||
692 | ntfs_debug("No free clusters left, going to out."); | ||
693 | /* Really no more space left on device. */ | ||
694 | err = ENOSPC; | ||
695 | goto out; | ||
696 | } /* zone == DATA_ZONE && mft_zone_size > 0 */ | ||
697 | ntfs_debug("Shrinking mft zone."); | ||
698 | zone_end = vol->mft_zone_end; | ||
699 | mft_zone_size >>= 1; | ||
700 | if (mft_zone_size > 0) | ||
701 | vol->mft_zone_end = vol->mft_zone_start + mft_zone_size; | ||
702 | else /* mft zone and data2 zone no longer exist. */ | ||
703 | vol->data2_zone_pos = vol->mft_zone_start = | ||
704 | vol->mft_zone_end = 0; | ||
705 | if (vol->mft_zone_pos >= vol->mft_zone_end) { | ||
706 | vol->mft_zone_pos = vol->mft_lcn; | ||
707 | if (!vol->mft_zone_end) | ||
708 | vol->mft_zone_pos = 0; | ||
709 | } | ||
710 | bmp_pos = zone_start = bmp_initial_pos = | ||
711 | vol->data1_zone_pos = vol->mft_zone_end; | ||
712 | search_zone = 2; | ||
713 | pass = 2; | ||
714 | done_zones &= ~2; | ||
715 | ntfs_debug("After shrinking mft zone, mft_zone_size 0x%llx, " | ||
716 | "vol->mft_zone_start 0x%llx, " | ||
717 | "vol->mft_zone_end 0x%llx, " | ||
718 | "vol->mft_zone_pos 0x%llx, search_zone 2, " | ||
719 | "pass 2, dones_zones 0x%x, zone_start 0x%llx, " | ||
720 | "zone_end 0x%llx, vol->data1_zone_pos 0x%llx, " | ||
721 | "continuing outer while loop.", | ||
722 | (unsigned long long)mft_zone_size, | ||
723 | (unsigned long long)vol->mft_zone_start, | ||
724 | (unsigned long long)vol->mft_zone_end, | ||
725 | (unsigned long long)vol->mft_zone_pos, | ||
726 | done_zones, (unsigned long long)zone_start, | ||
727 | (unsigned long long)zone_end, | ||
728 | (unsigned long long)vol->data1_zone_pos); | ||
729 | } | ||
730 | ntfs_debug("After outer while loop."); | ||
731 | out: | ||
732 | ntfs_debug("At out."); | ||
733 | /* Add runlist terminator element. */ | ||
734 | if (likely(rl)) { | ||
735 | rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length; | ||
736 | rl[rlpos].lcn = LCN_RL_NOT_MAPPED; | ||
737 | rl[rlpos].length = 0; | ||
738 | } | ||
739 | if (likely(page && !IS_ERR(page))) { | ||
740 | if (need_writeback) { | ||
741 | ntfs_debug("Marking page dirty."); | ||
742 | flush_dcache_page(page); | ||
743 | set_page_dirty(page); | ||
744 | need_writeback = 0; | ||
745 | } | ||
746 | ntfs_unmap_page(page); | ||
747 | } | ||
748 | if (likely(!err)) { | ||
749 | up_write(&vol->lcnbmp_lock); | ||
750 | ntfs_debug("Done."); | ||
751 | return rl; | ||
752 | } | ||
753 | ntfs_error(vol->sb, "Failed to allocate clusters, aborting " | ||
754 | "(error %i).", err); | ||
755 | if (rl) { | ||
756 | int err2; | ||
757 | |||
758 | if (err == ENOSPC) | ||
759 | ntfs_debug("Not enough space to complete allocation, " | ||
760 | "err ENOSPC, first free lcn 0x%llx, " | ||
761 | "could allocate up to 0x%llx " | ||
762 | "clusters.", | ||
763 | (unsigned long long)rl[0].lcn, | ||
764 | (unsigned long long)count - clusters); | ||
765 | /* Deallocate all allocated clusters. */ | ||
766 | ntfs_debug("Attempting rollback..."); | ||
767 | err2 = ntfs_cluster_free_from_rl_nolock(vol, rl); | ||
768 | if (err2) { | ||
769 | ntfs_error(vol->sb, "Failed to rollback (error %i). " | ||
770 | "Leaving inconsistent metadata! " | ||
771 | "Unmount and run chkdsk.", err2); | ||
772 | NVolSetErrors(vol); | ||
773 | } | ||
774 | /* Free the runlist. */ | ||
775 | ntfs_free(rl); | ||
776 | } else if (err == ENOSPC) | ||
777 | ntfs_debug("No space left at all, err = ENOSPC, " | ||
778 | "first free lcn = 0x%llx.", | ||
779 | (unsigned long long)vol->data1_zone_pos); | ||
780 | up_write(&vol->lcnbmp_lock); | ||
781 | return ERR_PTR(err); | ||
782 | } | ||
783 | |||
784 | /** | ||
785 | * __ntfs_cluster_free - free clusters on an ntfs volume | ||
786 | * @vi: vfs inode whose runlist describes the clusters to free | ||
787 | * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters | ||
788 | * @count: number of clusters to free or -1 for all clusters | ||
789 | * @is_rollback: if TRUE this is a rollback operation | ||
790 | * | ||
791 | * Free @count clusters starting at the cluster @start_vcn in the runlist | ||
792 | * described by the vfs inode @vi. | ||
793 | * | ||
794 | * If @count is -1, all clusters from @start_vcn to the end of the runlist are | ||
795 | * deallocated. Thus, to completely free all clusters in a runlist, use | ||
796 | * @start_vcn = 0 and @count = -1. | ||
797 | * | ||
798 | * @is_rollback should always be FALSE, it is for internal use to rollback | ||
799 | * errors. You probably want to use ntfs_cluster_free() instead. | ||
800 | * | ||
801 | * Note, ntfs_cluster_free() does not modify the runlist at all, so the caller | ||
802 | * has to deal with it later. | ||
803 | * | ||
804 | * Return the number of deallocated clusters (not counting sparse ones) on | ||
805 | * success and -errno on error. | ||
806 | * | ||
807 | * Locking: - The runlist described by @vi must be unlocked on entry and is | ||
808 | * unlocked on return. | ||
809 | * - This function takes the runlist lock of @vi for reading and | ||
810 | * sometimes for writing and sometimes modifies the runlist. | ||
811 | * - The volume lcn bitmap must be unlocked on entry and is unlocked | ||
812 | * on return. | ||
813 | * - This function takes the volume lcn bitmap lock for writing and | ||
814 | * modifies the bitmap contents. | ||
815 | */ | ||
816 | s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, | ||
817 | const BOOL is_rollback) | ||
818 | { | ||
819 | s64 delta, to_free, total_freed, real_freed; | ||
820 | ntfs_inode *ni; | ||
821 | ntfs_volume *vol; | ||
822 | struct inode *lcnbmp_vi; | ||
823 | runlist_element *rl; | ||
824 | int err; | ||
825 | |||
826 | BUG_ON(!vi); | ||
827 | ntfs_debug("Entering for i_ino 0x%lx, start_vcn 0x%llx, count " | ||
828 | "0x%llx.%s", vi->i_ino, (unsigned long long)start_vcn, | ||
829 | (unsigned long long)count, | ||
830 | is_rollback ? " (rollback)" : ""); | ||
831 | ni = NTFS_I(vi); | ||
832 | vol = ni->vol; | ||
833 | lcnbmp_vi = vol->lcnbmp_ino; | ||
834 | BUG_ON(!lcnbmp_vi); | ||
835 | BUG_ON(start_vcn < 0); | ||
836 | BUG_ON(count < -1); | ||
837 | /* | ||
838 | * Lock the lcn bitmap for writing but only if not rolling back. We | ||
839 | * must hold the lock all the way including through rollback otherwise | ||
840 | * rollback is not possible because once we have cleared a bit and | ||
841 | * dropped the lock, anyone could have set the bit again, thus | ||
842 | * allocating the cluster for another use. | ||
843 | */ | ||
844 | if (likely(!is_rollback)) | ||
845 | down_write(&vol->lcnbmp_lock); | ||
846 | |||
847 | total_freed = real_freed = 0; | ||
848 | |||
849 | /* This returns with ni->runlist locked for reading on success. */ | ||
850 | rl = ntfs_find_vcn(ni, start_vcn, FALSE); | ||
851 | if (IS_ERR(rl)) { | ||
852 | if (!is_rollback) | ||
853 | ntfs_error(vol->sb, "Failed to find first runlist " | ||
854 | "element (error %li), aborting.", | ||
855 | PTR_ERR(rl)); | ||
856 | err = PTR_ERR(rl); | ||
857 | goto err_out; | ||
858 | } | ||
859 | if (unlikely(rl->lcn < LCN_HOLE)) { | ||
860 | if (!is_rollback) | ||
861 | ntfs_error(vol->sb, "First runlist element has " | ||
862 | "invalid lcn, aborting."); | ||
863 | err = -EIO; | ||
864 | goto unl_err_out; | ||
865 | } | ||
866 | /* Find the starting cluster inside the run that needs freeing. */ | ||
867 | delta = start_vcn - rl->vcn; | ||
868 | |||
869 | /* The number of clusters in this run that need freeing. */ | ||
870 | to_free = rl->length - delta; | ||
871 | if (count >= 0 && to_free > count) | ||
872 | to_free = count; | ||
873 | |||
874 | if (likely(rl->lcn >= 0)) { | ||
875 | /* Do the actual freeing of the clusters in this run. */ | ||
876 | err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn + delta, | ||
877 | to_free, likely(!is_rollback) ? 0 : 1); | ||
878 | if (unlikely(err)) { | ||
879 | if (!is_rollback) | ||
880 | ntfs_error(vol->sb, "Failed to clear first run " | ||
881 | "(error %i), aborting.", err); | ||
882 | goto unl_err_out; | ||
883 | } | ||
884 | /* We have freed @to_free real clusters. */ | ||
885 | real_freed = to_free; | ||
886 | }; | ||
887 | /* Go to the next run and adjust the number of clusters left to free. */ | ||
888 | ++rl; | ||
889 | if (count >= 0) | ||
890 | count -= to_free; | ||
891 | |||
892 | /* Keep track of the total "freed" clusters, including sparse ones. */ | ||
893 | total_freed = to_free; | ||
894 | /* | ||
895 | * Loop over the remaining runs, using @count as a capping value, and | ||
896 | * free them. | ||
897 | */ | ||
898 | for (; rl->length && count != 0; ++rl) { | ||
899 | if (unlikely(rl->lcn < LCN_HOLE)) { | ||
900 | VCN vcn; | ||
901 | |||
902 | /* | ||
903 | * Attempt to map runlist, dropping runlist lock for | ||
904 | * the duration. | ||
905 | */ | ||
906 | vcn = rl->vcn; | ||
907 | up_read(&ni->runlist.lock); | ||
908 | err = ntfs_map_runlist(ni, vcn); | ||
909 | if (err) { | ||
910 | if (!is_rollback) | ||
911 | ntfs_error(vol->sb, "Failed to map " | ||
912 | "runlist fragment."); | ||
913 | if (err == -EINVAL || err == -ENOENT) | ||
914 | err = -EIO; | ||
915 | goto err_out; | ||
916 | } | ||
917 | /* | ||
918 | * This returns with ni->runlist locked for reading on | ||
919 | * success. | ||
920 | */ | ||
921 | rl = ntfs_find_vcn(ni, vcn, FALSE); | ||
922 | if (IS_ERR(rl)) { | ||
923 | err = PTR_ERR(rl); | ||
924 | if (!is_rollback) | ||
925 | ntfs_error(vol->sb, "Failed to find " | ||
926 | "subsequent runlist " | ||
927 | "element."); | ||
928 | goto err_out; | ||
929 | } | ||
930 | if (unlikely(rl->lcn < LCN_HOLE)) { | ||
931 | if (!is_rollback) | ||
932 | ntfs_error(vol->sb, "Runlist element " | ||
933 | "has invalid lcn " | ||
934 | "(0x%llx).", | ||
935 | (unsigned long long) | ||
936 | rl->lcn); | ||
937 | err = -EIO; | ||
938 | goto unl_err_out; | ||
939 | } | ||
940 | } | ||
941 | /* The number of clusters in this run that need freeing. */ | ||
942 | to_free = rl->length; | ||
943 | if (count >= 0 && to_free > count) | ||
944 | to_free = count; | ||
945 | |||
946 | if (likely(rl->lcn >= 0)) { | ||
947 | /* Do the actual freeing of the clusters in the run. */ | ||
948 | err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn, | ||
949 | to_free, likely(!is_rollback) ? 0 : 1); | ||
950 | if (unlikely(err)) { | ||
951 | if (!is_rollback) | ||
952 | ntfs_error(vol->sb, "Failed to clear " | ||
953 | "subsequent run."); | ||
954 | goto unl_err_out; | ||
955 | } | ||
956 | /* We have freed @to_free real clusters. */ | ||
957 | real_freed += to_free; | ||
958 | } | ||
959 | /* Adjust the number of clusters left to free. */ | ||
960 | if (count >= 0) | ||
961 | count -= to_free; | ||
962 | |||
963 | /* Update the total done clusters. */ | ||
964 | total_freed += to_free; | ||
965 | } | ||
966 | up_read(&ni->runlist.lock); | ||
967 | if (likely(!is_rollback)) | ||
968 | up_write(&vol->lcnbmp_lock); | ||
969 | |||
970 | BUG_ON(count > 0); | ||
971 | |||
972 | /* We are done. Return the number of actually freed clusters. */ | ||
973 | ntfs_debug("Done."); | ||
974 | return real_freed; | ||
975 | unl_err_out: | ||
976 | up_read(&ni->runlist.lock); | ||
977 | err_out: | ||
978 | if (is_rollback) | ||
979 | return err; | ||
980 | /* If no real clusters were freed, no need to rollback. */ | ||
981 | if (!real_freed) { | ||
982 | up_write(&vol->lcnbmp_lock); | ||
983 | return err; | ||
984 | } | ||
985 | /* | ||
986 | * Attempt to rollback and if that succeeds just return the error code. | ||
987 | * If rollback fails, set the volume errors flag, emit an error | ||
988 | * message, and return the error code. | ||
989 | */ | ||
990 | delta = __ntfs_cluster_free(vi, start_vcn, total_freed, TRUE); | ||
991 | if (delta < 0) { | ||
992 | ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving " | ||
993 | "inconsistent metadata! Unmount and run " | ||
994 | "chkdsk.", (int)delta); | ||
995 | NVolSetErrors(vol); | ||
996 | } | ||
997 | up_write(&vol->lcnbmp_lock); | ||
998 | ntfs_error(vol->sb, "Aborting (error %i).", err); | ||
999 | return err; | ||
1000 | } | ||
1001 | |||
1002 | #endif /* NTFS_RW */ | ||
diff --git a/fs/ntfs/lcnalloc.h b/fs/ntfs/lcnalloc.h new file mode 100644 index 000000000000..4cac1c024af6 --- /dev/null +++ b/fs/ntfs/lcnalloc.h | |||
@@ -0,0 +1,112 @@ | |||
1 | /* | ||
2 | * lcnalloc.h - Exports for NTFS kernel cluster (de)allocation. Part of the | ||
3 | * Linux-NTFS project. | ||
4 | * | ||
5 | * Copyright (c) 2004 Anton Altaparmakov | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #ifndef _LINUX_NTFS_LCNALLOC_H | ||
24 | #define _LINUX_NTFS_LCNALLOC_H | ||
25 | |||
26 | #ifdef NTFS_RW | ||
27 | |||
28 | #include <linux/fs.h> | ||
29 | |||
30 | #include "types.h" | ||
31 | #include "runlist.h" | ||
32 | #include "volume.h" | ||
33 | |||
/*
 * Zones of the volume a cluster allocation can be requested from.
 * FIRST_ZONE and LAST_ZONE alias the lowest and the highest valid zone and
 * exist for sanity checking only.
 */
typedef enum {
	MFT_ZONE	= 0,		/* Allocate from $MFT zone. */
	DATA_ZONE	= 1,		/* Allocate from $DATA zone. */
	FIRST_ZONE	= MFT_ZONE,	/* Lower bound for sanity checks. */
	LAST_ZONE	= DATA_ZONE,	/* Upper bound for sanity checks. */
} NTFS_CLUSTER_ALLOCATION_ZONES;
40 | |||
/*
 * ntfs_cluster_alloc - allocate clusters on the ntfs volume @vol
 *
 * Prototype only; the definition lives outside this header.  @zone selects
 * the zone (MFT_ZONE or DATA_ZONE) to allocate from.
 *
 * Presumably returns a runlist describing the allocated clusters on success
 * and an ERR_PTR() encoded error on failure - confirm against the definition.
 *
 * NOTE(review): the allocator body that precedes __ntfs_cluster_free() in
 * this patch (presumably this function) assigns a positive ENOSPC to err and
 * later returns ERR_PTR(err).  ERR_PTR() expects a negative errno, so
 * IS_ERR() in callers would not recognise that value - verify and use
 * -ENOSPC there if confirmed.
 */
41 | extern runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, | ||
42 | const VCN start_vcn, const s64 count, const LCN start_lcn, | ||
43 | const NTFS_CLUSTER_ALLOCATION_ZONES zone); | ||
44 | |||
45 | extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, | ||
46 | s64 count, const BOOL is_rollback); | ||
47 | |||
48 | /** | ||
49 | * ntfs_cluster_free - free clusters on an ntfs volume | ||
50 | * @vi: vfs inode whose runlist describes the clusters to free | ||
51 | * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters | ||
52 | * @count: number of clusters to free or -1 for all clusters | ||
53 | * | ||
54 | * Free @count clusters starting at the cluster @start_vcn in the runlist | ||
55 | * described by the vfs inode @vi. | ||
56 | * | ||
57 | * If @count is -1, all clusters from @start_vcn to the end of the runlist are | ||
58 | * deallocated. Thus, to completely free all clusters in a runlist, use | ||
59 | * @start_vcn = 0 and @count = -1. | ||
60 | * | ||
61 | * Note, ntfs_cluster_free() does not modify the runlist at all, so the caller | ||
62 | * has to deal with it later. | ||
63 | * | ||
64 | * Return the number of deallocated clusters (not counting sparse ones) on | ||
65 | * success and -errno on error. | ||
66 | * | ||
67 | * Locking: - The runlist described by @vi must be unlocked on entry and is | ||
68 | * unlocked on return. | ||
69 | * - This function takes the runlist lock of @vi for reading and | ||
70 | * sometimes for writing and sometimes modifies the runlist. | ||
71 | * - The volume lcn bitmap must be unlocked on entry and is unlocked | ||
72 | * on return. | ||
73 | * - This function takes the volume lcn bitmap lock for writing and | ||
74 | * modifies the bitmap contents. | ||
75 | */ | ||
76 | static inline s64 ntfs_cluster_free(struct inode *vi, const VCN start_vcn, | ||
77 | s64 count) | ||
78 | { | ||
79 | return __ntfs_cluster_free(vi, start_vcn, count, FALSE); | ||
80 | } | ||
81 | |||
82 | extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, | ||
83 | const runlist_element *rl); | ||
84 | |||
85 | /** | ||
86 | * ntfs_cluster_free_from_rl - free clusters from runlist | ||
87 | * @vol: mounted ntfs volume on which to free the clusters | ||
88 | * @rl: runlist describing the clusters to free | ||
89 | * | ||
90 | * Free all the clusters described by the runlist @rl on the volume @vol. In | ||
91 | * the case of an error being returned, at least some of the clusters were not | ||
92 | * freed. | ||
93 | * | ||
94 | * Return 0 on success and -errno on error. | ||
95 | * | ||
96 | * Locking: This function takes the volume lcn bitmap lock for writing and | ||
97 | * modifies the bitmap contents. | ||
98 | */ | ||
99 | static inline int ntfs_cluster_free_from_rl(ntfs_volume *vol, | ||
100 | const runlist_element *rl) | ||
101 | { | ||
102 | int ret; | ||
103 | |||
104 | down_write(&vol->lcnbmp_lock); | ||
105 | ret = ntfs_cluster_free_from_rl_nolock(vol, rl); | ||
106 | up_write(&vol->lcnbmp_lock); | ||
107 | return ret; | ||
108 | } | ||
109 | |||
110 | #endif /* NTFS_RW */ | ||
111 | |||
112 | #endif /* defined _LINUX_NTFS_LCNALLOC_H */ | ||
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c new file mode 100644 index 000000000000..5e280abafab3 --- /dev/null +++ b/fs/ntfs/logfile.c | |||
@@ -0,0 +1,705 @@ | |||
1 | /* | ||
2 | * logfile.c - NTFS kernel journal handling. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2002-2004 Anton Altaparmakov | ||
5 | * | ||
6 | * This program/include file is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as published | ||
8 | * by the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program/include file is distributed in the hope that it will be | ||
12 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
13 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program (in the main directory of the Linux-NTFS | ||
18 | * distribution in the file COPYING); if not, write to the Free Software | ||
19 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | |||
22 | #ifdef NTFS_RW | ||
23 | |||
24 | #include <linux/types.h> | ||
25 | #include <linux/fs.h> | ||
26 | #include <linux/highmem.h> | ||
27 | #include <linux/buffer_head.h> | ||
28 | #include <linux/bitops.h> | ||
29 | |||
30 | #include "attrib.h" | ||
31 | #include "aops.h" | ||
32 | #include "debug.h" | ||
33 | #include "logfile.h" | ||
34 | #include "malloc.h" | ||
35 | #include "volume.h" | ||
36 | #include "ntfs.h" | ||
37 | |||
38 | /** | ||
39 | * ntfs_check_restart_page_header - check the page header for consistency | ||
40 | * @vi: $LogFile inode to which the restart page header belongs | ||
41 | * @rp: restart page header to check | ||
42 | * @pos: position in @vi at which the restart page header resides | ||
43 | * | ||
44 | * Check the restart page header @rp for consistency and return TRUE if it is | ||
45 | * consistent and FALSE otherwise. | ||
46 | * | ||
47 | * This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not | ||
48 | * require the full restart page. | ||
49 | */ | ||
50 | static BOOL ntfs_check_restart_page_header(struct inode *vi, | ||
51 | RESTART_PAGE_HEADER *rp, s64 pos) | ||
52 | { | ||
53 | u32 logfile_system_page_size, logfile_log_page_size; | ||
54 | u16 usa_count, usa_ofs, usa_end, ra_ofs; | ||
55 | |||
56 | ntfs_debug("Entering."); | ||
57 | /* | ||
58 | * If the system or log page sizes are smaller than the ntfs block size | ||
59 | * or either is not a power of 2 we cannot handle this log file. | ||
60 | */ | ||
61 | logfile_system_page_size = le32_to_cpu(rp->system_page_size); | ||
62 | logfile_log_page_size = le32_to_cpu(rp->log_page_size); | ||
63 | if (logfile_system_page_size < NTFS_BLOCK_SIZE || | ||
64 | logfile_log_page_size < NTFS_BLOCK_SIZE || | ||
65 | logfile_system_page_size & | ||
66 | (logfile_system_page_size - 1) || | ||
67 | logfile_log_page_size & (logfile_log_page_size - 1)) { | ||
68 | ntfs_error(vi->i_sb, "$LogFile uses unsupported page size."); | ||
69 | return FALSE; | ||
70 | } | ||
71 | /* | ||
72 | * We must be either at !pos (1st restart page) or at pos = system page | ||
73 | * size (2nd restart page). | ||
74 | */ | ||
75 | if (pos && pos != logfile_system_page_size) { | ||
76 | ntfs_error(vi->i_sb, "Found restart area in incorrect " | ||
77 | "position in $LogFile."); | ||
78 | return FALSE; | ||
79 | } | ||
80 | /* We only know how to handle version 1.1. */ | ||
81 | if (sle16_to_cpu(rp->major_ver) != 1 || | ||
82 | sle16_to_cpu(rp->minor_ver) != 1) { | ||
83 | ntfs_error(vi->i_sb, "$LogFile version %i.%i is not " | ||
84 | "supported. (This driver supports version " | ||
85 | "1.1 only.)", (int)sle16_to_cpu(rp->major_ver), | ||
86 | (int)sle16_to_cpu(rp->minor_ver)); | ||
87 | return FALSE; | ||
88 | } | ||
89 | /* Verify the size of the update sequence array. */ | ||
90 | usa_count = 1 + (logfile_system_page_size >> NTFS_BLOCK_SIZE_BITS); | ||
91 | if (usa_count != le16_to_cpu(rp->usa_count)) { | ||
92 | ntfs_error(vi->i_sb, "$LogFile restart page specifies " | ||
93 | "inconsistent update sequence array count."); | ||
94 | return FALSE; | ||
95 | } | ||
96 | /* Verify the position of the update sequence array. */ | ||
97 | usa_ofs = le16_to_cpu(rp->usa_ofs); | ||
98 | usa_end = usa_ofs + usa_count * sizeof(u16); | ||
99 | if (usa_ofs < sizeof(RESTART_PAGE_HEADER) || | ||
100 | usa_end > NTFS_BLOCK_SIZE - sizeof(u16)) { | ||
101 | ntfs_error(vi->i_sb, "$LogFile restart page specifies " | ||
102 | "inconsistent update sequence array offset."); | ||
103 | return FALSE; | ||
104 | } | ||
105 | /* | ||
106 | * Verify the position of the restart area. It must be: | ||
107 | * - aligned to 8-byte boundary, | ||
108 | * - after the update sequence array, and | ||
109 | * - within the system page size. | ||
110 | */ | ||
111 | ra_ofs = le16_to_cpu(rp->restart_area_offset); | ||
112 | if (ra_ofs & 7 || ra_ofs < usa_end || | ||
113 | ra_ofs > logfile_system_page_size) { | ||
114 | ntfs_error(vi->i_sb, "$LogFile restart page specifies " | ||
115 | "inconsistent restart area offset."); | ||
116 | return FALSE; | ||
117 | } | ||
118 | /* | ||
119 | * Only restart pages modified by chkdsk are allowed to have chkdsk_lsn | ||
120 | * set. | ||
121 | */ | ||
122 | if (!ntfs_is_chkd_record(rp->magic) && sle64_to_cpu(rp->chkdsk_lsn)) { | ||
123 | ntfs_error(vi->i_sb, "$LogFile restart page is not modified " | ||
124 | "chkdsk but a chkdsk LSN is specified."); | ||
125 | return FALSE; | ||
126 | } | ||
127 | ntfs_debug("Done."); | ||
128 | return TRUE; | ||
129 | } | ||
130 | |||
131 | /** | ||
132 | * ntfs_check_restart_area - check the restart area for consistency | ||
133 | * @vi: $LogFile inode to which the restart page belongs | ||
134 | * @rp: restart page whose restart area to check | ||
135 | * | ||
136 | * Check the restart area of the restart page @rp for consistency and return | ||
137 | * TRUE if it is consistent and FALSE otherwise. | ||
138 | * | ||
139 | * This function assumes that the restart page header has already been | ||
140 | * consistency checked. | ||
141 | * | ||
142 | * This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not | ||
143 | * require the full restart page. | ||
144 | */ | ||
145 | static BOOL ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp) | ||
146 | { | ||
147 | u64 file_size; | ||
148 | RESTART_AREA *ra; | ||
149 | u16 ra_ofs, ra_len, ca_ofs; | ||
150 | u8 fs_bits; | ||
151 | |||
152 | ntfs_debug("Entering."); | ||
153 | ra_ofs = le16_to_cpu(rp->restart_area_offset); | ||
154 | ra = (RESTART_AREA*)((u8*)rp + ra_ofs); | ||
155 | /* | ||
156 | * Everything before ra->file_size must be before the first word | ||
157 | * protected by an update sequence number. This ensures that it is | ||
158 | * safe to access ra->client_array_offset. | ||
159 | */ | ||
160 | if (ra_ofs + offsetof(RESTART_AREA, file_size) > | ||
161 | NTFS_BLOCK_SIZE - sizeof(u16)) { | ||
162 | ntfs_error(vi->i_sb, "$LogFile restart area specifies " | ||
163 | "inconsistent file offset."); | ||
164 | return FALSE; | ||
165 | } | ||
166 | /* | ||
167 | * Now that we can access ra->client_array_offset, make sure everything | ||
168 | * up to the log client array is before the first word protected by an | ||
169 | * update sequence number. This ensures we can access all of the | ||
170 | * restart area elements safely. Also, the client array offset must be | ||
171 | * aligned to an 8-byte boundary. | ||
172 | */ | ||
173 | ca_ofs = le16_to_cpu(ra->client_array_offset); | ||
174 | if (((ca_ofs + 7) & ~7) != ca_ofs || | ||
175 | ra_ofs + ca_ofs > NTFS_BLOCK_SIZE - sizeof(u16)) { | ||
176 | ntfs_error(vi->i_sb, "$LogFile restart area specifies " | ||
177 | "inconsistent client array offset."); | ||
178 | return FALSE; | ||
179 | } | ||
180 | /* | ||
181 | * The restart area must end within the system page size both when | ||
182 | * calculated manually and as specified by ra->restart_area_length. | ||
183 | * Also, the calculated length must not exceed the specified length. | ||
184 | */ | ||
185 | ra_len = ca_ofs + le16_to_cpu(ra->log_clients) * | ||
186 | sizeof(LOG_CLIENT_RECORD); | ||
187 | if (ra_ofs + ra_len > le32_to_cpu(rp->system_page_size) || | ||
188 | ra_ofs + le16_to_cpu(ra->restart_area_length) > | ||
189 | le32_to_cpu(rp->system_page_size) || | ||
190 | ra_len > le16_to_cpu(ra->restart_area_length)) { | ||
191 | ntfs_error(vi->i_sb, "$LogFile restart area is out of bounds " | ||
192 | "of the system page size specified by the " | ||
193 | "restart page header and/or the specified " | ||
194 | "restart area length is inconsistent."); | ||
195 | return FALSE; | ||
196 | } | ||
197 | /* | ||
198 | * The ra->client_free_list and ra->client_in_use_list must be either | ||
199 | * LOGFILE_NO_CLIENT or less than ra->log_clients or they are | ||
200 | * overflowing the client array. | ||
201 | */ | ||
202 | if ((ra->client_free_list != LOGFILE_NO_CLIENT && | ||
203 | le16_to_cpu(ra->client_free_list) >= | ||
204 | le16_to_cpu(ra->log_clients)) || | ||
205 | (ra->client_in_use_list != LOGFILE_NO_CLIENT && | ||
206 | le16_to_cpu(ra->client_in_use_list) >= | ||
207 | le16_to_cpu(ra->log_clients))) { | ||
208 | ntfs_error(vi->i_sb, "$LogFile restart area specifies " | ||
209 | "overflowing client free and/or in use lists."); | ||
210 | return FALSE; | ||
211 | } | ||
212 | /* | ||
213 | * Check ra->seq_number_bits against ra->file_size for consistency. | ||
214 | * We cannot just use ffs() because the file size is not a power of 2. | ||
215 | */ | ||
216 | file_size = (u64)sle64_to_cpu(ra->file_size); | ||
217 | fs_bits = 0; | ||
218 | while (file_size) { | ||
219 | file_size >>= 1; | ||
220 | fs_bits++; | ||
221 | } | ||
222 | if (le32_to_cpu(ra->seq_number_bits) != 67 - fs_bits) { | ||
223 | ntfs_error(vi->i_sb, "$LogFile restart area specifies " | ||
224 | "inconsistent sequence number bits."); | ||
225 | return FALSE; | ||
226 | } | ||
227 | /* The log record header length must be a multiple of 8. */ | ||
228 | if (((le16_to_cpu(ra->log_record_header_length) + 7) & ~7) != | ||
229 | le16_to_cpu(ra->log_record_header_length)) { | ||
230 | ntfs_error(vi->i_sb, "$LogFile restart area specifies " | ||
231 | "inconsistent log record header length."); | ||
232 | return FALSE; | ||
233 | } | ||
234 | /* Dito for the log page data offset. */ | ||
235 | if (((le16_to_cpu(ra->log_page_data_offset) + 7) & ~7) != | ||
236 | le16_to_cpu(ra->log_page_data_offset)) { | ||
237 | ntfs_error(vi->i_sb, "$LogFile restart area specifies " | ||
238 | "inconsistent log page data offset."); | ||
239 | return FALSE; | ||
240 | } | ||
241 | ntfs_debug("Done."); | ||
242 | return TRUE; | ||
243 | } | ||
244 | |||
245 | /** | ||
246 | * ntfs_check_log_client_array - check the log client array for consistency | ||
247 | * @vi: $LogFile inode to which the restart page belongs | ||
248 | * @rp: restart page whose log client array to check | ||
249 | * | ||
250 | * Check the log client array of the restart page @rp for consistency and | ||
251 | * return TRUE if it is consistent and FALSE otherwise. | ||
252 | * | ||
253 | * This function assumes that the restart page header and the restart area have | ||
254 | * already been consistency checked. | ||
255 | * | ||
256 | * Unlike ntfs_check_restart_page_header() and ntfs_check_restart_area(), this | ||
257 | * function needs @rp->system_page_size bytes in @rp, i.e. it requires the full | ||
258 | * restart page and the page must be multi sector transfer deprotected. | ||
259 | */ | ||
260 | static BOOL ntfs_check_log_client_array(struct inode *vi, | ||
261 | RESTART_PAGE_HEADER *rp) | ||
262 | { | ||
263 | RESTART_AREA *ra; | ||
264 | LOG_CLIENT_RECORD *ca, *cr; | ||
265 | u16 nr_clients, idx; | ||
266 | BOOL in_free_list, idx_is_first; | ||
267 | |||
268 | ntfs_debug("Entering."); | ||
269 | ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset)); | ||
270 | ca = (LOG_CLIENT_RECORD*)((u8*)ra + | ||
271 | le16_to_cpu(ra->client_array_offset)); | ||
272 | /* | ||
273 | * Check the ra->client_free_list first and then check the | ||
274 | * ra->client_in_use_list. Check each of the log client records in | ||
275 | * each of the lists and check that the array does not overflow the | ||
276 | * ra->log_clients value. Also keep track of the number of records | ||
277 | * visited as there cannot be more than ra->log_clients records and | ||
278 | * that way we detect eventual loops in within a list. | ||
279 | */ | ||
280 | nr_clients = le16_to_cpu(ra->log_clients); | ||
281 | idx = le16_to_cpu(ra->client_free_list); | ||
282 | in_free_list = TRUE; | ||
283 | check_list: | ||
284 | for (idx_is_first = TRUE; idx != LOGFILE_NO_CLIENT_CPU; nr_clients--, | ||
285 | idx = le16_to_cpu(cr->next_client)) { | ||
286 | if (!nr_clients || idx >= le16_to_cpu(ra->log_clients)) | ||
287 | goto err_out; | ||
288 | /* Set @cr to the current log client record. */ | ||
289 | cr = ca + idx; | ||
290 | /* The first log client record must not have a prev_client. */ | ||
291 | if (idx_is_first) { | ||
292 | if (cr->prev_client != LOGFILE_NO_CLIENT) | ||
293 | goto err_out; | ||
294 | idx_is_first = FALSE; | ||
295 | } | ||
296 | } | ||
297 | /* Switch to and check the in use list if we just did the free list. */ | ||
298 | if (in_free_list) { | ||
299 | in_free_list = FALSE; | ||
300 | idx = le16_to_cpu(ra->client_in_use_list); | ||
301 | goto check_list; | ||
302 | } | ||
303 | ntfs_debug("Done."); | ||
304 | return TRUE; | ||
305 | err_out: | ||
306 | ntfs_error(vi->i_sb, "$LogFile log client array is corrupt."); | ||
307 | return FALSE; | ||
308 | } | ||
309 | |||
310 | /** | ||
311 | * ntfs_check_and_load_restart_page - check the restart page for consistency | ||
312 | * @vi: $LogFile inode to which the restart page belongs | ||
313 | * @rp: restart page to check | ||
314 | * @pos: position in @vi at which the restart page resides | ||
315 | * @wrp: copy of the multi sector transfer deprotected restart page | ||
316 | * | ||
317 | * Check the restart page @rp for consistency and return TRUE if it is | ||
318 | * consistent and FALSE otherwise. | ||
319 | * | ||
320 | * This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not | ||
321 | * require the full restart page. | ||
322 | * | ||
323 | * If @wrp is not NULL, on success, *@wrp will point to a buffer containing a | ||
324 | * copy of the complete multi sector transfer deprotected page. On failure, | ||
325 | * *@wrp is undefined. | ||
326 | */ | ||
327 | static BOOL ntfs_check_and_load_restart_page(struct inode *vi, | ||
328 | RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp) | ||
329 | { | ||
330 | RESTART_AREA *ra; | ||
331 | RESTART_PAGE_HEADER *trp; | ||
332 | int size; | ||
333 | BOOL ret; | ||
334 | |||
335 | ntfs_debug("Entering."); | ||
336 | /* Check the restart page header for consistency. */ | ||
337 | if (!ntfs_check_restart_page_header(vi, rp, pos)) { | ||
338 | /* Error output already done inside the function. */ | ||
339 | return FALSE; | ||
340 | } | ||
341 | /* Check the restart area for consistency. */ | ||
342 | if (!ntfs_check_restart_area(vi, rp)) { | ||
343 | /* Error output already done inside the function. */ | ||
344 | return FALSE; | ||
345 | } | ||
346 | ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset)); | ||
347 | /* | ||
348 | * Allocate a buffer to store the whole restart page so we can multi | ||
349 | * sector transfer deprotect it. | ||
350 | */ | ||
351 | trp = ntfs_malloc_nofs(le32_to_cpu(rp->system_page_size)); | ||
352 | if (!trp) { | ||
353 | ntfs_error(vi->i_sb, "Failed to allocate memory for $LogFile " | ||
354 | "restart page buffer."); | ||
355 | return FALSE; | ||
356 | } | ||
357 | /* | ||
358 | * Read the whole of the restart page into the buffer. If it fits | ||
359 | * completely inside @rp, just copy it from there. Otherwise map all | ||
360 | * the required pages and copy the data from them. | ||
361 | */ | ||
362 | size = PAGE_CACHE_SIZE - (pos & ~PAGE_CACHE_MASK); | ||
363 | if (size >= le32_to_cpu(rp->system_page_size)) { | ||
364 | memcpy(trp, rp, le32_to_cpu(rp->system_page_size)); | ||
365 | } else { | ||
366 | pgoff_t idx; | ||
367 | struct page *page; | ||
368 | int have_read, to_read; | ||
369 | |||
370 | /* First copy what we already have in @rp. */ | ||
371 | memcpy(trp, rp, size); | ||
372 | /* Copy the remaining data one page at a time. */ | ||
373 | have_read = size; | ||
374 | to_read = le32_to_cpu(rp->system_page_size) - size; | ||
375 | idx = (pos + size) >> PAGE_CACHE_SHIFT; | ||
376 | BUG_ON((pos + size) & ~PAGE_CACHE_MASK); | ||
377 | do { | ||
378 | page = ntfs_map_page(vi->i_mapping, idx); | ||
379 | if (IS_ERR(page)) { | ||
380 | ntfs_error(vi->i_sb, "Error mapping $LogFile " | ||
381 | "page (index %lu).", idx); | ||
382 | goto err_out; | ||
383 | } | ||
384 | size = min_t(int, to_read, PAGE_CACHE_SIZE); | ||
385 | memcpy((u8*)trp + have_read, page_address(page), size); | ||
386 | ntfs_unmap_page(page); | ||
387 | have_read += size; | ||
388 | to_read -= size; | ||
389 | idx++; | ||
390 | } while (to_read > 0); | ||
391 | } | ||
392 | /* Perform the multi sector transfer deprotection on the buffer. */ | ||
393 | if (post_read_mst_fixup((NTFS_RECORD*)trp, | ||
394 | le32_to_cpu(rp->system_page_size))) { | ||
395 | ntfs_error(vi->i_sb, "Multi sector transfer error detected in " | ||
396 | "$LogFile restart page."); | ||
397 | goto err_out; | ||
398 | } | ||
399 | /* Check the log client records for consistency. */ | ||
400 | ret = ntfs_check_log_client_array(vi, trp); | ||
401 | if (ret && wrp) | ||
402 | *wrp = trp; | ||
403 | else | ||
404 | ntfs_free(trp); | ||
405 | ntfs_debug("Done."); | ||
406 | return ret; | ||
407 | err_out: | ||
408 | ntfs_free(trp); | ||
409 | return FALSE; | ||
410 | } | ||
411 | |||
412 | /** | ||
413 | * ntfs_ckeck_logfile - check in the journal if the volume is consistent | ||
414 | * @log_vi: struct inode of loaded journal $LogFile to check | ||
415 | * | ||
416 | * Check the $LogFile journal for consistency and return TRUE if it is | ||
417 | * consistent and FALSE if not. | ||
418 | * | ||
419 | * At present we only check the two restart pages and ignore the log record | ||
420 | * pages. | ||
421 | * | ||
422 | * Note that the MstProtected flag is not set on the $LogFile inode and hence | ||
423 | * when reading pages they are not deprotected. This is because we do not know | ||
424 | * if the $LogFile was created on a system with a different page size to ours | ||
425 | * yet and mst deprotection would fail if our page size is smaller. | ||
426 | */ | ||
427 | BOOL ntfs_check_logfile(struct inode *log_vi) | ||
428 | { | ||
429 | s64 size, pos, rstr1_pos, rstr2_pos; | ||
430 | ntfs_volume *vol = NTFS_SB(log_vi->i_sb); | ||
431 | struct address_space *mapping = log_vi->i_mapping; | ||
432 | struct page *page = NULL; | ||
433 | u8 *kaddr = NULL; | ||
434 | RESTART_PAGE_HEADER *rstr1_ph = NULL; | ||
435 | RESTART_PAGE_HEADER *rstr2_ph = NULL; | ||
436 | int log_page_size, log_page_mask, ofs; | ||
437 | BOOL logfile_is_empty = TRUE; | ||
438 | BOOL rstr1_found = FALSE; | ||
439 | BOOL rstr2_found = FALSE; | ||
440 | u8 log_page_bits; | ||
441 | |||
442 | ntfs_debug("Entering."); | ||
443 | /* An empty $LogFile must have been clean before it got emptied. */ | ||
444 | if (NVolLogFileEmpty(vol)) | ||
445 | goto is_empty; | ||
446 | size = log_vi->i_size; | ||
447 | /* Make sure the file doesn't exceed the maximum allowed size. */ | ||
448 | if (size > MaxLogFileSize) | ||
449 | size = MaxLogFileSize; | ||
450 | /* | ||
451 | * Truncate size to a multiple of the page cache size or the default | ||
452 | * log page size if the page cache size is between the default log page | ||
453 | * log page size if the page cache size is between the default log page | ||
454 | * size and twice that. | ||
455 | */ | ||
456 | if (PAGE_CACHE_SIZE >= DefaultLogPageSize && PAGE_CACHE_SIZE <= | ||
457 | DefaultLogPageSize * 2) | ||
458 | log_page_size = DefaultLogPageSize; | ||
459 | else | ||
460 | log_page_size = PAGE_CACHE_SIZE; | ||
461 | log_page_mask = log_page_size - 1; | ||
462 | /* | ||
463 | * Use generic_ffs() instead of ffs() to enable the compiler to | ||
464 | * optimize log_page_size and log_page_bits into constants. | ||
465 | */ | ||
466 | log_page_bits = generic_ffs(log_page_size) - 1; | ||
467 | size &= ~(log_page_size - 1); | ||
468 | /* | ||
469 | * Ensure the log file is big enough to store at least the two restart | ||
470 | * pages and the minimum number of log record pages. | ||
471 | */ | ||
472 | if (size < log_page_size * 2 || (size - log_page_size * 2) >> | ||
473 | log_page_bits < MinLogRecordPages) { | ||
474 | ntfs_error(vol->sb, "$LogFile is too small."); | ||
475 | return FALSE; | ||
476 | } | ||
477 | /* | ||
478 | * Read through the file looking for a restart page. Since the restart | ||
479 | * page header is at the beginning of a page we only need to search at | ||
480 | * what could be the beginning of a page (for each page size) rather | ||
481 | * than scanning the whole file byte by byte. If all potential places | ||
482 | * contain empty and uninitialzed records, the log file can be assumed | ||
483 | * to be empty. | ||
484 | */ | ||
485 | for (pos = 0; pos < size; pos <<= 1) { | ||
486 | pgoff_t idx = pos >> PAGE_CACHE_SHIFT; | ||
487 | if (!page || page->index != idx) { | ||
488 | if (page) | ||
489 | ntfs_unmap_page(page); | ||
490 | page = ntfs_map_page(mapping, idx); | ||
491 | if (IS_ERR(page)) { | ||
492 | ntfs_error(vol->sb, "Error mapping $LogFile " | ||
493 | "page (index %lu).", idx); | ||
494 | return FALSE; | ||
495 | } | ||
496 | } | ||
497 | kaddr = (u8*)page_address(page) + (pos & ~PAGE_CACHE_MASK); | ||
498 | /* | ||
499 | * A non-empty block means the logfile is not empty while an | ||
500 | * empty block after a non-empty block has been encountered | ||
501 | * means we are done. | ||
502 | */ | ||
503 | if (!ntfs_is_empty_recordp((le32*)kaddr)) | ||
504 | logfile_is_empty = FALSE; | ||
505 | else if (!logfile_is_empty) | ||
506 | break; | ||
507 | /* | ||
508 | * A log record page means there cannot be a restart page after | ||
509 | * this so no need to continue searching. | ||
510 | */ | ||
511 | if (ntfs_is_rcrd_recordp((le32*)kaddr)) | ||
512 | break; | ||
513 | /* | ||
514 | * A modified by chkdsk restart page means we cannot handle | ||
515 | * this log file. | ||
516 | */ | ||
517 | if (ntfs_is_chkd_recordp((le32*)kaddr)) { | ||
518 | ntfs_error(vol->sb, "$LogFile has been modified by " | ||
519 | "chkdsk. Mount this volume in " | ||
520 | "Windows."); | ||
521 | goto err_out; | ||
522 | } | ||
523 | /* If not a restart page, continue. */ | ||
524 | if (!ntfs_is_rstr_recordp((le32*)kaddr)) { | ||
525 | /* Skip to the minimum page size for the next one. */ | ||
526 | if (!pos) | ||
527 | pos = NTFS_BLOCK_SIZE >> 1; | ||
528 | continue; | ||
529 | } | ||
530 | /* We now know we have a restart page. */ | ||
531 | if (!pos) { | ||
532 | rstr1_found = TRUE; | ||
533 | rstr1_pos = pos; | ||
534 | } else { | ||
535 | if (rstr2_found) { | ||
536 | ntfs_error(vol->sb, "Found more than two " | ||
537 | "restart pages in $LogFile."); | ||
538 | goto err_out; | ||
539 | } | ||
540 | rstr2_found = TRUE; | ||
541 | rstr2_pos = pos; | ||
542 | } | ||
543 | /* | ||
544 | * Check the restart page for consistency and get a copy of the | ||
545 | * complete multi sector transfer deprotected restart page. | ||
546 | */ | ||
547 | if (!ntfs_check_and_load_restart_page(log_vi, | ||
548 | (RESTART_PAGE_HEADER*)kaddr, pos, | ||
549 | !pos ? &rstr1_ph : &rstr2_ph)) { | ||
550 | /* Error output already done inside the function. */ | ||
551 | goto err_out; | ||
552 | } | ||
553 | /* | ||
554 | * We have a valid restart page. The next one must be after | ||
555 | * a whole system page size as specified by the valid restart | ||
556 | * page. | ||
557 | */ | ||
558 | if (!pos) | ||
559 | pos = le32_to_cpu(rstr1_ph->system_page_size) >> 1; | ||
560 | } | ||
561 | if (page) { | ||
562 | ntfs_unmap_page(page); | ||
563 | page = NULL; | ||
564 | } | ||
565 | if (logfile_is_empty) { | ||
566 | NVolSetLogFileEmpty(vol); | ||
567 | is_empty: | ||
568 | ntfs_debug("Done. ($LogFile is empty.)"); | ||
569 | return TRUE; | ||
570 | } | ||
571 | if (!rstr1_found || !rstr2_found) { | ||
572 | ntfs_error(vol->sb, "Did not find two restart pages in " | ||
573 | "$LogFile."); | ||
574 | goto err_out; | ||
575 | } | ||
576 | /* | ||
577 | * The two restart areas must be identical except for the update | ||
578 | * sequence number. | ||
579 | */ | ||
580 | ofs = le16_to_cpu(rstr1_ph->usa_ofs); | ||
581 | if (memcmp(rstr1_ph, rstr2_ph, ofs) || (ofs += sizeof(u16), | ||
582 | memcmp((u8*)rstr1_ph + ofs, (u8*)rstr2_ph + ofs, | ||
583 | le32_to_cpu(rstr1_ph->system_page_size) - ofs))) { | ||
584 | ntfs_error(vol->sb, "The two restart pages in $LogFile do not " | ||
585 | "match."); | ||
586 | goto err_out; | ||
587 | } | ||
588 | ntfs_free(rstr1_ph); | ||
589 | ntfs_free(rstr2_ph); | ||
590 | /* All consistency checks passed. */ | ||
591 | ntfs_debug("Done."); | ||
592 | return TRUE; | ||
593 | err_out: | ||
594 | if (page) | ||
595 | ntfs_unmap_page(page); | ||
596 | if (rstr1_ph) | ||
597 | ntfs_free(rstr1_ph); | ||
598 | if (rstr2_ph) | ||
599 | ntfs_free(rstr2_ph); | ||
600 | return FALSE; | ||
601 | } | ||
602 | |||
603 | /** | ||
604 | * ntfs_is_logfile_clean - check in the journal if the volume is clean | ||
605 | * @log_vi: struct inode of loaded journal $LogFile to check | ||
606 | * | ||
607 | * Analyze the $LogFile journal and return TRUE if it indicates the volume was | ||
608 | * shutdown cleanly and FALSE if not. | ||
609 | * | ||
610 | * At present we only look at the two restart pages and ignore the log record | ||
611 | * pages. This is a little bit crude in that there will be a very small number | ||
612 | * of cases where we think that a volume is dirty when in fact it is clean. | ||
613 | * This should only affect volumes that have not been shutdown cleanly but did | ||
614 | * not have any pending, non-check-pointed i/o, i.e. they were completely idle | ||
615 | * at least for the five seconds preceeding the unclean shutdown. | ||
616 | * | ||
617 | * This function assumes that the $LogFile journal has already been consistency | ||
618 | * checked by a call to ntfs_check_logfile() and in particular if the $LogFile | ||
619 | * is empty this function requires that NVolLogFileEmpty() is true otherwise an | ||
620 | * empty volume will be reported as dirty. | ||
621 | */ | ||
622 | BOOL ntfs_is_logfile_clean(struct inode *log_vi) | ||
623 | { | ||
624 | ntfs_volume *vol = NTFS_SB(log_vi->i_sb); | ||
625 | struct page *page; | ||
626 | RESTART_PAGE_HEADER *rp; | ||
627 | RESTART_AREA *ra; | ||
628 | |||
629 | ntfs_debug("Entering."); | ||
630 | /* An empty $LogFile must have been clean before it got emptied. */ | ||
631 | if (NVolLogFileEmpty(vol)) { | ||
632 | ntfs_debug("Done. ($LogFile is empty.)"); | ||
633 | return TRUE; | ||
634 | } | ||
635 | /* | ||
636 | * Read the first restart page. It will be possibly incomplete and | ||
637 | * will not be multi sector transfer deprotected but we only need the | ||
638 | * first NTFS_BLOCK_SIZE bytes so it does not matter. | ||
639 | */ | ||
640 | page = ntfs_map_page(log_vi->i_mapping, 0); | ||
641 | if (IS_ERR(page)) { | ||
642 | ntfs_error(vol->sb, "Error mapping $LogFile page (index 0)."); | ||
643 | return FALSE; | ||
644 | } | ||
645 | rp = (RESTART_PAGE_HEADER*)page_address(page); | ||
646 | if (!ntfs_is_rstr_record(rp->magic)) { | ||
647 | ntfs_error(vol->sb, "No restart page found at offset zero in " | ||
648 | "$LogFile. This is probably a bug in that " | ||
649 | "the $LogFile should have been consistency " | ||
650 | "checked before calling this function."); | ||
651 | goto err_out; | ||
652 | } | ||
653 | ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset)); | ||
654 | /* | ||
655 | * If the $LogFile has active clients, i.e. it is open, and we do not | ||
656 | * have the RESTART_VOLUME_IS_CLEAN bit set in the restart area flags, | ||
657 | * we assume there was an unclean shutdown. | ||
658 | */ | ||
659 | if (ra->client_in_use_list != LOGFILE_NO_CLIENT && | ||
660 | !(ra->flags & RESTART_VOLUME_IS_CLEAN)) { | ||
661 | ntfs_debug("Done. $LogFile indicates a dirty shutdown."); | ||
662 | goto err_out; | ||
663 | } | ||
664 | ntfs_unmap_page(page); | ||
665 | /* $LogFile indicates a clean shutdown. */ | ||
666 | ntfs_debug("Done. $LogFile indicates a clean shutdown."); | ||
667 | return TRUE; | ||
668 | err_out: | ||
669 | ntfs_unmap_page(page); | ||
670 | return FALSE; | ||
671 | } | ||
672 | |||
673 | /** | ||
674 | * ntfs_empty_logfile - empty the contents of the $LogFile journal | ||
675 | * @log_vi: struct inode of loaded journal $LogFile to empty | ||
676 | * | ||
677 | * Empty the contents of the $LogFile journal @log_vi and return TRUE on | ||
678 | * success and FALSE on error. | ||
679 | * | ||
680 | * This function assumes that the $LogFile journal has already been consistency | ||
681 | * checked by a call to ntfs_check_logfile() and that ntfs_is_logfile_clean() | ||
682 | * has been used to ensure that the $LogFile is clean. | ||
683 | */ | ||
684 | BOOL ntfs_empty_logfile(struct inode *log_vi) | ||
685 | { | ||
686 | ntfs_volume *vol = NTFS_SB(log_vi->i_sb); | ||
687 | |||
688 | ntfs_debug("Entering."); | ||
689 | if (!NVolLogFileEmpty(vol)) { | ||
690 | int err; | ||
691 | |||
692 | err = ntfs_attr_set(NTFS_I(log_vi), 0, log_vi->i_size, 0xff); | ||
693 | if (unlikely(err)) { | ||
694 | ntfs_error(vol->sb, "Failed to fill $LogFile with " | ||
695 | "0xff bytes (error code %i).", err); | ||
696 | return FALSE; | ||
697 | } | ||
698 | /* Set the flag so we do not have to do it again on remount. */ | ||
699 | NVolSetLogFileEmpty(vol); | ||
700 | } | ||
701 | ntfs_debug("Done."); | ||
702 | return TRUE; | ||
703 | } | ||
704 | |||
705 | #endif /* NTFS_RW */ | ||
diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h new file mode 100644 index 000000000000..4ee4378de061 --- /dev/null +++ b/fs/ntfs/logfile.h | |||
@@ -0,0 +1,307 @@ | |||
1 | /* | ||
2 | * logfile.h - Defines for NTFS kernel journal ($LogFile) handling. Part of | ||
3 | * the Linux-NTFS project. | ||
4 | * | ||
5 | * Copyright (c) 2000-2004 Anton Altaparmakov | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #ifndef _LINUX_NTFS_LOGFILE_H | ||
24 | #define _LINUX_NTFS_LOGFILE_H | ||
25 | |||
26 | #ifdef NTFS_RW | ||
27 | |||
28 | #include <linux/fs.h> | ||
29 | |||
30 | #include "types.h" | ||
31 | #include "endian.h" | ||
32 | #include "layout.h" | ||
33 | |||
34 | /* | ||
35 | * Journal ($LogFile) organization: | ||
36 | * | ||
37 | * Two restart areas present in the first two pages (restart pages, one restart | ||
38 | * area in each page). When the volume is dismounted they should be identical, | ||
39 | * except for the update sequence array which usually has a different update | ||
40 | * sequence number. | ||
41 | * | ||
42 | * These are followed by log records organized in pages headed by a log record | ||
43 | * header going up to log file size. Not all pages contain log records when a | ||
44 | * volume is first formatted, but as the volume ages, all records will be used. | ||
45 | * When the log file fills up, the records at the beginning are purged (by | ||
46 | * modifying the oldest_lsn to a higher value presumably) and writing begins | ||
47 | * at the beginning of the file. Effectively, the log file is viewed as a | ||
48 | * circular entity. | ||
49 | * | ||
50 | * NOTE: Windows NT, 2000, and XP all use log file version 1.1 but they accept | ||
51 | * versions <= 1.x, including 0.-1. (Yes, that is a minus one in there!) We | ||
52 | * probably only want to support 1.1 as this seems to be the current version | ||
53 | * and we don't know how that differs from the older versions. The only | ||
54 | * exception is if the journal is clean as marked by the two restart pages | ||
55 | * then it doesn't matter whether we are on an earlier version. We can just | ||
56 | * reinitialize the logfile and start again with version 1.1. | ||
57 | */ | ||
58 | |||
59 | /* Some $LogFile related constants. */ | ||
60 | #define MaxLogFileSize 0x100000000ULL /* 2^32 */ | ||
61 | #define DefaultLogPageSize 4096 /* Default log_page_size, in bytes. */ | ||
62 | #define MinLogRecordPages 48 /* Minimum number of log record pages. */ | ||
63 | |||
64 | /* | ||
65 | * Log file restart page header (begins a restart page; locates the RESTART_AREA). | ||
66 | */ | ||
67 | typedef struct { | ||
68 | /*Ofs*/ | ||
69 | /* 0 NTFS_RECORD; -- Unfolded here as gcc doesn't like unnamed structs. */ | ||
70 | /* 0*/ NTFS_RECORD_TYPE magic; /* The magic is "RSTR". */ | ||
71 | /* 4*/ le16 usa_ofs; /* See NTFS_RECORD definition in layout.h. | ||
72 | When creating, set this to be immediately | ||
73 | after this header structure (without any | ||
74 | alignment). */ | ||
75 | /* 6*/ le16 usa_count; /* See NTFS_RECORD definition in layout.h. */ | ||
76 | |||
77 | /* 8*/ leLSN chkdsk_lsn; /* The last log file sequence number found by | ||
78 | chkdsk. Only used when the magic is changed | ||
79 | to "CHKD". Otherwise this is zero. */ | ||
80 | /* 16*/ le32 system_page_size; /* Byte size of system pages when the log file | ||
81 | was created, has to be >= 512 and a power of | ||
82 | 2. Use this to calculate the required size | ||
83 | of the usa (usa_count) and add it to usa_ofs. | ||
84 | Then verify that the result is less than the | ||
85 | value of the restart_area_offset. */ | ||
86 | /* 20*/ le32 log_page_size; /* Byte size of log file pages, has to be >= | ||
87 | 512 and a power of 2. The default is 4096 | ||
88 | and is used when the system page size is | ||
89 | between 4096 and 8192. Otherwise this is | ||
90 | set to the system page size instead. */ | ||
91 | /* 24*/ le16 restart_area_offset;/* Byte offset from the start of this header to | ||
92 | the RESTART_AREA. Value has to be aligned | ||
93 | to 8-byte boundary. When creating, set this | ||
94 | to be after the usa. */ | ||
95 | /* 26*/ sle16 minor_ver; /* Log file minor version. Only check if major | ||
96 | version is 1. */ | ||
97 | /* 28*/ sle16 major_ver; /* Log file major version. We only support | ||
98 | version 1.1. */ | ||
99 | /* sizeof() = 30 (0x1e) bytes */ | ||
100 | } __attribute__ ((__packed__)) RESTART_PAGE_HEADER; | ||
101 | |||
102 | /* | ||
103 | * Constant for the log client indices meaning that there are no client records | ||
104 | * in this particular client array. Also inside the client records themselves, | ||
105 | * this means that there are no client records preceding or following this one. | ||
106 | */ | ||
107 | #define LOGFILE_NO_CLIENT const_cpu_to_le16(0xffff) /* For le16 on-disk fields. */ | ||
108 | #define LOGFILE_NO_CLIENT_CPU 0xffff /* Same value in CPU byte order. */ | ||
109 | |||
110 | /* | ||
111 | * These are the so far known RESTART_AREA flags (16-bit, little endian) which | ||
112 | * describe the log file in which they are present (see RESTART_AREA.flags). | ||
113 | */ | ||
114 | enum { | ||
115 | RESTART_VOLUME_IS_CLEAN = const_cpu_to_le16(0x0002), | ||
116 | RESTART_SPACE_FILLER = 0xffff, /* gcc: Force enum bit width to 16. */ | ||
117 | } __attribute__ ((__packed__)); | ||
118 | |||
119 | typedef le16 RESTART_AREA_FLAGS; /* On-disk type of RESTART_AREA.flags. */ | ||
120 | |||
121 | /* | ||
122 | * Log file restart area record. The offset of this record is found by adding | ||
123 | * the offset of the RESTART_PAGE_HEADER to the restart_area_offset value found | ||
124 | * in it. See notes at restart_area_offset above. | ||
125 | */ | ||
126 | typedef struct { | ||
127 | /*Ofs*/ | ||
128 | /* 0*/ leLSN current_lsn; /* The current, i.e. last LSN inside the log | ||
129 | when the restart area was last written. | ||
130 | This happens often but what is the interval? | ||
131 | Is it just fixed time or is it every time a | ||
132 | check point is written or something else? | ||
133 | On create set to 0. */ | ||
134 | /* 8*/ le16 log_clients; /* Number of log client records in the array of | ||
135 | log client records which follows this | ||
136 | restart area. Must be 1. */ | ||
137 | /* 10*/ le16 client_free_list; /* The index of the first free log client record | ||
138 | in the array of log client records. | ||
139 | LOGFILE_NO_CLIENT means that there are no | ||
140 | free log client records in the array. | ||
141 | If != LOGFILE_NO_CLIENT, check that | ||
142 | log_clients > client_free_list. On Win2k | ||
143 | and presumably earlier, on a clean volume | ||
144 | this is != LOGFILE_NO_CLIENT, and it should | ||
145 | be 0, i.e. the first (and only) client | ||
146 | record is free and thus the logfile is | ||
147 | closed and hence clean. A dirty volume | ||
148 | would have left the logfile open and hence | ||
149 | this would be LOGFILE_NO_CLIENT. On WinXP | ||
150 | and presumably later, the logfile is always | ||
151 | open, even on clean shutdown so this should | ||
152 | always be LOGFILE_NO_CLIENT. */ | ||
153 | /* 12*/ le16 client_in_use_list;/* The index of the first in-use log client | ||
154 | record in the array of log client records. | ||
155 | LOGFILE_NO_CLIENT means that there are no | ||
156 | in-use log client records in the array. If | ||
157 | != LOGFILE_NO_CLIENT check that log_clients | ||
158 | > client_in_use_list. On Win2k and | ||
159 | presumably earlier, on a clean volume this | ||
160 | is LOGFILE_NO_CLIENT, i.e. there are no | ||
161 | client records in use and thus the logfile | ||
162 | is closed and hence clean. A dirty volume | ||
163 | would have left the logfile open and hence | ||
164 | this would be != LOGFILE_NO_CLIENT, and it | ||
165 | should be 0, i.e. the first (and only) | ||
166 | client record is in use. On WinXP and | ||
167 | presumably later, the logfile is always | ||
168 | open, even on clean shutdown so this should | ||
169 | always be 0. */ | ||
170 | /* 14*/ RESTART_AREA_FLAGS flags;/* Flags modifying LFS behaviour. On Win2k | ||
171 | and presumably earlier this is always 0. On | ||
172 | WinXP and presumably later, if the logfile | ||
173 | was shutdown cleanly, the second bit, | ||
174 | RESTART_VOLUME_IS_CLEAN, is set. This bit | ||
175 | is cleared when the volume is mounted by | ||
176 | WinXP and set when the volume is dismounted, | ||
177 | thus if the logfile is dirty, this bit is | ||
178 | clear. Thus we don't need to check the | ||
179 | Windows version to determine if the logfile | ||
180 | is clean. Instead if the logfile is closed, | ||
181 | we know it must be clean. If it is open and | ||
182 | this bit is set, we also know it must be | ||
183 | clean. If on the other hand the logfile is | ||
184 | open and this bit is clear, we can be almost | ||
185 | certain that the logfile is dirty. */ | ||
186 | /* 16*/ le32 seq_number_bits; /* How many bits to use for the sequence | ||
187 | number. This is calculated as 67 - the | ||
188 | number of bits required to store the logfile | ||
189 | size in bytes and this can be used with | ||
190 | the specified file_size as a consistency | ||
191 | check. */ | ||
192 | /* 20*/ le16 restart_area_length;/* Length of the restart area including the | ||
193 | client array. Following checks required if | ||
194 | version matches. Otherwise, skip them. | ||
195 | restart_area_offset + restart_area_length | ||
196 | has to be <= system_page_size. Also, | ||
197 | restart_area_length has to be >= | ||
198 | client_array_offset + (log_clients * | ||
199 | sizeof(log client record)). */ | ||
200 | /* 22*/ le16 client_array_offset;/* Offset from the start of this record to | ||
201 | the first log client record if versions are | ||
202 | matched. When creating, set this to be | ||
203 | after this restart area structure, aligned | ||
204 | to 8-bytes boundary. If the versions do not | ||
205 | match, this is ignored and the offset is | ||
206 | assumed to be (sizeof(RESTART_AREA) + 7) & | ||
207 | ~7, i.e. rounded up to first 8-byte | ||
208 | boundary. Either way, client_array_offset | ||
209 | has to be aligned to an 8-byte boundary. | ||
210 | Also, restart_area_offset + | ||
211 | client_array_offset has to be <= 510. | ||
212 | Finally, client_array_offset + (log_clients | ||
213 | * sizeof(log client record)) has to be <= | ||
214 | system_page_size. On Win2k and presumably | ||
215 | earlier, this is 0x30, i.e. immediately | ||
216 | following this record. On WinXP and | ||
217 | presumably later, this is 0x40, i.e. there | ||
218 | are 16 extra bytes between this record and | ||
219 | the client array. This probably means that | ||
220 | the RESTART_AREA record is actually bigger | ||
221 | in WinXP and later. */ | ||
222 | /* 24*/ sle64 file_size; /* Usable byte size of the log file. If the | ||
223 | restart_area_offset + the offset of the | ||
224 | file_size are > 510 then corruption has | ||
225 | occurred. This is the very first check when | ||
226 | starting with the restart_area as if it | ||
227 | fails it means that some of the above values | ||
228 | will be corrupted by the multi sector | ||
229 | transfer protection. The file_size has to | ||
230 | be rounded down to be a multiple of the | ||
231 | log_page_size in the RESTART_PAGE_HEADER and | ||
232 | then it has to be at least big enough to | ||
233 | store the two restart pages and 48 (0x30) | ||
234 | log record pages. */ | ||
235 | /* 32*/ le32 last_lsn_data_length;/* Length of data of last LSN, not including | ||
236 | the log record header. On create set to | ||
237 | 0. */ | ||
238 | /* 36*/ le16 log_record_header_length;/* Byte size of the log record header. | ||
239 | If the version matches then check that the | ||
240 | value of log_record_header_length is a | ||
241 | multiple of 8, i.e. | ||
242 | (log_record_header_length + 7) & ~7 == | ||
243 | log_record_header_length. When creating set | ||
244 | it to sizeof(LOG_RECORD_HEADER), aligned to | ||
245 | 8 bytes. */ | ||
246 | /* 38*/ le16 log_page_data_offset;/* Offset to the start of data in a log record | ||
247 | page. Must be a multiple of 8. On create | ||
248 | set it to immediately after the update | ||
249 | sequence array of the log record page. */ | ||
250 | /* 40*/ le32 restart_log_open_count;/* A counter that gets incremented every | ||
251 | time the logfile is restarted which happens | ||
252 | at mount time when the logfile is opened. | ||
253 | When creating set to a random value. Win2k | ||
254 | sets it to the low 32 bits of the current | ||
255 | system time in NTFS format (see time.h). */ | ||
256 | /* 44*/ le32 reserved; /* Reserved/alignment to 8-byte boundary. */ | ||
257 | /* sizeof() = 48 (0x30) bytes */ | ||
258 | } __attribute__ ((__packed__)) RESTART_AREA; | ||
259 | |||
260 | /* | ||
261 | * Log client record. The offset of this record is found by adding the offset | ||
262 | * of the RESTART_AREA to the client_array_offset value found in it. | ||
263 | */ | ||
264 | typedef struct { | ||
265 | /*Ofs*/ | ||
266 | /* 0*/ leLSN oldest_lsn; /* Oldest LSN needed by this client. On create | ||
267 | set to 0. */ | ||
268 | /* 8*/ leLSN client_restart_lsn;/* LSN at which this client needs to restart | ||
269 | the volume, i.e. the current position within | ||
270 | the log file. At present, if clean this | ||
271 | should = current_lsn in restart area but it | ||
272 | probably also = current_lsn when dirty most | ||
273 | of the time. At create set to 0. */ | ||
274 | /* 16*/ le16 prev_client; /* The offset to the previous log client record | ||
275 | in the array of log client records. | ||
276 | LOGFILE_NO_CLIENT means there is no previous | ||
277 | client record, i.e. this is the first one. | ||
278 | This is always LOGFILE_NO_CLIENT. */ | ||
279 | /* 18*/ le16 next_client; /* The offset to the next log client record in | ||
280 | the array of log client records. | ||
281 | LOGFILE_NO_CLIENT means there are no next | ||
282 | client records, i.e. this is the last one. | ||
283 | This is always LOGFILE_NO_CLIENT. */ | ||
284 | /* 20*/ le16 seq_number; /* On Win2k and presumably earlier, this is set | ||
285 | to zero every time the logfile is restarted | ||
286 | and it is incremented when the logfile is | ||
287 | closed at dismount time. Thus it is 0 when | ||
288 | dirty and 1 when clean. On WinXP and | ||
289 | presumably later, this is always 0. */ | ||
290 | /* 22*/ u8 reserved[6]; /* Reserved/alignment. */ | ||
291 | /* 28*/ le32 client_name_length;/* Length of client name in bytes. Should | ||
292 | always be 8. */ | ||
293 | /* 32*/ ntfschar client_name[64];/* Name of the client in Unicode. Should | ||
294 | always be "NTFS" (the 8 bytes counted by | ||
295 | client_name_length), the rest set to 0. */ | ||
296 | /* sizeof() = 160 (0xa0) bytes */ | ||
297 | } __attribute__ ((__packed__)) LOG_CLIENT_RECORD; | ||
298 | |||
299 | extern BOOL ntfs_check_logfile(struct inode *log_vi); /* Consistency check the $LogFile. */ | ||
300 | |||
301 | extern BOOL ntfs_is_logfile_clean(struct inode *log_vi); /* TRUE if $LogFile indicates a clean shutdown. */ | ||
302 | |||
303 | extern BOOL ntfs_empty_logfile(struct inode *log_vi); /* Fill $LogFile with 0xff bytes; TRUE on success. */ | ||
304 | |||
305 | #endif /* NTFS_RW */ | ||
306 | |||
307 | #endif /* _LINUX_NTFS_LOGFILE_H */ | ||
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h new file mode 100644 index 000000000000..fac5944df6d8 --- /dev/null +++ b/fs/ntfs/malloc.h | |||
@@ -0,0 +1,62 @@ | |||
1 | /* | ||
2 | * malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
5 | * | ||
6 | * This program/include file is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as published | ||
8 | * by the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program/include file is distributed in the hope that it will be | ||
12 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
13 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program (in the main directory of the Linux-NTFS | ||
18 | * distribution in the file COPYING); if not, write to the Free Software | ||
19 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | |||
22 | #ifndef _LINUX_NTFS_MALLOC_H | ||
23 | #define _LINUX_NTFS_MALLOC_H | ||
24 | |||
25 | #include <linux/vmalloc.h> | ||
26 | #include <linux/slab.h> | ||
27 | #include <linux/highmem.h> | ||
28 | |||
29 | /** | ||
30 | * ntfs_malloc_nofs - allocate memory in multiples of pages | ||
31 | * @size: number of bytes to allocate (zero is a bug and triggers BUG_ON()) | ||
32 | * | ||
33 | * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and | ||
34 | * returns a pointer to the allocated memory. | ||
35 | * | ||
36 | * Returns NULL if memory is short or if @size >> PAGE_SHIFT >= num_physpages. | ||
37 | */ | ||
38 | static inline void *ntfs_malloc_nofs(unsigned long size) | ||
39 | { | ||
40 | if (likely(size <= PAGE_SIZE)) { | ||
41 | BUG_ON(!size); | ||
42 | /* kmalloc() has per-CPU caches so is faster for now. */ | ||
43 | return kmalloc(PAGE_SIZE, GFP_NOFS); | ||
44 | /* return (void *)__get_free_page(GFP_NOFS | __GFP_HIGHMEM); */ | ||
45 | } | ||
46 | if (likely(size >> PAGE_SHIFT < num_physpages)) | ||
47 | return __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL); | ||
48 | return NULL; | ||
49 | } | ||
50 | |||
51 | static inline void ntfs_free(void *addr) /* Free @addr with kfree() or vfree(). */ | ||
52 | { | ||
53 | if (likely(((unsigned long)addr < VMALLOC_START) || | ||
54 | ((unsigned long)addr >= VMALLOC_END ))) { /* Outside the vmalloc range. */ | ||
55 | kfree(addr); | ||
56 | /* free_page((unsigned long)addr); */ | ||
57 | return; | ||
58 | } | ||
59 | vfree(addr); /* Inside the vmalloc range. */ | ||
60 | } | ||
61 | |||
62 | #endif /* _LINUX_NTFS_MALLOC_H */ | ||
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c new file mode 100644 index 000000000000..dfa85ac2f8ba --- /dev/null +++ b/fs/ntfs/mft.c | |||
@@ -0,0 +1,2829 @@ | |||
1 | /** | ||
2 | * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
5 | * Copyright (c) 2002 Richard Russon | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #include <linux/buffer_head.h> | ||
24 | #include <linux/swap.h> | ||
25 | |||
26 | #include "attrib.h" | ||
27 | #include "aops.h" | ||
28 | #include "bitmap.h" | ||
29 | #include "debug.h" | ||
30 | #include "dir.h" | ||
31 | #include "lcnalloc.h" | ||
32 | #include "malloc.h" | ||
33 | #include "mft.h" | ||
34 | #include "ntfs.h" | ||
35 | |||
36 | /** | ||
37 | * map_mft_record_page - map the page in which a specific mft record resides | ||
38 | * @ni: ntfs inode whose mft record page to map | ||
39 | * | ||
40 | * This maps the page in which the mft record of the ntfs inode @ni is situated | ||
41 | * and returns a pointer to the mft record within the mapped page. | ||
42 | * | ||
43 | * Return value needs to be checked with IS_ERR() and if that is true PTR_ERR() | ||
44 | * contains the negative error code returned. | ||
45 | */ | ||
46 | static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) | ||
47 | { | ||
48 | ntfs_volume *vol = ni->vol; | ||
49 | struct inode *mft_vi = vol->mft_ino; | ||
50 | struct page *page; | ||
51 | unsigned long index, ofs, end_index; | ||
52 | |||
53 | BUG_ON(ni->page); | ||
54 | /* | ||
55 | * The index into the page cache and the offset within the page cache | ||
56 | * page of the wanted mft record. FIXME: We need to check for | ||
57 | * overflowing the unsigned long, but I don't think we would ever get | ||
58 | * here if the volume was that big... | ||
59 | */ | ||
60 | index = ni->mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT; | ||
61 | ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; | ||
62 | |||
63 | /* The maximum valid index into the page cache for $MFT's data. */ | ||
64 | end_index = mft_vi->i_size >> PAGE_CACHE_SHIFT; | ||
65 | |||
66 | /* If the wanted index is out of bounds the mft record doesn't exist. */ | ||
67 | if (unlikely(index >= end_index)) { /* In or beyond the last, partial page. */ | ||
68 | if (index > end_index || (mft_vi->i_size & ~PAGE_CACHE_MASK) < | ||
69 | ofs + vol->mft_record_size) { | ||
70 | page = ERR_PTR(-ENOENT); | ||
71 | ntfs_error(vol->sb, "Attempt to read mft record 0x%lx, " | ||
72 | "which is beyond the end of the mft. " | ||
73 | "This is probably a bug in the ntfs " | ||
74 | "driver.", ni->mft_no); | ||
75 | goto err_out; | ||
76 | } | ||
77 | } | ||
78 | /* Read, map, and pin the page. */ | ||
79 | page = ntfs_map_page(mft_vi->i_mapping, index); | ||
80 | if (likely(!IS_ERR(page))) { | ||
81 | /* Catch multi sector transfer fixup errors. */ | ||
82 | if (likely(ntfs_is_mft_recordp((le32*)(page_address(page) + | ||
83 | ofs)))) { | ||
84 | ni->page = page; | ||
85 | ni->page_ofs = ofs; | ||
86 | return page_address(page) + ofs; | ||
87 | } | ||
88 | ntfs_error(vol->sb, "Mft record 0x%lx is corrupt. " | ||
89 | "Run chkdsk.", ni->mft_no); | ||
90 | ntfs_unmap_page(page); | ||
91 | page = ERR_PTR(-EIO); | ||
92 | } | ||
93 | err_out: | ||
94 | ni->page = NULL; | ||
95 | ni->page_ofs = 0; | ||
96 | return (void*)page; /* Here page holds an ERR_PTR() encoded error. */ | ||
97 | } | ||
98 | |||
99 | /** | ||
100 | * map_mft_record - map, pin and lock an mft record | ||
101 | * @ni: ntfs inode whose MFT record to map | ||
102 | * | ||
103 | * First, take the mrec_lock semaphore. We might now be sleeping, while waiting | ||
104 | * for the semaphore if it was already locked by someone else. | ||
105 | * | ||
106 | * The page of the record is mapped using map_mft_record_page() before being | ||
107 | * returned to the caller. | ||
108 | * | ||
109 | * This in turn uses ntfs_map_page() to get the page containing the wanted mft | ||
110 | * record (it in turn calls read_cache_page() which reads it in from disk if | ||
111 | * necessary, increments the use count on the page so that it cannot disappear | ||
112 | * under us and returns a reference to the page cache page). | ||
113 | * | ||
114 | * If read_cache_page() invokes ntfs_readpage() to load the page from disk, it | ||
115 | * sets PG_locked and clears PG_uptodate on the page. Once I/O has completed | ||
116 | * and the post-read mst fixups on each mft record in the page have been | ||
117 | * performed, the page gets PG_uptodate set and PG_locked cleared (this is done | ||
118 | * in our asynchronous I/O completion handler end_buffer_read_mft_async()). | ||
119 | * ntfs_map_page() waits for PG_locked to become clear and checks if | ||
120 | * PG_uptodate is set and returns an error code if not. This provides | ||
121 | * sufficient protection against races when reading/using the page. | ||
122 | * | ||
123 | * However there is the write mapping to think about. Doing the above described | ||
124 | * checking here will be fine, because when initiating the write we will set | ||
125 | * PG_locked and clear PG_uptodate making sure nobody is touching the page | ||
126 | * contents. Doing the locking this way means that the commit to disk code in | ||
127 | * the page cache code paths is automatically sufficiently locked with us as | ||
128 | * we will not touch a page that has been locked or is not uptodate. The only | ||
129 | * locking problem then is them locking the page while we are accessing it. | ||
130 | * | ||
131 | * So that code will end up having to own the mrec_lock of all mft | ||
132 | * records/inodes present in the page before I/O can proceed. In that case we | ||
133 | * wouldn't need to bother with PG_locked and PG_uptodate as nobody will be | ||
134 | * accessing anything without owning the mrec_lock semaphore. But we do need | ||
135 | * to use them because of the read_cache_page() invocation and the code becomes | ||
136 | * so much simpler this way that it is well worth it. | ||
137 | * | ||
138 | * The mft record is now ours and we return a pointer to it. You need to check | ||
139 | * the returned pointer with IS_ERR() and if that is true, PTR_ERR() will return | ||
140 | * the error code. | ||
141 | * | ||
142 | * NOTE: Caller is responsible for setting the mft record dirty before calling | ||
143 | * unmap_mft_record(). This is obviously only necessary if the caller really | ||
144 | * modified the mft record... | ||
145 | * Q: Do we want to recycle one of the VFS inode state bits instead? | ||
146 | * A: No, the inode ones mean we want to change the mft record, not we want to | ||
147 | * write it out. | ||
148 | */ | ||
149 | MFT_RECORD *map_mft_record(ntfs_inode *ni) | ||
150 | { | ||
151 | MFT_RECORD *m; | ||
152 | |||
153 | ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no); | ||
154 | |||
155 | /* Make sure the ntfs inode doesn't go away. */ | ||
156 | atomic_inc(&ni->count); | ||
157 | |||
158 | /* Serialize access to this mft record. */ | ||
159 | down(&ni->mrec_lock); | ||
160 | |||
161 | m = map_mft_record_page(ni); | ||
162 | if (likely(!IS_ERR(m))) | ||
163 | return m; | ||
164 | |||
165 | up(&ni->mrec_lock); /* Mapping failed: undo the lock and the reference. */ | ||
166 | atomic_dec(&ni->count); | ||
167 | ntfs_error(ni->vol->sb, "Failed with error code %ld.", -PTR_ERR(m)); | ||
168 | return m; | ||
169 | } | ||
170 | |||
171 | /** | ||
172 | * unmap_mft_record_page - unmap the page in which a specific mft record resides | ||
173 | * @ni: ntfs inode whose mft record page to unmap | ||
174 | * | ||
175 | * This unmaps the page in which the mft record of the ntfs inode @ni is | ||
176 | * situated and returns. This is a NOOP if highmem is not configured. | ||
177 | * | ||
178 | * The unmap happens via ntfs_unmap_page() which in turn decrements the use | ||
179 | * count on the page thus releasing it from the pinned state. | ||
180 | * | ||
181 | * We do not actually unmap the page from memory of course, as that will be | ||
182 | * done by the page cache code itself when memory pressure increases or | ||
183 | * whatever. | ||
184 | */ | ||
185 | static inline void unmap_mft_record_page(ntfs_inode *ni) | ||
186 | { | ||
187 | BUG_ON(!ni->page); | ||
188 | |||
189 | // TODO: Handle a dirty mft record here; for now the caller must mark it dirty. | ||
190 | ntfs_unmap_page(ni->page); | ||
191 | ni->page = NULL; | ||
192 | ni->page_ofs = 0; | ||
193 | return; | ||
194 | } | ||
195 | |||
196 | /** | ||
197 | * unmap_mft_record - release a mapped mft record | ||
198 | * @ni: ntfs inode whose MFT record to unmap | ||
199 | * | ||
200 | * We release the page mapping and the mrec_lock semaphore which unmaps the mft | ||
201 | * record and releases it for others to get hold of. We also release the ntfs | ||
202 | * inode by decrementing the ntfs inode reference count. | ||
203 | * | ||
204 | * NOTE: If caller has modified the mft record, it is imperative to set the mft | ||
205 | * record dirty BEFORE calling unmap_mft_record(). | ||
206 | */ | ||
207 | void unmap_mft_record(ntfs_inode *ni) | ||
208 | { | ||
209 | struct page *page = ni->page; | ||
210 | |||
211 | BUG_ON(!page); | ||
212 | |||
213 | ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no); | ||
214 | |||
215 | unmap_mft_record_page(ni); | ||
216 | up(&ni->mrec_lock); | ||
217 | atomic_dec(&ni->count); | ||
218 | /* | ||
219 | * If pure ntfs_inode, i.e. no vfs inode attached, we leave it to | ||
220 | * ntfs_clear_extent_inode() in the extent inode case, and to the | ||
221 | * caller in the non-extent, yet pure ntfs inode case, to do the actual | ||
222 | * tear down of all structures and freeing of all allocated memory. | ||
223 | */ | ||
224 | return; | ||
225 | } | ||
226 | |||
227 | /** | ||
228 | * map_extent_mft_record - load an extent inode and attach it to its base | ||
229 | * @base_ni: base ntfs inode | ||
230 | * @mref: mft reference of the extent inode to load | ||
231 | * @ntfs_ino: on successful return, pointer to the ntfs_inode structure | ||
232 | * | ||
233 | * Load the extent mft record @mref and attach it to its base inode @base_ni. | ||
234 | * Return the mapped extent mft record if IS_ERR(result) is false. Otherwise | ||
235 | * PTR_ERR(result) gives the negative error code. | ||
236 | * | ||
237 | * On successful return, @ntfs_ino contains a pointer to the ntfs_inode | ||
238 | * structure of the mapped extent inode. | ||
239 | */ | ||
240 | MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref, | ||
241 | ntfs_inode **ntfs_ino) | ||
242 | { | ||
243 | MFT_RECORD *m; | ||
244 | ntfs_inode *ni = NULL; | ||
245 | ntfs_inode **extent_nis = NULL; | ||
246 | int i; | ||
247 | unsigned long mft_no = MREF(mref); | ||
248 | u16 seq_no = MSEQNO(mref); | ||
249 | BOOL destroy_ni = FALSE; | ||
250 | |||
251 | ntfs_debug("Mapping extent mft record 0x%lx (base mft record 0x%lx).", | ||
252 | mft_no, base_ni->mft_no); | ||
253 | /* Make sure the base ntfs inode doesn't go away. */ | ||
254 | atomic_inc(&base_ni->count); | ||
255 | /* | ||
256 | * Check if this extent inode has already been added to the base inode, | ||
257 | * in which case just return it. If not found, add it to the base | ||
258 | * inode before returning it. | ||
259 | */ | ||
260 | down(&base_ni->extent_lock); | ||
261 | if (base_ni->nr_extents > 0) { | ||
262 | extent_nis = base_ni->ext.extent_ntfs_inos; | ||
263 | for (i = 0; i < base_ni->nr_extents; i++) { | ||
264 | if (mft_no != extent_nis[i]->mft_no) | ||
265 | continue; | ||
266 | ni = extent_nis[i]; | ||
267 | /* Make sure the ntfs inode doesn't go away. */ | ||
268 | atomic_inc(&ni->count); | ||
269 | break; | ||
270 | } | ||
271 | } | ||
272 | if (likely(ni != NULL)) { | ||
273 | up(&base_ni->extent_lock); | ||
274 | atomic_dec(&base_ni->count); | ||
275 | /* We found the record; just have to map and return it. */ | ||
276 | m = map_mft_record(ni); | ||
277 | /* map_mft_record() has incremented this on success. */ | ||
278 | atomic_dec(&ni->count); | ||
279 | if (likely(!IS_ERR(m))) { | ||
280 | /* Verify the sequence number. */ | ||
281 | if (likely(le16_to_cpu(m->sequence_number) == seq_no)) { | ||
282 | ntfs_debug("Done 1."); | ||
283 | *ntfs_ino = ni; | ||
284 | return m; | ||
285 | } | ||
286 | unmap_mft_record(ni); | ||
287 | ntfs_error(base_ni->vol->sb, "Found stale extent mft " | ||
288 | "reference! Corrupt file system. " | ||
289 | "Run chkdsk."); | ||
290 | return ERR_PTR(-EIO); | ||
291 | } | ||
292 | map_err_out: | ||
293 | ntfs_error(base_ni->vol->sb, "Failed to map extent " | ||
294 | "mft record, error code %ld.", -PTR_ERR(m)); | ||
295 | return m; | ||
296 | } | ||
297 | /* Record wasn't there. Get a new ntfs inode and initialize it. */ | ||
298 | ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no); | ||
299 | if (unlikely(!ni)) { | ||
300 | up(&base_ni->extent_lock); | ||
301 | atomic_dec(&base_ni->count); | ||
302 | return ERR_PTR(-ENOMEM); | ||
303 | } | ||
304 | ni->vol = base_ni->vol; | ||
305 | ni->seq_no = seq_no; | ||
306 | ni->nr_extents = -1; | ||
307 | ni->ext.base_ntfs_ino = base_ni; | ||
308 | /* Now map the record. */ | ||
309 | m = map_mft_record(ni); | ||
310 | if (IS_ERR(m)) { | ||
311 | up(&base_ni->extent_lock); | ||
312 | atomic_dec(&base_ni->count); | ||
313 | ntfs_clear_extent_inode(ni); | ||
314 | goto map_err_out; | ||
315 | } | ||
316 | /* Verify the sequence number if it is present. */ | ||
317 | if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) { | ||
318 | ntfs_error(base_ni->vol->sb, "Found stale extent mft " | ||
319 | "reference! Corrupt file system. Run chkdsk."); | ||
320 | destroy_ni = TRUE; | ||
321 | m = ERR_PTR(-EIO); | ||
322 | goto unm_err_out; | ||
323 | } | ||
324 | /* Attach extent inode to base inode, reallocating memory if needed. */ | ||
325 | if (!(base_ni->nr_extents & 3)) { | ||
326 | ntfs_inode **tmp; | ||
327 | int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *); | ||
328 | |||
329 | tmp = (ntfs_inode **)kmalloc(new_size, GFP_NOFS); | ||
330 | if (unlikely(!tmp)) { | ||
331 | ntfs_error(base_ni->vol->sb, "Failed to allocate " | ||
332 | "internal buffer."); | ||
333 | destroy_ni = TRUE; | ||
334 | m = ERR_PTR(-ENOMEM); | ||
335 | goto unm_err_out; | ||
336 | } | ||
337 | if (base_ni->nr_extents) { | ||
338 | BUG_ON(!base_ni->ext.extent_ntfs_inos); | ||
339 | memcpy(tmp, base_ni->ext.extent_ntfs_inos, new_size - | ||
340 | 4 * sizeof(ntfs_inode *)); | ||
341 | kfree(base_ni->ext.extent_ntfs_inos); | ||
342 | } | ||
343 | base_ni->ext.extent_ntfs_inos = tmp; | ||
344 | } | ||
345 | base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni; | ||
346 | up(&base_ni->extent_lock); | ||
347 | atomic_dec(&base_ni->count); | ||
348 | ntfs_debug("Done 2."); | ||
349 | *ntfs_ino = ni; | ||
350 | return m; | ||
351 | unm_err_out: | ||
352 | unmap_mft_record(ni); | ||
353 | up(&base_ni->extent_lock); | ||
354 | atomic_dec(&base_ni->count); | ||
355 | /* | ||
356 | * If the extent inode was not attached to the base inode we need to | ||
357 | * release it or we will leak memory. | ||
358 | */ | ||
359 | if (destroy_ni) | ||
360 | ntfs_clear_extent_inode(ni); | ||
361 | return m; | ||
362 | } | ||
363 | |||
364 | #ifdef NTFS_RW | ||
365 | |||
366 | /** | ||
367 | * __mark_mft_record_dirty - set the mft record and the page containing it dirty | ||
368 | * @ni: ntfs inode describing the mapped mft record | ||
369 | * | ||
370 | * Internal function. Users should call mark_mft_record_dirty() instead. | ||
371 | * | ||
372 | * Set the mapped (extent) mft record of the (base or extent) ntfs inode @ni, | ||
373 | * as well as the page containing the mft record, dirty. Also, mark the base | ||
374 | * vfs inode dirty. This ensures that any changes to the mft record are | ||
375 | * written out to disk. | ||
376 | * | ||
377 | * NOTE: We only set I_DIRTY_SYNC and I_DIRTY_DATASYNC (and not I_DIRTY_PAGES) | ||
378 | * on the base vfs inode, because even though file data may have been modified, | ||
379 | * it is dirty in the inode meta data rather than the data page cache of the | ||
380 | * inode, and thus there are no data pages that need writing out. Therefore, a | ||
381 | * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the | ||
382 | * other hand, is not sufficient, because I_DIRTY_DATASYNC needs to be set to | ||
383 | * ensure ->write_inode is called from generic_osync_inode() and this needs to | ||
384 | * happen or the file data would not necessarily hit the device synchronously, | ||
385 | * even though the vfs inode has the O_SYNC flag set. Also, I_DIRTY_DATASYNC | ||
386 | * simply "feels" better than just I_DIRTY_SYNC, since the file data has not | ||
387 | * actually hit the block device yet, which is not what I_DIRTY_SYNC on its own | ||
388 | * would suggest. | ||
389 | */ | ||
390 | void __mark_mft_record_dirty(ntfs_inode *ni) | ||
391 | { | ||
392 | ntfs_inode *base_ni; | ||
393 | |||
394 | ntfs_debug("Entering for inode 0x%lx.", ni->mft_no); | ||
395 | BUG_ON(NInoAttr(ni)); | ||
396 | mark_ntfs_record_dirty(ni->page, ni->page_ofs); | ||
397 | /* Determine the base vfs inode and mark it dirty, too. */ | ||
398 | down(&ni->extent_lock); | ||
399 | if (likely(ni->nr_extents >= 0)) | ||
400 | base_ni = ni; | ||
401 | else | ||
402 | base_ni = ni->ext.base_ntfs_ino; | ||
403 | up(&ni->extent_lock); | ||
404 | __mark_inode_dirty(VFS_I(base_ni), I_DIRTY_SYNC | I_DIRTY_DATASYNC); | ||
405 | } | ||
406 | |||
407 | static const char *ntfs_please_email = "Please email " | ||
408 | "linux-ntfs-dev@lists.sourceforge.net and say that you saw " | ||
409 | "this message. Thank you."; | ||
410 | |||
411 | /** | ||
412 | * ntfs_sync_mft_mirror_umount - synchronise an mft record to the mft mirror | ||
413 | * @vol: ntfs volume on which the mft record to synchronize resides | ||
414 | * @mft_no: mft record number of mft record to synchronize | ||
415 | * @m: mapped, mst protected (extent) mft record to synchronize | ||
416 | * | ||
417 | * Write the mapped, mst protected (extent) mft record @m with mft record | ||
418 | * number @mft_no to the mft mirror ($MFTMirr) of the ntfs volume @vol, | ||
419 | * bypassing the page cache and the $MFTMirr inode itself. | ||
420 | * | ||
421 | * This function is only for use at umount time when the mft mirror inode has | ||
422 | * already been disposed off. We BUG() if we are called while the mft mirror | ||
423 | * inode is still attached to the volume. | ||
424 | * | ||
425 | * On success return 0. On error return -errno. | ||
426 | * | ||
427 | * NOTE: This function is not implemented yet as I am not convinced it can | ||
428 | * actually be triggered considering the sequence of commits we do in super.c:: | ||
429 | * ntfs_put_super(). But just in case we provide this place holder as the | ||
430 | * alternative would be either to BUG() or to get a NULL pointer dereference | ||
431 | * and Oops. | ||
432 | */ | ||
433 | static int ntfs_sync_mft_mirror_umount(ntfs_volume *vol, | ||
434 | const unsigned long mft_no, MFT_RECORD *m) | ||
435 | { | ||
436 | BUG_ON(vol->mftmirr_ino); | ||
437 | ntfs_error(vol->sb, "Umount time mft mirror syncing is not " | ||
438 | "implemented yet. %s", ntfs_please_email); | ||
439 | return -EOPNOTSUPP; | ||
440 | } | ||
441 | |||
442 | /** | ||
443 | * ntfs_sync_mft_mirror - synchronize an mft record to the mft mirror | ||
444 | * @vol: ntfs volume on which the mft record to synchronize resides | ||
445 | * @mft_no: mft record number of mft record to synchronize | ||
446 | * @m: mapped, mst protected (extent) mft record to synchronize | ||
447 | * @sync: if true, wait for i/o completion | ||
448 | * | ||
449 | * Write the mapped, mst protected (extent) mft record @m with mft record | ||
450 | * number @mft_no to the mft mirror ($MFTMirr) of the ntfs volume @vol. | ||
451 | * | ||
452 | * On success return 0. On error return -errno and set the volume errors flag | ||
453 | * in the ntfs volume @vol. | ||
454 | * | ||
455 | * NOTE: We always perform synchronous i/o and ignore the @sync parameter. | ||
456 | * | ||
457 | * TODO: If @sync is false, want to do truly asynchronous i/o, i.e. just | ||
458 | * schedule i/o via ->writepage or do it via kntfsd or whatever. | ||
459 | */ | ||
460 | int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, | ||
461 | MFT_RECORD *m, int sync) | ||
462 | { | ||
463 | struct page *page; | ||
464 | unsigned int blocksize = vol->sb->s_blocksize; | ||
465 | int max_bhs = vol->mft_record_size / blocksize; | ||
466 | struct buffer_head *bhs[max_bhs]; | ||
467 | struct buffer_head *bh, *head; | ||
468 | u8 *kmirr; | ||
469 | runlist_element *rl; | ||
470 | unsigned int block_start, block_end, m_start, m_end, page_ofs; | ||
471 | int i_bhs, nr_bhs, err = 0; | ||
472 | unsigned char blocksize_bits = vol->mftmirr_ino->i_blkbits; | ||
473 | |||
474 | ntfs_debug("Entering for inode 0x%lx.", mft_no); | ||
475 | BUG_ON(!max_bhs); | ||
476 | if (unlikely(!vol->mftmirr_ino)) { | ||
477 | /* This could happen during umount... */ | ||
478 | err = ntfs_sync_mft_mirror_umount(vol, mft_no, m); | ||
479 | if (likely(!err)) | ||
480 | return err; | ||
481 | goto err_out; | ||
482 | } | ||
483 | /* Get the page containing the mirror copy of the mft record @m. */ | ||
484 | page = ntfs_map_page(vol->mftmirr_ino->i_mapping, mft_no >> | ||
485 | (PAGE_CACHE_SHIFT - vol->mft_record_size_bits)); | ||
486 | if (IS_ERR(page)) { | ||
487 | ntfs_error(vol->sb, "Failed to map mft mirror page."); | ||
488 | err = PTR_ERR(page); | ||
489 | goto err_out; | ||
490 | } | ||
491 | lock_page(page); | ||
492 | BUG_ON(!PageUptodate(page)); | ||
493 | ClearPageUptodate(page); | ||
494 | /* Offset of the mft mirror record inside the page. */ | ||
495 | page_ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; | ||
496 | /* The address in the page of the mirror copy of the mft record @m. */ | ||
497 | kmirr = page_address(page) + page_ofs; | ||
498 | /* Copy the mst protected mft record to the mirror. */ | ||
499 | memcpy(kmirr, m, vol->mft_record_size); | ||
500 | /* Create uptodate buffers if not present. */ | ||
501 | if (unlikely(!page_has_buffers(page))) { | ||
502 | struct buffer_head *tail; | ||
503 | |||
504 | bh = head = alloc_page_buffers(page, blocksize, 1); | ||
505 | do { | ||
506 | set_buffer_uptodate(bh); | ||
507 | tail = bh; | ||
508 | bh = bh->b_this_page; | ||
509 | } while (bh); | ||
510 | tail->b_this_page = head; | ||
511 | attach_page_buffers(page, head); | ||
512 | BUG_ON(!page_has_buffers(page)); | ||
513 | } | ||
514 | bh = head = page_buffers(page); | ||
515 | BUG_ON(!bh); | ||
516 | rl = NULL; | ||
517 | nr_bhs = 0; | ||
518 | block_start = 0; | ||
519 | m_start = kmirr - (u8*)page_address(page); | ||
520 | m_end = m_start + vol->mft_record_size; | ||
521 | do { | ||
522 | block_end = block_start + blocksize; | ||
523 | /* If the buffer is outside the mft record, skip it. */ | ||
524 | if (block_end <= m_start) | ||
525 | continue; | ||
526 | if (unlikely(block_start >= m_end)) | ||
527 | break; | ||
528 | /* Need to map the buffer if it is not mapped already. */ | ||
529 | if (unlikely(!buffer_mapped(bh))) { | ||
530 | VCN vcn; | ||
531 | LCN lcn; | ||
532 | unsigned int vcn_ofs; | ||
533 | |||
534 | /* Obtain the vcn and offset of the current block. */ | ||
535 | vcn = ((VCN)mft_no << vol->mft_record_size_bits) + | ||
536 | (block_start - m_start); | ||
537 | vcn_ofs = vcn & vol->cluster_size_mask; | ||
538 | vcn >>= vol->cluster_size_bits; | ||
539 | if (!rl) { | ||
540 | down_read(&NTFS_I(vol->mftmirr_ino)-> | ||
541 | runlist.lock); | ||
542 | rl = NTFS_I(vol->mftmirr_ino)->runlist.rl; | ||
543 | /* | ||
544 | * $MFTMirr always has the whole of its runlist | ||
545 | * in memory. | ||
546 | */ | ||
547 | BUG_ON(!rl); | ||
548 | } | ||
549 | /* Seek to element containing target vcn. */ | ||
550 | while (rl->length && rl[1].vcn <= vcn) | ||
551 | rl++; | ||
552 | lcn = ntfs_rl_vcn_to_lcn(rl, vcn); | ||
553 | /* For $MFTMirr, only lcn >= 0 is a successful remap. */ | ||
554 | if (likely(lcn >= 0)) { | ||
555 | /* Setup buffer head to correct block. */ | ||
556 | bh->b_blocknr = ((lcn << | ||
557 | vol->cluster_size_bits) + | ||
558 | vcn_ofs) >> blocksize_bits; | ||
559 | set_buffer_mapped(bh); | ||
560 | } else { | ||
561 | bh->b_blocknr = -1; | ||
562 | ntfs_error(vol->sb, "Cannot write mft mirror " | ||
563 | "record 0x%lx because its " | ||
564 | "location on disk could not " | ||
565 | "be determined (error code " | ||
566 | "%lli).", mft_no, | ||
567 | (long long)lcn); | ||
568 | err = -EIO; | ||
569 | } | ||
570 | } | ||
571 | BUG_ON(!buffer_uptodate(bh)); | ||
572 | BUG_ON(!nr_bhs && (m_start != block_start)); | ||
573 | BUG_ON(nr_bhs >= max_bhs); | ||
574 | bhs[nr_bhs++] = bh; | ||
575 | BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end)); | ||
576 | } while (block_start = block_end, (bh = bh->b_this_page) != head); | ||
577 | if (unlikely(rl)) | ||
578 | up_read(&NTFS_I(vol->mftmirr_ino)->runlist.lock); | ||
579 | if (likely(!err)) { | ||
580 | /* Lock buffers and start synchronous write i/o on them. */ | ||
581 | for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { | ||
582 | struct buffer_head *tbh = bhs[i_bhs]; | ||
583 | |||
584 | if (unlikely(test_set_buffer_locked(tbh))) | ||
585 | BUG(); | ||
586 | BUG_ON(!buffer_uptodate(tbh)); | ||
587 | clear_buffer_dirty(tbh); | ||
588 | get_bh(tbh); | ||
589 | tbh->b_end_io = end_buffer_write_sync; | ||
590 | submit_bh(WRITE, tbh); | ||
591 | } | ||
592 | /* Wait on i/o completion of buffers. */ | ||
593 | for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { | ||
594 | struct buffer_head *tbh = bhs[i_bhs]; | ||
595 | |||
596 | wait_on_buffer(tbh); | ||
597 | if (unlikely(!buffer_uptodate(tbh))) { | ||
598 | err = -EIO; | ||
599 | /* | ||
600 | * Set the buffer uptodate so the page and | ||
601 | * buffer states do not become out of sync. | ||
602 | */ | ||
603 | set_buffer_uptodate(tbh); | ||
604 | } | ||
605 | } | ||
606 | } else /* if (unlikely(err)) */ { | ||
607 | /* Clean the buffers. */ | ||
608 | for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) | ||
609 | clear_buffer_dirty(bhs[i_bhs]); | ||
610 | } | ||
611 | /* Current state: all buffers are clean, unlocked, and uptodate. */ | ||
612 | /* Remove the mst protection fixups again. */ | ||
613 | post_write_mst_fixup((NTFS_RECORD*)kmirr); | ||
614 | flush_dcache_page(page); | ||
615 | SetPageUptodate(page); | ||
616 | unlock_page(page); | ||
617 | ntfs_unmap_page(page); | ||
618 | if (likely(!err)) { | ||
619 | ntfs_debug("Done."); | ||
620 | } else { | ||
621 | ntfs_error(vol->sb, "I/O error while writing mft mirror " | ||
622 | "record 0x%lx!", mft_no); | ||
623 | err_out: | ||
624 | ntfs_error(vol->sb, "Failed to synchronize $MFTMirr (error " | ||
625 | "code %i). Volume will be left marked dirty " | ||
626 | "on umount. Run ntfsfix on the partition " | ||
627 | "after umounting to correct this.", -err); | ||
628 | NVolSetErrors(vol); | ||
629 | } | ||
630 | return err; | ||
631 | } | ||
632 | |||
633 | /** | ||
634 | * write_mft_record_nolock - write out a mapped (extent) mft record | ||
635 | * @ni: ntfs inode describing the mapped (extent) mft record | ||
636 | * @m: mapped (extent) mft record to write | ||
637 | * @sync: if true, wait for i/o completion | ||
638 | * | ||
639 | * Write the mapped (extent) mft record @m described by the (regular or extent) | ||
640 | * ntfs inode @ni to backing store. If the mft record @m has a counterpart in | ||
641 | * the mft mirror, that is also updated. | ||
642 | * | ||
643 | * We only write the mft record if the ntfs inode @ni is dirty and the first | ||
644 | * buffer belonging to its mft record is dirty, too. We ignore the dirty state | ||
645 | * of subsequent buffers because we could have raced with | ||
646 | * fs/ntfs/aops.c::mark_ntfs_record_dirty(). | ||
647 | * | ||
648 | * On success, clean the mft record and return 0. On error, leave the mft | ||
649 | * record dirty and return -errno. The caller should call make_bad_inode() on | ||
650 | * the base inode to ensure no more access happens to this inode. We do not do | ||
651 | * it here as the caller may want to finish writing other extent mft records | ||
652 | * first to minimize on-disk metadata inconsistencies. | ||
653 | * | ||
654 | * NOTE: We always perform synchronous i/o and ignore the @sync parameter. | ||
655 | * However, if the mft record has a counterpart in the mft mirror and @sync is | ||
656 | * true, we write the mft record, wait for i/o completion, and only then write | ||
657 | * the mft mirror copy. This ensures that if the system crashes either the mft | ||
658 | * or the mft mirror will contain a self-consistent mft record @m. If @sync is | ||
659 | * false on the other hand, we start i/o on both and then wait for completion | ||
660 | * on them. This provides a speedup but no longer guarantees that you will end | ||
661 | * up with a self-consistent mft record in the case of a crash but if you asked | ||
662 | * for asynchronous writing you probably do not care about that anyway. | ||
663 | * | ||
664 | * TODO: If @sync is false, want to do truly asynchronous i/o, i.e. just | ||
665 | * schedule i/o via ->writepage or do it via kntfsd or whatever. | ||
666 | */ | ||
667 | int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) | ||
668 | { | ||
669 | ntfs_volume *vol = ni->vol; | ||
670 | struct page *page = ni->page; | ||
671 | unsigned char blocksize_bits = vol->mft_ino->i_blkbits; | ||
672 | unsigned int blocksize = 1 << blocksize_bits; | ||
673 | int max_bhs = vol->mft_record_size / blocksize; | ||
674 | struct buffer_head *bhs[max_bhs]; | ||
675 | struct buffer_head *bh, *head; | ||
676 | runlist_element *rl; | ||
677 | unsigned int block_start, block_end, m_start, m_end; | ||
678 | int i_bhs, nr_bhs, err = 0; | ||
679 | |||
680 | ntfs_debug("Entering for inode 0x%lx.", ni->mft_no); | ||
681 | BUG_ON(NInoAttr(ni)); | ||
682 | BUG_ON(!max_bhs); | ||
683 | BUG_ON(!PageLocked(page)); | ||
684 | /* | ||
685 | * If the ntfs_inode is clean no need to do anything. If it is dirty, | ||
686 | * mark it as clean now so that it can be redirtied later on if needed. | ||
687 | * There is no danger of races since the caller is holding the locks | ||
688 | * for the mft record @m and the page it is in. | ||
689 | */ | ||
690 | if (!NInoTestClearDirty(ni)) | ||
691 | goto done; | ||
692 | BUG_ON(!page_has_buffers(page)); | ||
693 | bh = head = page_buffers(page); | ||
694 | BUG_ON(!bh); | ||
695 | rl = NULL; | ||
696 | nr_bhs = 0; | ||
697 | block_start = 0; | ||
698 | m_start = ni->page_ofs; | ||
699 | m_end = m_start + vol->mft_record_size; | ||
700 | do { | ||
701 | block_end = block_start + blocksize; | ||
702 | /* If the buffer is outside the mft record, skip it. */ | ||
703 | if (block_end <= m_start) | ||
704 | continue; | ||
705 | if (unlikely(block_start >= m_end)) | ||
706 | break; | ||
707 | /* | ||
708 | * If this block is not the first one in the record, we ignore | ||
709 | * the buffer's dirty state because we could have raced with a | ||
710 | * parallel mark_ntfs_record_dirty(). | ||
711 | */ | ||
712 | if (block_start == m_start) { | ||
713 | /* This block is the first one in the record. */ | ||
714 | if (!buffer_dirty(bh)) { | ||
715 | BUG_ON(nr_bhs); | ||
716 | /* Clean records are not written out. */ | ||
717 | break; | ||
718 | } | ||
719 | } | ||
720 | /* Need to map the buffer if it is not mapped already. */ | ||
721 | if (unlikely(!buffer_mapped(bh))) { | ||
722 | VCN vcn; | ||
723 | LCN lcn; | ||
724 | unsigned int vcn_ofs; | ||
725 | |||
726 | /* Obtain the vcn and offset of the current block. */ | ||
727 | vcn = ((VCN)ni->mft_no << vol->mft_record_size_bits) + | ||
728 | (block_start - m_start); | ||
729 | vcn_ofs = vcn & vol->cluster_size_mask; | ||
730 | vcn >>= vol->cluster_size_bits; | ||
731 | if (!rl) { | ||
732 | down_read(&NTFS_I(vol->mft_ino)->runlist.lock); | ||
733 | rl = NTFS_I(vol->mft_ino)->runlist.rl; | ||
734 | BUG_ON(!rl); | ||
735 | } | ||
736 | /* Seek to element containing target vcn. */ | ||
737 | while (rl->length && rl[1].vcn <= vcn) | ||
738 | rl++; | ||
739 | lcn = ntfs_rl_vcn_to_lcn(rl, vcn); | ||
740 | /* For $MFT, only lcn >= 0 is a successful remap. */ | ||
741 | if (likely(lcn >= 0)) { | ||
742 | /* Setup buffer head to correct block. */ | ||
743 | bh->b_blocknr = ((lcn << | ||
744 | vol->cluster_size_bits) + | ||
745 | vcn_ofs) >> blocksize_bits; | ||
746 | set_buffer_mapped(bh); | ||
747 | } else { | ||
748 | bh->b_blocknr = -1; | ||
749 | ntfs_error(vol->sb, "Cannot write mft record " | ||
750 | "0x%lx because its location " | ||
751 | "on disk could not be " | ||
752 | "determined (error code %lli).", | ||
753 | ni->mft_no, (long long)lcn); | ||
754 | err = -EIO; | ||
755 | } | ||
756 | } | ||
757 | BUG_ON(!buffer_uptodate(bh)); | ||
758 | BUG_ON(!nr_bhs && (m_start != block_start)); | ||
759 | BUG_ON(nr_bhs >= max_bhs); | ||
760 | bhs[nr_bhs++] = bh; | ||
761 | BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end)); | ||
762 | } while (block_start = block_end, (bh = bh->b_this_page) != head); | ||
763 | if (unlikely(rl)) | ||
764 | up_read(&NTFS_I(vol->mft_ino)->runlist.lock); | ||
765 | if (!nr_bhs) | ||
766 | goto done; | ||
767 | if (unlikely(err)) | ||
768 | goto cleanup_out; | ||
769 | /* Apply the mst protection fixups. */ | ||
770 | err = pre_write_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size); | ||
771 | if (err) { | ||
772 | ntfs_error(vol->sb, "Failed to apply mst fixups!"); | ||
773 | goto cleanup_out; | ||
774 | } | ||
775 | flush_dcache_mft_record_page(ni); | ||
776 | /* Lock buffers and start synchronous write i/o on them. */ | ||
777 | for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { | ||
778 | struct buffer_head *tbh = bhs[i_bhs]; | ||
779 | |||
780 | if (unlikely(test_set_buffer_locked(tbh))) | ||
781 | BUG(); | ||
782 | BUG_ON(!buffer_uptodate(tbh)); | ||
783 | clear_buffer_dirty(tbh); | ||
784 | get_bh(tbh); | ||
785 | tbh->b_end_io = end_buffer_write_sync; | ||
786 | submit_bh(WRITE, tbh); | ||
787 | } | ||
788 | /* Synchronize the mft mirror now if not @sync. */ | ||
789 | if (!sync && ni->mft_no < vol->mftmirr_size) | ||
790 | ntfs_sync_mft_mirror(vol, ni->mft_no, m, sync); | ||
791 | /* Wait on i/o completion of buffers. */ | ||
792 | for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { | ||
793 | struct buffer_head *tbh = bhs[i_bhs]; | ||
794 | |||
795 | wait_on_buffer(tbh); | ||
796 | if (unlikely(!buffer_uptodate(tbh))) { | ||
797 | err = -EIO; | ||
798 | /* | ||
799 | * Set the buffer uptodate so the page and buffer | ||
800 | * states do not become out of sync. | ||
801 | */ | ||
802 | if (PageUptodate(page)) | ||
803 | set_buffer_uptodate(tbh); | ||
804 | } | ||
805 | } | ||
806 | /* If @sync, now synchronize the mft mirror. */ | ||
807 | if (sync && ni->mft_no < vol->mftmirr_size) | ||
808 | ntfs_sync_mft_mirror(vol, ni->mft_no, m, sync); | ||
809 | /* Remove the mst protection fixups again. */ | ||
810 | post_write_mst_fixup((NTFS_RECORD*)m); | ||
811 | flush_dcache_mft_record_page(ni); | ||
812 | if (unlikely(err)) { | ||
813 | /* I/O error during writing. This is really bad! */ | ||
814 | ntfs_error(vol->sb, "I/O error while writing mft record " | ||
815 | "0x%lx! Marking base inode as bad. You " | ||
816 | "should unmount the volume and run chkdsk.", | ||
817 | ni->mft_no); | ||
818 | goto err_out; | ||
819 | } | ||
820 | done: | ||
821 | ntfs_debug("Done."); | ||
822 | return 0; | ||
823 | cleanup_out: | ||
824 | /* Clean the buffers. */ | ||
825 | for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) | ||
826 | clear_buffer_dirty(bhs[i_bhs]); | ||
827 | err_out: | ||
828 | /* | ||
829 | * Current state: all buffers are clean, unlocked, and uptodate. | ||
830 | * The caller should mark the base inode as bad so that no more i/o | ||
831 | * happens. ->clear_inode() will still be invoked so all extent inodes | ||
832 | * and other allocated memory will be freed. | ||
833 | */ | ||
834 | if (err == -ENOMEM) { | ||
835 | ntfs_error(vol->sb, "Not enough memory to write mft record. " | ||
836 | "Redirtying so the write is retried later."); | ||
837 | mark_mft_record_dirty(ni); | ||
838 | err = 0; | ||
839 | } else | ||
840 | NVolSetErrors(vol); | ||
841 | return err; | ||
842 | } | ||
843 | |||
844 | /** | ||
845 | * ntfs_may_write_mft_record - check if an mft record may be written out | ||
846 | * @vol: [IN] ntfs volume on which the mft record to check resides | ||
847 | * @mft_no: [IN] mft record number of the mft record to check | ||
848 | * @m: [IN] mapped mft record to check | ||
849 | * @locked_ni: [OUT] caller has to unlock this ntfs inode if one is returned | ||
850 | * | ||
851 | * Check if the mapped (base or extent) mft record @m with mft record number | ||
852 | * @mft_no belonging to the ntfs volume @vol may be written out. If necessary | ||
853 | * and possible the ntfs inode of the mft record is locked and the base vfs | ||
854 | * inode is pinned. The locked ntfs inode is then returned in @locked_ni. The | ||
855 | * caller is responsible for unlocking the ntfs inode and unpinning the base | ||
856 | * vfs inode. | ||
857 | * | ||
858 | * Return TRUE if the mft record may be written out and FALSE if not. | ||
859 | * | ||
860 | * The caller has locked the page and cleared the uptodate flag on it which | ||
861 | * means that we can safely write out any dirty mft records that do not have | ||
862 | * their inodes in icache as determined by ilookup5() as anyone | ||
863 | * opening/creating such an inode would block when attempting to map the mft | ||
864 | * record in read_cache_page() until we are finished with the write out. | ||
865 | * | ||
866 | * Here is a description of the tests we perform: | ||
867 | * | ||
868 | * If the inode is found in icache we know the mft record must be a base mft | ||
869 | * record. If it is dirty, we do not write it and return FALSE as the vfs | ||
870 | * inode write paths will result in the access times being updated which would | ||
871 | * cause the base mft record to be redirtied and written out again. (We know | ||
872 | * the access time update will modify the base mft record because Windows | ||
873 | * chkdsk complains if the standard information attribute is not in the base | ||
874 | * mft record.) | ||
875 | * | ||
876 | * If the inode is in icache and not dirty, we attempt to lock the mft record | ||
877 | * and if we find the lock was already taken, it is not safe to write the mft | ||
878 | * record and we return FALSE. | ||
879 | * | ||
880 | * If we manage to obtain the lock we have exclusive access to the mft record, | ||
881 | * which also allows us safe writeout of the mft record. We then set | ||
882 | * @locked_ni to the locked ntfs inode and return TRUE. | ||
883 | * | ||
884 | * Note we cannot just lock the mft record and sleep while waiting for the lock | ||
885 | * because this would deadlock due to lock reversal (normally the mft record is | ||
886 | * locked before the page is locked but we already have the page locked here | ||
887 | * when we try to lock the mft record). | ||
888 | * | ||
889 | * If the inode is not in icache we need to perform further checks. | ||
890 | * | ||
891 | * If the mft record is not a FILE record or it is a base mft record, we can | ||
892 | * safely write it and return TRUE. | ||
893 | * | ||
894 | * We now know the mft record is an extent mft record. We check if the inode | ||
895 | * corresponding to its base mft record is in icache and obtain a reference to | ||
896 | * it if it is. If it is not, we can safely write it and return TRUE. | ||
897 | * | ||
898 | * We now have the base inode for the extent mft record. We check if it has an | ||
899 | * ntfs inode for the extent mft record attached and if not it is safe to write | ||
900 | * the extent mft record and we return TRUE. | ||
901 | * | ||
902 | * The ntfs inode for the extent mft record is attached to the base inode so we | ||
903 | * attempt to lock the extent mft record and if we find the lock was already | ||
904 | * taken, it is not safe to write the extent mft record and we return FALSE. | ||
905 | * | ||
906 | * If we manage to obtain the lock we have exclusive access to the extent mft | ||
907 | * record, which also allows us safe writeout of the extent mft record. We | ||
908 | * set the ntfs inode of the extent mft record clean and then set @locked_ni to | ||
909 | * the now locked ntfs inode and return TRUE. | ||
910 | * | ||
911 | * Note, the reason for actually writing dirty mft records here and not just | ||
912 | * relying on the vfs inode dirty code paths is that we can have mft records | ||
913 | * modified without them ever having actual inodes in memory. Also we can have | ||
914 | * dirty mft records with clean ntfs inodes in memory. None of the described | ||
915 | * cases would result in the dirty mft records being written out if we only | ||
916 | * relied on the vfs inode dirty code paths. And these cases can really occur | ||
917 | * during allocation of new mft records and in particular when the | ||
918 | * initialized_size of the $MFT/$DATA attribute is extended and the new space | ||
919 | * is initialized using ntfs_mft_record_format(). The clean inode can then | ||
920 | * appear if the mft record is reused for a new inode before it got written | ||
921 | * out. | ||
922 | */ | ||
923 | BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, | ||
924 | const MFT_RECORD *m, ntfs_inode **locked_ni) | ||
925 | { | ||
926 | struct super_block *sb = vol->sb; | ||
927 | struct inode *mft_vi = vol->mft_ino; | ||
928 | struct inode *vi; | ||
929 | ntfs_inode *ni, *eni, **extent_nis; | ||
930 | int i; | ||
931 | ntfs_attr na; | ||
932 | |||
933 | ntfs_debug("Entering for inode 0x%lx.", mft_no); | ||
934 | /* | ||
935 | * Normally we do not return a locked inode so set @locked_ni to NULL. | ||
936 | */ | ||
937 | BUG_ON(!locked_ni); | ||
938 | *locked_ni = NULL; | ||
939 | /* | ||
940 | * Check if the inode corresponding to this mft record is in the VFS | ||
941 | * inode cache and obtain a reference to it if it is. | ||
942 | */ | ||
943 | ntfs_debug("Looking for inode 0x%lx in icache.", mft_no); | ||
944 | na.mft_no = mft_no; | ||
945 | na.name = NULL; | ||
946 | na.name_len = 0; | ||
947 | na.type = AT_UNUSED; | ||
948 | /* | ||
949 | * For inode 0, i.e. $MFT itself, we cannot use ilookup5() from here or | ||
950 | * we deadlock because the inode is already locked by the kernel | ||
951 | * (fs/fs-writeback.c::__sync_single_inode()) and ilookup5() waits | ||
952 | * until the inode is unlocked before returning it and it never gets | ||
953 | * unlocked because ntfs_may_write_mft_record() never returns. )-: | ||
954 | * Fortunately, we have inode 0 pinned in icache for the duration of | ||
955 | * the mount so we can access it directly. | ||
956 | */ | ||
957 | if (!mft_no) { | ||
958 | /* Balance the below iput(). */ | ||
959 | vi = igrab(mft_vi); | ||
960 | BUG_ON(vi != mft_vi); | ||
961 | } else | ||
962 | vi = ilookup5(sb, mft_no, (test_t)ntfs_test_inode, &na); | ||
963 | if (vi) { | ||
964 | ntfs_debug("Base inode 0x%lx is in icache.", mft_no); | ||
965 | /* The inode is in icache. */ | ||
966 | ni = NTFS_I(vi); | ||
967 | /* Take a reference to the ntfs inode. */ | ||
968 | atomic_inc(&ni->count); | ||
969 | /* If the inode is dirty, do not write this record. */ | ||
970 | if (NInoDirty(ni)) { | ||
971 | ntfs_debug("Inode 0x%lx is dirty, do not write it.", | ||
972 | mft_no); | ||
973 | atomic_dec(&ni->count); | ||
974 | iput(vi); | ||
975 | return FALSE; | ||
976 | } | ||
977 | ntfs_debug("Inode 0x%lx is not dirty.", mft_no); | ||
978 | /* The inode is not dirty, try to take the mft record lock. */ | ||
979 | if (unlikely(down_trylock(&ni->mrec_lock))) { | ||
980 | ntfs_debug("Mft record 0x%lx is already locked, do " | ||
981 | "not write it.", mft_no); | ||
982 | atomic_dec(&ni->count); | ||
983 | iput(vi); | ||
984 | return FALSE; | ||
985 | } | ||
986 | ntfs_debug("Managed to lock mft record 0x%lx, write it.", | ||
987 | mft_no); | ||
988 | /* | ||
989 | * The write has to occur while we hold the mft record lock so | ||
990 | * return the locked ntfs inode. | ||
991 | */ | ||
992 | *locked_ni = ni; | ||
993 | return TRUE; | ||
994 | } | ||
995 | ntfs_debug("Inode 0x%lx is not in icache.", mft_no); | ||
996 | /* The inode is not in icache. */ | ||
997 | /* Write the record if it is not a mft record (type "FILE"). */ | ||
998 | if (!ntfs_is_mft_record(m->magic)) { | ||
999 | ntfs_debug("Mft record 0x%lx is not a FILE record, write it.", | ||
1000 | mft_no); | ||
1001 | return TRUE; | ||
1002 | } | ||
1003 | /* Write the mft record if it is a base inode. */ | ||
1004 | if (!m->base_mft_record) { | ||
1005 | ntfs_debug("Mft record 0x%lx is a base record, write it.", | ||
1006 | mft_no); | ||
1007 | return TRUE; | ||
1008 | } | ||
1009 | /* | ||
1010 | * This is an extent mft record. Check if the inode corresponding to | ||
1011 | * its base mft record is in icache and obtain a reference to it if it | ||
1012 | * is. | ||
1013 | */ | ||
1014 | na.mft_no = MREF_LE(m->base_mft_record); | ||
1015 | ntfs_debug("Mft record 0x%lx is an extent record. Looking for base " | ||
1016 | "inode 0x%lx in icache.", mft_no, na.mft_no); | ||
1017 | vi = ilookup5(sb, na.mft_no, (test_t)ntfs_test_inode, &na); | ||
1018 | if (!vi) { | ||
1019 | /* | ||
1020 | * The base inode is not in icache, write this extent mft | ||
1021 | * record. | ||
1022 | */ | ||
1023 | ntfs_debug("Base inode 0x%lx is not in icache, write the " | ||
1024 | "extent record.", na.mft_no); | ||
1025 | return TRUE; | ||
1026 | } | ||
1027 | ntfs_debug("Base inode 0x%lx is in icache.", na.mft_no); | ||
1028 | /* | ||
1029 | * The base inode is in icache. Check if it has the extent inode | ||
1030 | * corresponding to this extent mft record attached. | ||
1031 | */ | ||
1032 | ni = NTFS_I(vi); | ||
1033 | down(&ni->extent_lock); | ||
1034 | if (ni->nr_extents <= 0) { | ||
1035 | /* | ||
1036 | * The base inode has no attached extent inodes, write this | ||
1037 | * extent mft record. | ||
1038 | */ | ||
1039 | up(&ni->extent_lock); | ||
1040 | iput(vi); | ||
1041 | ntfs_debug("Base inode 0x%lx has no attached extent inodes, " | ||
1042 | "write the extent record.", na.mft_no); | ||
1043 | return TRUE; | ||
1044 | } | ||
1045 | /* Iterate over the attached extent inodes. */ | ||
1046 | extent_nis = ni->ext.extent_ntfs_inos; | ||
1047 | for (eni = NULL, i = 0; i < ni->nr_extents; ++i) { | ||
1048 | if (mft_no == extent_nis[i]->mft_no) { | ||
1049 | /* | ||
1050 | * Found the extent inode corresponding to this extent | ||
1051 | * mft record. | ||
1052 | */ | ||
1053 | eni = extent_nis[i]; | ||
1054 | break; | ||
1055 | } | ||
1056 | } | ||
1057 | /* | ||
1058 | * If the extent inode was not attached to the base inode, write this | ||
1059 | * extent mft record. | ||
1060 | */ | ||
1061 | if (!eni) { | ||
1062 | up(&ni->extent_lock); | ||
1063 | iput(vi); | ||
1064 | ntfs_debug("Extent inode 0x%lx is not attached to its base " | ||
1065 | "inode 0x%lx, write the extent record.", | ||
1066 | mft_no, na.mft_no); | ||
1067 | return TRUE; | ||
1068 | } | ||
1069 | ntfs_debug("Extent inode 0x%lx is attached to its base inode 0x%lx.", | ||
1070 | mft_no, na.mft_no); | ||
1071 | /* Take a reference to the extent ntfs inode. */ | ||
1072 | atomic_inc(&eni->count); | ||
1073 | up(&ni->extent_lock); | ||
1074 | /* | ||
1075 | * Found the extent inode corresponding to this extent mft record. | ||
1076 | * Try to take the mft record lock. | ||
1077 | */ | ||
1078 | if (unlikely(down_trylock(&eni->mrec_lock))) { | ||
1079 | atomic_dec(&eni->count); | ||
1080 | iput(vi); | ||
1081 | ntfs_debug("Extent mft record 0x%lx is already locked, do " | ||
1082 | "not write it.", mft_no); | ||
1083 | return FALSE; | ||
1084 | } | ||
1085 | ntfs_debug("Managed to lock extent mft record 0x%lx, write it.", | ||
1086 | mft_no); | ||
1087 | if (NInoTestClearDirty(eni)) | ||
1088 | ntfs_debug("Extent inode 0x%lx is dirty, marking it clean.", | ||
1089 | mft_no); | ||
1090 | /* | ||
1091 | * The write has to occur while we hold the mft record lock so return | ||
1092 | * the locked extent ntfs inode. | ||
1093 | */ | ||
1094 | *locked_ni = eni; | ||
1095 | return TRUE; | ||
1096 | } | ||
1097 | |||
1098 | static const char *es = " Leaving inconsistent metadata. Unmount and run " | ||
1099 | "chkdsk."; /* Suffix appended (via "%s") to error messages logged when an undo/recovery step itself fails. */ | ||
1100 | |||
1101 | /** | ||
1102 | * ntfs_mft_bitmap_find_and_alloc_free_rec_nolock - find and allocate a free mft record in the mft bitmap | ||
1103 | * @vol: volume on which to search for a free mft record | ||
1104 | * @base_ni: open base inode if allocating an extent mft record or NULL | ||
1105 | * | ||
1106 | * Search for a free mft record in the mft bitmap attribute on the ntfs volume | ||
1107 | * @vol and allocate the first one found by setting its bit in the bitmap. The search is done in up to two passes: from the start position to the end of the usable bitmap, then from mft record 24 up to the start position. | ||
1108 | * | ||
1109 | * If @base_ni is NULL start the search at the default allocator position (vol->mft_data_pos). | ||
1110 | * | ||
1111 | * If @base_ni is not NULL start the search at the mft record after the base | ||
1112 | * mft record @base_ni. In either case the search never starts below mft record 24. | ||
1113 | * | ||
1114 | * Return the number of the allocated mft record on success and -errno on error. An error code of | ||
1115 | * -ENOSPC means that there are no free mft records in the currently | ||
1116 | * initialized mft bitmap. NOTE(review): the return type is int while the record number is computed as s64; confirm large record numbers cannot be mistaken for errors. | ||
1117 | * | ||
1118 | * Locking: Caller must hold vol->mftbmp_lock for writing. | ||
1119 | */ | ||
1120 | static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol, | ||
1121 | ntfs_inode *base_ni) | ||
1122 | { | ||
1123 | s64 pass_end, ll, data_pos, pass_start, ofs, bit; | ||
1124 | struct address_space *mftbmp_mapping; | ||
1125 | u8 *buf, *byte; | ||
1126 | struct page *page; | ||
1127 | unsigned int page_ofs, size; | ||
1128 | u8 pass, b; | ||
1129 | |||
1130 | ntfs_debug("Searching for free mft record in the currently " | ||
1131 | "initialized mft bitmap."); | ||
1132 | mftbmp_mapping = vol->mftbmp_ino->i_mapping; | ||
1133 | /* | ||
1134 | * Set the end of the pass making sure we do not overflow the mft | ||
1135 | * bitmap, i.e. use the smaller of the number of allocated mft records and the number of initialized bitmap bits. | ||
1136 | */ | ||
1137 | pass_end = NTFS_I(vol->mft_ino)->allocated_size >> | ||
1138 | vol->mft_record_size_bits; | ||
1139 | ll = NTFS_I(vol->mftbmp_ino)->initialized_size << 3; /* bytes -> bits */ | ||
1140 | if (pass_end > ll) | ||
1141 | pass_end = ll; | ||
1142 | pass = 1; | ||
1143 | if (!base_ni) | ||
1144 | data_pos = vol->mft_data_pos; | ||
1145 | else | ||
1146 | data_pos = base_ni->mft_no + 1; | ||
1147 | if (data_pos < 24) /* NOTE(review): presumably mft records below 24 are reserved - confirm */ | ||
1148 | data_pos = 24; | ||
1149 | if (data_pos >= pass_end) { | ||
1150 | data_pos = 24; | ||
1151 | pass = 2; | ||
1152 | /* This happens on a freshly formatted volume. */ | ||
1153 | if (data_pos >= pass_end) | ||
1154 | return -ENOSPC; | ||
1155 | } | ||
1156 | pass_start = data_pos; | ||
1157 | ntfs_debug("Starting bitmap search: pass %u, pass_start 0x%llx, " | ||
1158 | "pass_end 0x%llx, data_pos 0x%llx.", pass, | ||
1159 | (long long)pass_start, (long long)pass_end, | ||
1160 | (long long)data_pos); | ||
1161 | /* Loop until a free mft record is found or both passes are exhausted. */ | ||
1162 | for (; pass <= 2;) { | ||
1163 | /* Work out how many bytes of the current page to scan, capped to pass_end. */ | ||
1164 | ofs = data_pos >> 3; /* bit position -> byte offset in the bitmap */ | ||
1165 | page_ofs = ofs & ~PAGE_CACHE_MASK; | ||
1166 | size = PAGE_CACHE_SIZE - page_ofs; | ||
1167 | ll = ((pass_end + 7) >> 3) - ofs; | ||
1168 | if (size > ll) | ||
1169 | size = ll; | ||
1170 | size <<= 3; /* bytes -> bits */ | ||
1171 | /* | ||
1172 | * If we are still within the active pass, search the next page | ||
1173 | * for a zero bit. | ||
1174 | */ | ||
1175 | if (size) { | ||
1176 | page = ntfs_map_page(mftbmp_mapping, | ||
1177 | ofs >> PAGE_CACHE_SHIFT); | ||
1178 | if (unlikely(IS_ERR(page))) { | ||
1179 | ntfs_error(vol->sb, "Failed to read mft " | ||
1180 | "bitmap, aborting."); | ||
1181 | return PTR_ERR(page); | ||
1182 | } | ||
1183 | buf = (u8*)page_address(page) + page_ofs; | ||
1184 | bit = data_pos & 7; /* starting bit within the first byte */ | ||
1185 | data_pos &= ~7ull; /* round down to a byte boundary; @bit now counts from here */ | ||
1186 | ntfs_debug("Before inner for loop: size 0x%x, " | ||
1187 | "data_pos 0x%llx, bit 0x%llx", size, | ||
1188 | (long long)data_pos, (long long)bit); | ||
1189 | for (; bit < size && data_pos + bit < pass_end; | ||
1190 | bit &= ~7ull, bit += 8) { | ||
1191 | byte = buf + (bit >> 3); | ||
1192 | if (*byte == 0xff) /* all eight records in use */ | ||
1193 | continue; | ||
1194 | b = ffz((unsigned long)*byte); /* position of the lowest zero bit */ | ||
1195 | if (b < 8 && b >= (bit & 7)) { /* ignore zero bits below the starting bit */ | ||
1196 | ll = data_pos + (bit & ~7ull) + b; | ||
1197 | if (unlikely(ll > (1ll << 32))) { /* record number beyond 2^32, give up */ | ||
1198 | ntfs_unmap_page(page); | ||
1199 | return -ENOSPC; | ||
1200 | } | ||
1201 | *byte |= 1 << b; /* allocate the record */ | ||
1202 | flush_dcache_page(page); | ||
1203 | set_page_dirty(page); | ||
1204 | ntfs_unmap_page(page); | ||
1205 | ntfs_debug("Done. (Found and " | ||
1206 | "allocated mft record " | ||
1207 | "0x%llx.)", | ||
1208 | (long long)ll); | ||
1209 | return ll; | ||
1210 | } | ||
1211 | } | ||
1212 | ntfs_debug("After inner for loop: size 0x%x, " | ||
1213 | "data_pos 0x%llx, bit 0x%llx", size, | ||
1214 | (long long)data_pos, (long long)bit); | ||
1215 | data_pos += size; | ||
1216 | ntfs_unmap_page(page); | ||
1217 | /* | ||
1218 | * If the end of the pass has not been reached yet, | ||
1219 | * continue searching the mft bitmap for a zero bit. | ||
1220 | */ | ||
1221 | if (data_pos < pass_end) | ||
1222 | continue; | ||
1223 | } | ||
1224 | /* Do the next pass. */ | ||
1225 | if (++pass == 2) { | ||
1226 | /* | ||
1227 | * Starting the second pass, in which we scan the first | ||
1228 | * part of the zone which we omitted earlier. | ||
1229 | */ | ||
1230 | pass_end = pass_start; | ||
1231 | data_pos = pass_start = 24; | ||
1232 | ntfs_debug("pass %i, pass_start 0x%llx, pass_end " | ||
1233 | "0x%llx.", pass, (long long)pass_start, | ||
1234 | (long long)pass_end); | ||
1235 | if (data_pos >= pass_end) | ||
1236 | break; | ||
1237 | } | ||
1238 | } | ||
1239 | /* No free mft records in currently initialized mft bitmap. */ | ||
1240 | ntfs_debug("Done. (No free mft records left in currently initialized " | ||
1241 | "mft bitmap.)"); | ||
1242 | return -ENOSPC; | ||
1243 | } | ||
1244 | |||
1245 | /** | ||
1246 | * ntfs_mft_bitmap_extend_allocation_nolock - extend mft bitmap by a cluster | ||
1247 | * @vol: volume on which to extend the mft bitmap attribute | ||
1248 | * | ||
1249 | * Extend the mft bitmap attribute on the ntfs volume @vol by one cluster. The cluster directly following the last allocated one is taken if it is free, otherwise one cluster is allocated from the DATA_ZONE and added as a new run. | ||
1250 | * | ||
1251 | * Note: Only changes allocated_size, i.e. does not touch initialized_size or | ||
1252 | * data_size. | ||
1253 | * | ||
1254 | * Return 0 on success and -errno on error. On error after the cluster has been allocated, the allocation, runlist and attribute record changes are undone; if undoing fails NVolSetErrors() is called on @vol. | ||
1255 | * | ||
1256 | * Locking: - Caller must hold vol->mftbmp_lock for writing. | ||
1257 | * - This function takes NTFS_I(vol->mftbmp_ino)->runlist.lock for | ||
1258 | * writing and releases it before returning. | ||
1259 | * - This function takes vol->lcnbmp_lock for writing and releases it | ||
1260 | * before returning. | ||
1261 | */ | ||
1262 | static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) | ||
1263 | { | ||
1264 | LCN lcn; | ||
1265 | s64 ll; | ||
1266 | struct page *page; | ||
1267 | ntfs_inode *mft_ni, *mftbmp_ni; | ||
1268 | runlist_element *rl, *rl2 = NULL; | ||
1269 | ntfs_attr_search_ctx *ctx = NULL; | ||
1270 | MFT_RECORD *mrec; | ||
1271 | ATTR_RECORD *a = NULL; | ||
1272 | int ret, mp_size; | ||
1273 | u32 old_alen = 0; | ||
1274 | u8 *b, tb; | ||
1275 | struct { /* Records how far we got so that undo_alloc knows what to roll back. */ | ||
1276 | u8 added_cluster:1; | ||
1277 | u8 added_run:1; | ||
1278 | u8 mp_rebuilt:1; | ||
1279 | } status = { 0, 0, 0 }; | ||
1280 | |||
1281 | ntfs_debug("Extending mft bitmap allocation."); | ||
1282 | mft_ni = NTFS_I(vol->mft_ino); | ||
1283 | mftbmp_ni = NTFS_I(vol->mftbmp_ino); | ||
1284 | /* | ||
1285 | * Determine the last lcn of the mft bitmap. The allocated size of the | ||
1286 | * mft bitmap cannot be zero so we are ok to do this. | ||
1287 | * ntfs_find_vcn() returns the runlist locked on success. | ||
1288 | */ | ||
1289 | rl = ntfs_find_vcn(mftbmp_ni, (mftbmp_ni->allocated_size - 1) >> | ||
1290 | vol->cluster_size_bits, TRUE); | ||
1291 | if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) { | ||
1292 | ntfs_error(vol->sb, "Failed to determine last allocated " | ||
1293 | "cluster of mft bitmap attribute."); | ||
1294 | if (!IS_ERR(rl)) { | ||
1295 | up_write(&mftbmp_ni->runlist.lock); | ||
1296 | ret = -EIO; | ||
1297 | } else | ||
1298 | ret = PTR_ERR(rl); | ||
1299 | return ret; | ||
1300 | } | ||
1301 | lcn = rl->lcn + rl->length; /* first cluster after the last run */ | ||
1302 | ntfs_debug("Last lcn of mft bitmap attribute is 0x%llx.", | ||
1303 | (long long)lcn); | ||
1304 | /* | ||
1305 | * Attempt to get the cluster following the last allocated cluster by | ||
1306 | * hand as it may be in the MFT zone so the allocator would not give it | ||
1307 | * to us. | ||
1308 | */ | ||
1309 | ll = lcn >> 3; /* byte offset of @lcn's bit in the lcn bitmap */ | ||
1310 | page = ntfs_map_page(vol->lcnbmp_ino->i_mapping, | ||
1311 | ll >> PAGE_CACHE_SHIFT); | ||
1312 | if (IS_ERR(page)) { | ||
1313 | up_write(&mftbmp_ni->runlist.lock); | ||
1314 | ntfs_error(vol->sb, "Failed to read from lcn bitmap."); | ||
1315 | return PTR_ERR(page); | ||
1316 | } | ||
1317 | b = (u8*)page_address(page) + (ll & ~PAGE_CACHE_MASK); | ||
1318 | tb = 1 << (lcn & 7ull); /* mask of @lcn's bit within its byte */ | ||
1319 | down_write(&vol->lcnbmp_lock); | ||
1320 | if (*b != 0xff && !(*b & tb)) { | ||
1321 | /* Next cluster is free, allocate it. */ | ||
1322 | *b |= tb; | ||
1323 | flush_dcache_page(page); | ||
1324 | set_page_dirty(page); | ||
1325 | up_write(&vol->lcnbmp_lock); | ||
1326 | ntfs_unmap_page(page); | ||
1327 | /* Update the mft bitmap runlist: grow the last run and move the start of the following element along. */ | ||
1328 | rl->length++; | ||
1329 | rl[1].vcn++; | ||
1330 | status.added_cluster = 1; | ||
1331 | ntfs_debug("Appending one cluster to mft bitmap."); | ||
1332 | } else { | ||
1333 | up_write(&vol->lcnbmp_lock); | ||
1334 | ntfs_unmap_page(page); | ||
1335 | /* Allocate a cluster from the DATA_ZONE. */ | ||
1336 | rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE); | ||
1337 | if (IS_ERR(rl2)) { | ||
1338 | up_write(&mftbmp_ni->runlist.lock); | ||
1339 | ntfs_error(vol->sb, "Failed to allocate a cluster for " | ||
1340 | "the mft bitmap."); | ||
1341 | return PTR_ERR(rl2); | ||
1342 | } | ||
1343 | rl = ntfs_runlists_merge(mftbmp_ni->runlist.rl, rl2); | ||
1344 | if (IS_ERR(rl)) { | ||
1345 | up_write(&mftbmp_ni->runlist.lock); | ||
1346 | ntfs_error(vol->sb, "Failed to merge runlists for mft " | ||
1347 | "bitmap."); | ||
1348 | if (ntfs_cluster_free_from_rl(vol, rl2)) { | ||
1349 | ntfs_error(vol->sb, "Failed to dealocate " | ||
1350 | "allocated cluster.%s", es); | ||
1351 | NVolSetErrors(vol); | ||
1352 | } | ||
1353 | ntfs_free(rl2); | ||
1354 | return PTR_ERR(rl); | ||
1355 | } | ||
1356 | mftbmp_ni->runlist.rl = rl; | ||
1357 | status.added_run = 1; | ||
1358 | ntfs_debug("Adding one run to mft bitmap."); | ||
1359 | /* Find the last run in the new runlist. */ | ||
1360 | for (; rl[1].length; rl++) | ||
1361 | ; | ||
1362 | } | ||
1363 | /* | ||
1364 | * Update the attribute record as well. Note: @rl is the last | ||
1365 | * (non-terminator) runlist element of mft bitmap. | ||
1366 | */ | ||
1367 | mrec = map_mft_record(mft_ni); | ||
1368 | if (IS_ERR(mrec)) { | ||
1369 | ntfs_error(vol->sb, "Failed to map mft record."); | ||
1370 | ret = PTR_ERR(mrec); | ||
1371 | goto undo_alloc; | ||
1372 | } | ||
1373 | ctx = ntfs_attr_get_search_ctx(mft_ni, mrec); | ||
1374 | if (unlikely(!ctx)) { | ||
1375 | ntfs_error(vol->sb, "Failed to get search context."); | ||
1376 | ret = -ENOMEM; | ||
1377 | goto undo_alloc; | ||
1378 | } | ||
1379 | ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, | ||
1380 | mftbmp_ni->name_len, CASE_SENSITIVE, rl[1].vcn, NULL, | ||
1381 | 0, ctx); | ||
1382 | if (unlikely(ret)) { | ||
1383 | ntfs_error(vol->sb, "Failed to find last attribute extent of " | ||
1384 | "mft bitmap attribute."); | ||
1385 | if (ret == -ENOENT) | ||
1386 | ret = -EIO; | ||
1387 | goto undo_alloc; | ||
1388 | } | ||
1389 | a = ctx->attr; | ||
1390 | ll = sle64_to_cpu(a->data.non_resident.lowest_vcn); | ||
1391 | /* Search back from the last run for the runlist element containing the lowest vcn of this attribute extent. */ | ||
1392 | for (rl2 = rl; rl2 > mftbmp_ni->runlist.rl; rl2--) { | ||
1393 | if (ll >= rl2->vcn) | ||
1394 | break; | ||
1395 | } | ||
1396 | BUG_ON(ll < rl2->vcn); | ||
1397 | BUG_ON(ll >= rl2->vcn + rl2->length); | ||
1398 | /* Get the size for the new mapping pairs array for this extent. */ | ||
1399 | mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll); | ||
1400 | if (unlikely(mp_size <= 0)) { | ||
1401 | ntfs_error(vol->sb, "Get size for mapping pairs failed for " | ||
1402 | "mft bitmap attribute extent."); | ||
1403 | ret = mp_size; | ||
1404 | if (!ret) | ||
1405 | ret = -EIO; | ||
1406 | goto undo_alloc; | ||
1407 | } | ||
1408 | /* Expand the attribute record if necessary, remembering the old length for the undo path. */ | ||
1409 | old_alen = le32_to_cpu(a->length); | ||
1410 | ret = ntfs_attr_record_resize(ctx->mrec, a, mp_size + | ||
1411 | le16_to_cpu(a->data.non_resident.mapping_pairs_offset)); | ||
1412 | if (unlikely(ret)) { | ||
1413 | if (ret != -ENOSPC) { | ||
1414 | ntfs_error(vol->sb, "Failed to resize attribute " | ||
1415 | "record for mft bitmap attribute."); | ||
1416 | goto undo_alloc; | ||
1417 | } | ||
1418 | // TODO: Deal with this by moving this extent to a new mft | ||
1419 | // record or by starting a new extent in a new mft record or by | ||
1420 | // moving other attributes out of this mft record. | ||
1421 | ntfs_error(vol->sb, "Not enough space in this mft record to " | ||
1422 | "accomodate extended mft bitmap attribute " | ||
1423 | "extent. Cannot handle this yet."); | ||
1424 | ret = -EOPNOTSUPP; | ||
1425 | goto undo_alloc; | ||
1426 | } | ||
1427 | status.mp_rebuilt = 1; | ||
1428 | /* Generate the mapping pairs array directly into the attr record. */ | ||
1429 | ret = ntfs_mapping_pairs_build(vol, (u8*)a + | ||
1430 | le16_to_cpu(a->data.non_resident.mapping_pairs_offset), | ||
1431 | mp_size, rl2, ll, NULL); | ||
1432 | if (unlikely(ret)) { | ||
1433 | ntfs_error(vol->sb, "Failed to build mapping pairs array for " | ||
1434 | "mft bitmap attribute."); | ||
1435 | goto undo_alloc; | ||
1436 | } | ||
1437 | /* Update the highest_vcn. */ | ||
1438 | a->data.non_resident.highest_vcn = cpu_to_sle64(rl[1].vcn - 1); | ||
1439 | /* | ||
1440 | * We now have extended the mft bitmap allocated_size by one cluster. | ||
1441 | * Reflect this in the ntfs_inode structure and the attribute record. | ||
1442 | */ | ||
1443 | if (a->data.non_resident.lowest_vcn) { | ||
1444 | /* | ||
1445 | * We are not in the first attribute extent, switch to it, but | ||
1446 | * first ensure the changes will make it to disk later. | ||
1447 | */ | ||
1448 | flush_dcache_mft_record_page(ctx->ntfs_ino); | ||
1449 | mark_mft_record_dirty(ctx->ntfs_ino); | ||
1450 | ntfs_attr_reinit_search_ctx(ctx); | ||
1451 | ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, | ||
1452 | mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, | ||
1453 | 0, ctx); | ||
1454 | if (unlikely(ret)) { | ||
1455 | ntfs_error(vol->sb, "Failed to find first attribute " | ||
1456 | "extent of mft bitmap attribute."); | ||
1457 | goto restore_undo_alloc; | ||
1458 | } | ||
1459 | a = ctx->attr; | ||
1460 | } | ||
1461 | mftbmp_ni->allocated_size += vol->cluster_size; | ||
1462 | a->data.non_resident.allocated_size = | ||
1463 | cpu_to_sle64(mftbmp_ni->allocated_size); | ||
1464 | /* Ensure the changes make it to disk. */ | ||
1465 | flush_dcache_mft_record_page(ctx->ntfs_ino); | ||
1466 | mark_mft_record_dirty(ctx->ntfs_ino); | ||
1467 | ntfs_attr_put_search_ctx(ctx); | ||
1468 | unmap_mft_record(mft_ni); | ||
1469 | up_write(&mftbmp_ni->runlist.lock); | ||
1470 | ntfs_debug("Done."); | ||
1471 | return 0; | ||
1472 | restore_undo_alloc: /* The last extent was already updated; find it again so it can be restored. */ | ||
1473 | ntfs_attr_reinit_search_ctx(ctx); | ||
1474 | if (ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, | ||
1475 | mftbmp_ni->name_len, CASE_SENSITIVE, rl[1].vcn, NULL, | ||
1476 | 0, ctx)) { | ||
1477 | ntfs_error(vol->sb, "Failed to find last attribute extent of " | ||
1478 | "mft bitmap attribute.%s", es); | ||
1479 | mftbmp_ni->allocated_size += vol->cluster_size; | ||
1480 | ntfs_attr_put_search_ctx(ctx); | ||
1481 | unmap_mft_record(mft_ni); | ||
1482 | up_write(&mftbmp_ni->runlist.lock); | ||
1483 | /* | ||
1484 | * The only thing that is now wrong is ->allocated_size of the | ||
1485 | * base attribute extent which chkdsk should be able to fix. | ||
1486 | */ | ||
1487 | NVolSetErrors(vol); | ||
1488 | return ret; | ||
1489 | } | ||
1490 | a = ctx->attr; | ||
1491 | a->data.non_resident.highest_vcn = cpu_to_sle64(rl[1].vcn - 2); /* one less than the value set on the success path above */ | ||
1492 | undo_alloc: | ||
1493 | if (status.added_cluster) { | ||
1494 | /* Truncate the last run in the runlist by one cluster. */ | ||
1495 | rl->length--; | ||
1496 | rl[1].vcn--; | ||
1497 | } else if (status.added_run) { | ||
1498 | lcn = rl->lcn; /* remember the cluster so it can be freed below */ | ||
1499 | /* Remove the last run from the runlist. */ | ||
1500 | rl->lcn = rl[1].lcn; | ||
1501 | rl->length = 0; | ||
1502 | } | ||
1503 | /* Deallocate the cluster. */ | ||
1504 | down_write(&vol->lcnbmp_lock); | ||
1505 | if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) { | ||
1506 | ntfs_error(vol->sb, "Failed to free allocated cluster.%s", es); | ||
1507 | NVolSetErrors(vol); | ||
1508 | } | ||
1509 | up_write(&vol->lcnbmp_lock); | ||
1510 | if (status.mp_rebuilt) { /* The attribute record was resized; rebuild the old mapping pairs and shrink it back. */ | ||
1511 | if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu( | ||
1512 | a->data.non_resident.mapping_pairs_offset), | ||
1513 | old_alen - le16_to_cpu( | ||
1514 | a->data.non_resident.mapping_pairs_offset), | ||
1515 | rl2, ll, NULL)) { | ||
1516 | ntfs_error(vol->sb, "Failed to restore mapping pairs " | ||
1517 | "array.%s", es); | ||
1518 | NVolSetErrors(vol); | ||
1519 | } | ||
1520 | if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) { | ||
1521 | ntfs_error(vol->sb, "Failed to restore attribute " | ||
1522 | "record.%s", es); | ||
1523 | NVolSetErrors(vol); | ||
1524 | } | ||
1525 | flush_dcache_mft_record_page(ctx->ntfs_ino); | ||
1526 | mark_mft_record_dirty(ctx->ntfs_ino); | ||
1527 | } | ||
1528 | if (ctx) /* NULL if we failed before or while getting the search context */ | ||
1529 | ntfs_attr_put_search_ctx(ctx); | ||
1530 | if (!IS_ERR(mrec)) /* only unmap if map_mft_record() succeeded */ | ||
1531 | unmap_mft_record(mft_ni); | ||
1532 | up_write(&mftbmp_ni->runlist.lock); | ||
1533 | return ret; | ||
1534 | } | ||
1535 | |||
1536 | /** | ||
1537 | * ntfs_mft_bitmap_extend_initialized_nolock - extend mftbmp initialized data | ||
1538 | * @vol: volume on which to extend the mft bitmap attribute | ||
1539 | * | ||
1540 | * Extend the initialized portion of the mft bitmap attribute on the ntfs | ||
1541 | * volume @vol by 8 bytes and fill the newly initialized bytes with zeroes. | ||
1542 | * | ||
1543 | * Note: Only changes initialized_size and data_size, i.e. requires that | ||
1544 | * allocated_size is big enough to fit the new initialized_size. | ||
1545 | * | ||
1546 | * Return 0 on success and -errno on error. If zeroing the new bytes fails, an attempt is made to restore the old initialized_size and data_size; if that fails too NVolSetErrors() is called on @vol. | ||
1547 | * | ||
1548 | * Locking: Caller must hold vol->mftbmp_lock for writing. | ||
1549 | */ | ||
1550 | static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol) | ||
1551 | { | ||
1552 | s64 old_data_size, old_initialized_size; | ||
1553 | struct inode *mftbmp_vi; | ||
1554 | ntfs_inode *mft_ni, *mftbmp_ni; | ||
1555 | ntfs_attr_search_ctx *ctx; | ||
1556 | MFT_RECORD *mrec; | ||
1557 | ATTR_RECORD *a; | ||
1558 | int ret; | ||
1559 | |||
1560 | ntfs_debug("Extending mft bitmap initiailized (and data) size."); | ||
1561 | mft_ni = NTFS_I(vol->mft_ino); | ||
1562 | mftbmp_vi = vol->mftbmp_ino; | ||
1563 | mftbmp_ni = NTFS_I(mftbmp_vi); | ||
1564 | /* Get the attribute record (first extent of the mft bitmap attribute). */ | ||
1565 | mrec = map_mft_record(mft_ni); | ||
1566 | if (IS_ERR(mrec)) { | ||
1567 | ntfs_error(vol->sb, "Failed to map mft record."); | ||
1568 | return PTR_ERR(mrec); | ||
1569 | } | ||
1570 | ctx = ntfs_attr_get_search_ctx(mft_ni, mrec); | ||
1571 | if (unlikely(!ctx)) { | ||
1572 | ntfs_error(vol->sb, "Failed to get search context."); | ||
1573 | ret = -ENOMEM; | ||
1574 | goto unm_err_out; | ||
1575 | } | ||
1576 | ret = ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, | ||
1577 | mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx); | ||
1578 | if (unlikely(ret)) { | ||
1579 | ntfs_error(vol->sb, "Failed to find first attribute extent of " | ||
1580 | "mft bitmap attribute."); | ||
1581 | if (ret == -ENOENT) | ||
1582 | ret = -EIO; | ||
1583 | goto put_err_out; | ||
1584 | } | ||
1585 | a = ctx->attr; | ||
1586 | old_data_size = mftbmp_vi->i_size; /* saved for the recovery path below */ | ||
1587 | old_initialized_size = mftbmp_ni->initialized_size; | ||
1588 | /* | ||
1589 | * We can simply update the initialized_size before filling the space | ||
1590 | * with zeroes because the caller is holding the mft bitmap lock for | ||
1591 | * writing which ensures that no one else is trying to access the data. | ||
1592 | */ | ||
1593 | mftbmp_ni->initialized_size += 8; | ||
1594 | a->data.non_resident.initialized_size = | ||
1595 | cpu_to_sle64(mftbmp_ni->initialized_size); | ||
1596 | if (mftbmp_ni->initialized_size > mftbmp_vi->i_size) { /* grow data_size too if needed */ | ||
1597 | mftbmp_vi->i_size = mftbmp_ni->initialized_size; | ||
1598 | a->data.non_resident.data_size = | ||
1599 | cpu_to_sle64(mftbmp_vi->i_size); | ||
1600 | } | ||
1601 | /* Ensure the changes make it to disk. */ | ||
1602 | flush_dcache_mft_record_page(ctx->ntfs_ino); | ||
1603 | mark_mft_record_dirty(ctx->ntfs_ino); | ||
1604 | ntfs_attr_put_search_ctx(ctx); | ||
1605 | unmap_mft_record(mft_ni); | ||
1606 | /* Initialize the mft bitmap attribute value with zeroes. */ | ||
1607 | ret = ntfs_attr_set(mftbmp_ni, old_initialized_size, 8, 0); | ||
1608 | if (likely(!ret)) { | ||
1609 | ntfs_debug("Done. (Wrote eight initialized bytes to mft " | ||
1610 | "bitmap."); | ||
1611 | return 0; | ||
1612 | } | ||
1613 | ntfs_error(vol->sb, "Failed to write to mft bitmap."); | ||
1614 | /* Try to recover from the error by restoring the old sizes; @ret keeps the write error. */ | ||
1615 | mrec = map_mft_record(mft_ni); | ||
1616 | if (IS_ERR(mrec)) { | ||
1617 | ntfs_error(vol->sb, "Failed to map mft record.%s", es); | ||
1618 | NVolSetErrors(vol); | ||
1619 | return ret; | ||
1620 | } | ||
1621 | ctx = ntfs_attr_get_search_ctx(mft_ni, mrec); | ||
1622 | if (unlikely(!ctx)) { | ||
1623 | ntfs_error(vol->sb, "Failed to get search context.%s", es); | ||
1624 | NVolSetErrors(vol); | ||
1625 | goto unm_err_out; | ||
1626 | } | ||
1627 | if (ntfs_attr_lookup(mftbmp_ni->type, mftbmp_ni->name, | ||
1628 | mftbmp_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx)) { | ||
1629 | ntfs_error(vol->sb, "Failed to find first attribute extent of " | ||
1630 | "mft bitmap attribute.%s", es); | ||
1631 | NVolSetErrors(vol); | ||
1632 | put_err_out: /* also the target of the initial attribute lookup failure above */ | ||
1633 | ntfs_attr_put_search_ctx(ctx); | ||
1634 | unm_err_out: /* also the target of both search context allocation failures above */ | ||
1635 | unmap_mft_record(mft_ni); | ||
1636 | goto err_out; | ||
1637 | } | ||
1638 | a = ctx->attr; | ||
1639 | mftbmp_ni->initialized_size = old_initialized_size; | ||
1640 | a->data.non_resident.initialized_size = | ||
1641 | cpu_to_sle64(old_initialized_size); | ||
1642 | if (mftbmp_vi->i_size != old_data_size) { /* data_size was grown above, shrink it back */ | ||
1643 | mftbmp_vi->i_size = old_data_size; | ||
1644 | a->data.non_resident.data_size = cpu_to_sle64(old_data_size); | ||
1645 | } | ||
1646 | flush_dcache_mft_record_page(ctx->ntfs_ino); | ||
1647 | mark_mft_record_dirty(ctx->ntfs_ino); | ||
1648 | ntfs_attr_put_search_ctx(ctx); | ||
1649 | unmap_mft_record(mft_ni); | ||
1650 | ntfs_debug("Restored status of mftbmp: allocated_size 0x%llx, " | ||
1651 | "data_size 0x%llx, initialized_size 0x%llx.", | ||
1652 | (long long)mftbmp_ni->allocated_size, | ||
1653 | (long long)mftbmp_vi->i_size, | ||
1654 | (long long)mftbmp_ni->initialized_size); | ||
1655 | err_out: | ||
1656 | return ret; | ||
1657 | } | ||
1658 | |||
1659 | /** | ||
1660 | * ntfs_mft_data_extend_allocation_nolock - extend mft data attribute | ||
1661 | * @vol: volume on which to extend the mft data attribute | ||
1662 | * | ||
1663 | * Extend the mft data attribute on the ntfs volume @vol by 16 mft records | ||
1664 | * worth of clusters or if not enough space for this by one mft record worth | ||
1665 | * of clusters. | ||
1666 | * | ||
1667 | * Note: Only changes allocated_size, i.e. does not touch initialized_size or | ||
1668 | * data_size. | ||
1669 | * | ||
1670 | * Return 0 on success and -errno on error. | ||
1671 | * | ||
1672 | * Locking: - Caller must hold vol->mftbmp_lock for writing. | ||
1673 | * - This function takes NTFS_I(vol->mft_ino)->runlist.lock for | ||
1674 | * writing and releases it before returning. | ||
1675 | * - This function calls functions which take vol->lcnbmp_lock for | ||
1676 | * writing and release it before returning. | ||
1677 | */ | ||
1678 | static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) | ||
1679 | { | ||
1680 | LCN lcn; | ||
1681 | VCN old_last_vcn; | ||
1682 | s64 min_nr, nr, ll = 0; | ||
1683 | ntfs_inode *mft_ni; | ||
1684 | runlist_element *rl, *rl2; | ||
1685 | ntfs_attr_search_ctx *ctx = NULL; | ||
1686 | MFT_RECORD *mrec; | ||
1687 | ATTR_RECORD *a = NULL; | ||
1688 | int ret, mp_size; | ||
1689 | u32 old_alen = 0; | ||
1690 | BOOL mp_rebuilt = FALSE; | ||
1691 | |||
1692 | ntfs_debug("Extending mft data allocation."); | ||
1693 | mft_ni = NTFS_I(vol->mft_ino); | ||
1694 | /* | ||
1695 | * Determine the preferred allocation location, i.e. the last lcn of | ||
1696 | * the mft data attribute. The allocated size of the mft data | ||
1697 | * attribute cannot be zero so we are ok to do this. | ||
1698 | * ntfs_find_vcn() returns the runlist locked on success. | ||
1699 | */ | ||
1700 | rl = ntfs_find_vcn(mft_ni, (mft_ni->allocated_size - 1) >> | ||
1701 | vol->cluster_size_bits, TRUE); | ||
1702 | if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) { | ||
1703 | ntfs_error(vol->sb, "Failed to determine last allocated " | ||
1704 | "cluster of mft data attribute."); | ||
1705 | if (!IS_ERR(rl)) { | ||
1706 | up_write(&mft_ni->runlist.lock); | ||
1707 | ret = -EIO; | ||
1708 | } else | ||
1709 | ret = PTR_ERR(rl); | ||
1710 | return ret; | ||
1711 | } | ||
1712 | lcn = rl->lcn + rl->length; | ||
1713 | ntfs_debug("Last lcn of mft data attribute is 0x%llx.", | ||
1714 | (long long)lcn); | ||
1715 | /* Minimum allocation is one mft record worth of clusters. */ | ||
1716 | min_nr = vol->mft_record_size >> vol->cluster_size_bits; | ||
1717 | if (!min_nr) | ||
1718 | min_nr = 1; | ||
1719 | /* Want to allocate 16 mft records worth of clusters. */ | ||
1720 | nr = vol->mft_record_size << 4 >> vol->cluster_size_bits; | ||
1721 | if (!nr) | ||
1722 | nr = min_nr; | ||
1723 | /* Ensure we do not go above 2^32-1 mft records. */ | ||
1724 | if (unlikely((mft_ni->allocated_size + | ||
1725 | (nr << vol->cluster_size_bits)) >> | ||
1726 | vol->mft_record_size_bits >= (1ll << 32))) { | ||
1727 | nr = min_nr; | ||
1728 | if (unlikely((mft_ni->allocated_size + | ||
1729 | (nr << vol->cluster_size_bits)) >> | ||
1730 | vol->mft_record_size_bits >= (1ll << 32))) { | ||
1731 | ntfs_warning(vol->sb, "Cannot allocate mft record " | ||
1732 | "because the maximum number of inodes " | ||
1733 | "(2^32) has already been reached."); | ||
1734 | up_write(&mft_ni->runlist.lock); | ||
1735 | return -ENOSPC; | ||
1736 | } | ||
1737 | } | ||
1738 | ntfs_debug("Trying mft data allocation with %s cluster count %lli.", | ||
1739 | nr > min_nr ? "default" : "minimal", (long long)nr); | ||
1740 | old_last_vcn = rl[1].vcn; | ||
1741 | do { | ||
1742 | rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE); | ||
1743 | if (likely(!IS_ERR(rl2))) | ||
1744 | break; | ||
1745 | if (PTR_ERR(rl2) != -ENOSPC || nr == min_nr) { | ||
1746 | ntfs_error(vol->sb, "Failed to allocate the minimal " | ||
1747 | "number of clusters (%lli) for the " | ||
1748 | "mft data attribute.", (long long)nr); | ||
1749 | up_write(&mft_ni->runlist.lock); | ||
1750 | return PTR_ERR(rl2); | ||
1751 | } | ||
1752 | /* | ||
1753 | * There is not enough space to do the allocation, but there | ||
1754 | * might be enough space to do a minimal allocation so try that | ||
1755 | * before failing. | ||
1756 | */ | ||
1757 | nr = min_nr; | ||
1758 | ntfs_debug("Retrying mft data allocation with minimal cluster " | ||
1759 | "count %lli.", (long long)nr); | ||
1760 | } while (1); | ||
1761 | rl = ntfs_runlists_merge(mft_ni->runlist.rl, rl2); | ||
1762 | if (IS_ERR(rl)) { | ||
1763 | up_write(&mft_ni->runlist.lock); | ||
1764 | ntfs_error(vol->sb, "Failed to merge runlists for mft data " | ||
1765 | "attribute."); | ||
1766 | if (ntfs_cluster_free_from_rl(vol, rl2)) { | ||
1767 | ntfs_error(vol->sb, "Failed to dealocate clusters " | ||
1768 | "from the mft data attribute.%s", es); | ||
1769 | NVolSetErrors(vol); | ||
1770 | } | ||
1771 | ntfs_free(rl2); | ||
1772 | return PTR_ERR(rl); | ||
1773 | } | ||
1774 | mft_ni->runlist.rl = rl; | ||
1775 | ntfs_debug("Allocated %lli clusters.", nr); | ||
1776 | /* Find the last run in the new runlist. */ | ||
1777 | for (; rl[1].length; rl++) | ||
1778 | ; | ||
1779 | /* Update the attribute record as well. */ | ||
1780 | mrec = map_mft_record(mft_ni); | ||
1781 | if (IS_ERR(mrec)) { | ||
1782 | ntfs_error(vol->sb, "Failed to map mft record."); | ||
1783 | ret = PTR_ERR(mrec); | ||
1784 | goto undo_alloc; | ||
1785 | } | ||
1786 | ctx = ntfs_attr_get_search_ctx(mft_ni, mrec); | ||
1787 | if (unlikely(!ctx)) { | ||
1788 | ntfs_error(vol->sb, "Failed to get search context."); | ||
1789 | ret = -ENOMEM; | ||
1790 | goto undo_alloc; | ||
1791 | } | ||
1792 | ret = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len, | ||
1793 | CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx); | ||
1794 | if (unlikely(ret)) { | ||
1795 | ntfs_error(vol->sb, "Failed to find last attribute extent of " | ||
1796 | "mft data attribute."); | ||
1797 | if (ret == -ENOENT) | ||
1798 | ret = -EIO; | ||
1799 | goto undo_alloc; | ||
1800 | } | ||
1801 | a = ctx->attr; | ||
1802 | ll = sle64_to_cpu(a->data.non_resident.lowest_vcn); | ||
1803 | /* Search back for the previous last allocated cluster of mft bitmap. */ | ||
1804 | for (rl2 = rl; rl2 > mft_ni->runlist.rl; rl2--) { | ||
1805 | if (ll >= rl2->vcn) | ||
1806 | break; | ||
1807 | } | ||
1808 | BUG_ON(ll < rl2->vcn); | ||
1809 | BUG_ON(ll >= rl2->vcn + rl2->length); | ||
1810 | /* Get the size for the new mapping pairs array for this extent. */ | ||
1811 | mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll); | ||
1812 | if (unlikely(mp_size <= 0)) { | ||
1813 | ntfs_error(vol->sb, "Get size for mapping pairs failed for " | ||
1814 | "mft data attribute extent."); | ||
1815 | ret = mp_size; | ||
1816 | if (!ret) | ||
1817 | ret = -EIO; | ||
1818 | goto undo_alloc; | ||
1819 | } | ||
1820 | /* Expand the attribute record if necessary. */ | ||
1821 | old_alen = le32_to_cpu(a->length); | ||
1822 | ret = ntfs_attr_record_resize(ctx->mrec, a, mp_size + | ||
1823 | le16_to_cpu(a->data.non_resident.mapping_pairs_offset)); | ||
1824 | if (unlikely(ret)) { | ||
1825 | if (ret != -ENOSPC) { | ||
1826 | ntfs_error(vol->sb, "Failed to resize attribute " | ||
1827 | "record for mft data attribute."); | ||
1828 | goto undo_alloc; | ||
1829 | } | ||
1830 | // TODO: Deal with this by moving this extent to a new mft | ||
1831 | // record or by starting a new extent in a new mft record or by | ||
1832 | // moving other attributes out of this mft record. | ||
1833 | // Note: Use the special reserved mft records and ensure that | ||
1834 | // this extent is not required to find the mft record in | ||
1835 | // question. | ||
1836 | ntfs_error(vol->sb, "Not enough space in this mft record to " | ||
1837 | "accomodate extended mft data attribute " | ||
1838 | "extent. Cannot handle this yet."); | ||
1839 | ret = -EOPNOTSUPP; | ||
1840 | goto undo_alloc; | ||
1841 | } | ||
1842 | mp_rebuilt = TRUE; | ||
1843 | /* Generate the mapping pairs array directly into the attr record. */ | ||
1844 | ret = ntfs_mapping_pairs_build(vol, (u8*)a + | ||
1845 | le16_to_cpu(a->data.non_resident.mapping_pairs_offset), | ||
1846 | mp_size, rl2, ll, NULL); | ||
1847 | if (unlikely(ret)) { | ||
1848 | ntfs_error(vol->sb, "Failed to build mapping pairs array of " | ||
1849 | "mft data attribute."); | ||
1850 | goto undo_alloc; | ||
1851 | } | ||
1852 | /* Update the highest_vcn. */ | ||
1853 | a->data.non_resident.highest_vcn = cpu_to_sle64(rl[1].vcn - 1); | ||
1854 | /* | ||
1855 | * We now have extended the mft data allocated_size by nr clusters. | ||
1856 | * Reflect this in the ntfs_inode structure and the attribute record. | ||
1857 | * @rl is the last (non-terminator) runlist element of mft data | ||
1858 | * attribute. | ||
1859 | */ | ||
1860 | if (a->data.non_resident.lowest_vcn) { | ||
1861 | /* | ||
1862 | * We are not in the first attribute extent, switch to it, but | ||
1863 | * first ensure the changes will make it to disk later. | ||
1864 | */ | ||
1865 | flush_dcache_mft_record_page(ctx->ntfs_ino); | ||
1866 | mark_mft_record_dirty(ctx->ntfs_ino); | ||
1867 | ntfs_attr_reinit_search_ctx(ctx); | ||
1868 | ret = ntfs_attr_lookup(mft_ni->type, mft_ni->name, | ||
1869 | mft_ni->name_len, CASE_SENSITIVE, 0, NULL, 0, | ||
1870 | ctx); | ||
1871 | if (unlikely(ret)) { | ||
1872 | ntfs_error(vol->sb, "Failed to find first attribute " | ||
1873 | "extent of mft data attribute."); | ||
1874 | goto restore_undo_alloc; | ||
1875 | } | ||
1876 | a = ctx->attr; | ||
1877 | } | ||
1878 | mft_ni->allocated_size += nr << vol->cluster_size_bits; | ||
1879 | a->data.non_resident.allocated_size = | ||
1880 | cpu_to_sle64(mft_ni->allocated_size); | ||
1881 | /* Ensure the changes make it to disk. */ | ||
1882 | flush_dcache_mft_record_page(ctx->ntfs_ino); | ||
1883 | mark_mft_record_dirty(ctx->ntfs_ino); | ||
1884 | ntfs_attr_put_search_ctx(ctx); | ||
1885 | unmap_mft_record(mft_ni); | ||
1886 | up_write(&mft_ni->runlist.lock); | ||
1887 | ntfs_debug("Done."); | ||
1888 | return 0; | ||
1889 | restore_undo_alloc: | ||
1890 | ntfs_attr_reinit_search_ctx(ctx); | ||
1891 | if (ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len, | ||
1892 | CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx)) { | ||
1893 | ntfs_error(vol->sb, "Failed to find last attribute extent of " | ||
1894 | "mft data attribute.%s", es); | ||
1895 | mft_ni->allocated_size += nr << vol->cluster_size_bits; | ||
1896 | ntfs_attr_put_search_ctx(ctx); | ||
1897 | unmap_mft_record(mft_ni); | ||
1898 | up_write(&mft_ni->runlist.lock); | ||
1899 | /* | ||
1900 | * The only thing that is now wrong is ->allocated_size of the | ||
1901 | * base attribute extent which chkdsk should be able to fix. | ||
1902 | */ | ||
1903 | NVolSetErrors(vol); | ||
1904 | return ret; | ||
1905 | } | ||
1906 | a = ctx->attr; | ||
1907 | a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1); | ||
1908 | undo_alloc: | ||
1909 | if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1) < 0) { | ||
1910 | ntfs_error(vol->sb, "Failed to free clusters from mft data " | ||
1911 | "attribute.%s", es); | ||
1912 | NVolSetErrors(vol); | ||
1913 | } | ||
1914 | if (ntfs_rl_truncate_nolock(vol, &mft_ni->runlist, old_last_vcn)) { | ||
1915 | ntfs_error(vol->sb, "Failed to truncate mft data attribute " | ||
1916 | "runlist.%s", es); | ||
1917 | NVolSetErrors(vol); | ||
1918 | } | ||
1919 | if (mp_rebuilt) { | ||
1920 | if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu( | ||
1921 | a->data.non_resident.mapping_pairs_offset), | ||
1922 | old_alen - le16_to_cpu( | ||
1923 | a->data.non_resident.mapping_pairs_offset), | ||
1924 | rl2, ll, NULL)) { | ||
1925 | ntfs_error(vol->sb, "Failed to restore mapping pairs " | ||
1926 | "array.%s", es); | ||
1927 | NVolSetErrors(vol); | ||
1928 | } | ||
1929 | if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) { | ||
1930 | ntfs_error(vol->sb, "Failed to restore attribute " | ||
1931 | "record.%s", es); | ||
1932 | NVolSetErrors(vol); | ||
1933 | } | ||
1934 | flush_dcache_mft_record_page(ctx->ntfs_ino); | ||
1935 | mark_mft_record_dirty(ctx->ntfs_ino); | ||
1936 | } | ||
1937 | if (ctx) | ||
1938 | ntfs_attr_put_search_ctx(ctx); | ||
1939 | if (!IS_ERR(mrec)) | ||
1940 | unmap_mft_record(mft_ni); | ||
1941 | up_write(&mft_ni->runlist.lock); | ||
1942 | return ret; | ||
1943 | } | ||
1944 | |||
1945 | /** | ||
1946 | * ntfs_mft_record_layout - layout an mft record into a memory buffer | ||
1947 | * @vol: volume to which the mft record will belong | ||
1948 | * @mft_no: mft reference specifying the mft record number | ||
1949 | * @m: destination buffer of size >= @vol->mft_record_size bytes | ||
1950 | * | ||
1951 | * Layout an empty, unused mft record with the mft record number @mft_no into | ||
1952 | * the buffer @m. The volume @vol is needed because the mft record structure | ||
1953 | * was modified in NTFS 3.1 so we need to know which volume version this mft | ||
1954 | * record will be used on. | ||
1955 | * | ||
1956 | * Return 0 on success and -errno on error. | ||
1957 | */ | ||
1958 | static int ntfs_mft_record_layout(const ntfs_volume *vol, const s64 mft_no, | ||
1959 | MFT_RECORD *m) | ||
1960 | { | ||
1961 | ATTR_RECORD *a; | ||
1962 | |||
1963 | ntfs_debug("Entering for mft record 0x%llx.", (long long)mft_no); | ||
1964 | if (mft_no >= (1ll << 32)) { | ||
1965 | ntfs_error(vol->sb, "Mft record number 0x%llx exceeds " | ||
1966 | "maximum of 2^32.", (long long)mft_no); | ||
1967 | return -ERANGE; | ||
1968 | } | ||
1969 | /* Start by clearing the whole mft record to gives us a clean slate. */ | ||
1970 | memset(m, 0, vol->mft_record_size); | ||
1971 | /* Aligned to 2-byte boundary. */ | ||
1972 | if (vol->major_ver < 3 || (vol->major_ver == 3 && !vol->minor_ver)) | ||
1973 | m->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD_OLD) + 1) & ~1); | ||
1974 | else { | ||
1975 | m->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD) + 1) & ~1); | ||
1976 | /* | ||
1977 | * Set the NTFS 3.1+ specific fields while we know that the | ||
1978 | * volume version is 3.1+. | ||
1979 | */ | ||
1980 | m->reserved = 0; | ||
1981 | m->mft_record_number = cpu_to_le32((u32)mft_no); | ||
1982 | } | ||
1983 | m->magic = magic_FILE; | ||
1984 | if (vol->mft_record_size >= NTFS_BLOCK_SIZE) | ||
1985 | m->usa_count = cpu_to_le16(vol->mft_record_size / | ||
1986 | NTFS_BLOCK_SIZE + 1); | ||
1987 | else { | ||
1988 | m->usa_count = cpu_to_le16(1); | ||
1989 | ntfs_warning(vol->sb, "Sector size is bigger than mft record " | ||
1990 | "size. Setting usa_count to 1. If chkdsk " | ||
1991 | "reports this as corruption, please email " | ||
1992 | "linux-ntfs-dev@lists.sourceforge.net stating " | ||
1993 | "that you saw this message and that the " | ||
1994 | "modified file system created was corrupt. " | ||
1995 | "Thank you."); | ||
1996 | } | ||
1997 | /* Set the update sequence number to 1. */ | ||
1998 | *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = cpu_to_le16(1); | ||
1999 | m->lsn = 0; | ||
2000 | m->sequence_number = cpu_to_le16(1); | ||
2001 | m->link_count = 0; | ||
2002 | /* | ||
2003 | * Place the attributes straight after the update sequence array, | ||
2004 | * aligned to 8-byte boundary. | ||
2005 | */ | ||
2006 | m->attrs_offset = cpu_to_le16((le16_to_cpu(m->usa_ofs) + | ||
2007 | (le16_to_cpu(m->usa_count) << 1) + 7) & ~7); | ||
2008 | m->flags = 0; | ||
2009 | /* | ||
2010 | * Using attrs_offset plus eight bytes (for the termination attribute). | ||
2011 | * attrs_offset is already aligned to 8-byte boundary, so no need to | ||
2012 | * align again. | ||
2013 | */ | ||
2014 | m->bytes_in_use = cpu_to_le32(le16_to_cpu(m->attrs_offset) + 8); | ||
2015 | m->bytes_allocated = cpu_to_le32(vol->mft_record_size); | ||
2016 | m->base_mft_record = 0; | ||
2017 | m->next_attr_instance = 0; | ||
2018 | /* Add the termination attribute. */ | ||
2019 | a = (ATTR_RECORD*)((u8*)m + le16_to_cpu(m->attrs_offset)); | ||
2020 | a->type = AT_END; | ||
2021 | a->length = 0; | ||
2022 | ntfs_debug("Done."); | ||
2023 | return 0; | ||
2024 | } | ||
2025 | |||
2026 | /** | ||
2027 | * ntfs_mft_record_format - format an mft record on an ntfs volume | ||
2028 | * @vol: volume on which to format the mft record | ||
2029 | * @mft_no: mft record number to format | ||
2030 | * | ||
2031 | * Format the mft record @mft_no in $MFT/$DATA, i.e. lay out an empty, unused | ||
2032 | * mft record into the appropriate place of the mft data attribute. This is | ||
2033 | * used when extending the mft data attribute. | ||
2034 | * | ||
2035 | * Return 0 on success and -errno on error. | ||
2036 | */ | ||
2037 | static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no) | ||
2038 | { | ||
2039 | struct inode *mft_vi = vol->mft_ino; | ||
2040 | struct page *page; | ||
2041 | MFT_RECORD *m; | ||
2042 | pgoff_t index, end_index; | ||
2043 | unsigned int ofs; | ||
2044 | int err; | ||
2045 | |||
2046 | ntfs_debug("Entering for mft record 0x%llx.", (long long)mft_no); | ||
2047 | /* | ||
2048 | * The index into the page cache and the offset within the page cache | ||
2049 | * page of the wanted mft record. | ||
2050 | */ | ||
2051 | index = mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT; | ||
2052 | ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; | ||
2053 | /* The maximum valid index into the page cache for $MFT's data. */ | ||
2054 | end_index = mft_vi->i_size >> PAGE_CACHE_SHIFT; | ||
2055 | if (unlikely(index >= end_index)) { | ||
2056 | if (unlikely(index > end_index || ofs + vol->mft_record_size >= | ||
2057 | (mft_vi->i_size & ~PAGE_CACHE_MASK))) { | ||
2058 | ntfs_error(vol->sb, "Tried to format non-existing mft " | ||
2059 | "record 0x%llx.", (long long)mft_no); | ||
2060 | return -ENOENT; | ||
2061 | } | ||
2062 | } | ||
2063 | /* Read, map, and pin the page containing the mft record. */ | ||
2064 | page = ntfs_map_page(mft_vi->i_mapping, index); | ||
2065 | if (unlikely(IS_ERR(page))) { | ||
2066 | ntfs_error(vol->sb, "Failed to map page containing mft record " | ||
2067 | "to format 0x%llx.", (long long)mft_no); | ||
2068 | return PTR_ERR(page); | ||
2069 | } | ||
2070 | lock_page(page); | ||
2071 | BUG_ON(!PageUptodate(page)); | ||
2072 | ClearPageUptodate(page); | ||
2073 | m = (MFT_RECORD*)((u8*)page_address(page) + ofs); | ||
2074 | err = ntfs_mft_record_layout(vol, mft_no, m); | ||
2075 | if (unlikely(err)) { | ||
2076 | ntfs_error(vol->sb, "Failed to layout mft record 0x%llx.", | ||
2077 | (long long)mft_no); | ||
2078 | SetPageUptodate(page); | ||
2079 | unlock_page(page); | ||
2080 | ntfs_unmap_page(page); | ||
2081 | return err; | ||
2082 | } | ||
2083 | flush_dcache_page(page); | ||
2084 | SetPageUptodate(page); | ||
2085 | unlock_page(page); | ||
2086 | /* | ||
2087 | * Make sure the mft record is written out to disk. We could use | ||
2088 | * ilookup5() to check if an inode is in icache and so on but this is | ||
2089 | * unnecessary as ntfs_writepage() will write the dirty record anyway. | ||
2090 | */ | ||
2091 | mark_ntfs_record_dirty(page, ofs); | ||
2092 | ntfs_unmap_page(page); | ||
2093 | ntfs_debug("Done."); | ||
2094 | return 0; | ||
2095 | } | ||
2096 | |||
2097 | /** | ||
2098 | * ntfs_mft_record_alloc - allocate an mft record on an ntfs volume | ||
2099 | * @vol: [IN] volume on which to allocate the mft record | ||
2100 | * @mode: [IN] mode if want a file or directory, i.e. base inode or 0 | ||
2101 | * @base_ni: [IN] open base inode if allocating an extent mft record or NULL | ||
2102 | * @mrec: [OUT] on successful return this is the mapped mft record | ||
2103 | * | ||
2104 | * Allocate an mft record in $MFT/$DATA of an open ntfs volume @vol. | ||
2105 | * | ||
2106 | * If @base_ni is NULL make the mft record a base mft record, i.e. a file or | ||
2107 | * directory inode, and allocate it at the default allocator position. In | ||
2108 | * this case @mode is the file mode as given to us by the caller. We in | ||
2109 | * particular use @mode to distinguish whether a file or a directory is being | ||
2110 | * created (S_ISREG(mode) and S_ISDIR(mode), respectively). | ||
2111 | * | ||
2112 | * If @base_ni is not NULL make the allocated mft record an extent record, | ||
2113 | * allocate it starting at the mft record after the base mft record and attach | ||
2114 | * the allocated and opened ntfs inode to the base inode @base_ni. In this | ||
2115 | * case @mode must be 0 as it is meaningless for extent inodes. | ||
2116 | * | ||
2117 | * You need to check the return value with IS_ERR(). If false, the function | ||
2118 | * was successful and the return value is the now opened ntfs inode of the | ||
2119 | * allocated mft record. *@mrec is then set to the allocated, mapped, pinned, | ||
2120 | * and locked mft record. If IS_ERR() is true, the function failed and the | ||
2121 | * error code is obtained from PTR_ERR(return value). *@mrec is undefined in | ||
2122 | * this case. | ||
2123 | * | ||
2124 | * Allocation strategy: | ||
2125 | * | ||
2126 | * To find a free mft record, we scan the mft bitmap for a zero bit. To | ||
2127 | * optimize this we start scanning at the place specified by @base_ni or if | ||
2128 | * @base_ni is NULL we start where we last stopped and we perform wrap around | ||
2129 | * when we reach the end. Note, we do not try to allocate mft records below | ||
2130 | * number 24 because numbers 0 to 15 are the defined system files anyway and 16 | ||
2131 | * to 24 are special in that they are used for storing extension mft records | ||
2132 | * for the $DATA attribute of $MFT. This is required to avoid the possibility | ||
2133 | * of creating a runlist with a circular dependency which once written to disk | ||
2134 | * can never be read in again. Windows will only use records 16 to 24 for | ||
2135 | * normal files if the volume is completely out of space. We never use them | ||
2136 | * which means that when the volume is really out of space we cannot create any | ||
2137 | * more files while Windows can still create up to 8 small files. We can start | ||
2138 | * doing this at some later time, it does not matter much for now. | ||
2139 | * | ||
2140 | * When scanning the mft bitmap, we only search up to the last allocated mft | ||
2141 | * record. If there are no free records left in the range 24 to number of | ||
2142 | * allocated mft records, then we extend the $MFT/$DATA attribute in order to | ||
2143 | * create free mft records. We extend the allocated size of $MFT/$DATA by 16 | ||
2144 | * records at a time or one cluster, if cluster size is above 16kiB. If there | ||
2145 | * is not sufficient space to do this, we try to extend by a single mft record | ||
2146 | * or one cluster, if cluster size is above the mft record size. | ||
2147 | * | ||
2148 | * No matter how many mft records we allocate, we initialize only the first | ||
2149 | * allocated mft record, incrementing mft data size and initialized size | ||
2150 | * accordingly, open an ntfs_inode for it and return it to the caller, unless | ||
2151 | * there are less than 24 mft records, in which case we allocate and initialize | ||
2152 | * mft records until we reach record 24 which we consider as the first free mft | ||
2153 | * record for use by normal files. | ||
2154 | * | ||
2155 | * If during any stage we overflow the initialized data in the mft bitmap, we | ||
2156 | * extend the initialized size (and data size) by 8 bytes, allocating another | ||
2157 | * cluster if required. The bitmap data size has to be at least equal to the | ||
2158 | * number of mft records in the mft, but it can be bigger, in which case the | ||
2159 | * superfluous bits are padded with zeroes. | ||
2160 | * | ||
2161 | * Thus, when we return successfully (IS_ERR() is false), we will have: | ||
2162 | * - initialized / extended the mft bitmap if necessary, | ||
2163 | * - initialized / extended the mft data if necessary, | ||
2164 | * - set the bit corresponding to the mft record being allocated in the | ||
2165 | * mft bitmap, | ||
2166 | * - opened an ntfs_inode for the allocated mft record, and we will have | ||
2167 | * - returned the ntfs_inode as well as the allocated mapped, pinned, and | ||
2168 | * locked mft record. | ||
2169 | * | ||
2170 | * On error, the volume will be left in a consistent state and no record will | ||
2171 | * be allocated. If rolling back a partial operation fails, we may leave some | ||
2172 | * inconsistent metadata in which case we set NVolErrors() so the volume is | ||
2173 | * left dirty when unmounted. | ||
2174 | * | ||
2175 | * Note, this function cannot make use of most of the normal functions, like | ||
2176 | * for example for attribute resizing, etc, because when the run list overflows | ||
2177 | * the base mft record and an attribute list is used, it is very important that | ||
2178 | * the extension mft records used to store the $DATA attribute of $MFT can be | ||
2179 | * reached without having to read the information contained inside them, as | ||
2180 | * this would make it impossible to find them in the first place after the | ||
2181 | * volume is unmounted. $MFT/$BITMAP probably does not need to follow this | ||
2182 | * rule because the bitmap is not essential for finding the mft records, but on | ||
2183 | * the other hand, handling the bitmap in this special way would make life | ||
2184 | * easier because otherwise there might be circular invocations of functions | ||
2185 | * when reading the bitmap. | ||
2186 | */ | ||
2187 | ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode, | ||
2188 | ntfs_inode *base_ni, MFT_RECORD **mrec) | ||
2189 | { | ||
2190 | s64 ll, bit, old_data_initialized, old_data_size; | ||
2191 | struct inode *vi; | ||
2192 | struct page *page; | ||
2193 | ntfs_inode *mft_ni, *mftbmp_ni, *ni; | ||
2194 | ntfs_attr_search_ctx *ctx; | ||
2195 | MFT_RECORD *m; | ||
2196 | ATTR_RECORD *a; | ||
2197 | pgoff_t index; | ||
2198 | unsigned int ofs; | ||
2199 | int err; | ||
2200 | le16 seq_no, usn; | ||
2201 | BOOL record_formatted = FALSE; | ||
2202 | |||
2203 | if (base_ni) { | ||
2204 | ntfs_debug("Entering (allocating an extent mft record for " | ||
2205 | "base mft record 0x%llx).", | ||
2206 | (long long)base_ni->mft_no); | ||
2207 | /* @mode and @base_ni are mutually exclusive. */ | ||
2208 | BUG_ON(mode); | ||
2209 | } else | ||
2210 | ntfs_debug("Entering (allocating a base mft record)."); | ||
2211 | if (mode) { | ||
2212 | /* @mode and @base_ni are mutually exclusive. */ | ||
2213 | BUG_ON(base_ni); | ||
2214 | /* We only support creation of normal files and directories. */ | ||
2215 | if (!S_ISREG(mode) && !S_ISDIR(mode)) | ||
2216 | return ERR_PTR(-EOPNOTSUPP); | ||
2217 | } | ||
2218 | BUG_ON(!mrec); | ||
2219 | mft_ni = NTFS_I(vol->mft_ino); | ||
2220 | mftbmp_ni = NTFS_I(vol->mftbmp_ino); | ||
2221 | down_write(&vol->mftbmp_lock); | ||
2222 | bit = ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(vol, base_ni); | ||
2223 | if (bit >= 0) { | ||
2224 | ntfs_debug("Found and allocated free record (#1), bit 0x%llx.", | ||
2225 | (long long)bit); | ||
2226 | goto have_alloc_rec; | ||
2227 | } | ||
2228 | if (bit != -ENOSPC) { | ||
2229 | up_write(&vol->mftbmp_lock); | ||
2230 | return ERR_PTR(bit); | ||
2231 | } | ||
2232 | /* | ||
2233 | * No free mft records left. If the mft bitmap already covers more | ||
2234 | * than the currently used mft records, the next records are all free, | ||
2235 | * so we can simply allocate the first unused mft record. | ||
2236 | * Note: We also have to make sure that the mft bitmap at least covers | ||
2237 | * the first 24 mft records as they are special and whilst they may not | ||
2238 | * be in use, we do not allocate from them. | ||
2239 | */ | ||
2240 | ll = mft_ni->initialized_size >> vol->mft_record_size_bits; | ||
2241 | if (mftbmp_ni->initialized_size << 3 > ll && | ||
2242 | mftbmp_ni->initialized_size > 3) { | ||
2243 | bit = ll; | ||
2244 | if (bit < 24) | ||
2245 | bit = 24; | ||
2246 | if (unlikely(bit >= (1ll << 32))) | ||
2247 | goto max_err_out; | ||
2248 | ntfs_debug("Found free record (#2), bit 0x%llx.", | ||
2249 | (long long)bit); | ||
2250 | goto found_free_rec; | ||
2251 | } | ||
2252 | /* | ||
2253 | * The mft bitmap needs to be expanded until it covers the first unused | ||
2254 | * mft record that we can allocate. | ||
2255 | * Note: The smallest mft record we allocate is mft record 24. | ||
2256 | */ | ||
2257 | bit = mftbmp_ni->initialized_size << 3; | ||
2258 | if (unlikely(bit >= (1ll << 32))) | ||
2259 | goto max_err_out; | ||
2260 | ntfs_debug("Status of mftbmp before extension: allocated_size 0x%llx, " | ||
2261 | "data_size 0x%llx, initialized_size 0x%llx.", | ||
2262 | (long long)mftbmp_ni->allocated_size, | ||
2263 | (long long)vol->mftbmp_ino->i_size, | ||
2264 | (long long)mftbmp_ni->initialized_size); | ||
2265 | if (mftbmp_ni->initialized_size + 8 > mftbmp_ni->allocated_size) { | ||
2266 | /* Need to extend bitmap by one more cluster. */ | ||
2267 | ntfs_debug("mftbmp: initialized_size + 8 > allocated_size."); | ||
2268 | err = ntfs_mft_bitmap_extend_allocation_nolock(vol); | ||
2269 | if (unlikely(err)) { | ||
2270 | up_write(&vol->mftbmp_lock); | ||
2271 | goto err_out; | ||
2272 | } | ||
2273 | ntfs_debug("Status of mftbmp after allocation extension: " | ||
2274 | "allocated_size 0x%llx, data_size 0x%llx, " | ||
2275 | "initialized_size 0x%llx.", | ||
2276 | (long long)mftbmp_ni->allocated_size, | ||
2277 | (long long)vol->mftbmp_ino->i_size, | ||
2278 | (long long)mftbmp_ni->initialized_size); | ||
2279 | } | ||
2280 | /* | ||
2281 | * We now have sufficient allocated space, extend the initialized_size | ||
2282 | * as well as the data_size if necessary and fill the new space with | ||
2283 | * zeroes. | ||
2284 | */ | ||
2285 | err = ntfs_mft_bitmap_extend_initialized_nolock(vol); | ||
2286 | if (unlikely(err)) { | ||
2287 | up_write(&vol->mftbmp_lock); | ||
2288 | goto err_out; | ||
2289 | } | ||
2290 | ntfs_debug("Status of mftbmp after initialized extention: " | ||
2291 | "allocated_size 0x%llx, data_size 0x%llx, " | ||
2292 | "initialized_size 0x%llx.", | ||
2293 | (long long)mftbmp_ni->allocated_size, | ||
2294 | (long long)vol->mftbmp_ino->i_size, | ||
2295 | (long long)mftbmp_ni->initialized_size); | ||
2296 | ntfs_debug("Found free record (#3), bit 0x%llx.", (long long)bit); | ||
2297 | found_free_rec: | ||
2298 | /* @bit is the found free mft record, allocate it in the mft bitmap. */ | ||
2299 | ntfs_debug("At found_free_rec."); | ||
2300 | err = ntfs_bitmap_set_bit(vol->mftbmp_ino, bit); | ||
2301 | if (unlikely(err)) { | ||
2302 | ntfs_error(vol->sb, "Failed to allocate bit in mft bitmap."); | ||
2303 | up_write(&vol->mftbmp_lock); | ||
2304 | goto err_out; | ||
2305 | } | ||
2306 | ntfs_debug("Set bit 0x%llx in mft bitmap.", (long long)bit); | ||
2307 | have_alloc_rec: | ||
2308 | /* | ||
2309 | * The mft bitmap is now uptodate. Deal with mft data attribute now. | ||
2310 | * Note, we keep hold of the mft bitmap lock for writing until all | ||
2311 | * modifications to the mft data attribute are complete, too, as they | ||
2312 | * will impact decisions for mft bitmap and mft record allocation done | ||
2313 | * by a parallel allocation and if the lock is not maintained a | ||
2314 | * parallel allocation could allocate the same mft record as this one. | ||
2315 | */ | ||
2316 | ll = (bit + 1) << vol->mft_record_size_bits; | ||
2317 | if (ll <= mft_ni->initialized_size) { | ||
2318 | ntfs_debug("Allocated mft record already initialized."); | ||
2319 | goto mft_rec_already_initialized; | ||
2320 | } | ||
2321 | ntfs_debug("Initializing allocated mft record."); | ||
2322 | /* | ||
2323 | * The mft record is outside the initialized data. Extend the mft data | ||
2324 | * attribute until it covers the allocated record. The loop is only | ||
2325 | * actually traversed more than once when a freshly formatted volume is | ||
2326 | * first written to so it optimizes away nicely in the common case. | ||
2327 | */ | ||
2328 | ntfs_debug("Status of mft data before extension: " | ||
2329 | "allocated_size 0x%llx, data_size 0x%llx, " | ||
2330 | "initialized_size 0x%llx.", | ||
2331 | (long long)mft_ni->allocated_size, | ||
2332 | (long long)vol->mft_ino->i_size, | ||
2333 | (long long)mft_ni->initialized_size); | ||
2334 | while (ll > mft_ni->allocated_size) { | ||
2335 | err = ntfs_mft_data_extend_allocation_nolock(vol); | ||
2336 | if (unlikely(err)) { | ||
2337 | ntfs_error(vol->sb, "Failed to extend mft data " | ||
2338 | "allocation."); | ||
2339 | goto undo_mftbmp_alloc_nolock; | ||
2340 | } | ||
2341 | ntfs_debug("Status of mft data after allocation extension: " | ||
2342 | "allocated_size 0x%llx, data_size 0x%llx, " | ||
2343 | "initialized_size 0x%llx.", | ||
2344 | (long long)mft_ni->allocated_size, | ||
2345 | (long long)vol->mft_ino->i_size, | ||
2346 | (long long)mft_ni->initialized_size); | ||
2347 | } | ||
2348 | /* | ||
2349 | * Extend mft data initialized size (and data size of course) to reach | ||
2350 | * the allocated mft record, formatting the mft records along the way. | ||
2351 | * Note: We only modify the ntfs_inode structure as that is all that is | ||
2352 | * needed by ntfs_mft_record_format(). We will update the attribute | ||
2353 | * record itself in one fell swoop later on. | ||
2354 | */ | ||
2355 | old_data_initialized = mft_ni->initialized_size; | ||
2356 | old_data_size = vol->mft_ino->i_size; | ||
2357 | while (ll > mft_ni->initialized_size) { | ||
2358 | s64 new_initialized_size, mft_no; | ||
2359 | |||
2360 | new_initialized_size = mft_ni->initialized_size + | ||
2361 | vol->mft_record_size; | ||
2362 | mft_no = mft_ni->initialized_size >> vol->mft_record_size_bits; | ||
2363 | if (new_initialized_size > vol->mft_ino->i_size) | ||
2364 | vol->mft_ino->i_size = new_initialized_size; | ||
2365 | ntfs_debug("Initializing mft record 0x%llx.", | ||
2366 | (long long)mft_no); | ||
2367 | err = ntfs_mft_record_format(vol, mft_no); | ||
2368 | if (unlikely(err)) { | ||
2369 | ntfs_error(vol->sb, "Failed to format mft record."); | ||
2370 | goto undo_data_init; | ||
2371 | } | ||
2372 | mft_ni->initialized_size = new_initialized_size; | ||
2373 | } | ||
2374 | record_formatted = TRUE; | ||
2375 | /* Update the mft data attribute record to reflect the new sizes. */ | ||
2376 | m = map_mft_record(mft_ni); | ||
2377 | if (IS_ERR(m)) { | ||
2378 | ntfs_error(vol->sb, "Failed to map mft record."); | ||
2379 | err = PTR_ERR(m); | ||
2380 | goto undo_data_init; | ||
2381 | } | ||
2382 | ctx = ntfs_attr_get_search_ctx(mft_ni, m); | ||
2383 | if (unlikely(!ctx)) { | ||
2384 | ntfs_error(vol->sb, "Failed to get search context."); | ||
2385 | err = -ENOMEM; | ||
2386 | unmap_mft_record(mft_ni); | ||
2387 | goto undo_data_init; | ||
2388 | } | ||
2389 | err = ntfs_attr_lookup(mft_ni->type, mft_ni->name, mft_ni->name_len, | ||
2390 | CASE_SENSITIVE, 0, NULL, 0, ctx); | ||
2391 | if (unlikely(err)) { | ||
2392 | ntfs_error(vol->sb, "Failed to find first attribute extent of " | ||
2393 | "mft data attribute."); | ||
2394 | ntfs_attr_put_search_ctx(ctx); | ||
2395 | unmap_mft_record(mft_ni); | ||
2396 | goto undo_data_init; | ||
2397 | } | ||
2398 | a = ctx->attr; | ||
2399 | a->data.non_resident.initialized_size = | ||
2400 | cpu_to_sle64(mft_ni->initialized_size); | ||
2401 | a->data.non_resident.data_size = cpu_to_sle64(vol->mft_ino->i_size); | ||
2402 | /* Ensure the changes make it to disk. */ | ||
2403 | flush_dcache_mft_record_page(ctx->ntfs_ino); | ||
2404 | mark_mft_record_dirty(ctx->ntfs_ino); | ||
2405 | ntfs_attr_put_search_ctx(ctx); | ||
2406 | unmap_mft_record(mft_ni); | ||
2407 | ntfs_debug("Status of mft data after mft record initialization: " | ||
2408 | "allocated_size 0x%llx, data_size 0x%llx, " | ||
2409 | "initialized_size 0x%llx.", | ||
2410 | (long long)mft_ni->allocated_size, | ||
2411 | (long long)vol->mft_ino->i_size, | ||
2412 | (long long)mft_ni->initialized_size); | ||
2413 | BUG_ON(vol->mft_ino->i_size > mft_ni->allocated_size); | ||
2414 | BUG_ON(mft_ni->initialized_size > vol->mft_ino->i_size); | ||
2415 | mft_rec_already_initialized: | ||
2416 | /* | ||
2417 | * We can finally drop the mft bitmap lock as the mft data attribute | ||
2418 | * has been fully updated. The only disparity left is that the | ||
2419 | * allocated mft record still needs to be marked as in use to match the | ||
2420 | * set bit in the mft bitmap but this is actually not a problem since | ||
2421 | * this mft record is not referenced from anywhere yet and the fact | ||
2422 | * that it is allocated in the mft bitmap means that no-one will try to | ||
2423 | * allocate it either. | ||
2424 | */ | ||
2425 | up_write(&vol->mftbmp_lock); | ||
2426 | /* | ||
2427 | * We now have allocated and initialized the mft record. Calculate the | ||
2428 | * index of and the offset within the page cache page the record is in. | ||
2429 | */ | ||
2430 | index = bit << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT; | ||
2431 | ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; | ||
2432 | /* Read, map, and pin the page containing the mft record. */ | ||
2433 | page = ntfs_map_page(vol->mft_ino->i_mapping, index); | ||
2434 | if (unlikely(IS_ERR(page))) { | ||
2435 | ntfs_error(vol->sb, "Failed to map page containing allocated " | ||
2436 | "mft record 0x%llx.", (long long)bit); | ||
2437 | err = PTR_ERR(page); | ||
2438 | goto undo_mftbmp_alloc; | ||
2439 | } | ||
2440 | lock_page(page); | ||
2441 | BUG_ON(!PageUptodate(page)); | ||
2442 | ClearPageUptodate(page); | ||
2443 | m = (MFT_RECORD*)((u8*)page_address(page) + ofs); | ||
2444 | /* If we just formatted the mft record no need to do it again. */ | ||
2445 | if (!record_formatted) { | ||
2446 | /* Sanity check that the mft record is really not in use. */ | ||
2447 | if (ntfs_is_file_record(m->magic) && | ||
2448 | (m->flags & MFT_RECORD_IN_USE)) { | ||
2449 | ntfs_error(vol->sb, "Mft record 0x%llx was marked " | ||
2450 | "free in mft bitmap but is marked " | ||
2451 | "used itself. Corrupt filesystem. " | ||
2452 | "Unmount and run chkdsk.", | ||
2453 | (long long)bit); | ||
2454 | err = -EIO; | ||
2455 | SetPageUptodate(page); | ||
2456 | unlock_page(page); | ||
2457 | ntfs_unmap_page(page); | ||
2458 | NVolSetErrors(vol); | ||
2459 | goto undo_mftbmp_alloc; | ||
2460 | } | ||
2461 | /* | ||
2462 | * We need to (re-)format the mft record, preserving the | ||
2463 | * sequence number if it is not zero as well as the update | ||
2464 | * sequence number if it is not zero or -1 (0xffff). This | ||
2465 | * means we do not need to care whether or not something went | ||
2466 | * wrong with the previous mft record. | ||
2467 | */ | ||
2468 | seq_no = m->sequence_number; | ||
2469 | usn = *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)); | ||
2470 | err = ntfs_mft_record_layout(vol, bit, m); | ||
2471 | if (unlikely(err)) { | ||
2472 | ntfs_error(vol->sb, "Failed to layout allocated mft " | ||
2473 | "record 0x%llx.", (long long)bit); | ||
2474 | SetPageUptodate(page); | ||
2475 | unlock_page(page); | ||
2476 | ntfs_unmap_page(page); | ||
2477 | goto undo_mftbmp_alloc; | ||
2478 | } | ||
2479 | if (seq_no) | ||
2480 | m->sequence_number = seq_no; | ||
2481 | if (usn && le16_to_cpu(usn) != 0xffff) | ||
2482 | *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = usn; | ||
2483 | } | ||
2484 | /* Set the mft record itself in use. */ | ||
2485 | m->flags |= MFT_RECORD_IN_USE; | ||
2486 | if (S_ISDIR(mode)) | ||
2487 | m->flags |= MFT_RECORD_IS_DIRECTORY; | ||
2488 | flush_dcache_page(page); | ||
2489 | SetPageUptodate(page); | ||
2490 | if (base_ni) { | ||
2491 | /* | ||
2492 | * Setup the base mft record in the extent mft record. This | ||
2493 | * completes initialization of the allocated extent mft record | ||
2494 | * and we can simply use it with map_extent_mft_record(). | ||
2495 | */ | ||
2496 | m->base_mft_record = MK_LE_MREF(base_ni->mft_no, | ||
2497 | base_ni->seq_no); | ||
2498 | /* | ||
2499 | * Allocate an extent inode structure for the new mft record, | ||
2500 | * attach it to the base inode @base_ni and map, pin, and lock | ||
2501 | * its, i.e. the allocated, mft record. | ||
2502 | */ | ||
2503 | m = map_extent_mft_record(base_ni, bit, &ni); | ||
2504 | if (IS_ERR(m)) { | ||
2505 | ntfs_error(vol->sb, "Failed to map allocated extent " | ||
2506 | "mft record 0x%llx.", (long long)bit); | ||
2507 | err = PTR_ERR(m); | ||
2508 | /* Set the mft record itself not in use. */ | ||
2509 | m->flags &= cpu_to_le16( | ||
2510 | ~le16_to_cpu(MFT_RECORD_IN_USE)); | ||
2511 | flush_dcache_page(page); | ||
2512 | /* Make sure the mft record is written out to disk. */ | ||
2513 | mark_ntfs_record_dirty(page, ofs); | ||
2514 | unlock_page(page); | ||
2515 | ntfs_unmap_page(page); | ||
2516 | goto undo_mftbmp_alloc; | ||
2517 | } | ||
2518 | /* | ||
2519 | * Make sure the allocated mft record is written out to disk. | ||
2520 | * No need to set the inode dirty because the caller is going | ||
2521 | * to do that anyway after finishing with the new extent mft | ||
2522 | * record (e.g. at a minimum a new attribute will be added to | ||
2523 | * the mft record). | ||
2524 | */ | ||
2525 | mark_ntfs_record_dirty(page, ofs); | ||
2526 | unlock_page(page); | ||
2527 | /* | ||
2528 | * Need to unmap the page since map_extent_mft_record() mapped | ||
2529 | * it as well so we have it mapped twice at the moment. | ||
2530 | */ | ||
2531 | ntfs_unmap_page(page); | ||
2532 | } else { | ||
2533 | /* | ||
2534 | * Allocate a new VFS inode and set it up. NOTE: @vi->i_nlink | ||
2535 | * is set to 1 but the mft record->link_count is 0. The caller | ||
2536 | * needs to bear this in mind. | ||
2537 | */ | ||
2538 | vi = new_inode(vol->sb); | ||
2539 | if (unlikely(!vi)) { | ||
2540 | err = -ENOMEM; | ||
2541 | /* Set the mft record itself not in use. */ | ||
2542 | m->flags &= cpu_to_le16( | ||
2543 | ~le16_to_cpu(MFT_RECORD_IN_USE)); | ||
2544 | flush_dcache_page(page); | ||
2545 | /* Make sure the mft record is written out to disk. */ | ||
2546 | mark_ntfs_record_dirty(page, ofs); | ||
2547 | unlock_page(page); | ||
2548 | ntfs_unmap_page(page); | ||
2549 | goto undo_mftbmp_alloc; | ||
2550 | } | ||
2551 | vi->i_ino = bit; | ||
2552 | /* | ||
2553 | * This is the optimal IO size (for stat), not the fs block | ||
2554 | * size. | ||
2555 | */ | ||
2556 | vi->i_blksize = PAGE_CACHE_SIZE; | ||
2557 | /* | ||
2558 | * This is for checking whether an inode has changed w.r.t. a | ||
2559 | * file so that the file can be updated if necessary (compare | ||
2560 | * with f_version). | ||
2561 | */ | ||
2562 | vi->i_version = 1; | ||
2563 | |||
2564 | /* The owner and group come from the ntfs volume. */ | ||
2565 | vi->i_uid = vol->uid; | ||
2566 | vi->i_gid = vol->gid; | ||
2567 | |||
2568 | /* Initialize the ntfs specific part of @vi. */ | ||
2569 | ntfs_init_big_inode(vi); | ||
2570 | ni = NTFS_I(vi); | ||
2571 | /* | ||
2572 | * Set the appropriate mode, attribute type, and name. For | ||
2573 | * directories, also setup the index values to the defaults. | ||
2574 | */ | ||
2575 | if (S_ISDIR(mode)) { | ||
2576 | vi->i_mode = S_IFDIR | S_IRWXUGO; | ||
2577 | vi->i_mode &= ~vol->dmask; | ||
2578 | |||
2579 | NInoSetMstProtected(ni); | ||
2580 | ni->type = AT_INDEX_ALLOCATION; | ||
2581 | ni->name = I30; | ||
2582 | ni->name_len = 4; | ||
2583 | |||
2584 | ni->itype.index.block_size = 4096; | ||
2585 | ni->itype.index.block_size_bits = generic_ffs(4096) - 1; | ||
2586 | ni->itype.index.collation_rule = COLLATION_FILE_NAME; | ||
2587 | if (vol->cluster_size <= ni->itype.index.block_size) { | ||
2588 | ni->itype.index.vcn_size = vol->cluster_size; | ||
2589 | ni->itype.index.vcn_size_bits = | ||
2590 | vol->cluster_size_bits; | ||
2591 | } else { | ||
2592 | ni->itype.index.vcn_size = vol->sector_size; | ||
2593 | ni->itype.index.vcn_size_bits = | ||
2594 | vol->sector_size_bits; | ||
2595 | } | ||
2596 | } else { | ||
2597 | vi->i_mode = S_IFREG | S_IRWXUGO; | ||
2598 | vi->i_mode &= ~vol->fmask; | ||
2599 | |||
2600 | ni->type = AT_DATA; | ||
2601 | ni->name = NULL; | ||
2602 | ni->name_len = 0; | ||
2603 | } | ||
2604 | if (IS_RDONLY(vi)) | ||
2605 | vi->i_mode &= ~S_IWUGO; | ||
2606 | |||
2607 | /* Set the inode times to the current time. */ | ||
2608 | vi->i_atime = vi->i_mtime = vi->i_ctime = | ||
2609 | current_fs_time(vi->i_sb); | ||
2610 | /* | ||
2611 | * Set the file size to 0, the ntfs inode sizes are set to 0 by | ||
2612 | * the call to ntfs_init_big_inode() above. | ||
2613 | */ | ||
2614 | vi->i_size = 0; | ||
2615 | vi->i_blocks = 0; | ||
2616 | |||
2617 | /* Set the sequence number. */ | ||
2618 | vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number); | ||
2619 | /* | ||
2620 | * Manually map, pin, and lock the mft record as we already | ||
2621 | * have its page mapped and it is very easy to do. | ||
2622 | */ | ||
2623 | atomic_inc(&ni->count); | ||
2624 | down(&ni->mrec_lock); | ||
2625 | ni->page = page; | ||
2626 | ni->page_ofs = ofs; | ||
2627 | /* | ||
2628 | * Make sure the allocated mft record is written out to disk. | ||
2629 | * NOTE: We do not set the ntfs inode dirty because this would | ||
2630 | * fail in ntfs_write_inode() because the inode does not have a | ||
2631 | * standard information attribute yet. Also, there is no need | ||
2632 | * to set the inode dirty because the caller is going to do | ||
2633 | * that anyway after finishing with the new mft record (e.g. at | ||
2634 | * a minimum some new attributes will be added to the mft | ||
2635 | * record). | ||
2636 | */ | ||
2637 | mark_ntfs_record_dirty(page, ofs); | ||
2638 | unlock_page(page); | ||
2639 | |||
2640 | /* Add the inode to the inode hash for the superblock. */ | ||
2641 | insert_inode_hash(vi); | ||
2642 | |||
2643 | /* Update the default mft allocation position. */ | ||
2644 | vol->mft_data_pos = bit + 1; | ||
2645 | } | ||
2646 | /* | ||
2647 | * Return the opened, allocated inode of the allocated mft record as | ||
2648 | * well as the mapped, pinned, and locked mft record. | ||
2649 | */ | ||
2650 | ntfs_debug("Returning opened, allocated %sinode 0x%llx.", | ||
2651 | base_ni ? "extent " : "", (long long)bit); | ||
2652 | *mrec = m; | ||
2653 | return ni; | ||
2654 | undo_data_init: | ||
2655 | mft_ni->initialized_size = old_data_initialized; | ||
2656 | vol->mft_ino->i_size = old_data_size; | ||
2657 | goto undo_mftbmp_alloc_nolock; | ||
2658 | undo_mftbmp_alloc: | ||
2659 | down_write(&vol->mftbmp_lock); | ||
2660 | undo_mftbmp_alloc_nolock: | ||
2661 | if (ntfs_bitmap_clear_bit(vol->mftbmp_ino, bit)) { | ||
2662 | ntfs_error(vol->sb, "Failed to clear bit in mft bitmap.%s", es); | ||
2663 | NVolSetErrors(vol); | ||
2664 | } | ||
2665 | up_write(&vol->mftbmp_lock); | ||
2666 | err_out: | ||
2667 | return ERR_PTR(err); | ||
2668 | max_err_out: | ||
2669 | ntfs_warning(vol->sb, "Cannot allocate mft record because the maximum " | ||
2670 | "number of inodes (2^32) has already been reached."); | ||
2671 | up_write(&vol->mftbmp_lock); | ||
2672 | return ERR_PTR(-ENOSPC); | ||
2673 | } | ||
2674 | |||
2675 | /** | ||
2676 | * ntfs_extent_mft_record_free - free an extent mft record on an ntfs volume | ||
2677 | * @ni: ntfs inode of the mapped extent mft record to free | ||
2678 | * @m: mapped extent mft record of the ntfs inode @ni | ||
2679 | * | ||
2680 | * Free the mapped extent mft record @m of the extent ntfs inode @ni. | ||
2681 | * | ||
2682 | * Note that this function unmaps the mft record and closes and destroys @ni | ||
2683 | * internally and hence you cannot use either @ni nor @m any more after this | ||
2684 | * function returns success. | ||
2685 | * | ||
2686 | * On success return 0 and on error return -errno. @ni and @m are still valid | ||
2687 | * in this case and have not been freed. | ||
2688 | * | ||
2689 | * For some errors an error message is displayed and the success code 0 is | ||
2690 | * returned and the volume is then left dirty on umount. This makes sense in | ||
2691 | * case we could not rollback the changes that were already done since the | ||
2692 | * caller no longer wants to reference this mft record so it does not matter to | ||
2693 | * the caller if something is wrong with it as long as it is properly detached | ||
2694 | * from the base inode. | ||
2695 | */ | ||
2696 | int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m) | ||
2697 | { | ||
2698 | unsigned long mft_no = ni->mft_no; | ||
2699 | ntfs_volume *vol = ni->vol; | ||
2700 | ntfs_inode *base_ni; | ||
2701 | ntfs_inode **extent_nis; | ||
2702 | int i, err; | ||
2703 | le16 old_seq_no; | ||
2704 | u16 seq_no; | ||
2705 | |||
2706 | BUG_ON(NInoAttr(ni)); | ||
2707 | BUG_ON(ni->nr_extents != -1); | ||
2708 | |||
2709 | down(&ni->extent_lock); | ||
2710 | base_ni = ni->ext.base_ntfs_ino; | ||
2711 | up(&ni->extent_lock); | ||
2712 | |||
2713 | BUG_ON(base_ni->nr_extents <= 0); | ||
2714 | |||
2715 | ntfs_debug("Entering for extent inode 0x%lx, base inode 0x%lx.\n", | ||
2716 | mft_no, base_ni->mft_no); | ||
2717 | |||
2718 | down(&base_ni->extent_lock); | ||
2719 | |||
2720 | /* Make sure we are holding the only reference to the extent inode. */ | ||
2721 | if (atomic_read(&ni->count) > 2) { | ||
2722 | ntfs_error(vol->sb, "Tried to free busy extent inode 0x%lx, " | ||
2723 | "not freeing.", base_ni->mft_no); | ||
2724 | up(&base_ni->extent_lock); | ||
2725 | return -EBUSY; | ||
2726 | } | ||
2727 | |||
2728 | /* Dissociate the ntfs inode from the base inode. */ | ||
2729 | extent_nis = base_ni->ext.extent_ntfs_inos; | ||
2730 | err = -ENOENT; | ||
2731 | for (i = 0; i < base_ni->nr_extents; i++) { | ||
2732 | if (ni != extent_nis[i]) | ||
2733 | continue; | ||
2734 | extent_nis += i; | ||
2735 | base_ni->nr_extents--; | ||
2736 | memmove(extent_nis, extent_nis + 1, (base_ni->nr_extents - i) * | ||
2737 | sizeof(ntfs_inode*)); | ||
2738 | err = 0; | ||
2739 | break; | ||
2740 | } | ||
2741 | |||
2742 | up(&base_ni->extent_lock); | ||
2743 | |||
2744 | if (unlikely(err)) { | ||
2745 | ntfs_error(vol->sb, "Extent inode 0x%lx is not attached to " | ||
2746 | "its base inode 0x%lx.", mft_no, | ||
2747 | base_ni->mft_no); | ||
2748 | BUG(); | ||
2749 | } | ||
2750 | |||
2751 | /* | ||
2752 | * The extent inode is no longer attached to the base inode so no one | ||
2753 | * can get a reference to it any more. | ||
2754 | */ | ||
2755 | |||
2756 | /* Mark the mft record as not in use. */ | ||
2757 | m->flags &= const_cpu_to_le16(~const_le16_to_cpu(MFT_RECORD_IN_USE)); | ||
2758 | |||
2759 | /* Increment the sequence number, skipping zero, if it is not zero. */ | ||
2760 | old_seq_no = m->sequence_number; | ||
2761 | seq_no = le16_to_cpu(old_seq_no); | ||
2762 | if (seq_no == 0xffff) | ||
2763 | seq_no = 1; | ||
2764 | else if (seq_no) | ||
2765 | seq_no++; | ||
2766 | m->sequence_number = cpu_to_le16(seq_no); | ||
2767 | |||
2768 | /* | ||
2769 | * Set the ntfs inode dirty and write it out. We do not need to worry | ||
2770 | * about the base inode here since whatever caused the extent mft | ||
2771 | * record to be freed is guaranteed to do it already. | ||
2772 | */ | ||
2773 | NInoSetDirty(ni); | ||
2774 | err = write_mft_record(ni, m, 0); | ||
2775 | if (unlikely(err)) { | ||
2776 | ntfs_error(vol->sb, "Failed to write mft record 0x%lx, not " | ||
2777 | "freeing.", mft_no); | ||
2778 | goto rollback; | ||
2779 | } | ||
2780 | rollback_error: | ||
2781 | /* Unmap and throw away the now freed extent inode. */ | ||
2782 | unmap_extent_mft_record(ni); | ||
2783 | ntfs_clear_extent_inode(ni); | ||
2784 | |||
2785 | /* Clear the bit in the $MFT/$BITMAP corresponding to this record. */ | ||
2786 | down_write(&vol->mftbmp_lock); | ||
2787 | err = ntfs_bitmap_clear_bit(vol->mftbmp_ino, mft_no); | ||
2788 | up_write(&vol->mftbmp_lock); | ||
2789 | if (unlikely(err)) { | ||
2790 | /* | ||
2791 | * The extent inode is gone but we failed to deallocate it in | ||
2792 | * the mft bitmap. Just emit a warning and leave the volume | ||
2793 | * dirty on umount. | ||
2794 | */ | ||
2795 | ntfs_error(vol->sb, "Failed to clear bit in mft bitmap.%s", es); | ||
2796 | NVolSetErrors(vol); | ||
2797 | } | ||
2798 | return 0; | ||
2799 | rollback: | ||
2800 | /* Rollback what we did... */ | ||
2801 | down(&base_ni->extent_lock); | ||
2802 | extent_nis = base_ni->ext.extent_ntfs_inos; | ||
2803 | if (!(base_ni->nr_extents & 3)) { | ||
2804 | int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode*); | ||
2805 | |||
2806 | extent_nis = (ntfs_inode**)kmalloc(new_size, GFP_NOFS); | ||
2807 | if (unlikely(!extent_nis)) { | ||
2808 | ntfs_error(vol->sb, "Failed to allocate internal " | ||
2809 | "buffer during rollback.%s", es); | ||
2810 | up(&base_ni->extent_lock); | ||
2811 | NVolSetErrors(vol); | ||
2812 | goto rollback_error; | ||
2813 | } | ||
2814 | if (base_ni->nr_extents) { | ||
2815 | BUG_ON(!base_ni->ext.extent_ntfs_inos); | ||
2816 | memcpy(extent_nis, base_ni->ext.extent_ntfs_inos, | ||
2817 | new_size - 4 * sizeof(ntfs_inode*)); | ||
2818 | kfree(base_ni->ext.extent_ntfs_inos); | ||
2819 | } | ||
2820 | base_ni->ext.extent_ntfs_inos = extent_nis; | ||
2821 | } | ||
2822 | m->flags |= MFT_RECORD_IN_USE; | ||
2823 | m->sequence_number = old_seq_no; | ||
2824 | extent_nis[base_ni->nr_extents++] = ni; | ||
2825 | up(&base_ni->extent_lock); | ||
2826 | mark_mft_record_dirty(ni); | ||
2827 | return err; | ||
2828 | } | ||
2829 | #endif /* NTFS_RW */ | ||
diff --git a/fs/ntfs/mft.h b/fs/ntfs/mft.h new file mode 100644 index 000000000000..407de2cef1d6 --- /dev/null +++ b/fs/ntfs/mft.h | |||
@@ -0,0 +1,127 @@ | |||
1 | /* | ||
2 | * mft.h - Defines for mft record handling in NTFS Linux kernel driver. | ||
3 | * Part of the Linux-NTFS project. | ||
4 | * | ||
5 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #ifndef _LINUX_NTFS_MFT_H | ||
24 | #define _LINUX_NTFS_MFT_H | ||
25 | |||
26 | #include <linux/fs.h> | ||
27 | #include <linux/highmem.h> | ||
28 | #include <linux/pagemap.h> | ||
29 | |||
30 | #include "inode.h" | ||
31 | |||
32 | extern MFT_RECORD *map_mft_record(ntfs_inode *ni); | ||
33 | extern void unmap_mft_record(ntfs_inode *ni); | ||
34 | |||
35 | extern MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref, | ||
36 | ntfs_inode **ntfs_ino); | ||
37 | |||
38 | static inline void unmap_extent_mft_record(ntfs_inode *ni) | ||
39 | { | ||
40 | unmap_mft_record(ni); | ||
41 | return; | ||
42 | } | ||
43 | |||
44 | #ifdef NTFS_RW | ||
45 | |||
46 | /** | ||
47 | * flush_dcache_mft_record_page - flush_dcache_page() for mft records | ||
48 | * @ni: ntfs inode structure of mft record | ||
49 | * | ||
50 | * Call flush_dcache_page() for the page in which an mft record resides. | ||
51 | * | ||
52 | * This must be called every time an mft record is modified, just after the | ||
53 | * modification. | ||
54 | */ | ||
55 | static inline void flush_dcache_mft_record_page(ntfs_inode *ni) | ||
56 | { | ||
57 | flush_dcache_page(ni->page); | ||
58 | } | ||
59 | |||
60 | extern void __mark_mft_record_dirty(ntfs_inode *ni); | ||
61 | |||
62 | /** | ||
63 | * mark_mft_record_dirty - set the mft record and the page containing it dirty | ||
64 | * @ni: ntfs inode describing the mapped mft record | ||
65 | * | ||
66 | * Set the mapped (extent) mft record of the (base or extent) ntfs inode @ni, | ||
67 | * as well as the page containing the mft record, dirty. Also, mark the base | ||
68 | * vfs inode dirty. This ensures that any changes to the mft record are | ||
69 | * written out to disk. | ||
70 | * | ||
71 | * NOTE: Do not do anything if the mft record is already marked dirty. | ||
72 | */ | ||
73 | static inline void mark_mft_record_dirty(ntfs_inode *ni) | ||
74 | { | ||
75 | if (!NInoTestSetDirty(ni)) | ||
76 | __mark_mft_record_dirty(ni); | ||
77 | } | ||
78 | |||
79 | extern int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, | ||
80 | MFT_RECORD *m, int sync); | ||
81 | |||
82 | extern int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync); | ||
83 | |||
84 | /** | ||
85 | * write_mft_record - write out a mapped (extent) mft record | ||
86 | * @ni: ntfs inode describing the mapped (extent) mft record | ||
87 | * @m: mapped (extent) mft record to write | ||
88 | * @sync: if true, wait for i/o completion | ||
89 | * | ||
90 | * This is just a wrapper for write_mft_record_nolock() (see mft.c), which | ||
91 | * locks the page for the duration of the write. This ensures that there are | ||
92 | * no race conditions between writing the mft record via the dirty inode code | ||
93 | * paths and via the page cache write back code paths or between writing | ||
94 | * neighbouring mft records residing in the same page. | ||
95 | * | ||
96 | * Locking the page also serializes us against ->readpage() if the page is not | ||
97 | * uptodate. | ||
98 | * | ||
99 | * On success, clean the mft record and return 0. On error, leave the mft | ||
100 | * record dirty and return -errno. The caller should call make_bad_inode() on | ||
101 | * the base inode to ensure no more access happens to this inode. We do not do | ||
102 | * it here as the caller may want to finish writing other extent mft records | ||
103 | * first to minimize on-disk metadata inconsistencies. | ||
104 | */ | ||
105 | static inline int write_mft_record(ntfs_inode *ni, MFT_RECORD *m, int sync) | ||
106 | { | ||
107 | struct page *page = ni->page; | ||
108 | int err; | ||
109 | |||
110 | BUG_ON(!page); | ||
111 | lock_page(page); | ||
112 | err = write_mft_record_nolock(ni, m, sync); | ||
113 | unlock_page(page); | ||
114 | return err; | ||
115 | } | ||
116 | |||
117 | extern BOOL ntfs_may_write_mft_record(ntfs_volume *vol, | ||
118 | const unsigned long mft_no, const MFT_RECORD *m, | ||
119 | ntfs_inode **locked_ni); | ||
120 | |||
121 | extern ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode, | ||
122 | ntfs_inode *base_ni, MFT_RECORD **mrec); | ||
123 | extern int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m); | ||
124 | |||
125 | #endif /* NTFS_RW */ | ||
126 | |||
127 | #endif /* _LINUX_NTFS_MFT_H */ | ||
diff --git a/fs/ntfs/mst.c b/fs/ntfs/mst.c new file mode 100644 index 000000000000..5a858d839b65 --- /dev/null +++ b/fs/ntfs/mst.c | |||
@@ -0,0 +1,203 @@ | |||
1 | /* | ||
2 | * mst.c - NTFS multi sector transfer protection handling code. Part of the | ||
3 | * Linux-NTFS project. | ||
4 | * | ||
5 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #include "ntfs.h" | ||
24 | |||
25 | /** | ||
26 | * post_read_mst_fixup - deprotect multi sector transfer protected data | ||
27 | * @b: pointer to the data to deprotect | ||
28 | * @size: size in bytes of @b | ||
29 | * | ||
30 | * Perform the necessary post read multi sector transfer fixup and detect the | ||
31 | * presence of incomplete multi sector transfers. - In that case, overwrite the | ||
32 | * magic of the ntfs record header being processed with "BAAD" (in memory only!) | ||
33 | * and abort processing. | ||
34 | * | ||
35 | * Return 0 on success and -EINVAL on error ("BAAD" magic will be present). | ||
36 | * | ||
37 | * NOTE: We consider the absence / invalidity of an update sequence array to | ||
38 | * mean that the structure is not protected at all and hence doesn't need to | ||
39 | * be fixed up. Thus, we return success and not failure in this case. This is | ||
40 | * in contrast to pre_write_mst_fixup(), see below. | ||
41 | */ | ||
42 | int post_read_mst_fixup(NTFS_RECORD *b, const u32 size) | ||
43 | { | ||
44 | u16 usa_ofs, usa_count, usn; | ||
45 | u16 *usa_pos, *data_pos; | ||
46 | |||
47 | /* Setup the variables. */ | ||
48 | usa_ofs = le16_to_cpu(b->usa_ofs); | ||
49 | /* Decrement usa_count to get number of fixups. */ | ||
50 | usa_count = le16_to_cpu(b->usa_count) - 1; | ||
51 | /* Size and alignment checks. */ | ||
52 | if ( size & (NTFS_BLOCK_SIZE - 1) || | ||
53 | usa_ofs & 1 || | ||
54 | usa_ofs + (usa_count * 2) > size || | ||
55 | (size >> NTFS_BLOCK_SIZE_BITS) != usa_count) | ||
56 | return 0; | ||
57 | /* Position of usn in update sequence array. */ | ||
58 | usa_pos = (u16*)b + usa_ofs/sizeof(u16); | ||
59 | /* | ||
60 | * The update sequence number which has to be equal to each of the | ||
61 | * u16 values before they are fixed up. Note no need to care for | ||
62 | * endianness since we are comparing and moving data for on disk | ||
63 | * structures which means the data is consistent. - If it is | ||
64 | * consistenty the wrong endianness it doesn't make any difference. | ||
65 | */ | ||
66 | usn = *usa_pos; | ||
67 | /* | ||
68 | * Position in protected data of first u16 that needs fixing up. | ||
69 | */ | ||
70 | data_pos = (u16*)b + NTFS_BLOCK_SIZE/sizeof(u16) - 1; | ||
71 | /* | ||
72 | * Check for incomplete multi sector transfer(s). | ||
73 | */ | ||
74 | while (usa_count--) { | ||
75 | if (*data_pos != usn) { | ||
76 | /* | ||
77 | * Incomplete multi sector transfer detected! )-: | ||
78 | * Set the magic to "BAAD" and return failure. | ||
79 | * Note that magic_BAAD is already converted to le32. | ||
80 | */ | ||
81 | b->magic = magic_BAAD; | ||
82 | return -EINVAL; | ||
83 | } | ||
84 | data_pos += NTFS_BLOCK_SIZE/sizeof(u16); | ||
85 | } | ||
86 | /* Re-setup the variables. */ | ||
87 | usa_count = le16_to_cpu(b->usa_count) - 1; | ||
88 | data_pos = (u16*)b + NTFS_BLOCK_SIZE/sizeof(u16) - 1; | ||
89 | /* Fixup all sectors. */ | ||
90 | while (usa_count--) { | ||
91 | /* | ||
92 | * Increment position in usa and restore original data from | ||
93 | * the usa into the data buffer. | ||
94 | */ | ||
95 | *data_pos = *(++usa_pos); | ||
96 | /* Increment position in data as well. */ | ||
97 | data_pos += NTFS_BLOCK_SIZE/sizeof(u16); | ||
98 | } | ||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | /** | ||
103 | * pre_write_mst_fixup - apply multi sector transfer protection | ||
104 | * @b: pointer to the data to protect | ||
105 | * @size: size in bytes of @b | ||
106 | * | ||
107 | * Perform the necessary pre write multi sector transfer fixup on the data | ||
108 | * pointer to by @b of @size. | ||
109 | * | ||
110 | * Return 0 if fixup applied (success) or -EINVAL if no fixup was performed | ||
111 | * (assumed not needed). This is in contrast to post_read_mst_fixup() above. | ||
112 | * | ||
113 | * NOTE: We consider the absence / invalidity of an update sequence array to | ||
114 | * mean that the structure is not subject to protection and hence doesn't need | ||
115 | * to be fixed up. This means that you have to create a valid update sequence | ||
116 | * array header in the ntfs record before calling this function, otherwise it | ||
117 | * will fail (the header needs to contain the position of the update sequence | ||
118 | * array together with the number of elements in the array). You also need to | ||
119 | * initialise the update sequence number before calling this function | ||
120 | * otherwise a random word will be used (whatever was in the record at that | ||
121 | * position at that time). | ||
122 | */ | ||
123 | int pre_write_mst_fixup(NTFS_RECORD *b, const u32 size) | ||
124 | { | ||
125 | le16 *usa_pos, *data_pos; | ||
126 | u16 usa_ofs, usa_count, usn; | ||
127 | le16 le_usn; | ||
128 | |||
129 | /* Sanity check + only fixup if it makes sense. */ | ||
130 | if (!b || ntfs_is_baad_record(b->magic) || | ||
131 | ntfs_is_hole_record(b->magic)) | ||
132 | return -EINVAL; | ||
133 | /* Setup the variables. */ | ||
134 | usa_ofs = le16_to_cpu(b->usa_ofs); | ||
135 | /* Decrement usa_count to get number of fixups. */ | ||
136 | usa_count = le16_to_cpu(b->usa_count) - 1; | ||
137 | /* Size and alignment checks. */ | ||
138 | if ( size & (NTFS_BLOCK_SIZE - 1) || | ||
139 | usa_ofs & 1 || | ||
140 | usa_ofs + (usa_count * 2) > size || | ||
141 | (size >> NTFS_BLOCK_SIZE_BITS) != usa_count) | ||
142 | return -EINVAL; | ||
143 | /* Position of usn in update sequence array. */ | ||
144 | usa_pos = (le16*)((u8*)b + usa_ofs); | ||
145 | /* | ||
146 | * Cyclically increment the update sequence number | ||
147 | * (skipping 0 and -1, i.e. 0xffff). | ||
148 | */ | ||
149 | usn = le16_to_cpup(usa_pos) + 1; | ||
150 | if (usn == 0xffff || !usn) | ||
151 | usn = 1; | ||
152 | le_usn = cpu_to_le16(usn); | ||
153 | *usa_pos = le_usn; | ||
154 | /* Position in data of first u16 that needs fixing up. */ | ||
155 | data_pos = (le16*)b + NTFS_BLOCK_SIZE/sizeof(le16) - 1; | ||
156 | /* Fixup all sectors. */ | ||
157 | while (usa_count--) { | ||
158 | /* | ||
159 | * Increment the position in the usa and save the | ||
160 | * original data from the data buffer into the usa. | ||
161 | */ | ||
162 | *(++usa_pos) = *data_pos; | ||
163 | /* Apply fixup to data. */ | ||
164 | *data_pos = le_usn; | ||
165 | /* Increment position in data as well. */ | ||
166 | data_pos += NTFS_BLOCK_SIZE/sizeof(le16); | ||
167 | } | ||
168 | return 0; | ||
169 | } | ||
170 | |||
171 | /** | ||
172 | * post_write_mst_fixup - fast deprotect multi sector transfer protected data | ||
173 | * @b: pointer to the data to deprotect | ||
174 | * | ||
175 | * Perform the necessary post write multi sector transfer fixup, not checking | ||
176 | * for any errors, because we assume we have just used pre_write_mst_fixup(), | ||
177 | * thus the data will be fine or we would never have gotten here. | ||
178 | */ | ||
179 | void post_write_mst_fixup(NTFS_RECORD *b) | ||
180 | { | ||
181 | le16 *usa_pos, *data_pos; | ||
182 | |||
183 | u16 usa_ofs = le16_to_cpu(b->usa_ofs); | ||
184 | u16 usa_count = le16_to_cpu(b->usa_count) - 1; | ||
185 | |||
186 | /* Position of usn in update sequence array. */ | ||
187 | usa_pos = (le16*)b + usa_ofs/sizeof(le16); | ||
188 | |||
189 | /* Position in protected data of first u16 that needs fixing up. */ | ||
190 | data_pos = (le16*)b + NTFS_BLOCK_SIZE/sizeof(le16) - 1; | ||
191 | |||
192 | /* Fixup all sectors. */ | ||
193 | while (usa_count--) { | ||
194 | /* | ||
195 | * Increment position in usa and restore original data from | ||
196 | * the usa into the data buffer. | ||
197 | */ | ||
198 | *data_pos = *(++usa_pos); | ||
199 | |||
200 | /* Increment position in data as well. */ | ||
201 | data_pos += NTFS_BLOCK_SIZE/sizeof(le16); | ||
202 | } | ||
203 | } | ||
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c new file mode 100644 index 000000000000..7c7e13b43b2e --- /dev/null +++ b/fs/ntfs/namei.c | |||
@@ -0,0 +1,498 @@ | |||
1 | /* | ||
2 | * namei.c - NTFS kernel directory inode operations. Part of the Linux-NTFS | ||
3 | * project. | ||
4 | * | ||
5 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #include <linux/dcache.h> | ||
24 | #include <linux/security.h> | ||
25 | |||
26 | #include "attrib.h" | ||
27 | #include "debug.h" | ||
28 | #include "dir.h" | ||
29 | #include "mft.h" | ||
30 | #include "ntfs.h" | ||
31 | |||
32 | /** | ||
33 | * ntfs_lookup - find the inode represented by a dentry in a directory inode | ||
34 | * @dir_ino: directory inode in which to look for the inode | ||
35 | * @dent: dentry representing the inode to look for | ||
36 | * @nd: lookup nameidata | ||
37 | * | ||
38 | * In short, ntfs_lookup() looks for the inode represented by the dentry @dent | ||
39 | * in the directory inode @dir_ino and if found attaches the inode to the | ||
40 | * dentry @dent. | ||
41 | * | ||
42 | * In more detail, the dentry @dent specifies which inode to look for by | ||
43 | * supplying the name of the inode in @dent->d_name.name. ntfs_lookup() | ||
44 | * converts the name to Unicode and walks the contents of the directory inode | ||
45 | * @dir_ino looking for the converted Unicode name. If the name is found in the | ||
46 | * directory, the corresponding inode is loaded by calling ntfs_iget() on its | ||
47 | * inode number and the inode is associated with the dentry @dent via a call to | ||
48 | * d_splice_alias(). | ||
49 | * | ||
50 | * If the name is not found in the directory, a NULL inode is inserted into the | ||
51 | * dentry @dent via a call to d_add(). The dentry is then termed a negative | ||
52 | * dentry. | ||
53 | * | ||
54 | * Only if an actual error occurs, do we return an error via ERR_PTR(). | ||
55 | * | ||
56 | * In order to handle the case insensitivity issues of NTFS with regards to the | ||
57 | * dcache and the dcache requiring only one dentry per directory, we deal with | ||
58 | * dentry aliases that only differ in case in ->ntfs_lookup() while maintaining | ||
59 | * a case sensitive dcache. This means that we get the full benefit of dcache | ||
60 | * speed when the file/directory is looked up with the same case as returned by | ||
61 | * ->ntfs_readdir() but that a lookup for any other case (or for the short file | ||
62 | * name) will not find anything in dcache and will enter ->ntfs_lookup() | ||
63 | * instead, where we search the directory for a fully matching file name | ||
64 | * (including case) and if that is not found, we search for a file name that | ||
65 | * matches with different case and if that has non-POSIX semantics we return | ||
66 | * that. We actually do only one search (case sensitive) and keep tabs on | ||
67 | * whether we have found a case insensitive match in the process. | ||
68 | * | ||
69 | * To simplify matters for us, we do not treat the short vs long filenames as | ||
70 | * two hard links but instead if the lookup matches a short filename, we | ||
71 | * return the dentry for the corresponding long filename instead. | ||
72 | * | ||
73 | * There are three cases we need to distinguish here: | ||
74 | * | ||
75 | * 1) @dent perfectly matches (i.e. including case) a directory entry with a | ||
76 | * file name in the WIN32 or POSIX namespaces. In this case | ||
77 | * ntfs_lookup_inode_by_name() will return with name set to NULL and we | ||
78 | * just d_splice_alias() @dent. | ||
79 | * 2) @dent matches (not including case) a directory entry with a file name in | ||
80 | * the WIN32 namespace. In this case ntfs_lookup_inode_by_name() will return | ||
81 | * with name set to point to a kmalloc()ed ntfs_name structure containing | ||
82 | * the properly cased little endian Unicode name. We convert the name to the | ||
83 | * current NLS code page, search if a dentry with this name already exists | ||
84 | * and if so return that instead of @dent. At this point things are | ||
85 | * complicated by the possibility of 'disconnected' dentries due to NFS | ||
86 | * which we deal with appropriately (see the code comments). The VFS will | ||
87 | * then destroy the old @dent and use the one we returned. If a dentry is | ||
88 | * not found, we allocate a new one, d_splice_alias() it, and return it as | ||
89 | * above. | ||
90 | * 3) @dent matches either perfectly or not (i.e. we don't care about case) a | ||
91 | * directory entry with a file name in the DOS namespace. In this case | ||
92 | * ntfs_lookup_inode_by_name() will return with name set to point to a | ||
93 | * kmalloc()ed ntfs_name structure containing the mft reference (cpu endian) | ||
94 | * of the inode. We use the mft reference to read the inode and to find the | ||
95 | * file name in the WIN32 namespace corresponding to the matched short file | ||
96 | * name. We then convert the name to the current NLS code page, and proceed | ||
97 | * searching for a dentry with this name, etc, as in case 2), above. | ||
98 | * | ||
99 | * Locking: Caller must hold i_sem on the directory. | ||
100 | */ | ||
101 | static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, | ||
102 | struct nameidata *nd) | ||
103 | { | ||
104 | ntfs_volume *vol = NTFS_SB(dir_ino->i_sb); | ||
105 | struct inode *dent_inode; | ||
106 | ntfschar *uname; | ||
107 | ntfs_name *name = NULL; | ||
108 | MFT_REF mref; | ||
109 | unsigned long dent_ino; | ||
110 | int uname_len; | ||
111 | |||
112 | ntfs_debug("Looking up %s in directory inode 0x%lx.", | ||
113 | dent->d_name.name, dir_ino->i_ino); | ||
114 | /* Convert the name of the dentry to Unicode. */ | ||
115 | uname_len = ntfs_nlstoucs(vol, dent->d_name.name, dent->d_name.len, | ||
116 | &uname); | ||
117 | if (uname_len < 0) { | ||
118 | ntfs_error(vol->sb, "Failed to convert name to Unicode."); | ||
119 | return ERR_PTR(uname_len); | ||
120 | } | ||
121 | mref = ntfs_lookup_inode_by_name(NTFS_I(dir_ino), uname, uname_len, | ||
122 | &name); | ||
123 | kmem_cache_free(ntfs_name_cache, uname); | ||
124 | if (!IS_ERR_MREF(mref)) { | ||
125 | dent_ino = MREF(mref); | ||
126 | ntfs_debug("Found inode 0x%lx. Calling ntfs_iget.", dent_ino); | ||
127 | dent_inode = ntfs_iget(vol->sb, dent_ino); | ||
128 | if (likely(!IS_ERR(dent_inode))) { | ||
129 | /* Consistency check. */ | ||
130 | if (is_bad_inode(dent_inode) || MSEQNO(mref) == | ||
131 | NTFS_I(dent_inode)->seq_no || | ||
132 | dent_ino == FILE_MFT) { | ||
133 | /* Perfect WIN32/POSIX match. -- Case 1. */ | ||
134 | if (!name) { | ||
135 | ntfs_debug("Done. (Case 1.)"); | ||
136 | return d_splice_alias(dent_inode, dent); | ||
137 | } | ||
138 | /* | ||
139 | * We are too indented. Handle imperfect | ||
140 | * matches and short file names further below. | ||
141 | */ | ||
142 | goto handle_name; | ||
143 | } | ||
144 | ntfs_error(vol->sb, "Found stale reference to inode " | ||
145 | "0x%lx (reference sequence number = " | ||
146 | "0x%x, inode sequence number = 0x%x), " | ||
147 | "returning -EIO. Run chkdsk.", | ||
148 | dent_ino, MSEQNO(mref), | ||
149 | NTFS_I(dent_inode)->seq_no); | ||
150 | iput(dent_inode); | ||
151 | dent_inode = ERR_PTR(-EIO); | ||
152 | } else | ||
153 | ntfs_error(vol->sb, "ntfs_iget(0x%lx) failed with " | ||
154 | "error code %li.", dent_ino, | ||
155 | PTR_ERR(dent_inode)); | ||
156 | if (name) | ||
157 | kfree(name); | ||
158 | /* Return the error code. */ | ||
159 | return (struct dentry *)dent_inode; | ||
160 | } | ||
161 | /* It is guaranteed that name is no longer allocated at this point. */ | ||
162 | if (MREF_ERR(mref) == -ENOENT) { | ||
163 | ntfs_debug("Entry was not found, adding negative dentry."); | ||
164 | /* The dcache will handle negative entries. */ | ||
165 | d_add(dent, NULL); | ||
166 | ntfs_debug("Done."); | ||
167 | return NULL; | ||
168 | } | ||
169 | ntfs_error(vol->sb, "ntfs_lookup_ino_by_name() failed with error " | ||
170 | "code %i.", -MREF_ERR(mref)); | ||
171 | return ERR_PTR(MREF_ERR(mref)); | ||
172 | |||
173 | // TODO: Consider moving this lot to a separate function! (AIA) | ||
174 | handle_name: | ||
175 | { | ||
176 | struct dentry *real_dent, *new_dent; | ||
177 | MFT_RECORD *m; | ||
178 | ntfs_attr_search_ctx *ctx; | ||
179 | ntfs_inode *ni = NTFS_I(dent_inode); | ||
180 | int err; | ||
181 | struct qstr nls_name; | ||
182 | |||
183 | nls_name.name = NULL; | ||
184 | if (name->type != FILE_NAME_DOS) { /* Case 2. */ | ||
185 | ntfs_debug("Case 2."); | ||
186 | nls_name.len = (unsigned)ntfs_ucstonls(vol, | ||
187 | (ntfschar*)&name->name, name->len, | ||
188 | (unsigned char**)&nls_name.name, 0); | ||
189 | kfree(name); | ||
190 | } else /* if (name->type == FILE_NAME_DOS) */ { /* Case 3. */ | ||
191 | FILE_NAME_ATTR *fn; | ||
192 | |||
193 | ntfs_debug("Case 3."); | ||
194 | kfree(name); | ||
195 | |||
196 | /* Find the WIN32 name corresponding to the matched DOS name. */ | ||
197 | ni = NTFS_I(dent_inode); | ||
198 | m = map_mft_record(ni); | ||
199 | if (IS_ERR(m)) { | ||
200 | err = PTR_ERR(m); | ||
201 | m = NULL; | ||
202 | ctx = NULL; | ||
203 | goto err_out; | ||
204 | } | ||
205 | ctx = ntfs_attr_get_search_ctx(ni, m); | ||
206 | if (unlikely(!ctx)) { | ||
207 | err = -ENOMEM; | ||
208 | goto err_out; | ||
209 | } | ||
210 | do { | ||
211 | ATTR_RECORD *a; | ||
212 | u32 val_len; | ||
213 | |||
214 | err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, 0, 0, | ||
215 | NULL, 0, ctx); | ||
216 | if (unlikely(err)) { | ||
217 | ntfs_error(vol->sb, "Inode corrupt: No WIN32 " | ||
218 | "namespace counterpart to DOS " | ||
219 | "file name. Run chkdsk."); | ||
220 | if (err == -ENOENT) | ||
221 | err = -EIO; | ||
222 | goto err_out; | ||
223 | } | ||
224 | /* Consistency checks. */ | ||
225 | a = ctx->attr; | ||
226 | if (a->non_resident || a->flags) | ||
227 | goto eio_err_out; | ||
228 | val_len = le32_to_cpu(a->data.resident.value_length); | ||
229 | if (le16_to_cpu(a->data.resident.value_offset) + | ||
230 | val_len > le32_to_cpu(a->length)) | ||
231 | goto eio_err_out; | ||
232 | fn = (FILE_NAME_ATTR*)((u8*)ctx->attr + le16_to_cpu( | ||
233 | ctx->attr->data.resident.value_offset)); | ||
234 | if ((u32)(fn->file_name_length * sizeof(ntfschar) + | ||
235 | sizeof(FILE_NAME_ATTR)) > val_len) | ||
236 | goto eio_err_out; | ||
237 | } while (fn->file_name_type != FILE_NAME_WIN32); | ||
238 | |||
239 | /* Convert the found WIN32 name to current NLS code page. */ | ||
240 | nls_name.len = (unsigned)ntfs_ucstonls(vol, | ||
241 | (ntfschar*)&fn->file_name, fn->file_name_length, | ||
242 | (unsigned char**)&nls_name.name, 0); | ||
243 | |||
244 | ntfs_attr_put_search_ctx(ctx); | ||
245 | unmap_mft_record(ni); | ||
246 | } | ||
247 | m = NULL; | ||
248 | ctx = NULL; | ||
249 | |||
250 | /* Check if a conversion error occurred. */ | ||
251 | if ((signed)nls_name.len < 0) { | ||
252 | err = (signed)nls_name.len; | ||
253 | goto err_out; | ||
254 | } | ||
255 | nls_name.hash = full_name_hash(nls_name.name, nls_name.len); | ||
256 | |||
257 | /* | ||
258 | * Note: No need for dent->d_lock lock as i_sem is held on the | ||
259 | * parent inode. | ||
260 | */ | ||
261 | |||
262 | /* Does a dentry matching the nls_name exist already? */ | ||
263 | real_dent = d_lookup(dent->d_parent, &nls_name); | ||
264 | /* If not, create it now. */ | ||
265 | if (!real_dent) { | ||
266 | real_dent = d_alloc(dent->d_parent, &nls_name); | ||
267 | kfree(nls_name.name); | ||
268 | if (!real_dent) { | ||
269 | err = -ENOMEM; | ||
270 | goto err_out; | ||
271 | } | ||
272 | new_dent = d_splice_alias(dent_inode, real_dent); | ||
273 | if (new_dent) | ||
274 | dput(real_dent); | ||
275 | else | ||
276 | new_dent = real_dent; | ||
277 | ntfs_debug("Done. (Created new dentry.)"); | ||
278 | return new_dent; | ||
279 | } | ||
280 | kfree(nls_name.name); | ||
281 | /* Matching dentry exists, check if it is negative. */ | ||
282 | if (real_dent->d_inode) { | ||
283 | if (unlikely(real_dent->d_inode != dent_inode)) { | ||
284 | /* This can happen because bad inodes are unhashed. */ | ||
285 | BUG_ON(!is_bad_inode(dent_inode)); | ||
286 | BUG_ON(!is_bad_inode(real_dent->d_inode)); | ||
287 | } | ||
288 | /* | ||
289 | * Already have the inode and the dentry attached, decrement | ||
290 | * the reference count to balance the ntfs_iget() we did | ||
291 | * earlier on. We found the dentry using d_lookup() so it | ||
292 | * cannot be disconnected and thus we do not need to worry | ||
293 | * about any NFS/disconnectedness issues here. | ||
294 | */ | ||
295 | iput(dent_inode); | ||
296 | ntfs_debug("Done. (Already had inode and dentry.)"); | ||
297 | return real_dent; | ||
298 | } | ||
299 | /* | ||
300 | * Negative dentry: instantiate it unless the inode is a directory and | ||
301 | * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED), | ||
302 | * in which case we d_move() that dentry in place of the found dentry. | ||
303 | */ | ||
304 | if (!S_ISDIR(dent_inode->i_mode)) { | ||
305 | /* Not a directory; everything is easy. */ | ||
306 | d_instantiate(real_dent, dent_inode); | ||
307 | ntfs_debug("Done. (Already had negative file dentry.)"); | ||
308 | return real_dent; | ||
309 | } | ||
310 | spin_lock(&dcache_lock); | ||
311 | if (list_empty(&dent_inode->i_dentry)) { | ||
312 | /* | ||
313 | * Directory without a 'disconnected' dentry; we need to do | ||
314 | * d_instantiate() by hand because it takes dcache_lock which | ||
315 | * we already hold. | ||
316 | */ | ||
317 | list_add(&real_dent->d_alias, &dent_inode->i_dentry); | ||
318 | real_dent->d_inode = dent_inode; | ||
319 | spin_unlock(&dcache_lock); | ||
320 | security_d_instantiate(real_dent, dent_inode); | ||
321 | ntfs_debug("Done. (Already had negative directory dentry.)"); | ||
322 | return real_dent; | ||
323 | } | ||
324 | /* | ||
325 | * Directory with a 'disconnected' dentry; get a reference to the | ||
326 | * 'disconnected' dentry. | ||
327 | */ | ||
328 | new_dent = list_entry(dent_inode->i_dentry.next, struct dentry, | ||
329 | d_alias); | ||
330 | dget_locked(new_dent); | ||
331 | spin_unlock(&dcache_lock); | ||
332 | /* Do security voodoo. */ | ||
333 | security_d_instantiate(real_dent, dent_inode); | ||
334 | /* Move new_dent in place of real_dent. */ | ||
335 | d_move(new_dent, real_dent); | ||
336 | /* Balance the ntfs_iget() we did above. */ | ||
337 | iput(dent_inode); | ||
338 | /* Throw away real_dent. */ | ||
339 | dput(real_dent); | ||
340 | /* Use new_dent as the actual dentry. */ | ||
341 | ntfs_debug("Done. (Already had negative, disconnected directory " | ||
342 | "dentry.)"); | ||
343 | return new_dent; | ||
344 | |||
345 | eio_err_out: | ||
346 | ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk."); | ||
347 | err = -EIO; | ||
348 | err_out: | ||
349 | if (ctx) | ||
350 | ntfs_attr_put_search_ctx(ctx); | ||
351 | if (m) | ||
352 | unmap_mft_record(ni); | ||
353 | iput(dent_inode); | ||
354 | ntfs_error(vol->sb, "Failed, returning error code %i.", err); | ||
355 | return ERR_PTR(err); | ||
356 | } | ||
357 | } | ||
358 | |||
359 | /** | ||
360 | * Inode operations for directories. | ||
361 | */ | ||
362 | struct inode_operations ntfs_dir_inode_ops = { | ||
363 | .lookup = ntfs_lookup, /* VFS: Lookup directory. */ | ||
364 | }; | ||
365 | |||
366 | /** | ||
367 | * ntfs_get_parent - find the dentry of the parent of a given directory dentry | ||
368 | * @child_dent: dentry of the directory whose parent directory to find | ||
369 | * | ||
370 | * Find the dentry for the parent directory of the directory specified by the | ||
371 | * dentry @child_dent. This function is called from | ||
372 | * fs/exportfs/expfs.c::find_exported_dentry() which in turn is called from the | ||
373 | * default ->decode_fh() which is export_decode_fh() in the same file. | ||
374 | * | ||
375 | * The code is based on the ext3 ->get_parent() implementation found in | ||
376 | * fs/ext3/namei.c::ext3_get_parent(). | ||
377 | * | ||
378 | * Note: ntfs_get_parent() is called with @child_dent->d_inode->i_sem down. | ||
379 | * | ||
380 | * Return the dentry of the parent directory on success or the error code on | ||
381 | * error (IS_ERR() is true). | ||
382 | */ | ||
383 | struct dentry *ntfs_get_parent(struct dentry *child_dent) | ||
384 | { | ||
385 | struct inode *vi = child_dent->d_inode; | ||
386 | ntfs_inode *ni = NTFS_I(vi); | ||
387 | MFT_RECORD *mrec; | ||
388 | ntfs_attr_search_ctx *ctx; | ||
389 | ATTR_RECORD *attr; | ||
390 | FILE_NAME_ATTR *fn; | ||
391 | struct inode *parent_vi; | ||
392 | struct dentry *parent_dent; | ||
393 | unsigned long parent_ino; | ||
394 | int err; | ||
395 | |||
396 | ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); | ||
397 | /* Get the mft record of the inode belonging to the child dentry. */ | ||
398 | mrec = map_mft_record(ni); | ||
399 | if (IS_ERR(mrec)) | ||
400 | return (struct dentry *)mrec; | ||
401 | /* Find the first file name attribute in the mft record. */ | ||
402 | ctx = ntfs_attr_get_search_ctx(ni, mrec); | ||
403 | if (unlikely(!ctx)) { | ||
404 | unmap_mft_record(ni); | ||
405 | return ERR_PTR(-ENOMEM); | ||
406 | } | ||
407 | try_next: | ||
408 | err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, CASE_SENSITIVE, 0, NULL, | ||
409 | 0, ctx); | ||
410 | if (unlikely(err)) { | ||
411 | ntfs_attr_put_search_ctx(ctx); | ||
412 | unmap_mft_record(ni); | ||
413 | if (err == -ENOENT) | ||
414 | ntfs_error(vi->i_sb, "Inode 0x%lx does not have a " | ||
415 | "file name attribute. Run chkdsk.", | ||
416 | vi->i_ino); | ||
417 | return ERR_PTR(err); | ||
418 | } | ||
419 | attr = ctx->attr; | ||
420 | if (unlikely(attr->non_resident)) | ||
421 | goto try_next; | ||
422 | fn = (FILE_NAME_ATTR *)((u8 *)attr + | ||
423 | le16_to_cpu(attr->data.resident.value_offset)); | ||
424 | if (unlikely((u8 *)fn + le32_to_cpu(attr->data.resident.value_length) > | ||
425 | (u8*)attr + le32_to_cpu(attr->length))) | ||
426 | goto try_next; | ||
427 | /* Get the inode number of the parent directory. */ | ||
428 | parent_ino = MREF_LE(fn->parent_directory); | ||
429 | /* Release the search context and the mft record of the child. */ | ||
430 | ntfs_attr_put_search_ctx(ctx); | ||
431 | unmap_mft_record(ni); | ||
432 | /* Get the inode of the parent directory. */ | ||
433 | parent_vi = ntfs_iget(vi->i_sb, parent_ino); | ||
434 | if (IS_ERR(parent_vi) || unlikely(is_bad_inode(parent_vi))) { | ||
435 | if (!IS_ERR(parent_vi)) | ||
436 | iput(parent_vi); | ||
437 | ntfs_error(vi->i_sb, "Failed to get parent directory inode " | ||
438 | "0x%lx of child inode 0x%lx.", parent_ino, | ||
439 | vi->i_ino); | ||
440 | return ERR_PTR(-EACCES); | ||
441 | } | ||
442 | /* Finally get a dentry for the parent directory and return it. */ | ||
443 | parent_dent = d_alloc_anon(parent_vi); | ||
444 | if (unlikely(!parent_dent)) { | ||
445 | iput(parent_vi); | ||
446 | return ERR_PTR(-ENOMEM); | ||
447 | } | ||
448 | ntfs_debug("Done for inode 0x%lx.", vi->i_ino); | ||
449 | return parent_dent; | ||
450 | } | ||
451 | |||
452 | /** | ||
453 | * ntfs_get_dentry - find a dentry for the inode from a file handle sub-fragment | ||
454 | * @sb: super block identifying the mounted ntfs volume | ||
455 | * @fh: the file handle sub-fragment | ||
456 | * | ||
457 | * Find a dentry for the inode given a file handle sub-fragment. This function | ||
458 | * is called from fs/exportfs/expfs.c::find_exported_dentry() which in turn is | ||
459 | * called from the default ->decode_fh() which is export_decode_fh() in the | ||
460 | * same file. The code is closely based on the default ->get_dentry() helper | ||
461 | * fs/exportfs/expfs.c::get_object(). | ||
462 | * | ||
463 | * The @fh contains two 32-bit unsigned values, the first one is the inode | ||
464 | * number and the second one is the inode generation. | ||
465 | * | ||
466 | * Return the dentry on success or the error code on error (IS_ERR() is true). | ||
467 | */ | ||
468 | struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh) | ||
469 | { | ||
470 | struct inode *vi; | ||
471 | struct dentry *dent; | ||
472 | unsigned long ino = ((u32 *)fh)[0]; | ||
473 | u32 gen = ((u32 *)fh)[1]; | ||
474 | |||
475 | ntfs_debug("Entering for inode 0x%lx, generation 0x%x.", ino, gen); | ||
476 | vi = ntfs_iget(sb, ino); | ||
477 | if (IS_ERR(vi)) { | ||
478 | ntfs_error(sb, "Failed to get inode 0x%lx.", ino); | ||
479 | return (struct dentry *)vi; | ||
480 | } | ||
481 | if (unlikely(is_bad_inode(vi) || vi->i_generation != gen)) { | ||
482 | /* We didn't find the right inode. */ | ||
483 | ntfs_error(sb, "Inode 0x%lx, bad count: %d %d or version 0x%x " | ||
484 | "0x%x.", vi->i_ino, vi->i_nlink, | ||
485 | atomic_read(&vi->i_count), vi->i_generation, | ||
486 | gen); | ||
487 | iput(vi); | ||
488 | return ERR_PTR(-ESTALE); | ||
489 | } | ||
490 | /* Now find a dentry. If possible, get a well-connected one. */ | ||
491 | dent = d_alloc_anon(vi); | ||
492 | if (unlikely(!dent)) { | ||
493 | iput(vi); | ||
494 | return ERR_PTR(-ENOMEM); | ||
495 | } | ||
496 | ntfs_debug("Done for inode 0x%lx, generation 0x%x.", ino, gen); | ||
497 | return dent; | ||
498 | } | ||
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h new file mode 100644 index 000000000000..720ffb71bab8 --- /dev/null +++ b/fs/ntfs/ntfs.h | |||
@@ -0,0 +1,129 @@ | |||
1 | /* | ||
2 | * ntfs.h - Defines for NTFS Linux kernel driver. Part of the Linux-NTFS | ||
3 | * project. | ||
4 | * | ||
5 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
6 | * Copyright (C) 2002 Richard Russon | ||
7 | * | ||
8 | * This program/include file is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as published | ||
10 | * by the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program/include file is distributed in the hope that it will be | ||
14 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
15 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program (in the main directory of the Linux-NTFS | ||
20 | * distribution in the file COPYING); if not, write to the Free Software | ||
21 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 | */ | ||
23 | |||
24 | #ifndef _LINUX_NTFS_H | ||
25 | #define _LINUX_NTFS_H | ||
26 | |||
27 | #include <linux/stddef.h> | ||
28 | #include <linux/kernel.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/compiler.h> | ||
31 | #include <linux/fs.h> | ||
32 | #include <linux/nls.h> | ||
33 | #include <linux/smp.h> | ||
34 | |||
35 | #include "types.h" | ||
36 | #include "volume.h" | ||
37 | #include "layout.h" | ||
38 | |||
39 | typedef enum { | ||
40 | NTFS_BLOCK_SIZE = 512, | ||
41 | NTFS_BLOCK_SIZE_BITS = 9, | ||
42 | NTFS_SB_MAGIC = 0x5346544e, /* 'NTFS' */ | ||
43 | NTFS_MAX_NAME_LEN = 255, | ||
44 | } NTFS_CONSTANTS; | ||
45 | |||
46 | /* Global variables. */ | ||
47 | |||
48 | /* Slab caches (from super.c). */ | ||
49 | extern kmem_cache_t *ntfs_name_cache; | ||
50 | extern kmem_cache_t *ntfs_inode_cache; | ||
51 | extern kmem_cache_t *ntfs_big_inode_cache; | ||
52 | extern kmem_cache_t *ntfs_attr_ctx_cache; | ||
53 | extern kmem_cache_t *ntfs_index_ctx_cache; | ||
54 | |||
55 | /* The various operations structs defined throughout the driver files. */ | ||
56 | extern struct address_space_operations ntfs_aops; | ||
57 | extern struct address_space_operations ntfs_mst_aops; | ||
58 | |||
59 | extern struct file_operations ntfs_file_ops; | ||
60 | extern struct inode_operations ntfs_file_inode_ops; | ||
61 | |||
62 | extern struct file_operations ntfs_dir_ops; | ||
63 | extern struct inode_operations ntfs_dir_inode_ops; | ||
64 | |||
65 | extern struct file_operations ntfs_empty_file_ops; | ||
66 | extern struct inode_operations ntfs_empty_inode_ops; | ||
67 | |||
68 | /** | ||
69 | * NTFS_SB - return the ntfs volume given a vfs super block | ||
70 | * @sb: VFS super block | ||
71 | * | ||
72 | * NTFS_SB() returns the ntfs volume associated with the VFS super block @sb. | ||
73 | */ | ||
74 | static inline ntfs_volume *NTFS_SB(struct super_block *sb) | ||
75 | { | ||
76 | return sb->s_fs_info; | ||
77 | } | ||
78 | |||
79 | /* Declarations of functions and global variables. */ | ||
80 | |||
81 | /* From fs/ntfs/compress.c */ | ||
82 | extern int ntfs_read_compressed_block(struct page *page); | ||
83 | extern int allocate_compression_buffers(void); | ||
84 | extern void free_compression_buffers(void); | ||
85 | |||
86 | /* From fs/ntfs/super.c */ | ||
87 | #define default_upcase_len 0x10000 | ||
88 | extern struct semaphore ntfs_lock; | ||
89 | |||
90 | typedef struct { | ||
91 | int val; | ||
92 | char *str; | ||
93 | } option_t; | ||
94 | extern const option_t on_errors_arr[]; | ||
95 | |||
96 | /* From fs/ntfs/mst.c */ | ||
97 | extern int post_read_mst_fixup(NTFS_RECORD *b, const u32 size); | ||
98 | extern int pre_write_mst_fixup(NTFS_RECORD *b, const u32 size); | ||
99 | extern void post_write_mst_fixup(NTFS_RECORD *b); | ||
100 | |||
101 | /* From fs/ntfs/unistr.c */ | ||
102 | extern BOOL ntfs_are_names_equal(const ntfschar *s1, size_t s1_len, | ||
103 | const ntfschar *s2, size_t s2_len, | ||
104 | const IGNORE_CASE_BOOL ic, | ||
105 | const ntfschar *upcase, const u32 upcase_size); | ||
106 | extern int ntfs_collate_names(const ntfschar *name1, const u32 name1_len, | ||
107 | const ntfschar *name2, const u32 name2_len, | ||
108 | const int err_val, const IGNORE_CASE_BOOL ic, | ||
109 | const ntfschar *upcase, const u32 upcase_len); | ||
110 | extern int ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n); | ||
111 | extern int ntfs_ucsncasecmp(const ntfschar *s1, const ntfschar *s2, size_t n, | ||
112 | const ntfschar *upcase, const u32 upcase_size); | ||
113 | extern void ntfs_upcase_name(ntfschar *name, u32 name_len, | ||
114 | const ntfschar *upcase, const u32 upcase_len); | ||
115 | extern void ntfs_file_upcase_value(FILE_NAME_ATTR *file_name_attr, | ||
116 | const ntfschar *upcase, const u32 upcase_len); | ||
117 | extern int ntfs_file_compare_values(FILE_NAME_ATTR *file_name_attr1, | ||
118 | FILE_NAME_ATTR *file_name_attr2, | ||
119 | const int err_val, const IGNORE_CASE_BOOL ic, | ||
120 | const ntfschar *upcase, const u32 upcase_len); | ||
121 | extern int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins, | ||
122 | const int ins_len, ntfschar **outs); | ||
123 | extern int ntfs_ucstonls(const ntfs_volume *vol, const ntfschar *ins, | ||
124 | const int ins_len, unsigned char **outs, int outs_len); | ||
125 | |||
126 | /* From fs/ntfs/upcase.c */ | ||
127 | extern ntfschar *generate_default_upcase(void); | ||
128 | |||
129 | #endif /* _LINUX_NTFS_H */ | ||
diff --git a/fs/ntfs/quota.c b/fs/ntfs/quota.c new file mode 100644 index 000000000000..833df2a4e9fb --- /dev/null +++ b/fs/ntfs/quota.c | |||
@@ -0,0 +1,117 @@ | |||
1 | /* | ||
2 | * quota.c - NTFS kernel quota ($Quota) handling. Part of the Linux-NTFS | ||
3 | * project. | ||
4 | * | ||
5 | * Copyright (c) 2004 Anton Altaparmakov | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #ifdef NTFS_RW | ||
24 | |||
25 | #include "index.h" | ||
26 | #include "quota.h" | ||
27 | #include "debug.h" | ||
28 | #include "ntfs.h" | ||
29 | |||
30 | /** | ||
31 | * ntfs_mark_quotas_out_of_date - mark the quotas out of date on an ntfs volume | ||
32 | * @vol: ntfs volume on which to mark the quotas out of date | ||
33 | * | ||
34 | * Mark the quotas out of date on the ntfs volume @vol and return TRUE on | ||
35 | * success and FALSE on error. | ||
36 | */ | ||
37 | BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol) | ||
38 | { | ||
39 | ntfs_index_context *ictx; | ||
40 | QUOTA_CONTROL_ENTRY *qce; | ||
41 | const le32 qid = QUOTA_DEFAULTS_ID; | ||
42 | int err; | ||
43 | |||
44 | ntfs_debug("Entering."); | ||
45 | if (NVolQuotaOutOfDate(vol)) | ||
46 | goto done; | ||
47 | if (!vol->quota_ino || !vol->quota_q_ino) { | ||
48 | ntfs_error(vol->sb, "Quota inodes are not open."); | ||
49 | return FALSE; | ||
50 | } | ||
51 | down(&vol->quota_q_ino->i_sem); | ||
52 | ictx = ntfs_index_ctx_get(NTFS_I(vol->quota_q_ino)); | ||
53 | if (!ictx) { | ||
54 | ntfs_error(vol->sb, "Failed to get index context."); | ||
55 | goto err_out; | ||
56 | } | ||
57 | err = ntfs_index_lookup(&qid, sizeof(qid), ictx); | ||
58 | if (err) { | ||
59 | if (err == -ENOENT) | ||
60 | ntfs_error(vol->sb, "Quota defaults entry is not " | ||
61 | "present."); | ||
62 | else | ||
63 | ntfs_error(vol->sb, "Lookup of quota defaults entry " | ||
64 | "failed."); | ||
65 | goto err_out; | ||
66 | } | ||
67 | if (ictx->data_len < offsetof(QUOTA_CONTROL_ENTRY, sid)) { | ||
68 | ntfs_error(vol->sb, "Quota defaults entry size is invalid. " | ||
69 | "Run chkdsk."); | ||
70 | goto err_out; | ||
71 | } | ||
72 | qce = (QUOTA_CONTROL_ENTRY*)ictx->data; | ||
73 | if (le32_to_cpu(qce->version) != QUOTA_VERSION) { | ||
74 | ntfs_error(vol->sb, "Quota defaults entry version 0x%x is not " | ||
75 | "supported.", le32_to_cpu(qce->version)); | ||
76 | goto err_out; | ||
77 | } | ||
78 | ntfs_debug("Quota defaults flags = 0x%x.", le32_to_cpu(qce->flags)); | ||
79 | /* If quotas are already marked out of date, no need to do anything. */ | ||
80 | if (qce->flags & QUOTA_FLAG_OUT_OF_DATE) | ||
81 | goto set_done; | ||
82 | /* | ||
83 | * If quota tracking is neither requested, nor enabled and there are no | ||
84 | * pending deletes, no need to mark the quotas out of date. | ||
85 | */ | ||
86 | if (!(qce->flags & (QUOTA_FLAG_TRACKING_ENABLED | | ||
87 | QUOTA_FLAG_TRACKING_REQUESTED | | ||
88 | QUOTA_FLAG_PENDING_DELETES))) | ||
89 | goto set_done; | ||
90 | /* | ||
91 | * Set the QUOTA_FLAG_OUT_OF_DATE bit thus marking quotas out of date. | ||
92 | * This is verified on WinXP to be sufficient to cause Windows to | ||
93 | * rescan the volume on boot and update all quota entries. | ||
94 | */ | ||
95 | qce->flags |= QUOTA_FLAG_OUT_OF_DATE; | ||
96 | /* Ensure the modified flags are written to disk. */ | ||
97 | ntfs_index_entry_flush_dcache_page(ictx); | ||
98 | ntfs_index_entry_mark_dirty(ictx); | ||
99 | set_done: | ||
100 | ntfs_index_ctx_put(ictx); | ||
101 | up(&vol->quota_q_ino->i_sem); | ||
102 | /* | ||
103 | * We set the flag so we do not try to mark the quotas out of date | ||
104 | * again on remount. | ||
105 | */ | ||
106 | NVolSetQuotaOutOfDate(vol); | ||
107 | done: | ||
108 | ntfs_debug("Done."); | ||
109 | return TRUE; | ||
110 | err_out: | ||
111 | if (ictx) | ||
112 | ntfs_index_ctx_put(ictx); | ||
113 | up(&vol->quota_q_ino->i_sem); | ||
114 | return FALSE; | ||
115 | } | ||
116 | |||
117 | #endif /* NTFS_RW */ | ||
diff --git a/fs/ntfs/quota.h b/fs/ntfs/quota.h new file mode 100644 index 000000000000..40e4763aa222 --- /dev/null +++ b/fs/ntfs/quota.h | |||
@@ -0,0 +1,35 @@ | |||
1 | /* | ||
2 | * quota.h - Defines for NTFS kernel quota ($Quota) handling. Part of the | ||
3 | * Linux-NTFS project. | ||
4 | * | ||
5 | * Copyright (c) 2004 Anton Altaparmakov | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #ifndef _LINUX_NTFS_QUOTA_H | ||
24 | #define _LINUX_NTFS_QUOTA_H | ||
25 | |||
26 | #ifdef NTFS_RW | ||
27 | |||
28 | #include "types.h" | ||
29 | #include "volume.h" | ||
30 | |||
31 | extern BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol); | ||
32 | |||
33 | #endif /* NTFS_RW */ | ||
34 | |||
35 | #endif /* _LINUX_NTFS_QUOTA_H */ | ||
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c new file mode 100644 index 000000000000..8438fb1da219 --- /dev/null +++ b/fs/ntfs/runlist.c | |||
@@ -0,0 +1,1438 @@ | |||
1 | /** | ||
2 | * runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
5 | * Copyright (c) 2002 Richard Russon | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #include "debug.h" | ||
24 | #include "dir.h" | ||
25 | #include "endian.h" | ||
26 | #include "malloc.h" | ||
27 | #include "ntfs.h" | ||
28 | |||
29 | /** | ||
30 | * ntfs_rl_mm - runlist memmove | ||
31 | * | ||
32 | * It is up to the caller to serialize access to the runlist @base. | ||
33 | */ | ||
34 | static inline void ntfs_rl_mm(runlist_element *base, int dst, int src, | ||
35 | int size) | ||
36 | { | ||
37 | if (likely((dst != src) && (size > 0))) | ||
38 | memmove(base + dst, base + src, size * sizeof (*base)); | ||
39 | } | ||
40 | |||
41 | /** | ||
42 | * ntfs_rl_mc - runlist memory copy | ||
43 | * | ||
44 | * It is up to the caller to serialize access to the runlists @dstbase and | ||
45 | * @srcbase. | ||
46 | */ | ||
47 | static inline void ntfs_rl_mc(runlist_element *dstbase, int dst, | ||
48 | runlist_element *srcbase, int src, int size) | ||
49 | { | ||
50 | if (likely(size > 0)) | ||
51 | memcpy(dstbase + dst, srcbase + src, size * sizeof(*dstbase)); | ||
52 | } | ||
53 | |||
54 | /** | ||
55 | * ntfs_rl_realloc - Reallocate memory for runlists | ||
56 | * @rl: original runlist | ||
57 | * @old_size: number of runlist elements in the original runlist @rl | ||
58 | * @new_size: number of runlist elements we need space for | ||
59 | * | ||
60 | * As the runlists grow, more memory will be required. To prevent the | ||
61 | * kernel having to allocate and reallocate large numbers of small bits of | ||
62 | * memory, this function returns and entire page of memory. | ||
63 | * | ||
64 | * It is up to the caller to serialize access to the runlist @rl. | ||
65 | * | ||
66 | * N.B. If the new allocation doesn't require a different number of pages in | ||
67 | * memory, the function will return the original pointer. | ||
68 | * | ||
69 | * On success, return a pointer to the newly allocated, or recycled, memory. | ||
70 | * On error, return -errno. The following error codes are defined: | ||
71 | * -ENOMEM - Not enough memory to allocate runlist array. | ||
72 | * -EINVAL - Invalid parameters were passed in. | ||
73 | */ | ||
74 | static inline runlist_element *ntfs_rl_realloc(runlist_element *rl, | ||
75 | int old_size, int new_size) | ||
76 | { | ||
77 | runlist_element *new_rl; | ||
78 | |||
79 | old_size = PAGE_ALIGN(old_size * sizeof(*rl)); | ||
80 | new_size = PAGE_ALIGN(new_size * sizeof(*rl)); | ||
81 | if (old_size == new_size) | ||
82 | return rl; | ||
83 | |||
84 | new_rl = ntfs_malloc_nofs(new_size); | ||
85 | if (unlikely(!new_rl)) | ||
86 | return ERR_PTR(-ENOMEM); | ||
87 | |||
88 | if (likely(rl != NULL)) { | ||
89 | if (unlikely(old_size > new_size)) | ||
90 | old_size = new_size; | ||
91 | memcpy(new_rl, rl, old_size); | ||
92 | ntfs_free(rl); | ||
93 | } | ||
94 | return new_rl; | ||
95 | } | ||
96 | |||
97 | /** | ||
98 | * ntfs_are_rl_mergeable - test if two runlists can be joined together | ||
99 | * @dst: original runlist | ||
100 | * @src: new runlist to test for mergeability with @dst | ||
101 | * | ||
102 | * Test if two runlists can be joined together. For this, their VCNs and LCNs | ||
103 | * must be adjacent. | ||
104 | * | ||
105 | * It is up to the caller to serialize access to the runlists @dst and @src. | ||
106 | * | ||
107 | * Return: TRUE Success, the runlists can be merged. | ||
108 | * FALSE Failure, the runlists cannot be merged. | ||
109 | */ | ||
110 | static inline BOOL ntfs_are_rl_mergeable(runlist_element *dst, | ||
111 | runlist_element *src) | ||
112 | { | ||
113 | BUG_ON(!dst); | ||
114 | BUG_ON(!src); | ||
115 | |||
116 | if ((dst->lcn < 0) || (src->lcn < 0)) /* Are we merging holes? */ | ||
117 | return FALSE; | ||
118 | if ((dst->lcn + dst->length) != src->lcn) /* Are the runs contiguous? */ | ||
119 | return FALSE; | ||
120 | if ((dst->vcn + dst->length) != src->vcn) /* Are the runs misaligned? */ | ||
121 | return FALSE; | ||
122 | |||
123 | return TRUE; | ||
124 | } | ||
125 | |||
126 | /** | ||
127 | * __ntfs_rl_merge - merge two runlists without testing if they can be merged | ||
128 | * @dst: original, destination runlist | ||
129 | * @src: new runlist to merge with @dst | ||
130 | * | ||
131 | * Merge the two runlists, writing into the destination runlist @dst. The | ||
132 | * caller must make sure the runlists can be merged or this will corrupt the | ||
133 | * destination runlist. | ||
134 | * | ||
135 | * It is up to the caller to serialize access to the runlists @dst and @src. | ||
136 | */ | ||
137 | static inline void __ntfs_rl_merge(runlist_element *dst, runlist_element *src) | ||
138 | { | ||
139 | dst->length += src->length; | ||
140 | } | ||
141 | |||
142 | /** | ||
143 | * ntfs_rl_append - append a runlist after a given element | ||
144 | * @dst: original runlist to be worked on | ||
145 | * @dsize: number of elements in @dst (including end marker) | ||
146 | * @src: runlist to be inserted into @dst | ||
147 | * @ssize: number of elements in @src (excluding end marker) | ||
148 | * @loc: append the new runlist @src after this element in @dst | ||
149 | * | ||
150 | * Append the runlist @src after element @loc in @dst. Merge the right end of | ||
151 | * the new runlist, if necessary. Adjust the size of the hole before the | ||
152 | * appended runlist. | ||
153 | * | ||
154 | * It is up to the caller to serialize access to the runlists @dst and @src. | ||
155 | * | ||
156 | * On success, return a pointer to the new, combined, runlist. Note, both | ||
157 | * runlists @dst and @src are deallocated before returning so you cannot use | ||
158 | * the pointers for anything any more. (Strictly speaking the returned runlist | ||
159 | * may be the same as @dst but this is irrelevant.) | ||
160 | * | ||
161 | * On error, return -errno. Both runlists are left unmodified. The following | ||
162 | * error codes are defined: | ||
163 | * -ENOMEM - Not enough memory to allocate runlist array. | ||
164 | * -EINVAL - Invalid parameters were passed in. | ||
165 | */ | ||
166 | static inline runlist_element *ntfs_rl_append(runlist_element *dst, | ||
167 | int dsize, runlist_element *src, int ssize, int loc) | ||
168 | { | ||
169 | BOOL right; | ||
170 | int magic; | ||
171 | |||
172 | BUG_ON(!dst); | ||
173 | BUG_ON(!src); | ||
174 | |||
175 | /* First, check if the right hand end needs merging. */ | ||
176 | right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1); | ||
177 | |||
178 | /* Space required: @dst size + @src size, less one if we merged. */ | ||
179 | dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - right); | ||
180 | if (IS_ERR(dst)) | ||
181 | return dst; | ||
182 | /* | ||
183 | * We are guaranteed to succeed from here so can start modifying the | ||
184 | * original runlists. | ||
185 | */ | ||
186 | |||
187 | /* First, merge the right hand end, if necessary. */ | ||
188 | if (right) | ||
189 | __ntfs_rl_merge(src + ssize - 1, dst + loc + 1); | ||
190 | |||
191 | magic = loc + ssize; | ||
192 | |||
193 | /* Move the tail of @dst out of the way, then copy in @src. */ | ||
194 | ntfs_rl_mm(dst, magic + 1, loc + 1 + right, dsize - loc - 1 - right); | ||
195 | ntfs_rl_mc(dst, loc + 1, src, 0, ssize); | ||
196 | |||
197 | /* Adjust the size of the preceding hole. */ | ||
198 | dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn; | ||
199 | |||
200 | /* We may have changed the length of the file, so fix the end marker */ | ||
201 | if (dst[magic + 1].lcn == LCN_ENOENT) | ||
202 | dst[magic + 1].vcn = dst[magic].vcn + dst[magic].length; | ||
203 | |||
204 | return dst; | ||
205 | } | ||
206 | |||
207 | /** | ||
208 | * ntfs_rl_insert - insert a runlist into another | ||
209 | * @dst: original runlist to be worked on | ||
210 | * @dsize: number of elements in @dst (including end marker) | ||
211 | * @src: new runlist to be inserted | ||
212 | * @ssize: number of elements in @src (excluding end marker) | ||
213 | * @loc: insert the new runlist @src before this element in @dst | ||
214 | * | ||
215 | * Insert the runlist @src before element @loc in the runlist @dst. Merge the | ||
216 | * left end of the new runlist, if necessary. Adjust the size of the hole | ||
217 | * after the inserted runlist. | ||
218 | * | ||
219 | * It is up to the caller to serialize access to the runlists @dst and @src. | ||
220 | * | ||
221 | * On success, return a pointer to the new, combined, runlist. Note, both | ||
222 | * runlists @dst and @src are deallocated before returning so you cannot use | ||
223 | * the pointers for anything any more. (Strictly speaking the returned runlist | ||
224 | * may be the same as @dst but this is irrelevant.) | ||
225 | * | ||
226 | * On error, return -errno. Both runlists are left unmodified. The following | ||
227 | * error codes are defined: | ||
228 | * -ENOMEM - Not enough memory to allocate runlist array. | ||
229 | * -EINVAL - Invalid parameters were passed in. | ||
230 | */ | ||
231 | static inline runlist_element *ntfs_rl_insert(runlist_element *dst, | ||
232 | int dsize, runlist_element *src, int ssize, int loc) | ||
233 | { | ||
234 | BOOL left = FALSE; | ||
235 | BOOL disc = FALSE; /* Discontinuity */ | ||
236 | BOOL hole = FALSE; /* Following a hole */ | ||
237 | int magic; | ||
238 | |||
239 | BUG_ON(!dst); | ||
240 | BUG_ON(!src); | ||
241 | |||
242 | /* disc => Discontinuity between the end of @dst and the start of @src. | ||
243 | * This means we might need to insert a hole. | ||
244 | * hole => @dst ends with a hole or an unmapped region which we can | ||
245 | * extend to match the discontinuity. */ | ||
246 | if (loc == 0) | ||
247 | disc = (src[0].vcn > 0); | ||
248 | else { | ||
249 | s64 merged_length; | ||
250 | |||
251 | left = ntfs_are_rl_mergeable(dst + loc - 1, src); | ||
252 | |||
253 | merged_length = dst[loc - 1].length; | ||
254 | if (left) | ||
255 | merged_length += src->length; | ||
256 | |||
257 | disc = (src[0].vcn > dst[loc - 1].vcn + merged_length); | ||
258 | if (disc) | ||
259 | hole = (dst[loc - 1].lcn == LCN_HOLE); | ||
260 | } | ||
261 | |||
262 | /* Space required: @dst size + @src size, less one if we merged, plus | ||
263 | * one if there was a discontinuity, less one for a trailing hole. */ | ||
264 | dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc - hole); | ||
265 | if (IS_ERR(dst)) | ||
266 | return dst; | ||
267 | /* | ||
268 | * We are guaranteed to succeed from here so can start modifying the | ||
269 | * original runlist. | ||
270 | */ | ||
271 | |||
272 | if (left) | ||
273 | __ntfs_rl_merge(dst + loc - 1, src); | ||
274 | |||
275 | magic = loc + ssize - left + disc - hole; | ||
276 | |||
277 | /* Move the tail of @dst out of the way, then copy in @src. */ | ||
278 | ntfs_rl_mm(dst, magic, loc, dsize - loc); | ||
279 | ntfs_rl_mc(dst, loc + disc - hole, src, left, ssize - left); | ||
280 | |||
281 | /* Adjust the VCN of the last run ... */ | ||
282 | if (dst[magic].lcn <= LCN_HOLE) | ||
283 | dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length; | ||
284 | /* ... and the length. */ | ||
285 | if (dst[magic].lcn == LCN_HOLE || dst[magic].lcn == LCN_RL_NOT_MAPPED) | ||
286 | dst[magic].length = dst[magic + 1].vcn - dst[magic].vcn; | ||
287 | |||
288 | /* Writing beyond the end of the file and there's a discontinuity. */ | ||
289 | if (disc) { | ||
290 | if (hole) | ||
291 | dst[loc - 1].length = dst[loc].vcn - dst[loc - 1].vcn; | ||
292 | else { | ||
293 | if (loc > 0) { | ||
294 | dst[loc].vcn = dst[loc - 1].vcn + | ||
295 | dst[loc - 1].length; | ||
296 | dst[loc].length = dst[loc + 1].vcn - | ||
297 | dst[loc].vcn; | ||
298 | } else { | ||
299 | dst[loc].vcn = 0; | ||
300 | dst[loc].length = dst[loc + 1].vcn; | ||
301 | } | ||
302 | dst[loc].lcn = LCN_RL_NOT_MAPPED; | ||
303 | } | ||
304 | |||
305 | magic += hole; | ||
306 | |||
307 | if (dst[magic].lcn == LCN_ENOENT) | ||
308 | dst[magic].vcn = dst[magic - 1].vcn + | ||
309 | dst[magic - 1].length; | ||
310 | } | ||
311 | return dst; | ||
312 | } | ||
313 | |||
314 | /** | ||
315 | * ntfs_rl_replace - overwrite a runlist element with another runlist | ||
316 | * @dst: original runlist to be worked on | ||
317 | * @dsize: number of elements in @dst (including end marker) | ||
318 | * @src: new runlist to be inserted | ||
319 | * @ssize: number of elements in @src (excluding end marker) | ||
320 | * @loc: index in runlist @dst to overwrite with @src | ||
321 | * | ||
322 | * Replace the runlist element @dst at @loc with @src. Merge the left and | ||
323 | * right ends of the inserted runlist, if necessary. | ||
324 | * | ||
325 | * It is up to the caller to serialize access to the runlists @dst and @src. | ||
326 | * | ||
327 | * On success, return a pointer to the new, combined, runlist. Note, both | ||
328 | * runlists @dst and @src are deallocated before returning so you cannot use | ||
329 | * the pointers for anything any more. (Strictly speaking the returned runlist | ||
330 | * may be the same as @dst but this is irrelevant.) | ||
331 | * | ||
332 | * On error, return -errno. Both runlists are left unmodified. The following | ||
333 | * error codes are defined: | ||
334 | * -ENOMEM - Not enough memory to allocate runlist array. | ||
335 | * -EINVAL - Invalid parameters were passed in. | ||
336 | */ | ||
337 | static inline runlist_element *ntfs_rl_replace(runlist_element *dst, | ||
338 | int dsize, runlist_element *src, int ssize, int loc) | ||
339 | { | ||
340 | BOOL left = FALSE; | ||
341 | BOOL right; | ||
342 | int magic; | ||
343 | |||
344 | BUG_ON(!dst); | ||
345 | BUG_ON(!src); | ||
346 | |||
347 | /* First, merge the left and right ends, if necessary. */ | ||
348 | right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1); | ||
349 | if (loc > 0) | ||
350 | left = ntfs_are_rl_mergeable(dst + loc - 1, src); | ||
351 | |||
352 | /* Allocate some space. We'll need less if the left, right, or both | ||
353 | * ends were merged. */ | ||
354 | dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left - right); | ||
355 | if (IS_ERR(dst)) | ||
356 | return dst; | ||
357 | /* | ||
358 | * We are guaranteed to succeed from here so can start modifying the | ||
359 | * original runlists. | ||
360 | */ | ||
361 | if (right) | ||
362 | __ntfs_rl_merge(src + ssize - 1, dst + loc + 1); | ||
363 | if (left) | ||
364 | __ntfs_rl_merge(dst + loc - 1, src); | ||
365 | |||
366 | /* FIXME: What does this mean? (AIA) */ | ||
367 | magic = loc + ssize - left; | ||
368 | |||
369 | /* Move the tail of @dst out of the way, then copy in @src. */ | ||
370 | ntfs_rl_mm(dst, magic, loc + right + 1, dsize - loc - right - 1); | ||
371 | ntfs_rl_mc(dst, loc, src, left, ssize - left); | ||
372 | |||
373 | /* We may have changed the length of the file, so fix the end marker */ | ||
374 | if (dst[magic].lcn == LCN_ENOENT) | ||
375 | dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length; | ||
376 | return dst; | ||
377 | } | ||
378 | |||
379 | /** | ||
380 | * ntfs_rl_split - insert a runlist into the centre of a hole | ||
381 | * @dst: original runlist to be worked on | ||
382 | * @dsize: number of elements in @dst (including end marker) | ||
383 | * @src: new runlist to be inserted | ||
384 | * @ssize: number of elements in @src (excluding end marker) | ||
385 | * @loc: index in runlist @dst at which to split and insert @src | ||
386 | * | ||
387 | * Split the runlist @dst at @loc into two and insert @new in between the two | ||
388 | * fragments. No merging of runlists is necessary. Adjust the size of the | ||
389 | * holes either side. | ||
390 | * | ||
391 | * It is up to the caller to serialize access to the runlists @dst and @src. | ||
392 | * | ||
393 | * On success, return a pointer to the new, combined, runlist. Note, both | ||
394 | * runlists @dst and @src are deallocated before returning so you cannot use | ||
395 | * the pointers for anything any more. (Strictly speaking the returned runlist | ||
396 | * may be the same as @dst but this is irrelevant.) | ||
397 | * | ||
398 | * On error, return -errno. Both runlists are left unmodified. The following | ||
399 | * error codes are defined: | ||
400 | * -ENOMEM - Not enough memory to allocate runlist array. | ||
401 | * -EINVAL - Invalid parameters were passed in. | ||
402 | */ | ||
403 | static inline runlist_element *ntfs_rl_split(runlist_element *dst, int dsize, | ||
404 | runlist_element *src, int ssize, int loc) | ||
405 | { | ||
406 | BUG_ON(!dst); | ||
407 | BUG_ON(!src); | ||
408 | |||
409 | /* Space required: @dst size + @src size + one new hole. */ | ||
410 | dst = ntfs_rl_realloc(dst, dsize, dsize + ssize + 1); | ||
411 | if (IS_ERR(dst)) | ||
412 | return dst; | ||
413 | /* | ||
414 | * We are guaranteed to succeed from here so can start modifying the | ||
415 | * original runlists. | ||
416 | */ | ||
417 | |||
418 | /* Move the tail of @dst out of the way, then copy in @src. */ | ||
419 | ntfs_rl_mm(dst, loc + 1 + ssize, loc, dsize - loc); | ||
420 | ntfs_rl_mc(dst, loc + 1, src, 0, ssize); | ||
421 | |||
422 | /* Adjust the size of the holes either size of @src. */ | ||
423 | dst[loc].length = dst[loc+1].vcn - dst[loc].vcn; | ||
424 | dst[loc+ssize+1].vcn = dst[loc+ssize].vcn + dst[loc+ssize].length; | ||
425 | dst[loc+ssize+1].length = dst[loc+ssize+2].vcn - dst[loc+ssize+1].vcn; | ||
426 | |||
427 | return dst; | ||
428 | } | ||
429 | |||
430 | /** | ||
431 | * ntfs_runlists_merge - merge two runlists into one | ||
432 | * @drl: original runlist to be worked on | ||
433 | * @srl: new runlist to be merged into @drl | ||
434 | * | ||
435 | * First we sanity check the two runlists @srl and @drl to make sure that they | ||
436 | * are sensible and can be merged. The runlist @srl must be either after the | ||
437 | * runlist @drl or completely within a hole (or unmapped region) in @drl. | ||
438 | * | ||
439 | * It is up to the caller to serialize access to the runlists @drl and @srl. | ||
440 | * | ||
441 | * Merging of runlists is necessary in two cases: | ||
442 | * 1. When attribute lists are used and a further extent is being mapped. | ||
443 | * 2. When new clusters are allocated to fill a hole or extend a file. | ||
444 | * | ||
445 | * There are four possible ways @srl can be merged. It can: | ||
446 | * - be inserted at the beginning of a hole, | ||
447 | * - split the hole in two and be inserted between the two fragments, | ||
448 | * - be appended at the end of a hole, or it can | ||
449 | * - replace the whole hole. | ||
450 | * It can also be appended to the end of the runlist, which is just a variant | ||
451 | * of the insert case. | ||
452 | * | ||
453 | * On success, return a pointer to the new, combined, runlist. Note, both | ||
454 | * runlists @drl and @srl are deallocated before returning so you cannot use | ||
455 | * the pointers for anything any more. (Strictly speaking the returned runlist | ||
456 | * may be the same as @dst but this is irrelevant.) | ||
457 | * | ||
458 | * On error, return -errno. Both runlists are left unmodified. The following | ||
459 | * error codes are defined: | ||
460 | * -ENOMEM - Not enough memory to allocate runlist array. | ||
461 | * -EINVAL - Invalid parameters were passed in. | ||
462 | * -ERANGE - The runlists overlap and cannot be merged. | ||
463 | */ | ||
464 | runlist_element *ntfs_runlists_merge(runlist_element *drl, | ||
465 | runlist_element *srl) | ||
466 | { | ||
467 | int di, si; /* Current index into @[ds]rl. */ | ||
468 | int sstart; /* First index with lcn > LCN_RL_NOT_MAPPED. */ | ||
469 | int dins; /* Index into @drl at which to insert @srl. */ | ||
470 | int dend, send; /* Last index into @[ds]rl. */ | ||
471 | int dfinal, sfinal; /* The last index into @[ds]rl with | ||
472 | lcn >= LCN_HOLE. */ | ||
473 | int marker = 0; | ||
474 | VCN marker_vcn = 0; | ||
475 | |||
476 | #ifdef DEBUG | ||
477 | ntfs_debug("dst:"); | ||
478 | ntfs_debug_dump_runlist(drl); | ||
479 | ntfs_debug("src:"); | ||
480 | ntfs_debug_dump_runlist(srl); | ||
481 | #endif | ||
482 | |||
483 | /* Check for silly calling... */ | ||
484 | if (unlikely(!srl)) | ||
485 | return drl; | ||
486 | if (IS_ERR(srl) || IS_ERR(drl)) | ||
487 | return ERR_PTR(-EINVAL); | ||
488 | |||
489 | /* Check for the case where the first mapping is being done now. */ | ||
490 | if (unlikely(!drl)) { | ||
491 | drl = srl; | ||
492 | /* Complete the source runlist if necessary. */ | ||
493 | if (unlikely(drl[0].vcn)) { | ||
494 | /* Scan to the end of the source runlist. */ | ||
495 | for (dend = 0; likely(drl[dend].length); dend++) | ||
496 | ; | ||
497 | drl = ntfs_rl_realloc(drl, dend, dend + 1); | ||
498 | if (IS_ERR(drl)) | ||
499 | return drl; | ||
500 | /* Insert start element at the front of the runlist. */ | ||
501 | ntfs_rl_mm(drl, 1, 0, dend); | ||
502 | drl[0].vcn = 0; | ||
503 | drl[0].lcn = LCN_RL_NOT_MAPPED; | ||
504 | drl[0].length = drl[1].vcn; | ||
505 | } | ||
506 | goto finished; | ||
507 | } | ||
508 | |||
509 | si = di = 0; | ||
510 | |||
511 | /* Skip any unmapped start element(s) in the source runlist. */ | ||
512 | while (srl[si].length && srl[si].lcn < LCN_HOLE) | ||
513 | si++; | ||
514 | |||
515 | /* Can't have an entirely unmapped source runlist. */ | ||
516 | BUG_ON(!srl[si].length); | ||
517 | |||
518 | /* Record the starting points. */ | ||
519 | sstart = si; | ||
520 | |||
521 | /* | ||
522 | * Skip forward in @drl until we reach the position where @srl needs to | ||
523 | * be inserted. If we reach the end of @drl, @srl just needs to be | ||
524 | * appended to @drl. | ||
525 | */ | ||
526 | for (; drl[di].length; di++) { | ||
527 | if (drl[di].vcn + drl[di].length > srl[sstart].vcn) | ||
528 | break; | ||
529 | } | ||
530 | dins = di; | ||
531 | |||
532 | /* Sanity check for illegal overlaps. */ | ||
533 | if ((drl[di].vcn == srl[si].vcn) && (drl[di].lcn >= 0) && | ||
534 | (srl[si].lcn >= 0)) { | ||
535 | ntfs_error(NULL, "Run lists overlap. Cannot merge!"); | ||
536 | return ERR_PTR(-ERANGE); | ||
537 | } | ||
538 | |||
539 | /* Scan to the end of both runlists in order to know their sizes. */ | ||
540 | for (send = si; srl[send].length; send++) | ||
541 | ; | ||
542 | for (dend = di; drl[dend].length; dend++) | ||
543 | ; | ||
544 | |||
545 | if (srl[send].lcn == LCN_ENOENT) | ||
546 | marker_vcn = srl[marker = send].vcn; | ||
547 | |||
548 | /* Scan to the last element with lcn >= LCN_HOLE. */ | ||
549 | for (sfinal = send; sfinal >= 0 && srl[sfinal].lcn < LCN_HOLE; sfinal--) | ||
550 | ; | ||
551 | for (dfinal = dend; dfinal >= 0 && drl[dfinal].lcn < LCN_HOLE; dfinal--) | ||
552 | ; | ||
553 | |||
554 | { | ||
555 | BOOL start; | ||
556 | BOOL finish; | ||
557 | int ds = dend + 1; /* Number of elements in drl & srl */ | ||
558 | int ss = sfinal - sstart + 1; | ||
559 | |||
560 | start = ((drl[dins].lcn < LCN_RL_NOT_MAPPED) || /* End of file */ | ||
561 | (drl[dins].vcn == srl[sstart].vcn)); /* Start of hole */ | ||
562 | finish = ((drl[dins].lcn >= LCN_RL_NOT_MAPPED) && /* End of file */ | ||
563 | ((drl[dins].vcn + drl[dins].length) <= /* End of hole */ | ||
564 | (srl[send - 1].vcn + srl[send - 1].length))); | ||
565 | |||
566 | /* Or we'll lose an end marker */ | ||
567 | if (start && finish && (drl[dins].length == 0)) | ||
568 | ss++; | ||
569 | if (marker && (drl[dins].vcn + drl[dins].length > srl[send - 1].vcn)) | ||
570 | finish = FALSE; | ||
571 | #if 0 | ||
572 | ntfs_debug("dfinal = %i, dend = %i", dfinal, dend); | ||
573 | ntfs_debug("sstart = %i, sfinal = %i, send = %i", sstart, sfinal, send); | ||
574 | ntfs_debug("start = %i, finish = %i", start, finish); | ||
575 | ntfs_debug("ds = %i, ss = %i, dins = %i", ds, ss, dins); | ||
576 | #endif | ||
577 | if (start) { | ||
578 | if (finish) | ||
579 | drl = ntfs_rl_replace(drl, ds, srl + sstart, ss, dins); | ||
580 | else | ||
581 | drl = ntfs_rl_insert(drl, ds, srl + sstart, ss, dins); | ||
582 | } else { | ||
583 | if (finish) | ||
584 | drl = ntfs_rl_append(drl, ds, srl + sstart, ss, dins); | ||
585 | else | ||
586 | drl = ntfs_rl_split(drl, ds, srl + sstart, ss, dins); | ||
587 | } | ||
588 | if (IS_ERR(drl)) { | ||
589 | ntfs_error(NULL, "Merge failed."); | ||
590 | return drl; | ||
591 | } | ||
592 | ntfs_free(srl); | ||
593 | if (marker) { | ||
594 | ntfs_debug("Triggering marker code."); | ||
595 | for (ds = dend; drl[ds].length; ds++) | ||
596 | ; | ||
597 | /* We only need to care if @srl ended after @drl. */ | ||
598 | if (drl[ds].vcn <= marker_vcn) { | ||
599 | int slots = 0; | ||
600 | |||
601 | if (drl[ds].vcn == marker_vcn) { | ||
602 | ntfs_debug("Old marker = 0x%llx, replacing " | ||
603 | "with LCN_ENOENT.", | ||
604 | (unsigned long long) | ||
605 | drl[ds].lcn); | ||
606 | drl[ds].lcn = LCN_ENOENT; | ||
607 | goto finished; | ||
608 | } | ||
609 | /* | ||
610 | * We need to create an unmapped runlist element in | ||
611 | * @drl or extend an existing one before adding the | ||
612 | * ENOENT terminator. | ||
613 | */ | ||
614 | if (drl[ds].lcn == LCN_ENOENT) { | ||
615 | ds--; | ||
616 | slots = 1; | ||
617 | } | ||
618 | if (drl[ds].lcn != LCN_RL_NOT_MAPPED) { | ||
619 | /* Add an unmapped runlist element. */ | ||
620 | if (!slots) { | ||
621 | /* FIXME/TODO: We need to have the | ||
622 | * extra memory already! (AIA) */ | ||
623 | drl = ntfs_rl_realloc(drl, ds, ds + 2); | ||
624 | if (!drl) | ||
625 | goto critical_error; | ||
626 | slots = 2; | ||
627 | } | ||
628 | ds++; | ||
629 | /* Need to set vcn if it isn't set already. */ | ||
630 | if (slots != 1) | ||
631 | drl[ds].vcn = drl[ds - 1].vcn + | ||
632 | drl[ds - 1].length; | ||
633 | drl[ds].lcn = LCN_RL_NOT_MAPPED; | ||
634 | /* We now used up a slot. */ | ||
635 | slots--; | ||
636 | } | ||
637 | drl[ds].length = marker_vcn - drl[ds].vcn; | ||
638 | /* Finally add the ENOENT terminator. */ | ||
639 | ds++; | ||
640 | if (!slots) { | ||
641 | /* FIXME/TODO: We need to have the extra | ||
642 | * memory already! (AIA) */ | ||
643 | drl = ntfs_rl_realloc(drl, ds, ds + 1); | ||
644 | if (!drl) | ||
645 | goto critical_error; | ||
646 | } | ||
647 | drl[ds].vcn = marker_vcn; | ||
648 | drl[ds].lcn = LCN_ENOENT; | ||
649 | drl[ds].length = (s64)0; | ||
650 | } | ||
651 | } | ||
652 | } | ||
653 | |||
654 | finished: | ||
655 | /* The merge was completed successfully. */ | ||
656 | ntfs_debug("Merged runlist:"); | ||
657 | ntfs_debug_dump_runlist(drl); | ||
658 | return drl; | ||
659 | |||
660 | critical_error: | ||
661 | /* Critical error! We cannot afford to fail here. */ | ||
662 | ntfs_error(NULL, "Critical error! Not enough memory."); | ||
663 | panic("NTFS: Cannot continue."); | ||
664 | } | ||
665 | |||
666 | /** | ||
667 | * ntfs_mapping_pairs_decompress - convert mapping pairs array to runlist | ||
668 | * @vol: ntfs volume on which the attribute resides | ||
669 | * @attr: attribute record whose mapping pairs array to decompress | ||
670 | * @old_rl: optional runlist in which to insert @attr's runlist | ||
671 | * | ||
672 | * It is up to the caller to serialize access to the runlist @old_rl. | ||
673 | * | ||
674 | * Decompress the attribute @attr's mapping pairs array into a runlist. On | ||
675 | * success, return the decompressed runlist. | ||
676 | * | ||
677 | * If @old_rl is not NULL, decompressed runlist is inserted into the | ||
678 | * appropriate place in @old_rl and the resultant, combined runlist is | ||
679 | * returned. The original @old_rl is deallocated. | ||
680 | * | ||
681 | * On error, return -errno. @old_rl is left unmodified in that case. | ||
682 | * | ||
683 | * The following error codes are defined: | ||
684 | * -ENOMEM - Not enough memory to allocate runlist array. | ||
685 | * -EIO - Corrupt runlist. | ||
686 | * -EINVAL - Invalid parameters were passed in. | ||
687 | * -ERANGE - The two runlists overlap. | ||
688 | * | ||
689 | * FIXME: For now we take the conceptually simplest approach of creating the | ||
690 | * new runlist disregarding the already existing one and then splicing the | ||
691 | * two into one, if that is possible (we check for overlap and discard the new | ||
692 | * runlist if overlap present before returning ERR_PTR(-ERANGE)). | ||
693 | */ | ||
694 | runlist_element *ntfs_mapping_pairs_decompress(const ntfs_volume *vol, | ||
695 | const ATTR_RECORD *attr, runlist_element *old_rl) | ||
696 | { | ||
697 | VCN vcn; /* Current vcn. */ | ||
698 | LCN lcn; /* Current lcn. */ | ||
699 | s64 deltaxcn; /* Change in [vl]cn. */ | ||
700 | runlist_element *rl; /* The output runlist. */ | ||
701 | u8 *buf; /* Current position in mapping pairs array. */ | ||
702 | u8 *attr_end; /* End of attribute. */ | ||
703 | int rlsize; /* Size of runlist buffer. */ | ||
704 | u16 rlpos; /* Current runlist position in units of | ||
705 | runlist_elements. */ | ||
706 | u8 b; /* Current byte offset in buf. */ | ||
707 | |||
708 | #ifdef DEBUG | ||
709 | /* Make sure attr exists and is non-resident. */ | ||
710 | if (!attr || !attr->non_resident || sle64_to_cpu( | ||
711 | attr->data.non_resident.lowest_vcn) < (VCN)0) { | ||
712 | ntfs_error(vol->sb, "Invalid arguments."); | ||
713 | return ERR_PTR(-EINVAL); | ||
714 | } | ||
715 | #endif | ||
716 | /* Start at vcn = lowest_vcn and lcn 0. */ | ||
717 | vcn = sle64_to_cpu(attr->data.non_resident.lowest_vcn); | ||
718 | lcn = 0; | ||
719 | /* Get start of the mapping pairs array. */ | ||
720 | buf = (u8*)attr + le16_to_cpu( | ||
721 | attr->data.non_resident.mapping_pairs_offset); | ||
722 | attr_end = (u8*)attr + le32_to_cpu(attr->length); | ||
723 | if (unlikely(buf < (u8*)attr || buf > attr_end)) { | ||
724 | ntfs_error(vol->sb, "Corrupt attribute."); | ||
725 | return ERR_PTR(-EIO); | ||
726 | } | ||
727 | /* Current position in runlist array. */ | ||
728 | rlpos = 0; | ||
729 | /* Allocate first page and set current runlist size to one page. */ | ||
730 | rl = ntfs_malloc_nofs(rlsize = PAGE_SIZE); | ||
731 | if (unlikely(!rl)) | ||
732 | return ERR_PTR(-ENOMEM); | ||
733 | /* Insert unmapped starting element if necessary. */ | ||
734 | if (vcn) { | ||
735 | rl->vcn = 0; | ||
736 | rl->lcn = LCN_RL_NOT_MAPPED; | ||
737 | rl->length = vcn; | ||
738 | rlpos++; | ||
739 | } | ||
740 | while (buf < attr_end && *buf) { | ||
741 | /* | ||
742 | * Allocate more memory if needed, including space for the | ||
743 | * not-mapped and terminator elements. ntfs_malloc_nofs() | ||
744 | * operates on whole pages only. | ||
745 | */ | ||
746 | if (((rlpos + 3) * sizeof(*old_rl)) > rlsize) { | ||
747 | runlist_element *rl2; | ||
748 | |||
749 | rl2 = ntfs_malloc_nofs(rlsize + (int)PAGE_SIZE); | ||
750 | if (unlikely(!rl2)) { | ||
751 | ntfs_free(rl); | ||
752 | return ERR_PTR(-ENOMEM); | ||
753 | } | ||
754 | memcpy(rl2, rl, rlsize); | ||
755 | ntfs_free(rl); | ||
756 | rl = rl2; | ||
757 | rlsize += PAGE_SIZE; | ||
758 | } | ||
759 | /* Enter the current vcn into the current runlist element. */ | ||
760 | rl[rlpos].vcn = vcn; | ||
761 | /* | ||
762 | * Get the change in vcn, i.e. the run length in clusters. | ||
763 | * Doing it this way ensures that we signextend negative values. | ||
764 | * A negative run length doesn't make any sense, but hey, I | ||
765 | * didn't make up the NTFS specs and Windows NT4 treats the run | ||
766 | * length as a signed value so that's how it is... | ||
767 | */ | ||
768 | b = *buf & 0xf; | ||
769 | if (b) { | ||
770 | if (unlikely(buf + b > attr_end)) | ||
771 | goto io_error; | ||
772 | for (deltaxcn = (s8)buf[b--]; b; b--) | ||
773 | deltaxcn = (deltaxcn << 8) + buf[b]; | ||
774 | } else { /* The length entry is compulsory. */ | ||
775 | ntfs_error(vol->sb, "Missing length entry in mapping " | ||
776 | "pairs array."); | ||
777 | deltaxcn = (s64)-1; | ||
778 | } | ||
779 | /* | ||
780 | * Assume a negative length to indicate data corruption and | ||
781 | * hence clean-up and return NULL. | ||
782 | */ | ||
783 | if (unlikely(deltaxcn < 0)) { | ||
784 | ntfs_error(vol->sb, "Invalid length in mapping pairs " | ||
785 | "array."); | ||
786 | goto err_out; | ||
787 | } | ||
788 | /* | ||
789 | * Enter the current run length into the current runlist | ||
790 | * element. | ||
791 | */ | ||
792 | rl[rlpos].length = deltaxcn; | ||
793 | /* Increment the current vcn by the current run length. */ | ||
794 | vcn += deltaxcn; | ||
795 | /* | ||
796 | * There might be no lcn change at all, as is the case for | ||
797 | * sparse clusters on NTFS 3.0+, in which case we set the lcn | ||
798 | * to LCN_HOLE. | ||
799 | */ | ||
800 | if (!(*buf & 0xf0)) | ||
801 | rl[rlpos].lcn = LCN_HOLE; | ||
802 | else { | ||
803 | /* Get the lcn change which really can be negative. */ | ||
804 | u8 b2 = *buf & 0xf; | ||
805 | b = b2 + ((*buf >> 4) & 0xf); | ||
806 | if (buf + b > attr_end) | ||
807 | goto io_error; | ||
808 | for (deltaxcn = (s8)buf[b--]; b > b2; b--) | ||
809 | deltaxcn = (deltaxcn << 8) + buf[b]; | ||
810 | /* Change the current lcn to its new value. */ | ||
811 | lcn += deltaxcn; | ||
812 | #ifdef DEBUG | ||
813 | /* | ||
814 | * On NTFS 1.2-, apparently can have lcn == -1 to | ||
815 | * indicate a hole. But we haven't verified ourselves | ||
816 | * whether it is really the lcn or the deltaxcn that is | ||
817 | * -1. So if either is found give us a message so we | ||
818 | * can investigate it further! | ||
819 | */ | ||
820 | if (vol->major_ver < 3) { | ||
821 | if (unlikely(deltaxcn == (LCN)-1)) | ||
822 | ntfs_error(vol->sb, "lcn delta == -1"); | ||
823 | if (unlikely(lcn == (LCN)-1)) | ||
824 | ntfs_error(vol->sb, "lcn == -1"); | ||
825 | } | ||
826 | #endif | ||
827 | /* Check lcn is not below -1. */ | ||
828 | if (unlikely(lcn < (LCN)-1)) { | ||
829 | ntfs_error(vol->sb, "Invalid LCN < -1 in " | ||
830 | "mapping pairs array."); | ||
831 | goto err_out; | ||
832 | } | ||
833 | /* Enter the current lcn into the runlist element. */ | ||
834 | rl[rlpos].lcn = lcn; | ||
835 | } | ||
836 | /* Get to the next runlist element. */ | ||
837 | rlpos++; | ||
838 | /* Increment the buffer position to the next mapping pair. */ | ||
839 | buf += (*buf & 0xf) + ((*buf >> 4) & 0xf) + 1; | ||
840 | } | ||
841 | if (unlikely(buf >= attr_end)) | ||
842 | goto io_error; | ||
843 | /* | ||
844 | * If there is a highest_vcn specified, it must be equal to the final | ||
845 | * vcn in the runlist - 1, or something has gone badly wrong. | ||
846 | */ | ||
847 | deltaxcn = sle64_to_cpu(attr->data.non_resident.highest_vcn); | ||
848 | if (unlikely(deltaxcn && vcn - 1 != deltaxcn)) { | ||
849 | mpa_err: | ||
850 | ntfs_error(vol->sb, "Corrupt mapping pairs array in " | ||
851 | "non-resident attribute."); | ||
852 | goto err_out; | ||
853 | } | ||
854 | /* Setup not mapped runlist element if this is the base extent. */ | ||
855 | if (!attr->data.non_resident.lowest_vcn) { | ||
856 | VCN max_cluster; | ||
857 | |||
858 | max_cluster = (sle64_to_cpu( | ||
859 | attr->data.non_resident.allocated_size) + | ||
860 | vol->cluster_size - 1) >> | ||
861 | vol->cluster_size_bits; | ||
862 | /* | ||
863 | * If there is a difference between the highest_vcn and the | ||
864 | * highest cluster, the runlist is either corrupt or, more | ||
865 | * likely, there are more extents following this one. | ||
866 | */ | ||
867 | if (deltaxcn < --max_cluster) { | ||
868 | ntfs_debug("More extents to follow; deltaxcn = 0x%llx, " | ||
869 | "max_cluster = 0x%llx", | ||
870 | (unsigned long long)deltaxcn, | ||
871 | (unsigned long long)max_cluster); | ||
872 | rl[rlpos].vcn = vcn; | ||
873 | vcn += rl[rlpos].length = max_cluster - deltaxcn; | ||
874 | rl[rlpos].lcn = LCN_RL_NOT_MAPPED; | ||
875 | rlpos++; | ||
876 | } else if (unlikely(deltaxcn > max_cluster)) { | ||
877 | ntfs_error(vol->sb, "Corrupt attribute. deltaxcn = " | ||
878 | "0x%llx, max_cluster = 0x%llx", | ||
879 | (unsigned long long)deltaxcn, | ||
880 | (unsigned long long)max_cluster); | ||
881 | goto mpa_err; | ||
882 | } | ||
883 | rl[rlpos].lcn = LCN_ENOENT; | ||
884 | } else /* Not the base extent. There may be more extents to follow. */ | ||
885 | rl[rlpos].lcn = LCN_RL_NOT_MAPPED; | ||
886 | |||
887 | /* Setup terminating runlist element. */ | ||
888 | rl[rlpos].vcn = vcn; | ||
889 | rl[rlpos].length = (s64)0; | ||
890 | /* If no existing runlist was specified, we are done. */ | ||
891 | if (!old_rl) { | ||
892 | ntfs_debug("Mapping pairs array successfully decompressed:"); | ||
893 | ntfs_debug_dump_runlist(rl); | ||
894 | return rl; | ||
895 | } | ||
896 | /* Now combine the new and old runlists checking for overlaps. */ | ||
897 | old_rl = ntfs_runlists_merge(old_rl, rl); | ||
898 | if (likely(!IS_ERR(old_rl))) | ||
899 | return old_rl; | ||
900 | ntfs_free(rl); | ||
901 | ntfs_error(vol->sb, "Failed to merge runlists."); | ||
902 | return old_rl; | ||
903 | io_error: | ||
904 | ntfs_error(vol->sb, "Corrupt attribute."); | ||
905 | err_out: | ||
906 | ntfs_free(rl); | ||
907 | return ERR_PTR(-EIO); | ||
908 | } | ||
909 | |||
910 | /** | ||
911 | * ntfs_rl_vcn_to_lcn - convert a vcn into a lcn given a runlist | ||
912 | * @rl: runlist to use for conversion | ||
913 | * @vcn: vcn to convert | ||
914 | * | ||
915 | * Convert the virtual cluster number @vcn of an attribute into a logical | ||
916 | * cluster number (lcn) of a device using the runlist @rl to map vcns to their | ||
917 | * corresponding lcns. | ||
918 | * | ||
919 | * It is up to the caller to serialize access to the runlist @rl. | ||
920 | * | ||
921 | * Since lcns must be >= 0, we use negative return values with special meaning: | ||
922 | * | ||
923 | * Return value Meaning / Description | ||
924 | * ================================================== | ||
925 | * -1 = LCN_HOLE Hole / not allocated on disk. | ||
926 | * -2 = LCN_RL_NOT_MAPPED This is part of the runlist which has not been | ||
927 | * inserted into the runlist yet. | ||
928 | * -3 = LCN_ENOENT There is no such vcn in the attribute. | ||
929 | * | ||
930 | * Locking: - The caller must have locked the runlist (for reading or writing). | ||
931 | * - This function does not touch the lock. | ||
932 | */ | ||
933 | LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn) | ||
934 | { | ||
935 | int i; | ||
936 | |||
937 | BUG_ON(vcn < 0); | ||
938 | /* | ||
939 | * If rl is NULL, assume that we have found an unmapped runlist. The | ||
940 | * caller can then attempt to map it and fail appropriately if | ||
941 | * necessary. | ||
942 | */ | ||
943 | if (unlikely(!rl)) | ||
944 | return LCN_RL_NOT_MAPPED; | ||
945 | |||
946 | /* Catch out of lower bounds vcn. */ | ||
947 | if (unlikely(vcn < rl[0].vcn)) | ||
948 | return LCN_ENOENT; | ||
949 | |||
950 | for (i = 0; likely(rl[i].length); i++) { | ||
951 | if (unlikely(vcn < rl[i+1].vcn)) { | ||
952 | if (likely(rl[i].lcn >= (LCN)0)) | ||
953 | return rl[i].lcn + (vcn - rl[i].vcn); | ||
954 | return rl[i].lcn; | ||
955 | } | ||
956 | } | ||
957 | /* | ||
958 | * The terminator element is setup to the correct value, i.e. one of | ||
959 | * LCN_HOLE, LCN_RL_NOT_MAPPED, or LCN_ENOENT. | ||
960 | */ | ||
961 | if (likely(rl[i].lcn < (LCN)0)) | ||
962 | return rl[i].lcn; | ||
963 | /* Just in case... We could replace this with BUG() some day. */ | ||
964 | return LCN_ENOENT; | ||
965 | } | ||
966 | |||
967 | /** | ||
968 | * ntfs_get_nr_significant_bytes - get number of bytes needed to store a number | ||
969 | * @n: number for which to get the number of bytes for | ||
970 | * | ||
971 | * Return the number of bytes required to store @n unambiguously as | ||
972 | * a signed number. | ||
973 | * | ||
974 | * This is used in the context of the mapping pairs array to determine how | ||
975 | * many bytes will be needed in the array to store a given logical cluster | ||
976 | * number (lcn) or a specific run length. | ||
977 | * | ||
978 | * Return the number of bytes written. This function cannot fail. | ||
979 | */ | ||
980 | static inline int ntfs_get_nr_significant_bytes(const s64 n) | ||
981 | { | ||
982 | s64 l = n; | ||
983 | int i; | ||
984 | s8 j; | ||
985 | |||
986 | i = 0; | ||
987 | do { | ||
988 | l >>= 8; | ||
989 | i++; | ||
990 | } while (l != 0 && l != -1); | ||
991 | j = (n >> 8 * (i - 1)) & 0xff; | ||
992 | /* If the sign bit is wrong, we need an extra byte. */ | ||
993 | if ((n < 0 && j >= 0) || (n > 0 && j < 0)) | ||
994 | i++; | ||
995 | return i; | ||
996 | } | ||
997 | |||
998 | /** | ||
999 | * ntfs_get_size_for_mapping_pairs - get bytes needed for mapping pairs array | ||
1000 | * @vol: ntfs volume (needed for the ntfs version) | ||
1001 | * @rl: locked runlist to determine the size of the mapping pairs of | ||
1002 | * @start_vcn: vcn at which to start the mapping pairs array | ||
1003 | * | ||
1004 | * Walk the locked runlist @rl and calculate the size in bytes of the mapping | ||
1005 | * pairs array corresponding to the runlist @rl, starting at vcn @start_vcn. | ||
1006 | * This for example allows us to allocate a buffer of the right size when | ||
1007 | * building the mapping pairs array. | ||
1008 | * | ||
1009 | * If @rl is NULL, just return 1 (for the single terminator byte). | ||
1010 | * | ||
1011 | * Return the calculated size in bytes on success. On error, return -errno. | ||
1012 | * The following error codes are defined: | ||
1013 | * -EINVAL - Run list contains unmapped elements. Make sure to only pass | ||
1014 | * fully mapped runlists to this function. | ||
1015 | * -EIO - The runlist is corrupt. | ||
1016 | * | ||
1017 | * Locking: @rl must be locked on entry (either for reading or writing), it | ||
1018 | * remains locked throughout, and is left locked upon return. | ||
1019 | */ | ||
1020 | int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol, | ||
1021 | const runlist_element *rl, const VCN start_vcn) | ||
1022 | { | ||
1023 | LCN prev_lcn; | ||
1024 | int rls; | ||
1025 | |||
1026 | BUG_ON(start_vcn < 0); | ||
1027 | if (!rl) { | ||
1028 | BUG_ON(start_vcn); | ||
1029 | return 1; | ||
1030 | } | ||
1031 | /* Skip to runlist element containing @start_vcn. */ | ||
1032 | while (rl->length && start_vcn >= rl[1].vcn) | ||
1033 | rl++; | ||
1034 | if ((!rl->length && start_vcn > rl->vcn) || start_vcn < rl->vcn) | ||
1035 | return -EINVAL; | ||
1036 | prev_lcn = 0; | ||
1037 | /* Always need the termining zero byte. */ | ||
1038 | rls = 1; | ||
1039 | /* Do the first partial run if present. */ | ||
1040 | if (start_vcn > rl->vcn) { | ||
1041 | s64 delta; | ||
1042 | |||
1043 | /* We know rl->length != 0 already. */ | ||
1044 | if (rl->length < 0 || rl->lcn < LCN_HOLE) | ||
1045 | goto err_out; | ||
1046 | delta = start_vcn - rl->vcn; | ||
1047 | /* Header byte + length. */ | ||
1048 | rls += 1 + ntfs_get_nr_significant_bytes(rl->length - delta); | ||
1049 | /* | ||
1050 | * If the logical cluster number (lcn) denotes a hole and we | ||
1051 | * are on NTFS 3.0+, we don't store it at all, i.e. we need | ||
1052 | * zero space. On earlier NTFS versions we just store the lcn. | ||
1053 | * Note: this assumes that on NTFS 1.2-, holes are stored with | ||
1054 | * an lcn of -1 and not a delta_lcn of -1 (unless both are -1). | ||
1055 | */ | ||
1056 | if (rl->lcn >= 0 || vol->major_ver < 3) { | ||
1057 | prev_lcn = rl->lcn; | ||
1058 | if (rl->lcn >= 0) | ||
1059 | prev_lcn += delta; | ||
1060 | /* Change in lcn. */ | ||
1061 | rls += ntfs_get_nr_significant_bytes(prev_lcn); | ||
1062 | } | ||
1063 | /* Go to next runlist element. */ | ||
1064 | rl++; | ||
1065 | } | ||
1066 | /* Do the full runs. */ | ||
1067 | for (; rl->length; rl++) { | ||
1068 | if (rl->length < 0 || rl->lcn < LCN_HOLE) | ||
1069 | goto err_out; | ||
1070 | /* Header byte + length. */ | ||
1071 | rls += 1 + ntfs_get_nr_significant_bytes(rl->length); | ||
1072 | /* | ||
1073 | * If the logical cluster number (lcn) denotes a hole and we | ||
1074 | * are on NTFS 3.0+, we don't store it at all, i.e. we need | ||
1075 | * zero space. On earlier NTFS versions we just store the lcn. | ||
1076 | * Note: this assumes that on NTFS 1.2-, holes are stored with | ||
1077 | * an lcn of -1 and not a delta_lcn of -1 (unless both are -1). | ||
1078 | */ | ||
1079 | if (rl->lcn >= 0 || vol->major_ver < 3) { | ||
1080 | /* Change in lcn. */ | ||
1081 | rls += ntfs_get_nr_significant_bytes(rl->lcn - | ||
1082 | prev_lcn); | ||
1083 | prev_lcn = rl->lcn; | ||
1084 | } | ||
1085 | } | ||
1086 | return rls; | ||
1087 | err_out: | ||
1088 | if (rl->lcn == LCN_RL_NOT_MAPPED) | ||
1089 | rls = -EINVAL; | ||
1090 | else | ||
1091 | rls = -EIO; | ||
1092 | return rls; | ||
1093 | } | ||
1094 | |||
1095 | /** | ||
1096 | * ntfs_write_significant_bytes - write the significant bytes of a number | ||
1097 | * @dst: destination buffer to write to | ||
1098 | * @dst_max: pointer to last byte of destination buffer for bounds checking | ||
1099 | * @n: number whose significant bytes to write | ||
1100 | * | ||
1101 | * Store in @dst, the minimum bytes of the number @n which are required to | ||
1102 | * identify @n unambiguously as a signed number, taking care not to exceed | ||
1103 | * @dest_max, the maximum position within @dst to which we are allowed to | ||
1104 | * write. | ||
1105 | * | ||
1106 | * This is used when building the mapping pairs array of a runlist to compress | ||
1107 | * a given logical cluster number (lcn) or a specific run length to the minumum | ||
1108 | * size possible. | ||
1109 | * | ||
1110 | * Return the number of bytes written on success. On error, i.e. the | ||
1111 | * destination buffer @dst is too small, return -ENOSPC. | ||
1112 | */ | ||
1113 | static inline int ntfs_write_significant_bytes(s8 *dst, const s8 *dst_max, | ||
1114 | const s64 n) | ||
1115 | { | ||
1116 | s64 l = n; | ||
1117 | int i; | ||
1118 | s8 j; | ||
1119 | |||
1120 | i = 0; | ||
1121 | do { | ||
1122 | if (dst > dst_max) | ||
1123 | goto err_out; | ||
1124 | *dst++ = l & 0xffll; | ||
1125 | l >>= 8; | ||
1126 | i++; | ||
1127 | } while (l != 0 && l != -1); | ||
1128 | j = (n >> 8 * (i - 1)) & 0xff; | ||
1129 | /* If the sign bit is wrong, we need an extra byte. */ | ||
1130 | if (n < 0 && j >= 0) { | ||
1131 | if (dst > dst_max) | ||
1132 | goto err_out; | ||
1133 | i++; | ||
1134 | *dst = (s8)-1; | ||
1135 | } else if (n > 0 && j < 0) { | ||
1136 | if (dst > dst_max) | ||
1137 | goto err_out; | ||
1138 | i++; | ||
1139 | *dst = (s8)0; | ||
1140 | } | ||
1141 | return i; | ||
1142 | err_out: | ||
1143 | return -ENOSPC; | ||
1144 | } | ||
1145 | |||
1146 | /** | ||
1147 | * ntfs_mapping_pairs_build - build the mapping pairs array from a runlist | ||
1148 | * @vol: ntfs volume (needed for the ntfs version) | ||
1149 | * @dst: destination buffer to which to write the mapping pairs array | ||
1150 | * @dst_len: size of destination buffer @dst in bytes | ||
1151 | * @rl: locked runlist for which to build the mapping pairs array | ||
1152 | * @start_vcn: vcn at which to start the mapping pairs array | ||
1153 | * @stop_vcn: first vcn outside destination buffer on success or -ENOSPC | ||
1154 | * | ||
1155 | * Create the mapping pairs array from the locked runlist @rl, starting at vcn | ||
1156 | * @start_vcn and save the array in @dst. @dst_len is the size of @dst in | ||
1157 | * bytes and it should be at least equal to the value obtained by calling | ||
1158 | * ntfs_get_size_for_mapping_pairs(). | ||
1159 | * | ||
1160 | * If @rl is NULL, just write a single terminator byte to @dst. | ||
1161 | * | ||
1162 | * On success or -ENOSPC error, if @stop_vcn is not NULL, *@stop_vcn is set to | ||
1163 | * the first vcn outside the destination buffer. Note that on error, @dst has | ||
1164 | * been filled with all the mapping pairs that will fit, thus it can be treated | ||
1165 | * as partial success, in that a new attribute extent needs to be created or | ||
1166 | * the next extent has to be used and the mapping pairs build has to be | ||
1167 | * continued with @start_vcn set to *@stop_vcn. | ||
1168 | * | ||
1169 | * Return 0 on success and -errno on error. The following error codes are | ||
1170 | * defined: | ||
1171 | * -EINVAL - Run list contains unmapped elements. Make sure to only pass | ||
1172 | * fully mapped runlists to this function. | ||
1173 | * -EIO - The runlist is corrupt. | ||
1174 | * -ENOSPC - The destination buffer is too small. | ||
1175 | * | ||
1176 | * Locking: @rl must be locked on entry (either for reading or writing), it | ||
1177 | * remains locked throughout, and is left locked upon return. | ||
1178 | */ | ||
1179 | int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst, | ||
1180 | const int dst_len, const runlist_element *rl, | ||
1181 | const VCN start_vcn, VCN *const stop_vcn) | ||
1182 | { | ||
1183 | LCN prev_lcn; | ||
1184 | s8 *dst_max, *dst_next; | ||
1185 | int err = -ENOSPC; | ||
1186 | s8 len_len, lcn_len; | ||
1187 | |||
1188 | BUG_ON(start_vcn < 0); | ||
1189 | BUG_ON(dst_len < 1); | ||
1190 | if (!rl) { | ||
1191 | BUG_ON(start_vcn); | ||
1192 | if (stop_vcn) | ||
1193 | *stop_vcn = 0; | ||
1194 | /* Terminator byte. */ | ||
1195 | *dst = 0; | ||
1196 | return 0; | ||
1197 | } | ||
1198 | /* Skip to runlist element containing @start_vcn. */ | ||
1199 | while (rl->length && start_vcn >= rl[1].vcn) | ||
1200 | rl++; | ||
1201 | if ((!rl->length && start_vcn > rl->vcn) || start_vcn < rl->vcn) | ||
1202 | return -EINVAL; | ||
1203 | /* | ||
1204 | * @dst_max is used for bounds checking in | ||
1205 | * ntfs_write_significant_bytes(). | ||
1206 | */ | ||
1207 | dst_max = dst + dst_len - 1; | ||
1208 | prev_lcn = 0; | ||
1209 | /* Do the first partial run if present. */ | ||
1210 | if (start_vcn > rl->vcn) { | ||
1211 | s64 delta; | ||
1212 | |||
1213 | /* We know rl->length != 0 already. */ | ||
1214 | if (rl->length < 0 || rl->lcn < LCN_HOLE) | ||
1215 | goto err_out; | ||
1216 | delta = start_vcn - rl->vcn; | ||
1217 | /* Write length. */ | ||
1218 | len_len = ntfs_write_significant_bytes(dst + 1, dst_max, | ||
1219 | rl->length - delta); | ||
1220 | if (len_len < 0) | ||
1221 | goto size_err; | ||
1222 | /* | ||
1223 | * If the logical cluster number (lcn) denotes a hole and we | ||
1224 | * are on NTFS 3.0+, we don't store it at all, i.e. we need | ||
1225 | * zero space. On earlier NTFS versions we just write the lcn | ||
1226 | * change. FIXME: Do we need to write the lcn change or just | ||
1227 | * the lcn in that case? Not sure as I have never seen this | ||
1228 | * case on NT4. - We assume that we just need to write the lcn | ||
1229 | * change until someone tells us otherwise... (AIA) | ||
1230 | */ | ||
1231 | if (rl->lcn >= 0 || vol->major_ver < 3) { | ||
1232 | prev_lcn = rl->lcn; | ||
1233 | if (rl->lcn >= 0) | ||
1234 | prev_lcn += delta; | ||
1235 | /* Write change in lcn. */ | ||
1236 | lcn_len = ntfs_write_significant_bytes(dst + 1 + | ||
1237 | len_len, dst_max, prev_lcn); | ||
1238 | if (lcn_len < 0) | ||
1239 | goto size_err; | ||
1240 | } else | ||
1241 | lcn_len = 0; | ||
1242 | dst_next = dst + len_len + lcn_len + 1; | ||
1243 | if (dst_next > dst_max) | ||
1244 | goto size_err; | ||
1245 | /* Update header byte. */ | ||
1246 | *dst = lcn_len << 4 | len_len; | ||
1247 | /* Position at next mapping pairs array element. */ | ||
1248 | dst = dst_next; | ||
1249 | /* Go to next runlist element. */ | ||
1250 | rl++; | ||
1251 | } | ||
1252 | /* Do the full runs. */ | ||
1253 | for (; rl->length; rl++) { | ||
1254 | if (rl->length < 0 || rl->lcn < LCN_HOLE) | ||
1255 | goto err_out; | ||
1256 | /* Write length. */ | ||
1257 | len_len = ntfs_write_significant_bytes(dst + 1, dst_max, | ||
1258 | rl->length); | ||
1259 | if (len_len < 0) | ||
1260 | goto size_err; | ||
1261 | /* | ||
1262 | * If the logical cluster number (lcn) denotes a hole and we | ||
1263 | * are on NTFS 3.0+, we don't store it at all, i.e. we need | ||
1264 | * zero space. On earlier NTFS versions we just write the lcn | ||
1265 | * change. FIXME: Do we need to write the lcn change or just | ||
1266 | * the lcn in that case? Not sure as I have never seen this | ||
1267 | * case on NT4. - We assume that we just need to write the lcn | ||
1268 | * change until someone tells us otherwise... (AIA) | ||
1269 | */ | ||
1270 | if (rl->lcn >= 0 || vol->major_ver < 3) { | ||
1271 | /* Write change in lcn. */ | ||
1272 | lcn_len = ntfs_write_significant_bytes(dst + 1 + | ||
1273 | len_len, dst_max, rl->lcn - prev_lcn); | ||
1274 | if (lcn_len < 0) | ||
1275 | goto size_err; | ||
1276 | prev_lcn = rl->lcn; | ||
1277 | } else | ||
1278 | lcn_len = 0; | ||
1279 | dst_next = dst + len_len + lcn_len + 1; | ||
1280 | if (dst_next > dst_max) | ||
1281 | goto size_err; | ||
1282 | /* Update header byte. */ | ||
1283 | *dst = lcn_len << 4 | len_len; | ||
1284 | /* Position at next mapping pairs array element. */ | ||
1285 | dst = dst_next; | ||
1286 | } | ||
1287 | /* Success. */ | ||
1288 | err = 0; | ||
1289 | size_err: | ||
1290 | /* Set stop vcn. */ | ||
1291 | if (stop_vcn) | ||
1292 | *stop_vcn = rl->vcn; | ||
1293 | /* Add terminator byte. */ | ||
1294 | *dst = 0; | ||
1295 | return err; | ||
1296 | err_out: | ||
1297 | if (rl->lcn == LCN_RL_NOT_MAPPED) | ||
1298 | err = -EINVAL; | ||
1299 | else | ||
1300 | err = -EIO; | ||
1301 | return err; | ||
1302 | } | ||
1303 | |||
1304 | /** | ||
1305 | * ntfs_rl_truncate_nolock - truncate a runlist starting at a specified vcn | ||
1306 | * @runlist: runlist to truncate | ||
1307 | * @new_length: the new length of the runlist in VCNs | ||
1308 | * | ||
1309 | * Truncate the runlist described by @runlist as well as the memory buffer | ||
1310 | * holding the runlist elements to a length of @new_length VCNs. | ||
1311 | * | ||
1312 | * If @new_length lies within the runlist, the runlist elements with VCNs of | ||
1313 | * @new_length and above are discarded. | ||
1314 | * | ||
1315 | * If @new_length lies beyond the runlist, a sparse runlist element is added to | ||
1316 | * the end of the runlist @runlist or if the last runlist element is a sparse | ||
1317 | * one already, this is extended. | ||
1318 | * | ||
1319 | * Return 0 on success and -errno on error. | ||
1320 | * | ||
1321 | * Locking: The caller must hold @runlist->lock for writing. | ||
1322 | */ | ||
1323 | int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist, | ||
1324 | const s64 new_length) | ||
1325 | { | ||
1326 | runlist_element *rl; | ||
1327 | int old_size; | ||
1328 | |||
1329 | ntfs_debug("Entering for new_length 0x%llx.", (long long)new_length); | ||
1330 | BUG_ON(!runlist); | ||
1331 | BUG_ON(new_length < 0); | ||
1332 | rl = runlist->rl; | ||
1333 | if (unlikely(!rl)) { | ||
1334 | /* | ||
1335 | * Create a runlist consisting of a sparse runlist element of | ||
1336 | * length @new_length followed by a terminator runlist element. | ||
1337 | */ | ||
1338 | rl = ntfs_malloc_nofs(PAGE_SIZE); | ||
1339 | if (unlikely(!rl)) { | ||
1340 | ntfs_error(vol->sb, "Not enough memory to allocate " | ||
1341 | "runlist element buffer."); | ||
1342 | return -ENOMEM; | ||
1343 | } | ||
1344 | runlist->rl = rl; | ||
1345 | rl[1].length = rl->vcn = 0; | ||
1346 | rl->lcn = LCN_HOLE; | ||
1347 | rl[1].vcn = rl->length = new_length; | ||
1348 | rl[1].lcn = LCN_ENOENT; | ||
1349 | return 0; | ||
1350 | } | ||
1351 | BUG_ON(new_length < rl->vcn); | ||
1352 | /* Find @new_length in the runlist. */ | ||
1353 | while (likely(rl->length && new_length >= rl[1].vcn)) | ||
1354 | rl++; | ||
1355 | /* | ||
1356 | * If not at the end of the runlist we need to shrink it. | ||
1357 | * If at the end of the runlist we need to expand it. | ||
1358 | */ | ||
1359 | if (rl->length) { | ||
1360 | runlist_element *trl; | ||
1361 | BOOL is_end; | ||
1362 | |||
1363 | ntfs_debug("Shrinking runlist."); | ||
1364 | /* Determine the runlist size. */ | ||
1365 | trl = rl + 1; | ||
1366 | while (likely(trl->length)) | ||
1367 | trl++; | ||
1368 | old_size = trl - runlist->rl + 1; | ||
1369 | /* Truncate the run. */ | ||
1370 | rl->length = new_length - rl->vcn; | ||
1371 | /* | ||
1372 | * If a run was partially truncated, make the following runlist | ||
1373 | * element a terminator. | ||
1374 | */ | ||
1375 | is_end = FALSE; | ||
1376 | if (rl->length) { | ||
1377 | rl++; | ||
1378 | if (!rl->length) | ||
1379 | is_end = TRUE; | ||
1380 | rl->vcn = new_length; | ||
1381 | rl->length = 0; | ||
1382 | } | ||
1383 | rl->lcn = LCN_ENOENT; | ||
1384 | /* Reallocate memory if necessary. */ | ||
1385 | if (!is_end) { | ||
1386 | int new_size = rl - runlist->rl + 1; | ||
1387 | rl = ntfs_rl_realloc(runlist->rl, old_size, new_size); | ||
1388 | if (IS_ERR(rl)) | ||
1389 | ntfs_warning(vol->sb, "Failed to shrink " | ||
1390 | "runlist buffer. This just " | ||
1391 | "wastes a bit of memory " | ||
1392 | "temporarily so we ignore it " | ||
1393 | "and return success."); | ||
1394 | else | ||
1395 | runlist->rl = rl; | ||
1396 | } | ||
1397 | } else if (likely(/* !rl->length && */ new_length > rl->vcn)) { | ||
1398 | ntfs_debug("Expanding runlist."); | ||
1399 | /* | ||
1400 | * If there is a previous runlist element and it is a sparse | ||
1401 | * one, extend it. Otherwise need to add a new, sparse runlist | ||
1402 | * element. | ||
1403 | */ | ||
1404 | if ((rl > runlist->rl) && ((rl - 1)->lcn == LCN_HOLE)) | ||
1405 | (rl - 1)->length = new_length - (rl - 1)->vcn; | ||
1406 | else { | ||
1407 | /* Determine the runlist size. */ | ||
1408 | old_size = rl - runlist->rl + 1; | ||
1409 | /* Reallocate memory if necessary. */ | ||
1410 | rl = ntfs_rl_realloc(runlist->rl, old_size, | ||
1411 | old_size + 1); | ||
1412 | if (IS_ERR(rl)) { | ||
1413 | ntfs_error(vol->sb, "Failed to expand runlist " | ||
1414 | "buffer, aborting."); | ||
1415 | return PTR_ERR(rl); | ||
1416 | } | ||
1417 | runlist->rl = rl; | ||
1418 | /* | ||
1419 | * Set @rl to the same runlist element in the new | ||
1420 | * runlist as before in the old runlist. | ||
1421 | */ | ||
1422 | rl += old_size - 1; | ||
1423 | /* Add a new, sparse runlist element. */ | ||
1424 | rl->lcn = LCN_HOLE; | ||
1425 | rl->length = new_length - rl->vcn; | ||
1426 | /* Add a new terminator runlist element. */ | ||
1427 | rl++; | ||
1428 | rl->length = 0; | ||
1429 | } | ||
1430 | rl->vcn = new_length; | ||
1431 | rl->lcn = LCN_ENOENT; | ||
1432 | } else /* if (unlikely(!rl->length && new_length == rl->vcn)) */ { | ||
1433 | /* Runlist already has same size as requested. */ | ||
1434 | rl->lcn = LCN_ENOENT; | ||
1435 | } | ||
1436 | ntfs_debug("Done."); | ||
1437 | return 0; | ||
1438 | } | ||
diff --git a/fs/ntfs/runlist.h b/fs/ntfs/runlist.h new file mode 100644 index 000000000000..7107fde59df9 --- /dev/null +++ b/fs/ntfs/runlist.h | |||
@@ -0,0 +1,89 @@ | |||
1 | /* | ||
2 | * runlist.h - Defines for runlist handling in NTFS Linux kernel driver. | ||
3 | * Part of the Linux-NTFS project. | ||
4 | * | ||
5 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
6 | * Copyright (c) 2002 Richard Russon | ||
7 | * | ||
8 | * This program/include file is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as published | ||
10 | * by the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program/include file is distributed in the hope that it will be | ||
14 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
15 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program (in the main directory of the Linux-NTFS | ||
20 | * distribution in the file COPYING); if not, write to the Free Software | ||
21 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 | */ | ||
23 | |||
24 | #ifndef _LINUX_NTFS_RUNLIST_H | ||
25 | #define _LINUX_NTFS_RUNLIST_H | ||
26 | |||
27 | #include "types.h" | ||
28 | #include "layout.h" | ||
29 | #include "volume.h" | ||
30 | |||
31 | /** | ||
32 | * runlist_element - in memory vcn to lcn mapping array element | ||
33 | * @vcn: starting vcn of the current array element | ||
34 | * @lcn: starting lcn of the current array element | ||
35 | * @length: length in clusters of the current array element | ||
36 | * | ||
37 | * The last vcn (in fact the last vcn + 1) is reached when length == 0. | ||
38 | * | ||
39 | * When lcn == -1 this means that the count vcns starting at vcn are not | ||
40 | * physically allocated (i.e. this is a hole / data is sparse). | ||
41 | */ | ||
42 | typedef struct { /* In memory vcn to lcn mapping structure element. */ | ||
43 | VCN vcn; /* vcn = Starting virtual cluster number. */ | ||
44 | LCN lcn; /* lcn = Starting logical cluster number. */ | ||
45 | s64 length; /* Run length in clusters. */ | ||
46 | } runlist_element; | ||
47 | |||
48 | /** | ||
49 | * runlist - in memory vcn to lcn mapping array including a read/write lock | ||
50 | * @rl: pointer to an array of runlist elements | ||
51 | * @lock: read/write spinlock for serializing access to @rl | ||
52 | * | ||
53 | */ | ||
54 | typedef struct { | ||
55 | runlist_element *rl; | ||
56 | struct rw_semaphore lock; | ||
57 | } runlist; | ||
58 | |||
59 | static inline void ntfs_init_runlist(runlist *rl) | ||
60 | { | ||
61 | rl->rl = NULL; | ||
62 | init_rwsem(&rl->lock); | ||
63 | } | ||
64 | |||
/*
 * Special negative values stored in the lcn field of a runlist element in
 * place of a real logical cluster number (real lcns are >= 0).
 */
typedef enum {
	/* Hole, i.e. not allocated on disk.  Keep this as highest value or die! */
	LCN_HOLE		= -1,
	/* This part of the runlist has not been mapped yet. */
	LCN_RL_NOT_MAPPED	= -2,
	/* There is no such vcn in the attribute. */
	LCN_ENOENT		= -3,
} LCN_SPECIAL_VALUES;
70 | |||
71 | extern runlist_element *ntfs_runlists_merge(runlist_element *drl, | ||
72 | runlist_element *srl); | ||
73 | |||
74 | extern runlist_element *ntfs_mapping_pairs_decompress(const ntfs_volume *vol, | ||
75 | const ATTR_RECORD *attr, runlist_element *old_rl); | ||
76 | |||
77 | extern LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn); | ||
78 | |||
79 | extern int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol, | ||
80 | const runlist_element *rl, const VCN start_vcn); | ||
81 | |||
82 | extern int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst, | ||
83 | const int dst_len, const runlist_element *rl, | ||
84 | const VCN start_vcn, VCN *const stop_vcn); | ||
85 | |||
86 | extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol, | ||
87 | runlist *const runlist, const s64 new_length); | ||
88 | |||
89 | #endif /* _LINUX_NTFS_RUNLIST_H */ | ||
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c new file mode 100644 index 000000000000..212a3d0f2073 --- /dev/null +++ b/fs/ntfs/super.c | |||
@@ -0,0 +1,2771 @@ | |||
1 | /* | ||
2 | * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
5 | * Copyright (c) 2001,2002 Richard Russon | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #include <linux/stddef.h> | ||
24 | #include <linux/init.h> | ||
25 | #include <linux/string.h> | ||
26 | #include <linux/spinlock.h> | ||
27 | #include <linux/blkdev.h> /* For bdev_hardsect_size(). */ | ||
28 | #include <linux/backing-dev.h> | ||
29 | #include <linux/buffer_head.h> | ||
30 | #include <linux/vfs.h> | ||
31 | #include <linux/moduleparam.h> | ||
32 | #include <linux/smp_lock.h> | ||
33 | |||
34 | #include "sysctl.h" | ||
35 | #include "logfile.h" | ||
36 | #include "quota.h" | ||
37 | #include "dir.h" | ||
38 | #include "debug.h" | ||
39 | #include "index.h" | ||
40 | #include "aops.h" | ||
41 | #include "malloc.h" | ||
42 | #include "ntfs.h" | ||
43 | |||
44 | /* Number of mounted file systems which have compression enabled. */ | ||
45 | static unsigned long ntfs_nr_compression_users; | ||
46 | |||
47 | /* Global default upcase table (initially NULL) and the count of its users. */ | ||
48 | static ntfschar *default_upcase = NULL; | ||
49 | static unsigned long ntfs_nr_upcase_users = 0; | ||
50 | |||
51 | /* Bit flags for the errors= mount option; also used in inode.c::ntfs_show_options(). */ | ||
52 | typedef enum { | ||
53 | /* One of these must be present; parse_options() defaults to ON_ERRORS_CONTINUE. */ | ||
54 | ON_ERRORS_PANIC = 0x01, | ||
55 | ON_ERRORS_REMOUNT_RO = 0x02, | ||
56 | ON_ERRORS_CONTINUE = 0x04, | ||
57 | /* Optional, can be combined with any of the above. */ | ||
58 | ON_ERRORS_RECOVER = 0x10, | ||
59 | } ON_ERRORS_ACTIONS; | ||
60 | |||
61 | const option_t on_errors_arr[] = { /* Maps errors= argument strings to ON_ERRORS_* bits. */ | ||
62 | { ON_ERRORS_PANIC, "panic" }, | ||
63 | { ON_ERRORS_REMOUNT_RO, "remount-ro", }, | ||
64 | { ON_ERRORS_CONTINUE, "continue", }, | ||
65 | { ON_ERRORS_RECOVER, "recover" }, | ||
66 | { 0, NULL } /* Terminator: the lookup in parse_options() stops at a NULL string. */ | ||
67 | }; | ||
68 | |||
69 | /** | ||
70 | * simple_getbool - parse a boolean option value ("1"/"yes"/"true", "0"/"no"/"false") | ||
71 | * Return 1 with *@setval set (a NULL @s counts as TRUE), or 0 if @s is not recognized. | ||
72 | * Copied from old ntfs driver (which copied from vfat driver). | ||
73 | */ | ||
74 | static int simple_getbool(char *s, BOOL *setval) | ||
75 | { | ||
76 | if (s) { | ||
77 | if (!strcmp(s, "1") || !strcmp(s, "yes") || !strcmp(s, "true")) | ||
78 | *setval = TRUE; | ||
79 | else if (!strcmp(s, "0") || !strcmp(s, "no") || | ||
80 | !strcmp(s, "false")) | ||
81 | *setval = FALSE; | ||
82 | else | ||
83 | return 0; /* Unrecognized string; *setval is left untouched. */ | ||
84 | } else | ||
85 | *setval = TRUE; /* Option given without a value. */ | ||
86 | return 1; | ||
87 | } | ||
88 | |||
89 | /** | ||
90 | * parse_options - parse the (re)mount options | ||
91 | * @vol: ntfs volume to update | ||
92 | * @opt: string containing the (re)mount options (modified in place while parsing) | ||
93 | * | ||
94 | * Parse the options in @opt into @vol. Return TRUE on success, FALSE on error. | ||
95 | */ | ||
96 | static BOOL parse_options(ntfs_volume *vol, char *opt) | ||
97 | { | ||
98 | char *p, *v, *ov; | ||
99 | static char *utf8 = "utf8"; | ||
100 | int errors = 0, sloppy = 0; | ||
101 | uid_t uid = (uid_t)-1; | ||
102 | gid_t gid = (gid_t)-1; | ||
103 | mode_t fmask = (mode_t)-1, dmask = (mode_t)-1; | ||
104 | int mft_zone_multiplier = -1, on_errors = -1; | ||
105 | int show_sys_files = -1, case_sensitive = -1; | ||
106 | struct nls_table *nls_map = NULL, *old_nls; | ||
107 | |||
108 | /* Helper macros: each compares p with one option name and parses its value v. */ | ||
109 | #define NTFS_GETOPT_WITH_DEFAULT(option, variable, default_value) \ | ||
110 | if (!strcmp(p, option)) { \ | ||
111 | if (!v || !*v) \ | ||
112 | variable = default_value; \ | ||
113 | else { \ | ||
114 | variable = simple_strtoul(ov = v, &v, 0); \ | ||
115 | if (*v) \ | ||
116 | goto needs_val; \ | ||
117 | } \ | ||
118 | } | ||
119 | #define NTFS_GETOPT(option, variable) \ | ||
120 | if (!strcmp(p, option)) { \ | ||
121 | if (!v || !*v) \ | ||
122 | goto needs_arg; \ | ||
123 | variable = simple_strtoul(ov = v, &v, 0); \ | ||
124 | if (*v) \ | ||
125 | goto needs_val; \ | ||
126 | } | ||
127 | #define NTFS_GETOPT_BOOL(option, variable) \ | ||
128 | if (!strcmp(p, option)) { \ | ||
129 | BOOL val; \ | ||
130 | if (!simple_getbool(v, &val)) \ | ||
131 | goto needs_bool; \ | ||
132 | variable = val; \ | ||
133 | } | ||
134 | #define NTFS_GETOPT_OPTIONS_ARRAY(option, variable, opt_array) \ | ||
135 | if (!strcmp(p, option)) { \ | ||
136 | int _i; \ | ||
137 | if (!v || !*v) \ | ||
138 | goto needs_arg; \ | ||
139 | ov = v; \ | ||
140 | if (variable == -1) \ | ||
141 | variable = 0; \ | ||
142 | for (_i = 0; opt_array[_i].str && *opt_array[_i].str; _i++) \ | ||
143 | if (!strcmp(opt_array[_i].str, v)) { \ | ||
144 | variable |= opt_array[_i].val; \ | ||
145 | break; \ | ||
146 | } \ | ||
147 | if (!opt_array[_i].str || !*opt_array[_i].str) \ | ||
148 | goto needs_val; \ | ||
149 | } | ||
150 | if (!opt || !*opt) | ||
151 | goto no_mount_options; | ||
152 | ntfs_debug("Entering with mount options string: %s", opt); | ||
153 | while ((p = strsep(&opt, ","))) { | ||
154 | if ((v = strchr(p, '='))) | ||
155 | *v++ = 0; | ||
156 | NTFS_GETOPT("uid", uid) | ||
157 | else NTFS_GETOPT("gid", gid) | ||
158 | else NTFS_GETOPT("umask", fmask = dmask) /* Expands to "fmask = dmask = <value>", setting both masks. */ | ||
159 | else NTFS_GETOPT("fmask", fmask) | ||
160 | else NTFS_GETOPT("dmask", dmask) | ||
161 | else NTFS_GETOPT("mft_zone_multiplier", mft_zone_multiplier) | ||
162 | else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, TRUE) | ||
163 | else NTFS_GETOPT_BOOL("show_sys_files", show_sys_files) | ||
164 | else NTFS_GETOPT_BOOL("case_sensitive", case_sensitive) | ||
165 | else NTFS_GETOPT_OPTIONS_ARRAY("errors", on_errors, | ||
166 | on_errors_arr) | ||
167 | else if (!strcmp(p, "posix") || !strcmp(p, "show_inodes")) | ||
168 | ntfs_warning(vol->sb, "Ignoring obsolete option %s.", | ||
169 | p); | ||
170 | else if (!strcmp(p, "nls") || !strcmp(p, "iocharset")) { | ||
171 | if (!strcmp(p, "iocharset")) | ||
172 | ntfs_warning(vol->sb, "Option iocharset is " | ||
173 | "deprecated. Please use " | ||
174 | "option nls=<charsetname> in " | ||
175 | "the future."); | ||
176 | if (!v || !*v) | ||
177 | goto needs_arg; | ||
178 | use_utf8: /* Also entered by goto from the utf8 branch below, with v pointing at "utf8". */ | ||
179 | old_nls = nls_map; | ||
180 | nls_map = load_nls(v); | ||
181 | if (!nls_map) { | ||
182 | if (!old_nls) { | ||
183 | ntfs_error(vol->sb, "NLS character set " | ||
184 | "%s not found.", v); | ||
185 | return FALSE; | ||
186 | } | ||
187 | ntfs_error(vol->sb, "NLS character set %s not " | ||
188 | "found. Using previous one %s.", | ||
189 | v, old_nls->charset); | ||
190 | nls_map = old_nls; | ||
191 | } else /* nls_map */ { | ||
192 | if (old_nls) | ||
193 | unload_nls(old_nls); | ||
194 | } | ||
195 | } else if (!strcmp(p, "utf8")) { | ||
196 | BOOL val = FALSE; | ||
197 | ntfs_warning(vol->sb, "Option utf8 is no longer " | ||
198 | "supported, using option nls=utf8. Please " | ||
199 | "use option nls=utf8 in the future and " | ||
200 | "make sure utf8 is compiled either as a " | ||
201 | "module or into the kernel."); | ||
202 | if (!v || !*v) | ||
203 | val = TRUE; | ||
204 | else if (!simple_getbool(v, &val)) | ||
205 | goto needs_bool; | ||
206 | if (val) { | ||
207 | v = utf8; | ||
208 | goto use_utf8; | ||
209 | } | ||
210 | } else { | ||
211 | ntfs_error(vol->sb, "Unrecognized mount option %s.", p); | ||
212 | if (errors < INT_MAX) | ||
213 | errors++; | ||
214 | } | ||
215 | #undef NTFS_GETOPT_OPTIONS_ARRAY | ||
216 | #undef NTFS_GETOPT_BOOL | ||
217 | #undef NTFS_GETOPT | ||
218 | #undef NTFS_GETOPT_WITH_DEFAULT | ||
219 | } | ||
220 | no_mount_options: | ||
221 | if (errors && !sloppy) | ||
222 | return FALSE; | ||
223 | if (sloppy) | ||
224 | ntfs_warning(vol->sb, "Sloppy option given. Ignoring " | ||
225 | "unrecognized mount option(s) and continuing."); | ||
226 | /* Keep this first! */ | ||
227 | if (on_errors != -1) { | ||
228 | if (!on_errors) { | ||
229 | ntfs_error(vol->sb, "Invalid errors option argument " | ||
230 | "or bug in options parser."); | ||
231 | return FALSE; | ||
232 | } | ||
233 | } | ||
234 | if (nls_map) { | ||
235 | if (vol->nls_map && vol->nls_map != nls_map) { | ||
236 | ntfs_error(vol->sb, "Cannot change NLS character set " | ||
237 | "on remount."); | ||
238 | return FALSE; /* NOTE(review): nls_map loaded above is not unloaded on this path. */ | ||
239 | } /* else (!vol->nls_map) */ | ||
240 | ntfs_debug("Using NLS character set %s.", nls_map->charset); | ||
241 | vol->nls_map = nls_map; | ||
242 | } else /* (!nls_map) */ { | ||
243 | if (!vol->nls_map) { | ||
244 | vol->nls_map = load_nls_default(); | ||
245 | if (!vol->nls_map) { | ||
246 | ntfs_error(vol->sb, "Failed to load default " | ||
247 | "NLS character set."); | ||
248 | return FALSE; | ||
249 | } | ||
250 | ntfs_debug("Using default NLS character set (%s).", | ||
251 | vol->nls_map->charset); | ||
252 | } | ||
253 | } | ||
254 | if (mft_zone_multiplier != -1) { | ||
255 | if (vol->mft_zone_multiplier && vol->mft_zone_multiplier != | ||
256 | mft_zone_multiplier) { | ||
257 | ntfs_error(vol->sb, "Cannot change mft_zone_multiplier " | ||
258 | "on remount."); | ||
259 | return FALSE; | ||
260 | } | ||
261 | if (mft_zone_multiplier < 1 || mft_zone_multiplier > 4) { | ||
262 | ntfs_error(vol->sb, "Invalid mft_zone_multiplier. " | ||
263 | "Using default value, i.e. 1."); | ||
264 | mft_zone_multiplier = 1; | ||
265 | } | ||
266 | vol->mft_zone_multiplier = mft_zone_multiplier; | ||
267 | } | ||
268 | if (!vol->mft_zone_multiplier) | ||
269 | vol->mft_zone_multiplier = 1; | ||
270 | if (on_errors != -1) | ||
271 | vol->on_errors = on_errors; | ||
272 | if (!vol->on_errors || vol->on_errors == ON_ERRORS_RECOVER) /* No action bit chosen: default to continue. */ | ||
273 | vol->on_errors |= ON_ERRORS_CONTINUE; | ||
274 | if (uid != (uid_t)-1) | ||
275 | vol->uid = uid; | ||
276 | if (gid != (gid_t)-1) | ||
277 | vol->gid = gid; | ||
278 | if (fmask != (mode_t)-1) | ||
279 | vol->fmask = fmask; | ||
280 | if (dmask != (mode_t)-1) | ||
281 | vol->dmask = dmask; | ||
282 | if (show_sys_files != -1) { | ||
283 | if (show_sys_files) | ||
284 | NVolSetShowSystemFiles(vol); | ||
285 | else | ||
286 | NVolClearShowSystemFiles(vol); | ||
287 | } | ||
288 | if (case_sensitive != -1) { | ||
289 | if (case_sensitive) | ||
290 | NVolSetCaseSensitive(vol); | ||
291 | else | ||
292 | NVolClearCaseSensitive(vol); | ||
293 | } | ||
294 | return TRUE; | ||
295 | needs_arg: /* NOTE(review): none of the error exits below unload an nls_map loaded earlier in the loop. */ | ||
296 | ntfs_error(vol->sb, "The %s option requires an argument.", p); | ||
297 | return FALSE; | ||
298 | needs_bool: | ||
299 | ntfs_error(vol->sb, "The %s option requires a boolean argument.", p); | ||
300 | return FALSE; | ||
301 | needs_val: | ||
302 | ntfs_error(vol->sb, "Invalid %s option argument: %s", p, ov); | ||
303 | return FALSE; | ||
304 | } | ||
305 | |||
306 | #ifdef NTFS_RW | ||
307 | |||
308 | /** | ||
309 | * ntfs_write_volume_flags - write new flags to the volume information flags | ||
310 | * @vol: ntfs volume on which to modify the flags | ||
311 | * @flags: new flags value for the volume information flags | ||
312 | * | ||
313 | * Internal function. You probably want to use ntfs_{set,clear}_volume_flags() | ||
314 | * instead (see below). | ||
315 | * | ||
316 | * Replace the volume information flags on the volume @vol with the value | ||
317 | * supplied in @flags. Note, this overwrites the volume information flags, so | ||
318 | * make sure to combine the flags you want to modify with the old flags and use | ||
319 | * the result when calling ntfs_write_volume_flags(). | ||
320 | * | ||
321 | * Return 0 on success and -errno on error. | ||
322 | */ | ||
323 | static int ntfs_write_volume_flags(ntfs_volume *vol, const VOLUME_FLAGS flags) | ||
324 | { | ||
325 | ntfs_inode *ni = NTFS_I(vol->vol_ino); | ||
326 | MFT_RECORD *m; | ||
327 | VOLUME_INFORMATION *vi; | ||
328 | ntfs_attr_search_ctx *ctx; | ||
329 | int err; | ||
330 | |||
331 | ntfs_debug("Entering, old flags = 0x%x, new flags = 0x%x.", | ||
332 | le16_to_cpu(vol->vol_flags), le16_to_cpu(flags)); | ||
333 | if (vol->vol_flags == flags) /* Nothing to change. */ | ||
334 | goto done; | ||
335 | BUG_ON(!ni); | ||
336 | m = map_mft_record(ni); | ||
337 | if (IS_ERR(m)) { | ||
338 | err = PTR_ERR(m); | ||
339 | goto err_out; | ||
340 | } | ||
341 | ctx = ntfs_attr_get_search_ctx(ni, m); | ||
342 | if (!ctx) { | ||
343 | err = -ENOMEM; | ||
344 | goto put_unm_err_out; | ||
345 | } | ||
346 | err = ntfs_attr_lookup(AT_VOLUME_INFORMATION, NULL, 0, 0, 0, NULL, 0, | ||
347 | ctx); | ||
348 | if (err) | ||
349 | goto put_unm_err_out; | ||
350 | vi = (VOLUME_INFORMATION*)((u8*)ctx->attr + | ||
351 | le16_to_cpu(ctx->attr->data.resident.value_offset)); /* Value starts value_offset bytes into the attribute record. */ | ||
352 | vol->vol_flags = vi->flags = flags; /* Update the mapped attribute value and the copy cached in @vol. */ | ||
353 | flush_dcache_mft_record_page(ctx->ntfs_ino); | ||
354 | mark_mft_record_dirty(ctx->ntfs_ino); | ||
355 | ntfs_attr_put_search_ctx(ctx); | ||
356 | unmap_mft_record(ni); | ||
357 | done: | ||
358 | ntfs_debug("Done."); | ||
359 | return 0; | ||
360 | put_unm_err_out: | ||
361 | if (ctx) /* ctx is NULL when allocating the search context failed. */ | ||
362 | ntfs_attr_put_search_ctx(ctx); | ||
363 | unmap_mft_record(ni); | ||
364 | err_out: | ||
365 | ntfs_error(vol->sb, "Failed with error code %i.", -err); | ||
366 | return err; | ||
367 | } | ||
368 | |||
369 | /** | ||
370 | * ntfs_set_volume_flags - set bits in the volume information flags | ||
371 | * @vol: ntfs volume on which to modify the flags | ||
372 | * @flags: flags to set on the volume | ||
373 | * | ||
374 | * Set the bits in @flags in the volume information flags on the volume @vol. | ||
375 | * | ||
376 | * Return 0 on success and -errno on error. | ||
377 | */ | ||
378 | static inline int ntfs_set_volume_flags(ntfs_volume *vol, VOLUME_FLAGS flags) | ||
379 | { | ||
380 | flags &= VOLUME_FLAGS_MASK; /* Drop any bits outside VOLUME_FLAGS_MASK. */ | ||
381 | return ntfs_write_volume_flags(vol, vol->vol_flags | flags); | ||
382 | } | ||
383 | |||
384 | /** | ||
385 | * ntfs_clear_volume_flags - clear bits in the volume information flags | ||
386 | * @vol: ntfs volume on which to modify the flags | ||
387 | * @flags: flags to clear on the volume | ||
388 | * | ||
389 | * Clear the bits in @flags in the volume information flags on the volume @vol. | ||
390 | * | ||
391 | * Return 0 on success and -errno on error. | ||
392 | */ | ||
393 | static inline int ntfs_clear_volume_flags(ntfs_volume *vol, VOLUME_FLAGS flags) | ||
394 | { | ||
395 | flags &= VOLUME_FLAGS_MASK; /* Drop any bits outside VOLUME_FLAGS_MASK. */ | ||
396 | flags = vol->vol_flags & cpu_to_le16(~le16_to_cpu(flags)); /* Complement in cpu order, then AND as le16. */ | ||
397 | return ntfs_write_volume_flags(vol, flags); | ||
398 | } | ||
399 | |||
400 | #endif /* NTFS_RW */ | ||
401 | |||
402 | /** | ||
403 | * ntfs_remount - change the mount options of a mounted ntfs filesystem | ||
404 | * @sb: superblock of mounted ntfs filesystem | ||
405 | * @flags: remount flags | ||
406 | * @opt: remount options string | ||
407 | * | ||
408 | * Change the mount options of an already mounted ntfs filesystem. | ||
409 | * | ||
410 | * NOTE: The VFS sets the @sb->s_flags remount flags to @flags after | ||
411 | * ntfs_remount() returns successfully (i.e. returns 0). Otherwise, | ||
412 | * @sb->s_flags are not changed. | ||
413 | */ | ||
414 | static int ntfs_remount(struct super_block *sb, int *flags, char *opt) | ||
415 | { | ||
416 | ntfs_volume *vol = NTFS_SB(sb); | ||
417 | |||
418 | ntfs_debug("Entering with remount options string: %s", opt); | ||
419 | #ifndef NTFS_RW | ||
420 | /* For read-only compiled driver, enforce all read-only flags. */ | ||
421 | *flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; | ||
422 | #else /* NTFS_RW */ | ||
423 | /* | ||
424 | * For the read-write compiled driver, if we are remounting read-write, | ||
425 | * make sure there are no volume errors and that no unsupported volume | ||
426 | * flags are set. Also, empty the logfile journal as it would become | ||
427 | * stale as soon as something is written to the volume and mark the | ||
428 | * volume dirty so that chkdsk is run if the volume is not umounted | ||
429 | * cleanly. Finally, mark the quotas out of date so Windows rescans | ||
430 | * the volume on boot and updates them. | ||
431 | * | ||
432 | * When remounting read-only, mark the volume clean if no volume errors | ||
433 | * have occurred. | ||
434 | */ | ||
435 | if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { | ||
436 | static const char *es = ". Cannot remount read-write."; | ||
437 | |||
438 | /* Remounting read-write. */ | ||
439 | if (NVolErrors(vol)) { | ||
440 | ntfs_error(sb, "Volume has errors and is read-only%s", | ||
441 | es); | ||
442 | return -EROFS; | ||
443 | } | ||
444 | if (vol->vol_flags & VOLUME_IS_DIRTY) { | ||
445 | ntfs_error(sb, "Volume is dirty and read-only%s", es); | ||
446 | return -EROFS; | ||
447 | } | ||
448 | if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) { | ||
449 | ntfs_error(sb, "Volume has unsupported flags set and " | ||
450 | "is read-only%s", es); | ||
451 | return -EROFS; | ||
452 | } | ||
453 | if (ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) { | ||
454 | ntfs_error(sb, "Failed to set dirty bit in volume " | ||
455 | "information flags%s", es); | ||
456 | return -EROFS; | ||
457 | } | ||
458 | #if 0 | ||
459 | // TODO: Enable this code once we start modifying anything that | ||
460 | // is different between NTFS 1.2 and 3.x... | ||
461 | /* Set NT4 compatibility flag on newer NTFS version volumes. */ | ||
462 | if ((vol->major_ver > 1)) { | ||
463 | if (ntfs_set_volume_flags(vol, VOLUME_MOUNTED_ON_NT4)) { | ||
464 | ntfs_error(sb, "Failed to set NT4 " | ||
465 | "compatibility flag%s", es); | ||
466 | NVolSetErrors(vol); | ||
467 | return -EROFS; | ||
468 | } | ||
469 | } | ||
470 | #endif | ||
471 | if (!ntfs_empty_logfile(vol->logfile_ino)) { | ||
472 | ntfs_error(sb, "Failed to empty journal $LogFile%s", | ||
473 | es); | ||
474 | NVolSetErrors(vol); | ||
475 | return -EROFS; | ||
476 | } | ||
477 | if (!ntfs_mark_quotas_out_of_date(vol)) { | ||
478 | ntfs_error(sb, "Failed to mark quotas out of date%s", | ||
479 | es); | ||
480 | NVolSetErrors(vol); | ||
481 | return -EROFS; | ||
482 | } | ||
483 | } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { | ||
484 | /* Remounting read-only. */ | ||
485 | if (!NVolErrors(vol)) { | ||
486 | if (ntfs_clear_volume_flags(vol, VOLUME_IS_DIRTY)) | ||
487 | ntfs_warning(sb, "Failed to clear dirty bit " | ||
488 | "in volume information " | ||
489 | "flags. Run chkdsk."); | ||
490 | } | ||
491 | } | ||
492 | #endif /* NTFS_RW */ | ||
493 | |||
494 | // TODO: Deal with *flags. | ||
495 | |||
496 | if (!parse_options(vol, opt)) /* NOTE(review): runs after the volume updates above, so a bad option string fails the remount with them already made. */ | ||
497 | return -EINVAL; | ||
498 | ntfs_debug("Done."); | ||
499 | return 0; | ||
500 | } | ||
501 | |||
502 | /** | ||
503 | * is_boot_sector_ntfs - check whether a boot sector is a valid NTFS boot sector | ||
504 | * @sb: Super block of the device to which @b belongs. | ||
505 | * @b: Boot sector of device @sb to check. | ||
506 | * @silent: If TRUE, all output will be silenced. | ||
507 | * | ||
508 | * is_boot_sector_ntfs() checks whether the boot sector @b is a valid NTFS boot | ||
509 | * sector. Returns TRUE if it is valid and FALSE if not. | ||
510 | * | ||
511 | * @sb is only needed for warning/error output, i.e. it can be NULL when silent | ||
512 | * is TRUE. | ||
513 | */ | ||
514 | static BOOL is_boot_sector_ntfs(const struct super_block *sb, | ||
515 | const NTFS_BOOT_SECTOR *b, const BOOL silent) | ||
516 | { | ||
517 | /* | ||
518 | * Check that checksum == sum of u32 values from b to the checksum | ||
519 | * field. If checksum is zero, no checking is done. | ||
520 | */ | ||
521 | if ((void*)b < (void*)&b->checksum && b->checksum) { | ||
522 | le32 *u; | ||
523 | u32 i; | ||
524 | |||
525 | for (i = 0, u = (le32*)b; u < (le32*)(&b->checksum); ++u) | ||
526 | i += le32_to_cpup(u); | ||
527 | if (le32_to_cpu(b->checksum) != i) | ||
528 | goto not_ntfs; | ||
529 | } | ||
530 | /* Check OEMidentifier is "NTFS " */ | ||
531 | if (b->oem_id != magicNTFS) | ||
532 | goto not_ntfs; | ||
533 | /* Check bytes per sector value is between 256 and 4096. */ | ||
534 | if (le16_to_cpu(b->bpb.bytes_per_sector) < 0x100 || | ||
535 | le16_to_cpu(b->bpb.bytes_per_sector) > 0x1000) | ||
536 | goto not_ntfs; | ||
537 | /* Check sectors per cluster value is valid. */ | ||
538 | switch (b->bpb.sectors_per_cluster) { | ||
539 | case 1: case 2: case 4: case 8: case 16: case 32: case 64: case 128: | ||
540 | break; | ||
541 | default: | ||
542 | goto not_ntfs; | ||
543 | } | ||
544 | /* Check the cluster size is not above 65536 bytes. */ | ||
545 | if ((u32)le16_to_cpu(b->bpb.bytes_per_sector) * | ||
546 | b->bpb.sectors_per_cluster > 0x10000) | ||
547 | goto not_ntfs; | ||
548 | /* Check reserved/unused fields are really zero. */ | ||
549 | if (le16_to_cpu(b->bpb.reserved_sectors) || | ||
550 | le16_to_cpu(b->bpb.root_entries) || | ||
551 | le16_to_cpu(b->bpb.sectors) || | ||
552 | le16_to_cpu(b->bpb.sectors_per_fat) || | ||
553 | le32_to_cpu(b->bpb.large_sectors) || b->bpb.fats) | ||
554 | goto not_ntfs; | ||
555 | /* Check clusters per mft record: 0xe1-0xf7 passes as is, else must be a power of two <= 64. */ | ||
556 | if ((u8)b->clusters_per_mft_record < 0xe1 || | ||
557 | (u8)b->clusters_per_mft_record > 0xf7) | ||
558 | switch (b->clusters_per_mft_record) { | ||
559 | case 1: case 2: case 4: case 8: case 16: case 32: case 64: | ||
560 | break; | ||
561 | default: | ||
562 | goto not_ntfs; | ||
563 | } | ||
564 | /* Check clusters per index block: 0xe1-0xf7 passes as is, else must be a power of two <= 64. */ | ||
565 | if ((u8)b->clusters_per_index_record < 0xe1 || | ||
566 | (u8)b->clusters_per_index_record > 0xf7) | ||
567 | switch (b->clusters_per_index_record) { | ||
568 | case 1: case 2: case 4: case 8: case 16: case 32: case 64: | ||
569 | break; | ||
570 | default: | ||
571 | goto not_ntfs; | ||
572 | } | ||
573 | /* | ||
574 | * Check for valid end of sector marker. We will work without it, but | ||
575 | * many BIOSes will refuse to boot from a bootsector if the magic is | ||
576 | * incorrect, so we emit a warning. | ||
577 | */ | ||
578 | if (!silent && b->end_of_sector_marker != cpu_to_le16(0xaa55)) | ||
579 | ntfs_warning(sb, "Invalid end of sector marker."); | ||
580 | return TRUE; | ||
581 | not_ntfs: | ||
582 | return FALSE; | ||
583 | } | ||
584 | |||
585 | /** | ||
586 | * read_ntfs_boot_sector - read the NTFS boot sector of a device | ||
587 | * @sb: super block of device to read the boot sector from | ||
588 | * @silent: if true, suppress output except for the hot-fix/backup messages | ||
589 | * | ||
590 | * Reads the boot sector from the device and validates it. If that fails, tries | ||
591 | * to read the backup boot sector, first from the end of the device a-la NT4 and | ||
592 | * later and then from the middle of the device a-la NT3.51 and before. | ||
593 | * | ||
594 | * If a valid boot sector is found but it is not the primary boot sector, we | ||
595 | * repair the primary boot sector silently (unless the device is read-only or | ||
596 | * the primary boot sector is not accessible). | ||
597 | * | ||
598 | * NOTE: To call this function, @sb must have the fields s_dev, the ntfs super | ||
599 | * block (u.ntfs_sb), nr_blocks and the device flags (s_flags) initialized | ||
600 | * to their respective values. | ||
601 | * | ||
602 | * Return the unlocked buffer head containing the boot sector or NULL on error. | ||
603 | */ | ||
604 | static struct buffer_head *read_ntfs_boot_sector(struct super_block *sb, | ||
605 | const int silent) | ||
606 | { | ||
607 | const char *read_err_str = "Unable to read %s boot sector."; | ||
608 | struct buffer_head *bh_primary, *bh_backup; | ||
609 | long nr_blocks = NTFS_SB(sb)->nr_blocks; | ||
610 | |||
611 | /* Try to read primary boot sector. */ | ||
612 | if ((bh_primary = sb_bread(sb, 0))) { | ||
613 | if (is_boot_sector_ntfs(sb, (NTFS_BOOT_SECTOR*) | ||
614 | bh_primary->b_data, silent)) | ||
615 | return bh_primary; | ||
616 | if (!silent) | ||
617 | ntfs_error(sb, "Primary boot sector is invalid."); | ||
618 | } else if (!silent) | ||
619 | ntfs_error(sb, read_err_str, "primary"); | ||
620 | if (!(NTFS_SB(sb)->on_errors & ON_ERRORS_RECOVER)) { | ||
621 | if (bh_primary) | ||
622 | brelse(bh_primary); | ||
623 | if (!silent) | ||
624 | ntfs_error(sb, "Mount option errors=recover not used. " | ||
625 | "Aborting without trying to recover."); | ||
626 | return NULL; | ||
627 | } | ||
628 | /* Try to read NT4+ backup boot sector (last block of the device). */ | ||
629 | if ((bh_backup = sb_bread(sb, nr_blocks - 1))) { | ||
630 | if (is_boot_sector_ntfs(sb, (NTFS_BOOT_SECTOR*) | ||
631 | bh_backup->b_data, silent)) | ||
632 | goto hotfix_primary_boot_sector; | ||
633 | brelse(bh_backup); | ||
634 | } else if (!silent) | ||
635 | ntfs_error(sb, read_err_str, "backup"); | ||
636 | /* Try to read NT3.51- backup boot sector (middle block of the device). */ | ||
637 | if ((bh_backup = sb_bread(sb, nr_blocks >> 1))) { | ||
638 | if (is_boot_sector_ntfs(sb, (NTFS_BOOT_SECTOR*) | ||
639 | bh_backup->b_data, silent)) | ||
640 | goto hotfix_primary_boot_sector; | ||
641 | if (!silent) | ||
642 | ntfs_error(sb, "Could not find a valid backup boot " | ||
643 | "sector."); | ||
644 | brelse(bh_backup); | ||
645 | } else if (!silent) | ||
646 | ntfs_error(sb, read_err_str, "backup"); | ||
647 | /* We failed. Cleanup and return. */ | ||
648 | if (bh_primary) | ||
649 | brelse(bh_primary); | ||
650 | return NULL; | ||
651 | hotfix_primary_boot_sector: | ||
652 | if (bh_primary) { | ||
653 | /* | ||
654 | * If we managed to read sector zero and the volume is not | ||
655 | * read-only, copy the found, valid backup boot sector to the | ||
656 | * primary boot sector. | ||
657 | */ | ||
658 | if (!(sb->s_flags & MS_RDONLY)) { | ||
659 | ntfs_warning(sb, "Hot-fix: Recovering invalid primary " | ||
660 | "boot sector from backup copy."); | ||
661 | memcpy(bh_primary->b_data, bh_backup->b_data, | ||
662 | sb->s_blocksize); | ||
663 | mark_buffer_dirty(bh_primary); | ||
664 | sync_dirty_buffer(bh_primary); | ||
665 | if (buffer_uptodate(bh_primary)) { /* Otherwise treated as a write error below. */ | ||
666 | brelse(bh_backup); | ||
667 | return bh_primary; | ||
668 | } | ||
669 | ntfs_error(sb, "Hot-fix: Device write error while " | ||
670 | "recovering primary boot sector."); | ||
671 | } else { | ||
672 | ntfs_warning(sb, "Hot-fix: Recovery of primary boot " | ||
673 | "sector failed: Read-only mount."); | ||
674 | } | ||
675 | brelse(bh_primary); | ||
676 | } | ||
677 | ntfs_warning(sb, "Using backup boot sector."); | ||
678 | return bh_backup; | ||
679 | } | ||
680 | |||
681 | /** | ||
682 | * parse_ntfs_boot_sector - parse the boot sector and store the data in @vol | ||
683 | * @vol: volume structure to initialise with data from boot sector | ||
684 | * @b: boot sector to parse | ||
685 | * | ||
686 | * Parse the ntfs boot sector @b and store all important information therein in | ||
687 | * the ntfs super block @vol. Return TRUE on success and FALSE on error. | ||
688 | */ | ||
689 | static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b) | ||
690 | { | ||
691 | unsigned int sectors_per_cluster_bits, nr_hidden_sects; | ||
692 | int clusters_per_mft_record, clusters_per_index_record; | ||
693 | s64 ll; | ||
694 | |||
695 | vol->sector_size = le16_to_cpu(b->bpb.bytes_per_sector); | ||
696 | vol->sector_size_bits = ffs(vol->sector_size) - 1; | ||
697 | ntfs_debug("vol->sector_size = %i (0x%x)", vol->sector_size, | ||
698 | vol->sector_size); | ||
699 | ntfs_debug("vol->sector_size_bits = %i (0x%x)", vol->sector_size_bits, | ||
700 | vol->sector_size_bits); | ||
701 | if (vol->sector_size != vol->sb->s_blocksize) | ||
702 | ntfs_warning(vol->sb, "The boot sector indicates a sector size " | ||
703 | "different from the device sector size."); | ||
704 | ntfs_debug("sectors_per_cluster = 0x%x", b->bpb.sectors_per_cluster); | ||
705 | sectors_per_cluster_bits = ffs(b->bpb.sectors_per_cluster) - 1; | ||
706 | ntfs_debug("sectors_per_cluster_bits = 0x%x", | ||
707 | sectors_per_cluster_bits); | ||
708 | nr_hidden_sects = le32_to_cpu(b->bpb.hidden_sectors); | ||
709 | ntfs_debug("number of hidden sectors = 0x%x", nr_hidden_sects); | ||
710 | vol->cluster_size = vol->sector_size << sectors_per_cluster_bits; | ||
711 | vol->cluster_size_mask = vol->cluster_size - 1; | ||
712 | vol->cluster_size_bits = ffs(vol->cluster_size) - 1; | ||
713 | ntfs_debug("vol->cluster_size = %i (0x%x)", vol->cluster_size, | ||
714 | vol->cluster_size); | ||
715 | ntfs_debug("vol->cluster_size_mask = 0x%x", vol->cluster_size_mask); | ||
716 | ntfs_debug("vol->cluster_size_bits = %i (0x%x)", | ||
717 | vol->cluster_size_bits, vol->cluster_size_bits); | ||
718 | if (vol->sector_size > vol->cluster_size) { | ||
719 | ntfs_error(vol->sb, "Sector sizes above the cluster size are " | ||
720 | "not supported. Sorry."); | ||
721 | return FALSE; | ||
722 | } | ||
723 | if (vol->sb->s_blocksize > vol->cluster_size) { | ||
724 | ntfs_error(vol->sb, "Cluster sizes smaller than the device " | ||
725 | "sector size are not supported. Sorry."); | ||
726 | return FALSE; | ||
727 | } | ||
728 | clusters_per_mft_record = b->clusters_per_mft_record; | ||
729 | ntfs_debug("clusters_per_mft_record = %i (0x%x)", | ||
730 | clusters_per_mft_record, clusters_per_mft_record); | ||
731 | if (clusters_per_mft_record > 0) | ||
732 | vol->mft_record_size = vol->cluster_size << | ||
733 | (ffs(clusters_per_mft_record) - 1); | ||
734 | else | ||
735 | /* | ||
736 | * When mft_record_size < cluster_size, clusters_per_mft_record | ||
737 | * = -log2(mft_record_size) bytes. mft_record_size normally is | ||
738 | * 1024 bytes, which is encoded as 0xF6 (-10 in decimal). | ||
739 | */ | ||
740 | vol->mft_record_size = 1 << -clusters_per_mft_record; | ||
741 | vol->mft_record_size_mask = vol->mft_record_size - 1; | ||
742 | vol->mft_record_size_bits = ffs(vol->mft_record_size) - 1; | ||
743 | ntfs_debug("vol->mft_record_size = %i (0x%x)", vol->mft_record_size, | ||
744 | vol->mft_record_size); | ||
745 | ntfs_debug("vol->mft_record_size_mask = 0x%x", | ||
746 | vol->mft_record_size_mask); | ||
747 | ntfs_debug("vol->mft_record_size_bits = %i (0x%x)", | ||
748 | vol->mft_record_size_bits, vol->mft_record_size_bits); | ||
749 | /* | ||
750 | * We cannot support mft record sizes above the PAGE_CACHE_SIZE since | ||
751 | * we store $MFT/$DATA, the table of mft records in the page cache. | ||
752 | */ | ||
753 | if (vol->mft_record_size > PAGE_CACHE_SIZE) { | ||
754 | ntfs_error(vol->sb, "Mft record size %i (0x%x) exceeds the " | ||
755 | "page cache size on your system %lu (0x%lx). " | ||
756 | "This is not supported. Sorry.", | ||
757 | vol->mft_record_size, vol->mft_record_size, | ||
758 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE); | ||
759 | return FALSE; | ||
760 | } | ||
761 | clusters_per_index_record = b->clusters_per_index_record; | ||
762 | ntfs_debug("clusters_per_index_record = %i (0x%x)", | ||
763 | clusters_per_index_record, clusters_per_index_record); | ||
764 | if (clusters_per_index_record > 0) | ||
765 | vol->index_record_size = vol->cluster_size << | ||
766 | (ffs(clusters_per_index_record) - 1); | ||
767 | else | ||
768 | /* | ||
769 | * When index_record_size < cluster_size, | ||
770 | * clusters_per_index_record = -log2(index_record_size) bytes. | ||
771 | * index_record_size normally equals 4096 bytes, which is | ||
772 | * encoded as 0xF4 (-12 in decimal). | ||
773 | */ | ||
774 | vol->index_record_size = 1 << -clusters_per_index_record; | ||
775 | vol->index_record_size_mask = vol->index_record_size - 1; | ||
776 | vol->index_record_size_bits = ffs(vol->index_record_size) - 1; | ||
777 | ntfs_debug("vol->index_record_size = %i (0x%x)", | ||
778 | vol->index_record_size, vol->index_record_size); | ||
779 | ntfs_debug("vol->index_record_size_mask = 0x%x", | ||
780 | vol->index_record_size_mask); | ||
781 | ntfs_debug("vol->index_record_size_bits = %i (0x%x)", | ||
782 | vol->index_record_size_bits, | ||
783 | vol->index_record_size_bits); | ||
784 | /* | ||
785 | * Get the size of the volume in clusters and check for 64-bit-ness. | ||
786 | * Windows currently only uses 32 bits to save the clusters so we do | ||
787 | * the same as it is much faster on 32-bit CPUs. | ||
788 | */ | ||
789 | ll = sle64_to_cpu(b->number_of_sectors) >> sectors_per_cluster_bits; | ||
790 | if ((u64)ll >= 1ULL << 32) { | ||
791 | ntfs_error(vol->sb, "Cannot handle 64-bit clusters. Sorry."); | ||
792 | return FALSE; | ||
793 | } | ||
794 | vol->nr_clusters = ll; | ||
795 | ntfs_debug("vol->nr_clusters = 0x%llx", (long long)vol->nr_clusters); | ||
796 | /* | ||
797 | * On an architecture where unsigned long is 32-bits, we restrict the | ||
798 | * volume size to 2TiB (2^41). On a 64-bit architecture, the compiler | ||
799 | * will hopefully optimize the whole check away. | ||
800 | */ | ||
801 | if (sizeof(unsigned long) < 8) { | ||
802 | if ((ll << vol->cluster_size_bits) >= (1ULL << 41)) { | ||
803 | ntfs_error(vol->sb, "Volume size (%lluTiB) is too " | ||
804 | "large for this architecture. " | ||
805 | "Maximum supported is 2TiB. Sorry.", | ||
806 | (unsigned long long)ll >> (40 - | ||
807 | vol->cluster_size_bits)); | ||
808 | return FALSE; | ||
809 | } | ||
810 | } | ||
811 | ll = sle64_to_cpu(b->mft_lcn); | ||
812 | if (ll >= vol->nr_clusters) { | ||
813 | ntfs_error(vol->sb, "MFT LCN is beyond end of volume. Weird."); | ||
814 | return FALSE; | ||
815 | } | ||
816 | vol->mft_lcn = ll; | ||
817 | ntfs_debug("vol->mft_lcn = 0x%llx", (long long)vol->mft_lcn); | ||
818 | ll = sle64_to_cpu(b->mftmirr_lcn); | ||
819 | if (ll >= vol->nr_clusters) { | ||
820 | ntfs_error(vol->sb, "MFTMirr LCN is beyond end of volume. " | ||
821 | "Weird."); | ||
822 | return FALSE; | ||
823 | } | ||
824 | vol->mftmirr_lcn = ll; | ||
825 | ntfs_debug("vol->mftmirr_lcn = 0x%llx", (long long)vol->mftmirr_lcn); | ||
826 | #ifdef NTFS_RW | ||
827 | /* | ||
828 | * Work out the size of the mft mirror in number of mft records. If the | ||
829 | * cluster size is less than or equal to the size taken by four mft | ||
830 | * records, the mft mirror stores the first four mft records. If the | ||
831 | * cluster size is bigger than the size taken by four mft records, the | ||
832 | * mft mirror contains as many mft records as will fit into one | ||
833 | * cluster. | ||
834 | */ | ||
835 | if (vol->cluster_size <= (4 << vol->mft_record_size_bits)) | ||
836 | vol->mftmirr_size = 4; | ||
837 | else | ||
838 | vol->mftmirr_size = vol->cluster_size >> | ||
839 | vol->mft_record_size_bits; | ||
840 | ntfs_debug("vol->mftmirr_size = %i", vol->mftmirr_size); | ||
841 | #endif /* NTFS_RW */ | ||
842 | vol->serial_no = le64_to_cpu(b->volume_serial_number); | ||
843 | ntfs_debug("vol->serial_no = 0x%llx", | ||
844 | (unsigned long long)vol->serial_no); | ||
845 | return TRUE; | ||
846 | } | ||
847 | |||
848 | /** | ||
849 | * ntfs_setup_allocators - initialize the cluster and mft allocators | ||
850 | * @vol: volume structure for which to setup the allocators | ||
851 | * | ||
852 | * Setup the cluster (lcn) and mft allocators to the starting values. | ||
853 | */ | ||
854 | static void ntfs_setup_allocators(ntfs_volume *vol) | ||
855 | { | ||
856 | #ifdef NTFS_RW | ||
857 | LCN mft_zone_size, mft_lcn; | ||
858 | #endif /* NTFS_RW */ | ||
859 | |||
860 | ntfs_debug("vol->mft_zone_multiplier = 0x%x", | ||
861 | vol->mft_zone_multiplier); | ||
862 | #ifdef NTFS_RW | ||
863 | /* Determine the size of the MFT zone. */ | ||
864 | mft_zone_size = vol->nr_clusters; | ||
865 | switch (vol->mft_zone_multiplier) { /* % of volume size in clusters */ | ||
866 | case 4: | ||
867 | mft_zone_size >>= 1; /* 50% */ | ||
868 | break; | ||
869 | case 3: | ||
870 | mft_zone_size = (mft_zone_size + | ||
871 | (mft_zone_size >> 1)) >> 2; /* 37.5% */ | ||
872 | break; | ||
873 | case 2: | ||
874 | mft_zone_size >>= 2; /* 25% */ | ||
875 | break; | ||
876 | /* case 1: */ | ||
877 | default: | ||
878 | mft_zone_size >>= 3; /* 12.5% */ | ||
879 | break; | ||
880 | } | ||
881 | /* Setup the mft zone. */ | ||
882 | vol->mft_zone_start = vol->mft_zone_pos = vol->mft_lcn; | ||
883 | ntfs_debug("vol->mft_zone_pos = 0x%llx", | ||
884 | (unsigned long long)vol->mft_zone_pos); | ||
885 | /* | ||
886 | * Calculate the mft_lcn for an unmodified NTFS volume (see mkntfs | ||
887 | * source) and if the actual mft_lcn is in the expected place or even | ||
888 | * further to the front of the volume, extend the mft_zone to cover the | ||
889 | * beginning of the volume as well. This is in order to protect the | ||
890 | * area reserved for the mft bitmap as well within the mft_zone itself. | ||
891 | * On non-standard volumes we do not protect it as the overhead would | ||
892 | * be higher than the speed increase we would get by doing it. | ||
893 | */ | ||
894 | mft_lcn = (8192 + 2 * vol->cluster_size - 1) / vol->cluster_size; | ||
895 | if (mft_lcn * vol->cluster_size < 16 * 1024) | ||
896 | mft_lcn = (16 * 1024 + vol->cluster_size - 1) / | ||
897 | vol->cluster_size; | ||
898 | if (vol->mft_zone_start <= mft_lcn) | ||
899 | vol->mft_zone_start = 0; | ||
900 | ntfs_debug("vol->mft_zone_start = 0x%llx", | ||
901 | (unsigned long long)vol->mft_zone_start); | ||
902 | /* | ||
903 | * Need to cap the mft zone on non-standard volumes so that it does | ||
904 | * not point outside the boundaries of the volume. We do this by | ||
905 | * halving the zone size until we are inside the volume. | ||
906 | */ | ||
907 | vol->mft_zone_end = vol->mft_lcn + mft_zone_size; | ||
908 | while (vol->mft_zone_end >= vol->nr_clusters) { | ||
909 | mft_zone_size >>= 1; | ||
910 | vol->mft_zone_end = vol->mft_lcn + mft_zone_size; | ||
911 | } | ||
912 | ntfs_debug("vol->mft_zone_end = 0x%llx", | ||
913 | (unsigned long long)vol->mft_zone_end); | ||
914 | /* | ||
915 | * Set the current position within each data zone to the start of the | ||
916 | * respective zone. | ||
917 | */ | ||
918 | vol->data1_zone_pos = vol->mft_zone_end; | ||
919 | ntfs_debug("vol->data1_zone_pos = 0x%llx", | ||
920 | (unsigned long long)vol->data1_zone_pos); | ||
921 | vol->data2_zone_pos = 0; | ||
922 | ntfs_debug("vol->data2_zone_pos = 0x%llx", | ||
923 | (unsigned long long)vol->data2_zone_pos); | ||
924 | |||
925 | /* Set the mft data allocation position to mft record 24. */ | ||
926 | vol->mft_data_pos = 24; | ||
927 | ntfs_debug("vol->mft_data_pos = 0x%llx", | ||
928 | (unsigned long long)vol->mft_data_pos); | ||
929 | #endif /* NTFS_RW */ | ||
930 | } | ||
931 | |||
932 | #ifdef NTFS_RW | ||
933 | |||
934 | /** | ||
935 | * load_and_init_mft_mirror - load and setup the mft mirror inode for a volume | ||
936 | * @vol: ntfs super block describing device whose mft mirror to load | ||
937 | * | ||
938 | * Return TRUE on success or FALSE on error. | ||
939 | */ | ||
940 | static BOOL load_and_init_mft_mirror(ntfs_volume *vol) | ||
941 | { | ||
942 | struct inode *tmp_ino; | ||
943 | ntfs_inode *tmp_ni; | ||
944 | |||
945 | ntfs_debug("Entering."); | ||
946 | /* Get mft mirror inode. */ | ||
947 | tmp_ino = ntfs_iget(vol->sb, FILE_MFTMirr); | ||
948 | if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) { | ||
949 | if (!IS_ERR(tmp_ino)) | ||
950 | iput(tmp_ino); | ||
951 | /* Caller will display error message. */ | ||
952 | return FALSE; | ||
953 | } | ||
954 | /* | ||
955 | * Re-initialize some specifics about $MFTMirr's inode as | ||
956 | * ntfs_read_inode() will have set up the default ones. | ||
957 | */ | ||
958 | /* Set uid and gid to root. */ | ||
959 | tmp_ino->i_uid = tmp_ino->i_gid = 0; | ||
960 | /* Regular file. No access for anyone. */ | ||
961 | tmp_ino->i_mode = S_IFREG; | ||
962 | /* No VFS initiated operations allowed for $MFTMirr. */ | ||
963 | tmp_ino->i_op = &ntfs_empty_inode_ops; | ||
964 | tmp_ino->i_fop = &ntfs_empty_file_ops; | ||
965 | /* Put in our special address space operations. */ | ||
966 | tmp_ino->i_mapping->a_ops = &ntfs_mst_aops; | ||
967 | tmp_ni = NTFS_I(tmp_ino); | ||
968 | /* The $MFTMirr, like the $MFT is multi sector transfer protected. */ | ||
969 | NInoSetMstProtected(tmp_ni); | ||
970 | /* | ||
971 | * Set up our little cheat allowing us to reuse the async read io | ||
972 | * completion handler for directories. | ||
973 | */ | ||
974 | tmp_ni->itype.index.block_size = vol->mft_record_size; | ||
975 | tmp_ni->itype.index.block_size_bits = vol->mft_record_size_bits; | ||
976 | vol->mftmirr_ino = tmp_ino; | ||
977 | ntfs_debug("Done."); | ||
978 | return TRUE; | ||
979 | } | ||
980 | |||
981 | /** | ||
982 | * check_mft_mirror - compare contents of the mft mirror with the mft | ||
983 | * @vol: ntfs super block describing device whose mft mirror to check | ||
984 | * | ||
985 | * Return TRUE on success or FALSE on error. | ||
986 | * | ||
987 | * Note, this function also results in the mft mirror runlist being completely | ||
988 | * mapped into memory. The mft mirror write code requires this and will BUG() | ||
989 | * should it find an unmapped runlist element. | ||
990 | */ | ||
991 | static BOOL check_mft_mirror(ntfs_volume *vol) | ||
992 | { | ||
993 | unsigned long index; | ||
994 | struct super_block *sb = vol->sb; | ||
995 | ntfs_inode *mirr_ni; | ||
996 | struct page *mft_page, *mirr_page; | ||
997 | u8 *kmft, *kmirr; | ||
998 | runlist_element *rl, rl2[2]; | ||
999 | int mrecs_per_page, i; | ||
1000 | |||
1001 | ntfs_debug("Entering."); | ||
1002 | /* Compare contents of $MFT and $MFTMirr. */ | ||
1003 | mrecs_per_page = PAGE_CACHE_SIZE / vol->mft_record_size; | ||
1004 | BUG_ON(!mrecs_per_page); | ||
1005 | BUG_ON(!vol->mftmirr_size); | ||
1006 | mft_page = mirr_page = NULL; | ||
1007 | kmft = kmirr = NULL; | ||
1008 | index = i = 0; | ||
1009 | do { | ||
1010 | u32 bytes; | ||
1011 | |||
1012 | /* Switch pages if necessary. */ | ||
1013 | if (!(i % mrecs_per_page)) { | ||
1014 | if (index) { | ||
1015 | ntfs_unmap_page(mft_page); | ||
1016 | ntfs_unmap_page(mirr_page); | ||
1017 | } | ||
1018 | /* Get the $MFT page. */ | ||
1019 | mft_page = ntfs_map_page(vol->mft_ino->i_mapping, | ||
1020 | index); | ||
1021 | if (IS_ERR(mft_page)) { | ||
1022 | ntfs_error(sb, "Failed to read $MFT."); | ||
1023 | return FALSE; | ||
1024 | } | ||
1025 | kmft = page_address(mft_page); | ||
1026 | /* Get the $MFTMirr page. */ | ||
1027 | mirr_page = ntfs_map_page(vol->mftmirr_ino->i_mapping, | ||
1028 | index); | ||
1029 | if (IS_ERR(mirr_page)) { | ||
1030 | ntfs_error(sb, "Failed to read $MFTMirr."); | ||
1031 | goto mft_unmap_out; | ||
1032 | } | ||
1033 | kmirr = page_address(mirr_page); | ||
1034 | ++index; | ||
1035 | } | ||
1036 | /* Make sure the record is ok. */ | ||
1037 | if (ntfs_is_baad_recordp((le32*)kmft)) { | ||
1038 | ntfs_error(sb, "Incomplete multi sector transfer " | ||
1039 | "detected in mft record %i.", i); | ||
1040 | mm_unmap_out: | ||
1041 | ntfs_unmap_page(mirr_page); | ||
1042 | mft_unmap_out: | ||
1043 | ntfs_unmap_page(mft_page); | ||
1044 | return FALSE; | ||
1045 | } | ||
1046 | if (ntfs_is_baad_recordp((le32*)kmirr)) { | ||
1047 | ntfs_error(sb, "Incomplete multi sector transfer " | ||
1048 | "detected in mft mirror record %i.", i); | ||
1049 | goto mm_unmap_out; | ||
1050 | } | ||
1051 | /* Get the amount of data in the current record. */ | ||
1052 | bytes = le32_to_cpu(((MFT_RECORD*)kmft)->bytes_in_use); | ||
1053 | if (!bytes || bytes > vol->mft_record_size) { | ||
1054 | bytes = le32_to_cpu(((MFT_RECORD*)kmirr)->bytes_in_use); | ||
1055 | if (!bytes || bytes > vol->mft_record_size) | ||
1056 | bytes = vol->mft_record_size; | ||
1057 | } | ||
1058 | /* Compare the two records. */ | ||
1059 | if (memcmp(kmft, kmirr, bytes)) { | ||
1060 | ntfs_error(sb, "$MFT and $MFTMirr (record %i) do not " | ||
1061 | "match. Run ntfsfix or chkdsk.", i); | ||
1062 | goto mm_unmap_out; | ||
1063 | } | ||
1064 | kmft += vol->mft_record_size; | ||
1065 | kmirr += vol->mft_record_size; | ||
1066 | } while (++i < vol->mftmirr_size); | ||
1067 | /* Release the last pages. */ | ||
1068 | ntfs_unmap_page(mft_page); | ||
1069 | ntfs_unmap_page(mirr_page); | ||
1070 | |||
1071 | /* Construct the mft mirror runlist by hand. */ | ||
1072 | rl2[0].vcn = 0; | ||
1073 | rl2[0].lcn = vol->mftmirr_lcn; | ||
1074 | rl2[0].length = (vol->mftmirr_size * vol->mft_record_size + | ||
1075 | vol->cluster_size - 1) / vol->cluster_size; | ||
1076 | rl2[1].vcn = rl2[0].length; | ||
1077 | rl2[1].lcn = LCN_ENOENT; | ||
1078 | rl2[1].length = 0; | ||
1079 | /* | ||
1080 | * Because we have just read all of the mft mirror, we know we have | ||
1081 | * mapped the full runlist for it. | ||
1082 | */ | ||
1083 | mirr_ni = NTFS_I(vol->mftmirr_ino); | ||
1084 | down_read(&mirr_ni->runlist.lock); | ||
1085 | rl = mirr_ni->runlist.rl; | ||
1086 | /* Compare the two runlists. They must be identical. */ | ||
1087 | i = 0; | ||
1088 | do { | ||
1089 | if (rl2[i].vcn != rl[i].vcn || rl2[i].lcn != rl[i].lcn || | ||
1090 | rl2[i].length != rl[i].length) { | ||
1091 | ntfs_error(sb, "$MFTMirr location mismatch. " | ||
1092 | "Run chkdsk."); | ||
1093 | up_read(&mirr_ni->runlist.lock); | ||
1094 | return FALSE; | ||
1095 | } | ||
1096 | } while (rl2[i++].length); | ||
1097 | up_read(&mirr_ni->runlist.lock); | ||
1098 | ntfs_debug("Done."); | ||
1099 | return TRUE; | ||
1100 | } | ||
1101 | |||
1102 | /** | ||
1103 | * load_and_check_logfile - load and check the logfile inode for a volume | ||
1104 | * @vol: ntfs super block describing device whose logfile to load | ||
1105 | * | ||
1106 | * Return TRUE on success or FALSE on error. | ||
1107 | */ | ||
1108 | static BOOL load_and_check_logfile(ntfs_volume *vol) | ||
1109 | { | ||
1110 | struct inode *tmp_ino; | ||
1111 | |||
1112 | ntfs_debug("Entering."); | ||
1113 | tmp_ino = ntfs_iget(vol->sb, FILE_LogFile); | ||
1114 | if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) { | ||
1115 | if (!IS_ERR(tmp_ino)) | ||
1116 | iput(tmp_ino); | ||
1117 | /* Caller will display error message. */ | ||
1118 | return FALSE; | ||
1119 | } | ||
1120 | if (!ntfs_check_logfile(tmp_ino)) { | ||
1121 | iput(tmp_ino); | ||
1122 | /* ntfs_check_logfile() will have displayed error output. */ | ||
1123 | return FALSE; | ||
1124 | } | ||
1125 | vol->logfile_ino = tmp_ino; | ||
1126 | ntfs_debug("Done."); | ||
1127 | return TRUE; | ||
1128 | } | ||
1129 | |||
1130 | /** | ||
1131 | * load_and_init_quota - load and setup the quota file for a volume if present | ||
1132 | * @vol: ntfs super block describing device whose quota file to load | ||
1133 | * | ||
1134 | * Return TRUE on success or FALSE on error. If $Quota is not present, we | ||
1135 | * leave vol->quota_ino as NULL and return success. | ||
1136 | */ | ||
1137 | static BOOL load_and_init_quota(ntfs_volume *vol) | ||
1138 | { | ||
1139 | MFT_REF mref; | ||
1140 | struct inode *tmp_ino; | ||
1141 | ntfs_name *name = NULL; | ||
1142 | static const ntfschar Quota[7] = { const_cpu_to_le16('$'), | ||
1143 | const_cpu_to_le16('Q'), const_cpu_to_le16('u'), | ||
1144 | const_cpu_to_le16('o'), const_cpu_to_le16('t'), | ||
1145 | const_cpu_to_le16('a'), 0 }; | ||
1146 | static ntfschar Q[3] = { const_cpu_to_le16('$'), | ||
1147 | const_cpu_to_le16('Q'), 0 }; | ||
1148 | |||
1149 | ntfs_debug("Entering."); | ||
1150 | /* | ||
1151 | * Find the inode number for the quota file by looking up the filename | ||
1152 | * $Quota in the extended system files directory $Extend. | ||
1153 | */ | ||
1154 | down(&vol->extend_ino->i_sem); | ||
1155 | mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), Quota, 6, | ||
1156 | &name); | ||
1157 | up(&vol->extend_ino->i_sem); | ||
1158 | if (IS_ERR_MREF(mref)) { | ||
1159 | /* | ||
1160 | * If the file does not exist, quotas are disabled and have | ||
1161 | * never been enabled on this volume, just return success. | ||
1162 | */ | ||
1163 | if (MREF_ERR(mref) == -ENOENT) { | ||
1164 | ntfs_debug("$Quota not present. Volume does not have " | ||
1165 | "quotas enabled."); | ||
1166 | /* | ||
1167 | * No need to try to set quotas out of date if they are | ||
1168 | * not enabled. | ||
1169 | */ | ||
1170 | NVolSetQuotaOutOfDate(vol); | ||
1171 | return TRUE; | ||
1172 | } | ||
1173 | /* A real error occured. */ | ||
1174 | ntfs_error(vol->sb, "Failed to find inode number for $Quota."); | ||
1175 | return FALSE; | ||
1176 | } | ||
1177 | /* We do not care for the type of match that was found. */ | ||
1178 | if (name) | ||
1179 | kfree(name); | ||
1180 | /* Get the inode. */ | ||
1181 | tmp_ino = ntfs_iget(vol->sb, MREF(mref)); | ||
1182 | if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) { | ||
1183 | if (!IS_ERR(tmp_ino)) | ||
1184 | iput(tmp_ino); | ||
1185 | ntfs_error(vol->sb, "Failed to load $Quota."); | ||
1186 | return FALSE; | ||
1187 | } | ||
1188 | vol->quota_ino = tmp_ino; | ||
1189 | /* Get the $Q index allocation attribute. */ | ||
1190 | tmp_ino = ntfs_index_iget(vol->quota_ino, Q, 2); | ||
1191 | if (IS_ERR(tmp_ino)) { | ||
1192 | ntfs_error(vol->sb, "Failed to load $Quota/$Q index."); | ||
1193 | return FALSE; | ||
1194 | } | ||
1195 | vol->quota_q_ino = tmp_ino; | ||
1196 | ntfs_debug("Done."); | ||
1197 | return TRUE; | ||
1198 | } | ||
1199 | |||
1200 | /** | ||
1201 | * load_and_init_attrdef - load the attribute definitions table for a volume | ||
1202 | * @vol: ntfs super block describing device whose attrdef to load | ||
1203 | * | ||
1204 | * Return TRUE on success or FALSE on error. | ||
1205 | */ | ||
1206 | static BOOL load_and_init_attrdef(ntfs_volume *vol) | ||
1207 | { | ||
1208 | struct super_block *sb = vol->sb; | ||
1209 | struct inode *ino; | ||
1210 | struct page *page; | ||
1211 | unsigned long index, max_index; | ||
1212 | unsigned int size; | ||
1213 | |||
1214 | ntfs_debug("Entering."); | ||
1215 | /* Read attrdef table and setup vol->attrdef and vol->attrdef_size. */ | ||
1216 | ino = ntfs_iget(sb, FILE_AttrDef); | ||
1217 | if (IS_ERR(ino) || is_bad_inode(ino)) { | ||
1218 | if (!IS_ERR(ino)) | ||
1219 | iput(ino); | ||
1220 | goto failed; | ||
1221 | } | ||
1222 | /* The size of FILE_AttrDef must be above 0 and fit inside 31 bits. */ | ||
1223 | if (!ino->i_size || ino->i_size > 0x7fffffff) | ||
1224 | goto iput_failed; | ||
1225 | vol->attrdef = (ATTR_DEF*)ntfs_malloc_nofs(ino->i_size); | ||
1226 | if (!vol->attrdef) | ||
1227 | goto iput_failed; | ||
1228 | index = 0; | ||
1229 | max_index = ino->i_size >> PAGE_CACHE_SHIFT; | ||
1230 | size = PAGE_CACHE_SIZE; | ||
1231 | while (index < max_index) { | ||
1232 | /* Read the attrdef table and copy it into the linear buffer. */ | ||
1233 | read_partial_attrdef_page: | ||
1234 | page = ntfs_map_page(ino->i_mapping, index); | ||
1235 | if (IS_ERR(page)) | ||
1236 | goto free_iput_failed; | ||
1237 | memcpy((u8*)vol->attrdef + (index++ << PAGE_CACHE_SHIFT), | ||
1238 | page_address(page), size); | ||
1239 | ntfs_unmap_page(page); | ||
1240 | }; | ||
1241 | if (size == PAGE_CACHE_SIZE) { | ||
1242 | size = ino->i_size & ~PAGE_CACHE_MASK; | ||
1243 | if (size) | ||
1244 | goto read_partial_attrdef_page; | ||
1245 | } | ||
1246 | vol->attrdef_size = ino->i_size; | ||
1247 | ntfs_debug("Read %llu bytes from $AttrDef.", ino->i_size); | ||
1248 | iput(ino); | ||
1249 | return TRUE; | ||
1250 | free_iput_failed: | ||
1251 | ntfs_free(vol->attrdef); | ||
1252 | vol->attrdef = NULL; | ||
1253 | iput_failed: | ||
1254 | iput(ino); | ||
1255 | failed: | ||
1256 | ntfs_error(sb, "Failed to initialize attribute definition table."); | ||
1257 | return FALSE; | ||
1258 | } | ||
1259 | |||
1260 | #endif /* NTFS_RW */ | ||
1261 | |||
1262 | /** | ||
1263 | * load_and_init_upcase - load the upcase table for an ntfs volume | ||
1264 | * @vol: ntfs super block describing device whose upcase to load | ||
1265 | * | ||
1266 | * Return TRUE on success or FALSE on error. | ||
1267 | */ | ||
1268 | static BOOL load_and_init_upcase(ntfs_volume *vol) | ||
1269 | { | ||
1270 | struct super_block *sb = vol->sb; | ||
1271 | struct inode *ino; | ||
1272 | struct page *page; | ||
1273 | unsigned long index, max_index; | ||
1274 | unsigned int size; | ||
1275 | int i, max; | ||
1276 | |||
1277 | ntfs_debug("Entering."); | ||
1278 | /* Read upcase table and setup vol->upcase and vol->upcase_len. */ | ||
1279 | ino = ntfs_iget(sb, FILE_UpCase); | ||
1280 | if (IS_ERR(ino) || is_bad_inode(ino)) { | ||
1281 | if (!IS_ERR(ino)) | ||
1282 | iput(ino); | ||
1283 | goto upcase_failed; | ||
1284 | } | ||
1285 | /* | ||
1286 | * The upcase size must not be above 64k Unicode characters, must not | ||
1287 | * be zero and must be a multiple of sizeof(ntfschar). | ||
1288 | */ | ||
1289 | if (!ino->i_size || ino->i_size & (sizeof(ntfschar) - 1) || | ||
1290 | ino->i_size > 64ULL * 1024 * sizeof(ntfschar)) | ||
1291 | goto iput_upcase_failed; | ||
1292 | vol->upcase = (ntfschar*)ntfs_malloc_nofs(ino->i_size); | ||
1293 | if (!vol->upcase) | ||
1294 | goto iput_upcase_failed; | ||
1295 | index = 0; | ||
1296 | max_index = ino->i_size >> PAGE_CACHE_SHIFT; | ||
1297 | size = PAGE_CACHE_SIZE; | ||
1298 | while (index < max_index) { | ||
1299 | /* Read the upcase table and copy it into the linear buffer. */ | ||
1300 | read_partial_upcase_page: | ||
1301 | page = ntfs_map_page(ino->i_mapping, index); | ||
1302 | if (IS_ERR(page)) | ||
1303 | goto iput_upcase_failed; | ||
1304 | memcpy((char*)vol->upcase + (index++ << PAGE_CACHE_SHIFT), | ||
1305 | page_address(page), size); | ||
1306 | ntfs_unmap_page(page); | ||
1307 | }; | ||
1308 | if (size == PAGE_CACHE_SIZE) { | ||
1309 | size = ino->i_size & ~PAGE_CACHE_MASK; | ||
1310 | if (size) | ||
1311 | goto read_partial_upcase_page; | ||
1312 | } | ||
1313 | vol->upcase_len = ino->i_size >> UCHAR_T_SIZE_BITS; | ||
1314 | ntfs_debug("Read %llu bytes from $UpCase (expected %zu bytes).", | ||
1315 | ino->i_size, 64 * 1024 * sizeof(ntfschar)); | ||
1316 | iput(ino); | ||
1317 | down(&ntfs_lock); | ||
1318 | if (!default_upcase) { | ||
1319 | ntfs_debug("Using volume specified $UpCase since default is " | ||
1320 | "not present."); | ||
1321 | up(&ntfs_lock); | ||
1322 | return TRUE; | ||
1323 | } | ||
1324 | max = default_upcase_len; | ||
1325 | if (max > vol->upcase_len) | ||
1326 | max = vol->upcase_len; | ||
1327 | for (i = 0; i < max; i++) | ||
1328 | if (vol->upcase[i] != default_upcase[i]) | ||
1329 | break; | ||
1330 | if (i == max) { | ||
1331 | ntfs_free(vol->upcase); | ||
1332 | vol->upcase = default_upcase; | ||
1333 | vol->upcase_len = max; | ||
1334 | ntfs_nr_upcase_users++; | ||
1335 | up(&ntfs_lock); | ||
1336 | ntfs_debug("Volume specified $UpCase matches default. Using " | ||
1337 | "default."); | ||
1338 | return TRUE; | ||
1339 | } | ||
1340 | up(&ntfs_lock); | ||
1341 | ntfs_debug("Using volume specified $UpCase since it does not match " | ||
1342 | "the default."); | ||
1343 | return TRUE; | ||
1344 | iput_upcase_failed: | ||
1345 | iput(ino); | ||
1346 | ntfs_free(vol->upcase); | ||
1347 | vol->upcase = NULL; | ||
1348 | upcase_failed: | ||
1349 | down(&ntfs_lock); | ||
1350 | if (default_upcase) { | ||
1351 | vol->upcase = default_upcase; | ||
1352 | vol->upcase_len = default_upcase_len; | ||
1353 | ntfs_nr_upcase_users++; | ||
1354 | up(&ntfs_lock); | ||
1355 | ntfs_error(sb, "Failed to load $UpCase from the volume. Using " | ||
1356 | "default."); | ||
1357 | return TRUE; | ||
1358 | } | ||
1359 | up(&ntfs_lock); | ||
1360 | ntfs_error(sb, "Failed to initialize upcase table."); | ||
1361 | return FALSE; | ||
1362 | } | ||
1363 | |||
1364 | /** | ||
1365 | * load_system_files - open the system files using normal functions | ||
1366 | * @vol: ntfs super block describing device whose system files to load | ||
1367 | * | ||
1368 | * Open the system files with normal access functions and complete setting up | ||
1369 | * the ntfs super block @vol. | ||
1370 | * | ||
1371 | * Return TRUE on success or FALSE on error. | ||
1372 | */ | ||
1373 | static BOOL load_system_files(ntfs_volume *vol) | ||
1374 | { | ||
1375 | struct super_block *sb = vol->sb; | ||
1376 | MFT_RECORD *m; | ||
1377 | VOLUME_INFORMATION *vi; | ||
1378 | ntfs_attr_search_ctx *ctx; | ||
1379 | |||
1380 | ntfs_debug("Entering."); | ||
1381 | #ifdef NTFS_RW | ||
1382 | /* Get mft mirror inode compare the contents of $MFT and $MFTMirr. */ | ||
1383 | if (!load_and_init_mft_mirror(vol) || !check_mft_mirror(vol)) { | ||
1384 | static const char *es1 = "Failed to load $MFTMirr"; | ||
1385 | static const char *es2 = "$MFTMirr does not match $MFT"; | ||
1386 | static const char *es3 = ". Run ntfsfix and/or chkdsk."; | ||
1387 | |||
1388 | /* If a read-write mount, convert it to a read-only mount. */ | ||
1389 | if (!(sb->s_flags & MS_RDONLY)) { | ||
1390 | if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO | | ||
1391 | ON_ERRORS_CONTINUE))) { | ||
1392 | ntfs_error(sb, "%s and neither on_errors=" | ||
1393 | "continue nor on_errors=" | ||
1394 | "remount-ro was specified%s", | ||
1395 | !vol->mftmirr_ino ? es1 : es2, | ||
1396 | es3); | ||
1397 | goto iput_mirr_err_out; | ||
1398 | } | ||
1399 | sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; | ||
1400 | ntfs_error(sb, "%s. Mounting read-only%s", | ||
1401 | !vol->mftmirr_ino ? es1 : es2, es3); | ||
1402 | } else | ||
1403 | ntfs_warning(sb, "%s. Will not be able to remount " | ||
1404 | "read-write%s", | ||
1405 | !vol->mftmirr_ino ? es1 : es2, es3); | ||
1406 | /* This will prevent a read-write remount. */ | ||
1407 | NVolSetErrors(vol); | ||
1408 | } | ||
1409 | #endif /* NTFS_RW */ | ||
1410 | /* Get mft bitmap attribute inode. */ | ||
1411 | vol->mftbmp_ino = ntfs_attr_iget(vol->mft_ino, AT_BITMAP, NULL, 0); | ||
1412 | if (IS_ERR(vol->mftbmp_ino)) { | ||
1413 | ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute."); | ||
1414 | goto iput_mirr_err_out; | ||
1415 | } | ||
1416 | /* Read upcase table and setup @vol->upcase and @vol->upcase_len. */ | ||
1417 | if (!load_and_init_upcase(vol)) | ||
1418 | goto iput_mftbmp_err_out; | ||
1419 | #ifdef NTFS_RW | ||
1420 | /* | ||
1421 | * Read attribute definitions table and setup @vol->attrdef and | ||
1422 | * @vol->attrdef_size. | ||
1423 | */ | ||
1424 | if (!load_and_init_attrdef(vol)) | ||
1425 | goto iput_upcase_err_out; | ||
1426 | #endif /* NTFS_RW */ | ||
1427 | /* | ||
1428 | * Get the cluster allocation bitmap inode and verify the size, no | ||
1429 | * need for any locking at this stage as we are already running | ||
1430 | * exclusively as we are mount in progress task. | ||
1431 | */ | ||
1432 | vol->lcnbmp_ino = ntfs_iget(sb, FILE_Bitmap); | ||
1433 | if (IS_ERR(vol->lcnbmp_ino) || is_bad_inode(vol->lcnbmp_ino)) { | ||
1434 | if (!IS_ERR(vol->lcnbmp_ino)) | ||
1435 | iput(vol->lcnbmp_ino); | ||
1436 | goto bitmap_failed; | ||
1437 | } | ||
1438 | if ((vol->nr_clusters + 7) >> 3 > vol->lcnbmp_ino->i_size) { | ||
1439 | iput(vol->lcnbmp_ino); | ||
1440 | bitmap_failed: | ||
1441 | ntfs_error(sb, "Failed to load $Bitmap."); | ||
1442 | goto iput_attrdef_err_out; | ||
1443 | } | ||
1444 | /* | ||
1445 | * Get the volume inode and setup our cache of the volume flags and | ||
1446 | * version. | ||
1447 | */ | ||
1448 | vol->vol_ino = ntfs_iget(sb, FILE_Volume); | ||
1449 | if (IS_ERR(vol->vol_ino) || is_bad_inode(vol->vol_ino)) { | ||
1450 | if (!IS_ERR(vol->vol_ino)) | ||
1451 | iput(vol->vol_ino); | ||
1452 | volume_failed: | ||
1453 | ntfs_error(sb, "Failed to load $Volume."); | ||
1454 | goto iput_lcnbmp_err_out; | ||
1455 | } | ||
1456 | m = map_mft_record(NTFS_I(vol->vol_ino)); | ||
1457 | if (IS_ERR(m)) { | ||
1458 | iput_volume_failed: | ||
1459 | iput(vol->vol_ino); | ||
1460 | goto volume_failed; | ||
1461 | } | ||
1462 | if (!(ctx = ntfs_attr_get_search_ctx(NTFS_I(vol->vol_ino), m))) { | ||
1463 | ntfs_error(sb, "Failed to get attribute search context."); | ||
1464 | goto get_ctx_vol_failed; | ||
1465 | } | ||
1466 | if (ntfs_attr_lookup(AT_VOLUME_INFORMATION, NULL, 0, 0, 0, NULL, 0, | ||
1467 | ctx) || ctx->attr->non_resident || ctx->attr->flags) { | ||
1468 | err_put_vol: | ||
1469 | ntfs_attr_put_search_ctx(ctx); | ||
1470 | get_ctx_vol_failed: | ||
1471 | unmap_mft_record(NTFS_I(vol->vol_ino)); | ||
1472 | goto iput_volume_failed; | ||
1473 | } | ||
1474 | vi = (VOLUME_INFORMATION*)((char*)ctx->attr + | ||
1475 | le16_to_cpu(ctx->attr->data.resident.value_offset)); | ||
1476 | /* Some bounds checks. */ | ||
1477 | if ((u8*)vi < (u8*)ctx->attr || (u8*)vi + | ||
1478 | le32_to_cpu(ctx->attr->data.resident.value_length) > | ||
1479 | (u8*)ctx->attr + le32_to_cpu(ctx->attr->length)) | ||
1480 | goto err_put_vol; | ||
1481 | /* Copy the volume flags and version to the ntfs_volume structure. */ | ||
1482 | vol->vol_flags = vi->flags; | ||
1483 | vol->major_ver = vi->major_ver; | ||
1484 | vol->minor_ver = vi->minor_ver; | ||
1485 | ntfs_attr_put_search_ctx(ctx); | ||
1486 | unmap_mft_record(NTFS_I(vol->vol_ino)); | ||
1487 | printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver, | ||
1488 | vol->minor_ver); | ||
1489 | #ifdef NTFS_RW | ||
1490 | /* Make sure that no unsupported volume flags are set. */ | ||
1491 | if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) { | ||
1492 | static const char *es1a = "Volume is dirty"; | ||
1493 | static const char *es1b = "Volume has unsupported flags set"; | ||
1494 | static const char *es2 = ". Run chkdsk and mount in Windows."; | ||
1495 | const char *es1; | ||
1496 | |||
1497 | es1 = vol->vol_flags & VOLUME_IS_DIRTY ? es1a : es1b; | ||
1498 | /* If a read-write mount, convert it to a read-only mount. */ | ||
1499 | if (!(sb->s_flags & MS_RDONLY)) { | ||
1500 | if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO | | ||
1501 | ON_ERRORS_CONTINUE))) { | ||
1502 | ntfs_error(sb, "%s and neither on_errors=" | ||
1503 | "continue nor on_errors=" | ||
1504 | "remount-ro was specified%s", | ||
1505 | es1, es2); | ||
1506 | goto iput_vol_err_out; | ||
1507 | } | ||
1508 | sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; | ||
1509 | ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); | ||
1510 | } else | ||
1511 | ntfs_warning(sb, "%s. Will not be able to remount " | ||
1512 | "read-write%s", es1, es2); | ||
1513 | /* | ||
1514 | * Do not set NVolErrors() because ntfs_remount() re-checks the | ||
1515 | * flags which we need to do in case any flags have changed. | ||
1516 | */ | ||
1517 | } | ||
1518 | /* | ||
1519 | * Get the inode for the logfile, check it and determine if the volume | ||
1520 | * was shutdown cleanly. | ||
1521 | */ | ||
1522 | if (!load_and_check_logfile(vol) || | ||
1523 | !ntfs_is_logfile_clean(vol->logfile_ino)) { | ||
1524 | static const char *es1a = "Failed to load $LogFile"; | ||
1525 | static const char *es1b = "$LogFile is not clean"; | ||
1526 | static const char *es2 = ". Mount in Windows."; | ||
1527 | const char *es1; | ||
1528 | |||
1529 | es1 = !vol->logfile_ino ? es1a : es1b; | ||
1530 | /* If a read-write mount, convert it to a read-only mount. */ | ||
1531 | if (!(sb->s_flags & MS_RDONLY)) { | ||
1532 | if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO | | ||
1533 | ON_ERRORS_CONTINUE))) { | ||
1534 | ntfs_error(sb, "%s and neither on_errors=" | ||
1535 | "continue nor on_errors=" | ||
1536 | "remount-ro was specified%s", | ||
1537 | es1, es2); | ||
1538 | goto iput_logfile_err_out; | ||
1539 | } | ||
1540 | sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; | ||
1541 | ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); | ||
1542 | } else | ||
1543 | ntfs_warning(sb, "%s. Will not be able to remount " | ||
1544 | "read-write%s", es1, es2); | ||
1545 | /* This will prevent a read-write remount. */ | ||
1546 | NVolSetErrors(vol); | ||
1547 | } | ||
1548 | /* If (still) a read-write mount, mark the volume dirty. */ | ||
1549 | if (!(sb->s_flags & MS_RDONLY) && | ||
1550 | ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) { | ||
1551 | static const char *es1 = "Failed to set dirty bit in volume " | ||
1552 | "information flags"; | ||
1553 | static const char *es2 = ". Run chkdsk."; | ||
1554 | |||
1555 | /* Convert to a read-only mount. */ | ||
1556 | if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO | | ||
1557 | ON_ERRORS_CONTINUE))) { | ||
1558 | ntfs_error(sb, "%s and neither on_errors=continue nor " | ||
1559 | "on_errors=remount-ro was specified%s", | ||
1560 | es1, es2); | ||
1561 | goto iput_logfile_err_out; | ||
1562 | } | ||
1563 | ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); | ||
1564 | sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; | ||
1565 | /* | ||
1566 | * Do not set NVolErrors() because ntfs_remount() might manage | ||
1567 | * to set the dirty flag in which case all would be well. | ||
1568 | */ | ||
1569 | } | ||
1570 | #if 0 | ||
1571 | // TODO: Enable this code once we start modifying anything that is | ||
1572 | // different between NTFS 1.2 and 3.x... | ||
1573 | /* | ||
1574 | * If (still) a read-write mount, set the NT4 compatibility flag on | ||
1575 | * newer NTFS version volumes. | ||
1576 | */ | ||
1577 | if (!(sb->s_flags & MS_RDONLY) && (vol->major_ver > 1) && | ||
1578 | ntfs_set_volume_flags(vol, VOLUME_MOUNTED_ON_NT4)) { | ||
1579 | static const char *es1 = "Failed to set NT4 compatibility flag"; | ||
1580 | static const char *es2 = ". Run chkdsk."; | ||
1581 | |||
1582 | /* Convert to a read-only mount. */ | ||
1583 | if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO | | ||
1584 | ON_ERRORS_CONTINUE))) { | ||
1585 | ntfs_error(sb, "%s and neither on_errors=continue nor " | ||
1586 | "on_errors=remount-ro was specified%s", | ||
1587 | es1, es2); | ||
1588 | goto iput_logfile_err_out; | ||
1589 | } | ||
1590 | ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); | ||
1591 | sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; | ||
1592 | NVolSetErrors(vol); | ||
1593 | } | ||
1594 | #endif | ||
1595 | /* If (still) a read-write mount, empty the logfile. */ | ||
1596 | if (!(sb->s_flags & MS_RDONLY) && | ||
1597 | !ntfs_empty_logfile(vol->logfile_ino)) { | ||
1598 | static const char *es1 = "Failed to empty $LogFile"; | ||
1599 | static const char *es2 = ". Mount in Windows."; | ||
1600 | |||
1601 | /* Convert to a read-only mount. */ | ||
1602 | if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO | | ||
1603 | ON_ERRORS_CONTINUE))) { | ||
1604 | ntfs_error(sb, "%s and neither on_errors=continue nor " | ||
1605 | "on_errors=remount-ro was specified%s", | ||
1606 | es1, es2); | ||
1607 | goto iput_logfile_err_out; | ||
1608 | } | ||
1609 | ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); | ||
1610 | sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; | ||
1611 | NVolSetErrors(vol); | ||
1612 | } | ||
1613 | #endif /* NTFS_RW */ | ||
1614 | /* Get the root directory inode. */ | ||
1615 | vol->root_ino = ntfs_iget(sb, FILE_root); | ||
1616 | if (IS_ERR(vol->root_ino) || is_bad_inode(vol->root_ino)) { | ||
1617 | if (!IS_ERR(vol->root_ino)) | ||
1618 | iput(vol->root_ino); | ||
1619 | ntfs_error(sb, "Failed to load root directory."); | ||
1620 | goto iput_logfile_err_out; | ||
1621 | } | ||
1622 | /* If on NTFS versions before 3.0, we are done. */ | ||
1623 | if (vol->major_ver < 3) | ||
1624 | return TRUE; | ||
1625 | /* NTFS 3.0+ specific initialization. */ | ||
1626 | /* Get the security descriptors inode. */ | ||
1627 | vol->secure_ino = ntfs_iget(sb, FILE_Secure); | ||
1628 | if (IS_ERR(vol->secure_ino) || is_bad_inode(vol->secure_ino)) { | ||
1629 | if (!IS_ERR(vol->secure_ino)) | ||
1630 | iput(vol->secure_ino); | ||
1631 | ntfs_error(sb, "Failed to load $Secure."); | ||
1632 | goto iput_root_err_out; | ||
1633 | } | ||
1634 | // FIXME: Initialize security. | ||
1635 | /* Get the extended system files' directory inode. */ | ||
1636 | vol->extend_ino = ntfs_iget(sb, FILE_Extend); | ||
1637 | if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino)) { | ||
1638 | if (!IS_ERR(vol->extend_ino)) | ||
1639 | iput(vol->extend_ino); | ||
1640 | ntfs_error(sb, "Failed to load $Extend."); | ||
1641 | goto iput_sec_err_out; | ||
1642 | } | ||
1643 | #ifdef NTFS_RW | ||
1644 | /* Find the quota file, load it if present, and set it up. */ | ||
1645 | if (!load_and_init_quota(vol)) { | ||
1646 | static const char *es1 = "Failed to load $Quota"; | ||
1647 | static const char *es2 = ". Run chkdsk."; | ||
1648 | |||
1649 | /* If a read-write mount, convert it to a read-only mount. */ | ||
1650 | if (!(sb->s_flags & MS_RDONLY)) { | ||
1651 | if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO | | ||
1652 | ON_ERRORS_CONTINUE))) { | ||
1653 | ntfs_error(sb, "%s and neither on_errors=" | ||
1654 | "continue nor on_errors=" | ||
1655 | "remount-ro was specified%s", | ||
1656 | es1, es2); | ||
1657 | goto iput_quota_err_out; | ||
1658 | } | ||
1659 | sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; | ||
1660 | ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); | ||
1661 | } else | ||
1662 | ntfs_warning(sb, "%s. Will not be able to remount " | ||
1663 | "read-write%s", es1, es2); | ||
1664 | /* This will prevent a read-write remount. */ | ||
1665 | NVolSetErrors(vol); | ||
1666 | } | ||
1667 | /* If (still) a read-write mount, mark the quotas out of date. */ | ||
1668 | if (!(sb->s_flags & MS_RDONLY) && | ||
1669 | !ntfs_mark_quotas_out_of_date(vol)) { | ||
1670 | static const char *es1 = "Failed to mark quotas out of date"; | ||
1671 | static const char *es2 = ". Run chkdsk."; | ||
1672 | |||
1673 | /* Convert to a read-only mount. */ | ||
1674 | if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO | | ||
1675 | ON_ERRORS_CONTINUE))) { | ||
1676 | ntfs_error(sb, "%s and neither on_errors=continue nor " | ||
1677 | "on_errors=remount-ro was specified%s", | ||
1678 | es1, es2); | ||
1679 | goto iput_quota_err_out; | ||
1680 | } | ||
1681 | ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); | ||
1682 | sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; | ||
1683 | NVolSetErrors(vol); | ||
1684 | } | ||
1685 | // TODO: Delete or checkpoint the $UsnJrnl if it exists. | ||
1686 | #endif /* NTFS_RW */ | ||
1687 | return TRUE; | ||
1688 | #ifdef NTFS_RW | ||
1689 | iput_quota_err_out: | ||
1690 | if (vol->quota_q_ino) | ||
1691 | iput(vol->quota_q_ino); | ||
1692 | if (vol->quota_ino) | ||
1693 | iput(vol->quota_ino); | ||
1694 | iput(vol->extend_ino); | ||
1695 | #endif /* NTFS_RW */ | ||
1696 | iput_sec_err_out: | ||
1697 | iput(vol->secure_ino); | ||
1698 | iput_root_err_out: | ||
1699 | iput(vol->root_ino); | ||
1700 | iput_logfile_err_out: | ||
1701 | #ifdef NTFS_RW | ||
1702 | if (vol->logfile_ino) | ||
1703 | iput(vol->logfile_ino); | ||
1704 | iput_vol_err_out: | ||
1705 | #endif /* NTFS_RW */ | ||
1706 | iput(vol->vol_ino); | ||
1707 | iput_lcnbmp_err_out: | ||
1708 | iput(vol->lcnbmp_ino); | ||
1709 | iput_attrdef_err_out: | ||
1710 | vol->attrdef_size = 0; | ||
1711 | if (vol->attrdef) { | ||
1712 | ntfs_free(vol->attrdef); | ||
1713 | vol->attrdef = NULL; | ||
1714 | } | ||
1715 | #ifdef NTFS_RW | ||
1716 | iput_upcase_err_out: | ||
1717 | #endif /* NTFS_RW */ | ||
1718 | vol->upcase_len = 0; | ||
1719 | down(&ntfs_lock); | ||
1720 | if (vol->upcase == default_upcase) { | ||
1721 | ntfs_nr_upcase_users--; | ||
1722 | vol->upcase = NULL; | ||
1723 | } | ||
1724 | up(&ntfs_lock); | ||
1725 | if (vol->upcase) { | ||
1726 | ntfs_free(vol->upcase); | ||
1727 | vol->upcase = NULL; | ||
1728 | } | ||
1729 | iput_mftbmp_err_out: | ||
1730 | iput(vol->mftbmp_ino); | ||
1731 | iput_mirr_err_out: | ||
1732 | #ifdef NTFS_RW | ||
1733 | if (vol->mftmirr_ino) | ||
1734 | iput(vol->mftmirr_ino); | ||
1735 | #endif /* NTFS_RW */ | ||
1736 | return FALSE; | ||
1737 | } | ||
1738 | |||
1739 | /** | ||
1740 | * ntfs_put_super - called by the vfs to unmount a volume | ||
1741 | * @sb: vfs superblock of volume to unmount | ||
1742 | * | ||
1743 | * ntfs_put_super() is called by the VFS (from fs/super.c::do_umount()) when | ||
1744 | * the volume is being unmounted (umount system call has been invoked) and it | ||
1745 | * releases all inodes and memory belonging to the NTFS specific part of the | ||
1746 | * super block. | ||
1747 | */ | ||
1748 | static void ntfs_put_super(struct super_block *sb) | ||
1749 | { | ||
1750 | ntfs_volume *vol = NTFS_SB(sb); | ||
1751 | |||
1752 | ntfs_debug("Entering."); | ||
1753 | #ifdef NTFS_RW | ||
1754 | /* | ||
1755 | * Commit all inodes while they are still open in case some of them | ||
1756 | * cause others to be dirtied. | ||
1757 | */ | ||
1758 | ntfs_commit_inode(vol->vol_ino); | ||
1759 | |||
1760 | /* NTFS 3.0+ specific. */ | ||
1761 | if (vol->major_ver >= 3) { | ||
1762 | if (vol->quota_q_ino) | ||
1763 | ntfs_commit_inode(vol->quota_q_ino); | ||
1764 | if (vol->quota_ino) | ||
1765 | ntfs_commit_inode(vol->quota_ino); | ||
1766 | if (vol->extend_ino) | ||
1767 | ntfs_commit_inode(vol->extend_ino); | ||
1768 | if (vol->secure_ino) | ||
1769 | ntfs_commit_inode(vol->secure_ino); | ||
1770 | } | ||
1771 | |||
1772 | ntfs_commit_inode(vol->root_ino); | ||
1773 | |||
1774 | down_write(&vol->lcnbmp_lock); | ||
1775 | ntfs_commit_inode(vol->lcnbmp_ino); | ||
1776 | up_write(&vol->lcnbmp_lock); | ||
1777 | |||
1778 | down_write(&vol->mftbmp_lock); | ||
1779 | ntfs_commit_inode(vol->mftbmp_ino); | ||
1780 | up_write(&vol->mftbmp_lock); | ||
1781 | |||
1782 | if (vol->logfile_ino) | ||
1783 | ntfs_commit_inode(vol->logfile_ino); | ||
1784 | |||
1785 | if (vol->mftmirr_ino) | ||
1786 | ntfs_commit_inode(vol->mftmirr_ino); | ||
1787 | ntfs_commit_inode(vol->mft_ino); | ||
1788 | |||
1789 | /* | ||
1790 | * If a read-write mount and no volume errors have occured, mark the | ||
1791 | * volume clean. Also, re-commit all affected inodes. | ||
1792 | */ | ||
1793 | if (!(sb->s_flags & MS_RDONLY)) { | ||
1794 | if (!NVolErrors(vol)) { | ||
1795 | if (ntfs_clear_volume_flags(vol, VOLUME_IS_DIRTY)) | ||
1796 | ntfs_warning(sb, "Failed to clear dirty bit " | ||
1797 | "in volume information " | ||
1798 | "flags. Run chkdsk."); | ||
1799 | ntfs_commit_inode(vol->vol_ino); | ||
1800 | ntfs_commit_inode(vol->root_ino); | ||
1801 | if (vol->mftmirr_ino) | ||
1802 | ntfs_commit_inode(vol->mftmirr_ino); | ||
1803 | ntfs_commit_inode(vol->mft_ino); | ||
1804 | } else { | ||
1805 | ntfs_warning(sb, "Volume has errors. Leaving volume " | ||
1806 | "marked dirty. Run chkdsk."); | ||
1807 | } | ||
1808 | } | ||
1809 | #endif /* NTFS_RW */ | ||
1810 | |||
1811 | iput(vol->vol_ino); | ||
1812 | vol->vol_ino = NULL; | ||
1813 | |||
1814 | /* NTFS 3.0+ specific clean up. */ | ||
1815 | if (vol->major_ver >= 3) { | ||
1816 | #ifdef NTFS_RW | ||
1817 | if (vol->quota_q_ino) { | ||
1818 | iput(vol->quota_q_ino); | ||
1819 | vol->quota_q_ino = NULL; | ||
1820 | } | ||
1821 | if (vol->quota_ino) { | ||
1822 | iput(vol->quota_ino); | ||
1823 | vol->quota_ino = NULL; | ||
1824 | } | ||
1825 | #endif /* NTFS_RW */ | ||
1826 | if (vol->extend_ino) { | ||
1827 | iput(vol->extend_ino); | ||
1828 | vol->extend_ino = NULL; | ||
1829 | } | ||
1830 | if (vol->secure_ino) { | ||
1831 | iput(vol->secure_ino); | ||
1832 | vol->secure_ino = NULL; | ||
1833 | } | ||
1834 | } | ||
1835 | |||
1836 | iput(vol->root_ino); | ||
1837 | vol->root_ino = NULL; | ||
1838 | |||
1839 | down_write(&vol->lcnbmp_lock); | ||
1840 | iput(vol->lcnbmp_ino); | ||
1841 | vol->lcnbmp_ino = NULL; | ||
1842 | up_write(&vol->lcnbmp_lock); | ||
1843 | |||
1844 | down_write(&vol->mftbmp_lock); | ||
1845 | iput(vol->mftbmp_ino); | ||
1846 | vol->mftbmp_ino = NULL; | ||
1847 | up_write(&vol->mftbmp_lock); | ||
1848 | |||
1849 | #ifdef NTFS_RW | ||
1850 | if (vol->logfile_ino) { | ||
1851 | iput(vol->logfile_ino); | ||
1852 | vol->logfile_ino = NULL; | ||
1853 | } | ||
1854 | if (vol->mftmirr_ino) { | ||
1855 | /* Re-commit the mft mirror and mft just in case. */ | ||
1856 | ntfs_commit_inode(vol->mftmirr_ino); | ||
1857 | ntfs_commit_inode(vol->mft_ino); | ||
1858 | iput(vol->mftmirr_ino); | ||
1859 | vol->mftmirr_ino = NULL; | ||
1860 | } | ||
1861 | /* | ||
1862 | * If any dirty inodes are left, throw away all mft data page cache | ||
1863 | * pages to allow a clean umount. This should never happen any more | ||
1864 | * due to mft.c::ntfs_mft_writepage() cleaning all the dirty pages as | ||
1865 | * the underlying mft records are written out and cleaned. If it does, | ||
1866 | * happen anyway, we want to know... | ||
1867 | */ | ||
1868 | ntfs_commit_inode(vol->mft_ino); | ||
1869 | write_inode_now(vol->mft_ino, 1); | ||
1870 | if (!list_empty(&sb->s_dirty)) { | ||
1871 | const char *s1, *s2; | ||
1872 | |||
1873 | down(&vol->mft_ino->i_sem); | ||
1874 | truncate_inode_pages(vol->mft_ino->i_mapping, 0); | ||
1875 | up(&vol->mft_ino->i_sem); | ||
1876 | write_inode_now(vol->mft_ino, 1); | ||
1877 | if (!list_empty(&sb->s_dirty)) { | ||
1878 | static const char *_s1 = "inodes"; | ||
1879 | static const char *_s2 = ""; | ||
1880 | s1 = _s1; | ||
1881 | s2 = _s2; | ||
1882 | } else { | ||
1883 | static const char *_s1 = "mft pages"; | ||
1884 | static const char *_s2 = "They have been thrown " | ||
1885 | "away. "; | ||
1886 | s1 = _s1; | ||
1887 | s2 = _s2; | ||
1888 | } | ||
1889 | ntfs_error(sb, "Dirty %s found at umount time. %sYou should " | ||
1890 | "run chkdsk. Please email " | ||
1891 | "linux-ntfs-dev@lists.sourceforge.net and say " | ||
1892 | "that you saw this message. Thank you.", s1, | ||
1893 | s2); | ||
1894 | } | ||
1895 | #endif /* NTFS_RW */ | ||
1896 | |||
1897 | iput(vol->mft_ino); | ||
1898 | vol->mft_ino = NULL; | ||
1899 | |||
1900 | /* Throw away the table of attribute definitions. */ | ||
1901 | vol->attrdef_size = 0; | ||
1902 | if (vol->attrdef) { | ||
1903 | ntfs_free(vol->attrdef); | ||
1904 | vol->attrdef = NULL; | ||
1905 | } | ||
1906 | vol->upcase_len = 0; | ||
1907 | /* | ||
1908 | * Destroy the global default upcase table if necessary. Also decrease | ||
1909 | * the number of upcase users if we are a user. | ||
1910 | */ | ||
1911 | down(&ntfs_lock); | ||
1912 | if (vol->upcase == default_upcase) { | ||
1913 | ntfs_nr_upcase_users--; | ||
1914 | vol->upcase = NULL; | ||
1915 | } | ||
1916 | if (!ntfs_nr_upcase_users && default_upcase) { | ||
1917 | ntfs_free(default_upcase); | ||
1918 | default_upcase = NULL; | ||
1919 | } | ||
1920 | if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users) | ||
1921 | free_compression_buffers(); | ||
1922 | up(&ntfs_lock); | ||
1923 | if (vol->upcase) { | ||
1924 | ntfs_free(vol->upcase); | ||
1925 | vol->upcase = NULL; | ||
1926 | } | ||
1927 | if (vol->nls_map) { | ||
1928 | unload_nls(vol->nls_map); | ||
1929 | vol->nls_map = NULL; | ||
1930 | } | ||
1931 | sb->s_fs_info = NULL; | ||
1932 | kfree(vol); | ||
1933 | return; | ||
1934 | } | ||
1935 | |||
1936 | /** | ||
1937 | * get_nr_free_clusters - return the number of free clusters on a volume | ||
1938 | * @vol: ntfs volume for which to obtain free cluster count | ||
1939 | * | ||
1940 | * Calculate the number of free clusters on the mounted NTFS volume @vol. We | ||
1941 | * actually calculate the number of clusters in use instead because this | ||
1942 | * allows us to not care about partial pages as these will be just zero filled | ||
1943 | * and hence not be counted as allocated clusters. | ||
1944 | * | ||
1945 | * The only particularity is that clusters beyond the end of the logical ntfs | ||
1946 | * volume will be marked as allocated to prevent errors which means we have to | ||
1947 | * discount those at the end. This is important as the cluster bitmap always | ||
1948 | * has a size in multiples of 8 bytes, i.e. up to 63 clusters could be outside | ||
1949 | * the logical volume and marked in use when they are not as they do not exist. | ||
1950 | * | ||
1951 | * If any pages cannot be read we assume all clusters in the erroring pages are | ||
1952 | * in use. This means we return an underestimate on errors which is better than | ||
1953 | * an overestimate. | ||
1954 | */ | ||
1955 | static s64 get_nr_free_clusters(ntfs_volume *vol) | ||
1956 | { | ||
1957 | s64 nr_free = vol->nr_clusters; | ||
1958 | u32 *kaddr; | ||
1959 | struct address_space *mapping = vol->lcnbmp_ino->i_mapping; | ||
1960 | filler_t *readpage = (filler_t*)mapping->a_ops->readpage; | ||
1961 | struct page *page; | ||
1962 | unsigned long index, max_index; | ||
1963 | unsigned int max_size; | ||
1964 | |||
1965 | ntfs_debug("Entering."); | ||
1966 | /* Serialize accesses to the cluster bitmap. */ | ||
1967 | down_read(&vol->lcnbmp_lock); | ||
1968 | /* | ||
1969 | * Convert the number of bits into bytes rounded up, then convert into | ||
1970 | * multiples of PAGE_CACHE_SIZE, rounding up so that if we have one | ||
1971 | * full and one partial page max_index = 2. | ||
1972 | */ | ||
1973 | max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_CACHE_SIZE - 1) >> | ||
1974 | PAGE_CACHE_SHIFT; | ||
1975 | /* Use multiples of 4 bytes. */ | ||
1976 | max_size = PAGE_CACHE_SIZE >> 2; | ||
1977 | ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%x.", | ||
1978 | max_index, max_size); | ||
1979 | for (index = 0UL; index < max_index; index++) { | ||
1980 | unsigned int i; | ||
1981 | /* | ||
1982 | * Read the page from page cache, getting it from backing store | ||
1983 | * if necessary, and increment the use count. | ||
1984 | */ | ||
1985 | page = read_cache_page(mapping, index, (filler_t*)readpage, | ||
1986 | NULL); | ||
1987 | /* Ignore pages which errored synchronously. */ | ||
1988 | if (IS_ERR(page)) { | ||
1989 | ntfs_debug("Sync read_cache_page() error. Skipping " | ||
1990 | "page (index 0x%lx).", index); | ||
1991 | nr_free -= PAGE_CACHE_SIZE * 8; | ||
1992 | continue; | ||
1993 | } | ||
1994 | wait_on_page_locked(page); | ||
1995 | /* Ignore pages which errored asynchronously. */ | ||
1996 | if (!PageUptodate(page)) { | ||
1997 | ntfs_debug("Async read_cache_page() error. Skipping " | ||
1998 | "page (index 0x%lx).", index); | ||
1999 | page_cache_release(page); | ||
2000 | nr_free -= PAGE_CACHE_SIZE * 8; | ||
2001 | continue; | ||
2002 | } | ||
2003 | kaddr = (u32*)kmap_atomic(page, KM_USER0); | ||
2004 | /* | ||
2005 | * For each 4 bytes, subtract the number of set bits. If this | ||
2006 | * is the last page and it is partial we don't really care as | ||
2007 | * it just means we do a little extra work but it won't affect | ||
2008 | * the result as all out of range bytes are set to zero by | ||
2009 | * ntfs_readpage(). | ||
2010 | */ | ||
2011 | for (i = 0; i < max_size; i++) | ||
2012 | nr_free -= (s64)hweight32(kaddr[i]); | ||
2013 | kunmap_atomic(kaddr, KM_USER0); | ||
2014 | page_cache_release(page); | ||
2015 | } | ||
2016 | ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1); | ||
2017 | /* | ||
2018 | * Fixup for eventual bits outside logical ntfs volume (see function | ||
2019 | * description above). | ||
2020 | */ | ||
2021 | if (vol->nr_clusters & 63) | ||
2022 | nr_free += 64 - (vol->nr_clusters & 63); | ||
2023 | up_read(&vol->lcnbmp_lock); | ||
2024 | /* If errors occured we may well have gone below zero, fix this. */ | ||
2025 | if (nr_free < 0) | ||
2026 | nr_free = 0; | ||
2027 | ntfs_debug("Exiting."); | ||
2028 | return nr_free; | ||
2029 | } | ||
2030 | |||
2031 | /** | ||
2032 | * __get_nr_free_mft_records - return the number of free inodes on a volume | ||
2033 | * @vol: ntfs volume for which to obtain free inode count | ||
2034 | * | ||
2035 | * Calculate the number of free mft records (inodes) on the mounted NTFS | ||
2036 | * volume @vol. We actually calculate the number of mft records in use instead | ||
2037 | * because this allows us to not care about partial pages as these will be just | ||
2038 | * zero filled and hence not be counted as allocated mft record. | ||
2039 | * | ||
2040 | * If any pages cannot be read we assume all mft records in the erroring pages | ||
2041 | * are in use. This means we return an underestimate on errors which is better | ||
2042 | * than an overestimate. | ||
2043 | * | ||
2044 | * NOTE: Caller must hold mftbmp_lock rw_semaphore for reading or writing. | ||
2045 | */ | ||
2046 | static unsigned long __get_nr_free_mft_records(ntfs_volume *vol) | ||
2047 | { | ||
2048 | s64 nr_free; | ||
2049 | u32 *kaddr; | ||
2050 | struct address_space *mapping = vol->mftbmp_ino->i_mapping; | ||
2051 | filler_t *readpage = (filler_t*)mapping->a_ops->readpage; | ||
2052 | struct page *page; | ||
2053 | unsigned long index, max_index; | ||
2054 | unsigned int max_size; | ||
2055 | |||
2056 | ntfs_debug("Entering."); | ||
2057 | /* Number of mft records in file system (at this point in time). */ | ||
2058 | nr_free = vol->mft_ino->i_size >> vol->mft_record_size_bits; | ||
2059 | /* | ||
2060 | * Convert the maximum number of set bits into bytes rounded up, then | ||
2061 | * convert into multiples of PAGE_CACHE_SIZE, rounding up so that if we | ||
2062 | * have one full and one partial page max_index = 2. | ||
2063 | */ | ||
2064 | max_index = ((((NTFS_I(vol->mft_ino)->initialized_size >> | ||
2065 | vol->mft_record_size_bits) + 7) >> 3) + | ||
2066 | PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
2067 | /* Use multiples of 4 bytes. */ | ||
2068 | max_size = PAGE_CACHE_SIZE >> 2; | ||
2069 | ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = " | ||
2070 | "0x%x.", max_index, max_size); | ||
2071 | for (index = 0UL; index < max_index; index++) { | ||
2072 | unsigned int i; | ||
2073 | /* | ||
2074 | * Read the page from page cache, getting it from backing store | ||
2075 | * if necessary, and increment the use count. | ||
2076 | */ | ||
2077 | page = read_cache_page(mapping, index, (filler_t*)readpage, | ||
2078 | NULL); | ||
2079 | /* Ignore pages which errored synchronously. */ | ||
2080 | if (IS_ERR(page)) { | ||
2081 | ntfs_debug("Sync read_cache_page() error. Skipping " | ||
2082 | "page (index 0x%lx).", index); | ||
2083 | nr_free -= PAGE_CACHE_SIZE * 8; | ||
2084 | continue; | ||
2085 | } | ||
2086 | wait_on_page_locked(page); | ||
2087 | /* Ignore pages which errored asynchronously. */ | ||
2088 | if (!PageUptodate(page)) { | ||
2089 | ntfs_debug("Async read_cache_page() error. Skipping " | ||
2090 | "page (index 0x%lx).", index); | ||
2091 | page_cache_release(page); | ||
2092 | nr_free -= PAGE_CACHE_SIZE * 8; | ||
2093 | continue; | ||
2094 | } | ||
2095 | kaddr = (u32*)kmap_atomic(page, KM_USER0); | ||
2096 | /* | ||
2097 | * For each 4 bytes, subtract the number of set bits. If this | ||
2098 | * is the last page and it is partial we don't really care as | ||
2099 | * it just means we do a little extra work but it won't affect | ||
2100 | * the result as all out of range bytes are set to zero by | ||
2101 | * ntfs_readpage(). | ||
2102 | */ | ||
2103 | for (i = 0; i < max_size; i++) | ||
2104 | nr_free -= (s64)hweight32(kaddr[i]); | ||
2105 | kunmap_atomic(kaddr, KM_USER0); | ||
2106 | page_cache_release(page); | ||
2107 | } | ||
2108 | ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.", | ||
2109 | index - 1); | ||
2110 | /* If errors occured we may well have gone below zero, fix this. */ | ||
2111 | if (nr_free < 0) | ||
2112 | nr_free = 0; | ||
2113 | ntfs_debug("Exiting."); | ||
2114 | return nr_free; | ||
2115 | } | ||
2116 | |||
2117 | /** | ||
2118 | * ntfs_statfs - return information about mounted NTFS volume | ||
2119 | * @sb: super block of mounted volume | ||
2120 | * @sfs: statfs structure in which to return the information | ||
2121 | * | ||
2122 | * Return information about the mounted NTFS volume @sb in the statfs structure | ||
2123 | * pointed to by @sfs (this is initialized with zeros before ntfs_statfs is | ||
2124 | * called). We interpret the values to be correct of the moment in time at | ||
2125 | * which we are called. Most values are variable otherwise and this isn't just | ||
2126 | * the free values but the totals as well. For example we can increase the | ||
2127 | * total number of file nodes if we run out and we can keep doing this until | ||
2128 | * there is no more space on the volume left at all. | ||
2129 | * | ||
2130 | * Called from vfs_statfs which is used to handle the statfs, fstatfs, and | ||
2131 | * ustat system calls. | ||
2132 | * | ||
2133 | * Return 0 on success or -errno on error. | ||
2134 | */ | ||
2135 | static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs) | ||
2136 | { | ||
2137 | ntfs_volume *vol = NTFS_SB(sb); | ||
2138 | s64 size; | ||
2139 | |||
2140 | ntfs_debug("Entering."); | ||
2141 | /* Type of filesystem. */ | ||
2142 | sfs->f_type = NTFS_SB_MAGIC; | ||
2143 | /* Optimal transfer block size. */ | ||
2144 | sfs->f_bsize = PAGE_CACHE_SIZE; | ||
2145 | /* | ||
2146 | * Total data blocks in file system in units of f_bsize and since | ||
2147 | * inodes are also stored in data blocs ($MFT is a file) this is just | ||
2148 | * the total clusters. | ||
2149 | */ | ||
2150 | sfs->f_blocks = vol->nr_clusters << vol->cluster_size_bits >> | ||
2151 | PAGE_CACHE_SHIFT; | ||
2152 | /* Free data blocks in file system in units of f_bsize. */ | ||
2153 | size = get_nr_free_clusters(vol) << vol->cluster_size_bits >> | ||
2154 | PAGE_CACHE_SHIFT; | ||
2155 | if (size < 0LL) | ||
2156 | size = 0LL; | ||
2157 | /* Free blocks avail to non-superuser, same as above on NTFS. */ | ||
2158 | sfs->f_bavail = sfs->f_bfree = size; | ||
2159 | /* Serialize accesses to the inode bitmap. */ | ||
2160 | down_read(&vol->mftbmp_lock); | ||
2161 | /* Number of inodes in file system (at this point in time). */ | ||
2162 | sfs->f_files = vol->mft_ino->i_size >> vol->mft_record_size_bits; | ||
2163 | /* Free inodes in fs (based on current total count). */ | ||
2164 | sfs->f_ffree = __get_nr_free_mft_records(vol); | ||
2165 | up_read(&vol->mftbmp_lock); | ||
2166 | /* | ||
2167 | * File system id. This is extremely *nix flavour dependent and even | ||
2168 | * within Linux itself all fs do their own thing. I interpret this to | ||
2169 | * mean a unique id associated with the mounted fs and not the id | ||
2170 | * associated with the file system driver, the latter is already given | ||
2171 | * by the file system type in sfs->f_type. Thus we use the 64-bit | ||
2172 | * volume serial number splitting it into two 32-bit parts. We enter | ||
2173 | * the least significant 32-bits in f_fsid[0] and the most significant | ||
2174 | * 32-bits in f_fsid[1]. | ||
2175 | */ | ||
2176 | sfs->f_fsid.val[0] = vol->serial_no & 0xffffffff; | ||
2177 | sfs->f_fsid.val[1] = (vol->serial_no >> 32) & 0xffffffff; | ||
2178 | /* Maximum length of filenames. */ | ||
2179 | sfs->f_namelen = NTFS_MAX_NAME_LEN; | ||
2180 | return 0; | ||
2181 | } | ||
2182 | |||
2183 | /** | ||
2184 | * The complete super operations. | ||
2185 | */ | ||
2186 | static struct super_operations ntfs_sops = { | ||
2187 | .alloc_inode = ntfs_alloc_big_inode, /* VFS: Allocate new inode. */ | ||
2188 | .destroy_inode = ntfs_destroy_big_inode, /* VFS: Deallocate inode. */ | ||
2189 | .put_inode = ntfs_put_inode, /* VFS: Called just before | ||
2190 | the inode reference count | ||
2191 | is decreased. */ | ||
2192 | #ifdef NTFS_RW | ||
2193 | //.dirty_inode = NULL, /* VFS: Called from | ||
2194 | // __mark_inode_dirty(). */ | ||
2195 | .write_inode = ntfs_write_inode, /* VFS: Write dirty inode to | ||
2196 | disk. */ | ||
2197 | //.drop_inode = NULL, /* VFS: Called just after the | ||
2198 | // inode reference count has | ||
2199 | // been decreased to zero. | ||
2200 | // NOTE: The inode lock is | ||
2201 | // held. See fs/inode.c:: | ||
2202 | // generic_drop_inode(). */ | ||
2203 | //.delete_inode = NULL, /* VFS: Delete inode from disk. | ||
2204 | // Called when i_count becomes | ||
2205 | // 0 and i_nlink is also 0. */ | ||
2206 | //.write_super = NULL, /* Flush dirty super block to | ||
2207 | // disk. */ | ||
2208 | //.sync_fs = NULL, /* ? */ | ||
2209 | //.write_super_lockfs = NULL, /* ? */ | ||
2210 | //.unlockfs = NULL, /* ? */ | ||
2211 | #endif /* NTFS_RW */ | ||
2212 | .put_super = ntfs_put_super, /* Syscall: umount. */ | ||
2213 | .statfs = ntfs_statfs, /* Syscall: statfs */ | ||
2214 | .remount_fs = ntfs_remount, /* Syscall: mount -o remount. */ | ||
2215 | .clear_inode = ntfs_clear_big_inode, /* VFS: Called when an inode is | ||
2216 | removed from memory. */ | ||
2217 | //.umount_begin = NULL, /* Forced umount. */ | ||
2218 | .show_options = ntfs_show_options, /* Show mount options in | ||
2219 | proc. */ | ||
2220 | }; | ||
2221 | |||
2222 | |||
/*
 * Prototypes of the NTFS specific export operations, which are implemented in
 * fs/ntfs/namei.c.
 */
struct dentry *ntfs_get_parent(struct dentry *child_dent);
struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh);
2228 | |||
2229 | /** | ||
2230 | * Export operations allowing NFS exporting of mounted NTFS partitions. | ||
2231 | * | ||
2232 | * We use the default ->decode_fh() and ->encode_fh() for now. Note that they | ||
2233 | * use 32 bits to store the inode number which is an unsigned long so on 64-bit | ||
2234 | * architectures is usually 64 bits so it would all fail horribly on huge | ||
2235 | * volumes. I guess we need to define our own encode and decode fh functions | ||
2236 | * that store 64-bit inode numbers at some point but for now we will ignore the | ||
2237 | * problem... | ||
2238 | * | ||
2239 | * We also use the default ->get_name() helper (used by ->decode_fh() via | ||
2240 | * fs/exportfs/expfs.c::find_exported_dentry()) as that is completely fs | ||
2241 | * independent. | ||
2242 | * | ||
2243 | * The default ->get_parent() just returns -EACCES so we have to provide our | ||
2244 | * own and the default ->get_dentry() is incompatible with NTFS due to not | ||
2245 | * allowing the inode number 0 which is used in NTFS for the system file $MFT | ||
2246 | * and due to using iget() whereas NTFS needs ntfs_iget(). | ||
2247 | */ | ||
2248 | static struct export_operations ntfs_export_ops = { | ||
2249 | .get_parent = ntfs_get_parent, /* Find the parent of a given | ||
2250 | directory. */ | ||
2251 | .get_dentry = ntfs_get_dentry, /* Find a dentry for the inode | ||
2252 | given a file handle | ||
2253 | sub-fragment. */ | ||
2254 | }; | ||
2255 | |||
2256 | /** | ||
2257 | * ntfs_fill_super - mount an ntfs file system | ||
2258 | * @sb: super block of ntfs file system to mount | ||
2259 | * @opt: string containing the mount options | ||
2260 | * @silent: silence error output | ||
2261 | * | ||
2262 | * ntfs_fill_super() is called by the VFS to mount the device described by @sb | ||
2263 | * with the mount options in @opt with the NTFS file system. | ||
2264 | * | ||
2265 | * If @silent is true, remain silent even if errors are detected. This is used | ||
2266 | * during bootup, when the kernel tries to mount the root file system with all | ||
2267 | * registered file systems one after the other until one succeeds. This implies | ||
2268 | * that all file systems except the correct one will quite correctly and | ||
2269 | * expectedly return an error, but nobody wants to see error messages when in | ||
2270 | * fact this is what is supposed to happen. | ||
2271 | * | ||
2272 | * NOTE: @sb->s_flags contains the mount options flags. | ||
2273 | */ | ||
2274 | static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) | ||
2275 | { | ||
2276 | ntfs_volume *vol; | ||
2277 | struct buffer_head *bh; | ||
2278 | struct inode *tmp_ino; | ||
2279 | int result; | ||
2280 | |||
2281 | ntfs_debug("Entering."); | ||
2282 | #ifndef NTFS_RW | ||
2283 | sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; | ||
2284 | #endif /* ! NTFS_RW */ | ||
2285 | /* Allocate a new ntfs_volume and place it in sb->s_fs_info. */ | ||
2286 | sb->s_fs_info = kmalloc(sizeof(ntfs_volume), GFP_NOFS); | ||
2287 | vol = NTFS_SB(sb); | ||
2288 | if (!vol) { | ||
2289 | if (!silent) | ||
2290 | ntfs_error(sb, "Allocation of NTFS volume structure " | ||
2291 | "failed. Aborting mount..."); | ||
2292 | return -ENOMEM; | ||
2293 | } | ||
2294 | /* Initialize ntfs_volume structure. */ | ||
2295 | memset(vol, 0, sizeof(ntfs_volume)); | ||
2296 | vol->sb = sb; | ||
2297 | vol->upcase = NULL; | ||
2298 | vol->attrdef = NULL; | ||
2299 | vol->mft_ino = NULL; | ||
2300 | vol->mftbmp_ino = NULL; | ||
2301 | init_rwsem(&vol->mftbmp_lock); | ||
2302 | #ifdef NTFS_RW | ||
2303 | vol->mftmirr_ino = NULL; | ||
2304 | vol->logfile_ino = NULL; | ||
2305 | #endif /* NTFS_RW */ | ||
2306 | vol->lcnbmp_ino = NULL; | ||
2307 | init_rwsem(&vol->lcnbmp_lock); | ||
2308 | vol->vol_ino = NULL; | ||
2309 | vol->root_ino = NULL; | ||
2310 | vol->secure_ino = NULL; | ||
2311 | vol->extend_ino = NULL; | ||
2312 | #ifdef NTFS_RW | ||
2313 | vol->quota_ino = NULL; | ||
2314 | vol->quota_q_ino = NULL; | ||
2315 | #endif /* NTFS_RW */ | ||
2316 | vol->nls_map = NULL; | ||
2317 | |||
2318 | /* | ||
2319 | * Default is group and other don't have any access to files or | ||
2320 | * directories while owner has full access. Further, files by default | ||
2321 | * are not executable but directories are of course browseable. | ||
2322 | */ | ||
2323 | vol->fmask = 0177; | ||
2324 | vol->dmask = 0077; | ||
2325 | |||
2326 | unlock_kernel(); | ||
2327 | |||
2328 | /* Important to get the mount options dealt with now. */ | ||
2329 | if (!parse_options(vol, (char*)opt)) | ||
2330 | goto err_out_now; | ||
2331 | |||
2332 | /* | ||
2333 | * TODO: Fail safety check. In the future we should really be able to | ||
2334 | * cope with this being the case, but for now just bail out. | ||
2335 | */ | ||
2336 | if (bdev_hardsect_size(sb->s_bdev) > NTFS_BLOCK_SIZE) { | ||
2337 | if (!silent) | ||
2338 | ntfs_error(sb, "Device has unsupported hardsect_size."); | ||
2339 | goto err_out_now; | ||
2340 | } | ||
2341 | |||
2342 | /* Setup the device access block size to NTFS_BLOCK_SIZE. */ | ||
2343 | if (sb_set_blocksize(sb, NTFS_BLOCK_SIZE) != NTFS_BLOCK_SIZE) { | ||
2344 | if (!silent) | ||
2345 | ntfs_error(sb, "Unable to set block size."); | ||
2346 | goto err_out_now; | ||
2347 | } | ||
2348 | |||
2349 | /* Get the size of the device in units of NTFS_BLOCK_SIZE bytes. */ | ||
2350 | vol->nr_blocks = sb->s_bdev->bd_inode->i_size >> NTFS_BLOCK_SIZE_BITS; | ||
2351 | |||
2352 | /* Read the boot sector and return unlocked buffer head to it. */ | ||
2353 | if (!(bh = read_ntfs_boot_sector(sb, silent))) { | ||
2354 | if (!silent) | ||
2355 | ntfs_error(sb, "Not an NTFS volume."); | ||
2356 | goto err_out_now; | ||
2357 | } | ||
2358 | |||
2359 | /* | ||
2360 | * Extract the data from the boot sector and setup the ntfs super block | ||
2361 | * using it. | ||
2362 | */ | ||
2363 | result = parse_ntfs_boot_sector(vol, (NTFS_BOOT_SECTOR*)bh->b_data); | ||
2364 | |||
2365 | /* Initialize the cluster and mft allocators. */ | ||
2366 | ntfs_setup_allocators(vol); | ||
2367 | |||
2368 | brelse(bh); | ||
2369 | |||
2370 | if (!result) { | ||
2371 | if (!silent) | ||
2372 | ntfs_error(sb, "Unsupported NTFS filesystem."); | ||
2373 | goto err_out_now; | ||
2374 | } | ||
2375 | |||
2376 | /* | ||
2377 | * TODO: When we start coping with sector sizes different from | ||
2378 | * NTFS_BLOCK_SIZE, we now probably need to set the blocksize of the | ||
2379 | * device (probably to NTFS_BLOCK_SIZE). | ||
2380 | */ | ||
2381 | |||
2382 | /* Setup remaining fields in the super block. */ | ||
2383 | sb->s_magic = NTFS_SB_MAGIC; | ||
2384 | |||
2385 | /* | ||
2386 | * Ntfs allows 63 bits for the file size, i.e. correct would be: | ||
2387 | * sb->s_maxbytes = ~0ULL >> 1; | ||
2388 | * But the kernel uses a long as the page cache page index which on | ||
2389 | * 32-bit architectures is only 32-bits. MAX_LFS_FILESIZE is kernel | ||
2390 | * defined to the maximum the page cache page index can cope with | ||
2391 | * without overflowing the index or to 2^63 - 1, whichever is smaller. | ||
2392 | */ | ||
2393 | sb->s_maxbytes = MAX_LFS_FILESIZE; | ||
2394 | |||
2395 | sb->s_time_gran = 100; | ||
2396 | |||
2397 | /* | ||
2398 | * Now load the metadata required for the page cache and our address | ||
2399 | * space operations to function. We do this by setting up a specialised | ||
2400 | * read_inode method and then just calling the normal iget() to obtain | ||
2401 | * the inode for $MFT which is sufficient to allow our normal inode | ||
2402 | * operations and associated address space operations to function. | ||
2403 | */ | ||
2404 | sb->s_op = &ntfs_sops; | ||
2405 | tmp_ino = new_inode(sb); | ||
2406 | if (!tmp_ino) { | ||
2407 | if (!silent) | ||
2408 | ntfs_error(sb, "Failed to load essential metadata."); | ||
2409 | goto err_out_now; | ||
2410 | } | ||
2411 | tmp_ino->i_ino = FILE_MFT; | ||
2412 | insert_inode_hash(tmp_ino); | ||
2413 | if (ntfs_read_inode_mount(tmp_ino) < 0) { | ||
2414 | if (!silent) | ||
2415 | ntfs_error(sb, "Failed to load essential metadata."); | ||
2416 | goto iput_tmp_ino_err_out_now; | ||
2417 | } | ||
2418 | down(&ntfs_lock); | ||
2419 | /* | ||
2420 | * The current mount is a compression user if the cluster size is | ||
2421 | * less than or equal 4kiB. | ||
2422 | */ | ||
2423 | if (vol->cluster_size <= 4096 && !ntfs_nr_compression_users++) { | ||
2424 | result = allocate_compression_buffers(); | ||
2425 | if (result) { | ||
2426 | ntfs_error(NULL, "Failed to allocate buffers " | ||
2427 | "for compression engine."); | ||
2428 | ntfs_nr_compression_users--; | ||
2429 | up(&ntfs_lock); | ||
2430 | goto iput_tmp_ino_err_out_now; | ||
2431 | } | ||
2432 | } | ||
2433 | /* | ||
2434 | * Generate the global default upcase table if necessary. Also | ||
2435 | * temporarily increment the number of upcase users to avoid race | ||
2436 | * conditions with concurrent (u)mounts. | ||
2437 | */ | ||
2438 | if (!default_upcase) | ||
2439 | default_upcase = generate_default_upcase(); | ||
2440 | ntfs_nr_upcase_users++; | ||
2441 | up(&ntfs_lock); | ||
2442 | /* | ||
2443 | * From now on, ignore @silent parameter. If we fail below this line, | ||
2444 | * it will be due to a corrupt fs or a system error, so we report it. | ||
2445 | */ | ||
2446 | /* | ||
2447 | * Open the system files with normal access functions and complete | ||
2448 | * setting up the ntfs super block. | ||
2449 | */ | ||
2450 | if (!load_system_files(vol)) { | ||
2451 | ntfs_error(sb, "Failed to load system files."); | ||
2452 | goto unl_upcase_iput_tmp_ino_err_out_now; | ||
2453 | } | ||
2454 | if ((sb->s_root = d_alloc_root(vol->root_ino))) { | ||
2455 | /* We increment i_count simulating an ntfs_iget(). */ | ||
2456 | atomic_inc(&vol->root_ino->i_count); | ||
2457 | ntfs_debug("Exiting, status successful."); | ||
2458 | /* Release the default upcase if it has no users. */ | ||
2459 | down(&ntfs_lock); | ||
2460 | if (!--ntfs_nr_upcase_users && default_upcase) { | ||
2461 | ntfs_free(default_upcase); | ||
2462 | default_upcase = NULL; | ||
2463 | } | ||
2464 | up(&ntfs_lock); | ||
2465 | sb->s_export_op = &ntfs_export_ops; | ||
2466 | lock_kernel(); | ||
2467 | return 0; | ||
2468 | } | ||
2469 | ntfs_error(sb, "Failed to allocate root directory."); | ||
2470 | /* Clean up after the successful load_system_files() call from above. */ | ||
2471 | // TODO: Use ntfs_put_super() instead of repeating all this code... | ||
2472 | // FIXME: Should mark the volume clean as the error is most likely | ||
2473 | // -ENOMEM. | ||
2474 | iput(vol->vol_ino); | ||
2475 | vol->vol_ino = NULL; | ||
2476 | /* NTFS 3.0+ specific clean up. */ | ||
2477 | if (vol->major_ver >= 3) { | ||
2478 | #ifdef NTFS_RW | ||
2479 | if (vol->quota_q_ino) { | ||
2480 | iput(vol->quota_q_ino); | ||
2481 | vol->quota_q_ino = NULL; | ||
2482 | } | ||
2483 | if (vol->quota_ino) { | ||
2484 | iput(vol->quota_ino); | ||
2485 | vol->quota_ino = NULL; | ||
2486 | } | ||
2487 | #endif /* NTFS_RW */ | ||
2488 | if (vol->extend_ino) { | ||
2489 | iput(vol->extend_ino); | ||
2490 | vol->extend_ino = NULL; | ||
2491 | } | ||
2492 | if (vol->secure_ino) { | ||
2493 | iput(vol->secure_ino); | ||
2494 | vol->secure_ino = NULL; | ||
2495 | } | ||
2496 | } | ||
2497 | iput(vol->root_ino); | ||
2498 | vol->root_ino = NULL; | ||
2499 | iput(vol->lcnbmp_ino); | ||
2500 | vol->lcnbmp_ino = NULL; | ||
2501 | iput(vol->mftbmp_ino); | ||
2502 | vol->mftbmp_ino = NULL; | ||
2503 | #ifdef NTFS_RW | ||
2504 | if (vol->logfile_ino) { | ||
2505 | iput(vol->logfile_ino); | ||
2506 | vol->logfile_ino = NULL; | ||
2507 | } | ||
2508 | if (vol->mftmirr_ino) { | ||
2509 | iput(vol->mftmirr_ino); | ||
2510 | vol->mftmirr_ino = NULL; | ||
2511 | } | ||
2512 | #endif /* NTFS_RW */ | ||
2513 | /* Throw away the table of attribute definitions. */ | ||
2514 | vol->attrdef_size = 0; | ||
2515 | if (vol->attrdef) { | ||
2516 | ntfs_free(vol->attrdef); | ||
2517 | vol->attrdef = NULL; | ||
2518 | } | ||
2519 | vol->upcase_len = 0; | ||
2520 | down(&ntfs_lock); | ||
2521 | if (vol->upcase == default_upcase) { | ||
2522 | ntfs_nr_upcase_users--; | ||
2523 | vol->upcase = NULL; | ||
2524 | } | ||
2525 | up(&ntfs_lock); | ||
2526 | if (vol->upcase) { | ||
2527 | ntfs_free(vol->upcase); | ||
2528 | vol->upcase = NULL; | ||
2529 | } | ||
2530 | if (vol->nls_map) { | ||
2531 | unload_nls(vol->nls_map); | ||
2532 | vol->nls_map = NULL; | ||
2533 | } | ||
2534 | /* Error exit code path. */ | ||
2535 | unl_upcase_iput_tmp_ino_err_out_now: | ||
2536 | /* | ||
2537 | * Decrease the number of upcase users and destroy the global default | ||
2538 | * upcase table if necessary. | ||
2539 | */ | ||
2540 | down(&ntfs_lock); | ||
2541 | if (!--ntfs_nr_upcase_users && default_upcase) { | ||
2542 | ntfs_free(default_upcase); | ||
2543 | default_upcase = NULL; | ||
2544 | } | ||
2545 | if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users) | ||
2546 | free_compression_buffers(); | ||
2547 | up(&ntfs_lock); | ||
2548 | iput_tmp_ino_err_out_now: | ||
2549 | iput(tmp_ino); | ||
2550 | if (vol->mft_ino && vol->mft_ino != tmp_ino) | ||
2551 | iput(vol->mft_ino); | ||
2552 | vol->mft_ino = NULL; | ||
2553 | /* | ||
2554 | * This is needed to get ntfs_clear_extent_inode() called for each | ||
2555 | * inode we have ever called ntfs_iget()/iput() on, otherwise we A) | ||
2556 | * leak resources and B) a subsequent mount fails automatically due to | ||
2557 | * ntfs_iget() never calling down into our ntfs_read_locked_inode() | ||
2558 | * method again... FIXME: Do we need to do this twice now because of | ||
2559 | * attribute inodes? I think not, so leave as is for now... (AIA) | ||
2560 | */ | ||
2561 | if (invalidate_inodes(sb)) { | ||
2562 | ntfs_error(sb, "Busy inodes left. This is most likely a NTFS " | ||
2563 | "driver bug."); | ||
2564 | /* Copied from fs/super.c. I just love this message. (-; */ | ||
2565 | printk("NTFS: Busy inodes after umount. Self-destruct in 5 " | ||
2566 | "seconds. Have a nice day...\n"); | ||
2567 | } | ||
2568 | /* Errors at this stage are irrelevant. */ | ||
2569 | err_out_now: | ||
2570 | lock_kernel(); | ||
2571 | sb->s_fs_info = NULL; | ||
2572 | kfree(vol); | ||
2573 | ntfs_debug("Failed, returning -EINVAL."); | ||
2574 | return -EINVAL; | ||
2575 | } | ||
2576 | |||
2577 | /* | ||
2578 | * This is a slab cache to optimize allocations and deallocations of Unicode | ||
2579 | * strings of the maximum length allowed by NTFS, which is NTFS_MAX_NAME_LEN | ||
2580 | * (255) Unicode characters + a terminating NULL Unicode character. | ||
2581 | */ | ||
2582 | kmem_cache_t *ntfs_name_cache; | ||
2583 | |||
2584 | /* Slab caches for efficient allocation/deallocation of inodes: ntfs_inode_cache objects are sizeof(ntfs_inode), ntfs_big_inode_cache objects are sizeof(big_ntfs_inode). */ | ||
2585 | kmem_cache_t *ntfs_inode_cache; | ||
2586 | kmem_cache_t *ntfs_big_inode_cache; | ||
2587 | |||
2588 | /* Init once constructor for the inode slab cache. */ | ||
2589 | static void ntfs_big_inode_init_once(void *foo, kmem_cache_t *cachep, | ||
2590 | unsigned long flags) | ||
2591 | { | ||
2592 | ntfs_inode *ni = (ntfs_inode *)foo; | ||
2593 | |||
2594 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == | ||
2595 | SLAB_CTOR_CONSTRUCTOR) | ||
2596 | inode_init_once(VFS_I(ni)); | ||
2597 | } | ||
2598 | |||
2599 | /* | ||
2600 | * Slab caches to optimize allocations and deallocations of attribute search | ||
2601 | * contexts and index contexts, respectively. | ||
2602 | */ | ||
2603 | kmem_cache_t *ntfs_attr_ctx_cache; | ||
2604 | kmem_cache_t *ntfs_index_ctx_cache; | ||
2605 | |||
2606 | /* Driver wide semaphore; ntfs_fill_super() holds it while updating the compression and upcase user counts. */ | ||
2607 | DECLARE_MUTEX(ntfs_lock); | ||
2608 | |||
2609 | static struct super_block *ntfs_get_sb(struct file_system_type *fs_type, | ||
2610 | int flags, const char *dev_name, void *data) | ||
2611 | { | ||
2612 | return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super); | ||
2613 | } | ||
2614 | |||
2615 | static struct file_system_type ntfs_fs_type = { | ||
2616 | .owner = THIS_MODULE, | ||
2617 | .name = "ntfs", | ||
2618 | .get_sb = ntfs_get_sb, | ||
2619 | .kill_sb = kill_block_super, | ||
2620 | .fs_flags = FS_REQUIRES_DEV, | ||
2621 | }; | ||
2622 | |||
2623 | /* Stable names for the slab caches, passed to kmem_cache_create() and reused in the init/exit error messages. */ | ||
2624 | static const char ntfs_index_ctx_cache_name[] = "ntfs_index_ctx_cache"; | ||
2625 | static const char ntfs_attr_ctx_cache_name[] = "ntfs_attr_ctx_cache"; | ||
2626 | static const char ntfs_name_cache_name[] = "ntfs_name_cache"; | ||
2627 | static const char ntfs_inode_cache_name[] = "ntfs_inode_cache"; | ||
2628 | static const char ntfs_big_inode_cache_name[] = "ntfs_big_inode_cache"; | ||
2629 | |||
2630 | static int __init init_ntfs_fs(void) | ||
2631 | { | ||
2632 | int err = 0; | ||
2633 | |||
2634 | /* This may be ugly but it results in pretty output so who cares. (-8 */ | ||
2635 | printk(KERN_INFO "NTFS driver " NTFS_VERSION " [Flags: R/" | ||
2636 | #ifdef NTFS_RW | ||
2637 | "W" | ||
2638 | #else | ||
2639 | "O" | ||
2640 | #endif | ||
2641 | #ifdef DEBUG | ||
2642 | " DEBUG" | ||
2643 | #endif | ||
2644 | #ifdef MODULE | ||
2645 | " MODULE" | ||
2646 | #endif | ||
2647 | "].\n"); | ||
2648 | |||
2649 | ntfs_debug("Debug messages are enabled."); | ||
2650 | |||
2651 | ntfs_index_ctx_cache = kmem_cache_create(ntfs_index_ctx_cache_name, | ||
2652 | sizeof(ntfs_index_context), 0 /* offset */, | ||
2653 | SLAB_HWCACHE_ALIGN, NULL /* ctor */, NULL /* dtor */); | ||
2654 | if (!ntfs_index_ctx_cache) { | ||
2655 | printk(KERN_CRIT "NTFS: Failed to create %s!\n", | ||
2656 | ntfs_index_ctx_cache_name); | ||
2657 | goto ictx_err_out; | ||
2658 | } | ||
2659 | ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name, | ||
2660 | sizeof(ntfs_attr_search_ctx), 0 /* offset */, | ||
2661 | SLAB_HWCACHE_ALIGN, NULL /* ctor */, NULL /* dtor */); | ||
2662 | if (!ntfs_attr_ctx_cache) { | ||
2663 | printk(KERN_CRIT "NTFS: Failed to create %s!\n", | ||
2664 | ntfs_attr_ctx_cache_name); | ||
2665 | goto actx_err_out; | ||
2666 | } | ||
2667 | |||
2668 | ntfs_name_cache = kmem_cache_create(ntfs_name_cache_name, | ||
2669 | (NTFS_MAX_NAME_LEN+1) * sizeof(ntfschar), 0, | ||
2670 | SLAB_HWCACHE_ALIGN, NULL, NULL); | ||
2671 | if (!ntfs_name_cache) { | ||
2672 | printk(KERN_CRIT "NTFS: Failed to create %s!\n", | ||
2673 | ntfs_name_cache_name); | ||
2674 | goto name_err_out; | ||
2675 | } | ||
2676 | |||
2677 | ntfs_inode_cache = kmem_cache_create(ntfs_inode_cache_name, | ||
2678 | sizeof(ntfs_inode), 0, | ||
2679 | SLAB_RECLAIM_ACCOUNT, NULL, NULL); | ||
2680 | if (!ntfs_inode_cache) { | ||
2681 | printk(KERN_CRIT "NTFS: Failed to create %s!\n", | ||
2682 | ntfs_inode_cache_name); | ||
2683 | goto inode_err_out; | ||
2684 | } | ||
2685 | |||
2686 | ntfs_big_inode_cache = kmem_cache_create(ntfs_big_inode_cache_name, | ||
2687 | sizeof(big_ntfs_inode), 0, | ||
2688 | SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, | ||
2689 | ntfs_big_inode_init_once, NULL); | ||
2690 | if (!ntfs_big_inode_cache) { | ||
2691 | printk(KERN_CRIT "NTFS: Failed to create %s!\n", | ||
2692 | ntfs_big_inode_cache_name); | ||
2693 | goto big_inode_err_out; | ||
2694 | } | ||
2695 | |||
2696 | /* Register the ntfs sysctls. */ | ||
2697 | err = ntfs_sysctl(1); | ||
2698 | if (err) { | ||
2699 | printk(KERN_CRIT "NTFS: Failed to register NTFS sysctls!\n"); | ||
2700 | goto sysctl_err_out; | ||
2701 | } | ||
2702 | |||
2703 | err = register_filesystem(&ntfs_fs_type); | ||
2704 | if (!err) { | ||
2705 | ntfs_debug("NTFS driver registered successfully."); | ||
2706 | return 0; /* Success! */ | ||
2707 | } | ||
2708 | printk(KERN_CRIT "NTFS: Failed to register NTFS file system driver!\n"); | ||
2709 | |||
2710 | sysctl_err_out: | ||
2711 | kmem_cache_destroy(ntfs_big_inode_cache); | ||
2712 | big_inode_err_out: | ||
2713 | kmem_cache_destroy(ntfs_inode_cache); | ||
2714 | inode_err_out: | ||
2715 | kmem_cache_destroy(ntfs_name_cache); | ||
2716 | name_err_out: | ||
2717 | kmem_cache_destroy(ntfs_attr_ctx_cache); | ||
2718 | actx_err_out: | ||
2719 | kmem_cache_destroy(ntfs_index_ctx_cache); | ||
2720 | ictx_err_out: | ||
2721 | if (!err) { | ||
2722 | printk(KERN_CRIT "NTFS: Aborting NTFS file system driver " | ||
2723 | "registration...\n"); | ||
2724 | err = -ENOMEM; | ||
2725 | } | ||
2726 | return err; | ||
2727 | } | ||
2728 | |||
2729 | static void __exit exit_ntfs_fs(void) | ||
2730 | { | ||
2731 | int err = 0; | ||
2732 | |||
2733 | ntfs_debug("Unregistering NTFS driver."); | ||
2734 | |||
2735 | unregister_filesystem(&ntfs_fs_type); | ||
2736 | |||
2737 | if (kmem_cache_destroy(ntfs_big_inode_cache) && (err = 1)) | ||
2738 | printk(KERN_CRIT "NTFS: Failed to destory %s.\n", | ||
2739 | ntfs_big_inode_cache_name); | ||
2740 | if (kmem_cache_destroy(ntfs_inode_cache) && (err = 1)) | ||
2741 | printk(KERN_CRIT "NTFS: Failed to destory %s.\n", | ||
2742 | ntfs_inode_cache_name); | ||
2743 | if (kmem_cache_destroy(ntfs_name_cache) && (err = 1)) | ||
2744 | printk(KERN_CRIT "NTFS: Failed to destory %s.\n", | ||
2745 | ntfs_name_cache_name); | ||
2746 | if (kmem_cache_destroy(ntfs_attr_ctx_cache) && (err = 1)) | ||
2747 | printk(KERN_CRIT "NTFS: Failed to destory %s.\n", | ||
2748 | ntfs_attr_ctx_cache_name); | ||
2749 | if (kmem_cache_destroy(ntfs_index_ctx_cache) && (err = 1)) | ||
2750 | printk(KERN_CRIT "NTFS: Failed to destory %s.\n", | ||
2751 | ntfs_index_ctx_cache_name); | ||
2752 | if (err) | ||
2753 | printk(KERN_CRIT "NTFS: This causes memory to leak! There is " | ||
2754 | "probably a BUG in the driver! Please report " | ||
2755 | "you saw this message to " | ||
2756 | "linux-ntfs-dev@lists.sourceforge.net\n"); | ||
2757 | /* Unregister the ntfs sysctls. */ | ||
2758 | ntfs_sysctl(0); | ||
2759 | } | ||
2760 | |||
/*
 * Module metadata, the debug_msgs module parameter (DEBUG builds only) and the
 * hooks that make init_ntfs_fs()/exit_ntfs_fs() the module entry/exit points.
 */
2761 | MODULE_AUTHOR("Anton Altaparmakov <aia21@cantab.net>"); | ||
2762 | MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2004 Anton Altaparmakov"); | ||
2763 | MODULE_VERSION(NTFS_VERSION); | ||
2764 | MODULE_LICENSE("GPL"); | ||
2765 | #ifdef DEBUG | ||
2766 | module_param(debug_msgs, bool, 0); | ||
2767 | MODULE_PARM_DESC(debug_msgs, "Enable debug messages."); | ||
2768 | #endif | ||
2769 | |||
2770 | module_init(init_ntfs_fs) | ||
2771 | module_exit(exit_ntfs_fs) | ||
diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c new file mode 100644 index 000000000000..75067e4f3036 --- /dev/null +++ b/fs/ntfs/sysctl.c | |||
@@ -0,0 +1,85 @@ | |||
1 | /* | ||
2 | * sysctl.c - Code for sysctl handling in NTFS Linux kernel driver. Part of | ||
3 | * the Linux-NTFS project. Adapted from the old NTFS driver, | ||
4 | * Copyright (C) 1997 Martin von Löwis, Régis Duchesne | ||
5 | * | ||
6 | * Copyright (c) 2002-2004 Anton Altaparmakov | ||
7 | * | ||
8 | * This program/include file is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as published | ||
10 | * by the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program/include file is distributed in the hope that it will be | ||
14 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
15 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program (in the main directory of the Linux-NTFS | ||
20 | * distribution in the file COPYING); if not, write to the Free Software | ||
21 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 | */ | ||
23 | |||
24 | #ifdef DEBUG | ||
25 | |||
26 | #include <linux/module.h> | ||
27 | |||
28 | #ifdef CONFIG_SYSCTL | ||
29 | |||
30 | #include <linux/proc_fs.h> | ||
31 | #include <linux/sysctl.h> | ||
32 | |||
33 | #include "sysctl.h" | ||
34 | #include "debug.h" | ||
35 | |||
36 | #define FS_NTFS 1 | ||
37 | |||
38 | /* Definition of the ntfs sysctl. */ | ||
39 | static ctl_table ntfs_sysctls[] = { | ||
40 | { FS_NTFS, "ntfs-debug", /* Binary and text IDs. */ | ||
41 | &debug_msgs,sizeof(debug_msgs), /* Data pointer and size. */ | ||
42 | 0644, NULL, &proc_dointvec }, /* Mode, child, proc handler. */ | ||
43 | { 0 } | ||
44 | }; | ||
45 | |||
46 | /* Define the parent directory /proc/sys/fs. */ | ||
47 | static ctl_table sysctls_root[] = { | ||
48 | { CTL_FS, "fs", NULL, 0, 0555, ntfs_sysctls }, | ||
49 | { 0 } | ||
50 | }; | ||
51 | |||
52 | /* Storage for the sysctls header. */ | ||
53 | static struct ctl_table_header *sysctls_root_table = NULL; | ||
54 | |||
55 | /** | ||
56 | * ntfs_sysctl - add or remove the debug sysctl | ||
57 | * @add: add (1) or remove (0) the sysctl | ||
58 | * | ||
59 | * Add or remove the debug sysctl. Return 0 on success or -errno on error. | ||
60 | */ | ||
61 | int ntfs_sysctl(int add) | ||
62 | { | ||
63 | if (add) { | ||
64 | BUG_ON(sysctls_root_table); | ||
65 | sysctls_root_table = register_sysctl_table(sysctls_root, 0); | ||
66 | if (!sysctls_root_table) | ||
67 | return -ENOMEM; | ||
68 | #ifdef CONFIG_PROC_FS | ||
69 | /* | ||
70 | * If the proc file system is in use and we are a module, need | ||
71 | * to set the owner of our proc entry to our module. In the | ||
72 | * non-modular case, THIS_MODULE is NULL, so this is ok. | ||
73 | */ | ||
74 | ntfs_sysctls[0].de->owner = THIS_MODULE; | ||
75 | #endif | ||
76 | } else { | ||
77 | BUG_ON(!sysctls_root_table); | ||
78 | unregister_sysctl_table(sysctls_root_table); | ||
79 | sysctls_root_table = NULL; | ||
80 | } | ||
81 | return 0; | ||
82 | } | ||
83 | |||
84 | #endif /* CONFIG_SYSCTL */ | ||
85 | #endif /* DEBUG */ | ||
diff --git a/fs/ntfs/sysctl.h b/fs/ntfs/sysctl.h new file mode 100644 index 000000000000..df749cc0aac8 --- /dev/null +++ b/fs/ntfs/sysctl.h | |||
@@ -0,0 +1,42 @@ | |||
1 | /* | ||
2 | * sysctl.h - Defines for sysctl handling in NTFS Linux kernel driver. Part of | ||
3 | * the Linux-NTFS project. Adapted from the old NTFS driver, | ||
4 | * Copyright (C) 1997 Martin von Löwis, Régis Duchesne | ||
5 | * | ||
6 | * Copyright (c) 2002-2004 Anton Altaparmakov | ||
7 | * | ||
8 | * This program/include file is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as published | ||
10 | * by the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program/include file is distributed in the hope that it will be | ||
14 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
15 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program (in the main directory of the Linux-NTFS | ||
20 | * distribution in the file COPYING); if not, write to the Free Software | ||
21 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 | */ | ||
23 | |||
24 | #ifndef _LINUX_NTFS_SYSCTL_H | ||
25 | #define _LINUX_NTFS_SYSCTL_H | ||
26 | |||
27 | #include <linux/config.h> | ||
28 | |||
/*
 * The real ntfs_sysctl() is only built when both DEBUG and CONFIG_SYSCTL are
 * defined (sysctl.c tests both with #ifdef); otherwise a stub that reports
 * success is used.  Test with defined() so that a valueless "#define DEBUG"
 * does not turn the condition into a preprocessor syntax error.
 */
#if defined(DEBUG) && defined(CONFIG_SYSCTL)

extern int ntfs_sysctl(int add);

#else

/* Just return success. */
static inline int ntfs_sysctl(int add)
{
	return 0;
}

#endif /* DEBUG && CONFIG_SYSCTL */
42 | #endif /* _LINUX_NTFS_SYSCTL_H */ | ||
diff --git a/fs/ntfs/time.h b/fs/ntfs/time.h new file mode 100644 index 000000000000..a09a51dabe4e --- /dev/null +++ b/fs/ntfs/time.h | |||
@@ -0,0 +1,100 @@ | |||
1 | /* | ||
2 | * time.h - NTFS time conversion functions. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
5 | * | ||
6 | * This program/include file is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as published | ||
8 | * by the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program/include file is distributed in the hope that it will be | ||
12 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
13 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program (in the main directory of the Linux-NTFS | ||
18 | * distribution in the file COPYING); if not, write to the Free Software | ||
19 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | |||
22 | #ifndef _LINUX_NTFS_TIME_H | ||
23 | #define _LINUX_NTFS_TIME_H | ||
24 | |||
25 | #include <linux/time.h> /* For current_kernel_time(). */ | ||
26 | #include <asm/div64.h> /* For do_div(). */ | ||
27 | |||
28 | #include "endian.h" | ||
29 | |||
/*
 * Offset between the NTFS epoch (1st January 1601) and the Linux epoch (1st
 * January 1970) in 100ns intervals: 369 years of 365 days plus 89 leap days,
 * times 24 hours, 3600 seconds and 10000000 intervals per second.
 */
30 | #define NTFS_TIME_OFFSET ((s64)(369 * 365 + 89) * 24 * 3600 * 10000000) | ||
31 | |||
32 | /** | ||
33 | * utc2ntfs - convert Linux UTC time to NTFS time | ||
34 | * @ts: Linux UTC time to convert to NTFS time | ||
35 | * | ||
36 | * Convert the Linux UTC time @ts to its corresponding NTFS time and return | ||
37 | * that in little endian format. | ||
38 | * | ||
39 | * Linux stores time in a struct timespec consisting of a time_t (long at | ||
40 | * present) tv_sec and a long tv_nsec where tv_sec is the number of 1-second | ||
41 | * intervals since 1st January 1970, 00:00:00 UTC and tv_nsec is the number of | ||
42 | * 1-nano-second intervals since the value of tv_sec. | ||
43 | * | ||
44 | * NTFS uses Microsoft's standard time format which is stored in a s64 and is | ||
45 | * measured as the number of 100-nano-second intervals since 1st January 1601, | ||
46 | * 00:00:00 UTC. | ||
47 | */ | ||
48 | static inline sle64 utc2ntfs(const struct timespec ts) | ||
49 | { | ||
50 | /* | ||
51 | * Convert the seconds to 100ns intervals, add the nano-seconds | ||
52 | * converted to 100ns intervals, and then add the NTFS time offset. | ||
53 | */ | ||
54 | return cpu_to_sle64((s64)ts.tv_sec * 10000000 + ts.tv_nsec / 100 + | ||
55 | NTFS_TIME_OFFSET); | ||
56 | } | ||
57 | |||
58 | /** | ||
59 | * get_current_ntfs_time - get the current time in little endian NTFS format | ||
60 | * | ||
61 | * Get the current time from the Linux kernel, convert it to its corresponding | ||
62 | * NTFS time and return that in little endian format. | ||
63 | */ | ||
64 | static inline sle64 get_current_ntfs_time(void) | ||
65 | { | ||
66 | return utc2ntfs(current_kernel_time()); | ||
67 | } | ||
68 | |||
69 | /** | ||
70 | * ntfs2utc - convert NTFS time to Linux time | ||
71 | * @time: NTFS time (little endian) to convert to Linux UTC | ||
72 | * | ||
73 | * Convert the little endian NTFS time @time to its corresponding Linux UTC | ||
74 | * time and return that in cpu format. | ||
75 | * | ||
76 | * Linux stores time in a struct timespec consisting of a time_t (long at | ||
77 | * present) tv_sec and a long tv_nsec where tv_sec is the number of 1-second | ||
78 | * intervals since 1st January 1970, 00:00:00 UTC and tv_nsec is the number of | ||
79 | * 1-nano-second intervals since the value of tv_sec. | ||
80 | * | ||
81 | * NTFS uses Microsoft's standard time format which is stored in a s64 and is | ||
82 | * measured as the number of 100 nano-second intervals since 1st January 1601, | ||
83 | * 00:00:00 UTC. | ||
84 | */ | ||
85 | static inline struct timespec ntfs2utc(const sle64 time) | ||
86 | { | ||
87 | struct timespec ts; | ||
88 | |||
89 | /* Subtract the NTFS time offset. */ | ||
90 | s64 t = sle64_to_cpu(time) - NTFS_TIME_OFFSET; | ||
91 | /* | ||
92 | * Convert the time to 1-second intervals and the remainder to | ||
93 | * 1-nano-second intervals. | ||
94 | */ | ||
95 | ts.tv_nsec = do_div(t, 10000000) * 100; | ||
96 | ts.tv_sec = t; | ||
97 | return ts; | ||
98 | } | ||
99 | |||
100 | #endif /* _LINUX_NTFS_TIME_H */ | ||
diff --git a/fs/ntfs/types.h b/fs/ntfs/types.h new file mode 100644 index 000000000000..08a55aa53d4e --- /dev/null +++ b/fs/ntfs/types.h | |||
@@ -0,0 +1,66 @@ | |||
1 | /* | ||
2 | * types.h - Defines for NTFS Linux kernel driver specific types. | ||
3 | * Part of the Linux-NTFS project. | ||
4 | * | ||
5 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
6 | * | ||
7 | * This program/include file is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as published | ||
9 | * by the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program/include file is distributed in the hope that it will be | ||
13 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program (in the main directory of the Linux-NTFS | ||
19 | * distribution in the file COPYING); if not, write to the Free Software | ||
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | |||
23 | #ifndef _LINUX_NTFS_TYPES_H | ||
24 | #define _LINUX_NTFS_TYPES_H | ||
25 | |||
26 | #include <linux/types.h> | ||
27 | |||
/*
 * Short names for the kernel's little endian types.  The sle* types are
 * declared on unsigned __bitwise storage; presumably they carry signed little
 * endian values (sle64 is used for NTFS times) - TODO confirm.
 */
28 | typedef __le16 le16; | ||
29 | typedef __le32 le32; | ||
30 | typedef __le64 le64; | ||
31 | typedef __u16 __bitwise sle16; | ||
32 | typedef __u32 __bitwise sle32; | ||
33 | typedef __u64 __bitwise sle64; | ||
34 | |||
35 | /* 2-byte little endian Unicode character type; UCHAR_T_SIZE_BITS is presumably log2 of its size in bytes (TODO confirm). */ | ||
36 | typedef le16 ntfschar; | ||
37 | #define UCHAR_T_SIZE_BITS 1 | ||
38 | |||
39 | /* | ||
40 | * Clusters are signed 64-bit values on NTFS volumes. We define two types, LCN | ||
41 | * and VCN, to allow for type checking and better code readability. Each has a | ||
42 | * little endian counterpart (leVCN, leLCN) based on sle64. | ||
43 | */ | ||
43 | typedef s64 VCN; | ||
44 | typedef sle64 leVCN; | ||
45 | typedef s64 LCN; | ||
46 | typedef sle64 leLCN; | ||
47 | |||
48 | /* | ||
49 | * The NTFS journal $LogFile uses log sequence numbers which are signed 64-bit | ||
50 | * values. We define our own type LSN, to allow for type checking and better | ||
51 | * code readability. leLSN is its little endian counterpart based on sle64. | ||
52 | */ | ||
53 | typedef s64 LSN; | ||
54 | typedef sle64 leLSN; | ||
55 | |||
/* Boolean type of the driver: FALSE is 0 and TRUE is 1. */
typedef enum {
	FALSE,		/* = 0 */
	TRUE,		/* = 1 */
} BOOL;
60 | |||
/* Case sensitivity selector: CASE_SENSITIVE is 0 and IGNORE_CASE is 1. */
typedef enum {
	CASE_SENSITIVE,		/* = 0 */
	IGNORE_CASE,		/* = 1 */
} IGNORE_CASE_BOOL;
65 | |||
66 | #endif /* _LINUX_NTFS_TYPES_H */ | ||
diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c new file mode 100644 index 000000000000..560b0ea255b0 --- /dev/null +++ b/fs/ntfs/unistr.c | |||
@@ -0,0 +1,384 @@ | |||
1 | /* | ||
2 | * unistr.c - NTFS Unicode string handling. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
5 | * | ||
6 | * This program/include file is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as published | ||
8 | * by the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program/include file is distributed in the hope that it will be | ||
12 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
13 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program (in the main directory of the Linux-NTFS | ||
18 | * distribution in the file COPYING); if not, write to the Free Software | ||
19 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | |||
22 | #include "types.h" | ||
23 | #include "debug.h" | ||
24 | #include "ntfs.h" | ||
25 | |||
26 | /* | ||
27 | * IMPORTANT | ||
28 | * ========= | ||
29 | * | ||
30 | * All these routines assume that the Unicode characters are in little endian | ||
31 | * encoding inside the strings!!! | ||
32 | */ | ||
33 | |||
34 | /* | ||
35 | * This is used by the name collation functions to quickly determine what | ||
36 | * characters are (in)valid. | ||
37 | */ | ||
38 | static const u8 legal_ansi_char_array[0x40] = { | ||
39 | 0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, | ||
40 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, | ||
41 | |||
42 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, | ||
43 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, | ||
44 | |||
45 | 0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17, | ||
46 | 0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00, | ||
47 | |||
48 | 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, | ||
49 | 0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18, | ||
50 | }; | ||
51 | |||
52 | /** | ||
53 | * ntfs_are_names_equal - compare two Unicode names for equality | ||
54 | * @s1: name to compare to @s2 | ||
55 | * @s1_len: length in Unicode characters of @s1 | ||
56 | * @s2: name to compare to @s1 | ||
57 | * @s2_len: length in Unicode characters of @s2 | ||
58 | * @ic: ignore case bool | ||
59 | * @upcase: upcase table (only if @ic == IGNORE_CASE) | ||
60 | * @upcase_size: length in Unicode characters of @upcase (if present) | ||
61 | * | ||
62 | * Compare the names @s1 and @s2 and return TRUE (1) if the names are | ||
63 | * identical, or FALSE (0) if they are not identical. If @ic is IGNORE_CASE, | ||
64 | * the @upcase table is used to performa a case insensitive comparison. | ||
65 | */ | ||
66 | BOOL ntfs_are_names_equal(const ntfschar *s1, size_t s1_len, | ||
67 | const ntfschar *s2, size_t s2_len, const IGNORE_CASE_BOOL ic, | ||
68 | const ntfschar *upcase, const u32 upcase_size) | ||
69 | { | ||
70 | if (s1_len != s2_len) | ||
71 | return FALSE; | ||
72 | if (ic == CASE_SENSITIVE) | ||
73 | return !ntfs_ucsncmp(s1, s2, s1_len); | ||
74 | return !ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size); | ||
75 | } | ||
76 | |||
77 | /** | ||
78 | * ntfs_collate_names - collate two Unicode names | ||
79 | * @name1: first Unicode name to compare | ||
80 | * @name2: second Unicode name to compare | ||
81 | * @err_val: if @name1 contains an invalid character return this value | ||
82 | * @ic: either CASE_SENSITIVE or IGNORE_CASE | ||
83 | * @upcase: upcase table (ignored if @ic is CASE_SENSITIVE) | ||
84 | * @upcase_len: upcase table size (ignored if @ic is CASE_SENSITIVE) | ||
85 | * | ||
86 | * ntfs_collate_names collates two Unicode names and returns: | ||
87 | * | ||
88 | * -1 if the first name collates before the second one, | ||
89 | * 0 if the names match, | ||
90 | * 1 if the second name collates before the first one, or | ||
91 | * @err_val if an invalid character is found in @name1 during the comparison. | ||
92 | * | ||
93 | * The following characters are considered invalid: '"', '*', '<', '>' and '?'. | ||
94 | */ | ||
95 | int ntfs_collate_names(const ntfschar *name1, const u32 name1_len, | ||
96 | const ntfschar *name2, const u32 name2_len, | ||
97 | const int err_val, const IGNORE_CASE_BOOL ic, | ||
98 | const ntfschar *upcase, const u32 upcase_len) | ||
99 | { | ||
100 | u32 cnt, min_len; | ||
101 | u16 c1, c2; | ||
102 | |||
103 | min_len = name1_len; | ||
104 | if (name1_len > name2_len) | ||
105 | min_len = name2_len; | ||
106 | for (cnt = 0; cnt < min_len; ++cnt) { | ||
107 | c1 = le16_to_cpu(*name1++); | ||
108 | c2 = le16_to_cpu(*name2++); | ||
109 | if (ic) { | ||
110 | if (c1 < upcase_len) | ||
111 | c1 = le16_to_cpu(upcase[c1]); | ||
112 | if (c2 < upcase_len) | ||
113 | c2 = le16_to_cpu(upcase[c2]); | ||
114 | } | ||
115 | if (c1 < 64 && legal_ansi_char_array[c1] & 8) | ||
116 | return err_val; | ||
117 | if (c1 < c2) | ||
118 | return -1; | ||
119 | if (c1 > c2) | ||
120 | return 1; | ||
121 | } | ||
122 | if (name1_len < name2_len) | ||
123 | return -1; | ||
124 | if (name1_len == name2_len) | ||
125 | return 0; | ||
126 | /* name1_len > name2_len */ | ||
127 | c1 = le16_to_cpu(*name1); | ||
128 | if (c1 < 64 && legal_ansi_char_array[c1] & 8) | ||
129 | return err_val; | ||
130 | return 1; | ||
131 | } | ||
132 | |||
133 | /** | ||
134 | * ntfs_ucsncmp - compare two little endian Unicode strings | ||
135 | * @s1: first string | ||
136 | * @s2: second string | ||
137 | * @n: maximum unicode characters to compare | ||
138 | * | ||
139 | * Compare the first @n characters of the Unicode strings @s1 and @s2, | ||
140 | * The strings in little endian format and appropriate le16_to_cpu() | ||
141 | * conversion is performed on non-little endian machines. | ||
142 | * | ||
143 | * The function returns an integer less than, equal to, or greater than zero | ||
144 | * if @s1 (or the first @n Unicode characters thereof) is found, respectively, | ||
145 | * to be less than, to match, or be greater than @s2. | ||
146 | */ | ||
147 | int ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n) | ||
148 | { | ||
149 | u16 c1, c2; | ||
150 | size_t i; | ||
151 | |||
152 | for (i = 0; i < n; ++i) { | ||
153 | c1 = le16_to_cpu(s1[i]); | ||
154 | c2 = le16_to_cpu(s2[i]); | ||
155 | if (c1 < c2) | ||
156 | return -1; | ||
157 | if (c1 > c2) | ||
158 | return 1; | ||
159 | if (!c1) | ||
160 | break; | ||
161 | } | ||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | /** | ||
166 | * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case | ||
167 | * @s1: first string | ||
168 | * @s2: second string | ||
169 | * @n: maximum unicode characters to compare | ||
170 | * @upcase: upcase table | ||
171 | * @upcase_size: upcase table size in Unicode characters | ||
172 | * | ||
173 | * Compare the first @n characters of the Unicode strings @s1 and @s2, | ||
174 | * ignoring case. The strings in little endian format and appropriate | ||
175 | * le16_to_cpu() conversion is performed on non-little endian machines. | ||
176 | * | ||
177 | * Each character is uppercased using the @upcase table before the comparison. | ||
178 | * | ||
179 | * The function returns an integer less than, equal to, or greater than zero | ||
180 | * if @s1 (or the first @n Unicode characters thereof) is found, respectively, | ||
181 | * to be less than, to match, or be greater than @s2. | ||
182 | */ | ||
183 | int ntfs_ucsncasecmp(const ntfschar *s1, const ntfschar *s2, size_t n, | ||
184 | const ntfschar *upcase, const u32 upcase_size) | ||
185 | { | ||
186 | size_t i; | ||
187 | u16 c1, c2; | ||
188 | |||
189 | for (i = 0; i < n; ++i) { | ||
190 | if ((c1 = le16_to_cpu(s1[i])) < upcase_size) | ||
191 | c1 = le16_to_cpu(upcase[c1]); | ||
192 | if ((c2 = le16_to_cpu(s2[i])) < upcase_size) | ||
193 | c2 = le16_to_cpu(upcase[c2]); | ||
194 | if (c1 < c2) | ||
195 | return -1; | ||
196 | if (c1 > c2) | ||
197 | return 1; | ||
198 | if (!c1) | ||
199 | break; | ||
200 | } | ||
201 | return 0; | ||
202 | } | ||
203 | |||
204 | void ntfs_upcase_name(ntfschar *name, u32 name_len, const ntfschar *upcase, | ||
205 | const u32 upcase_len) | ||
206 | { | ||
207 | u32 i; | ||
208 | u16 u; | ||
209 | |||
210 | for (i = 0; i < name_len; i++) | ||
211 | if ((u = le16_to_cpu(name[i])) < upcase_len) | ||
212 | name[i] = upcase[u]; | ||
213 | } | ||
214 | |||
215 | void ntfs_file_upcase_value(FILE_NAME_ATTR *file_name_attr, | ||
216 | const ntfschar *upcase, const u32 upcase_len) | ||
217 | { | ||
218 | ntfs_upcase_name((ntfschar*)&file_name_attr->file_name, | ||
219 | file_name_attr->file_name_length, upcase, upcase_len); | ||
220 | } | ||
221 | |||
222 | int ntfs_file_compare_values(FILE_NAME_ATTR *file_name_attr1, | ||
223 | FILE_NAME_ATTR *file_name_attr2, | ||
224 | const int err_val, const IGNORE_CASE_BOOL ic, | ||
225 | const ntfschar *upcase, const u32 upcase_len) | ||
226 | { | ||
227 | return ntfs_collate_names((ntfschar*)&file_name_attr1->file_name, | ||
228 | file_name_attr1->file_name_length, | ||
229 | (ntfschar*)&file_name_attr2->file_name, | ||
230 | file_name_attr2->file_name_length, | ||
231 | err_val, ic, upcase, upcase_len); | ||
232 | } | ||
233 | |||
234 | /** | ||
235 | * ntfs_nlstoucs - convert NLS string to little endian Unicode string | ||
236 | * @vol: ntfs volume which we are working with | ||
237 | * @ins: input NLS string buffer | ||
238 | * @ins_len: length of input string in bytes | ||
239 | * @outs: on return contains the allocated output Unicode string buffer | ||
240 | * | ||
241 | * Convert the input string @ins, which is in whatever format the loaded NLS | ||
242 | * map dictates, into a little endian, 2-byte Unicode string. | ||
243 | * | ||
244 | * This function allocates the string and the caller is responsible for | ||
245 | * calling kmem_cache_free(ntfs_name_cache, @outs); when finished with it. | ||
246 | * | ||
247 | * On success the function returns the number of Unicode characters written to | ||
248 | * the output string *@outs (>= 0), not counting the terminating Unicode NULL | ||
249 | * character. *@outs is set to the allocated output string buffer. | ||
250 | * | ||
251 | * On error, a negative number corresponding to the error code is returned. In | ||
252 | * that case the output string is not allocated. Both *@outs and *@outs_len | ||
253 | * are then undefined. | ||
254 | * | ||
255 | * This might look a bit odd due to fast path optimization... | ||
256 | */ | ||
257 | int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins, | ||
258 | const int ins_len, ntfschar **outs) | ||
259 | { | ||
260 | struct nls_table *nls = vol->nls_map; | ||
261 | ntfschar *ucs; | ||
262 | wchar_t wc; | ||
263 | int i, o, wc_len; | ||
264 | |||
265 | /* We don't trust outside sources. */ | ||
266 | if (ins) { | ||
267 | ucs = (ntfschar*)kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS); | ||
268 | if (ucs) { | ||
269 | for (i = o = 0; i < ins_len; i += wc_len) { | ||
270 | wc_len = nls->char2uni(ins + i, ins_len - i, | ||
271 | &wc); | ||
272 | if (wc_len >= 0) { | ||
273 | if (wc) { | ||
274 | ucs[o++] = cpu_to_le16(wc); | ||
275 | continue; | ||
276 | } /* else (!wc) */ | ||
277 | break; | ||
278 | } /* else (wc_len < 0) */ | ||
279 | goto conversion_err; | ||
280 | } | ||
281 | ucs[o] = 0; | ||
282 | *outs = ucs; | ||
283 | return o; | ||
284 | } /* else (!ucs) */ | ||
285 | ntfs_error(vol->sb, "Failed to allocate name from " | ||
286 | "ntfs_name_cache!"); | ||
287 | return -ENOMEM; | ||
288 | } /* else (!ins) */ | ||
289 | ntfs_error(NULL, "Received NULL pointer."); | ||
290 | return -EINVAL; | ||
291 | conversion_err: | ||
292 | ntfs_error(vol->sb, "Name using character set %s contains characters " | ||
293 | "that cannot be converted to Unicode.", nls->charset); | ||
294 | kmem_cache_free(ntfs_name_cache, ucs); | ||
295 | return -EILSEQ; | ||
296 | } | ||
297 | |||
298 | /** | ||
299 | * ntfs_ucstonls - convert little endian Unicode string to NLS string | ||
300 | * @vol: ntfs volume which we are working with | ||
301 | * @ins: input Unicode string buffer | ||
302 | * @ins_len: length of input string in Unicode characters | ||
303 | * @outs: on return contains the (allocated) output NLS string buffer | ||
304 | * @outs_len: length of output string buffer in bytes | ||
305 | * | ||
306 | * Convert the input little endian, 2-byte Unicode string @ins, of length | ||
307 | * @ins_len into the string format dictated by the loaded NLS. | ||
308 | * | ||
309 | * If *@outs is NULL, this function allocates the string and the caller is | ||
310 | * responsible for calling kfree(*@outs); when finished with it. In this case | ||
311 | * @outs_len is ignored and can be 0. | ||
312 | * | ||
313 | * On success the function returns the number of bytes written to the output | ||
314 | * string *@outs (>= 0), not counting the terminating NULL byte. If the output | ||
315 | * string buffer was allocated, *@outs is set to it. | ||
316 | * | ||
317 | * On error, a negative number corresponding to the error code is returned. In | ||
318 | * that case the output string is not allocated. The contents of *@outs are | ||
319 | * then undefined. | ||
320 | * | ||
321 | * This might look a bit odd due to fast path optimization... | ||
322 | */ | ||
323 | int ntfs_ucstonls(const ntfs_volume *vol, const ntfschar *ins, | ||
324 | const int ins_len, unsigned char **outs, int outs_len) | ||
325 | { | ||
326 | struct nls_table *nls = vol->nls_map; | ||
327 | unsigned char *ns; | ||
328 | int i, o, ns_len, wc; | ||
329 | |||
330 | /* We don't trust outside sources. */ | ||
331 | if (ins) { | ||
332 | ns = *outs; | ||
333 | ns_len = outs_len; | ||
334 | if (ns && !ns_len) { | ||
335 | wc = -ENAMETOOLONG; | ||
336 | goto conversion_err; | ||
337 | } | ||
338 | if (!ns) { | ||
339 | ns_len = ins_len * NLS_MAX_CHARSET_SIZE; | ||
340 | ns = (unsigned char*)kmalloc(ns_len + 1, GFP_NOFS); | ||
341 | if (!ns) | ||
342 | goto mem_err_out; | ||
343 | } | ||
344 | for (i = o = 0; i < ins_len; i++) { | ||
345 | retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o, | ||
346 | ns_len - o); | ||
347 | if (wc > 0) { | ||
348 | o += wc; | ||
349 | continue; | ||
350 | } else if (!wc) | ||
351 | break; | ||
352 | else if (wc == -ENAMETOOLONG && ns != *outs) { | ||
353 | unsigned char *tc; | ||
354 | /* Grow in multiples of 64 bytes. */ | ||
355 | tc = (unsigned char*)kmalloc((ns_len + 64) & | ||
356 | ~63, GFP_NOFS); | ||
357 | if (tc) { | ||
358 | memcpy(tc, ns, ns_len); | ||
359 | ns_len = ((ns_len + 64) & ~63) - 1; | ||
360 | kfree(ns); | ||
361 | ns = tc; | ||
362 | goto retry; | ||
363 | } /* No memory so goto conversion_error; */ | ||
364 | } /* wc < 0, real error. */ | ||
365 | goto conversion_err; | ||
366 | } | ||
367 | ns[o] = 0; | ||
368 | *outs = ns; | ||
369 | return o; | ||
370 | } /* else (!ins) */ | ||
371 | ntfs_error(vol->sb, "Received NULL pointer."); | ||
372 | return -EINVAL; | ||
373 | conversion_err: | ||
374 | ntfs_error(vol->sb, "Unicode name contains characters that cannot be " | ||
375 | "converted to character set %s.", nls->charset); | ||
376 | if (ns != *outs) | ||
377 | kfree(ns); | ||
378 | if (wc != -ENAMETOOLONG) | ||
379 | wc = -EILSEQ; | ||
380 | return wc; | ||
381 | mem_err_out: | ||
382 | ntfs_error(vol->sb, "Failed to allocate name!"); | ||
383 | return -ENOMEM; | ||
384 | } | ||
diff --git a/fs/ntfs/upcase.c b/fs/ntfs/upcase.c new file mode 100644 index 000000000000..879cdf1d5bd3 --- /dev/null +++ b/fs/ntfs/upcase.c | |||
@@ -0,0 +1,90 @@ | |||
1 | /* | ||
2 | * upcase.c - Generate the full NTFS Unicode upcase table in little endian. | ||
3 | * Part of the Linux-NTFS project. | ||
4 | * | ||
5 | * Copyright (c) 2001 Richard Russon <ntfs@flatcap.org> | ||
6 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
7 | * | ||
8 | * Modified for mkntfs inclusion 9 June 2001 by Anton Altaparmakov. | ||
9 | * Modified for kernel inclusion 10 September 2001 by Anton Altaparmakov. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License as published by the Free | ||
13 | * Software Foundation; either version 2 of the License, or (at your option) | ||
14 | * any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program (in the main directory of the Linux-NTFS source | ||
23 | * in the file COPYING); if not, write to the Free Software Foundation, | ||
24 | * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
25 | */ | ||
26 | |||
27 | #include "malloc.h" | ||
28 | #include "ntfs.h" | ||
29 | |||
30 | ntfschar *generate_default_upcase(void) | ||
31 | { | ||
32 | static const int uc_run_table[][3] = { /* Start, End, Add */ | ||
33 | {0x0061, 0x007B, -32}, {0x0451, 0x045D, -80}, {0x1F70, 0x1F72, 74}, | ||
34 | {0x00E0, 0x00F7, -32}, {0x045E, 0x0460, -80}, {0x1F72, 0x1F76, 86}, | ||
35 | {0x00F8, 0x00FF, -32}, {0x0561, 0x0587, -48}, {0x1F76, 0x1F78, 100}, | ||
36 | {0x0256, 0x0258, -205}, {0x1F00, 0x1F08, 8}, {0x1F78, 0x1F7A, 128}, | ||
37 | {0x028A, 0x028C, -217}, {0x1F10, 0x1F16, 8}, {0x1F7A, 0x1F7C, 112}, | ||
38 | {0x03AC, 0x03AD, -38}, {0x1F20, 0x1F28, 8}, {0x1F7C, 0x1F7E, 126}, | ||
39 | {0x03AD, 0x03B0, -37}, {0x1F30, 0x1F38, 8}, {0x1FB0, 0x1FB2, 8}, | ||
40 | {0x03B1, 0x03C2, -32}, {0x1F40, 0x1F46, 8}, {0x1FD0, 0x1FD2, 8}, | ||
41 | {0x03C2, 0x03C3, -31}, {0x1F51, 0x1F52, 8}, {0x1FE0, 0x1FE2, 8}, | ||
42 | {0x03C3, 0x03CC, -32}, {0x1F53, 0x1F54, 8}, {0x1FE5, 0x1FE6, 7}, | ||
43 | {0x03CC, 0x03CD, -64}, {0x1F55, 0x1F56, 8}, {0x2170, 0x2180, -16}, | ||
44 | {0x03CD, 0x03CF, -63}, {0x1F57, 0x1F58, 8}, {0x24D0, 0x24EA, -26}, | ||
45 | {0x0430, 0x0450, -32}, {0x1F60, 0x1F68, 8}, {0xFF41, 0xFF5B, -32}, | ||
46 | {0} | ||
47 | }; | ||
48 | |||
49 | static const int uc_dup_table[][2] = { /* Start, End */ | ||
50 | {0x0100, 0x012F}, {0x01A0, 0x01A6}, {0x03E2, 0x03EF}, {0x04CB, 0x04CC}, | ||
51 | {0x0132, 0x0137}, {0x01B3, 0x01B7}, {0x0460, 0x0481}, {0x04D0, 0x04EB}, | ||
52 | {0x0139, 0x0149}, {0x01CD, 0x01DD}, {0x0490, 0x04BF}, {0x04EE, 0x04F5}, | ||
53 | {0x014A, 0x0178}, {0x01DE, 0x01EF}, {0x04BF, 0x04BF}, {0x04F8, 0x04F9}, | ||
54 | {0x0179, 0x017E}, {0x01F4, 0x01F5}, {0x04C1, 0x04C4}, {0x1E00, 0x1E95}, | ||
55 | {0x018B, 0x018B}, {0x01FA, 0x0218}, {0x04C7, 0x04C8}, {0x1EA0, 0x1EF9}, | ||
56 | {0} | ||
57 | }; | ||
58 | |||
59 | static const int uc_word_table[][2] = { /* Offset, Value */ | ||
60 | {0x00FF, 0x0178}, {0x01AD, 0x01AC}, {0x01F3, 0x01F1}, {0x0269, 0x0196}, | ||
61 | {0x0183, 0x0182}, {0x01B0, 0x01AF}, {0x0253, 0x0181}, {0x026F, 0x019C}, | ||
62 | {0x0185, 0x0184}, {0x01B9, 0x01B8}, {0x0254, 0x0186}, {0x0272, 0x019D}, | ||
63 | {0x0188, 0x0187}, {0x01BD, 0x01BC}, {0x0259, 0x018F}, {0x0275, 0x019F}, | ||
64 | {0x018C, 0x018B}, {0x01C6, 0x01C4}, {0x025B, 0x0190}, {0x0283, 0x01A9}, | ||
65 | {0x0192, 0x0191}, {0x01C9, 0x01C7}, {0x0260, 0x0193}, {0x0288, 0x01AE}, | ||
66 | {0x0199, 0x0198}, {0x01CC, 0x01CA}, {0x0263, 0x0194}, {0x0292, 0x01B7}, | ||
67 | {0x01A8, 0x01A7}, {0x01DD, 0x018E}, {0x0268, 0x0197}, | ||
68 | {0} | ||
69 | }; | ||
70 | |||
71 | int i, r; | ||
72 | ntfschar *uc; | ||
73 | |||
74 | uc = ntfs_malloc_nofs(default_upcase_len * sizeof(ntfschar)); | ||
75 | if (!uc) | ||
76 | return uc; | ||
77 | memset(uc, 0, default_upcase_len * sizeof(ntfschar)); | ||
78 | for (i = 0; i < default_upcase_len; i++) | ||
79 | uc[i] = cpu_to_le16(i); | ||
80 | for (r = 0; uc_run_table[r][0]; r++) | ||
81 | for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++) | ||
82 | uc[i] = cpu_to_le16((le16_to_cpu(uc[i]) + | ||
83 | uc_run_table[r][2])); | ||
84 | for (r = 0; uc_dup_table[r][0]; r++) | ||
85 | for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2) | ||
86 | uc[i + 1] = cpu_to_le16(le16_to_cpu(uc[i + 1]) - 1); | ||
87 | for (r = 0; uc_word_table[r][0]; r++) | ||
88 | uc[uc_word_table[r][0]] = cpu_to_le16(uc_word_table[r][1]); | ||
89 | return uc; | ||
90 | } | ||
diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h new file mode 100644 index 000000000000..4b97fa8635a8 --- /dev/null +++ b/fs/ntfs/volume.h | |||
@@ -0,0 +1,171 @@ | |||
1 | /* | ||
2 | * volume.h - Defines for volume structures in NTFS Linux kernel driver. Part | ||
3 | * of the Linux-NTFS project. | ||
4 | * | ||
5 | * Copyright (c) 2001-2004 Anton Altaparmakov | ||
6 | * Copyright (c) 2002 Richard Russon | ||
7 | * | ||
8 | * This program/include file is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as published | ||
10 | * by the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program/include file is distributed in the hope that it will be | ||
14 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
15 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program (in the main directory of the Linux-NTFS | ||
20 | * distribution in the file COPYING); if not, write to the Free Software | ||
21 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 | */ | ||
23 | |||
24 | #ifndef _LINUX_NTFS_VOLUME_H | ||
25 | #define _LINUX_NTFS_VOLUME_H | ||
26 | |||
27 | #include <linux/rwsem.h> | ||
28 | |||
29 | #include "types.h" | ||
30 | #include "layout.h" | ||
31 | |||
32 | /* | ||
33 | * The NTFS in memory super block structure. | ||
34 | */ | ||
35 | typedef struct { | ||
36 | /* | ||
37 | * FIXME: Reorder to have commonly used together element within the | ||
38 | * same cache line, aiming at a cache line size of 32 bytes. Aim for | ||
39 | * 64 bytes for less commonly used together elements. Put most commonly | ||
40 | * used elements to front of structure. Obviously do this only when the | ||
41 | * structure has stabilized... (AIA) | ||
42 | */ | ||
43 | /* Device specifics. */ | ||
44 | struct super_block *sb; /* Pointer back to the super_block, | ||
45 | so we don't have to get the offset | ||
46 | every time. */ | ||
47 | LCN nr_blocks; /* Number of NTFS_BLOCK_SIZE bytes | ||
48 | sized blocks on the device. */ | ||
49 | /* Configuration provided by user at mount time. */ | ||
50 | unsigned long flags; /* Miscellaneous flags, see below. */ | ||
51 | uid_t uid; /* uid that files will be mounted as. */ | ||
52 | gid_t gid; /* gid that files will be mounted as. */ | ||
53 | mode_t fmask; /* The mask for file permissions. */ | ||
54 | mode_t dmask; /* The mask for directory | ||
55 | permissions. */ | ||
56 | u8 mft_zone_multiplier; /* Initial mft zone multiplier. */ | ||
57 | u8 on_errors; /* What to do on file system errors. */ | ||
58 | /* NTFS bootsector provided information. */ | ||
59 | u16 sector_size; /* in bytes */ | ||
60 | u8 sector_size_bits; /* log2(sector_size) */ | ||
61 | u32 cluster_size; /* in bytes */ | ||
62 | u32 cluster_size_mask; /* cluster_size - 1 */ | ||
63 | u8 cluster_size_bits; /* log2(cluster_size) */ | ||
64 | u32 mft_record_size; /* in bytes */ | ||
65 | u32 mft_record_size_mask; /* mft_record_size - 1 */ | ||
66 | u8 mft_record_size_bits; /* log2(mft_record_size) */ | ||
67 | u32 index_record_size; /* in bytes */ | ||
68 | u32 index_record_size_mask; /* index_record_size - 1 */ | ||
69 | u8 index_record_size_bits; /* log2(index_record_size) */ | ||
70 | LCN nr_clusters; /* Volume size in clusters == number of | ||
71 | bits in lcn bitmap. */ | ||
72 | LCN mft_lcn; /* Cluster location of mft data. */ | ||
73 | LCN mftmirr_lcn; /* Cluster location of copy of mft. */ | ||
74 | u64 serial_no; /* The volume serial number. */ | ||
75 | /* Mount specific NTFS information. */ | ||
76 | u32 upcase_len; /* Number of entries in upcase[]. */ | ||
77 | ntfschar *upcase; /* The upcase table. */ | ||
78 | |||
79 | s32 attrdef_size; /* Size of the attribute definition | ||
80 | table in bytes. */ | ||
81 | ATTR_DEF *attrdef; /* Table of attribute definitions. | ||
82 | Obtained from FILE_AttrDef. */ | ||
83 | |||
84 | #ifdef NTFS_RW | ||
85 | /* Variables used by the cluster and mft allocators. */ | ||
86 | s64 mft_data_pos; /* Mft record number at which to | ||
87 | allocate the next mft record. */ | ||
88 | LCN mft_zone_start; /* First cluster of the mft zone. */ | ||
89 | LCN mft_zone_end; /* First cluster beyond the mft zone. */ | ||
90 | LCN mft_zone_pos; /* Current position in the mft zone. */ | ||
91 | LCN data1_zone_pos; /* Current position in the first data | ||
92 | zone. */ | ||
93 | LCN data2_zone_pos; /* Current position in the second data | ||
94 | zone. */ | ||
95 | #endif /* NTFS_RW */ | ||
96 | |||
97 | struct inode *mft_ino; /* The VFS inode of $MFT. */ | ||
98 | |||
99 | struct inode *mftbmp_ino; /* Attribute inode for $MFT/$BITMAP. */ | ||
100 | struct rw_semaphore mftbmp_lock; /* Lock for serializing accesses to the | ||
101 | mft record bitmap ($MFT/$BITMAP). */ | ||
102 | #ifdef NTFS_RW | ||
103 | struct inode *mftmirr_ino; /* The VFS inode of $MFTMirr. */ | ||
104 | int mftmirr_size; /* Size of mft mirror in mft records. */ | ||
105 | |||
106 | struct inode *logfile_ino; /* The VFS inode of $LogFile. */ | ||
107 | #endif /* NTFS_RW */ | ||
108 | |||
109 | struct inode *lcnbmp_ino; /* The VFS inode of $Bitmap. */ | ||
110 | struct rw_semaphore lcnbmp_lock; /* Lock for serializing accesses to the | ||
111 | cluster bitmap ($Bitmap/$DATA). */ | ||
112 | |||
113 | struct inode *vol_ino; /* The VFS inode of $Volume. */ | ||
114 | VOLUME_FLAGS vol_flags; /* Volume flags. */ | ||
115 | u8 major_ver; /* Ntfs major version of volume. */ | ||
116 | u8 minor_ver; /* Ntfs minor version of volume. */ | ||
117 | |||
118 | struct inode *root_ino; /* The VFS inode of the root | ||
119 | directory. */ | ||
120 | struct inode *secure_ino; /* The VFS inode of $Secure (NTFS3.0+ | ||
121 | only, otherwise NULL). */ | ||
122 | struct inode *extend_ino; /* The VFS inode of $Extend (NTFS3.0+ | ||
123 | only, otherwise NULL). */ | ||
124 | #ifdef NTFS_RW | ||
125 | /* $Quota stuff is NTFS3.0+ specific. Unused/NULL otherwise. */ | ||
126 | struct inode *quota_ino; /* The VFS inode of $Quota. */ | ||
127 | struct inode *quota_q_ino; /* Attribute inode for $Quota/$Q. */ | ||
128 | #endif /* NTFS_RW */ | ||
129 | struct nls_table *nls_map; | ||
130 | } ntfs_volume; | ||
131 | |||
/*
 * Bit numbers (not masks) of the flags kept in the flags field of the
 * ntfs_volume structure.  Each description says what a set bit means.
 */
typedef enum {
	/* Volume has errors, prevent remount rw. */
	NV_Errors,
	/* Return system files in ntfs_readdir(). */
	NV_ShowSystemFiles,
	/* Treat file names as case sensitive and create filenames in the
	   POSIX namespace.  Otherwise be case insensitive and create file
	   names in WIN32 namespace. */
	NV_CaseSensitive,
	/* $LogFile journal is empty. */
	NV_LogFileEmpty,
	/* $Quota is out of date. */
	NV_QuotaOutOfDate,
} ntfs_volume_flags;
145 | |||
146 | /* | ||
147 | * Macro tricks to expand the NVolFoo(), NVolSetFoo(), and NVolClearFoo() | ||
148 | * functions. | ||
149 | */ | ||
150 | #define NVOL_FNS(flag) \ | ||
151 | static inline int NVol##flag(ntfs_volume *vol) \ | ||
152 | { \ | ||
153 | return test_bit(NV_##flag, &(vol)->flags); \ | ||
154 | } \ | ||
155 | static inline void NVolSet##flag(ntfs_volume *vol) \ | ||
156 | { \ | ||
157 | set_bit(NV_##flag, &(vol)->flags); \ | ||
158 | } \ | ||
159 | static inline void NVolClear##flag(ntfs_volume *vol) \ | ||
160 | { \ | ||
161 | clear_bit(NV_##flag, &(vol)->flags); \ | ||
162 | } | ||
163 | |||
164 | /* Emit the ntfs volume bitops functions. */ | ||
165 | NVOL_FNS(Errors) | ||
166 | NVOL_FNS(ShowSystemFiles) | ||
167 | NVOL_FNS(CaseSensitive) | ||
168 | NVOL_FNS(LogFileEmpty) | ||
169 | NVOL_FNS(QuotaOutOfDate) | ||
170 | |||
171 | #endif /* _LINUX_NTFS_VOLUME_H */ | ||