aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ntfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ntfs')
-rw-r--r--fs/ntfs/ChangeLog56
-rw-r--r--fs/ntfs/Makefile2
-rw-r--r--fs/ntfs/attrib.c977
-rw-r--r--fs/ntfs/attrib.h10
-rw-r--r--fs/ntfs/inode.c514
-rw-r--r--fs/ntfs/lcnalloc.c56
-rw-r--r--fs/ntfs/lcnalloc.h43
-rw-r--r--fs/ntfs/mft.c26
8 files changed, 1525 insertions, 159 deletions
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index de58579a1d0e..9f4674a026f2 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -3,16 +3,14 @@ ToDo/Notes:
3 - In between ntfs_prepare/commit_write, need exclusion between 3 - In between ntfs_prepare/commit_write, need exclusion between
4 simultaneous file extensions. This is given to us by holding i_sem 4 simultaneous file extensions. This is given to us by holding i_sem
5 on the inode. The only places in the kernel when a file is resized 5 on the inode. The only places in the kernel when a file is resized
6 are prepare/commit write and truncate for both of which i_sem is 6 are prepare/commit write and ntfs_truncate() for both of which i_sem
7 held. Just have to be careful in readpage/writepage and all other 7 is held. Just have to be careful in read-/writepage and other helpers
8 helpers not running under i_sem that we play nice... 8 not running under i_sem that we play nice... Also need to be careful
9 Also need to be careful with initialized_size extension in 9 with initialized_size extension in ntfs_prepare_write and writepage.
10 ntfs_prepare_write. Basically, just be _very_ careful in this code... 10 UPDATE: The only things that need to be checked are
11 UPDATE: The only things that need to be checked are read/writepage 11 prepare/commit_write as well as the compressed write and the other
12 which do not hold i_sem. Note writepage cannot change i_size but it 12 attribute resize/write cases like index attributes, etc. For now
13 needs to cope with a concurrent i_size change, just like readpage. 13 none of these are implemented so are safe.
14 Also both need to cope with concurrent changes to the other sizes,
15 i.e. initialized/allocated/compressed size, as well.
16 - Implement mft.c::sync_mft_mirror_umount(). We currently will just 14 - Implement mft.c::sync_mft_mirror_umount(). We currently will just
17 leave the volume dirty on umount if the final iput(vol->mft_ino) 15 leave the volume dirty on umount if the final iput(vol->mft_ino)
18 causes a write of any mirrored mft records due to the mft mirror 16 causes a write of any mirrored mft records due to the mft mirror
@@ -22,6 +20,44 @@ ToDo/Notes:
22 - Enable the code for setting the NT4 compatibility flag when we start 20 - Enable the code for setting the NT4 compatibility flag when we start
23 making NTFS 1.2 specific modifications. 21 making NTFS 1.2 specific modifications.
24 22
232.1.25-WIP
24
25 - Change ntfs_map_runlist_nolock(), ntfs_attr_find_vcn_nolock() and
26 {__,}ntfs_cluster_free() to also take an optional attribute search
27 context as argument. This allows calling these functions with the
28 mft record mapped. Update all callers.
29 - Fix potential deadlock in ntfs_mft_data_extend_allocation_nolock()
30 error handling by passing in the active search context when calling
31 ntfs_cluster_free().
32 - Change ntfs_cluster_alloc() to take an extra boolean parameter
33 specifying whether the cluster are being allocated to extend an
34 attribute or to fill a hole.
35 - Change ntfs_attr_make_non_resident() to call ntfs_cluster_alloc()
36 with @is_extension set to TRUE and remove the runlist terminator
37 fixup code as this is now done by ntfs_cluster_alloc().
38 - Change ntfs_attr_make_non_resident to take the attribute value size
39 as an extra parameter. This is needed since we need to know the size
40 before we can map the mft record and our callers always know it. The
41 reason we cannot simply read the size from the vfs inode i_size is
42 that this is not necessarily uptodate. This happens when
43 ntfs_attr_make_non_resident() is called in the ->truncate call path.
44 - Fix ntfs_attr_make_non_resident() to update the vfs inode i_blocks
45 which is zero for a resident attribute but should no longer be zero
46 once the attribute is non-resident as it then has real clusters
47 allocated.
48 - Add fs/ntfs/attrib.[hc]::ntfs_attr_extend_allocation(), a function to
49 extend the allocation of an attribute. Optionally, the data size,
50 but not the initialized size can be extended, too.
51 - Implement fs/ntfs/inode.[hc]::ntfs_truncate(). It only supports
52 uncompressed and unencrypted files.
53 - Enable ATTR_SIZE attribute changes in ntfs_setattr(). This completes
54 the initial implementation of file truncation. Now both open(2)ing
55 a file with the O_TRUNC flag and the {,f}truncate(2) system calls
56 will resize a file appropriately. The limitations are that only
57 uncompressed and unencrypted files are supported. Also, there is
58 only very limited support for highly fragmented files (the ones whose
59 $DATA attribute is split into multiple attribute extents).
60
252.1.24 - Lots of bug fixes and support more clean journal states. 612.1.24 - Lots of bug fixes and support more clean journal states.
26 62
27 - Support journals ($LogFile) which have been modified by chkdsk. This 63 - Support journals ($LogFile) which have been modified by chkdsk. This
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 894b2b876d35..a3ce2c0e7dd9 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ 6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
7 unistr.o upcase.o 7 unistr.o upcase.o
8 8
9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.24\" 9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.25-WIP\"
10 10
11ifeq ($(CONFIG_NTFS_DEBUG),y) 11ifeq ($(CONFIG_NTFS_DEBUG),y)
12EXTRA_CFLAGS += -DDEBUG 12EXTRA_CFLAGS += -DDEBUG
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 3f9a4ff42ee5..bc25e88ad468 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -36,9 +36,27 @@
36 * ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode 36 * ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode
37 * @ni: ntfs inode for which to map (part of) a runlist 37 * @ni: ntfs inode for which to map (part of) a runlist
38 * @vcn: map runlist part containing this vcn 38 * @vcn: map runlist part containing this vcn
39 * @ctx: active attribute search context if present or NULL if not
39 * 40 *
40 * Map the part of a runlist containing the @vcn of the ntfs inode @ni. 41 * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
41 * 42 *
43 * If @ctx is specified, it is an active search context of @ni and its base mft
44 * record. This is needed when ntfs_map_runlist_nolock() encounters unmapped
45 * runlist fragments and allows their mapping. If you do not have the mft
46 * record mapped, you can specify @ctx as NULL and ntfs_map_runlist_nolock()
47 * will perform the necessary mapping and unmapping.
48 *
49 * Note, ntfs_map_runlist_nolock() saves the state of @ctx on entry and
50 * restores it before returning. Thus, @ctx will be left pointing to the same
51 * attribute on return as on entry. However, the actual pointers in @ctx may
52 * point to different memory locations on return, so you must remember to reset
53 * any cached pointers from the @ctx, i.e. after the call to
54 * ntfs_map_runlist_nolock(), you will probably want to do:
55 * m = ctx->mrec;
56 * a = ctx->attr;
57 * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
58 * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
59 *
42 * Return 0 on success and -errno on error. There is one special error code 60 * Return 0 on success and -errno on error. There is one special error code
43 * which is not an error as such. This is -ENOENT. It means that @vcn is out 61 * which is not an error as such. This is -ENOENT. It means that @vcn is out
44 * of bounds of the runlist. 62 * of bounds of the runlist.
@@ -46,19 +64,32 @@
46 * Note the runlist can be NULL after this function returns if @vcn is zero and 64 * Note the runlist can be NULL after this function returns if @vcn is zero and
47 * the attribute has zero allocated size, i.e. there simply is no runlist. 65 * the attribute has zero allocated size, i.e. there simply is no runlist.
48 * 66 *
49 * Locking: - The runlist must be locked for writing. 67 * WARNING: If @ctx is supplied, regardless of whether success or failure is
50 * - This function modifies the runlist. 68 * returned, you need to check IS_ERR(@ctx->mrec) and if TRUE the @ctx
69 * is no longer valid, i.e. you need to either call
70 * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
71 * In that case PTR_ERR(@ctx->mrec) will give you the error code for
72 * why the mapping of the old inode failed.
73 *
74 * Locking: - The runlist described by @ni must be locked for writing on entry
75 * and is locked on return. Note the runlist will be modified.
76 * - If @ctx is NULL, the base mft record of @ni must not be mapped on
77 * entry and it will be left unmapped on return.
78 * - If @ctx is not NULL, the base mft record must be mapped on entry
79 * and it will be left mapped on return.
51 */ 80 */
52int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn) 81int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn, ntfs_attr_search_ctx *ctx)
53{ 82{
54 VCN end_vcn; 83 VCN end_vcn;
84 unsigned long flags;
55 ntfs_inode *base_ni; 85 ntfs_inode *base_ni;
56 MFT_RECORD *m; 86 MFT_RECORD *m;
57 ATTR_RECORD *a; 87 ATTR_RECORD *a;
58 ntfs_attr_search_ctx *ctx;
59 runlist_element *rl; 88 runlist_element *rl;
60 unsigned long flags; 89 struct page *put_this_page = NULL;
61 int err = 0; 90 int err = 0;
91 BOOL ctx_is_temporary, ctx_needs_reset;
92 ntfs_attr_search_ctx old_ctx;
62 93
63 ntfs_debug("Mapping runlist part containing vcn 0x%llx.", 94 ntfs_debug("Mapping runlist part containing vcn 0x%llx.",
64 (unsigned long long)vcn); 95 (unsigned long long)vcn);
@@ -66,20 +97,77 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
66 base_ni = ni; 97 base_ni = ni;
67 else 98 else
68 base_ni = ni->ext.base_ntfs_ino; 99 base_ni = ni->ext.base_ntfs_ino;
69 m = map_mft_record(base_ni); 100 if (!ctx) {
70 if (IS_ERR(m)) 101 ctx_is_temporary = ctx_needs_reset = TRUE;
71 return PTR_ERR(m); 102 m = map_mft_record(base_ni);
72 ctx = ntfs_attr_get_search_ctx(base_ni, m); 103 if (IS_ERR(m))
73 if (unlikely(!ctx)) { 104 return PTR_ERR(m);
74 err = -ENOMEM; 105 ctx = ntfs_attr_get_search_ctx(base_ni, m);
75 goto err_out; 106 if (unlikely(!ctx)) {
107 err = -ENOMEM;
108 goto err_out;
109 }
110 } else {
111 VCN allocated_size_vcn;
112
113 BUG_ON(IS_ERR(ctx->mrec));
114 a = ctx->attr;
115 BUG_ON(!a->non_resident);
116 ctx_is_temporary = FALSE;
117 end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
118 read_lock_irqsave(&ni->size_lock, flags);
119 allocated_size_vcn = ni->allocated_size >>
120 ni->vol->cluster_size_bits;
121 read_unlock_irqrestore(&ni->size_lock, flags);
122 if (!a->data.non_resident.lowest_vcn && end_vcn <= 0)
123 end_vcn = allocated_size_vcn - 1;
124 /*
125 * If we already have the attribute extent containing @vcn in
126 * @ctx, no need to look it up again. We slightly cheat in
127 * that if vcn exceeds the allocated size, we will refuse to
128 * map the runlist below, so there is definitely no need to get
129 * the right attribute extent.
130 */
131 if (vcn >= allocated_size_vcn || (a->type == ni->type &&
132 a->name_length == ni->name_len &&
133 !memcmp((u8*)a + le16_to_cpu(a->name_offset),
134 ni->name, ni->name_len) &&
135 sle64_to_cpu(a->data.non_resident.lowest_vcn)
136 <= vcn && end_vcn >= vcn))
137 ctx_needs_reset = FALSE;
138 else {
139 /* Save the old search context. */
140 old_ctx = *ctx;
141 /*
142 * If the currently mapped (extent) inode is not the
143 * base inode we will unmap it when we reinitialize the
144 * search context which means we need to get a
145 * reference to the page containing the mapped mft
146 * record so we do not accidentally drop changes to the
147 * mft record when it has not been marked dirty yet.
148 */
149 if (old_ctx.base_ntfs_ino && old_ctx.ntfs_ino !=
150 old_ctx.base_ntfs_ino) {
151 put_this_page = old_ctx.ntfs_ino->page;
152 page_cache_get(put_this_page);
153 }
154 /*
155 * Reinitialize the search context so we can lookup the
156 * needed attribute extent.
157 */
158 ntfs_attr_reinit_search_ctx(ctx);
159 ctx_needs_reset = TRUE;
160 }
76 } 161 }
77 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 162 if (ctx_needs_reset) {
78 CASE_SENSITIVE, vcn, NULL, 0, ctx); 163 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
79 if (unlikely(err)) { 164 CASE_SENSITIVE, vcn, NULL, 0, ctx);
80 if (err == -ENOENT) 165 if (unlikely(err)) {
81 err = -EIO; 166 if (err == -ENOENT)
82 goto err_out; 167 err = -EIO;
168 goto err_out;
169 }
170 BUG_ON(!ctx->attr->non_resident);
83 } 171 }
84 a = ctx->attr; 172 a = ctx->attr;
85 /* 173 /*
@@ -89,11 +177,9 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
89 * ntfs_mapping_pairs_decompress() fails. 177 * ntfs_mapping_pairs_decompress() fails.
90 */ 178 */
91 end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1; 179 end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1;
92 if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1)) { 180 if (!a->data.non_resident.lowest_vcn && end_vcn == 1)
93 read_lock_irqsave(&ni->size_lock, flags); 181 end_vcn = sle64_to_cpu(a->data.non_resident.allocated_size) >>
94 end_vcn = ni->allocated_size >> ni->vol->cluster_size_bits; 182 ni->vol->cluster_size_bits;
95 read_unlock_irqrestore(&ni->size_lock, flags);
96 }
97 if (unlikely(vcn >= end_vcn)) { 183 if (unlikely(vcn >= end_vcn)) {
98 err = -ENOENT; 184 err = -ENOENT;
99 goto err_out; 185 goto err_out;
@@ -104,9 +190,93 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
104 else 190 else
105 ni->runlist.rl = rl; 191 ni->runlist.rl = rl;
106err_out: 192err_out:
107 if (likely(ctx)) 193 if (ctx_is_temporary) {
108 ntfs_attr_put_search_ctx(ctx); 194 if (likely(ctx))
109 unmap_mft_record(base_ni); 195 ntfs_attr_put_search_ctx(ctx);
196 unmap_mft_record(base_ni);
197 } else if (ctx_needs_reset) {
198 /*
199 * If there is no attribute list, restoring the search context
200 * is acomplished simply by copying the saved context back over
201 * the caller supplied context. If there is an attribute list,
202 * things are more complicated as we need to deal with mapping
203 * of mft records and resulting potential changes in pointers.
204 */
205 if (NInoAttrList(base_ni)) {
206 /*
207 * If the currently mapped (extent) inode is not the
208 * one we had before, we need to unmap it and map the
209 * old one.
210 */
211 if (ctx->ntfs_ino != old_ctx.ntfs_ino) {
212 /*
213 * If the currently mapped inode is not the
214 * base inode, unmap it.
215 */
216 if (ctx->base_ntfs_ino && ctx->ntfs_ino !=
217 ctx->base_ntfs_ino) {
218 unmap_extent_mft_record(ctx->ntfs_ino);
219 ctx->mrec = ctx->base_mrec;
220 BUG_ON(!ctx->mrec);
221 }
222 /*
223 * If the old mapped inode is not the base
224 * inode, map it.
225 */
226 if (old_ctx.base_ntfs_ino &&
227 old_ctx.ntfs_ino !=
228 old_ctx.base_ntfs_ino) {
229retry_map:
230 ctx->mrec = map_mft_record(
231 old_ctx.ntfs_ino);
232 /*
233 * Something bad has happened. If out
234 * of memory retry till it succeeds.
235 * Any other errors are fatal and we
236 * return the error code in ctx->mrec.
237 * Let the caller deal with it... We
238 * just need to fudge things so the
239 * caller can reinit and/or put the
240 * search context safely.
241 */
242 if (IS_ERR(ctx->mrec)) {
243 if (PTR_ERR(ctx->mrec) ==
244 -ENOMEM) {
245 schedule();
246 goto retry_map;
247 } else
248 old_ctx.ntfs_ino =
249 old_ctx.
250 base_ntfs_ino;
251 }
252 }
253 }
254 /* Update the changed pointers in the saved context. */
255 if (ctx->mrec != old_ctx.mrec) {
256 if (!IS_ERR(ctx->mrec))
257 old_ctx.attr = (ATTR_RECORD*)(
258 (u8*)ctx->mrec +
259 ((u8*)old_ctx.attr -
260 (u8*)old_ctx.mrec));
261 old_ctx.mrec = ctx->mrec;
262 }
263 }
264 /* Restore the search context to the saved one. */
265 *ctx = old_ctx;
266 /*
267 * We drop the reference on the page we took earlier. In the
268 * case that IS_ERR(ctx->mrec) is true this means we might lose
269 * some changes to the mft record that had been made between
270 * the last time it was marked dirty/written out and now. This
271 * at this stage is not a problem as the mapping error is fatal
272 * enough that the mft record cannot be written out anyway and
273 * the caller is very likely to shutdown the whole inode
274 * immediately and mark the volume dirty for chkdsk to pick up
275 * the pieces anyway.
276 */
277 if (put_this_page)
278 page_cache_release(put_this_page);
279 }
110 return err; 280 return err;
111} 281}
112 282
@@ -122,8 +292,8 @@ err_out:
122 * of bounds of the runlist. 292 * of bounds of the runlist.
123 * 293 *
124 * Locking: - The runlist must be unlocked on entry and is unlocked on return. 294 * Locking: - The runlist must be unlocked on entry and is unlocked on return.
125 * - This function takes the runlist lock for writing and modifies the 295 * - This function takes the runlist lock for writing and may modify
126 * runlist. 296 * the runlist.
127 */ 297 */
128int ntfs_map_runlist(ntfs_inode *ni, VCN vcn) 298int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
129{ 299{
@@ -133,7 +303,7 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
133 /* Make sure someone else didn't do the work while we were sleeping. */ 303 /* Make sure someone else didn't do the work while we were sleeping. */
134 if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <= 304 if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <=
135 LCN_RL_NOT_MAPPED)) 305 LCN_RL_NOT_MAPPED))
136 err = ntfs_map_runlist_nolock(ni, vcn); 306 err = ntfs_map_runlist_nolock(ni, vcn, NULL);
137 up_write(&ni->runlist.lock); 307 up_write(&ni->runlist.lock);
138 return err; 308 return err;
139} 309}
@@ -212,7 +382,7 @@ retry_remap:
212 goto retry_remap; 382 goto retry_remap;
213 } 383 }
214 } 384 }
215 err = ntfs_map_runlist_nolock(ni, vcn); 385 err = ntfs_map_runlist_nolock(ni, vcn, NULL);
216 if (!write_locked) { 386 if (!write_locked) {
217 up_write(&ni->runlist.lock); 387 up_write(&ni->runlist.lock);
218 down_read(&ni->runlist.lock); 388 down_read(&ni->runlist.lock);
@@ -236,9 +406,9 @@ retry_remap:
236 406
237/** 407/**
238 * ntfs_attr_find_vcn_nolock - find a vcn in the runlist of an ntfs inode 408 * ntfs_attr_find_vcn_nolock - find a vcn in the runlist of an ntfs inode
239 * @ni: ntfs inode describing the runlist to search 409 * @ni: ntfs inode describing the runlist to search
240 * @vcn: vcn to find 410 * @vcn: vcn to find
241 * @write_locked: true if the runlist is locked for writing 411 * @ctx: active attribute search context if present or NULL if not
242 * 412 *
243 * Find the virtual cluster number @vcn in the runlist described by the ntfs 413 * Find the virtual cluster number @vcn in the runlist described by the ntfs
244 * inode @ni and return the address of the runlist element containing the @vcn. 414 * inode @ni and return the address of the runlist element containing the @vcn.
@@ -246,9 +416,22 @@ retry_remap:
246 * If the @vcn is not mapped yet, the attempt is made to map the attribute 416 * If the @vcn is not mapped yet, the attempt is made to map the attribute
247 * extent containing the @vcn and the vcn to lcn conversion is retried. 417 * extent containing the @vcn and the vcn to lcn conversion is retried.
248 * 418 *
249 * If @write_locked is true the caller has locked the runlist for writing and 419 * If @ctx is specified, it is an active search context of @ni and its base mft
250 * if false for reading. 420 * record. This is needed when ntfs_attr_find_vcn_nolock() encounters unmapped
251 * 421 * runlist fragments and allows their mapping. If you do not have the mft
422 * record mapped, you can specify @ctx as NULL and ntfs_attr_find_vcn_nolock()
423 * will perform the necessary mapping and unmapping.
424 *
425 * Note, ntfs_attr_find_vcn_nolock() saves the state of @ctx on entry and
426 * restores it before returning. Thus, @ctx will be left pointing to the same
427 * attribute on return as on entry. However, the actual pointers in @ctx may
428 * point to different memory locations on return, so you must remember to reset
429 * any cached pointers from the @ctx, i.e. after the call to
430 * ntfs_attr_find_vcn_nolock(), you will probably want to do:
431 * m = ctx->mrec;
432 * a = ctx->attr;
433 * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
434 * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
252 * Note you need to distinguish between the lcn of the returned runlist element 435 * Note you need to distinguish between the lcn of the returned runlist element
253 * being >= 0 and LCN_HOLE. In the latter case you have to return zeroes on 436 * being >= 0 and LCN_HOLE. In the latter case you have to return zeroes on
254 * read and allocate clusters on write. 437 * read and allocate clusters on write.
@@ -263,22 +446,31 @@ retry_remap:
263 * -ENOMEM - Not enough memory to map runlist. 446 * -ENOMEM - Not enough memory to map runlist.
264 * -EIO - Critical error (runlist/file is corrupt, i/o error, etc). 447 * -EIO - Critical error (runlist/file is corrupt, i/o error, etc).
265 * 448 *
266 * Locking: - The runlist must be locked on entry and is left locked on return. 449 * WARNING: If @ctx is supplied, regardless of whether success or failure is
267 * - If @write_locked is FALSE, i.e. the runlist is locked for reading, 450 * returned, you need to check IS_ERR(@ctx->mrec) and if TRUE the @ctx
268 * the lock may be dropped inside the function so you cannot rely on 451 * is no longer valid, i.e. you need to either call
269 * the runlist still being the same when this function returns. 452 * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
453 * In that case PTR_ERR(@ctx->mrec) will give you the error code for
454 * why the mapping of the old inode failed.
455 *
456 * Locking: - The runlist described by @ni must be locked for writing on entry
457 * and is locked on return. Note the runlist may be modified when
458 * needed runlist fragments need to be mapped.
459 * - If @ctx is NULL, the base mft record of @ni must not be mapped on
460 * entry and it will be left unmapped on return.
461 * - If @ctx is not NULL, the base mft record must be mapped on entry
462 * and it will be left mapped on return.
270 */ 463 */
271runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn, 464runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
272 const BOOL write_locked) 465 ntfs_attr_search_ctx *ctx)
273{ 466{
274 unsigned long flags; 467 unsigned long flags;
275 runlist_element *rl; 468 runlist_element *rl;
276 int err = 0; 469 int err = 0;
277 BOOL is_retry = FALSE; 470 BOOL is_retry = FALSE;
278 471
279 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.", 472 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, with%s ctx.",
280 ni->mft_no, (unsigned long long)vcn, 473 ni->mft_no, (unsigned long long)vcn, ctx ? "" : "out");
281 write_locked ? "write" : "read");
282 BUG_ON(!ni); 474 BUG_ON(!ni);
283 BUG_ON(!NInoNonResident(ni)); 475 BUG_ON(!NInoNonResident(ni));
284 BUG_ON(vcn < 0); 476 BUG_ON(vcn < 0);
@@ -312,33 +504,22 @@ retry_remap:
312 } 504 }
313 if (!err && !is_retry) { 505 if (!err && !is_retry) {
314 /* 506 /*
315 * The @vcn is in an unmapped region, map the runlist and 507 * If the search context is invalid we cannot map the unmapped
316 * retry. 508 * region.
317 */ 509 */
318 if (!write_locked) { 510 if (IS_ERR(ctx->mrec))
319 up_read(&ni->runlist.lock); 511 err = PTR_ERR(ctx->mrec);
320 down_write(&ni->runlist.lock); 512 else {
321 if (unlikely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) != 513 /*
322 LCN_RL_NOT_MAPPED)) { 514 * The @vcn is in an unmapped region, map the runlist
323 up_write(&ni->runlist.lock); 515 * and retry.
324 down_read(&ni->runlist.lock); 516 */
517 err = ntfs_map_runlist_nolock(ni, vcn, ctx);
518 if (likely(!err)) {
519 is_retry = TRUE;
325 goto retry_remap; 520 goto retry_remap;
326 } 521 }
327 } 522 }
328 err = ntfs_map_runlist_nolock(ni, vcn);
329 if (!write_locked) {
330 up_write(&ni->runlist.lock);
331 down_read(&ni->runlist.lock);
332 }
333 if (likely(!err)) {
334 is_retry = TRUE;
335 goto retry_remap;
336 }
337 /*
338 * -EINVAL coming from a failed mapping attempt is equivalent
339 * to i/o error for us as it should not happen in our code
340 * paths.
341 */
342 if (err == -EINVAL) 523 if (err == -EINVAL)
343 err = -EIO; 524 err = -EIO;
344 } else if (!err) 525 } else if (!err)
@@ -1011,6 +1192,7 @@ int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
1011 ntfs_inode *base_ni; 1192 ntfs_inode *base_ni;
1012 1193
1013 ntfs_debug("Entering."); 1194 ntfs_debug("Entering.");
1195 BUG_ON(IS_ERR(ctx->mrec));
1014 if (ctx->base_ntfs_ino) 1196 if (ctx->base_ntfs_ino)
1015 base_ni = ctx->base_ntfs_ino; 1197 base_ni = ctx->base_ntfs_ino;
1016 else 1198 else
@@ -1319,10 +1501,17 @@ int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
1319/** 1501/**
1320 * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute 1502 * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute
1321 * @ni: ntfs inode describing the attribute to convert 1503 * @ni: ntfs inode describing the attribute to convert
1504 * @data_size: size of the resident data to copy to the non-resident attribute
1322 * 1505 *
1323 * Convert the resident ntfs attribute described by the ntfs inode @ni to a 1506 * Convert the resident ntfs attribute described by the ntfs inode @ni to a
1324 * non-resident one. 1507 * non-resident one.
1325 * 1508 *
1509 * @data_size must be equal to the attribute value size. This is needed since
1510 * we need to know the size before we can map the mft record and our callers
1511 * always know it. The reason we cannot simply read the size from the vfs
1512 * inode i_size is that this is not necessarily uptodate. This happens when
1513 * ntfs_attr_make_non_resident() is called in the ->truncate call path(s).
1514 *
1326 * Return 0 on success and -errno on error. The following error return codes 1515 * Return 0 on success and -errno on error. The following error return codes
1327 * are defined: 1516 * are defined:
1328 * -EPERM - The attribute is not allowed to be non-resident. 1517 * -EPERM - The attribute is not allowed to be non-resident.
@@ -1343,7 +1532,7 @@ int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
1343 * 1532 *
1344 * Locking: - The caller must hold i_sem on the inode. 1533 * Locking: - The caller must hold i_sem on the inode.
1345 */ 1534 */
1346int ntfs_attr_make_non_resident(ntfs_inode *ni) 1535int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
1347{ 1536{
1348 s64 new_size; 1537 s64 new_size;
1349 struct inode *vi = VFS_I(ni); 1538 struct inode *vi = VFS_I(ni);
@@ -1381,11 +1570,9 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
1381 * The size needs to be aligned to a cluster boundary for allocation 1570 * The size needs to be aligned to a cluster boundary for allocation
1382 * purposes. 1571 * purposes.
1383 */ 1572 */
1384 new_size = (i_size_read(vi) + vol->cluster_size - 1) & 1573 new_size = (data_size + vol->cluster_size - 1) &
1385 ~(vol->cluster_size - 1); 1574 ~(vol->cluster_size - 1);
1386 if (new_size > 0) { 1575 if (new_size > 0) {
1387 runlist_element *rl2;
1388
1389 /* 1576 /*
1390 * Will need the page later and since the page lock nests 1577 * Will need the page later and since the page lock nests
1391 * outside all ntfs locks, we need to get the page now. 1578 * outside all ntfs locks, we need to get the page now.
@@ -1396,7 +1583,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
1396 return -ENOMEM; 1583 return -ENOMEM;
1397 /* Start by allocating clusters to hold the attribute value. */ 1584 /* Start by allocating clusters to hold the attribute value. */
1398 rl = ntfs_cluster_alloc(vol, 0, new_size >> 1585 rl = ntfs_cluster_alloc(vol, 0, new_size >>
1399 vol->cluster_size_bits, -1, DATA_ZONE); 1586 vol->cluster_size_bits, -1, DATA_ZONE, TRUE);
1400 if (IS_ERR(rl)) { 1587 if (IS_ERR(rl)) {
1401 err = PTR_ERR(rl); 1588 err = PTR_ERR(rl);
1402 ntfs_debug("Failed to allocate cluster%s, error code " 1589 ntfs_debug("Failed to allocate cluster%s, error code "
@@ -1405,12 +1592,6 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
1405 err); 1592 err);
1406 goto page_err_out; 1593 goto page_err_out;
1407 } 1594 }
1408 /* Change the runlist terminator to LCN_ENOENT. */
1409 rl2 = rl;
1410 while (rl2->length)
1411 rl2++;
1412 BUG_ON(rl2->lcn != LCN_RL_NOT_MAPPED);
1413 rl2->lcn = LCN_ENOENT;
1414 } else { 1595 } else {
1415 rl = NULL; 1596 rl = NULL;
1416 page = NULL; 1597 page = NULL;
@@ -1473,7 +1654,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
1473 * attribute value. 1654 * attribute value.
1474 */ 1655 */
1475 attr_size = le32_to_cpu(a->data.resident.value_length); 1656 attr_size = le32_to_cpu(a->data.resident.value_length);
1476 BUG_ON(attr_size != i_size_read(vi)); 1657 BUG_ON(attr_size != data_size);
1477 if (page && !PageUptodate(page)) { 1658 if (page && !PageUptodate(page)) {
1478 kaddr = kmap_atomic(page, KM_USER0); 1659 kaddr = kmap_atomic(page, KM_USER0);
1479 memcpy(kaddr, (u8*)a + 1660 memcpy(kaddr, (u8*)a +
@@ -1538,7 +1719,9 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
1538 ffs(ni->itype.compressed.block_size) - 1; 1719 ffs(ni->itype.compressed.block_size) - 1;
1539 ni->itype.compressed.block_clusters = 1U << 1720 ni->itype.compressed.block_clusters = 1U <<
1540 a->data.non_resident.compression_unit; 1721 a->data.non_resident.compression_unit;
1541 } 1722 vi->i_blocks = ni->itype.compressed.size >> 9;
1723 } else
1724 vi->i_blocks = ni->allocated_size >> 9;
1542 write_unlock_irqrestore(&ni->size_lock, flags); 1725 write_unlock_irqrestore(&ni->size_lock, flags);
1543 /* 1726 /*
1544 * This needs to be last since the address space operations ->readpage 1727 * This needs to be last since the address space operations ->readpage
@@ -1652,6 +1835,640 @@ page_err_out:
1652} 1835}
1653 1836
1654/** 1837/**
1838 * ntfs_attr_extend_allocation - extend the allocated space of an attribute
1839 * @ni: ntfs inode of the attribute whose allocation to extend
1840 * @new_alloc_size: new size in bytes to which to extend the allocation
1841 * @new_data_size: new size in bytes to which to extend the data
1842 * @data_start: beginning of region which is required to be non-sparse
1843 *
1844 * Extend the allocated space of an attribute described by the ntfs inode @ni
1845 * to @new_alloc_size bytes. If @data_start is -1, the whole extension may be
1846 * implemented as a hole in the file (as long as both the volume and the ntfs
1847 * inode @ni have sparse support enabled). If @data_start is >= 0, then the
1848 * region between the old allocated size and @data_start - 1 may be made sparse
1849 * but the regions between @data_start and @new_alloc_size must be backed by
1850 * actual clusters.
1851 *
1852 * If @new_data_size is -1, it is ignored. If it is >= 0, then the data size
1853 * of the attribute is extended to @new_data_size. Note that the i_size of the
1854 * vfs inode is not updated. Only the data size in the base attribute record
1855 * is updated. The caller has to update i_size separately if this is required.
1856 * WARNING: It is a BUG() for @new_data_size to be smaller than the old data
1857 * size as well as for @new_data_size to be greater than @new_alloc_size.
1858 *
1859 * For resident attributes this involves resizing the attribute record and if
1860 * necessary moving it and/or other attributes into extent mft records and/or
1861 * converting the attribute to a non-resident attribute which in turn involves
1862 * extending the allocation of a non-resident attribute as described below.
1863 *
1864 * For non-resident attributes this involves allocating clusters in the data
1865 * zone on the volume (except for regions that are being made sparse) and
1866 * extending the run list to describe the allocated clusters as well as
1867 * updating the mapping pairs array of the attribute. This in turn involves
1868 * resizing the attribute record and if necessary moving it and/or other
1869 * attributes into extent mft records and/or splitting the attribute record
1870 * into multiple extent attribute records.
1871 *
1872 * Also, the attribute list attribute is updated if present and in some of the
1873 * above cases (the ones where extent mft records/attributes come into play),
1874 * an attribute list attribute is created if not already present.
1875 *
1876 * Return the new allocated size on success and -errno on error. In the case
1877 * that an error is encountered but a partial extension at least up to
1878 * @data_start (if present) is possible, the allocation is partially extended
1879 * and this is returned. This means the caller must check the returned size to
1880 * determine if the extension was partial. If @data_start is -1 then partial
1881 * allocations are not performed.
1882 *
1883 * WARNING: Do not call ntfs_attr_extend_allocation() for $MFT/$DATA.
1884 *
1885 * Locking: This function takes the runlist lock of @ni for writing as well as
1886 * locking the mft record of the base ntfs inode. These locks are maintained
1887 * throughout execution of the function. These locks are required so that the
1888 * attribute can be resized safely and so that it can for example be converted
1889 * from resident to non-resident safely.
1890 *
1891 * TODO: At present attribute list attribute handling is not implemented.
1892 *
1893 * TODO: At present it is not safe to call this function for anything other
1894 * than the $DATA attribute(s) of an uncompressed and unencrypted file.
1895 */
1896s64 ntfs_attr_extend_allocation(ntfs_inode *ni, s64 new_alloc_size,
1897 const s64 new_data_size, const s64 data_start)
1898{
1899 VCN vcn;
1900 s64 ll, allocated_size, start = data_start;
1901 struct inode *vi = VFS_I(ni);
1902 ntfs_volume *vol = ni->vol;
1903 ntfs_inode *base_ni;
1904 MFT_RECORD *m;
1905 ATTR_RECORD *a;
1906 ntfs_attr_search_ctx *ctx;
1907 runlist_element *rl, *rl2;
1908 unsigned long flags;
1909 int err, mp_size;
1910 u32 attr_len = 0; /* Silence stupid gcc warning. */
1911 BOOL mp_rebuilt;
1912
1913#ifdef NTFS_DEBUG
1914 read_lock_irqsave(&ni->size_lock, flags);
1915 allocated_size = ni->allocated_size;
1916 read_unlock_irqrestore(&ni->size_lock, flags);
1917 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
1918 "old_allocated_size 0x%llx, "
1919 "new_allocated_size 0x%llx, new_data_size 0x%llx, "
1920 "data_start 0x%llx.", vi->i_ino,
1921 (unsigned)le32_to_cpu(ni->type),
1922 (unsigned long long)allocated_size,
1923 (unsigned long long)new_alloc_size,
1924 (unsigned long long)new_data_size,
1925 (unsigned long long)start);
1926#endif
1927retry_extend:
1928 /*
1929 * For non-resident attributes, @start and @new_alloc_size need to be aligned
1930 * to cluster boundaries for allocation purposes.
1931 */
1932 if (NInoNonResident(ni)) {
1933 if (start > 0)
1934 start &= ~(s64)vol->cluster_size_mask;
1935 new_alloc_size = (new_alloc_size + vol->cluster_size - 1) &
1936 ~(s64)vol->cluster_size_mask;
1937 }
1938 BUG_ON(new_data_size >= 0 && new_data_size > new_alloc_size);
1939 /* Check if new size is allowed in $AttrDef. */
1940 err = ntfs_attr_size_bounds_check(vol, ni->type, new_alloc_size);
1941 if (unlikely(err)) {
1942 /* Only emit errors when the write will fail completely. */
1943 read_lock_irqsave(&ni->size_lock, flags);
1944 allocated_size = ni->allocated_size;
1945 read_unlock_irqrestore(&ni->size_lock, flags);
1946 if (start < 0 || start >= allocated_size) {
1947 if (err == -ERANGE) {
1948 ntfs_error(vol->sb, "Cannot extend allocation "
1949 "of inode 0x%lx, attribute "
1950 "type 0x%x, because the new "
1951 "allocation would exceed the "
1952 "maximum allowed size for "
1953 "this attribute type.",
1954 vi->i_ino, (unsigned)
1955 le32_to_cpu(ni->type));
1956 } else {
1957 ntfs_error(vol->sb, "Cannot extend allocation "
1958 "of inode 0x%lx, attribute "
1959 "type 0x%x, because this "
1960 "attribute type is not "
1961 "defined on the NTFS volume. "
1962 "Possible corruption! You "
1963 "should run chkdsk!",
1964 vi->i_ino, (unsigned)
1965 le32_to_cpu(ni->type));
1966 }
1967 }
1968 /* Translate error code to be POSIX conformant for write(2). */
1969 if (err == -ERANGE)
1970 err = -EFBIG;
1971 else
1972 err = -EIO;
1973 return err;
1974 }
1975 if (!NInoAttr(ni))
1976 base_ni = ni;
1977 else
1978 base_ni = ni->ext.base_ntfs_ino;
1979 /*
1980 * We will be modifying both the runlist (if non-resident) and the mft
1981 * record so lock them both down.
1982 */
1983 down_write(&ni->runlist.lock);
1984 m = map_mft_record(base_ni);
1985 if (IS_ERR(m)) {
1986 err = PTR_ERR(m);
1987 m = NULL;
1988 ctx = NULL;
1989 goto err_out;
1990 }
1991 ctx = ntfs_attr_get_search_ctx(base_ni, m);
1992 if (unlikely(!ctx)) {
1993 err = -ENOMEM;
1994 goto err_out;
1995 }
1996 read_lock_irqsave(&ni->size_lock, flags);
1997 allocated_size = ni->allocated_size;
1998 read_unlock_irqrestore(&ni->size_lock, flags);
1999 /*
2000 * If non-resident, seek to the last extent. If resident, there is
2001 * only one extent, so seek to that.
2002 */
2003 vcn = NInoNonResident(ni) ? allocated_size >> vol->cluster_size_bits :
2004 0;
2005 /*
2006 * Abort if someone did the work whilst we waited for the locks. If we
2007 * just converted the attribute from resident to non-resident it is
2008 * likely that exactly this has happened already. We cannot quite
2009 * abort if we need to update the data size.
2010 */
2011 if (unlikely(new_alloc_size <= allocated_size)) {
2012 ntfs_debug("Allocated size already exceeds requested size.");
2013 new_alloc_size = allocated_size;
2014 if (new_data_size < 0)
2015 goto done;
2016 /*
2017 * We want the first attribute extent so that we can update the
2018 * data size.
2019 */
2020 vcn = 0;
2021 }
2022 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
2023 CASE_SENSITIVE, vcn, NULL, 0, ctx);
2024 if (unlikely(err)) {
2025 if (err == -ENOENT)
2026 err = -EIO;
2027 goto err_out;
2028 }
2029 m = ctx->mrec;
2030 a = ctx->attr;
2031 /* Use goto to reduce indentation. */
2032 if (a->non_resident)
2033 goto do_non_resident_extend;
2034 BUG_ON(NInoNonResident(ni));
2035 /* The total length of the attribute value. */
2036 attr_len = le32_to_cpu(a->data.resident.value_length);
2037 /*
2038 * Extend the attribute record to be able to store the new attribute
2039 * size. ntfs_attr_record_resize() will not do anything if the size is
2040 * not changing.
2041 */
2042 if (new_alloc_size < vol->mft_record_size &&
2043 !ntfs_attr_record_resize(m, a,
2044 le16_to_cpu(a->data.resident.value_offset) +
2045 new_alloc_size)) {
2046 /* The resize succeeded! */
2047 write_lock_irqsave(&ni->size_lock, flags);
2048 ni->allocated_size = le32_to_cpu(a->length) -
2049 le16_to_cpu(a->data.resident.value_offset);
2050 write_unlock_irqrestore(&ni->size_lock, flags);
2051 if (new_data_size >= 0) {
2052 BUG_ON(new_data_size < attr_len);
2053 a->data.resident.value_length =
2054 cpu_to_le32((u32)new_data_size);
2055 }
2056 goto flush_done;
2057 }
2058 /*
2059 * We have to drop all the locks so we can call
2060 * ntfs_attr_make_non_resident(). This could be optimised by try-
2061 * locking the first page cache page and only if that fails dropping
2062 * the locks, locking the page, and redoing all the locking and
2063 * lookups. While this would be a huge optimisation, it is not worth
2064 * it as this is definitely a slow code path.
2065 */
2066 ntfs_attr_put_search_ctx(ctx);
2067 unmap_mft_record(base_ni);
2068 up_write(&ni->runlist.lock);
2069 /*
2070 * Not enough space in the mft record, try to make the attribute
2071 * non-resident and if successful restart the extension process.
2072 */
2073 err = ntfs_attr_make_non_resident(ni, attr_len);
2074 if (likely(!err))
2075 goto retry_extend;
2076 /*
2077 * Could not make non-resident. If this is due to this not being
2078 * permitted for this attribute type or there not being enough space,
2079 * try to make other attributes non-resident. Otherwise fail.
2080 */
2081 if (unlikely(err != -EPERM && err != -ENOSPC)) {
2082 /* Only emit errors when the write will fail completely. */
2083 read_lock_irqsave(&ni->size_lock, flags);
2084 allocated_size = ni->allocated_size;
2085 read_unlock_irqrestore(&ni->size_lock, flags);
2086 if (start < 0 || start >= allocated_size)
2087 ntfs_error(vol->sb, "Cannot extend allocation of "
2088 "inode 0x%lx, attribute type 0x%x, "
2089 "because the conversion from resident "
2090 "to non-resident attribute failed "
2091 "with error code %i.", vi->i_ino,
2092 (unsigned)le32_to_cpu(ni->type), err);
2093 if (err != -ENOMEM)
2094 err = -EIO;
2095 goto conv_err_out;
2096 }
2097 /* TODO: Not implemented from here, abort. */
2098 read_lock_irqsave(&ni->size_lock, flags);
2099 allocated_size = ni->allocated_size;
2100 read_unlock_irqrestore(&ni->size_lock, flags);
2101 if (start < 0 || start >= allocated_size) {
2102 if (err == -ENOSPC)
2103 ntfs_error(vol->sb, "Not enough space in the mft "
2104 "record/on disk for the non-resident "
2105 "attribute value. This case is not "
2106 "implemented yet.");
2107 else /* if (err == -EPERM) */
2108 ntfs_error(vol->sb, "This attribute type may not be "
2109 "non-resident. This case is not "
2110 "implemented yet.");
2111 }
2112 err = -EOPNOTSUPP;
2113 goto conv_err_out;
2114#if 0
2115 // TODO: Attempt to make other attributes non-resident.
2116 if (!err)
2117 goto do_resident_extend;
2118 /*
2119 * Both the attribute list attribute and the standard information
2120 * attribute must remain in the base inode. Thus, if this is one of
2121 * these attributes, we have to try to move other attributes out into
2122 * extent mft records instead.
2123 */
2124 if (ni->type == AT_ATTRIBUTE_LIST ||
2125 ni->type == AT_STANDARD_INFORMATION) {
2126 // TODO: Attempt to move other attributes into extent mft
2127 // records.
2128 err = -EOPNOTSUPP;
2129 if (!err)
2130 goto do_resident_extend;
2131 goto err_out;
2132 }
2133 // TODO: Attempt to move this attribute to an extent mft record, but
2134 // only if it is not already the only attribute in an mft record in
2135 // which case there would be nothing to gain.
2136 err = -EOPNOTSUPP;
2137 if (!err)
2138 goto do_resident_extend;
2139 /* There is nothing we can do to make enough space. )-: */
2140 goto err_out;
2141#endif
2142do_non_resident_extend:
2143 BUG_ON(!NInoNonResident(ni));
2144 if (new_alloc_size == allocated_size) {
2145 BUG_ON(vcn);
2146 goto alloc_done;
2147 }
2148 /*
2149 * If the data starts after the end of the old allocation, this is a
2150 * $DATA attribute and sparse attributes are enabled on the volume and
2151 * for this inode, then create a sparse region between the old
2152 * allocated size and the start of the data. Otherwise simply proceed
2153 * with filling the whole space between the old allocated size and the
2154 * new allocated size with clusters.
2155 */
2156 if ((start >= 0 && start <= allocated_size) || ni->type != AT_DATA ||
2157 !NVolSparseEnabled(vol) || NInoSparseDisabled(ni))
2158 goto skip_sparse;
2159 // TODO: This is not implemented yet. We just fill in with real
2160 // clusters for now...
2161 ntfs_debug("Inserting holes is not-implemented yet. Falling back to "
2162 "allocating real clusters instead.");
2163skip_sparse:
2164 rl = ni->runlist.rl;
2165 if (likely(rl)) {
2166 /* Seek to the end of the runlist. */
2167 while (rl->length)
2168 rl++;
2169 }
2170 /* If this attribute extent is not mapped, map it now. */
2171 if (unlikely(!rl || rl->lcn == LCN_RL_NOT_MAPPED ||
2172 (rl->lcn == LCN_ENOENT && rl > ni->runlist.rl &&
2173 (rl-1)->lcn == LCN_RL_NOT_MAPPED))) {
2174 if (!rl && !allocated_size)
2175 goto first_alloc;
2176 rl = ntfs_mapping_pairs_decompress(vol, a, ni->runlist.rl);
2177 if (IS_ERR(rl)) {
2178 err = PTR_ERR(rl);
2179 if (start < 0 || start >= allocated_size)
2180 ntfs_error(vol->sb, "Cannot extend allocation "
2181 "of inode 0x%lx, attribute "
2182 "type 0x%x, because the "
2183 "mapping of a runlist "
2184 "fragment failed with error "
2185 "code %i.", vi->i_ino,
2186 (unsigned)le32_to_cpu(ni->type),
2187 err);
2188 if (err != -ENOMEM)
2189 err = -EIO;
2190 goto err_out;
2191 }
2192 ni->runlist.rl = rl;
2193 /* Seek to the end of the runlist. */
2194 while (rl->length)
2195 rl++;
2196 }
2197 /*
2198 * We now know the runlist of the last extent is mapped and @rl is at
2199 * the end of the runlist. We want to begin allocating clusters
2200 * starting at the last allocated cluster to reduce fragmentation. If
2201 * there are no valid LCNs in the attribute we let the cluster
2202 * allocator choose the starting cluster.
2203 */
2204 /* If the last LCN is a hole or similar, seek back to last real LCN. */
2205 while (rl->lcn < 0 && rl > ni->runlist.rl)
2206 rl--;
2207first_alloc:
2208 // FIXME: Need to implement partial allocations so at least part of the
2209 // write can be performed when start >= 0. (Needed for POSIX write(2)
2210 // conformance.)
2211 rl2 = ntfs_cluster_alloc(vol, allocated_size >> vol->cluster_size_bits,
2212 (new_alloc_size - allocated_size) >>
2213 vol->cluster_size_bits, (rl && (rl->lcn >= 0)) ?
2214 rl->lcn + rl->length : -1, DATA_ZONE, TRUE);
2215 if (IS_ERR(rl2)) {
2216 err = PTR_ERR(rl2);
2217 if (start < 0 || start >= allocated_size)
2218 ntfs_error(vol->sb, "Cannot extend allocation of "
2219 "inode 0x%lx, attribute type 0x%x, "
2220 "because the allocation of clusters "
2221 "failed with error code %i.", vi->i_ino,
2222 (unsigned)le32_to_cpu(ni->type), err);
2223 if (err != -ENOMEM && err != -ENOSPC)
2224 err = -EIO;
2225 goto err_out;
2226 }
2227 rl = ntfs_runlists_merge(ni->runlist.rl, rl2);
2228 if (IS_ERR(rl)) {
2229 err = PTR_ERR(rl);
2230 if (start < 0 || start >= allocated_size)
2231 ntfs_error(vol->sb, "Cannot extend allocation of "
2232 "inode 0x%lx, attribute type 0x%x, "
2233 "because the runlist merge failed "
2234 "with error code %i.", vi->i_ino,
2235 (unsigned)le32_to_cpu(ni->type), err);
2236 if (err != -ENOMEM)
2237 err = -EIO;
2238 if (ntfs_cluster_free_from_rl(vol, rl2)) {
2239 ntfs_error(vol->sb, "Failed to release allocated "
2240 "cluster(s) in error code path. Run "
2241 "chkdsk to recover the lost "
2242 "cluster(s).");
2243 NVolSetErrors(vol);
2244 }
2245 ntfs_free(rl2);
2246 goto err_out;
2247 }
2248 ni->runlist.rl = rl;
2249 ntfs_debug("Allocated 0x%llx clusters.", (long long)(new_alloc_size -
2250 allocated_size) >> vol->cluster_size_bits);
2251 /* Find the runlist element with which the attribute extent starts. */
2252 ll = sle64_to_cpu(a->data.non_resident.lowest_vcn);
2253 rl2 = ntfs_rl_find_vcn_nolock(rl, ll);
2254 BUG_ON(!rl2);
2255 BUG_ON(!rl2->length);
2256 BUG_ON(rl2->lcn < LCN_HOLE);
2257 mp_rebuilt = FALSE;
2258 /* Get the size for the new mapping pairs array for this extent. */
2259 mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1);
2260 if (unlikely(mp_size <= 0)) {
2261 err = mp_size;
2262 if (start < 0 || start >= allocated_size)
2263 ntfs_error(vol->sb, "Cannot extend allocation of "
2264 "inode 0x%lx, attribute type 0x%x, "
2265 "because determining the size for the "
2266 "mapping pairs failed with error code "
2267 "%i.", vi->i_ino,
2268 (unsigned)le32_to_cpu(ni->type), err);
2269 err = -EIO;
2270 goto undo_alloc;
2271 }
2272 /* Extend the attribute record to fit the bigger mapping pairs array. */
2273 attr_len = le32_to_cpu(a->length);
2274 err = ntfs_attr_record_resize(m, a, mp_size +
2275 le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
2276 if (unlikely(err)) {
2277 BUG_ON(err != -ENOSPC);
2278 // TODO: Deal with this by moving this extent to a new mft
2279 // record or by starting a new extent in a new mft record,
2280 // possibly by extending this extent partially and filling it
2281 // and creating a new extent for the remainder, or by making
2282 // other attributes non-resident and/or by moving other
2283 // attributes out of this mft record.
2284 if (start < 0 || start >= allocated_size)
2285 ntfs_error(vol->sb, "Not enough space in the mft "
2286 "record for the extended attribute "
2287 "record. This case is not "
2288 "implemented yet.");
2289 err = -EOPNOTSUPP;
2290 goto undo_alloc;
2291 }
2292 mp_rebuilt = TRUE;
2293 /* Generate the mapping pairs array directly into the attr record. */
2294 err = ntfs_mapping_pairs_build(vol, (u8*)a +
2295 le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
2296 mp_size, rl2, ll, -1, NULL);
2297 if (unlikely(err)) {
2298 if (start < 0 || start >= allocated_size)
2299 ntfs_error(vol->sb, "Cannot extend allocation of "
2300 "inode 0x%lx, attribute type 0x%x, "
2301 "because building the mapping pairs "
2302 "failed with error code %i.", vi->i_ino,
2303 (unsigned)le32_to_cpu(ni->type), err);
2304 err = -EIO;
2305 goto undo_alloc;
2306 }
2307 /* Update the highest_vcn. */
2308 a->data.non_resident.highest_vcn = cpu_to_sle64((new_alloc_size >>
2309 vol->cluster_size_bits) - 1);
2310 /*
2311 * We now have extended the allocated size of the attribute. Reflect
2312 * this in the ntfs_inode structure and the attribute record.
2313 */
2314 if (a->data.non_resident.lowest_vcn) {
2315 /*
2316 * We are not in the first attribute extent, switch to it, but
2317 * first ensure the changes will make it to disk later.
2318 */
2319 flush_dcache_mft_record_page(ctx->ntfs_ino);
2320 mark_mft_record_dirty(ctx->ntfs_ino);
2321 ntfs_attr_reinit_search_ctx(ctx);
2322 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
2323 CASE_SENSITIVE, 0, NULL, 0, ctx);
2324 if (unlikely(err))
2325 goto restore_undo_alloc;
2326 /* @m is not used any more so no need to set it. */
2327 a = ctx->attr;
2328 }
2329 write_lock_irqsave(&ni->size_lock, flags);
2330 ni->allocated_size = new_alloc_size;
2331 a->data.non_resident.allocated_size = cpu_to_sle64(new_alloc_size);
2332 /*
2333 * FIXME: This would fail if @ni is a directory, $MFT, or an index,
2334 * since those can have sparse/compressed set. For example a directory can be
2335 * set compressed even though it is not compressed itself and in that
2336 * case the bit means that files are to be created compressed in the
2337 * directory... At present this is ok as this code is only called for
2338 * regular files, and only for their $DATA attribute(s).
2339 * FIXME: The calculation is wrong if we created a hole above. For now
2340 * it does not matter as we never create holes.
2341 */
2342 if (NInoSparse(ni) || NInoCompressed(ni)) {
2343 ni->itype.compressed.size += new_alloc_size - allocated_size;
2344 a->data.non_resident.compressed_size =
2345 cpu_to_sle64(ni->itype.compressed.size);
2346 vi->i_blocks = ni->itype.compressed.size >> 9;
2347 } else
2348 vi->i_blocks = new_alloc_size >> 9;
2349 write_unlock_irqrestore(&ni->size_lock, flags);
2350alloc_done:
2351 if (new_data_size >= 0) {
2352 BUG_ON(new_data_size <
2353 sle64_to_cpu(a->data.non_resident.data_size));
2354 a->data.non_resident.data_size = cpu_to_sle64(new_data_size);
2355 }
2356flush_done:
2357 /* Ensure the changes make it to disk. */
2358 flush_dcache_mft_record_page(ctx->ntfs_ino);
2359 mark_mft_record_dirty(ctx->ntfs_ino);
2360done:
2361 ntfs_attr_put_search_ctx(ctx);
2362 unmap_mft_record(base_ni);
2363 up_write(&ni->runlist.lock);
2364 ntfs_debug("Done, new_allocated_size 0x%llx.",
2365 (unsigned long long)new_alloc_size);
2366 return new_alloc_size;
2367restore_undo_alloc:
2368 if (start < 0 || start >= allocated_size)
2369 ntfs_error(vol->sb, "Cannot complete extension of allocation "
2370 "of inode 0x%lx, attribute type 0x%x, because "
2371 "lookup of first attribute extent failed with "
2372 "error code %i.", vi->i_ino,
2373 (unsigned)le32_to_cpu(ni->type), err);
2374 if (err == -ENOENT)
2375 err = -EIO;
2376 ntfs_attr_reinit_search_ctx(ctx);
2377 if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE,
2378 allocated_size >> vol->cluster_size_bits, NULL, 0,
2379 ctx)) {
2380 ntfs_error(vol->sb, "Failed to find last attribute extent of "
2381 "attribute in error code path. Run chkdsk to "
2382 "recover.");
2383 write_lock_irqsave(&ni->size_lock, flags);
2384 ni->allocated_size = new_alloc_size;
2385 /*
2386 * FIXME: This would fail if @ni is a directory... See above.
2387 * FIXME: The calculation is wrong if we created a hole above.
2388 * For now it does not matter as we never create holes.
2389 */
2390 if (NInoSparse(ni) || NInoCompressed(ni)) {
2391 ni->itype.compressed.size += new_alloc_size -
2392 allocated_size;
2393 vi->i_blocks = ni->itype.compressed.size >> 9;
2394 } else
2395 vi->i_blocks = new_alloc_size >> 9;
2396 write_unlock_irqrestore(&ni->size_lock, flags);
2397 ntfs_attr_put_search_ctx(ctx);
2398 unmap_mft_record(base_ni);
2399 up_write(&ni->runlist.lock);
2400 /*
2401 * The only thing that is now wrong is the allocated size of the
2402 * base attribute extent which chkdsk should be able to fix.
2403 */
2404 NVolSetErrors(vol);
2405 return err;
2406 }
2407 ctx->attr->data.non_resident.highest_vcn = cpu_to_sle64(
2408 (allocated_size >> vol->cluster_size_bits) - 1);
2409undo_alloc:
2410 ll = allocated_size >> vol->cluster_size_bits;
2411 if (ntfs_cluster_free(ni, ll, -1, ctx) < 0) {
2412 ntfs_error(vol->sb, "Failed to release allocated cluster(s) "
2413 "in error code path. Run chkdsk to recover "
2414 "the lost cluster(s).");
2415 NVolSetErrors(vol);
2416 }
2417 m = ctx->mrec;
2418 a = ctx->attr;
2419 /*
2420 * If the runlist truncation fails and/or the search context is no
2421 * longer valid, we cannot resize the attribute record or build the
2422 * mapping pairs array thus we mark the inode bad so that no access to
2423 * the freed clusters can happen.
2424 */
2425 if (ntfs_rl_truncate_nolock(vol, &ni->runlist, ll) || IS_ERR(m)) {
2426 ntfs_error(vol->sb, "Failed to %s in error code path. Run "
2427 "chkdsk to recover.", IS_ERR(m) ?
2428 "restore attribute search context" :
2429 "truncate attribute runlist");
2430 make_bad_inode(vi);
2431 make_bad_inode(VFS_I(base_ni));
2432 NVolSetErrors(vol);
2433 } else if (mp_rebuilt) {
2434 if (ntfs_attr_record_resize(m, a, attr_len)) {
2435 ntfs_error(vol->sb, "Failed to restore attribute "
2436 "record in error code path. Run "
2437 "chkdsk to recover.");
2438 make_bad_inode(vi);
2439 make_bad_inode(VFS_I(base_ni));
2440 NVolSetErrors(vol);
2441 } else /* if (success) */ {
2442 if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
2443 a->data.non_resident.
2444 mapping_pairs_offset), attr_len -
2445 le16_to_cpu(a->data.non_resident.
2446 mapping_pairs_offset), rl2, ll, -1,
2447 NULL)) {
2448 ntfs_error(vol->sb, "Failed to restore "
2449 "mapping pairs array in error "
2450 "code path. Run chkdsk to "
2451 "recover.");
2452 make_bad_inode(vi);
2453 make_bad_inode(VFS_I(base_ni));
2454 NVolSetErrors(vol);
2455 }
2456 flush_dcache_mft_record_page(ctx->ntfs_ino);
2457 mark_mft_record_dirty(ctx->ntfs_ino);
2458 }
2459 }
2460err_out:
2461 if (ctx)
2462 ntfs_attr_put_search_ctx(ctx);
2463 if (m)
2464 unmap_mft_record(base_ni);
2465 up_write(&ni->runlist.lock);
2466conv_err_out:
2467 ntfs_debug("Failed. Returning error code %i.", err);
2468 return err;
2469}
2470
2471/**
1655 * ntfs_attr_set - fill (a part of) an attribute with a byte 2472 * ntfs_attr_set - fill (a part of) an attribute with a byte
1656 * @ni: ntfs inode describing the attribute to fill 2473 * @ni: ntfs inode describing the attribute to fill
1657 * @ofs: offset inside the attribute at which to start to fill 2474 * @ofs: offset inside the attribute at which to start to fill
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index 0618ed6fd7b3..9074886b44ba 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -60,14 +60,15 @@ typedef struct {
60 ATTR_RECORD *base_attr; 60 ATTR_RECORD *base_attr;
61} ntfs_attr_search_ctx; 61} ntfs_attr_search_ctx;
62 62
63extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn); 63extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn,
64 ntfs_attr_search_ctx *ctx);
64extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn); 65extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn);
65 66
66extern LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn, 67extern LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
67 const BOOL write_locked); 68 const BOOL write_locked);
68 69
69extern runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, 70extern runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni,
70 const VCN vcn, const BOOL write_locked); 71 const VCN vcn, ntfs_attr_search_ctx *ctx);
71 72
72int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name, 73int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
73 const u32 name_len, const IGNORE_CASE_BOOL ic, 74 const u32 name_len, const IGNORE_CASE_BOOL ic,
@@ -102,7 +103,10 @@ extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size);
102extern int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a, 103extern int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
103 const u32 new_size); 104 const u32 new_size);
104 105
105extern int ntfs_attr_make_non_resident(ntfs_inode *ni); 106extern int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size);
107
108extern s64 ntfs_attr_extend_allocation(ntfs_inode *ni, s64 new_alloc_size,
109 const s64 new_data_size, const s64 data_start);
106 110
107extern int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, 111extern int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt,
108 const u8 val); 112 const u8 val);
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 7ec045131808..b24f4c4b2c5c 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -30,6 +30,7 @@
30#include "debug.h" 30#include "debug.h"
31#include "inode.h" 31#include "inode.h"
32#include "attrib.h" 32#include "attrib.h"
33#include "lcnalloc.h"
33#include "malloc.h" 34#include "malloc.h"
34#include "mft.h" 35#include "mft.h"
35#include "time.h" 36#include "time.h"
@@ -2291,11 +2292,16 @@ int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
2291 2292
2292#ifdef NTFS_RW 2293#ifdef NTFS_RW
2293 2294
2295static const char *es = " Leaving inconsistent metadata. Unmount and run "
2296 "chkdsk.";
2297
2294/** 2298/**
2295 * ntfs_truncate - called when the i_size of an ntfs inode is changed 2299 * ntfs_truncate - called when the i_size of an ntfs inode is changed
2296 * @vi: inode for which the i_size was changed 2300 * @vi: inode for which the i_size was changed
2297 * 2301 *
2298 * We do not support i_size changes yet. 2302 * We only support i_size changes for normal files at present, i.e. not
2303 * compressed and not encrypted. This is enforced in ntfs_setattr(), see
2304 * below.
2299 * 2305 *
2300 * The kernel guarantees that @vi is a regular file (S_ISREG() is true) and 2306 * The kernel guarantees that @vi is a regular file (S_ISREG() is true) and
2301 * that the change is allowed. 2307 * that the change is allowed.
@@ -2306,80 +2312,499 @@ int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
2306 * Returns 0 on success or -errno on error. 2312 * Returns 0 on success or -errno on error.
2307 * 2313 *
2308 * Called with ->i_sem held. In all but one case ->i_alloc_sem is held for 2314 * Called with ->i_sem held. In all but one case ->i_alloc_sem is held for
2309 * writing. The only case where ->i_alloc_sem is not held is 2315 * writing. The only case in the kernel where ->i_alloc_sem is not held is
2310 * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called 2316 * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called
2311 * with the current i_size as the offset which means that it is a noop as far 2317 * with the current i_size as the offset. The analogous place in NTFS is in
2312 * as ntfs_truncate() is concerned. 2318 * fs/ntfs/file.c::ntfs_file_buffered_write() where we call vmtruncate() again
2319 * without holding ->i_alloc_sem.
2313 */ 2320 */
2314int ntfs_truncate(struct inode *vi) 2321int ntfs_truncate(struct inode *vi)
2315{ 2322{
2316 ntfs_inode *ni = NTFS_I(vi); 2323 s64 new_size, old_size, nr_freed, new_alloc_size, old_alloc_size;
2324 VCN highest_vcn;
2325 unsigned long flags;
2326 ntfs_inode *base_ni, *ni = NTFS_I(vi);
2317 ntfs_volume *vol = ni->vol; 2327 ntfs_volume *vol = ni->vol;
2318 ntfs_attr_search_ctx *ctx; 2328 ntfs_attr_search_ctx *ctx;
2319 MFT_RECORD *m; 2329 MFT_RECORD *m;
2320 ATTR_RECORD *a; 2330 ATTR_RECORD *a;
2321 const char *te = " Leaving file length out of sync with i_size."; 2331 const char *te = " Leaving file length out of sync with i_size.";
2322 int err; 2332 int err, mp_size, size_change, alloc_change;
2333 u32 attr_len;
2323 2334
2324 ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); 2335 ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
2325 BUG_ON(NInoAttr(ni)); 2336 BUG_ON(NInoAttr(ni));
2337 BUG_ON(S_ISDIR(vi->i_mode));
2338 BUG_ON(NInoMstProtected(ni));
2326 BUG_ON(ni->nr_extents < 0); 2339 BUG_ON(ni->nr_extents < 0);
2327 m = map_mft_record(ni); 2340retry_truncate:
2341 /*
2342 * Lock the runlist for writing and map the mft record to ensure it is
2343 * safe to mess with the attribute runlist and sizes.
2344 */
2345 down_write(&ni->runlist.lock);
2346 if (!NInoAttr(ni))
2347 base_ni = ni;
2348 else
2349 base_ni = ni->ext.base_ntfs_ino;
2350 m = map_mft_record(base_ni);
2328 if (IS_ERR(m)) { 2351 if (IS_ERR(m)) {
2329 err = PTR_ERR(m); 2352 err = PTR_ERR(m);
2330 ntfs_error(vi->i_sb, "Failed to map mft record for inode 0x%lx " 2353 ntfs_error(vi->i_sb, "Failed to map mft record for inode 0x%lx "
2331 "(error code %d).%s", vi->i_ino, err, te); 2354 "(error code %d).%s", vi->i_ino, err, te);
2332 ctx = NULL; 2355 ctx = NULL;
2333 m = NULL; 2356 m = NULL;
2334 goto err_out; 2357 goto old_bad_out;
2335 } 2358 }
2336 ctx = ntfs_attr_get_search_ctx(ni, m); 2359 ctx = ntfs_attr_get_search_ctx(base_ni, m);
2337 if (unlikely(!ctx)) { 2360 if (unlikely(!ctx)) {
2338 ntfs_error(vi->i_sb, "Failed to allocate a search context for " 2361 ntfs_error(vi->i_sb, "Failed to allocate a search context for "
2339 "inode 0x%lx (not enough memory).%s", 2362 "inode 0x%lx (not enough memory).%s",
2340 vi->i_ino, te); 2363 vi->i_ino, te);
2341 err = -ENOMEM; 2364 err = -ENOMEM;
2342 goto err_out; 2365 goto old_bad_out;
2343 } 2366 }
2344 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 2367 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
2345 CASE_SENSITIVE, 0, NULL, 0, ctx); 2368 CASE_SENSITIVE, 0, NULL, 0, ctx);
2346 if (unlikely(err)) { 2369 if (unlikely(err)) {
2347 if (err == -ENOENT) 2370 if (err == -ENOENT) {
2348 ntfs_error(vi->i_sb, "Open attribute is missing from " 2371 ntfs_error(vi->i_sb, "Open attribute is missing from "
2349 "mft record. Inode 0x%lx is corrupt. " 2372 "mft record. Inode 0x%lx is corrupt. "
2350 "Run chkdsk.", vi->i_ino); 2373 "Run chkdsk.%s", vi->i_ino, te);
2351 else 2374 err = -EIO;
2375 } else
2352 ntfs_error(vi->i_sb, "Failed to lookup attribute in " 2376 ntfs_error(vi->i_sb, "Failed to lookup attribute in "
2353 "inode 0x%lx (error code %d).", 2377 "inode 0x%lx (error code %d).%s",
2354 vi->i_ino, err); 2378 vi->i_ino, err, te);
2355 goto err_out; 2379 goto old_bad_out;
2356 } 2380 }
2381 m = ctx->mrec;
2357 a = ctx->attr; 2382 a = ctx->attr;
2358 /* If the size has not changed there is nothing to do. */ 2383 /*
2359 if (ntfs_attr_size(a) == i_size_read(vi)) 2384 * The i_size of the vfs inode is the new size for the attribute value.
2360 goto done; 2385 */
2361 // TODO: Implement the truncate... 2386 new_size = i_size_read(vi);
2362 ntfs_error(vi->i_sb, "Inode size has changed but this is not " 2387 /* The current size of the attribute value is the old size. */
2363 "implemented yet. Resetting inode size to old value. " 2388 old_size = ntfs_attr_size(a);
2364 " This is most likely a bug in the ntfs driver!"); 2389 /* Calculate the new allocated size. */
2365 i_size_write(vi, ntfs_attr_size(a)); 2390 if (NInoNonResident(ni))
2366done: 2391 new_alloc_size = (new_size + vol->cluster_size - 1) &
2392 ~(s64)vol->cluster_size_mask;
2393 else
2394 new_alloc_size = (new_size + 7) & ~7;
2395 /* The current allocated size is the old allocated size. */
2396 read_lock_irqsave(&ni->size_lock, flags);
2397 old_alloc_size = ni->allocated_size;
2398 read_unlock_irqrestore(&ni->size_lock, flags);
2399 /*
2400 * The change in the file size. This will be 0 if no change, >0 if the
2401 * size is growing, and <0 if the size is shrinking.
2402 */
2403 size_change = -1;
2404 if (new_size - old_size >= 0) {
2405 size_change = 1;
2406 if (new_size == old_size)
2407 size_change = 0;
2408 }
2409 /* As above for the allocated size. */
2410 alloc_change = -1;
2411 if (new_alloc_size - old_alloc_size >= 0) {
2412 alloc_change = 1;
2413 if (new_alloc_size == old_alloc_size)
2414 alloc_change = 0;
2415 }
2416 /*
2417 * If neither the size nor the allocation are being changed there is
2418 * nothing to do.
2419 */
2420 if (!size_change && !alloc_change)
2421 goto unm_done;
2422 /* If the size is changing, check if new size is allowed in $AttrDef. */
2423 if (size_change) {
2424 err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
2425 if (unlikely(err)) {
2426 if (err == -ERANGE) {
2427 ntfs_error(vol->sb, "Truncate would cause the "
2428 "inode 0x%lx to %simum size "
2429 "for its attribute type "
2430 "(0x%x). Aborting truncate.",
2431 vi->i_ino,
2432 new_size > old_size ? "exceed "
2433 "the max" : "go under the min",
2434 le32_to_cpu(ni->type));
2435 err = -EFBIG;
2436 } else {
2437 ntfs_error(vol->sb, "Inode 0x%lx has unknown "
2438 "attribute type 0x%x. "
2439 "Aborting truncate.",
2440 vi->i_ino,
2441 le32_to_cpu(ni->type));
2442 err = -EIO;
2443 }
2444 /* Reset the vfs inode size to the old size. */
2445 i_size_write(vi, old_size);
2446 goto err_out;
2447 }
2448 }
2449 if (NInoCompressed(ni) || NInoEncrypted(ni)) {
2450 ntfs_warning(vi->i_sb, "Changes in inode size are not "
2451 "supported yet for %s files, ignoring.",
2452 NInoCompressed(ni) ? "compressed" :
2453 "encrypted");
2454 err = -EOPNOTSUPP;
2455 goto bad_out;
2456 }
2457 if (a->non_resident)
2458 goto do_non_resident_truncate;
2459 BUG_ON(NInoNonResident(ni));
2460 /* Resize the attribute record to best fit the new attribute size. */
2461 if (new_size < vol->mft_record_size &&
2462 !ntfs_resident_attr_value_resize(m, a, new_size)) {
2463 unsigned long flags;
2464
2465 /* The resize succeeded! */
2466 flush_dcache_mft_record_page(ctx->ntfs_ino);
2467 mark_mft_record_dirty(ctx->ntfs_ino);
2468 write_lock_irqsave(&ni->size_lock, flags);
2469 /* Update the sizes in the ntfs inode and all is done. */
2470 ni->allocated_size = le32_to_cpu(a->length) -
2471 le16_to_cpu(a->data.resident.value_offset);
2472 /*
2473 * Note ntfs_resident_attr_value_resize() has already done any
2474 * necessary data clearing in the attribute record. When the
2475 * file is being shrunk vmtruncate() will already have cleared
2476 * the top part of the last partial page, i.e. since this is
2477 * the resident case this is the page with index 0. However,
2478 * when the file is being expanded, the page cache page data
2479 * between the old data_size, i.e. old_size, and the new_size
2480 * has not been zeroed. Fortunately, we do not need to zero it
2481 * either since on one hand it will either already be zero due
2482 * to both readpage and writepage clearing partial page data
2483 * beyond i_size in which case there is nothing to do or in the
2484 * case of the file being mmap()ped at the same time, POSIX
2485 * specifies that the behaviour is unspecified thus we do not
2486 * have to do anything. This means that in our implementation
2487 * in the rare case that the file is mmap()ped and a write
2488 * occurred into the mmap()ped region just beyond the file size
2489 * and writepage has not yet been called to write out the page
2490 * (which would clear the area beyond the file size) and we now
2491 * extend the file size to incorporate this dirty region
2492 * outside the file size, a write of the page would result in
2493 * this data being written to disk instead of being cleared.
2494 * Given both POSIX and the Linux mmap(2) man page specify that
2495 * this corner case is undefined, we choose to leave it like
2496 * that as this is much simpler for us as we cannot lock the
2497 * relevant page now since we are holding too many ntfs locks
2498 * which would result in a lock reversal deadlock.
2499 */
2500 ni->initialized_size = new_size;
2501 write_unlock_irqrestore(&ni->size_lock, flags);
2502 goto unm_done;
2503 }
2504 /* If the above resize failed, this must be an attribute extension. */
2505 BUG_ON(size_change < 0);
2506 /*
2507 * We have to drop all the locks so we can call
2508 * ntfs_attr_make_non_resident(). This could be optimised by try-
2509 * locking the first page cache page and only if that fails dropping
2510 * the locks, locking the page, and redoing all the locking and
2511 * lookups. While this would be a huge optimisation, it is not worth
2512 * it as this is definitely a slow code path as it only ever can happen
2513 * once for any given file.
2514 */
2367 ntfs_attr_put_search_ctx(ctx); 2515 ntfs_attr_put_search_ctx(ctx);
2368 unmap_mft_record(ni); 2516 unmap_mft_record(base_ni);
2369 NInoClearTruncateFailed(ni); 2517 up_write(&ni->runlist.lock);
2370 ntfs_debug("Done."); 2518 /*
2371 return 0; 2519 * Not enough space in the mft record, try to make the attribute
2372err_out: 2520 * non-resident and if successful restart the truncation process.
2373 if (err != -ENOMEM) { 2521 */
2522 err = ntfs_attr_make_non_resident(ni, old_size);
2523 if (likely(!err))
2524 goto retry_truncate;
2525 /*
2526 * Could not make non-resident. If this is due to this not being
2527 * permitted for this attribute type or there not being enough space,
2528 * try to make other attributes non-resident. Otherwise fail.
2529 */
2530 if (unlikely(err != -EPERM && err != -ENOSPC)) {
2531 ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, attribute "
2532 "type 0x%x, because the conversion from "
2533 "resident to non-resident attribute failed "
2534 "with error code %i.", vi->i_ino,
2535 (unsigned)le32_to_cpu(ni->type), err);
2536 if (err != -ENOMEM)
2537 err = -EIO;
2538 goto conv_err_out;
2539 }
2540 /* TODO: Not implemented from here, abort. */
2541 if (err == -ENOSPC)
2542 ntfs_error(vol->sb, "Not enough space in the mft record/on "
2543 "disk for the non-resident attribute value. "
2544 "This case is not implemented yet.");
2545 else /* if (err == -EPERM) */
2546 ntfs_error(vol->sb, "This attribute type may not be "
2547 "non-resident. This case is not implemented "
2548 "yet.");
2549 err = -EOPNOTSUPP;
2550 goto conv_err_out;
2551#if 0
2552 // TODO: Attempt to make other attributes non-resident.
2553 if (!err)
2554 goto do_resident_extend;
2555 /*
2556 * Both the attribute list attribute and the standard information
2557 * attribute must remain in the base inode. Thus, if this is one of
2558 * these attributes, we have to try to move other attributes out into
2559 * extent mft records instead.
2560 */
2561 if (ni->type == AT_ATTRIBUTE_LIST ||
2562 ni->type == AT_STANDARD_INFORMATION) {
2563 // TODO: Attempt to move other attributes into extent mft
2564 // records.
2565 err = -EOPNOTSUPP;
2566 if (!err)
2567 goto do_resident_extend;
2568 goto err_out;
2569 }
2570 // TODO: Attempt to move this attribute to an extent mft record, but
2571 // only if it is not already the only attribute in an mft record in
2572 // which case there would be nothing to gain.
2573 err = -EOPNOTSUPP;
2574 if (!err)
2575 goto do_resident_extend;
2576 /* There is nothing we can do to make enough space. )-: */
2577 goto err_out;
2578#endif
2579do_non_resident_truncate:
2580 BUG_ON(!NInoNonResident(ni));
2581 if (alloc_change < 0) {
2582 highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
2583 if (highest_vcn > 0 &&
2584 old_alloc_size >> vol->cluster_size_bits >
2585 highest_vcn + 1) {
2586 /*
2587 * This attribute has multiple extents. Not yet
2588 * supported.
2589 */
2590 ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, "
2591 "attribute type 0x%x, because the "
2592 "attribute is highly fragmented (it "
2593 "consists of multiple extents) and "
2594 "this case is not implemented yet.",
2595 vi->i_ino,
2596 (unsigned)le32_to_cpu(ni->type));
2597 err = -EOPNOTSUPP;
2598 goto bad_out;
2599 }
2600 }
2601 /*
2602 * If the size is shrinking, need to reduce the initialized_size and
2603 * the data_size before reducing the allocation.
2604 */
2605 if (size_change < 0) {
2606 /*
2607 * Make the valid size smaller (i_size is already up-to-date).
2608 */
2609 write_lock_irqsave(&ni->size_lock, flags);
2610 if (new_size < ni->initialized_size) {
2611 ni->initialized_size = new_size;
2612 a->data.non_resident.initialized_size =
2613 cpu_to_sle64(new_size);
2614 }
2615 a->data.non_resident.data_size = cpu_to_sle64(new_size);
2616 write_unlock_irqrestore(&ni->size_lock, flags);
2617 flush_dcache_mft_record_page(ctx->ntfs_ino);
2618 mark_mft_record_dirty(ctx->ntfs_ino);
2619 /* If the allocated size is not changing, we are done. */
2620 if (!alloc_change)
2621 goto unm_done;
2622 /*
2623 * If the size is shrinking it makes no sense for the
2624 * allocation to be growing.
2625 */
2626 BUG_ON(alloc_change > 0);
2627 } else /* if (size_change >= 0) */ {
2628 /*
2629 * The file size is growing or staying the same but the
2630 * allocation can be shrinking, growing or staying the same.
2631 */
2632 if (alloc_change > 0) {
2633 /*
2634 * We need to extend the allocation and possibly update
2635 * the data size. If we are updating the data size,
2636 * since we are not touching the initialized_size we do
2637 * not need to worry about the actual data on disk.
2638 * And as far as the page cache is concerned, there
2639 * will be no pages beyond the old data size and any
2640 * partial region in the last page between the old and
2641 * new data size (or the end of the page if the new
2642 * data size is outside the page) does not need to be
2643 * modified as explained above for the resident
2644 * attribute truncate case. To do this, we simply drop
2645 * the locks we hold and leave all the work to our
2646 * friendly helper ntfs_attr_extend_allocation().
2647 */
2648 ntfs_attr_put_search_ctx(ctx);
2649 unmap_mft_record(base_ni);
2650 up_write(&ni->runlist.lock);
2651 err = ntfs_attr_extend_allocation(ni, new_size,
2652 size_change > 0 ? new_size : -1, -1);
2653 /*
2654 * ntfs_attr_extend_allocation() will have done error
2655 * output already.
2656 */
2657 goto done;
2658 }
2659 if (!alloc_change)
2660 goto alloc_done;
2661 }
2662 /* alloc_change < 0 */
2663 /* Free the clusters. */
2664 nr_freed = ntfs_cluster_free(ni, new_alloc_size >>
2665 vol->cluster_size_bits, -1, ctx);
2666 m = ctx->mrec;
2667 a = ctx->attr;
2668 if (unlikely(nr_freed < 0)) {
2669 ntfs_error(vol->sb, "Failed to release cluster(s) (error code "
2670 "%lli). Unmount and run chkdsk to recover "
2671 "the lost cluster(s).", (long long)nr_freed);
2374 NVolSetErrors(vol); 2672 NVolSetErrors(vol);
2673 nr_freed = 0;
2674 }
2675 /* Truncate the runlist. */
2676 err = ntfs_rl_truncate_nolock(vol, &ni->runlist,
2677 new_alloc_size >> vol->cluster_size_bits);
2678 /*
2679 * If the runlist truncation failed and/or the search context is no
2680 * longer valid, we cannot resize the attribute record or build the
2681 * mapping pairs array thus we mark the inode bad so that no access to
2682 * the freed clusters can happen.
2683 */
2684 if (unlikely(err || IS_ERR(m))) {
2685 ntfs_error(vol->sb, "Failed to %s (error code %li).%s",
2686 IS_ERR(m) ?
2687 "restore attribute search context" :
2688 "truncate attribute runlist",
2689 IS_ERR(m) ? PTR_ERR(m) : err, es);
2690 err = -EIO;
2691 goto bad_out;
2692 }
2693 /* Get the size for the shrunk mapping pairs array for the runlist. */
2694 mp_size = ntfs_get_size_for_mapping_pairs(vol, ni->runlist.rl, 0, -1);
2695 if (unlikely(mp_size <= 0)) {
2696 ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
2697 "attribute type 0x%x, because determining the "
2698 "size for the mapping pairs failed with error "
2699 "code %i.%s", vi->i_ino,
2700 (unsigned)le32_to_cpu(ni->type), mp_size, es);
2701 err = -EIO;
2702 goto bad_out;
2703 }
2704 /*
2705 * Shrink the attribute record for the new mapping pairs array. Note,
2706 * this cannot fail since we are making the attribute smaller thus by
2707 * definition there is enough space to do so.
2708 */
2709 attr_len = le32_to_cpu(a->length);
2710 err = ntfs_attr_record_resize(m, a, mp_size +
2711 le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
2712 BUG_ON(err);
2713 /*
2714 * Generate the mapping pairs array directly into the attribute record.
2715 */
2716 err = ntfs_mapping_pairs_build(vol, (u8*)a +
2717 le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
2718 mp_size, ni->runlist.rl, 0, -1, NULL);
2719 if (unlikely(err)) {
2720 ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
2721 "attribute type 0x%x, because building the "
2722 "mapping pairs failed with error code %i.%s",
2723 vi->i_ino, (unsigned)le32_to_cpu(ni->type),
2724 err, es);
2725 err = -EIO;
2726 goto bad_out;
2727 }
2728 /* Update the allocated/compressed size as well as the highest vcn. */
2729 a->data.non_resident.highest_vcn = cpu_to_sle64((new_alloc_size >>
2730 vol->cluster_size_bits) - 1);
2731 write_lock_irqsave(&ni->size_lock, flags);
2732 ni->allocated_size = new_alloc_size;
2733 a->data.non_resident.allocated_size = cpu_to_sle64(new_alloc_size);
2734 if (NInoSparse(ni) || NInoCompressed(ni)) {
2735 if (nr_freed) {
2736 ni->itype.compressed.size -= nr_freed <<
2737 vol->cluster_size_bits;
2738 BUG_ON(ni->itype.compressed.size < 0);
2739 a->data.non_resident.compressed_size = cpu_to_sle64(
2740 ni->itype.compressed.size);
2741 vi->i_blocks = ni->itype.compressed.size >> 9;
2742 }
2743 } else
2744 vi->i_blocks = new_alloc_size >> 9;
2745 write_unlock_irqrestore(&ni->size_lock, flags);
2746 /*
2747 * We have shrunk the allocation. If this is a shrinking truncate we
2748 * have already dealt with the initialized_size and the data_size above
2749 * and we are done. If the truncate is only changing the allocation
2750 * and not the data_size, we are also done. If this is an extending
2751 * truncate, need to extend the data_size now which is ensured by the
2752 * fact that @size_change is positive.
2753 */
2754alloc_done:
2755 /*
2756 * If the size is growing, need to update it now. If it is shrinking,
2757 * we have already updated it above (before the allocation change).
2758 */
2759 if (size_change > 0)
2760 a->data.non_resident.data_size = cpu_to_sle64(new_size);
2761 /* Ensure the modified mft record is written out. */
2762 flush_dcache_mft_record_page(ctx->ntfs_ino);
2763 mark_mft_record_dirty(ctx->ntfs_ino);
2764unm_done:
2765 ntfs_attr_put_search_ctx(ctx);
2766 unmap_mft_record(base_ni);
2767 up_write(&ni->runlist.lock);
2768done:
2769 /* Update the mtime and ctime on the base inode. */
2770 inode_update_time(VFS_I(base_ni), 1);
2771 if (likely(!err)) {
2772 NInoClearTruncateFailed(ni);
2773 ntfs_debug("Done.");
2774 }
2775 return err;
2776old_bad_out:
2777 old_size = -1;
2778bad_out:
2779 if (err != -ENOMEM && err != -EOPNOTSUPP) {
2375 make_bad_inode(vi); 2780 make_bad_inode(vi);
2781 make_bad_inode(VFS_I(base_ni));
2782 NVolSetErrors(vol);
2376 } 2783 }
2784 if (err != -EOPNOTSUPP)
2785 NInoSetTruncateFailed(ni);
2786 else if (old_size >= 0)
2787 i_size_write(vi, old_size);
2788err_out:
2377 if (ctx) 2789 if (ctx)
2378 ntfs_attr_put_search_ctx(ctx); 2790 ntfs_attr_put_search_ctx(ctx);
2379 if (m) 2791 if (m)
2380 unmap_mft_record(ni); 2792 unmap_mft_record(base_ni);
2381 NInoSetTruncateFailed(ni); 2793 up_write(&ni->runlist.lock);
2794out:
2795 ntfs_debug("Failed. Returning error code %i.", err);
2382 return err; 2796 return err;
2797conv_err_out:
2798 if (err != -ENOMEM && err != -EOPNOTSUPP) {
2799 make_bad_inode(vi);
2800 make_bad_inode(VFS_I(base_ni));
2801 NVolSetErrors(vol);
2802 }
2803 if (err != -EOPNOTSUPP)
2804 NInoSetTruncateFailed(ni);
2805 else
2806 i_size_write(vi, old_size);
2807 goto out;
2383} 2808}
2384 2809
2385/** 2810/**
@@ -2420,8 +2845,7 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
2420 2845
2421 err = inode_change_ok(vi, attr); 2846 err = inode_change_ok(vi, attr);
2422 if (err) 2847 if (err)
2423 return err; 2848 goto out;
2424
2425 /* We do not support NTFS ACLs yet. */ 2849 /* We do not support NTFS ACLs yet. */
2426 if (ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE)) { 2850 if (ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE)) {
2427 ntfs_warning(vi->i_sb, "Changes in user/group/mode are not " 2851 ntfs_warning(vi->i_sb, "Changes in user/group/mode are not "
@@ -2429,14 +2853,22 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
2429 err = -EOPNOTSUPP; 2853 err = -EOPNOTSUPP;
2430 goto out; 2854 goto out;
2431 } 2855 }
2432
2433 if (ia_valid & ATTR_SIZE) { 2856 if (ia_valid & ATTR_SIZE) {
2434 if (attr->ia_size != i_size_read(vi)) { 2857 if (attr->ia_size != i_size_read(vi)) {
2435 ntfs_warning(vi->i_sb, "Changes in inode size are not " 2858 ntfs_inode *ni = NTFS_I(vi);
2436 "supported yet, ignoring."); 2859 /*
2437 err = -EOPNOTSUPP; 2860 * FIXME: For now we do not support resizing of
2438 // TODO: Implement... 2861 * compressed or encrypted files yet.
2439 // err = vmtruncate(vi, attr->ia_size); 2862 */
2863 if (NInoCompressed(ni) || NInoEncrypted(ni)) {
2864 ntfs_warning(vi->i_sb, "Changes in inode size "
2865 "are not supported yet for "
2866 "%s files, ignoring.",
2867 NInoCompressed(ni) ?
2868 "compressed" : "encrypted");
2869 err = -EOPNOTSUPP;
2870 } else
2871 err = vmtruncate(vi, attr->ia_size);
2440 if (err || ia_valid == ATTR_SIZE) 2872 if (err || ia_valid == ATTR_SIZE)
2441 goto out; 2873 goto out;
2442 } else { 2874 } else {
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index 5af3bf0b7eee..29cabf93d2d2 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -76,6 +76,7 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
76 * @count: number of clusters to allocate 76 * @count: number of clusters to allocate
77 * @start_lcn: starting lcn at which to allocate the clusters (or -1 if none) 77 * @start_lcn: starting lcn at which to allocate the clusters (or -1 if none)
78 * @zone: zone from which to allocate the clusters 78 * @zone: zone from which to allocate the clusters
79 * @is_extension: if TRUE, this is an attribute extension
79 * 80 *
80 * Allocate @count clusters preferably starting at cluster @start_lcn or at the 81 * Allocate @count clusters preferably starting at cluster @start_lcn or at the
81 * current allocator position if @start_lcn is -1, on the mounted ntfs volume 82 * current allocator position if @start_lcn is -1, on the mounted ntfs volume
@@ -86,6 +87,13 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
86 * @start_vcn specifies the vcn of the first allocated cluster. This makes 87 * @start_vcn specifies the vcn of the first allocated cluster. This makes
87 * merging the resulting runlist with the old runlist easier. 88 * merging the resulting runlist with the old runlist easier.
88 * 89 *
90 * If @is_extension is TRUE, the caller is allocating clusters to extend an
91 * attribute and if it is FALSE, the caller is allocating clusters to fill a
92 * hole in an attribute. Practically the difference is that if @is_extension
93 * is TRUE the returned runlist will be terminated with LCN_ENOENT and if
94 * @is_extension is FALSE the runlist will be terminated with
95 * LCN_RL_NOT_MAPPED.
96 *
89 * You need to check the return value with IS_ERR(). If this is false, the 97 * You need to check the return value with IS_ERR(). If this is false, the
90 * function was successful and the return value is a runlist describing the 98 * function was successful and the return value is a runlist describing the
91 * allocated cluster(s). If IS_ERR() is true, the function failed and 99 * allocated cluster(s). If IS_ERR() is true, the function failed and
@@ -137,7 +145,8 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
137 */ 145 */
138runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn, 146runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
139 const s64 count, const LCN start_lcn, 147 const s64 count, const LCN start_lcn,
140 const NTFS_CLUSTER_ALLOCATION_ZONES zone) 148 const NTFS_CLUSTER_ALLOCATION_ZONES zone,
149 const BOOL is_extension)
141{ 150{
142 LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn; 151 LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn;
143 LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size; 152 LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size;
@@ -310,7 +319,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
310 continue; 319 continue;
311 } 320 }
312 bit = 1 << (lcn & 7); 321 bit = 1 << (lcn & 7);
313 ntfs_debug("bit %i.", bit); 322 ntfs_debug("bit 0x%x.", bit);
314 /* If the bit is already set, go onto the next one. */ 323 /* If the bit is already set, go onto the next one. */
315 if (*byte & bit) { 324 if (*byte & bit) {
316 lcn++; 325 lcn++;
@@ -729,7 +738,7 @@ out:
729 /* Add runlist terminator element. */ 738 /* Add runlist terminator element. */
730 if (likely(rl)) { 739 if (likely(rl)) {
731 rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length; 740 rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
732 rl[rlpos].lcn = LCN_RL_NOT_MAPPED; 741 rl[rlpos].lcn = is_extension ? LCN_ENOENT : LCN_RL_NOT_MAPPED;
733 rl[rlpos].length = 0; 742 rl[rlpos].length = 0;
734 } 743 }
735 if (likely(page && !IS_ERR(page))) { 744 if (likely(page && !IS_ERR(page))) {
@@ -782,6 +791,7 @@ out:
782 * @ni: ntfs inode whose runlist describes the clusters to free 791 * @ni: ntfs inode whose runlist describes the clusters to free
783 * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters 792 * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters
784 * @count: number of clusters to free or -1 for all clusters 793 * @count: number of clusters to free or -1 for all clusters
794 * @ctx: active attribute search context if present or NULL if not
785 * @is_rollback: true if this is a rollback operation 795 * @is_rollback: true if this is a rollback operation
786 * 796 *
787 * Free @count clusters starting at the cluster @start_vcn in the runlist 797 * Free @count clusters starting at the cluster @start_vcn in the runlist
@@ -791,15 +801,39 @@ out:
791 * deallocated. Thus, to completely free all clusters in a runlist, use 801 * deallocated. Thus, to completely free all clusters in a runlist, use
792 * @start_vcn = 0 and @count = -1. 802 * @start_vcn = 0 and @count = -1.
793 * 803 *
804 * If @ctx is specified, it is an active search context of @ni and its base mft
805 * record. This is needed when __ntfs_cluster_free() encounters unmapped
806 * runlist fragments and allows their mapping. If you do not have the mft
807 * record mapped, you can specify @ctx as NULL and __ntfs_cluster_free() will
808 * perform the necessary mapping and unmapping.
809 *
810 * Note, __ntfs_cluster_free() saves the state of @ctx on entry and restores it
811 * before returning. Thus, @ctx will be left pointing to the same attribute on
812 * return as on entry. However, the actual pointers in @ctx may point to
813 * different memory locations on return, so you must remember to reset any
814 * cached pointers from the @ctx, i.e. after the call to __ntfs_cluster_free(),
815 * you will probably want to do:
816 * m = ctx->mrec;
817 * a = ctx->attr;
818 * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
819 * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
820 *
794 * @is_rollback should always be FALSE, it is for internal use to rollback 821 * @is_rollback should always be FALSE, it is for internal use to rollback
795 * errors. You probably want to use ntfs_cluster_free() instead. 822 * errors. You probably want to use ntfs_cluster_free() instead.
796 * 823 *
797 * Note, ntfs_cluster_free() does not modify the runlist at all, so the caller 824 * Note, __ntfs_cluster_free() does not modify the runlist, so you have to
798 * has to deal with it later. 825 * remove from the runlist or mark sparse the freed runs later.
799 * 826 *
800 * Return the number of deallocated clusters (not counting sparse ones) on 827 * Return the number of deallocated clusters (not counting sparse ones) on
801 * success and -errno on error. 828 * success and -errno on error.
802 * 829 *
830 * WARNING: If @ctx is supplied, regardless of whether success or failure is
831 * returned, you need to check IS_ERR(@ctx->mrec) and if TRUE the @ctx
832 * is no longer valid, i.e. you need to either call
833 * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
834 * In that case PTR_ERR(@ctx->mrec) will give you the error code for
835 * why the mapping of the old inode failed.
836 *
803 * Locking: - The runlist described by @ni must be locked for writing on entry 837 * Locking: - The runlist described by @ni must be locked for writing on entry
804 * and is locked on return. Note the runlist may be modified when 838 * and is locked on return. Note the runlist may be modified when
805 * needed runlist fragments need to be mapped. 839 * needed runlist fragments need to be mapped.
@@ -807,9 +841,13 @@ out:
807 * on return. 841 * on return.
808 * - This function takes the volume lcn bitmap lock for writing and 842 * - This function takes the volume lcn bitmap lock for writing and
809 * modifies the bitmap contents. 843 * modifies the bitmap contents.
844 * - If @ctx is NULL, the base mft record of @ni must not be mapped on
845 * entry and it will be left unmapped on return.
846 * - If @ctx is not NULL, the base mft record must be mapped on entry
847 * and it will be left mapped on return.
810 */ 848 */
811s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count, 849s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count,
812 const BOOL is_rollback) 850 ntfs_attr_search_ctx *ctx, const BOOL is_rollback)
813{ 851{
814 s64 delta, to_free, total_freed, real_freed; 852 s64 delta, to_free, total_freed, real_freed;
815 ntfs_volume *vol; 853 ntfs_volume *vol;
@@ -839,7 +877,7 @@ s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count,
839 877
840 total_freed = real_freed = 0; 878 total_freed = real_freed = 0;
841 879
842 rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, TRUE); 880 rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, ctx);
843 if (IS_ERR(rl)) { 881 if (IS_ERR(rl)) {
844 if (!is_rollback) 882 if (!is_rollback)
845 ntfs_error(vol->sb, "Failed to find first runlist " 883 ntfs_error(vol->sb, "Failed to find first runlist "
@@ -893,7 +931,7 @@ s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count,
893 931
894 /* Attempt to map runlist. */ 932 /* Attempt to map runlist. */
895 vcn = rl->vcn; 933 vcn = rl->vcn;
896 rl = ntfs_attr_find_vcn_nolock(ni, vcn, TRUE); 934 rl = ntfs_attr_find_vcn_nolock(ni, vcn, ctx);
897 if (IS_ERR(rl)) { 935 if (IS_ERR(rl)) {
898 err = PTR_ERR(rl); 936 err = PTR_ERR(rl);
899 if (!is_rollback) 937 if (!is_rollback)
@@ -961,7 +999,7 @@ err_out:
961 * If rollback fails, set the volume errors flag, emit an error 999 * If rollback fails, set the volume errors flag, emit an error
962 * message, and return the error code. 1000 * message, and return the error code.
963 */ 1001 */
964 delta = __ntfs_cluster_free(ni, start_vcn, total_freed, TRUE); 1002 delta = __ntfs_cluster_free(ni, start_vcn, total_freed, ctx, TRUE);
965 if (delta < 0) { 1003 if (delta < 0) {
966 ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving " 1004 ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving "
967 "inconsistent metadata! Unmount and run " 1005 "inconsistent metadata! Unmount and run "
diff --git a/fs/ntfs/lcnalloc.h b/fs/ntfs/lcnalloc.h
index a6a8827882e7..72cbca7003b2 100644
--- a/fs/ntfs/lcnalloc.h
+++ b/fs/ntfs/lcnalloc.h
@@ -27,6 +27,7 @@
27 27
28#include <linux/fs.h> 28#include <linux/fs.h>
29 29
30#include "attrib.h"
30#include "types.h" 31#include "types.h"
31#include "inode.h" 32#include "inode.h"
32#include "runlist.h" 33#include "runlist.h"
@@ -41,16 +42,18 @@ typedef enum {
41 42
42extern runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, 43extern runlist_element *ntfs_cluster_alloc(ntfs_volume *vol,
43 const VCN start_vcn, const s64 count, const LCN start_lcn, 44 const VCN start_vcn, const s64 count, const LCN start_lcn,
44 const NTFS_CLUSTER_ALLOCATION_ZONES zone); 45 const NTFS_CLUSTER_ALLOCATION_ZONES zone,
46 const BOOL is_extension);
45 47
46extern s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, 48extern s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn,
47 s64 count, const BOOL is_rollback); 49 s64 count, ntfs_attr_search_ctx *ctx, const BOOL is_rollback);
48 50
49/** 51/**
50 * ntfs_cluster_free - free clusters on an ntfs volume 52 * ntfs_cluster_free - free clusters on an ntfs volume
51 * @ni: ntfs inode whose runlist describes the clusters to free 53 * @ni: ntfs inode whose runlist describes the clusters to free
52 * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters 54 * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters
53 * @count: number of clusters to free or -1 for all clusters 55 * @count: number of clusters to free or -1 for all clusters
56 * @ctx: active attribute search context if present or NULL if not
54 * 57 *
55 * Free @count clusters starting at the cluster @start_vcn in the runlist 58 * Free @count clusters starting at the cluster @start_vcn in the runlist
56 * described by the ntfs inode @ni. 59 * described by the ntfs inode @ni.
@@ -59,12 +62,36 @@ extern s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn,
59 * deallocated. Thus, to completely free all clusters in a runlist, use 62 * deallocated. Thus, to completely free all clusters in a runlist, use
60 * @start_vcn = 0 and @count = -1. 63 * @start_vcn = 0 and @count = -1.
61 * 64 *
62 * Note, ntfs_cluster_free() does not modify the runlist at all, so the caller 65 * If @ctx is specified, it is an active search context of @ni and its base mft
63 * has to deal with it later. 66 * record. This is needed when ntfs_cluster_free() encounters unmapped runlist
67 * fragments and allows their mapping. If you do not have the mft record
68 * mapped, you can specify @ctx as NULL and ntfs_cluster_free() will perform
69 * the necessary mapping and unmapping.
70 *
71 * Note, ntfs_cluster_free() saves the state of @ctx on entry and restores it
72 * before returning. Thus, @ctx will be left pointing to the same attribute on
73 * return as on entry. However, the actual pointers in @ctx may point to
74 * different memory locations on return, so you must remember to reset any
75 * cached pointers from the @ctx, i.e. after the call to ntfs_cluster_free(),
76 * you will probably want to do:
77 * m = ctx->mrec;
78 * a = ctx->attr;
79 * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
80 * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
81 *
82 * Note, ntfs_cluster_free() does not modify the runlist, so you have to remove
83 * from the runlist or mark sparse the freed runs later.
64 * 84 *
65 * Return the number of deallocated clusters (not counting sparse ones) on 85 * Return the number of deallocated clusters (not counting sparse ones) on
66 * success and -errno on error. 86 * success and -errno on error.
67 * 87 *
88 * WARNING: If @ctx is supplied, regardless of whether success or failure is
89 * returned, you need to check IS_ERR(@ctx->mrec) and if TRUE the @ctx
90 * is no longer valid, i.e. you need to either call
91 * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
92 * In that case PTR_ERR(@ctx->mrec) will give you the error code for
93 * why the mapping of the old inode failed.
94 *
68 * Locking: - The runlist described by @ni must be locked for writing on entry 95 * Locking: - The runlist described by @ni must be locked for writing on entry
69 * and is locked on return. Note the runlist may be modified when 96 * and is locked on return. Note the runlist may be modified when
70 * needed runlist fragments need to be mapped. 97 * needed runlist fragments need to be mapped.
@@ -72,11 +99,15 @@ extern s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn,
72 * on return. 99 * on return.
73 * - This function takes the volume lcn bitmap lock for writing and 100 * - This function takes the volume lcn bitmap lock for writing and
74 * modifies the bitmap contents. 101 * modifies the bitmap contents.
102 * - If @ctx is NULL, the base mft record of @ni must not be mapped on
103 * entry and it will be left unmapped on return.
104 * - If @ctx is not NULL, the base mft record must be mapped on entry
105 * and it will be left mapped on return.
75 */ 106 */
76static inline s64 ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, 107static inline s64 ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn,
77 s64 count) 108 s64 count, ntfs_attr_search_ctx *ctx)
78{ 109{
79 return __ntfs_cluster_free(ni, start_vcn, count, FALSE); 110 return __ntfs_cluster_free(ni, start_vcn, count, ctx, FALSE);
80} 111}
81 112
82extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, 113extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index b011369b5956..0c65cbb8c5cf 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -49,7 +49,8 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
49 ntfs_volume *vol = ni->vol; 49 ntfs_volume *vol = ni->vol;
50 struct inode *mft_vi = vol->mft_ino; 50 struct inode *mft_vi = vol->mft_ino;
51 struct page *page; 51 struct page *page;
52 unsigned long index, ofs, end_index; 52 unsigned long index, end_index;
53 unsigned ofs;
53 54
54 BUG_ON(ni->page); 55 BUG_ON(ni->page);
55 /* 56 /*
@@ -1308,7 +1309,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1308 ll = mftbmp_ni->allocated_size; 1309 ll = mftbmp_ni->allocated_size;
1309 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); 1310 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
1310 rl = ntfs_attr_find_vcn_nolock(mftbmp_ni, 1311 rl = ntfs_attr_find_vcn_nolock(mftbmp_ni,
1311 (ll - 1) >> vol->cluster_size_bits, TRUE); 1312 (ll - 1) >> vol->cluster_size_bits, NULL);
1312 if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) { 1313 if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
1313 up_write(&mftbmp_ni->runlist.lock); 1314 up_write(&mftbmp_ni->runlist.lock);
1314 ntfs_error(vol->sb, "Failed to determine last allocated " 1315 ntfs_error(vol->sb, "Failed to determine last allocated "
@@ -1354,7 +1355,8 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1354 up_write(&vol->lcnbmp_lock); 1355 up_write(&vol->lcnbmp_lock);
1355 ntfs_unmap_page(page); 1356 ntfs_unmap_page(page);
1356 /* Allocate a cluster from the DATA_ZONE. */ 1357 /* Allocate a cluster from the DATA_ZONE. */
1357 rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE); 1358 rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE,
1359 TRUE);
1358 if (IS_ERR(rl2)) { 1360 if (IS_ERR(rl2)) {
1359 up_write(&mftbmp_ni->runlist.lock); 1361 up_write(&mftbmp_ni->runlist.lock);
1360 ntfs_error(vol->sb, "Failed to allocate a cluster for " 1362 ntfs_error(vol->sb, "Failed to allocate a cluster for "
@@ -1738,7 +1740,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1738 ll = mft_ni->allocated_size; 1740 ll = mft_ni->allocated_size;
1739 read_unlock_irqrestore(&mft_ni->size_lock, flags); 1741 read_unlock_irqrestore(&mft_ni->size_lock, flags);
1740 rl = ntfs_attr_find_vcn_nolock(mft_ni, 1742 rl = ntfs_attr_find_vcn_nolock(mft_ni,
1741 (ll - 1) >> vol->cluster_size_bits, TRUE); 1743 (ll - 1) >> vol->cluster_size_bits, NULL);
1742 if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) { 1744 if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
1743 up_write(&mft_ni->runlist.lock); 1745 up_write(&mft_ni->runlist.lock);
1744 ntfs_error(vol->sb, "Failed to determine last allocated " 1746 ntfs_error(vol->sb, "Failed to determine last allocated "
@@ -1779,7 +1781,8 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1779 nr > min_nr ? "default" : "minimal", (long long)nr); 1781 nr > min_nr ? "default" : "minimal", (long long)nr);
1780 old_last_vcn = rl[1].vcn; 1782 old_last_vcn = rl[1].vcn;
1781 do { 1783 do {
1782 rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE); 1784 rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE,
1785 TRUE);
1783 if (likely(!IS_ERR(rl2))) 1786 if (likely(!IS_ERR(rl2)))
1784 break; 1787 break;
1785 if (PTR_ERR(rl2) != -ENOSPC || nr == min_nr) { 1788 if (PTR_ERR(rl2) != -ENOSPC || nr == min_nr) {
@@ -1951,20 +1954,21 @@ restore_undo_alloc:
1951 NVolSetErrors(vol); 1954 NVolSetErrors(vol);
1952 return ret; 1955 return ret;
1953 } 1956 }
1954 a = ctx->attr; 1957 ctx->attr->data.non_resident.highest_vcn =
1955 a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1); 1958 cpu_to_sle64(old_last_vcn - 1);
1956undo_alloc: 1959undo_alloc:
1957 if (ntfs_cluster_free(mft_ni, old_last_vcn, -1) < 0) { 1960 if (ntfs_cluster_free(mft_ni, old_last_vcn, -1, ctx) < 0) {
1958 ntfs_error(vol->sb, "Failed to free clusters from mft data " 1961 ntfs_error(vol->sb, "Failed to free clusters from mft data "
1959 "attribute.%s", es); 1962 "attribute.%s", es);
1960 NVolSetErrors(vol); 1963 NVolSetErrors(vol);
1961 } 1964 }
1965 a = ctx->attr;
1962 if (ntfs_rl_truncate_nolock(vol, &mft_ni->runlist, old_last_vcn)) { 1966 if (ntfs_rl_truncate_nolock(vol, &mft_ni->runlist, old_last_vcn)) {
1963 ntfs_error(vol->sb, "Failed to truncate mft data attribute " 1967 ntfs_error(vol->sb, "Failed to truncate mft data attribute "
1964 "runlist.%s", es); 1968 "runlist.%s", es);
1965 NVolSetErrors(vol); 1969 NVolSetErrors(vol);
1966 } 1970 }
1967 if (mp_rebuilt) { 1971 if (mp_rebuilt && !IS_ERR(ctx->mrec)) {
1968 if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu( 1972 if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
1969 a->data.non_resident.mapping_pairs_offset), 1973 a->data.non_resident.mapping_pairs_offset),
1970 old_alen - le16_to_cpu( 1974 old_alen - le16_to_cpu(
@@ -1981,6 +1985,10 @@ undo_alloc:
1981 } 1985 }
1982 flush_dcache_mft_record_page(ctx->ntfs_ino); 1986 flush_dcache_mft_record_page(ctx->ntfs_ino);
1983 mark_mft_record_dirty(ctx->ntfs_ino); 1987 mark_mft_record_dirty(ctx->ntfs_ino);
1988 } else if (IS_ERR(ctx->mrec)) {
1989 ntfs_error(vol->sb, "Failed to restore attribute search "
1990 "context.%s", es);
1991 NVolSetErrors(vol);
1984 } 1992 }
1985 if (ctx) 1993 if (ctx)
1986 ntfs_attr_put_search_ctx(ctx); 1994 ntfs_attr_put_search_ctx(ctx);