diff options
Diffstat (limited to 'fs/ntfs/lcnalloc.c')
-rw-r--r-- | fs/ntfs/lcnalloc.c | 1002 |
1 files changed, 1002 insertions, 0 deletions
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c new file mode 100644 index 000000000000..23fd911078b1 --- /dev/null +++ b/fs/ntfs/lcnalloc.c | |||
@@ -0,0 +1,1002 @@ | |||
1 | /* | ||
2 | * lcnalloc.c - Cluster (de)allocation code. Part of the Linux-NTFS project. | ||
3 | * | ||
4 | * Copyright (c) 2004 Anton Altaparmakov | ||
5 | * | ||
6 | * This program/include file is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License as published | ||
8 | * by the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program/include file is distributed in the hope that it will be | ||
12 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty | ||
13 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program (in the main directory of the Linux-NTFS | ||
18 | * distribution in the file COPYING); if not, write to the Free Software | ||
19 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | |||
22 | #ifdef NTFS_RW | ||
23 | |||
24 | #include <linux/pagemap.h> | ||
25 | |||
26 | #include "lcnalloc.h" | ||
27 | #include "debug.h" | ||
28 | #include "bitmap.h" | ||
29 | #include "inode.h" | ||
30 | #include "volume.h" | ||
31 | #include "attrib.h" | ||
32 | #include "malloc.h" | ||
33 | #include "aops.h" | ||
34 | #include "ntfs.h" | ||
35 | |||
36 | /** | ||
37 | * ntfs_cluster_free_from_rl_nolock - free clusters from runlist | ||
38 | * @vol: mounted ntfs volume on which to free the clusters | ||
39 | * @rl: runlist describing the clusters to free | ||
40 | * | ||
41 | * Free all the clusters described by the runlist @rl on the volume @vol. In | ||
42 | * the case of an error being returned, at least some of the clusters were not | ||
43 | * freed. | ||
44 | * | ||
45 | * Return 0 on success and -errno on error. | ||
46 | * | ||
47 | * Locking: - The volume lcn bitmap must be locked for writing on entry and is | ||
48 | * left locked on return. | ||
49 | */ | ||
50 | int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, | ||
51 | const runlist_element *rl) | ||
52 | { | ||
53 | struct inode *lcnbmp_vi = vol->lcnbmp_ino; | ||
54 | int ret = 0; | ||
55 | |||
56 | ntfs_debug("Entering."); | ||
57 | for (; rl->length; rl++) { | ||
58 | int err; | ||
59 | |||
60 | if (rl->lcn < 0) | ||
61 | continue; | ||
62 | err = ntfs_bitmap_clear_run(lcnbmp_vi, rl->lcn, rl->length); | ||
63 | if (unlikely(err && (!ret || ret == ENOMEM) && ret != err)) | ||
64 | ret = err; | ||
65 | } | ||
66 | ntfs_debug("Done."); | ||
67 | return ret; | ||
68 | } | ||
69 | |||
70 | /** | ||
71 | * ntfs_cluster_alloc - allocate clusters on an ntfs volume | ||
72 | * @vol: mounted ntfs volume on which to allocate the clusters | ||
73 | * @start_vcn: vcn to use for the first allocated cluster | ||
74 | * @count: number of clusters to allocate | ||
75 | * @start_lcn: starting lcn at which to allocate the clusters (or -1 if none) | ||
76 | * @zone: zone from which to allocate the clusters | ||
77 | * | ||
78 | * Allocate @count clusters preferably starting at cluster @start_lcn or at the | ||
79 | * current allocator position if @start_lcn is -1, on the mounted ntfs volume | ||
80 | * @vol. @zone is either DATA_ZONE for allocation of normal clusters or | ||
81 | * MFT_ZONE for allocation of clusters for the master file table, i.e. the | ||
82 | * $MFT/$DATA attribute. | ||
83 | * | ||
84 | * @start_vcn specifies the vcn of the first allocated cluster. This makes | ||
85 | * merging the resulting runlist with the old runlist easier. | ||
86 | * | ||
87 | * You need to check the return value with IS_ERR(). If this is false, the | ||
88 | * function was successful and the return value is a runlist describing the | ||
89 | * allocated cluster(s). If IS_ERR() is true, the function failed and | ||
90 | * PTR_ERR() gives you the error code. | ||
91 | * | ||
92 | * Notes on the allocation algorithm | ||
93 | * ================================= | ||
94 | * | ||
95 | * There are two data zones. First is the area between the end of the mft zone | ||
96 | * and the end of the volume, and second is the area between the start of the | ||
97 | * volume and the start of the mft zone. On unmodified/standard NTFS 1.x | ||
98 | * volumes, the second data zone does not exist due to the mft zone being | ||
99 | * expanded to cover the start of the volume in order to reserve space for the | ||
100 | * mft bitmap attribute. | ||
101 | * | ||
102 | * This is not the prettiest function but the complexity stems from the need of | ||
103 | * implementing the mft vs data zoned approach and from the fact that we have | ||
104 | * access to the lcn bitmap in portions of up to 8192 bytes at a time, so we | ||
105 | * need to cope with crossing over boundaries of two buffers. Further, the | ||
106 | * fact that the allocator allows for caller supplied hints as to the location | ||
107 | * of where allocation should begin and the fact that the allocator keeps track | ||
108 | * of where in the data zones the next natural allocation should occur, | ||
109 | * contribute to the complexity of the function. But it should all be | ||
110 | * worthwhile, because this allocator should: 1) be a full implementation of | ||
111 | * the MFT zone approach used by Windows NT, 2) cause reduction in | ||
112 | * fragmentation, and 3) be speedy in allocations (the code is not optimized | ||
113 | * for speed, but the algorithm is, so further speed improvements are probably | ||
114 | * possible). | ||
115 | * | ||
116 | * FIXME: We should be monitoring cluster allocation and increment the MFT zone | ||
117 | * size dynamically but this is something for the future. We will just cause | ||
118 | * heavier fragmentation by not doing it and I am not even sure Windows would | ||
119 | * grow the MFT zone dynamically, so it might even be correct not to do this. | ||
120 | * The overhead in doing dynamic MFT zone expansion would be very large and | ||
121 | * unlikely worth the effort. (AIA) | ||
122 | * | ||
123 | * TODO: I have added in double the required zone position pointer wrap around | ||
124 | * logic which can be optimized to having only one of the two logic sets. | ||
125 | * However, having the double logic will work fine, but if we have only one of | ||
126 | * the sets and we get it wrong somewhere, then we get into trouble, so | ||
127 | * removing the duplicate logic requires _very_ careful consideration of _all_ | ||
128 | * possible code paths. So at least for now, I am leaving the double logic - | ||
129 | * better safe than sorry... (AIA) | ||
130 | * | ||
131 | * Locking: - The volume lcn bitmap must be unlocked on entry and is unlocked | ||
132 | * on return. | ||
133 | * - This function takes the volume lcn bitmap lock for writing and | ||
134 | * modifies the bitmap contents. | ||
135 | */ | ||
136 | runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn, | ||
137 | const s64 count, const LCN start_lcn, | ||
138 | const NTFS_CLUSTER_ALLOCATION_ZONES zone) | ||
139 | { | ||
140 | LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn; | ||
141 | LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size; | ||
142 | s64 clusters; | ||
143 | struct inode *lcnbmp_vi; | ||
144 | runlist_element *rl = NULL; | ||
145 | struct address_space *mapping; | ||
146 | struct page *page = NULL; | ||
147 | u8 *buf, *byte; | ||
148 | int err = 0, rlpos, rlsize, buf_size; | ||
149 | u8 pass, done_zones, search_zone, need_writeback = 0, bit; | ||
150 | |||
151 | ntfs_debug("Entering for start_vcn 0x%llx, count 0x%llx, start_lcn " | ||
152 | "0x%llx, zone %s_ZONE.", (unsigned long long)start_vcn, | ||
153 | (unsigned long long)count, | ||
154 | (unsigned long long)start_lcn, | ||
155 | zone == MFT_ZONE ? "MFT" : "DATA"); | ||
156 | BUG_ON(!vol); | ||
157 | lcnbmp_vi = vol->lcnbmp_ino; | ||
158 | BUG_ON(!lcnbmp_vi); | ||
159 | BUG_ON(start_vcn < 0); | ||
160 | BUG_ON(count < 0); | ||
161 | BUG_ON(start_lcn < -1); | ||
162 | BUG_ON(zone < FIRST_ZONE); | ||
163 | BUG_ON(zone > LAST_ZONE); | ||
164 | |||
165 | /* Return empty runlist if @count == 0 */ | ||
166 | // FIXME: Do we want to just return NULL instead? (AIA) | ||
167 | if (!count) { | ||
168 | rl = ntfs_malloc_nofs(PAGE_SIZE); | ||
169 | if (!rl) | ||
170 | return ERR_PTR(-ENOMEM); | ||
171 | rl[0].vcn = start_vcn; | ||
172 | rl[0].lcn = LCN_RL_NOT_MAPPED; | ||
173 | rl[0].length = 0; | ||
174 | return rl; | ||
175 | } | ||
176 | /* Take the lcnbmp lock for writing. */ | ||
177 | down_write(&vol->lcnbmp_lock); | ||
178 | /* | ||
179 | * If no specific @start_lcn was requested, use the current data zone | ||
180 | * position, otherwise use the requested @start_lcn but make sure it | ||
181 | * lies outside the mft zone. Also set done_zones to 0 (no zones done) | ||
182 | * and pass depending on whether we are starting inside a zone (1) or | ||
183 | * at the beginning of a zone (2). If requesting from the MFT_ZONE, | ||
184 | * we either start at the current position within the mft zone or at | ||
185 | * the specified position. If the latter is out of bounds then we start | ||
186 | * at the beginning of the MFT_ZONE. | ||
187 | */ | ||
188 | done_zones = 0; | ||
189 | pass = 1; | ||
190 | /* | ||
191 | * zone_start and zone_end are the current search range. search_zone | ||
192 | * is 1 for mft zone, 2 for data zone 1 (end of mft zone till end of | ||
193 | * volume) and 4 for data zone 2 (start of volume till start of mft | ||
194 | * zone). | ||
195 | */ | ||
196 | zone_start = start_lcn; | ||
197 | if (zone_start < 0) { | ||
198 | if (zone == DATA_ZONE) | ||
199 | zone_start = vol->data1_zone_pos; | ||
200 | else | ||
201 | zone_start = vol->mft_zone_pos; | ||
202 | if (!zone_start) { | ||
203 | /* | ||
204 | * Zone starts at beginning of volume which means a | ||
205 | * single pass is sufficient. | ||
206 | */ | ||
207 | pass = 2; | ||
208 | } | ||
209 | } else if (zone == DATA_ZONE && zone_start >= vol->mft_zone_start && | ||
210 | zone_start < vol->mft_zone_end) { | ||
211 | zone_start = vol->mft_zone_end; | ||
212 | /* | ||
213 | * Starting at beginning of data1_zone which means a single | ||
214 | * pass in this zone is sufficient. | ||
215 | */ | ||
216 | pass = 2; | ||
217 | } else if (zone == MFT_ZONE && (zone_start < vol->mft_zone_start || | ||
218 | zone_start >= vol->mft_zone_end)) { | ||
219 | zone_start = vol->mft_lcn; | ||
220 | if (!vol->mft_zone_end) | ||
221 | zone_start = 0; | ||
222 | /* | ||
223 | * Starting at beginning of volume which means a single pass | ||
224 | * is sufficient. | ||
225 | */ | ||
226 | pass = 2; | ||
227 | } | ||
228 | if (zone == MFT_ZONE) { | ||
229 | zone_end = vol->mft_zone_end; | ||
230 | search_zone = 1; | ||
231 | } else /* if (zone == DATA_ZONE) */ { | ||
232 | /* Skip searching the mft zone. */ | ||
233 | done_zones |= 1; | ||
234 | if (zone_start >= vol->mft_zone_end) { | ||
235 | zone_end = vol->nr_clusters; | ||
236 | search_zone = 2; | ||
237 | } else { | ||
238 | zone_end = vol->mft_zone_start; | ||
239 | search_zone = 4; | ||
240 | } | ||
241 | } | ||
242 | /* | ||
243 | * bmp_pos is the current bit position inside the bitmap. We use | ||
244 | * bmp_initial_pos to determine whether or not to do a zone switch. | ||
245 | */ | ||
246 | bmp_pos = bmp_initial_pos = zone_start; | ||
247 | |||
248 | /* Loop until all clusters are allocated, i.e. clusters == 0. */ | ||
249 | clusters = count; | ||
250 | rlpos = rlsize = 0; | ||
251 | mapping = lcnbmp_vi->i_mapping; | ||
252 | while (1) { | ||
253 | ntfs_debug("Start of outer while loop: done_zones 0x%x, " | ||
254 | "search_zone %i, pass %i, zone_start 0x%llx, " | ||
255 | "zone_end 0x%llx, bmp_initial_pos 0x%llx, " | ||
256 | "bmp_pos 0x%llx, rlpos %i, rlsize %i.", | ||
257 | done_zones, search_zone, pass, | ||
258 | (unsigned long long)zone_start, | ||
259 | (unsigned long long)zone_end, | ||
260 | (unsigned long long)bmp_initial_pos, | ||
261 | (unsigned long long)bmp_pos, rlpos, rlsize); | ||
262 | /* Loop until we run out of free clusters. */ | ||
263 | last_read_pos = bmp_pos >> 3; | ||
264 | ntfs_debug("last_read_pos 0x%llx.", | ||
265 | (unsigned long long)last_read_pos); | ||
266 | if (last_read_pos > lcnbmp_vi->i_size) { | ||
267 | ntfs_debug("End of attribute reached. " | ||
268 | "Skipping to zone_pass_done."); | ||
269 | goto zone_pass_done; | ||
270 | } | ||
271 | if (likely(page)) { | ||
272 | if (need_writeback) { | ||
273 | ntfs_debug("Marking page dirty."); | ||
274 | flush_dcache_page(page); | ||
275 | set_page_dirty(page); | ||
276 | need_writeback = 0; | ||
277 | } | ||
278 | ntfs_unmap_page(page); | ||
279 | } | ||
280 | page = ntfs_map_page(mapping, last_read_pos >> | ||
281 | PAGE_CACHE_SHIFT); | ||
282 | if (IS_ERR(page)) { | ||
283 | err = PTR_ERR(page); | ||
284 | ntfs_error(vol->sb, "Failed to map page."); | ||
285 | goto out; | ||
286 | } | ||
287 | buf_size = last_read_pos & ~PAGE_CACHE_MASK; | ||
288 | buf = page_address(page) + buf_size; | ||
289 | buf_size = PAGE_CACHE_SIZE - buf_size; | ||
290 | if (unlikely(last_read_pos + buf_size > lcnbmp_vi->i_size)) | ||
291 | buf_size = lcnbmp_vi->i_size - last_read_pos; | ||
292 | buf_size <<= 3; | ||
293 | lcn = bmp_pos & 7; | ||
294 | bmp_pos &= ~7; | ||
295 | ntfs_debug("Before inner while loop: buf_size %i, lcn 0x%llx, " | ||
296 | "bmp_pos 0x%llx, need_writeback %i.", buf_size, | ||
297 | (unsigned long long)lcn, | ||
298 | (unsigned long long)bmp_pos, need_writeback); | ||
299 | while (lcn < buf_size && lcn + bmp_pos < zone_end) { | ||
300 | byte = buf + (lcn >> 3); | ||
301 | ntfs_debug("In inner while loop: buf_size %i, " | ||
302 | "lcn 0x%llx, bmp_pos 0x%llx, " | ||
303 | "need_writeback %i, byte ofs 0x%x, " | ||
304 | "*byte 0x%x.", buf_size, | ||
305 | (unsigned long long)lcn, | ||
306 | (unsigned long long)bmp_pos, | ||
307 | need_writeback, | ||
308 | (unsigned int)(lcn >> 3), | ||
309 | (unsigned int)*byte); | ||
310 | /* Skip full bytes. */ | ||
311 | if (*byte == 0xff) { | ||
312 | lcn = (lcn + 8) & ~7; | ||
313 | ntfs_debug("Continuing while loop 1."); | ||
314 | continue; | ||
315 | } | ||
316 | bit = 1 << (lcn & 7); | ||
317 | ntfs_debug("bit %i.", bit); | ||
318 | /* If the bit is already set, go onto the next one. */ | ||
319 | if (*byte & bit) { | ||
320 | lcn++; | ||
321 | ntfs_debug("Continuing while loop 2."); | ||
322 | continue; | ||
323 | } | ||
324 | /* | ||
325 | * Allocate more memory if needed, including space for | ||
326 | * the terminator element. | ||
327 | * ntfs_malloc_nofs() operates on whole pages only. | ||
328 | */ | ||
329 | if ((rlpos + 2) * sizeof(*rl) > rlsize) { | ||
330 | runlist_element *rl2; | ||
331 | |||
332 | ntfs_debug("Reallocating memory."); | ||
333 | if (!rl) | ||
334 | ntfs_debug("First free bit is at LCN " | ||
335 | "0x%llx.", | ||
336 | (unsigned long long) | ||
337 | (lcn + bmp_pos)); | ||
338 | rl2 = ntfs_malloc_nofs(rlsize + (int)PAGE_SIZE); | ||
339 | if (unlikely(!rl2)) { | ||
340 | err = -ENOMEM; | ||
341 | ntfs_error(vol->sb, "Failed to " | ||
342 | "allocate memory."); | ||
343 | goto out; | ||
344 | } | ||
345 | memcpy(rl2, rl, rlsize); | ||
346 | ntfs_free(rl); | ||
347 | rl = rl2; | ||
348 | rlsize += PAGE_SIZE; | ||
349 | ntfs_debug("Reallocated memory, rlsize 0x%x.", | ||
350 | rlsize); | ||
351 | } | ||
352 | /* Allocate the bitmap bit. */ | ||
353 | *byte |= bit; | ||
354 | /* We need to write this bitmap page to disk. */ | ||
355 | need_writeback = 1; | ||
356 | ntfs_debug("*byte 0x%x, need_writeback is set.", | ||
357 | (unsigned int)*byte); | ||
358 | /* | ||
359 | * Coalesce with previous run if adjacent LCNs. | ||
360 | * Otherwise, append a new run. | ||
361 | */ | ||
362 | ntfs_debug("Adding run (lcn 0x%llx, len 0x%llx), " | ||
363 | "prev_lcn 0x%llx, lcn 0x%llx, " | ||
364 | "bmp_pos 0x%llx, prev_run_len 0x%llx, " | ||
365 | "rlpos %i.", | ||
366 | (unsigned long long)(lcn + bmp_pos), | ||
367 | 1ULL, (unsigned long long)prev_lcn, | ||
368 | (unsigned long long)lcn, | ||
369 | (unsigned long long)bmp_pos, | ||
370 | (unsigned long long)prev_run_len, | ||
371 | rlpos); | ||
372 | if (prev_lcn == lcn + bmp_pos - prev_run_len && rlpos) { | ||
373 | ntfs_debug("Coalescing to run (lcn 0x%llx, " | ||
374 | "len 0x%llx).", | ||
375 | (unsigned long long) | ||
376 | rl[rlpos - 1].lcn, | ||
377 | (unsigned long long) | ||
378 | rl[rlpos - 1].length); | ||
379 | rl[rlpos - 1].length = ++prev_run_len; | ||
380 | ntfs_debug("Run now (lcn 0x%llx, len 0x%llx), " | ||
381 | "prev_run_len 0x%llx.", | ||
382 | (unsigned long long) | ||
383 | rl[rlpos - 1].lcn, | ||
384 | (unsigned long long) | ||
385 | rl[rlpos - 1].length, | ||
386 | (unsigned long long) | ||
387 | prev_run_len); | ||
388 | } else { | ||
389 | if (likely(rlpos)) { | ||
390 | ntfs_debug("Adding new run, (previous " | ||
391 | "run lcn 0x%llx, " | ||
392 | "len 0x%llx).", | ||
393 | (unsigned long long) | ||
394 | rl[rlpos - 1].lcn, | ||
395 | (unsigned long long) | ||
396 | rl[rlpos - 1].length); | ||
397 | rl[rlpos].vcn = rl[rlpos - 1].vcn + | ||
398 | prev_run_len; | ||
399 | } else { | ||
400 | ntfs_debug("Adding new run, is first " | ||
401 | "run."); | ||
402 | rl[rlpos].vcn = start_vcn; | ||
403 | } | ||
404 | rl[rlpos].lcn = prev_lcn = lcn + bmp_pos; | ||
405 | rl[rlpos].length = prev_run_len = 1; | ||
406 | rlpos++; | ||
407 | } | ||
408 | /* Done? */ | ||
409 | if (!--clusters) { | ||
410 | LCN tc; | ||
411 | /* | ||
412 | * Update the current zone position. Positions | ||
413 | * of already scanned zones have been updated | ||
414 | * during the respective zone switches. | ||
415 | */ | ||
416 | tc = lcn + bmp_pos + 1; | ||
417 | ntfs_debug("Done. Updating current zone " | ||
418 | "position, tc 0x%llx, " | ||
419 | "search_zone %i.", | ||
420 | (unsigned long long)tc, | ||
421 | search_zone); | ||
422 | switch (search_zone) { | ||
423 | case 1: | ||
424 | ntfs_debug("Before checks, " | ||
425 | "vol->mft_zone_pos " | ||
426 | "0x%llx.", | ||
427 | (unsigned long long) | ||
428 | vol->mft_zone_pos); | ||
429 | if (tc >= vol->mft_zone_end) { | ||
430 | vol->mft_zone_pos = | ||
431 | vol->mft_lcn; | ||
432 | if (!vol->mft_zone_end) | ||
433 | vol->mft_zone_pos = 0; | ||
434 | } else if ((bmp_initial_pos >= | ||
435 | vol->mft_zone_pos || | ||
436 | tc > vol->mft_zone_pos) | ||
437 | && tc >= vol->mft_lcn) | ||
438 | vol->mft_zone_pos = tc; | ||
439 | ntfs_debug("After checks, " | ||
440 | "vol->mft_zone_pos " | ||
441 | "0x%llx.", | ||
442 | (unsigned long long) | ||
443 | vol->mft_zone_pos); | ||
444 | break; | ||
445 | case 2: | ||
446 | ntfs_debug("Before checks, " | ||
447 | "vol->data1_zone_pos " | ||
448 | "0x%llx.", | ||
449 | (unsigned long long) | ||
450 | vol->data1_zone_pos); | ||
451 | if (tc >= vol->nr_clusters) | ||
452 | vol->data1_zone_pos = | ||
453 | vol->mft_zone_end; | ||
454 | else if ((bmp_initial_pos >= | ||
455 | vol->data1_zone_pos || | ||
456 | tc > vol->data1_zone_pos) | ||
457 | && tc >= vol->mft_zone_end) | ||
458 | vol->data1_zone_pos = tc; | ||
459 | ntfs_debug("After checks, " | ||
460 | "vol->data1_zone_pos " | ||
461 | "0x%llx.", | ||
462 | (unsigned long long) | ||
463 | vol->data1_zone_pos); | ||
464 | break; | ||
465 | case 4: | ||
466 | ntfs_debug("Before checks, " | ||
467 | "vol->data2_zone_pos " | ||
468 | "0x%llx.", | ||
469 | (unsigned long long) | ||
470 | vol->data2_zone_pos); | ||
471 | if (tc >= vol->mft_zone_start) | ||
472 | vol->data2_zone_pos = 0; | ||
473 | else if (bmp_initial_pos >= | ||
474 | vol->data2_zone_pos || | ||
475 | tc > vol->data2_zone_pos) | ||
476 | vol->data2_zone_pos = tc; | ||
477 | ntfs_debug("After checks, " | ||
478 | "vol->data2_zone_pos " | ||
479 | "0x%llx.", | ||
480 | (unsigned long long) | ||
481 | vol->data2_zone_pos); | ||
482 | break; | ||
483 | default: | ||
484 | BUG(); | ||
485 | } | ||
486 | ntfs_debug("Finished. Going to out."); | ||
487 | goto out; | ||
488 | } | ||
489 | lcn++; | ||
490 | } | ||
491 | bmp_pos += buf_size; | ||
492 | ntfs_debug("After inner while loop: buf_size 0x%x, lcn " | ||
493 | "0x%llx, bmp_pos 0x%llx, need_writeback %i.", | ||
494 | buf_size, (unsigned long long)lcn, | ||
495 | (unsigned long long)bmp_pos, need_writeback); | ||
496 | if (bmp_pos < zone_end) { | ||
497 | ntfs_debug("Continuing outer while loop, " | ||
498 | "bmp_pos 0x%llx, zone_end 0x%llx.", | ||
499 | (unsigned long long)bmp_pos, | ||
500 | (unsigned long long)zone_end); | ||
501 | continue; | ||
502 | } | ||
503 | zone_pass_done: /* Finished with the current zone pass. */ | ||
504 | ntfs_debug("At zone_pass_done, pass %i.", pass); | ||
505 | if (pass == 1) { | ||
506 | /* | ||
507 | * Now do pass 2, scanning the first part of the zone | ||
508 | * we omitted in pass 1. | ||
509 | */ | ||
510 | pass = 2; | ||
511 | zone_end = zone_start; | ||
512 | switch (search_zone) { | ||
513 | case 1: /* mft_zone */ | ||
514 | zone_start = vol->mft_zone_start; | ||
515 | break; | ||
516 | case 2: /* data1_zone */ | ||
517 | zone_start = vol->mft_zone_end; | ||
518 | break; | ||
519 | case 4: /* data2_zone */ | ||
520 | zone_start = 0; | ||
521 | break; | ||
522 | default: | ||
523 | BUG(); | ||
524 | } | ||
525 | /* Sanity check. */ | ||
526 | if (zone_end < zone_start) | ||
527 | zone_end = zone_start; | ||
528 | bmp_pos = zone_start; | ||
529 | ntfs_debug("Continuing outer while loop, pass 2, " | ||
530 | "zone_start 0x%llx, zone_end 0x%llx, " | ||
531 | "bmp_pos 0x%llx.", | ||
532 | (unsigned long long)zone_start, | ||
533 | (unsigned long long)zone_end, | ||
534 | (unsigned long long)bmp_pos); | ||
535 | continue; | ||
536 | } /* pass == 2 */ | ||
537 | done_zones_check: | ||
538 | ntfs_debug("At done_zones_check, search_zone %i, done_zones " | ||
539 | "before 0x%x, done_zones after 0x%x.", | ||
540 | search_zone, done_zones, | ||
541 | done_zones | search_zone); | ||
542 | done_zones |= search_zone; | ||
543 | if (done_zones < 7) { | ||
544 | ntfs_debug("Switching zone."); | ||
545 | /* Now switch to the next zone we haven't done yet. */ | ||
546 | pass = 1; | ||
547 | switch (search_zone) { | ||
548 | case 1: | ||
549 | ntfs_debug("Switching from mft zone to data1 " | ||
550 | "zone."); | ||
551 | /* Update mft zone position. */ | ||
552 | if (rlpos) { | ||
553 | LCN tc; | ||
554 | |||
555 | ntfs_debug("Before checks, " | ||
556 | "vol->mft_zone_pos " | ||
557 | "0x%llx.", | ||
558 | (unsigned long long) | ||
559 | vol->mft_zone_pos); | ||
560 | tc = rl[rlpos - 1].lcn + | ||
561 | rl[rlpos - 1].length; | ||
562 | if (tc >= vol->mft_zone_end) { | ||
563 | vol->mft_zone_pos = | ||
564 | vol->mft_lcn; | ||
565 | if (!vol->mft_zone_end) | ||
566 | vol->mft_zone_pos = 0; | ||
567 | } else if ((bmp_initial_pos >= | ||
568 | vol->mft_zone_pos || | ||
569 | tc > vol->mft_zone_pos) | ||
570 | && tc >= vol->mft_lcn) | ||
571 | vol->mft_zone_pos = tc; | ||
572 | ntfs_debug("After checks, " | ||
573 | "vol->mft_zone_pos " | ||
574 | "0x%llx.", | ||
575 | (unsigned long long) | ||
576 | vol->mft_zone_pos); | ||
577 | } | ||
578 | /* Switch from mft zone to data1 zone. */ | ||
579 | switch_to_data1_zone: search_zone = 2; | ||
580 | zone_start = bmp_initial_pos = | ||
581 | vol->data1_zone_pos; | ||
582 | zone_end = vol->nr_clusters; | ||
583 | if (zone_start == vol->mft_zone_end) | ||
584 | pass = 2; | ||
585 | if (zone_start >= zone_end) { | ||
586 | vol->data1_zone_pos = zone_start = | ||
587 | vol->mft_zone_end; | ||
588 | pass = 2; | ||
589 | } | ||
590 | break; | ||
591 | case 2: | ||
592 | ntfs_debug("Switching from data1 zone to " | ||
593 | "data2 zone."); | ||
594 | /* Update data1 zone position. */ | ||
595 | if (rlpos) { | ||
596 | LCN tc; | ||
597 | |||
598 | ntfs_debug("Before checks, " | ||
599 | "vol->data1_zone_pos " | ||
600 | "0x%llx.", | ||
601 | (unsigned long long) | ||
602 | vol->data1_zone_pos); | ||
603 | tc = rl[rlpos - 1].lcn + | ||
604 | rl[rlpos - 1].length; | ||
605 | if (tc >= vol->nr_clusters) | ||
606 | vol->data1_zone_pos = | ||
607 | vol->mft_zone_end; | ||
608 | else if ((bmp_initial_pos >= | ||
609 | vol->data1_zone_pos || | ||
610 | tc > vol->data1_zone_pos) | ||
611 | && tc >= vol->mft_zone_end) | ||
612 | vol->data1_zone_pos = tc; | ||
613 | ntfs_debug("After checks, " | ||
614 | "vol->data1_zone_pos " | ||
615 | "0x%llx.", | ||
616 | (unsigned long long) | ||
617 | vol->data1_zone_pos); | ||
618 | } | ||
619 | /* Switch from data1 zone to data2 zone. */ | ||
620 | search_zone = 4; | ||
621 | zone_start = bmp_initial_pos = | ||
622 | vol->data2_zone_pos; | ||
623 | zone_end = vol->mft_zone_start; | ||
624 | if (!zone_start) | ||
625 | pass = 2; | ||
626 | if (zone_start >= zone_end) { | ||
627 | vol->data2_zone_pos = zone_start = | ||
628 | bmp_initial_pos = 0; | ||
629 | pass = 2; | ||
630 | } | ||
631 | break; | ||
632 | case 4: | ||
633 | ntfs_debug("Switching from data2 zone to " | ||
634 | "data1 zone."); | ||
635 | /* Update data2 zone position. */ | ||
636 | if (rlpos) { | ||
637 | LCN tc; | ||
638 | |||
639 | ntfs_debug("Before checks, " | ||
640 | "vol->data2_zone_pos " | ||
641 | "0x%llx.", | ||
642 | (unsigned long long) | ||
643 | vol->data2_zone_pos); | ||
644 | tc = rl[rlpos - 1].lcn + | ||
645 | rl[rlpos - 1].length; | ||
646 | if (tc >= vol->mft_zone_start) | ||
647 | vol->data2_zone_pos = 0; | ||
648 | else if (bmp_initial_pos >= | ||
649 | vol->data2_zone_pos || | ||
650 | tc > vol->data2_zone_pos) | ||
651 | vol->data2_zone_pos = tc; | ||
652 | ntfs_debug("After checks, " | ||
653 | "vol->data2_zone_pos " | ||
654 | "0x%llx.", | ||
655 | (unsigned long long) | ||
656 | vol->data2_zone_pos); | ||
657 | } | ||
658 | /* Switch from data2 zone to data1 zone. */ | ||
659 | goto switch_to_data1_zone; | ||
660 | default: | ||
661 | BUG(); | ||
662 | } | ||
663 | ntfs_debug("After zone switch, search_zone %i, " | ||
664 | "pass %i, bmp_initial_pos 0x%llx, " | ||
665 | "zone_start 0x%llx, zone_end 0x%llx.", | ||
666 | search_zone, pass, | ||
667 | (unsigned long long)bmp_initial_pos, | ||
668 | (unsigned long long)zone_start, | ||
669 | (unsigned long long)zone_end); | ||
670 | bmp_pos = zone_start; | ||
671 | if (zone_start == zone_end) { | ||
672 | ntfs_debug("Empty zone, going to " | ||
673 | "done_zones_check."); | ||
674 | /* Empty zone. Don't bother searching it. */ | ||
675 | goto done_zones_check; | ||
676 | } | ||
677 | ntfs_debug("Continuing outer while loop."); | ||
678 | continue; | ||
679 | } /* done_zones == 7 */ | ||
680 | ntfs_debug("All zones are finished."); | ||
681 | /* | ||
682 | * All zones are finished! If DATA_ZONE, shrink mft zone. If | ||
683 | * MFT_ZONE, we have really run out of space. | ||
684 | */ | ||
685 | mft_zone_size = vol->mft_zone_end - vol->mft_zone_start; | ||
686 | ntfs_debug("vol->mft_zone_start 0x%llx, vol->mft_zone_end " | ||
687 | "0x%llx, mft_zone_size 0x%llx.", | ||
688 | (unsigned long long)vol->mft_zone_start, | ||
689 | (unsigned long long)vol->mft_zone_end, | ||
690 | (unsigned long long)mft_zone_size); | ||
691 | if (zone == MFT_ZONE || mft_zone_size <= 0) { | ||
692 | ntfs_debug("No free clusters left, going to out."); | ||
693 | /* Really no more space left on device. */ | ||
694 | err = ENOSPC; | ||
695 | goto out; | ||
696 | } /* zone == DATA_ZONE && mft_zone_size > 0 */ | ||
697 | ntfs_debug("Shrinking mft zone."); | ||
698 | zone_end = vol->mft_zone_end; | ||
699 | mft_zone_size >>= 1; | ||
700 | if (mft_zone_size > 0) | ||
701 | vol->mft_zone_end = vol->mft_zone_start + mft_zone_size; | ||
702 | else /* mft zone and data2 zone no longer exist. */ | ||
703 | vol->data2_zone_pos = vol->mft_zone_start = | ||
704 | vol->mft_zone_end = 0; | ||
705 | if (vol->mft_zone_pos >= vol->mft_zone_end) { | ||
706 | vol->mft_zone_pos = vol->mft_lcn; | ||
707 | if (!vol->mft_zone_end) | ||
708 | vol->mft_zone_pos = 0; | ||
709 | } | ||
710 | bmp_pos = zone_start = bmp_initial_pos = | ||
711 | vol->data1_zone_pos = vol->mft_zone_end; | ||
712 | search_zone = 2; | ||
713 | pass = 2; | ||
714 | done_zones &= ~2; | ||
715 | ntfs_debug("After shrinking mft zone, mft_zone_size 0x%llx, " | ||
716 | "vol->mft_zone_start 0x%llx, " | ||
717 | "vol->mft_zone_end 0x%llx, " | ||
718 | "vol->mft_zone_pos 0x%llx, search_zone 2, " | ||
719 | "pass 2, dones_zones 0x%x, zone_start 0x%llx, " | ||
720 | "zone_end 0x%llx, vol->data1_zone_pos 0x%llx, " | ||
721 | "continuing outer while loop.", | ||
722 | (unsigned long long)mft_zone_size, | ||
723 | (unsigned long long)vol->mft_zone_start, | ||
724 | (unsigned long long)vol->mft_zone_end, | ||
725 | (unsigned long long)vol->mft_zone_pos, | ||
726 | done_zones, (unsigned long long)zone_start, | ||
727 | (unsigned long long)zone_end, | ||
728 | (unsigned long long)vol->data1_zone_pos); | ||
729 | } | ||
730 | ntfs_debug("After outer while loop."); | ||
731 | out: | ||
732 | ntfs_debug("At out."); | ||
733 | /* Add runlist terminator element. */ | ||
734 | if (likely(rl)) { | ||
735 | rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length; | ||
736 | rl[rlpos].lcn = LCN_RL_NOT_MAPPED; | ||
737 | rl[rlpos].length = 0; | ||
738 | } | ||
739 | if (likely(page && !IS_ERR(page))) { | ||
740 | if (need_writeback) { | ||
741 | ntfs_debug("Marking page dirty."); | ||
742 | flush_dcache_page(page); | ||
743 | set_page_dirty(page); | ||
744 | need_writeback = 0; | ||
745 | } | ||
746 | ntfs_unmap_page(page); | ||
747 | } | ||
748 | if (likely(!err)) { | ||
749 | up_write(&vol->lcnbmp_lock); | ||
750 | ntfs_debug("Done."); | ||
751 | return rl; | ||
752 | } | ||
753 | ntfs_error(vol->sb, "Failed to allocate clusters, aborting " | ||
754 | "(error %i).", err); | ||
755 | if (rl) { | ||
756 | int err2; | ||
757 | |||
758 | if (err == ENOSPC) | ||
759 | ntfs_debug("Not enough space to complete allocation, " | ||
760 | "err ENOSPC, first free lcn 0x%llx, " | ||
761 | "could allocate up to 0x%llx " | ||
762 | "clusters.", | ||
763 | (unsigned long long)rl[0].lcn, | ||
764 | (unsigned long long)count - clusters); | ||
765 | /* Deallocate all allocated clusters. */ | ||
766 | ntfs_debug("Attempting rollback..."); | ||
767 | err2 = ntfs_cluster_free_from_rl_nolock(vol, rl); | ||
768 | if (err2) { | ||
769 | ntfs_error(vol->sb, "Failed to rollback (error %i). " | ||
770 | "Leaving inconsistent metadata! " | ||
771 | "Unmount and run chkdsk.", err2); | ||
772 | NVolSetErrors(vol); | ||
773 | } | ||
774 | /* Free the runlist. */ | ||
775 | ntfs_free(rl); | ||
776 | } else if (err == ENOSPC) | ||
777 | ntfs_debug("No space left at all, err = ENOSPC, " | ||
778 | "first free lcn = 0x%llx.", | ||
779 | (unsigned long long)vol->data1_zone_pos); | ||
780 | up_write(&vol->lcnbmp_lock); | ||
781 | return ERR_PTR(err); | ||
782 | } | ||
783 | |||
784 | /** | ||
785 | * __ntfs_cluster_free - free clusters on an ntfs volume | ||
786 | * @vi: vfs inode whose runlist describes the clusters to free | ||
787 | * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters | ||
788 | * @count: number of clusters to free or -1 for all clusters | ||
789 | * @is_rollback: if TRUE this is a rollback operation | ||
790 | * | ||
791 | * Free @count clusters starting at the cluster @start_vcn in the runlist | ||
792 | * described by the vfs inode @vi. | ||
793 | * | ||
794 | * If @count is -1, all clusters from @start_vcn to the end of the runlist are | ||
795 | * deallocated. Thus, to completely free all clusters in a runlist, use | ||
796 | * @start_vcn = 0 and @count = -1. | ||
797 | * | ||
798 | * @is_rollback should always be FALSE, it is for internal use to rollback | ||
799 | * errors. You probably want to use ntfs_cluster_free() instead. | ||
800 | * | ||
801 | * Note, ntfs_cluster_free() does not modify the runlist at all, so the caller | ||
802 | * has to deal with it later. | ||
803 | * | ||
804 | * Return the number of deallocated clusters (not counting sparse ones) on | ||
805 | * success and -errno on error. | ||
806 | * | ||
807 | * Locking: - The runlist described by @vi must be unlocked on entry and is | ||
808 | * unlocked on return. | ||
809 | * - This function takes the runlist lock of @vi for reading and | ||
810 | * sometimes for writing and sometimes modifies the runlist. | ||
811 | * - The volume lcn bitmap must be unlocked on entry and is unlocked | ||
812 | * on return. | ||
813 | * - This function takes the volume lcn bitmap lock for writing and | ||
814 | * modifies the bitmap contents. | ||
815 | */ | ||
816 | s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, | ||
817 | const BOOL is_rollback) | ||
818 | { | ||
819 | s64 delta, to_free, total_freed, real_freed; | ||
820 | ntfs_inode *ni; | ||
821 | ntfs_volume *vol; | ||
822 | struct inode *lcnbmp_vi; | ||
823 | runlist_element *rl; | ||
824 | int err; | ||
825 | |||
826 | BUG_ON(!vi); | ||
827 | ntfs_debug("Entering for i_ino 0x%lx, start_vcn 0x%llx, count " | ||
828 | "0x%llx.%s", vi->i_ino, (unsigned long long)start_vcn, | ||
829 | (unsigned long long)count, | ||
830 | is_rollback ? " (rollback)" : ""); | ||
831 | ni = NTFS_I(vi); | ||
832 | vol = ni->vol; | ||
833 | lcnbmp_vi = vol->lcnbmp_ino; | ||
834 | BUG_ON(!lcnbmp_vi); | ||
835 | BUG_ON(start_vcn < 0); | ||
836 | BUG_ON(count < -1); | ||
837 | /* | ||
838 | * Lock the lcn bitmap for writing but only if not rolling back. We | ||
839 | * must hold the lock all the way including through rollback otherwise | ||
840 | * rollback is not possible because once we have cleared a bit and | ||
841 | * dropped the lock, anyone could have set the bit again, thus | ||
842 | * allocating the cluster for another use. | ||
843 | */ | ||
844 | if (likely(!is_rollback)) | ||
845 | down_write(&vol->lcnbmp_lock); | ||
846 | |||
847 | total_freed = real_freed = 0; | ||
848 | |||
849 | /* This returns with ni->runlist locked for reading on success. */ | ||
850 | rl = ntfs_find_vcn(ni, start_vcn, FALSE); | ||
851 | if (IS_ERR(rl)) { | ||
852 | if (!is_rollback) | ||
853 | ntfs_error(vol->sb, "Failed to find first runlist " | ||
854 | "element (error %li), aborting.", | ||
855 | PTR_ERR(rl)); | ||
856 | err = PTR_ERR(rl); | ||
857 | goto err_out; | ||
858 | } | ||
859 | if (unlikely(rl->lcn < LCN_HOLE)) { | ||
860 | if (!is_rollback) | ||
861 | ntfs_error(vol->sb, "First runlist element has " | ||
862 | "invalid lcn, aborting."); | ||
863 | err = -EIO; | ||
864 | goto unl_err_out; | ||
865 | } | ||
866 | /* Find the starting cluster inside the run that needs freeing. */ | ||
867 | delta = start_vcn - rl->vcn; | ||
868 | |||
869 | /* The number of clusters in this run that need freeing. */ | ||
870 | to_free = rl->length - delta; | ||
871 | if (count >= 0 && to_free > count) | ||
872 | to_free = count; | ||
873 | |||
874 | if (likely(rl->lcn >= 0)) { | ||
875 | /* Do the actual freeing of the clusters in this run. */ | ||
876 | err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn + delta, | ||
877 | to_free, likely(!is_rollback) ? 0 : 1); | ||
878 | if (unlikely(err)) { | ||
879 | if (!is_rollback) | ||
880 | ntfs_error(vol->sb, "Failed to clear first run " | ||
881 | "(error %i), aborting.", err); | ||
882 | goto unl_err_out; | ||
883 | } | ||
884 | /* We have freed @to_free real clusters. */ | ||
885 | real_freed = to_free; | ||
886 | }; | ||
887 | /* Go to the next run and adjust the number of clusters left to free. */ | ||
888 | ++rl; | ||
889 | if (count >= 0) | ||
890 | count -= to_free; | ||
891 | |||
892 | /* Keep track of the total "freed" clusters, including sparse ones. */ | ||
893 | total_freed = to_free; | ||
894 | /* | ||
895 | * Loop over the remaining runs, using @count as a capping value, and | ||
896 | * free them. | ||
897 | */ | ||
898 | for (; rl->length && count != 0; ++rl) { | ||
899 | if (unlikely(rl->lcn < LCN_HOLE)) { | ||
900 | VCN vcn; | ||
901 | |||
902 | /* | ||
903 | * Attempt to map runlist, dropping runlist lock for | ||
904 | * the duration. | ||
905 | */ | ||
906 | vcn = rl->vcn; | ||
907 | up_read(&ni->runlist.lock); | ||
908 | err = ntfs_map_runlist(ni, vcn); | ||
909 | if (err) { | ||
910 | if (!is_rollback) | ||
911 | ntfs_error(vol->sb, "Failed to map " | ||
912 | "runlist fragment."); | ||
913 | if (err == -EINVAL || err == -ENOENT) | ||
914 | err = -EIO; | ||
915 | goto err_out; | ||
916 | } | ||
917 | /* | ||
918 | * This returns with ni->runlist locked for reading on | ||
919 | * success. | ||
920 | */ | ||
921 | rl = ntfs_find_vcn(ni, vcn, FALSE); | ||
922 | if (IS_ERR(rl)) { | ||
923 | err = PTR_ERR(rl); | ||
924 | if (!is_rollback) | ||
925 | ntfs_error(vol->sb, "Failed to find " | ||
926 | "subsequent runlist " | ||
927 | "element."); | ||
928 | goto err_out; | ||
929 | } | ||
930 | if (unlikely(rl->lcn < LCN_HOLE)) { | ||
931 | if (!is_rollback) | ||
932 | ntfs_error(vol->sb, "Runlist element " | ||
933 | "has invalid lcn " | ||
934 | "(0x%llx).", | ||
935 | (unsigned long long) | ||
936 | rl->lcn); | ||
937 | err = -EIO; | ||
938 | goto unl_err_out; | ||
939 | } | ||
940 | } | ||
941 | /* The number of clusters in this run that need freeing. */ | ||
942 | to_free = rl->length; | ||
943 | if (count >= 0 && to_free > count) | ||
944 | to_free = count; | ||
945 | |||
946 | if (likely(rl->lcn >= 0)) { | ||
947 | /* Do the actual freeing of the clusters in the run. */ | ||
948 | err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn, | ||
949 | to_free, likely(!is_rollback) ? 0 : 1); | ||
950 | if (unlikely(err)) { | ||
951 | if (!is_rollback) | ||
952 | ntfs_error(vol->sb, "Failed to clear " | ||
953 | "subsequent run."); | ||
954 | goto unl_err_out; | ||
955 | } | ||
956 | /* We have freed @to_free real clusters. */ | ||
957 | real_freed += to_free; | ||
958 | } | ||
959 | /* Adjust the number of clusters left to free. */ | ||
960 | if (count >= 0) | ||
961 | count -= to_free; | ||
962 | |||
963 | /* Update the total done clusters. */ | ||
964 | total_freed += to_free; | ||
965 | } | ||
966 | up_read(&ni->runlist.lock); | ||
967 | if (likely(!is_rollback)) | ||
968 | up_write(&vol->lcnbmp_lock); | ||
969 | |||
970 | BUG_ON(count > 0); | ||
971 | |||
972 | /* We are done. Return the number of actually freed clusters. */ | ||
973 | ntfs_debug("Done."); | ||
974 | return real_freed; | ||
975 | unl_err_out: | ||
976 | up_read(&ni->runlist.lock); | ||
977 | err_out: | ||
978 | if (is_rollback) | ||
979 | return err; | ||
980 | /* If no real clusters were freed, no need to rollback. */ | ||
981 | if (!real_freed) { | ||
982 | up_write(&vol->lcnbmp_lock); | ||
983 | return err; | ||
984 | } | ||
985 | /* | ||
986 | * Attempt to rollback and if that succeeds just return the error code. | ||
987 | * If rollback fails, set the volume errors flag, emit an error | ||
988 | * message, and return the error code. | ||
989 | */ | ||
990 | delta = __ntfs_cluster_free(vi, start_vcn, total_freed, TRUE); | ||
991 | if (delta < 0) { | ||
992 | ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving " | ||
993 | "inconsistent metadata! Unmount and run " | ||
994 | "chkdsk.", (int)delta); | ||
995 | NVolSetErrors(vol); | ||
996 | } | ||
997 | up_write(&vol->lcnbmp_lock); | ||
998 | ntfs_error(vol->sb, "Aborting (error %i).", err); | ||
999 | return err; | ||
1000 | } | ||
1001 | |||
1002 | #endif /* NTFS_RW */ | ||