Diffstat (limited to 'fs/ntfs/aops.c')

-rw-r--r--	fs/ntfs/aops.c	2324
1 file changed, 2324 insertions, 0 deletions

diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
new file mode 100644
index 000000000000..45d56e41ed98
--- /dev/null
+++ b/fs/ntfs/aops.c
@@ -0,0 +1,2324 @@
/**
 * aops.c - NTFS kernel address space operations and page cache handling.
 * Part of the Linux-NTFS project.
 *
 * Copyright (c) 2001-2004 Anton Altaparmakov
 * Copyright (c) 2002 Richard Russon
 *
 * This program/include file is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program/include file is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program (in the main directory of the Linux-NTFS
 * distribution in the file COPYING); if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>

#include "aops.h"
#include "attrib.h"
#include "debug.h"
#include "inode.h"
#include "mft.h"
#include "runlist.h"
#include "types.h"
#include "ntfs.h"

/**
 * ntfs_end_buffer_async_read - async io completion for reading attributes
 * @bh: buffer head on which io is completed
 * @uptodate: whether @bh is now uptodate or not
 *
 * Asynchronous I/O completion handler for reading pages belonging to the
 * attribute address space of an inode. The inodes can either be files or
 * directories or they can be fake inodes describing some attribute.
 *
 * If NInoMstProtected(), perform the post read mst fixups when all IO on the
 * page has been completed and mark the page uptodate or set the error bit on
 * the page. To determine the size of the records that need fixing up, we
 * cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs
 * record size, and index_block_size_bits to the log(base 2) of the ntfs
 * record size.
 */
static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
	static DEFINE_SPINLOCK(page_uptodate_lock);
	unsigned long flags;
	struct buffer_head *tmp;
	struct page *page;
	ntfs_inode *ni;
	int page_uptodate = 1;

	page = bh->b_page;
	ni = NTFS_I(page->mapping->host);

	if (likely(uptodate)) {
		s64 file_ofs;

		set_buffer_uptodate(bh);

		file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
				bh_offset(bh);
		/* Check for the current buffer head overflowing. */
		if (file_ofs + bh->b_size > ni->initialized_size) {
			char *addr;
			int ofs = 0;

			if (file_ofs < ni->initialized_size)
				ofs = ni->initialized_size - file_ofs;
			addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
			memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
			flush_dcache_page(page);
			kunmap_atomic(addr, KM_BIO_SRC_IRQ);
		}
	} else {
		clear_buffer_uptodate(bh);
		ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
				(unsigned long long)bh->b_blocknr);
		SetPageError(page);
	}
	spin_lock_irqsave(&page_uptodate_lock, flags);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	tmp = bh;
	do {
		if (!buffer_uptodate(tmp))
			page_uptodate = 0;
		if (buffer_async_read(tmp)) {
			if (likely(buffer_locked(tmp)))
				goto still_busy;
			/* Async buffers must be locked. */
			BUG();
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	spin_unlock_irqrestore(&page_uptodate_lock, flags);
	/*
	 * If none of the buffers had errors then we can set the page uptodate,
	 * but we first have to perform the post read mst fixups, if the
	 * attribute is mst protected, i.e. if NInoMstProtected(ni) is true.
	 * Note we ignore fixup errors as those are detected when
	 * map_mft_record() is called which gives us per record granularity
	 * rather than per page granularity.
	 */
	if (!NInoMstProtected(ni)) {
		if (likely(page_uptodate && !PageError(page)))
			SetPageUptodate(page);
	} else {
		char *addr;
		unsigned int i, recs;
		u32 rec_size;

		rec_size = ni->itype.index.block_size;
		recs = PAGE_CACHE_SIZE / rec_size;
		/* Should have been verified before we got here... */
		BUG_ON(!recs);
		addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
		for (i = 0; i < recs; i++)
			post_read_mst_fixup((NTFS_RECORD*)(addr +
					i * rec_size), rec_size);
		flush_dcache_page(page);
		kunmap_atomic(addr, KM_BIO_SRC_IRQ);
		if (likely(!PageError(page) && page_uptodate))
			SetPageUptodate(page);
	}
	unlock_page(page);
	return;
still_busy:
	spin_unlock_irqrestore(&page_uptodate_lock, flags);
	return;
}
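
/*
 * Illustrative sketch (not part of the original file): the record size
 * "cheat" described above, with concrete numbers. For reading mft records,
 * ni->itype.index.block_size is set to the mft record size, so with a
 * 4096-byte page and 1024-byte mft records the completion handler applies
 * the fixups to four records per page:
 *
 *	rec_size = 1024;			// ni->itype.index.block_size
 *	recs = PAGE_CACHE_SIZE / rec_size;	// 4096 / 1024 == 4
 *	// post_read_mst_fixup() runs at page offsets 0, 1024, 2048, 3072.
 */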

/**
 * ntfs_read_block - fill a @page of an address space with data
 * @page: page cache page to fill with data
 *
 * Fill the page @page of the address space belonging to the @page->host inode.
 * We read each buffer asynchronously and when all buffers are read in, our io
 * completion handler ntfs_end_buffer_async_read(), if required, automatically
 * applies the mst fixups to the page before finally marking it uptodate and
 * unlocking it.
 *
 * We only enforce allocated_size limit because i_size is checked for in
 * generic_file_read().
 *
 * Return 0 on success and -errno on error.
 *
 * Contains an adapted version of fs/buffer.c::block_read_full_page().
 */
static int ntfs_read_block(struct page *page)
{
	VCN vcn;
	LCN lcn;
	ntfs_inode *ni;
	ntfs_volume *vol;
	runlist_element *rl;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	sector_t iblock, lblock, zblock;
	unsigned int blocksize, vcn_ofs;
	int i, nr;
	unsigned char blocksize_bits;

	ni = NTFS_I(page->mapping->host);
	vol = ni->vol;

	/* $MFT/$DATA must have its complete runlist in memory at all times. */
	BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));

	blocksize_bits = VFS_I(ni)->i_blkbits;
	blocksize = 1 << blocksize_bits;

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);
	bh = head = page_buffers(page);
	if (unlikely(!bh)) {
		unlock_page(page);
		return -ENOMEM;
	}

	iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
	zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;

	/* Loop through all the buffers in the page. */
	rl = NULL;
	nr = i = 0;
	do {
		u8 *kaddr;

		if (unlikely(buffer_uptodate(bh)))
			continue;
		if (unlikely(buffer_mapped(bh))) {
			arr[nr++] = bh;
			continue;
		}
		bh->b_bdev = vol->sb->s_bdev;
		/* Is the block within the allowed limits? */
		if (iblock < lblock) {
			BOOL is_retry = FALSE;

			/* Convert iblock into corresponding vcn and offset. */
			vcn = (VCN)iblock << blocksize_bits >>
					vol->cluster_size_bits;
			vcn_ofs = ((VCN)iblock << blocksize_bits) &
					vol->cluster_size_mask;
			if (!rl) {
lock_retry_remap:
				down_read(&ni->runlist.lock);
				rl = ni->runlist.rl;
			}
			if (likely(rl != NULL)) {
				/* Seek to element containing target vcn. */
				while (rl->length && rl[1].vcn <= vcn)
					rl++;
				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
			} else
				lcn = LCN_RL_NOT_MAPPED;
			/* Successful remap. */
			if (lcn >= 0) {
				/* Setup buffer head to correct block. */
				bh->b_blocknr = ((lcn << vol->cluster_size_bits)
						+ vcn_ofs) >> blocksize_bits;
				set_buffer_mapped(bh);
				/* Only read initialized data blocks. */
				if (iblock < zblock) {
					arr[nr++] = bh;
					continue;
				}
				/* Fully non-initialized data block, zero it. */
				goto handle_zblock;
			}
			/* It is a hole, need to zero it. */
			if (lcn == LCN_HOLE)
				goto handle_hole;
			/* If first try and runlist unmapped, map and retry. */
			if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				int err;
				is_retry = TRUE;
				/*
				 * Attempt to map runlist, dropping lock for
				 * the duration.
				 */
				up_read(&ni->runlist.lock);
				err = ntfs_map_runlist(ni, vcn);
				if (likely(!err))
					goto lock_retry_remap;
				rl = NULL;
				lcn = err;
			}
			/* Hard error, zero out region. */
			bh->b_blocknr = -1;
			SetPageError(page);
			ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
					"attribute type 0x%x, vcn 0x%llx, "
					"offset 0x%x because its location on "
					"disk could not be determined%s "
					"(error code %lli).", ni->mft_no,
					ni->type, (unsigned long long)vcn,
					vcn_ofs, is_retry ? " even after "
					"retrying" : "", (long long)lcn);
		}
		/*
		 * Either iblock was outside lblock limits or
		 * ntfs_rl_vcn_to_lcn() returned error. Just zero that portion
		 * of the page and set the buffer uptodate.
		 */
handle_hole:
		bh->b_blocknr = -1UL;
		clear_buffer_mapped(bh);
handle_zblock:
		kaddr = kmap_atomic(page, KM_USER0);
		memset(kaddr + i * blocksize, 0, blocksize);
		flush_dcache_page(page);
		kunmap_atomic(kaddr, KM_USER0);
		set_buffer_uptodate(bh);
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	/* Release the lock if we took it. */
	if (rl)
		up_read(&ni->runlist.lock);

	/* Check we have at least one buffer ready for i/o. */
	if (nr) {
		struct buffer_head *tbh;

		/* Lock the buffers. */
		for (i = 0; i < nr; i++) {
			tbh = arr[i];
			lock_buffer(tbh);
			tbh->b_end_io = ntfs_end_buffer_async_read;
			set_buffer_async_read(tbh);
		}
		/* Finally, start i/o on the buffers. */
		for (i = 0; i < nr; i++) {
			tbh = arr[i];
			if (likely(!buffer_uptodate(tbh)))
				submit_bh(READ, tbh);
			else
				ntfs_end_buffer_async_read(tbh, 1);
		}
		return 0;
	}
	/* No i/o was scheduled on any of the buffers. */
	if (likely(!PageError(page)))
		SetPageUptodate(page);
	else /* Signal synchronous i/o error. */
		nr = -EIO;
	unlock_page(page);
	return nr;
}
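
/*
 * Illustrative sketch (not part of the original file): the iblock -> vcn ->
 * device block arithmetic used above, with concrete numbers. Assume a
 * 512-byte block size (blocksize_bits == 9) and a 4096-byte cluster size
 * (cluster_size_bits == 12, cluster_size_mask == 4095):
 *
 *	iblock = 10;			// byte offset 10 << 9 == 5120
 *	vcn = 5120 >> 12;		// == 1
 *	vcn_ofs = 5120 & 4095;		// == 1024
 *	// If the runlist maps vcn 1 to lcn 100:
 *	bh->b_blocknr = ((100 << 12) + 1024) >> 9;	// == 802
 */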

/**
 * ntfs_readpage - fill a @page of a @file with data from the device
 * @file: open file to which the page @page belongs or NULL
 * @page: page cache page to fill with data
 *
 * For non-resident attributes, ntfs_readpage() fills the @page of the open
 * file @file by calling the ntfs version of the generic block_read_full_page()
 * function, ntfs_read_block(), which in turn creates and reads in the buffers
 * associated with the page asynchronously.
 *
 * For resident attributes, OTOH, ntfs_readpage() fills @page by copying the
 * data from the mft record (which at this stage is most likely in memory) and
 * fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as
 * even if the mft record is not cached at this point in time, we need to wait
 * for it to be read in before we can do the copy.
 *
 * Return 0 on success and -errno on error.
 */
static int ntfs_readpage(struct file *file, struct page *page)
{
	loff_t i_size;
	ntfs_inode *ni, *base_ni;
	u8 *kaddr;
	ntfs_attr_search_ctx *ctx;
	MFT_RECORD *mrec;
	u32 attr_len;
	int err = 0;

	BUG_ON(!PageLocked(page));
	/*
	 * This can potentially happen because we clear PageUptodate() during
	 * ntfs_writepage() of MstProtected() attributes.
	 */
	if (PageUptodate(page)) {
		unlock_page(page);
		return 0;
	}
	ni = NTFS_I(page->mapping->host);

	/* NInoNonResident() == NInoIndexAllocPresent() */
	if (NInoNonResident(ni)) {
		/*
		 * Only unnamed $DATA attributes can be compressed or
		 * encrypted.
		 */
		if (ni->type == AT_DATA && !ni->name_len) {
			/* If file is encrypted, deny access, just like NT4. */
			if (NInoEncrypted(ni)) {
				err = -EACCES;
				goto err_out;
			}
			/* Compressed data streams are handled in compress.c. */
			if (NInoCompressed(ni))
				return ntfs_read_compressed_block(page);
		}
		/* Normal data stream. */
		return ntfs_read_block(page);
	}
	/*
	 * Attribute is resident, implying it is not compressed or encrypted.
	 * This also means the attribute is smaller than an mft record and
	 * hence smaller than a page, so can simply zero out any pages with
	 * index above 0. We can also do this if the file size is 0.
	 */
	if (unlikely(page->index > 0 || !i_size_read(VFS_I(ni)))) {
		kaddr = kmap_atomic(page, KM_USER0);
		memset(kaddr, 0, PAGE_CACHE_SIZE);
		flush_dcache_page(page);
		kunmap_atomic(kaddr, KM_USER0);
		goto done;
	}
	if (!NInoAttr(ni))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;
	/* Map, pin, and lock the mft record. */
	mrec = map_mft_record(base_ni);
	if (IS_ERR(mrec)) {
		err = PTR_ERR(mrec);
		goto err_out;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto unm_err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err))
		goto put_unm_err_out;
	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
	i_size = i_size_read(VFS_I(ni));
	if (unlikely(attr_len > i_size))
		attr_len = i_size;
	kaddr = kmap_atomic(page, KM_USER0);
	/* Copy the data to the page. */
	memcpy(kaddr, (u8*)ctx->attr +
			le16_to_cpu(ctx->attr->data.resident.value_offset),
			attr_len);
	/* Zero the remainder of the page. */
	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
	flush_dcache_page(page);
	kunmap_atomic(kaddr, KM_USER0);
put_unm_err_out:
	ntfs_attr_put_search_ctx(ctx);
unm_err_out:
	unmap_mft_record(base_ni);
done:
	SetPageUptodate(page);
err_out:
	unlock_page(page);
	return err;
}
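
/*
 * Illustrative sketch (an assumption, not shown in this hunk): the read
 * path above is the ->readpage method of the attribute address space, so
 * it would be wired up via an address_space_operations table roughly like
 * the following. The table itself lives further down in aops.c; the exact
 * name and member list here are assumed:
 *
 *	struct address_space_operations ntfs_aops = {
 *		.readpage	= ntfs_readpage,
 *	#ifdef NTFS_RW
 *		.writepage	= ntfs_writepage,
 *	#endif
 *	};
 */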

#ifdef NTFS_RW

/**
 * ntfs_write_block - write a @page to the backing store
 * @page: page cache page to write out
 * @wbc: writeback control structure
 *
 * This function is for writing pages belonging to non-resident, non-mst
 * protected attributes to their backing store.
 *
 * For a page with buffers, map and write the dirty buffers asynchronously
 * under page writeback. For a page without buffers, create buffers for the
 * page, then proceed as above.
 *
 * If a page doesn't have buffers the page dirty state is definitive. If a page
 * does have buffers, the page dirty state is just a hint, and the buffer dirty
 * state is definitive. (A hint which has rules: dirty buffers against a clean
 * page are illegal; other combinations are legal and need to be handled, in
 * particular a dirty page containing clean buffers.)
 *
 * Return 0 on success and -errno on error.
 *
 * Based on ntfs_read_block() and __block_write_full_page().
 */
static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
{
	VCN vcn;
	LCN lcn;
	sector_t block, dblock, iblock;
	struct inode *vi;
	ntfs_inode *ni;
	ntfs_volume *vol;
	runlist_element *rl;
	struct buffer_head *bh, *head;
	unsigned int blocksize, vcn_ofs;
	int err;
	BOOL need_end_writeback;
	unsigned char blocksize_bits;

	vi = page->mapping->host;
	ni = NTFS_I(vi);
	vol = ni->vol;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx.", ni->mft_no, ni->type, page->index);

	BUG_ON(!NInoNonResident(ni));
	BUG_ON(NInoMstProtected(ni));

	blocksize_bits = vi->i_blkbits;
	blocksize = 1 << blocksize_bits;

	if (!page_has_buffers(page)) {
		BUG_ON(!PageUptodate(page));
		create_empty_buffers(page, blocksize,
				(1 << BH_Uptodate) | (1 << BH_Dirty));
	}
	bh = head = page_buffers(page);
	if (unlikely(!bh)) {
		ntfs_warning(vol->sb, "Error allocating page buffers. "
				"Redirtying page so we try again later.");
		/*
		 * Put the page back on mapping->dirty_pages, but leave its
		 * buffers' dirty state as-is.
		 */
		redirty_page_for_writepage(wbc, page);
		unlock_page(page);
		return 0;
	}

	/* NOTE: Different naming scheme to ntfs_read_block()! */

	/* The first block in the page. */
	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);

	/* The first out of bounds block for the data size. */
	dblock = (vi->i_size + blocksize - 1) >> blocksize_bits;

	/* The last (fully or partially) initialized block. */
	iblock = ni->initialized_size >> blocksize_bits;

	/*
	 * Be very careful. We have no exclusion from __set_page_dirty_buffers
	 * here, and the (potentially unmapped) buffers may become dirty at
	 * any time. If a buffer becomes dirty here after we've inspected it
	 * then we just miss that fact, and the page stays dirty.
	 *
	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
	 * handle that here by just cleaning them.
	 */

	/*
	 * Loop through all the buffers in the page, mapping all the dirty
	 * buffers to disk addresses and handling any aliases from the
	 * underlying block device's mapping.
	 */
	rl = NULL;
	err = 0;
	do {
		BOOL is_retry = FALSE;

		if (unlikely(block >= dblock)) {
			/*
			 * Mapped buffers outside i_size will occur, because
			 * this page can be outside i_size when there is a
			 * truncate in progress. The contents of such buffers
			 * were zeroed by ntfs_writepage().
			 *
			 * FIXME: What about the small race window where
			 * ntfs_writepage() has not done any clearing because
			 * the page was within i_size but before we get here,
			 * vmtruncate() modifies i_size?
			 */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
			continue;
		}

		/* Clean buffers are not written out, so no need to map them. */
		if (!buffer_dirty(bh))
			continue;

		/* Make sure we have enough initialized size. */
		if (unlikely((block >= iblock) &&
				(ni->initialized_size < vi->i_size))) {
			/*
			 * If this page is fully outside initialized size, zero
			 * out all pages between the current initialized size
			 * and the current page. Just use ntfs_readpage() to do
			 * the zeroing transparently.
			 */
			if (block > iblock) {
				// TODO:
				// For each page do:
				// - read_cache_page()
				// Again for each page do:
				// - wait_on_page_locked()
				// - Check (PageUptodate(page) &&
				//		!PageError(page))
				// Update initialized size in the attribute and
				// in the inode.
				// Again, for each page do:
				//	__set_page_dirty_buffers();
				// page_cache_release()
				// We don't need to wait on the writes.
				// Update iblock.
			}
			/*
			 * The current page straddles initialized size. Zero
			 * all non-uptodate buffers and set them uptodate (and
			 * dirty?). Note, there aren't any non-uptodate buffers
			 * if the page is uptodate.
			 * FIXME: For an uptodate page, the buffers may need to
			 * be written out because they were not initialized on
			 * disk before.
			 */
			if (!PageUptodate(page)) {
				// TODO:
				// Zero any non-uptodate buffers up to i_size.
				// Set them uptodate and dirty.
			}
			// TODO:
			// Update initialized size in the attribute and in the
			// inode (up to i_size).
			// Update iblock.
			// FIXME: This is inefficient. Try to batch the two
			// size changes to happen in one go.
			ntfs_error(vol->sb, "Writing beyond initialized size "
					"is not supported yet. Sorry.");
			err = -EOPNOTSUPP;
			break;
			// Do NOT set_buffer_new() BUT DO clear buffer range
			// outside write request range.
			// set_buffer_uptodate() on complete buffers as well as
			// set_buffer_dirty().
		}

		/* No need to map buffers that are already mapped. */
		if (buffer_mapped(bh))
			continue;

		/* Unmapped, dirty buffer. Need to map it. */
		bh->b_bdev = vol->sb->s_bdev;

		/* Convert block into corresponding vcn and offset. */
		vcn = (VCN)block << blocksize_bits;
		vcn_ofs = vcn & vol->cluster_size_mask;
		vcn >>= vol->cluster_size_bits;
		if (!rl) {
lock_retry_remap:
			down_read(&ni->runlist.lock);
			rl = ni->runlist.rl;
		}
		if (likely(rl != NULL)) {
			/* Seek to element containing target vcn. */
			while (rl->length && rl[1].vcn <= vcn)
				rl++;
			lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
		} else
			lcn = LCN_RL_NOT_MAPPED;
		/* Successful remap. */
		if (lcn >= 0) {
			/* Setup buffer head to point to correct block. */
			bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
					vcn_ofs) >> blocksize_bits;
			set_buffer_mapped(bh);
			continue;
		}
		/* It is a hole, need to instantiate it. */
		if (lcn == LCN_HOLE) {
			// TODO: Instantiate the hole.
			// clear_buffer_new(bh);
			// unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
			ntfs_error(vol->sb, "Writing into sparse regions is "
					"not supported yet. Sorry.");
			err = -EOPNOTSUPP;
			break;
		}
		/* If first try and runlist unmapped, map and retry. */
		if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
			is_retry = TRUE;
			/*
			 * Attempt to map runlist, dropping lock for
			 * the duration.
			 */
			up_read(&ni->runlist.lock);
			err = ntfs_map_runlist(ni, vcn);
			if (likely(!err))
				goto lock_retry_remap;
			rl = NULL;
			lcn = err;
		}
		/* Failed to map the buffer, even after retrying. */
		bh->b_blocknr = -1;
		ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
				"attribute type 0x%x, vcn 0x%llx, offset 0x%x "
				"because its location on disk could not be "
				"determined%s (error code %lli).", ni->mft_no,
				ni->type, (unsigned long long)vcn,
				vcn_ofs, is_retry ? " even after "
				"retrying" : "", (long long)lcn);
		if (!err)
			err = -EIO;
		break;
	} while (block++, (bh = bh->b_this_page) != head);

	/* Release the lock if we took it. */
	if (rl)
		up_read(&ni->runlist.lock);

	/* For the error case, need to reset bh to the beginning. */
	bh = head;

	/* Just an optimization, so ->readpage() isn't called later. */
	if (unlikely(!PageUptodate(page))) {
		int uptodate = 1;
		do {
			if (!buffer_uptodate(bh)) {
				uptodate = 0;
				bh = head;
				break;
			}
		} while ((bh = bh->b_this_page) != head);
		if (uptodate)
			SetPageUptodate(page);
	}

	/* Setup all mapped, dirty buffers for async write i/o. */
	do {
		get_bh(bh);
		if (buffer_mapped(bh) && buffer_dirty(bh)) {
			lock_buffer(bh);
			if (test_clear_buffer_dirty(bh)) {
				BUG_ON(!buffer_uptodate(bh));
				mark_buffer_async_write(bh);
			} else
				unlock_buffer(bh);
		} else if (unlikely(err)) {
			/*
			 * For the error case. The buffer may have been set
			 * dirty during attachment to a dirty page.
			 */
			if (err != -ENOMEM)
				clear_buffer_dirty(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	if (unlikely(err)) {
		// TODO: Remove the -EOPNOTSUPP check later on...
		if (unlikely(err == -EOPNOTSUPP))
			err = 0;
		else if (err == -ENOMEM) {
			ntfs_warning(vol->sb, "Error allocating memory. "
					"Redirtying page so we try again "
					"later.");
			/*
			 * Put the page back on mapping->dirty_pages, but
			 * leave its buffers' dirty state as-is.
			 */
			redirty_page_for_writepage(wbc, page);
			err = 0;
		} else
			SetPageError(page);
	}

	BUG_ON(PageWriteback(page));
	set_page_writeback(page);	/* Keeps try_to_free_buffers() away. */
	unlock_page(page);

	/*
	 * Submit the prepared buffers for i/o. Note the page is unlocked,
	 * and the async write i/o completion handler can end_page_writeback()
	 * at any time after the *first* submit_bh(). So the buffers can then
	 * disappear...
	 */
	need_end_writeback = TRUE;
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh(WRITE, bh);
			need_end_writeback = FALSE;
		}
		put_bh(bh);
		bh = next;
	} while (bh != head);

	/* If no i/o was started, need to end_page_writeback(). */
	if (unlikely(need_end_writeback))
		end_page_writeback(page);

	ntfs_debug("Done.");
	return err;
}
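
/*
 * Illustrative sketch (not part of the original file): the dblock/iblock
 * boundaries used above, with concrete numbers. Assume a 512-byte block
 * size, i_size == 3000 and initialized_size == 2048:
 *
 *	dblock = (3000 + 511) >> 9;	// == 6, first block beyond i_size
 *	iblock = 2048 >> 9;		// == 4 (blocks 0-3 fully initialized)
 *
 * Blocks 0-3 are mapped and written normally, blocks 4 and 5 fall beyond
 * the initialized size (the case the code above does not support yet and
 * rejects with -EOPNOTSUPP), and blocks 6 and up are beyond i_size and are
 * cleaned rather than written.
 */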

/**
 * ntfs_write_mst_block - write a @page to the backing store
 * @page: page cache page to write out
 * @wbc: writeback control structure
 *
 * This function is for writing pages belonging to non-resident, mst protected
 * attributes to their backing store. The only supported attributes are index
 * allocation and $MFT/$DATA. Both directory inodes and index inodes are
 * supported for the index allocation case.
 *
 * The page must remain locked for the duration of the write because we apply
 * the mst fixups, write, and then undo the fixups, so if we were to unlock the
 * page before undoing the fixups, any other user of the page will see the
 * page contents as corrupt.
 *
 * We clear the page uptodate flag for the duration of the function to ensure
 * exclusion for the $MFT/$DATA case against someone mapping an mft record we
 * are about to apply the mst fixups to.
 *
 * Return 0 on success and -errno on error.
 *
 * Based on ntfs_write_block(), ntfs_mft_writepage(), and
 * write_mft_record_nolock().
 */
static int ntfs_write_mst_block(struct page *page,
		struct writeback_control *wbc)
{
	sector_t block, dblock, rec_block;
	struct inode *vi = page->mapping->host;
	ntfs_inode *ni = NTFS_I(vi);
	ntfs_volume *vol = ni->vol;
	u8 *kaddr;
	unsigned char bh_size_bits = vi->i_blkbits;
	unsigned int bh_size = 1 << bh_size_bits;
	unsigned int rec_size = ni->itype.index.block_size;
	ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
	struct buffer_head *bh, *head, *tbh, *rec_start_bh;
	int max_bhs = PAGE_CACHE_SIZE / bh_size;
	struct buffer_head *bhs[max_bhs];
	runlist_element *rl;
	int i, nr_locked_nis, nr_recs, nr_bhs, bhs_per_rec, err, err2;
	unsigned rec_size_bits;
	BOOL sync, is_mft, page_is_dirty, rec_is_dirty;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx.", vi->i_ino, ni->type, page->index);
	BUG_ON(!NInoNonResident(ni));
	BUG_ON(!NInoMstProtected(ni));
	is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino);
	/*
	 * NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page
	 * in its page cache were to be marked dirty. However, this should
	 * never happen with the current driver, and since we do not handle
	 * this case here, we do want to BUG(), at least for now.
	 */
	BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) ||
			(NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
	BUG_ON(!max_bhs);

	/* Were we called for sync purposes? */
	sync = (wbc->sync_mode == WB_SYNC_ALL);

	/* Make sure we have mapped buffers. */
	BUG_ON(!page_has_buffers(page));
	bh = head = page_buffers(page);
	BUG_ON(!bh);

	rec_size_bits = ni->itype.index.block_size_bits;
	BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits));
	bhs_per_rec = rec_size >> bh_size_bits;
	BUG_ON(!bhs_per_rec);

	/* The first block in the page. */
	rec_block = block = (sector_t)page->index <<
			(PAGE_CACHE_SHIFT - bh_size_bits);

	/* The first out of bounds block for the data size. */
	dblock = (vi->i_size + bh_size - 1) >> bh_size_bits;

	rl = NULL;
	err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
	page_is_dirty = rec_is_dirty = FALSE;
	rec_start_bh = NULL;
	do {
		BOOL is_retry = FALSE;

		if (likely(block < rec_block)) {
			if (unlikely(block >= dblock)) {
				clear_buffer_dirty(bh);
				continue;
			}
			/*
			 * This block is not the first one in the record. We
			 * ignore the buffer's dirty state because we could
			 * have raced with a parallel mark_ntfs_record_dirty().
			 */
			if (!rec_is_dirty)
				continue;
			if (unlikely(err2)) {
				if (err2 != -ENOMEM)
					clear_buffer_dirty(bh);
				continue;
			}
		} else /* if (block == rec_block) */ {
			BUG_ON(block > rec_block);
			/* This block is the first one in the record. */
			rec_block += bhs_per_rec;
			err2 = 0;
			if (unlikely(block >= dblock)) {
				clear_buffer_dirty(bh);
				continue;
			}
			if (!buffer_dirty(bh)) {
				/* Clean records are not written out. */
				rec_is_dirty = FALSE;
				continue;
			}
			rec_is_dirty = TRUE;
			rec_start_bh = bh;
		}
		/* Need to map the buffer if it is not mapped already. */
		if (unlikely(!buffer_mapped(bh))) {
			VCN vcn;
			LCN lcn;
			unsigned int vcn_ofs;

			/* Obtain the vcn and offset of the current block. */
			vcn = (VCN)block << bh_size_bits;
			vcn_ofs = vcn & vol->cluster_size_mask;
			vcn >>= vol->cluster_size_bits;
			if (!rl) {
lock_retry_remap:
				down_read(&ni->runlist.lock);
				rl = ni->runlist.rl;
			}
			if (likely(rl != NULL)) {
				/* Seek to element containing target vcn. */
				while (rl->length && rl[1].vcn <= vcn)
					rl++;
				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
			} else
				lcn = LCN_RL_NOT_MAPPED;
			/* Successful remap. */
			if (likely(lcn >= 0)) {
				/* Setup buffer head to correct block. */
				bh->b_blocknr = ((lcn <<
						vol->cluster_size_bits) +
						vcn_ofs) >> bh_size_bits;
				set_buffer_mapped(bh);
			} else {
				/*
				 * Remap failed. Retry to map the runlist once
				 * unless we are working on $MFT which always
				 * has the whole of its runlist in memory.
				 */
				if (!is_mft && !is_retry &&
						lcn == LCN_RL_NOT_MAPPED) {
					is_retry = TRUE;
					/*
					 * Attempt to map runlist, dropping
					 * lock for the duration.
					 */
					up_read(&ni->runlist.lock);
					err2 = ntfs_map_runlist(ni, vcn);
					if (likely(!err2))
						goto lock_retry_remap;
					if (err2 == -ENOMEM)
						page_is_dirty = TRUE;
					lcn = err2;
				} else
					err2 = -EIO;
				/* Hard error. Abort writing this record. */
				if (!err || err == -ENOMEM)
					err = err2;
				bh->b_blocknr = -1;
				ntfs_error(vol->sb, "Cannot write ntfs record "
						"0x%llx (inode 0x%lx, "
						"attribute type 0x%x) because "
						"its location on disk could "
						"not be determined (error "
						"code %lli).", (s64)block <<
						bh_size_bits >>
						vol->mft_record_size_bits,
						ni->mft_no, ni->type,
						(long long)lcn);
				/*
				 * If this is not the first buffer, remove the
				 * buffers in this record from the list of
				 * buffers to write and clear their dirty bit
				 * if not error -ENOMEM.
				 */
				if (rec_start_bh != bh) {
					while (bhs[--nr_bhs] != rec_start_bh)
						;
					if (err2 != -ENOMEM) {
						do {
							clear_buffer_dirty(
								rec_start_bh);
						} while ((rec_start_bh =
								rec_start_bh->
								b_this_page) !=
								bh);
					}
				}
				continue;
			}
		}
		BUG_ON(!buffer_uptodate(bh));
		BUG_ON(nr_bhs >= max_bhs);
		bhs[nr_bhs++] = bh;
	} while (block++, (bh = bh->b_this_page) != head);
	if (unlikely(rl))
		up_read(&ni->runlist.lock);
	/* If there were no dirty buffers, we are done. */
	if (!nr_bhs)
		goto done;
	/* Map the page so we can access its contents. */
	kaddr = kmap(page);
	/* Clear the page uptodate flag whilst the mst fixups are applied. */
	BUG_ON(!PageUptodate(page));
	ClearPageUptodate(page);
	for (i = 0; i < nr_bhs; i++) {
		unsigned int ofs;

		/* Skip buffers which are not at the beginning of records. */
		if (i % bhs_per_rec)
			continue;
		tbh = bhs[i];
		ofs = bh_offset(tbh);
		if (is_mft) {
			ntfs_inode *tni;
			unsigned long mft_no;

			/* Get the mft record number. */
			mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
					>> rec_size_bits;
			/* Check whether to write this mft record. */
			tni = NULL;
			if (!ntfs_may_write_mft_record(vol, mft_no,
					(MFT_RECORD*)(kaddr + ofs), &tni)) {
				/*
				 * The record should not be written. This
				 * means we need to redirty the page before
				 * returning.
				 */
				page_is_dirty = TRUE;
				/*
				 * Remove the buffers in this mft record from
				 * the list of buffers to write.
				 */
				do {
					bhs[i] = NULL;
				} while (++i % bhs_per_rec);
				continue;
			}
			/*
			 * The record should be written. If a locked ntfs
			 * inode was returned, add it to the array of locked
			 * ntfs inodes.
			 */
			if (tni)
				locked_nis[nr_locked_nis++] = tni;
		}
		/* Apply the mst protection fixups. */
		err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
				rec_size);
		if (unlikely(err2)) {
			if (!err || err == -ENOMEM)
				err = -EIO;
			ntfs_error(vol->sb, "Failed to apply mst fixups "
					"(inode 0x%lx, attribute type 0x%x, "
					"page index 0x%lx, page offset 0x%x)!"
					" Unmount and run chkdsk.", vi->i_ino,
					ni->type, page->index, ofs);
			/*
			 * Mark all the buffers in this record clean as we do
			 * not want to write corrupt data to disk.
			 */
			do {
				clear_buffer_dirty(bhs[i]);
				bhs[i] = NULL;
			} while (++i % bhs_per_rec);
			continue;
		}
		nr_recs++;
	}
	/* If no records are to be written out, we are done. */
	if (!nr_recs)
		goto unm_done;
	flush_dcache_page(page);
	/* Lock buffers and start synchronous write i/o on them. */
	for (i = 0; i < nr_bhs; i++) {
		tbh = bhs[i];
		if (!tbh)
			continue;
		if (unlikely(test_set_buffer_locked(tbh)))
			BUG();
		/* The buffer dirty state is now irrelevant, just clean it. */
		clear_buffer_dirty(tbh);
		BUG_ON(!buffer_uptodate(tbh));
		BUG_ON(!buffer_mapped(tbh));
		get_bh(tbh);
		tbh->b_end_io = end_buffer_write_sync;
		submit_bh(WRITE, tbh);
	}
	/* Synchronize the mft mirror now if not @sync. */
	if (is_mft && !sync)
		goto do_mirror;
do_wait:
	/* Wait on i/o completion of buffers. */
	for (i = 0; i < nr_bhs; i++) {
		tbh = bhs[i];
		if (!tbh)
			continue;
		wait_on_buffer(tbh);
		if (unlikely(!buffer_uptodate(tbh))) {
			ntfs_error(vol->sb, "I/O error while writing ntfs "
					"record buffer (inode 0x%lx, "
					"attribute type 0x%x, page index "
					"0x%lx, page offset 0x%lx)! Unmount "
					"and run chkdsk.", vi->i_ino, ni->type,
					page->index, bh_offset(tbh));
			if (!err || err == -ENOMEM)
				err = -EIO;
			/*
			 * Set the buffer uptodate so the page and buffer
			 * states do not become out of sync.
			 */
			set_buffer_uptodate(tbh);
		}
	}
	/* If @sync, now synchronize the mft mirror. */
	if (is_mft && sync) {
do_mirror:
		for (i = 0; i < nr_bhs; i++) {
			unsigned long mft_no;
			unsigned int ofs;

			/*
			 * Skip buffers which are not at the beginning of
			 * records.
			 */
			if (i % bhs_per_rec)
				continue;
			tbh = bhs[i];
			/* Skip removed buffers (and hence records). */
			if (!tbh)
				continue;
			ofs = bh_offset(tbh);
			/* Get the mft record number. */
			mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
					>> rec_size_bits;
			if (mft_no < vol->mftmirr_size)
				ntfs_sync_mft_mirror(vol, mft_no,
						(MFT_RECORD*)(kaddr + ofs),
						sync);
		}
		if (!sync)
			goto do_wait;
	}
	/* Remove the mst protection fixups again. */
	for (i = 0; i < nr_bhs; i++) {
		if (!(i % bhs_per_rec)) {
			tbh = bhs[i];
			if (!tbh)
				continue;
			post_write_mst_fixup((NTFS_RECORD*)(kaddr +
					bh_offset(tbh)));
		}
	}
	flush_dcache_page(page);
unm_done:
	/* Unlock any locked inodes. */
	while (nr_locked_nis-- > 0) {
		ntfs_inode *tni, *base_tni;

		tni = locked_nis[nr_locked_nis];
		/* Get the base inode. */
		down(&tni->extent_lock);
		if (tni->nr_extents >= 0)
			base_tni = tni;
		else {
			base_tni = tni->ext.base_ntfs_ino;
			BUG_ON(!base_tni);
		}
		up(&tni->extent_lock);
		ntfs_debug("Unlocking %s inode 0x%lx.",
				tni == base_tni ? "base" : "extent",
				tni->mft_no);
		up(&tni->mrec_lock);
		atomic_dec(&tni->count);
		iput(VFS_I(base_tni));
	}
	SetPageUptodate(page);
	kunmap(page);
done:
	if (unlikely(err && err != -ENOMEM)) {
		/*
		 * Set page error if there is only one ntfs record in the page.
		 * Otherwise we would lose per-record granularity.
		 */
		if (ni->itype.index.block_size == PAGE_CACHE_SIZE)
			SetPageError(page);
		NVolSetErrors(vol);
	}
	if (page_is_dirty) {
		ntfs_debug("Page still contains one or more dirty ntfs "
				"records. Redirtying the page starting at "
				"record 0x%lx.", page->index <<
				(PAGE_CACHE_SHIFT - rec_size_bits));
		redirty_page_for_writepage(wbc, page);
		unlock_page(page);
	} else {
		/*
		 * Keep the VM happy. This must be done otherwise the
		 * radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
		 * the page is clean.
		 */
		BUG_ON(PageWriteback(page));
		set_page_writeback(page);
		unlock_page(page);
		end_page_writeback(page);
	}
	if (likely(!err))
		ntfs_debug("Done.");
	return err;
}
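
/*
 * Illustrative sketch (not part of the original file): the mft record
 * number computation used above, with concrete numbers. Assume a 4096-byte
 * page (PAGE_CACHE_SHIFT == 12), 1024-byte mft records (rec_size_bits ==
 * 10), and a buffer at offset 1024 in page index 2:
 *
 *	mft_no = (((s64)2 << 12) + 1024) >> 10;	// (8192 + 1024) / 1024 == 9
 *
 * i.e. the second record in the third page of $MFT/$DATA is mft record 9.
 */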

/**
 * ntfs_writepage - write a @page to the backing store
 * @page: page cache page to write out
 * @wbc: writeback control structure
 *
 * This is called from the VM when it wants to have a dirty ntfs page cache
 * page cleaned. The VM has already locked the page and marked it clean.
 *
 * For non-resident attributes, ntfs_writepage() writes the @page by calling
 * the ntfs version of the generic block_write_full_page() function,
 * ntfs_write_block(), which in turn if necessary creates and writes the
 * buffers associated with the page asynchronously.
 *
 * For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
 * the data to the mft record (which at this stage is most likely in memory).
 * The mft record is then marked dirty and written out asynchronously via the
 * vfs inode dirty code path for the inode the mft record belongs to or via the
 * vm page dirty code path for the page the mft record is in.
 *
 * Based on ntfs_readpage() and fs/buffer.c::block_write_full_page().
 *
 * Return 0 on success and -errno on error.
 */
static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
{
	loff_t i_size;
	struct inode *vi;
	ntfs_inode *ni, *base_ni;
	char *kaddr;
	ntfs_attr_search_ctx *ctx;
	MFT_RECORD *m;
	u32 attr_len;
	int err;

	BUG_ON(!PageLocked(page));

	vi = page->mapping->host;
	i_size = i_size_read(vi);

	/* Is the page fully outside i_size? (truncate in progress) */
	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
			PAGE_CACHE_SHIFT)) {
		/*
		 * The page may have dirty, unmapped buffers. Make them
		 * freeable here, so the page does not leak.
		 */
		block_invalidatepage(page, 0);
		unlock_page(page);
		ntfs_debug("Write outside i_size - truncated?");
		return 0;
	}
	ni = NTFS_I(vi);

	/* NInoNonResident() == NInoIndexAllocPresent() */
	if (NInoNonResident(ni)) {
		/*
		 * Only unnamed $DATA attributes can be compressed, encrypted,
		 * and/or sparse.
		 */
		if (ni->type == AT_DATA && !ni->name_len) {
			/* If file is encrypted, deny access, just like NT4. */
			if (NInoEncrypted(ni)) {
				unlock_page(page);
				ntfs_debug("Denying write access to encrypted "
						"file.");
				return -EACCES;
			}
			/* Compressed data streams are handled in compress.c. */
			if (NInoCompressed(ni)) {
				// TODO: Implement and replace this check with
				// return ntfs_write_compressed_block(page);
				unlock_page(page);
				ntfs_error(vi->i_sb, "Writing to compressed "
						"files is not supported yet. "
						"Sorry.");
				return -EOPNOTSUPP;
			}
			// TODO: Implement and remove this check.
			if (NInoSparse(ni)) {
				unlock_page(page);
				ntfs_error(vi->i_sb, "Writing to sparse files "
						"is not supported yet. Sorry.");
				return -EOPNOTSUPP;
			}
		}
		/* We have to zero every time due to mmap-at-end-of-file. */
		if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
			/* The page straddles i_size. */
			unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
			kaddr = kmap_atomic(page, KM_USER0);
			memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
			flush_dcache_page(page);
			kunmap_atomic(kaddr, KM_USER0);
		}
		/* Handle mst protected attributes. */
		if (NInoMstProtected(ni))
			return ntfs_write_mst_block(page, wbc);
		/* Normal data stream. */
		return ntfs_write_block(page, wbc);
	}
	/*
	 * Attribute is resident, implying it is not compressed, encrypted,
	 * sparse, or mst protected. This also means the attribute is smaller
	 * than an mft record and hence smaller than a page, so can simply
	 * return error on any pages with index above 0.
	 */
	BUG_ON(page_has_buffers(page));
	BUG_ON(!PageUptodate(page));
	if (unlikely(page->index > 0)) {
		ntfs_error(vi->i_sb, "BUG()! page->index (0x%lx) > 0. "
				"Aborting write.", page->index);
		BUG_ON(PageWriteback(page));
		set_page_writeback(page);
		unlock_page(page);
		end_page_writeback(page);
		return -EIO;
	}
	if (!NInoAttr(ni))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;
	/* Map, pin, and lock the mft record. */
	m = map_mft_record(base_ni);
	if (IS_ERR(m)) {
		err = PTR_ERR(m);
		m = NULL;
		ctx = NULL;
		goto err_out;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, m);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err))
		goto err_out;
	/*
	 * Keep the VM happy. This must be done otherwise the radix-tree tag
	 * PAGECACHE_TAG_DIRTY remains set even though the page is clean.
	 */
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);
	unlock_page(page);

	/*
	 * Here, we don't need to zero the out of bounds area every time
	 * because the below memcpy() already takes care of the
	 * mmap-at-end-of-file requirements. If the file is converted to a
	 * non-resident one, then the code path in use is switched to the
	 * non-resident one where the zeroing happens on each ntfs_writepage()
	 * invocation.
	 *
	 * The above also applies nicely when i_size is decreased.
	 *
	 * When i_size is increased, the memory between the old and new i_size
	 * _must_ be zeroed (or overwritten with new data). Otherwise we will
	 * expose data to userspace/disk which should never have been exposed.
	 *
	 * FIXME: Ensure that i_size increases do the zeroing/overwriting and
	 * if we cannot guarantee that, then enable the zeroing below. If the
	 * zeroing below is enabled, we MUST move the unlock_page() from above
	 * to after the kunmap_atomic(), i.e. just before the
	 * end_page_writeback().
	 * UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size
	 * increases for resident attributes so those are ok.
	 * TODO: ntfs_truncate(), others?
	 */

	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
	i_size = i_size_read(VFS_I(ni));
	kaddr = kmap_atomic(page, KM_USER0);
	if (unlikely(attr_len > i_size)) {
		/* Zero out of bounds area in the mft record. */
		memset((u8*)ctx->attr + le16_to_cpu(
				ctx->attr->data.resident.value_offset) +
				i_size, 0, attr_len - i_size);
		attr_len = i_size;
	}
	/* Copy the data from the page to the mft record. */
	memcpy((u8*)ctx->attr +
			le16_to_cpu(ctx->attr->data.resident.value_offset),
			kaddr, attr_len);
	flush_dcache_mft_record_page(ctx->ntfs_ino);
	/* Zero out of bounds area in the page cache page. */
	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
	flush_dcache_page(page);
	kunmap_atomic(kaddr, KM_USER0);

	end_page_writeback(page);

	/* Mark the mft record dirty, so it gets written back. */
	mark_mft_record_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(base_ni);
	return 0;
err_out:
	if (err == -ENOMEM) {
		ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
				"page so we try again later.");
		/*
		 * Put the page back on mapping->dirty_pages, but leave its
		 * buffers' dirty state as-is.
		 */
		redirty_page_for_writepage(wbc, page);
		err = 0;
	} else {
		ntfs_error(vi->i_sb, "Resident attribute write failed with "
				"error %i. Setting page error flag.", err);
		SetPageError(page);
	}
	unlock_page(page);
	if (ctx)
		ntfs_attr_put_search_ctx(ctx);
	if (m)
		unmap_mft_record(base_ni);
	return err;
}
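
/*
 * Illustrative sketch (not part of the original file): the
 * mmap-at-end-of-file zeroing done above, with concrete numbers. Assume a
 * 4096-byte page and i_size == 10000. Page index 2 straddles i_size
 * (it covers bytes 8192-12287), so:
 *
 *	ofs = 10000 & ~PAGE_CACHE_MASK;		// == 10000 % 4096 == 1808
 *	memset(kaddr + 1808, 0, 4096 - 1808);	// zero the tail of the page
 *
 * This prevents stale data beyond i_size, written there via mmap, from
 * ever reaching the disk.
 */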

/**
 * ntfs_prepare_nonresident_write - prepare a non-resident attribute page for a write
 * @page: page cache page to be written to
 * @from: offset within the page at which the write starts
 * @to: offset within the page at which the write ends
 */
1423 | static int ntfs_prepare_nonresident_write(struct page *page, | ||
1424 | unsigned from, unsigned to) | ||
1425 | { | ||
1426 | VCN vcn; | ||
1427 | LCN lcn; | ||
1428 | sector_t block, ablock, iblock; | ||
1429 | struct inode *vi; | ||
1430 | ntfs_inode *ni; | ||
1431 | ntfs_volume *vol; | ||
1432 | runlist_element *rl; | ||
1433 | struct buffer_head *bh, *head, *wait[2], **wait_bh = wait; | ||
1434 | unsigned int vcn_ofs, block_start, block_end, blocksize; | ||
1435 | int err; | ||
1436 | BOOL is_retry; | ||
1437 | unsigned char blocksize_bits; | ||
1438 | |||
1439 | vi = page->mapping->host; | ||
1440 | ni = NTFS_I(vi); | ||
1441 | vol = ni->vol; | ||
1442 | |||
1443 | ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " | ||
1444 | "0x%lx, from = %u, to = %u.", ni->mft_no, ni->type, | ||
1445 | page->index, from, to); | ||
1446 | |||
1447 | BUG_ON(!NInoNonResident(ni)); | ||
1448 | |||
1449 | blocksize_bits = vi->i_blkbits; | ||
1450 | blocksize = 1 << blocksize_bits; | ||
1451 | |||
1452 | /* | ||
1453 | * create_empty_buffers() will create uptodate/dirty buffers if the | ||
1454 | * page is uptodate/dirty. | ||
1455 | */ | ||
1456 | if (!page_has_buffers(page)) | ||
1457 | create_empty_buffers(page, blocksize, 0); | ||
1458 | bh = head = page_buffers(page); | ||
1459 | if (unlikely(!bh)) | ||
1460 | return -ENOMEM; | ||
1461 | |||
1462 | /* The first block in the page. */ | ||
1463 | block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); | ||
1464 | |||
1465 | /* | ||
1466 | * The first out of bounds block for the allocated size. No need to | ||
1467 | * round up as allocated_size is in multiples of cluster size and the | ||
1468 | * minimum cluster size is 512 bytes, which is equal to the smallest | ||
1469 | * blocksize. | ||
1470 | */ | ||
1471 | ablock = ni->allocated_size >> blocksize_bits; | ||
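/*
 * Worked example (illustrative numbers, not from the original source): with
 * 512-byte blocks (blocksize_bits == 9) and allocated_size == 64 KiB, we get
 * ablock == 65536 >> 9 == 128, i.e. blocks 0..127 are backed by the
 * allocation.
 */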
1472 | |||
1473 | /* The last (fully or partially) initialized block. */ | ||
1474 | iblock = ni->initialized_size >> blocksize_bits; | ||
1475 | |||
1476 | /* Loop through all the buffers in the page. */ | ||
1477 | block_start = 0; | ||
1478 | rl = NULL; | ||
1479 | err = 0; | ||
1480 | do { | ||
1481 | block_end = block_start + blocksize; | ||
1482 | /* | ||
1483 | * If buffer @bh is outside the write, just mark it uptodate | ||
1484 | * if the page is uptodate and continue with the next buffer. | ||
1485 | */ | ||
1486 | if (block_end <= from || block_start >= to) { | ||
1487 | if (PageUptodate(page)) { | ||
1488 | if (!buffer_uptodate(bh)) | ||
1489 | set_buffer_uptodate(bh); | ||
1490 | } | ||
1491 | continue; | ||
1492 | } | ||
1493 | /* | ||
1494 | * @bh is at least partially being written to. | ||
1495 | * Make sure it is not marked as new. | ||
1496 | */ | ||
1497 | //if (buffer_new(bh)) | ||
1498 | // clear_buffer_new(bh); | ||
1499 | |||
1500 | if (block >= ablock) { | ||
1501 | // TODO: block is above allocated_size, need to | ||
1502 | // allocate it. Best done in one go to accommodate not | ||
1503 | // only block but all above blocks up to and including: | ||
1504 | // ((page->index << PAGE_CACHE_SHIFT) + to + blocksize | ||
1505 | // - 1) >> blocksize_bits. Obviously will need to round | ||
1506 | // up to next cluster boundary, too. This should be | ||
1507 | // done with a helper function, so it can be reused. | ||
1508 | ntfs_error(vol->sb, "Writing beyond allocated size " | ||
1509 | "is not supported yet. Sorry."); | ||
1510 | err = -EOPNOTSUPP; | ||
1511 | goto err_out; | ||
1512 | // Need to update ablock. | ||
1513 | // Need to set_buffer_new() on all block bhs that are | ||
1514 | // newly allocated. | ||
1515 | } | ||
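/*
 * Illustrative sketch of the rounding described in the TODO above. This is
 * an assumption about how such a helper could work, not the original code;
 * it only computes the range, the allocator call itself is omitted:
 *
 *	s64 end_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
 *	// Round up to the next cluster boundary (cluster_size_mask ==
 *	// cluster_size - 1, as used further below).
 *	s64 new_alloc = (end_ofs + vol->cluster_size - 1) &
 *			~(s64)vol->cluster_size_mask;
 *	// Blocks ablock .. (new_alloc >> blocksize_bits) - 1 would then need
 *	// allocating, followed by set_buffer_new() on their buffer heads.
 */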
1516 | /* | ||
1517 | * Now we have enough allocated size to fulfill the whole | ||
1518 | * request, i.e. block < ablock is true. | ||
1519 | */ | ||
1520 | if (unlikely((block >= iblock) && | ||
1521 | (ni->initialized_size < vi->i_size))) { | ||
1522 | /* | ||
1523 | * If this page is fully outside initialized size, zero | ||
1524 | * out all pages between the current initialized size | ||
1525 | * and the current page. Just use ntfs_readpage() to do | ||
1526 | * the zeroing transparently. | ||
1527 | */ | ||
1528 | if (block > iblock) { | ||
1529 | // TODO: | ||
1530 | // For each page do: | ||
1531 | // - read_cache_page() | ||
1532 | // Again for each page do: | ||
1533 | // - wait_on_page_locked() | ||
1534 | // - Check (PageUptodate(page) && | ||
1535 | // !PageError(page)) | ||
1536 | // Update initialized size in the attribute and | ||
1537 | // in the inode. | ||
1538 | // Again, for each page do: | ||
1539 | // __set_page_dirty_buffers(); | ||
1540 | // page_cache_release() | ||
1541 | // We don't need to wait on the writes. | ||
1542 | // Update iblock. | ||
1543 | } | ||
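/*
 * A minimal sketch of the loop outlined in the TODO above (an assumption,
 * not the original implementation; error handling is elided):
 *
 *	pgoff_t idx = ni->initialized_size >> PAGE_CACHE_SHIFT;
 *	for (; idx < page->index; idx++) {
 *		struct page *zp = read_cache_page(vi->i_mapping, idx,
 *				(filler_t*)ntfs_readpage, NULL);
 *		if (IS_ERR(zp))
 *			continue;
 *		wait_on_page_locked(zp);
 *		__set_page_dirty_buffers(zp);
 *		page_cache_release(zp);
 *	}
 */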
1544 | /* | ||
1545 | * The current page straddles initialized size. Zero | ||
1546 | * all non-uptodate buffers and set them uptodate (and | ||
1547 | * dirty?). Note, there aren't any non-uptodate buffers | ||
1548 | * if the page is uptodate. | ||
1549 | * FIXME: For an uptodate page, the buffers may need to | ||
1550 | * be written out because they were not initialized on | ||
1551 | * disk before. | ||
1552 | */ | ||
1553 | if (!PageUptodate(page)) { | ||
1554 | // TODO: | ||
1555 | // Zero any non-uptodate buffers up to i_size. | ||
1556 | // Set them uptodate and dirty. | ||
1557 | } | ||
1558 | // TODO: | ||
1559 | // Update initialized size in the attribute and in the | ||
1560 | // inode (up to i_size). | ||
1561 | // Update iblock. | ||
1562 | // FIXME: This is inefficient. Try to batch the two | ||
1563 | // size changes to happen in one go. | ||
1564 | ntfs_error(vol->sb, "Writing beyond initialized size " | ||
1565 | "is not supported yet. Sorry."); | ||
1566 | err = -EOPNOTSUPP; | ||
1567 | goto err_out; | ||
1568 | // Do NOT set_buffer_new() BUT DO clear buffer range | ||
1569 | // outside write request range. | ||
1570 | // set_buffer_uptodate() on complete buffers as well as | ||
1571 | // set_buffer_dirty(). | ||
1572 | } | ||
1573 | |||
1574 | /* Need to map unmapped buffers. */ | ||
1575 | if (!buffer_mapped(bh)) { | ||
1576 | /* Unmapped buffer. Need to map it. */ | ||
1577 | bh->b_bdev = vol->sb->s_bdev; | ||
1578 | |||
1579 | /* Convert block into corresponding vcn and offset. */ | ||
1580 | vcn = (VCN)block << blocksize_bits >> | ||
1581 | vol->cluster_size_bits; | ||
1582 | vcn_ofs = ((VCN)block << blocksize_bits) & | ||
1583 | vol->cluster_size_mask; | ||
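/*
 * Illustrative numbers (not from the original): with blocksize_bits == 9 and
 * cluster_size_bits == 12, block 21 corresponds to byte offset 0x2a00, so
 * vcn == 2 and vcn_ofs == 0xa00.
 */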
1584 | |||
1585 | is_retry = FALSE; | ||
1586 | if (!rl) { | ||
1587 | lock_retry_remap: | ||
1588 | down_read(&ni->runlist.lock); | ||
1589 | rl = ni->runlist.rl; | ||
1590 | } | ||
1591 | if (likely(rl != NULL)) { | ||
1592 | /* Seek to element containing target vcn. */ | ||
1593 | while (rl->length && rl[1].vcn <= vcn) | ||
1594 | rl++; | ||
1595 | lcn = ntfs_rl_vcn_to_lcn(rl, vcn); | ||
1596 | } else | ||
1597 | lcn = LCN_RL_NOT_MAPPED; | ||
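/*
 * Example with assumed runlist contents: for rl == { {vcn 0, lcn 100, len 4},
 * {vcn 4, lcn 200, len 8}, ... }, vcn 5 falls in the second element, so
 * ntfs_rl_vcn_to_lcn() returns 200 + (5 - 4) == 201.
 */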
1598 | if (unlikely(lcn < 0)) { | ||
1599 | /* | ||
1600 | * We extended the attribute allocation above. | ||
1601 | * If we hit an ENOENT here, it means that the | ||
1602 | * allocation was insufficient, which is a bug. | ||
1603 | */ | ||
1604 | BUG_ON(lcn == LCN_ENOENT); | ||
1605 | |||
1606 | /* It is a hole, need to instantiate it. */ | ||
1607 | if (lcn == LCN_HOLE) { | ||
1608 | // TODO: Instantiate the hole. | ||
1609 | // clear_buffer_new(bh); | ||
1610 | // unmap_underlying_metadata(bh->b_bdev, | ||
1611 | // bh->b_blocknr); | ||
1612 | // For non-uptodate buffers, need to | ||
1613 | // zero out the region outside the | ||
1614 | // request in this bh or all bhs, | ||
1615 | // depending on what we implemented | ||
1616 | // above. | ||
1617 | // Need to flush_dcache_page(). | ||
1618 | // Or could use set_buffer_new() | ||
1619 | // instead? | ||
1620 | ntfs_error(vol->sb, "Writing into " | ||
1621 | "sparse regions is " | ||
1622 | "not supported yet. " | ||
1623 | "Sorry."); | ||
1624 | err = -EOPNOTSUPP; | ||
1625 | goto err_out; | ||
1626 | } else if (!is_retry && | ||
1627 | lcn == LCN_RL_NOT_MAPPED) { | ||
1628 | is_retry = TRUE; | ||
1629 | /* | ||
1630 | * Attempt to map runlist, dropping | ||
1631 | * lock for the duration. | ||
1632 | */ | ||
1633 | up_read(&ni->runlist.lock); | ||
1634 | err = ntfs_map_runlist(ni, vcn); | ||
1635 | if (likely(!err)) | ||
1636 | goto lock_retry_remap; | ||
1637 | rl = NULL; | ||
1638 | lcn = err; | ||
1639 | } | ||
1640 | /* | ||
1641 | * Failed to map the buffer, even after | ||
1642 | * retrying. | ||
1643 | */ | ||
1644 | bh->b_blocknr = -1; | ||
1645 | ntfs_error(vol->sb, "Failed to write to inode " | ||
1646 | "0x%lx, attribute type 0x%x, " | ||
1647 | "vcn 0x%llx, offset 0x%x " | ||
1648 | "because its location on disk " | ||
1649 | "could not be determined%s " | ||
1650 | "(error code %lli).", | ||
1651 | ni->mft_no, ni->type, | ||
1652 | (unsigned long long)vcn, | ||
1653 | vcn_ofs, is_retry ? " even " | ||
1654 | "after retrying" : "", | ||
1655 | (long long)lcn); | ||
1656 | if (!err) | ||
1657 | err = -EIO; | ||
1658 | goto err_out; | ||
1659 | } | ||
1660 | /* We now have a successful remap, i.e. lcn >= 0. */ | ||
1661 | |||
1662 | /* Setup buffer head to correct block. */ | ||
1663 | bh->b_blocknr = ((lcn << vol->cluster_size_bits) | ||
1664 | + vcn_ofs) >> blocksize_bits; | ||
1665 | set_buffer_mapped(bh); | ||
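/*
 * Continuing the illustrative numbers above: a remap yielding lcn == 100
 * gives b_blocknr == ((100 << 12) + 0xa00) >> 9 == 805.
 */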
1666 | |||
1667 | // FIXME: Something analogous to this is needed for | ||
1668 | // each newly allocated block, i.e. BH_New. | ||
1669 | // FIXME: Might need to take this out of the | ||
1670 | // if (!buffer_mapped(bh)) {}, depending on how we | ||
1671 | // implement things during the allocated_size and | ||
1672 | // initialized_size extension code above. | ||
1673 | if (buffer_new(bh)) { | ||
1674 | clear_buffer_new(bh); | ||
1675 | unmap_underlying_metadata(bh->b_bdev, | ||
1676 | bh->b_blocknr); | ||
1677 | if (PageUptodate(page)) { | ||
1678 | set_buffer_uptodate(bh); | ||
1679 | continue; | ||
1680 | } | ||
1681 | /* | ||
1682 | * Page is _not_ uptodate, zero surrounding | ||
1683 | * region. NOTE: This is how we decide whether | ||
1684 | * to zero or not! | ||
1685 | */ | ||
1686 | if (block_end > to || block_start < from) { | ||
1687 | void *kaddr; | ||
1688 | |||
1689 | kaddr = kmap_atomic(page, KM_USER0); | ||
1690 | if (block_end > to) | ||
1691 | memset(kaddr + to, 0, | ||
1692 | block_end - to); | ||
1693 | if (block_start < from) | ||
1694 | memset(kaddr + block_start, 0, | ||
1695 | from - | ||
1696 | block_start); | ||
1697 | flush_dcache_page(page); | ||
1698 | kunmap_atomic(kaddr, KM_USER0); | ||
1699 | } | ||
1700 | continue; | ||
1701 | } | ||
1702 | } | ||
1703 | /* @bh is mapped, set it uptodate if the page is uptodate. */ | ||
1704 | if (PageUptodate(page)) { | ||
1705 | if (!buffer_uptodate(bh)) | ||
1706 | set_buffer_uptodate(bh); | ||
1707 | continue; | ||
1708 | } | ||
1709 | /* | ||
1710 | * The page is not uptodate. The buffer is mapped. If it is not | ||
1711 | * uptodate, and it is only partially being written to, we need | ||
1712 | * to read the buffer in before the write, i.e. right now. | ||
1713 | */ | ||
1714 | if (!buffer_uptodate(bh) && | ||
1715 | (block_start < from || block_end > to)) { | ||
1716 | ll_rw_block(READ, 1, &bh); | ||
1717 | *wait_bh++ = bh; | ||
1718 | } | ||
1719 | } while (block++, block_start = block_end, | ||
1720 | (bh = bh->b_this_page) != head); | ||
1721 | |||
1722 | /* Release the lock if we took it. */ | ||
1723 | if (rl) { | ||
1724 | up_read(&ni->runlist.lock); | ||
1725 | rl = NULL; | ||
1726 | } | ||
1727 | |||
1728 | /* If we issued read requests, let them complete. */ | ||
1729 | while (wait_bh > wait) { | ||
1730 | wait_on_buffer(*--wait_bh); | ||
1731 | if (!buffer_uptodate(*wait_bh)) | ||
1732 | return -EIO; | ||
1733 | } | ||
1734 | |||
1735 | ntfs_debug("Done."); | ||
1736 | return 0; | ||
1737 | err_out: | ||
1738 | /* | ||
1739 | * Zero out any newly allocated blocks to avoid exposing stale data. | ||
1740 | * If BH_New is set, we know that the block was newly allocated in the | ||
1741 | * above loop. | ||
1742 | * FIXME: What about initialized_size increments? Have we done all the | ||
1743 | * required zeroing above? If not, this error handling is broken, and | ||
1744 | * in particular the if (block_end <= from) check is completely bogus. | ||
1745 | */ | ||
1746 | bh = head; | ||
1747 | block_start = 0; | ||
1748 | is_retry = FALSE; | ||
1749 | do { | ||
1750 | block_end = block_start + blocksize; | ||
1751 | if (block_end <= from) | ||
1752 | continue; | ||
1753 | if (block_start >= to) | ||
1754 | break; | ||
1755 | if (buffer_new(bh)) { | ||
1756 | void *kaddr; | ||
1757 | |||
1758 | clear_buffer_new(bh); | ||
1759 | kaddr = kmap_atomic(page, KM_USER0); | ||
1760 | memset(kaddr + block_start, 0, bh->b_size); | ||
1761 | kunmap_atomic(kaddr, KM_USER0); | ||
1762 | set_buffer_uptodate(bh); | ||
1763 | mark_buffer_dirty(bh); | ||
1764 | is_retry = TRUE; | ||
1765 | } | ||
1766 | } while (block_start = block_end, (bh = bh->b_this_page) != head); | ||
1767 | if (is_retry) | ||
1768 | flush_dcache_page(page); | ||
1769 | if (rl) | ||
1770 | up_read(&ni->runlist.lock); | ||
1771 | return err; | ||
1772 | } | ||
1773 | |||
1774 | /** | ||
1775 | * ntfs_prepare_write - prepare a page for receiving data | ||
1776 | * | ||
1777 | * This is called from generic_file_write() with i_sem held on the inode | ||
1778 | * (@page->mapping->host). The @page is locked but not kmap()ped. The source | ||
1779 | * data has not yet been copied into the @page. | ||
1780 | * | ||
1781 | * Need to extend the attribute/fill in holes if necessary, create blocks and | ||
1782 | * make partially overwritten blocks uptodate. | ||
1783 | * | ||
1784 | * i_size is not to be modified yet. | ||
1785 | * | ||
1786 | * Return 0 on success or -errno on error. | ||
1787 | * | ||
1788 | * Should be using block_prepare_write() [support for sparse files] or | ||
1789 | * cont_prepare_write() [no support for sparse files]. Cannot do that due to | ||
1790 | * ntfs specifics, but we can look at them for implementation guidance. | ||
1791 | * | ||
1792 | * Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is | ||
1793 | * the first byte in the page that will be written to and @to is the first byte | ||
1794 | * after the last byte that will be written to. | ||
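 * For example, a write of 100 bytes starting at byte 100 of the page has
 * @from == 100 and @to == 200.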
1795 | */ | ||
1796 | static int ntfs_prepare_write(struct file *file, struct page *page, | ||
1797 | unsigned from, unsigned to) | ||
1798 | { | ||
1799 | s64 new_size; | ||
1800 | struct inode *vi = page->mapping->host; | ||
1801 | ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi); | ||
1802 | ntfs_volume *vol = ni->vol; | ||
1803 | ntfs_attr_search_ctx *ctx = NULL; | ||
1804 | MFT_RECORD *m = NULL; | ||
1805 | ATTR_RECORD *a; | ||
1806 | u8 *kaddr; | ||
1807 | u32 attr_len; | ||
1808 | int err; | ||
1809 | |||
1810 | ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " | ||
1811 | "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type, | ||
1812 | page->index, from, to); | ||
1813 | BUG_ON(!PageLocked(page)); | ||
1814 | BUG_ON(from > PAGE_CACHE_SIZE); | ||
1815 | BUG_ON(to > PAGE_CACHE_SIZE); | ||
1816 | BUG_ON(from > to); | ||
1817 | BUG_ON(NInoMstProtected(ni)); | ||
1818 | /* | ||
1819 | * If a previous ntfs_truncate() failed, repeat it and abort if it | ||
1820 | * fails again. | ||
1821 | */ | ||
1822 | if (unlikely(NInoTruncateFailed(ni))) { | ||
1823 | down_write(&vi->i_alloc_sem); | ||
1824 | err = ntfs_truncate(vi); | ||
1825 | up_write(&vi->i_alloc_sem); | ||
1826 | if (err || NInoTruncateFailed(ni)) { | ||
1827 | if (!err) | ||
1828 | err = -EIO; | ||
1829 | goto err_out; | ||
1830 | } | ||
1831 | } | ||
1832 | /* If the attribute is not resident, deal with it elsewhere. */ | ||
1833 | if (NInoNonResident(ni)) { | ||
1834 | /* | ||
1835 | * Only unnamed $DATA attributes can be compressed, encrypted, | ||
1836 | * and/or sparse. | ||
1837 | */ | ||
1838 | if (ni->type == AT_DATA && !ni->name_len) { | ||
1839 | /* If file is encrypted, deny access, just like NT4. */ | ||
1840 | if (NInoEncrypted(ni)) { | ||
1841 | ntfs_debug("Denying write access to encrypted " | ||
1842 | "file."); | ||
1843 | return -EACCES; | ||
1844 | } | ||
1845 | /* Compressed data streams are handled in compress.c. */ | ||
1846 | if (NInoCompressed(ni)) { | ||
1847 | // TODO: Implement and replace this check with | ||
1848 | // return ntfs_write_compressed_block(page); | ||
1849 | ntfs_error(vi->i_sb, "Writing to compressed " | ||
1850 | "files is not supported yet. " | ||
1851 | "Sorry."); | ||
1852 | return -EOPNOTSUPP; | ||
1853 | } | ||
1854 | // TODO: Implement and remove this check. | ||
1855 | if (NInoSparse(ni)) { | ||
1856 | ntfs_error(vi->i_sb, "Writing to sparse files " | ||
1857 | "is not supported yet. Sorry."); | ||
1858 | return -EOPNOTSUPP; | ||
1859 | } | ||
1860 | } | ||
1861 | /* Normal data stream. */ | ||
1862 | return ntfs_prepare_nonresident_write(page, from, to); | ||
1863 | } | ||
1864 | /* | ||
1865 | * Attribute is resident, implying it is not compressed, encrypted, or | ||
1866 | * sparse. | ||
1867 | */ | ||
1868 | BUG_ON(page_has_buffers(page)); | ||
1869 | new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to; | ||
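/*
 * For example (illustrative): a write with page->index == 0 and to == 700
 * into a 512-byte resident attribute gives new_size == 700, i.e. the
 * attribute value would have to grow to 700 bytes.
 */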
1870 | /* If we do not need to resize the attribute allocation we are done. */ | ||
1871 | if (new_size <= vi->i_size) | ||
1872 | goto done; | ||
1873 | |||
1874 | // FIXME: We abort for now as this code is not safe. | ||
1875 | ntfs_error(vi->i_sb, "Changing the file size is not supported yet. " | ||
1876 | "Sorry."); | ||
1877 | return -EOPNOTSUPP; | ||
1878 | |||
1879 | /* Map, pin, and lock the (base) mft record. */ | ||
1880 | if (!NInoAttr(ni)) | ||
1881 | base_ni = ni; | ||
1882 | else | ||
1883 | base_ni = ni->ext.base_ntfs_ino; | ||
1884 | m = map_mft_record(base_ni); | ||
1885 | if (IS_ERR(m)) { | ||
1886 | err = PTR_ERR(m); | ||
1887 | m = NULL; | ||
1888 | ctx = NULL; | ||
1889 | goto err_out; | ||
1890 | } | ||
1891 | ctx = ntfs_attr_get_search_ctx(base_ni, m); | ||
1892 | if (unlikely(!ctx)) { | ||
1893 | err = -ENOMEM; | ||
1894 | goto err_out; | ||
1895 | } | ||
1896 | err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, | ||
1897 | CASE_SENSITIVE, 0, NULL, 0, ctx); | ||
1898 | if (unlikely(err)) { | ||
1899 | if (err == -ENOENT) | ||
1900 | err = -EIO; | ||
1901 | goto err_out; | ||
1902 | } | ||
1903 | m = ctx->mrec; | ||
1904 | a = ctx->attr; | ||
1905 | /* The total length of the attribute value. */ | ||
1906 | attr_len = le32_to_cpu(a->data.resident.value_length); | ||
1907 | BUG_ON(vi->i_size != attr_len); | ||
1908 | /* Check if new size is allowed in $AttrDef. */ | ||
1909 | err = ntfs_attr_size_bounds_check(vol, ni->type, new_size); | ||
1910 | if (unlikely(err)) { | ||
1911 | if (err == -ERANGE) { | ||
1912 | ntfs_error(vol->sb, "Write would cause the inode " | ||
1913 | "0x%lx to exceed the maximum size for " | ||
1914 | "its attribute type (0x%x). Aborting " | ||
1915 | "write.", vi->i_ino, | ||
1916 | le32_to_cpu(ni->type)); | ||
1917 | } else { | ||
1918 | ntfs_error(vol->sb, "Inode 0x%lx has unknown " | ||
1919 | "attribute type 0x%x. Aborting " | ||
1920 | "write.", vi->i_ino, | ||
1921 | le32_to_cpu(ni->type)); | ||
1922 | err = -EIO; | ||
1923 | } | ||
1924 | goto err_out2; | ||
1925 | } | ||
1926 | /* | ||
1927 | * Extend the attribute record to be able to store the new attribute | ||
1928 | * size. | ||
1929 | */ | ||
1930 | if (new_size >= vol->mft_record_size || ntfs_attr_record_resize(m, a, | ||
1931 | le16_to_cpu(a->data.resident.value_offset) + | ||
1932 | new_size)) { | ||
1933 | /* Not enough space in the mft record. */ | ||
1934 | ntfs_error(vol->sb, "Not enough space in the mft record for " | ||
1935 | "the resized attribute value. This is not " | ||
1936 | "supported yet. Aborting write."); | ||
1937 | err = -EOPNOTSUPP; | ||
1938 | goto err_out2; | ||
1939 | } | ||
1940 | /* | ||
1941 | * We have enough space in the mft record to fit the write. This | ||
1942 | * implies the attribute is smaller than the mft record and hence the | ||
1943 | * attribute must be in a single page and hence page->index must be 0. | ||
1944 | */ | ||
1945 | BUG_ON(page->index); | ||
1946 | /* | ||
1947 | * If the beginning of the write is past the old size, enlarge the | ||
1948 | * attribute value up to the beginning of the write and fill it with | ||
1949 | * zeroes. | ||
1950 | */ | ||
1951 | if (from > attr_len) { | ||
1952 | memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) + | ||
1953 | attr_len, 0, from - attr_len); | ||
1954 | a->data.resident.value_length = cpu_to_le32(from); | ||
1955 | /* Zero the corresponding area in the page as well. */ | ||
1956 | if (PageUptodate(page)) { | ||
1957 | kaddr = kmap_atomic(page, KM_USER0); | ||
1958 | memset(kaddr + attr_len, 0, from - attr_len); | ||
1959 | kunmap_atomic(kaddr, KM_USER0); | ||
1960 | flush_dcache_page(page); | ||
1961 | } | ||
1962 | } | ||
1963 | flush_dcache_mft_record_page(ctx->ntfs_ino); | ||
1964 | mark_mft_record_dirty(ctx->ntfs_ino); | ||
1965 | ntfs_attr_put_search_ctx(ctx); | ||
1966 | unmap_mft_record(base_ni); | ||
1967 | /* | ||
1968 | * Because resident attributes are handled by memcpy() to/from the | ||
1969 | * corresponding MFT record, and because this form of i/o is byte | ||
1970 | * aligned rather than block aligned, there is no need to bring the | ||
1971 | * page uptodate here as in the non-resident case where we need to | ||
1972 | * bring the buffers straddled by the write uptodate before | ||
1973 | * generic_file_write() does the copying from userspace. | ||
1974 | * | ||
1975 | * We thus defer bringing the page region outside the region written to | ||
1976 | * uptodate until ntfs_commit_write(), which makes the code simpler and | ||
1977 | * saves one atomic kmap, which is good. | ||
1978 | */ | ||
1979 | done: | ||
1980 | ntfs_debug("Done."); | ||
1981 | return 0; | ||
1982 | err_out: | ||
1983 | if (err == -ENOMEM) | ||
1984 | ntfs_warning(vi->i_sb, "Error allocating memory required to " | ||
1985 | "prepare the write."); | ||
1986 | else { | ||
1987 | ntfs_error(vi->i_sb, "Resident attribute prepare write failed " | ||
1988 | "with error %i.", err); | ||
1989 | NVolSetErrors(vol); | ||
1990 | make_bad_inode(vi); | ||
1991 | } | ||
1992 | err_out2: | ||
1993 | if (ctx) | ||
1994 | ntfs_attr_put_search_ctx(ctx); | ||
1995 | if (m) | ||
1996 | unmap_mft_record(base_ni); | ||
1997 | return err; | ||
1998 | } | ||
1999 | |||
2000 | /** | ||
2001 | * ntfs_commit_nonresident_write - commit data received into a non-resident page | ||
2002 | * | ||
2003 | */ | ||
2004 | static int ntfs_commit_nonresident_write(struct page *page, | ||
2005 | unsigned from, unsigned to) | ||
2006 | { | ||
2007 | s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to; | ||
2008 | struct inode *vi = page->mapping->host; | ||
2009 | struct buffer_head *bh, *head; | ||
2010 | unsigned int block_start, block_end, blocksize; | ||
2011 | BOOL partial; | ||
2012 | |||
2013 | ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " | ||
2014 | "0x%lx, from = %u, to = %u.", vi->i_ino, | ||
2015 | NTFS_I(vi)->type, page->index, from, to); | ||
2016 | blocksize = 1 << vi->i_blkbits; | ||
2017 | |||
2018 | // FIXME: We need a whole slew of special cases in here for compressed | ||
2019 | // files for example... | ||
2020 | // For now, we know ntfs_prepare_write() would have failed, so we cannot | ||
2021 | // get here in any of the cases we would have to special case; thus this | ||
2022 | // is just a ripped-off, unrolled generic_commit_write(). | ||
2023 | |||
2024 | bh = head = page_buffers(page); | ||
2025 | block_start = 0; | ||
2026 | partial = FALSE; | ||
2027 | do { | ||
2028 | block_end = block_start + blocksize; | ||
2029 | if (block_end <= from || block_start >= to) { | ||
2030 | if (!buffer_uptodate(bh)) | ||
2031 | partial = TRUE; | ||
2032 | } else { | ||
2033 | set_buffer_uptodate(bh); | ||
2034 | mark_buffer_dirty(bh); | ||
2035 | } | ||
2036 | } while (block_start = block_end, (bh = bh->b_this_page) != head); | ||
2037 | /* | ||
2038 | * If this is a partial write which happened to make all buffers | ||
2039 | * uptodate, then we can optimize away a bogus ->readpage() for the next | ||
2040 | * read(). Here we 'discover' whether the page went uptodate as a | ||
2041 | * result of this (potentially partial) write. | ||
2042 | */ | ||
2043 | if (!partial) | ||
2044 | SetPageUptodate(page); | ||
2045 | /* | ||
2046 | * Not convinced about this at all. See the disparity comment above. For | ||
2047 | * now we know ntfs_prepare_write() would have failed in the case where | ||
2048 | * the write exceeds i_size, so this will never trigger, which is fine. | ||
2049 | */ | ||
2050 | if (pos > vi->i_size) { | ||
2051 | ntfs_error(vi->i_sb, "Writing beyond the existing file size is " | ||
2052 | "not supported yet. Sorry."); | ||
2053 | return -EOPNOTSUPP; | ||
2054 | // vi->i_size = pos; | ||
2055 | // mark_inode_dirty(vi); | ||
2056 | } | ||
2057 | ntfs_debug("Done."); | ||
2058 | return 0; | ||
2059 | } | ||
2060 | |||
2061 | /** | ||
2062 | * ntfs_commit_write - commit the received data | ||
2063 | * | ||
2064 | * This is called from generic_file_write() with i_sem held on the inode | ||
2065 | * (@page->mapping->host). The @page is locked but not kmap()ped. The source | ||
2066 | * data has already been copied into the @page. ntfs_prepare_write() has been | ||
2067 | * called before the data was copied, and it returned success, so we can take the | ||
2068 | * results of various BUG checks and some error handling for granted. | ||
2069 | * | ||
2070 | * Need to mark modified blocks dirty so they get written out later when | ||
2071 | * ntfs_writepage() is invoked by the VM. | ||
2072 | * | ||
2073 | * Return 0 on success or -errno on error. | ||
2074 | * | ||
2075 | * Should be using generic_commit_write(). This marks buffers uptodate and | ||
2076 | * dirty, sets the page uptodate if all buffers in the page are uptodate, and | ||
2077 | * updates i_size if the end of io is beyond i_size. In that case, it also | ||
2078 | * marks the inode dirty. | ||
2079 | * | ||
2080 | * Cannot use generic_commit_write() due to ntfs specialities but can look at | ||
2081 | * it for implementation guidance. | ||
2082 | * | ||
2083 | * If things have gone as outlined in ntfs_prepare_write(), then we do not | ||
2084 | * need to do any page content modifications here at all, except in the write | ||
2085 | * to resident attribute case, where we need to bring the page uptodate here, | ||
2086 | * combining it with the copy into the mft record, which means we save | ||
2087 | * one atomic kmap. | ||
2088 | */ | ||
2089 | static int ntfs_commit_write(struct file *file, struct page *page, | ||
2090 | unsigned from, unsigned to) | ||
2091 | { | ||
2092 | struct inode *vi = page->mapping->host; | ||
2093 | ntfs_inode *base_ni, *ni = NTFS_I(vi); | ||
2094 | char *kaddr, *kattr; | ||
2095 | ntfs_attr_search_ctx *ctx; | ||
2096 | MFT_RECORD *m; | ||
2097 | ATTR_RECORD *a; | ||
2098 | u32 attr_len; | ||
2099 | int err; | ||
2100 | |||
2101 | ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " | ||
2102 | "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type, | ||
2103 | page->index, from, to); | ||
2104 | /* If the attribute is not resident, deal with it elsewhere. */ | ||
2105 | if (NInoNonResident(ni)) { | ||
2106 | /* Only unnamed $DATA attributes can be compressed/encrypted. */ | ||
2107 | if (ni->type == AT_DATA && !ni->name_len) { | ||
2108 | /* Encrypted files need separate handling. */ | ||
2109 | if (NInoEncrypted(ni)) { | ||
2110 | // We never get here at present! | ||
2111 | BUG(); | ||
2112 | } | ||
2113 | /* Compressed data streams are handled in compress.c. */ | ||
2114 | if (NInoCompressed(ni)) { | ||
2115 | // TODO: Implement this! | ||
2116 | // return ntfs_write_compressed_block(page); | ||
2117 | // We never get here at present! | ||
2118 | BUG(); | ||
2119 | } | ||
2120 | } | ||
2121 | /* Normal data stream. */ | ||
2122 | return ntfs_commit_nonresident_write(page, from, to); | ||
2123 | } | ||
2124 | /* | ||
2125 | * Attribute is resident, implying it is not compressed, encrypted, or | ||
2126 | * sparse. | ||
2127 | */ | ||
2128 | if (!NInoAttr(ni)) | ||
2129 | base_ni = ni; | ||
2130 | else | ||
2131 | base_ni = ni->ext.base_ntfs_ino; | ||
2132 | /* Map, pin, and lock the mft record. */ | ||
2133 | m = map_mft_record(base_ni); | ||
2134 | if (IS_ERR(m)) { | ||
2135 | err = PTR_ERR(m); | ||
2136 | m = NULL; | ||
2137 | ctx = NULL; | ||
2138 | goto err_out; | ||
2139 | } | ||
2140 | ctx = ntfs_attr_get_search_ctx(base_ni, m); | ||
2141 | if (unlikely(!ctx)) { | ||
2142 | err = -ENOMEM; | ||
2143 | goto err_out; | ||
2144 | } | ||
2145 | err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, | ||
2146 | CASE_SENSITIVE, 0, NULL, 0, ctx); | ||
2147 | if (unlikely(err)) { | ||
2148 | if (err == -ENOENT) | ||
2149 | err = -EIO; | ||
2150 | goto err_out; | ||
2151 | } | ||
2152 | a = ctx->attr; | ||
2153 | /* The total length of the attribute value. */ | ||
2154 | attr_len = le32_to_cpu(a->data.resident.value_length); | ||
2155 | BUG_ON(from > attr_len); | ||
2156 | kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset); | ||
2157 | kaddr = kmap_atomic(page, KM_USER0); | ||
2158 | /* Copy the received data from the page to the mft record. */ | ||
2159 | memcpy(kattr + from, kaddr + from, to - from); | ||
2160 | /* Update the attribute length if necessary. */ | ||
2161 | if (to > attr_len) { | ||
2162 | attr_len = to; | ||
2163 | a->data.resident.value_length = cpu_to_le32(attr_len); | ||
2164 | } | ||
2165 | /* | ||
2166 | * If the page is not uptodate, bring the out of bounds area(s) | ||
2167 | * uptodate by copying data from the mft record to the page. | ||
2168 | */ | ||
2169 | if (!PageUptodate(page)) { | ||
2170 | if (from > 0) | ||
2171 | memcpy(kaddr, kattr, from); | ||
2172 | if (to < attr_len) | ||
2173 | memcpy(kaddr + to, kattr + to, attr_len - to); | ||
2174 | /* Zero the region outside the end of the attribute value. */ | ||
2175 | if (attr_len < PAGE_CACHE_SIZE) | ||
2176 | memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); | ||
2177 | /* | ||
2178 | * The probability of not having done any of the above is | ||
2179 | * extremely small, so we just flush unconditionally. | ||
2180 | */ | ||
2181 | flush_dcache_page(page); | ||
2182 | SetPageUptodate(page); | ||
2183 | } | ||
2184 | kunmap_atomic(kaddr, KM_USER0); | ||
2185 | /* Update i_size if necessary. */ | ||
2186 | if (vi->i_size < attr_len) { | ||
2187 | ni->allocated_size = ni->initialized_size = attr_len; | ||
2188 | i_size_write(vi, attr_len); | ||
2189 | } | ||
2190 | /* Mark the mft record dirty, so it gets written back. */ | ||
2191 | flush_dcache_mft_record_page(ctx->ntfs_ino); | ||
2192 | mark_mft_record_dirty(ctx->ntfs_ino); | ||
2193 | ntfs_attr_put_search_ctx(ctx); | ||
2194 | unmap_mft_record(base_ni); | ||
2195 | ntfs_debug("Done."); | ||
2196 | return 0; | ||
2197 | err_out: | ||
2198 | if (err == -ENOMEM) { | ||
2199 | ntfs_warning(vi->i_sb, "Error allocating memory required to " | ||
2200 | "commit the write."); | ||
2201 | if (PageUptodate(page)) { | ||
2202 | ntfs_warning(vi->i_sb, "Page is uptodate, setting " | ||
2203 | "dirty so the write will be retried " | ||
2204 | "later on by the VM."); | ||
2205 | /* | ||
2206 | * Put the page on mapping->dirty_pages, but leave its | ||
2207 | * buffers' dirty state as-is. | ||
2208 | */ | ||
2209 | __set_page_dirty_nobuffers(page); | ||
2210 | err = 0; | ||
2211 | } else | ||
2212 | ntfs_error(vi->i_sb, "Page is not uptodate. Written " | ||
2213 | "data has been lost."); | ||
2214 | } else { | ||
2215 | ntfs_error(vi->i_sb, "Resident attribute commit write failed " | ||
2216 | "with error %i.", err); | ||
2217 | NVolSetErrors(ni->vol); | ||
2218 | make_bad_inode(vi); | ||
2219 | } | ||
2220 | if (ctx) | ||
2221 | ntfs_attr_put_search_ctx(ctx); | ||
2222 | if (m) | ||
2223 | unmap_mft_record(base_ni); | ||
2224 | return err; | ||
2225 | } | ||
2226 | |||
2227 | #endif /* NTFS_RW */ | ||
2228 | |||
2229 | /** | ||
2230 | * ntfs_aops - general address space operations for inodes and attributes | ||
2231 | */ | ||
2232 | struct address_space_operations ntfs_aops = { | ||
2233 | .readpage = ntfs_readpage, /* Fill page with data. */ | ||
2234 | .sync_page = block_sync_page, /* Currently, just unplugs the | ||
2235 | disk request queue. */ | ||
2236 | #ifdef NTFS_RW | ||
2237 | .writepage = ntfs_writepage, /* Write dirty page to disk. */ | ||
2238 | .prepare_write = ntfs_prepare_write, /* Prepare page and buffers | ||
2239 | ready to receive data. */ | ||
2240 | .commit_write = ntfs_commit_write, /* Commit received data. */ | ||
2241 | #endif /* NTFS_RW */ | ||
2242 | }; | ||
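/*
 * For reference, a simplified sketch (an assumption, not the actual VFS
 * code) of how generic_file_write() drives the two methods above for each
 * page it touches:
 *
 *	err = a_ops->prepare_write(file, page, from, to);
 *	if (!err) {
 *		// ...copy the user data into the page...
 *		err = a_ops->commit_write(file, page, from, to);
 *	}
 */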
2243 | |||
2244 | /** | ||
2245 | * ntfs_mst_aops - general address space operations for mst protected inodes | ||
2246 | * and attributes | ||
2247 | */ | ||
2248 | struct address_space_operations ntfs_mst_aops = { | ||
2249 | .readpage = ntfs_readpage, /* Fill page with data. */ | ||
2250 | .sync_page = block_sync_page, /* Currently, just unplugs the | ||
2251 | disk request queue. */ | ||
2252 | #ifdef NTFS_RW | ||
2253 | .writepage = ntfs_writepage, /* Write dirty page to disk. */ | ||
2254 | .set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty | ||
2255 | without touching the buffers | ||
2256 | belonging to the page. */ | ||
2257 | #endif /* NTFS_RW */ | ||
2258 | }; | ||
2259 | |||
2260 | #ifdef NTFS_RW | ||
2261 | |||
2262 | /** | ||
2263 | * mark_ntfs_record_dirty - mark an ntfs record dirty | ||
2264 | * @page: page containing the ntfs record to mark dirty | ||
2265 | * @ofs: byte offset within @page at which the ntfs record begins | ||
2266 | * | ||
2267 | * Set the buffers and the page in which the ntfs record is located dirty. | ||
2268 | * | ||
2269 | * The latter also marks the vfs inode the ntfs record belongs to dirty | ||
2270 | * (I_DIRTY_PAGES only). | ||
2271 | * | ||
2272 | * If the page does not have buffers, we create them and set them uptodate. | ||
2273 | * The page may not be locked, which is why we need to handle the buffers under | ||
2274 | * the mapping->private_lock. Once the buffers are marked dirty we no longer | ||
2275 | * need the lock since try_to_free_buffers() does not free dirty buffers. | ||
2276 | */ | ||
2277 | void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) { | ||
2278 | struct address_space *mapping = page->mapping; | ||
2279 | ntfs_inode *ni = NTFS_I(mapping->host); | ||
2280 | struct buffer_head *bh, *head, *buffers_to_free = NULL; | ||
2281 | unsigned int end, bh_size, bh_ofs; | ||
2282 | |||
2283 | BUG_ON(!PageUptodate(page)); | ||
2284 | end = ofs + ni->itype.index.block_size; | ||
2285 | bh_size = 1 << VFS_I(ni)->i_blkbits; | ||
2286 | spin_lock(&mapping->private_lock); | ||
2287 | if (unlikely(!page_has_buffers(page))) { | ||
2288 | spin_unlock(&mapping->private_lock); | ||
2289 | bh = head = alloc_page_buffers(page, bh_size, 1); | ||
2290 | spin_lock(&mapping->private_lock); | ||
2291 | if (likely(!page_has_buffers(page))) { | ||
2292 | struct buffer_head *tail; | ||
2293 | |||
2294 | do { | ||
2295 | set_buffer_uptodate(bh); | ||
2296 | tail = bh; | ||
2297 | bh = bh->b_this_page; | ||
2298 | } while (bh); | ||
2299 | tail->b_this_page = head; | ||
2300 | attach_page_buffers(page, head); | ||
2301 | } else | ||
2302 | buffers_to_free = bh; | ||
2303 | } | ||
2304 | bh = head = page_buffers(page); | ||
2305 | do { | ||
2306 | bh_ofs = bh_offset(bh); | ||
2307 | if (bh_ofs + bh_size <= ofs) | ||
2308 | continue; | ||
2309 | if (unlikely(bh_ofs >= end)) | ||
2310 | break; | ||
2311 | set_buffer_dirty(bh); | ||
2312 | } while ((bh = bh->b_this_page) != head); | ||
2313 | spin_unlock(&mapping->private_lock); | ||
2314 | __set_page_dirty_nobuffers(page); | ||
2315 | if (unlikely(buffers_to_free)) { | ||
2316 | do { | ||
2317 | bh = buffers_to_free->b_this_page; | ||
2318 | free_buffer_head(buffers_to_free); | ||
2319 | buffers_to_free = bh; | ||
2320 | } while (buffers_to_free); | ||
2321 | } | ||
2322 | } | ||
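/*
 * Typical usage (illustrative, offsets assumed): a caller that has modified
 * an ntfs record starting at byte offset ofs within page would do:
 *
 *	mark_ntfs_record_dirty(page, ofs);
 *
 * which dirties the buffers overlapping the record and then the page itself.
 */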
2323 | |||
2324 | #endif /* NTFS_RW */ | ||