diff options
author | Anton Altaparmakov <aia21@cantab.net> | 2005-09-19 04:41:39 -0400 |
---|---|---|
committer | Anton Altaparmakov <aia21@cantab.net> | 2005-09-19 04:41:39 -0400 |
commit | f6098cf449b81c14a51e48dd22ae47d03126a1de (patch) | |
tree | 89532dea05312fc70248842cd416aeb38fefb8b5 /fs/ntfs | |
parent | 4e64c88693fde1b1cbaa4cfecad43a0c3fad354e (diff) |
NTFS: Fix ntfs_{read,write}page() to cope with concurrent truncates better.
Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
Diffstat (limited to 'fs/ntfs')
-rw-r--r-- | fs/ntfs/aops.c | 110 | ||||
-rw-r--r-- | fs/ntfs/inode.c | 9 | ||||
-rw-r--r-- | fs/ntfs/malloc.h | 2 |
3 files changed, 80 insertions, 41 deletions
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 5fd516f42eec..5e80c07c6a4d 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c | |||
@@ -59,39 +59,49 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |||
59 | unsigned long flags; | 59 | unsigned long flags; |
60 | struct buffer_head *first, *tmp; | 60 | struct buffer_head *first, *tmp; |
61 | struct page *page; | 61 | struct page *page; |
62 | struct inode *vi; | ||
62 | ntfs_inode *ni; | 63 | ntfs_inode *ni; |
63 | int page_uptodate = 1; | 64 | int page_uptodate = 1; |
64 | 65 | ||
65 | page = bh->b_page; | 66 | page = bh->b_page; |
66 | ni = NTFS_I(page->mapping->host); | 67 | vi = page->mapping->host; |
68 | ni = NTFS_I(vi); | ||
67 | 69 | ||
68 | if (likely(uptodate)) { | 70 | if (likely(uptodate)) { |
69 | s64 file_ofs, initialized_size; | 71 | loff_t i_size; |
72 | s64 file_ofs, init_size; | ||
70 | 73 | ||
71 | set_buffer_uptodate(bh); | 74 | set_buffer_uptodate(bh); |
72 | 75 | ||
73 | file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) + | 76 | file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) + |
74 | bh_offset(bh); | 77 | bh_offset(bh); |
75 | read_lock_irqsave(&ni->size_lock, flags); | 78 | read_lock_irqsave(&ni->size_lock, flags); |
76 | initialized_size = ni->initialized_size; | 79 | init_size = ni->initialized_size; |
80 | i_size = i_size_read(vi); | ||
77 | read_unlock_irqrestore(&ni->size_lock, flags); | 81 | read_unlock_irqrestore(&ni->size_lock, flags); |
82 | if (unlikely(init_size > i_size)) { | ||
83 | /* Race with shrinking truncate. */ | ||
84 | init_size = i_size; | ||
85 | } | ||
78 | /* Check for the current buffer head overflowing. */ | 86 | /* Check for the current buffer head overflowing. */ |
79 | if (file_ofs + bh->b_size > initialized_size) { | 87 | if (unlikely(file_ofs + bh->b_size > init_size)) { |
80 | char *addr; | 88 | u8 *kaddr; |
81 | int ofs = 0; | 89 | int ofs; |
82 | 90 | ||
83 | if (file_ofs < initialized_size) | 91 | ofs = 0; |
84 | ofs = initialized_size - file_ofs; | 92 | if (file_ofs < init_size) |
85 | addr = kmap_atomic(page, KM_BIO_SRC_IRQ); | 93 | ofs = init_size - file_ofs; |
86 | memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs); | 94 | kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); |
95 | memset(kaddr + bh_offset(bh) + ofs, 0, | ||
96 | bh->b_size - ofs); | ||
97 | kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); | ||
87 | flush_dcache_page(page); | 98 | flush_dcache_page(page); |
88 | kunmap_atomic(addr, KM_BIO_SRC_IRQ); | ||
89 | } | 99 | } |
90 | } else { | 100 | } else { |
91 | clear_buffer_uptodate(bh); | 101 | clear_buffer_uptodate(bh); |
92 | SetPageError(page); | 102 | SetPageError(page); |
93 | ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.", | 103 | ntfs_error(ni->vol->sb, "Buffer I/O error, logical block " |
94 | (unsigned long long)bh->b_blocknr); | 104 | "0x%llx.", (unsigned long long)bh->b_blocknr); |
95 | } | 105 | } |
96 | first = page_buffers(page); | 106 | first = page_buffers(page); |
97 | local_irq_save(flags); | 107 | local_irq_save(flags); |
@@ -124,7 +134,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |||
124 | if (likely(page_uptodate && !PageError(page))) | 134 | if (likely(page_uptodate && !PageError(page))) |
125 | SetPageUptodate(page); | 135 | SetPageUptodate(page); |
126 | } else { | 136 | } else { |
127 | char *addr; | 137 | u8 *kaddr; |
128 | unsigned int i, recs; | 138 | unsigned int i, recs; |
129 | u32 rec_size; | 139 | u32 rec_size; |
130 | 140 | ||
@@ -132,12 +142,12 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |||
132 | recs = PAGE_CACHE_SIZE / rec_size; | 142 | recs = PAGE_CACHE_SIZE / rec_size; |
133 | /* Should have been verified before we got here... */ | 143 | /* Should have been verified before we got here... */ |
134 | BUG_ON(!recs); | 144 | BUG_ON(!recs); |
135 | addr = kmap_atomic(page, KM_BIO_SRC_IRQ); | 145 | kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); |
136 | for (i = 0; i < recs; i++) | 146 | for (i = 0; i < recs; i++) |
137 | post_read_mst_fixup((NTFS_RECORD*)(addr + | 147 | post_read_mst_fixup((NTFS_RECORD*)(kaddr + |
138 | i * rec_size), rec_size); | 148 | i * rec_size), rec_size); |
149 | kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); | ||
139 | flush_dcache_page(page); | 150 | flush_dcache_page(page); |
140 | kunmap_atomic(addr, KM_BIO_SRC_IRQ); | ||
141 | if (likely(page_uptodate && !PageError(page))) | 151 | if (likely(page_uptodate && !PageError(page))) |
142 | SetPageUptodate(page); | 152 | SetPageUptodate(page); |
143 | } | 153 | } |
@@ -168,8 +178,11 @@ still_busy: | |||
168 | */ | 178 | */ |
169 | static int ntfs_read_block(struct page *page) | 179 | static int ntfs_read_block(struct page *page) |
170 | { | 180 | { |
181 | loff_t i_size; | ||
171 | VCN vcn; | 182 | VCN vcn; |
172 | LCN lcn; | 183 | LCN lcn; |
184 | s64 init_size; | ||
185 | struct inode *vi; | ||
173 | ntfs_inode *ni; | 186 | ntfs_inode *ni; |
174 | ntfs_volume *vol; | 187 | ntfs_volume *vol; |
175 | runlist_element *rl; | 188 | runlist_element *rl; |
@@ -180,7 +193,8 @@ static int ntfs_read_block(struct page *page) | |||
180 | int i, nr; | 193 | int i, nr; |
181 | unsigned char blocksize_bits; | 194 | unsigned char blocksize_bits; |
182 | 195 | ||
183 | ni = NTFS_I(page->mapping->host); | 196 | vi = page->mapping->host; |
197 | ni = NTFS_I(vi); | ||
184 | vol = ni->vol; | 198 | vol = ni->vol; |
185 | 199 | ||
186 | /* $MFT/$DATA must have its complete runlist in memory at all times. */ | 200 | /* $MFT/$DATA must have its complete runlist in memory at all times. */ |
@@ -199,11 +213,28 @@ static int ntfs_read_block(struct page *page) | |||
199 | bh = head = page_buffers(page); | 213 | bh = head = page_buffers(page); |
200 | BUG_ON(!bh); | 214 | BUG_ON(!bh); |
201 | 215 | ||
216 | /* | ||
217 | * We may be racing with truncate. To avoid some of the problems we | ||
218 | * now take a snapshot of the various sizes and use those for the whole | ||
219 | * of the function. In case of an extending truncate it just means we | ||
220 | * may leave some buffers unmapped which are now allocated. This is | ||
221 | * not a problem since these buffers will just get mapped when a write | ||
222 | * occurs. In case of a shrinking truncate, we will detect this later | ||
223 | * on due to the runlist being incomplete and if the page is being | ||
224 | * fully truncated, truncate will throw it away as soon as we unlock | ||
225 | * it so no need to worry what we do with it. | ||
226 | */ | ||
202 | iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); | 227 | iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); |
203 | read_lock_irqsave(&ni->size_lock, flags); | 228 | read_lock_irqsave(&ni->size_lock, flags); |
204 | lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits; | 229 | lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits; |
205 | zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits; | 230 | init_size = ni->initialized_size; |
231 | i_size = i_size_read(vi); | ||
206 | read_unlock_irqrestore(&ni->size_lock, flags); | 232 | read_unlock_irqrestore(&ni->size_lock, flags); |
233 | if (unlikely(init_size > i_size)) { | ||
234 | /* Race with shrinking truncate. */ | ||
235 | init_size = i_size; | ||
236 | } | ||
237 | zblock = (init_size + blocksize - 1) >> blocksize_bits; | ||
207 | 238 | ||
208 | /* Loop through all the buffers in the page. */ | 239 | /* Loop through all the buffers in the page. */ |
209 | rl = NULL; | 240 | rl = NULL; |
@@ -366,6 +397,8 @@ handle_zblock: | |||
366 | */ | 397 | */ |
367 | static int ntfs_readpage(struct file *file, struct page *page) | 398 | static int ntfs_readpage(struct file *file, struct page *page) |
368 | { | 399 | { |
400 | loff_t i_size; | ||
401 | struct inode *vi; | ||
369 | ntfs_inode *ni, *base_ni; | 402 | ntfs_inode *ni, *base_ni; |
370 | u8 *kaddr; | 403 | u8 *kaddr; |
371 | ntfs_attr_search_ctx *ctx; | 404 | ntfs_attr_search_ctx *ctx; |
@@ -384,7 +417,8 @@ retry_readpage: | |||
384 | unlock_page(page); | 417 | unlock_page(page); |
385 | return 0; | 418 | return 0; |
386 | } | 419 | } |
387 | ni = NTFS_I(page->mapping->host); | 420 | vi = page->mapping->host; |
421 | ni = NTFS_I(vi); | ||
388 | /* | 422 | /* |
389 | * Only $DATA attributes can be encrypted and only unnamed $DATA | 423 | * Only $DATA attributes can be encrypted and only unnamed $DATA |
390 | * attributes can be compressed. Index root can have the flags set but | 424 | * attributes can be compressed. Index root can have the flags set but |
@@ -458,7 +492,12 @@ retry_readpage: | |||
458 | read_lock_irqsave(&ni->size_lock, flags); | 492 | read_lock_irqsave(&ni->size_lock, flags); |
459 | if (unlikely(attr_len > ni->initialized_size)) | 493 | if (unlikely(attr_len > ni->initialized_size)) |
460 | attr_len = ni->initialized_size; | 494 | attr_len = ni->initialized_size; |
495 | i_size = i_size_read(vi); | ||
461 | read_unlock_irqrestore(&ni->size_lock, flags); | 496 | read_unlock_irqrestore(&ni->size_lock, flags); |
497 | if (unlikely(attr_len > i_size)) { | ||
498 | /* Race with shrinking truncate. */ | ||
499 | attr_len = i_size; | ||
500 | } | ||
462 | kaddr = kmap_atomic(page, KM_USER0); | 501 | kaddr = kmap_atomic(page, KM_USER0); |
463 | /* Copy the data to the page. */ | 502 | /* Copy the data to the page. */ |
464 | memcpy(kaddr, (u8*)ctx->attr + | 503 | memcpy(kaddr, (u8*)ctx->attr + |
@@ -1383,8 +1422,8 @@ retry_writepage: | |||
1383 | unsigned int ofs = i_size & ~PAGE_CACHE_MASK; | 1422 | unsigned int ofs = i_size & ~PAGE_CACHE_MASK; |
1384 | kaddr = kmap_atomic(page, KM_USER0); | 1423 | kaddr = kmap_atomic(page, KM_USER0); |
1385 | memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs); | 1424 | memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs); |
1386 | flush_dcache_page(page); | ||
1387 | kunmap_atomic(kaddr, KM_USER0); | 1425 | kunmap_atomic(kaddr, KM_USER0); |
1426 | flush_dcache_page(page); | ||
1388 | } | 1427 | } |
1389 | /* Handle mst protected attributes. */ | 1428 | /* Handle mst protected attributes. */ |
1390 | if (NInoMstProtected(ni)) | 1429 | if (NInoMstProtected(ni)) |
@@ -1447,34 +1486,33 @@ retry_writepage: | |||
1447 | BUG_ON(PageWriteback(page)); | 1486 | BUG_ON(PageWriteback(page)); |
1448 | set_page_writeback(page); | 1487 | set_page_writeback(page); |
1449 | unlock_page(page); | 1488 | unlock_page(page); |
1450 | /* | ||
1451 | * Here, we do not need to zero the out of bounds area everytime | ||
1452 | * because the below memcpy() already takes care of the | ||
1453 | * mmap-at-end-of-file requirements. If the file is converted to a | ||
1454 | * non-resident one, then the code path use is switched to the | ||
1455 | * non-resident one where the zeroing happens on each ntfs_writepage() | ||
1456 | * invocation. | ||
1457 | */ | ||
1458 | attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); | 1489 | attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); |
1459 | i_size = i_size_read(vi); | 1490 | i_size = i_size_read(vi); |
1460 | if (unlikely(attr_len > i_size)) { | 1491 | if (unlikely(attr_len > i_size)) { |
1492 | /* Race with shrinking truncate or a failed truncate. */ | ||
1461 | attr_len = i_size; | 1493 | attr_len = i_size; |
1462 | ctx->attr->data.resident.value_length = cpu_to_le32(attr_len); | 1494 | /* |
1495 | * If the truncate failed, fix it up now. If a concurrent | ||
1496 | * truncate, we do its job, so it does not have to do anything. | ||
1497 | */ | ||
1498 | err = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr, | ||
1499 | attr_len); | ||
1500 | /* Shrinking cannot fail. */ | ||
1501 | BUG_ON(err); | ||
1463 | } | 1502 | } |
1464 | kaddr = kmap_atomic(page, KM_USER0); | 1503 | kaddr = kmap_atomic(page, KM_USER0); |
1465 | /* Copy the data from the page to the mft record. */ | 1504 | /* Copy the data from the page to the mft record. */ |
1466 | memcpy((u8*)ctx->attr + | 1505 | memcpy((u8*)ctx->attr + |
1467 | le16_to_cpu(ctx->attr->data.resident.value_offset), | 1506 | le16_to_cpu(ctx->attr->data.resident.value_offset), |
1468 | kaddr, attr_len); | 1507 | kaddr, attr_len); |
1469 | flush_dcache_mft_record_page(ctx->ntfs_ino); | ||
1470 | /* Zero out of bounds area in the page cache page. */ | 1508 | /* Zero out of bounds area in the page cache page. */ |
1471 | memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); | 1509 | memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); |
1472 | flush_dcache_page(page); | ||
1473 | kunmap_atomic(kaddr, KM_USER0); | 1510 | kunmap_atomic(kaddr, KM_USER0); |
1474 | 1511 | flush_dcache_mft_record_page(ctx->ntfs_ino); | |
1512 | flush_dcache_page(page); | ||
1513 | /* We are done with the page. */ | ||
1475 | end_page_writeback(page); | 1514 | end_page_writeback(page); |
1476 | 1515 | /* Finally, mark the mft record dirty, so it gets written back. */ | |
1477 | /* Mark the mft record dirty, so it gets written back. */ | ||
1478 | mark_mft_record_dirty(ctx->ntfs_ino); | 1516 | mark_mft_record_dirty(ctx->ntfs_ino); |
1479 | ntfs_attr_put_search_ctx(ctx); | 1517 | ntfs_attr_put_search_ctx(ctx); |
1480 | unmap_mft_record(base_ni); | 1518 | unmap_mft_record(base_ni); |
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index dc4bbe3acf5c..7ec045131808 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c | |||
@@ -1166,6 +1166,8 @@ err_out: | |||
1166 | * | 1166 | * |
1167 | * Return 0 on success and -errno on error. In the error case, the inode will | 1167 | * Return 0 on success and -errno on error. In the error case, the inode will |
1168 | * have had make_bad_inode() executed on it. | 1168 | * have had make_bad_inode() executed on it. |
1169 | * | ||
1170 | * Note this cannot be called for AT_INDEX_ALLOCATION. | ||
1169 | */ | 1171 | */ |
1170 | static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) | 1172 | static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) |
1171 | { | 1173 | { |
@@ -1242,8 +1244,8 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) | |||
1242 | } | 1244 | } |
1243 | } | 1245 | } |
1244 | /* | 1246 | /* |
1245 | * The encryption flag set in an index root just means to | 1247 | * The compressed/sparse flag set in an index root just means |
1246 | * compress all files. | 1248 | * to compress all files. |
1247 | */ | 1249 | */ |
1248 | if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) { | 1250 | if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) { |
1249 | ntfs_error(vi->i_sb, "Found mst protected attribute " | 1251 | ntfs_error(vi->i_sb, "Found mst protected attribute " |
@@ -1319,8 +1321,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) | |||
1319 | "the mapping pairs array."); | 1321 | "the mapping pairs array."); |
1320 | goto unm_err_out; | 1322 | goto unm_err_out; |
1321 | } | 1323 | } |
1322 | if ((NInoCompressed(ni) || NInoSparse(ni)) && | 1324 | if (NInoCompressed(ni) || NInoSparse(ni)) { |
1323 | ni->type != AT_INDEX_ROOT) { | ||
1324 | if (a->data.non_resident.compression_unit != 4) { | 1325 | if (a->data.non_resident.compression_unit != 4) { |
1325 | ntfs_error(vi->i_sb, "Found nonstandard " | 1326 | ntfs_error(vi->i_sb, "Found nonstandard " |
1326 | "compression unit (%u instead " | 1327 | "compression unit (%u instead " |
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index 3288bcc2c4aa..006946efca8c 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project. | 2 | * malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project. |
3 | * | 3 | * |
4 | * Copyright (c) 2001-2004 Anton Altaparmakov | 4 | * Copyright (c) 2001-2005 Anton Altaparmakov |
5 | * | 5 | * |
6 | * This program/include file is free software; you can redistribute it and/or | 6 | * This program/include file is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU General Public License as published | 7 | * modify it under the terms of the GNU General Public License as published |