diff options
Diffstat (limited to 'fs/ocfs2/aops.c')
-rw-r--r-- | fs/ocfs2/aops.c | 137 |
1 files changed, 81 insertions, 56 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 56f7790cad46..bc7b4cbbe8ec 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <asm/byteorder.h> | 26 | #include <asm/byteorder.h> |
27 | #include <linux/swap.h> | 27 | #include <linux/swap.h> |
28 | #include <linux/pipe_fs_i.h> | 28 | #include <linux/pipe_fs_i.h> |
29 | #include <linux/mpage.h> | ||
29 | 30 | ||
30 | #define MLOG_MASK_PREFIX ML_FILE_IO | 31 | #define MLOG_MASK_PREFIX ML_FILE_IO |
31 | #include <cluster/masklog.h> | 32 | #include <cluster/masklog.h> |
@@ -139,7 +140,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
139 | { | 140 | { |
140 | int err = 0; | 141 | int err = 0; |
141 | unsigned int ext_flags; | 142 | unsigned int ext_flags; |
142 | u64 p_blkno, past_eof; | 143 | u64 max_blocks = bh_result->b_size >> inode->i_blkbits; |
144 | u64 p_blkno, count, past_eof; | ||
143 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 145 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
144 | 146 | ||
145 | mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, | 147 | mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, |
@@ -155,7 +157,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
155 | goto bail; | 157 | goto bail; |
156 | } | 158 | } |
157 | 159 | ||
158 | err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, NULL, | 160 | err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count, |
159 | &ext_flags); | 161 | &ext_flags); |
160 | if (err) { | 162 | if (err) { |
161 | mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " | 163 | mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " |
@@ -164,6 +166,9 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
164 | goto bail; | 166 | goto bail; |
165 | } | 167 | } |
166 | 168 | ||
169 | if (max_blocks < count) | ||
170 | count = max_blocks; | ||
171 | |||
167 | /* | 172 | /* |
168 | * ocfs2 never allocates in this function - the only time we | 173 | * ocfs2 never allocates in this function - the only time we |
169 | * need to use BH_New is when we're extending i_size on a file | 174 | * need to use BH_New is when we're extending i_size on a file |
@@ -178,6 +183,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
178 | if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) | 183 | if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) |
179 | map_bh(bh_result, inode->i_sb, p_blkno); | 184 | map_bh(bh_result, inode->i_sb, p_blkno); |
180 | 185 | ||
186 | bh_result->b_size = count << inode->i_blkbits; | ||
187 | |||
181 | if (!ocfs2_sparse_alloc(osb)) { | 188 | if (!ocfs2_sparse_alloc(osb)) { |
182 | if (p_blkno == 0) { | 189 | if (p_blkno == 0) { |
183 | err = -EIO; | 190 | err = -EIO; |
@@ -210,7 +217,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, | |||
210 | struct buffer_head *di_bh) | 217 | struct buffer_head *di_bh) |
211 | { | 218 | { |
212 | void *kaddr; | 219 | void *kaddr; |
213 | unsigned int size; | 220 | loff_t size; |
214 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 221 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; |
215 | 222 | ||
216 | if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) { | 223 | if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) { |
@@ -224,8 +231,9 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, | |||
224 | if (size > PAGE_CACHE_SIZE || | 231 | if (size > PAGE_CACHE_SIZE || |
225 | size > ocfs2_max_inline_data(inode->i_sb)) { | 232 | size > ocfs2_max_inline_data(inode->i_sb)) { |
226 | ocfs2_error(inode->i_sb, | 233 | ocfs2_error(inode->i_sb, |
227 | "Inode %llu has with inline data has bad size: %u", | 234 | "Inode %llu has with inline data has bad size: %Lu", |
228 | (unsigned long long)OCFS2_I(inode)->ip_blkno, size); | 235 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
236 | (unsigned long long)size); | ||
229 | return -EROFS; | 237 | return -EROFS; |
230 | } | 238 | } |
231 | 239 | ||
@@ -275,7 +283,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
275 | 283 | ||
276 | mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0)); | 284 | mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0)); |
277 | 285 | ||
278 | ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page); | 286 | ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page); |
279 | if (ret != 0) { | 287 | if (ret != 0) { |
280 | if (ret == AOP_TRUNCATED_PAGE) | 288 | if (ret == AOP_TRUNCATED_PAGE) |
281 | unlock = 0; | 289 | unlock = 0; |
@@ -285,7 +293,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
285 | 293 | ||
286 | if (down_read_trylock(&oi->ip_alloc_sem) == 0) { | 294 | if (down_read_trylock(&oi->ip_alloc_sem) == 0) { |
287 | ret = AOP_TRUNCATED_PAGE; | 295 | ret = AOP_TRUNCATED_PAGE; |
288 | goto out_meta_unlock; | 296 | goto out_inode_unlock; |
289 | } | 297 | } |
290 | 298 | ||
291 | /* | 299 | /* |
@@ -305,25 +313,16 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
305 | goto out_alloc; | 313 | goto out_alloc; |
306 | } | 314 | } |
307 | 315 | ||
308 | ret = ocfs2_data_lock_with_page(inode, 0, page); | ||
309 | if (ret != 0) { | ||
310 | if (ret == AOP_TRUNCATED_PAGE) | ||
311 | unlock = 0; | ||
312 | mlog_errno(ret); | ||
313 | goto out_alloc; | ||
314 | } | ||
315 | |||
316 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 316 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
317 | ret = ocfs2_readpage_inline(inode, page); | 317 | ret = ocfs2_readpage_inline(inode, page); |
318 | else | 318 | else |
319 | ret = block_read_full_page(page, ocfs2_get_block); | 319 | ret = block_read_full_page(page, ocfs2_get_block); |
320 | unlock = 0; | 320 | unlock = 0; |
321 | 321 | ||
322 | ocfs2_data_unlock(inode, 0); | ||
323 | out_alloc: | 322 | out_alloc: |
324 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | 323 | up_read(&OCFS2_I(inode)->ip_alloc_sem); |
325 | out_meta_unlock: | 324 | out_inode_unlock: |
326 | ocfs2_meta_unlock(inode, 0); | 325 | ocfs2_inode_unlock(inode, 0); |
327 | out: | 326 | out: |
328 | if (unlock) | 327 | if (unlock) |
329 | unlock_page(page); | 328 | unlock_page(page); |
@@ -331,6 +330,62 @@ out: | |||
331 | return ret; | 330 | return ret; |
332 | } | 331 | } |
333 | 332 | ||
333 | /* | ||
334 | * This is used only for read-ahead. Failures or difficult to handle | ||
335 | * situations are safe to ignore. | ||
336 | * | ||
337 | * Right now, we don't bother with BH_Boundary - in-inode extent lists | ||
338 | * are quite large (243 extents on 4k blocks), so most inodes don't | ||
339 | * grow out to a tree. If need be, detecting boundary extents could | ||
340 | * trivially be added in a future version of ocfs2_get_block(). | ||
341 | */ | ||
342 | static int ocfs2_readpages(struct file *filp, struct address_space *mapping, | ||
343 | struct list_head *pages, unsigned nr_pages) | ||
344 | { | ||
345 | int ret, err = -EIO; | ||
346 | struct inode *inode = mapping->host; | ||
347 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
348 | loff_t start; | ||
349 | struct page *last; | ||
350 | |||
351 | /* | ||
352 | * Use the nonblocking flag for the dlm code to avoid page | ||
353 | * lock inversion, but don't bother with retrying. | ||
354 | */ | ||
355 | ret = ocfs2_inode_lock_full(inode, NULL, 0, OCFS2_LOCK_NONBLOCK); | ||
356 | if (ret) | ||
357 | return err; | ||
358 | |||
359 | if (down_read_trylock(&oi->ip_alloc_sem) == 0) { | ||
360 | ocfs2_inode_unlock(inode, 0); | ||
361 | return err; | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * Don't bother with inline-data. There isn't anything | ||
366 | * to read-ahead in that case anyway... | ||
367 | */ | ||
368 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) | ||
369 | goto out_unlock; | ||
370 | |||
371 | /* | ||
372 | * Check whether a remote node truncated this file - we just | ||
373 | * drop out in that case as it's not worth handling here. | ||
374 | */ | ||
375 | last = list_entry(pages->prev, struct page, lru); | ||
376 | start = (loff_t)last->index << PAGE_CACHE_SHIFT; | ||
377 | if (start >= i_size_read(inode)) | ||
378 | goto out_unlock; | ||
379 | |||
380 | err = mpage_readpages(mapping, pages, nr_pages, ocfs2_get_block); | ||
381 | |||
382 | out_unlock: | ||
383 | up_read(&oi->ip_alloc_sem); | ||
384 | ocfs2_inode_unlock(inode, 0); | ||
385 | |||
386 | return err; | ||
387 | } | ||
388 | |||
334 | /* Note: Because we don't support holes, our allocation has | 389 | /* Note: Because we don't support holes, our allocation has |
335 | * already happened (allocation writes zeros to the file data) | 390 | * already happened (allocation writes zeros to the file data) |
336 | * so we don't have to worry about ordered writes in | 391 | * so we don't have to worry about ordered writes in |
@@ -452,7 +507,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) | |||
452 | * accessed concurrently from multiple nodes. | 507 | * accessed concurrently from multiple nodes. |
453 | */ | 508 | */ |
454 | if (!INODE_JOURNAL(inode)) { | 509 | if (!INODE_JOURNAL(inode)) { |
455 | err = ocfs2_meta_lock(inode, NULL, 0); | 510 | err = ocfs2_inode_lock(inode, NULL, 0); |
456 | if (err) { | 511 | if (err) { |
457 | if (err != -ENOENT) | 512 | if (err != -ENOENT) |
458 | mlog_errno(err); | 513 | mlog_errno(err); |
@@ -467,7 +522,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) | |||
467 | 522 | ||
468 | if (!INODE_JOURNAL(inode)) { | 523 | if (!INODE_JOURNAL(inode)) { |
469 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | 524 | up_read(&OCFS2_I(inode)->ip_alloc_sem); |
470 | ocfs2_meta_unlock(inode, 0); | 525 | ocfs2_inode_unlock(inode, 0); |
471 | } | 526 | } |
472 | 527 | ||
473 | if (err) { | 528 | if (err) { |
@@ -638,34 +693,12 @@ static ssize_t ocfs2_direct_IO(int rw, | |||
638 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 693 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
639 | return 0; | 694 | return 0; |
640 | 695 | ||
641 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { | ||
642 | /* | ||
643 | * We get PR data locks even for O_DIRECT. This | ||
644 | * allows concurrent O_DIRECT I/O but doesn't let | ||
645 | * O_DIRECT with extending and buffered zeroing writes | ||
646 | * race. If they did race then the buffered zeroing | ||
647 | * could be written back after the O_DIRECT I/O. It's | ||
648 | * one thing to tell people not to mix buffered and | ||
649 | * O_DIRECT writes, but expecting them to understand | ||
650 | * that file extension is also an implicit buffered | ||
651 | * write is too much. By getting the PR we force | ||
652 | * writeback of the buffered zeroing before | ||
653 | * proceeding. | ||
654 | */ | ||
655 | ret = ocfs2_data_lock(inode, 0); | ||
656 | if (ret < 0) { | ||
657 | mlog_errno(ret); | ||
658 | goto out; | ||
659 | } | ||
660 | ocfs2_data_unlock(inode, 0); | ||
661 | } | ||
662 | |||
663 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | 696 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, |
664 | inode->i_sb->s_bdev, iov, offset, | 697 | inode->i_sb->s_bdev, iov, offset, |
665 | nr_segs, | 698 | nr_segs, |
666 | ocfs2_direct_IO_get_blocks, | 699 | ocfs2_direct_IO_get_blocks, |
667 | ocfs2_dio_end_io); | 700 | ocfs2_dio_end_io); |
668 | out: | 701 | |
669 | mlog_exit(ret); | 702 | mlog_exit(ret); |
670 | return ret; | 703 | return ret; |
671 | } | 704 | } |
@@ -1754,7 +1787,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, | |||
1754 | struct buffer_head *di_bh = NULL; | 1787 | struct buffer_head *di_bh = NULL; |
1755 | struct inode *inode = mapping->host; | 1788 | struct inode *inode = mapping->host; |
1756 | 1789 | ||
1757 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | 1790 | ret = ocfs2_inode_lock(inode, &di_bh, 1); |
1758 | if (ret) { | 1791 | if (ret) { |
1759 | mlog_errno(ret); | 1792 | mlog_errno(ret); |
1760 | return ret; | 1793 | return ret; |
@@ -1769,30 +1802,22 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, | |||
1769 | */ | 1802 | */ |
1770 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 1803 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
1771 | 1804 | ||
1772 | ret = ocfs2_data_lock(inode, 1); | ||
1773 | if (ret) { | ||
1774 | mlog_errno(ret); | ||
1775 | goto out_fail; | ||
1776 | } | ||
1777 | |||
1778 | ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, | 1805 | ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, |
1779 | fsdata, di_bh, NULL); | 1806 | fsdata, di_bh, NULL); |
1780 | if (ret) { | 1807 | if (ret) { |
1781 | mlog_errno(ret); | 1808 | mlog_errno(ret); |
1782 | goto out_fail_data; | 1809 | goto out_fail; |
1783 | } | 1810 | } |
1784 | 1811 | ||
1785 | brelse(di_bh); | 1812 | brelse(di_bh); |
1786 | 1813 | ||
1787 | return 0; | 1814 | return 0; |
1788 | 1815 | ||
1789 | out_fail_data: | ||
1790 | ocfs2_data_unlock(inode, 1); | ||
1791 | out_fail: | 1816 | out_fail: |
1792 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 1817 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
1793 | 1818 | ||
1794 | brelse(di_bh); | 1819 | brelse(di_bh); |
1795 | ocfs2_meta_unlock(inode, 1); | 1820 | ocfs2_inode_unlock(inode, 1); |
1796 | 1821 | ||
1797 | return ret; | 1822 | return ret; |
1798 | } | 1823 | } |
@@ -1908,15 +1933,15 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping, | |||
1908 | 1933 | ||
1909 | ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata); | 1934 | ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata); |
1910 | 1935 | ||
1911 | ocfs2_data_unlock(inode, 1); | ||
1912 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 1936 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
1913 | ocfs2_meta_unlock(inode, 1); | 1937 | ocfs2_inode_unlock(inode, 1); |
1914 | 1938 | ||
1915 | return ret; | 1939 | return ret; |
1916 | } | 1940 | } |
1917 | 1941 | ||
1918 | const struct address_space_operations ocfs2_aops = { | 1942 | const struct address_space_operations ocfs2_aops = { |
1919 | .readpage = ocfs2_readpage, | 1943 | .readpage = ocfs2_readpage, |
1944 | .readpages = ocfs2_readpages, | ||
1920 | .writepage = ocfs2_writepage, | 1945 | .writepage = ocfs2_writepage, |
1921 | .write_begin = ocfs2_write_begin, | 1946 | .write_begin = ocfs2_write_begin, |
1922 | .write_end = ocfs2_write_end, | 1947 | .write_end = ocfs2_write_end, |