diff options
Diffstat (limited to 'fs/ocfs2/aops.c')
| -rw-r--r-- | fs/ocfs2/aops.c | 137 |
1 files changed, 81 insertions, 56 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 56f7790cad46..bc7b4cbbe8ec 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #include <asm/byteorder.h> | 26 | #include <asm/byteorder.h> |
| 27 | #include <linux/swap.h> | 27 | #include <linux/swap.h> |
| 28 | #include <linux/pipe_fs_i.h> | 28 | #include <linux/pipe_fs_i.h> |
| 29 | #include <linux/mpage.h> | ||
| 29 | 30 | ||
| 30 | #define MLOG_MASK_PREFIX ML_FILE_IO | 31 | #define MLOG_MASK_PREFIX ML_FILE_IO |
| 31 | #include <cluster/masklog.h> | 32 | #include <cluster/masklog.h> |
| @@ -139,7 +140,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
| 139 | { | 140 | { |
| 140 | int err = 0; | 141 | int err = 0; |
| 141 | unsigned int ext_flags; | 142 | unsigned int ext_flags; |
| 142 | u64 p_blkno, past_eof; | 143 | u64 max_blocks = bh_result->b_size >> inode->i_blkbits; |
| 144 | u64 p_blkno, count, past_eof; | ||
| 143 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 145 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 144 | 146 | ||
| 145 | mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, | 147 | mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, |
| @@ -155,7 +157,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
| 155 | goto bail; | 157 | goto bail; |
| 156 | } | 158 | } |
| 157 | 159 | ||
| 158 | err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, NULL, | 160 | err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count, |
| 159 | &ext_flags); | 161 | &ext_flags); |
| 160 | if (err) { | 162 | if (err) { |
| 161 | mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " | 163 | mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " |
| @@ -164,6 +166,9 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
| 164 | goto bail; | 166 | goto bail; |
| 165 | } | 167 | } |
| 166 | 168 | ||
| 169 | if (max_blocks < count) | ||
| 170 | count = max_blocks; | ||
| 171 | |||
| 167 | /* | 172 | /* |
| 168 | * ocfs2 never allocates in this function - the only time we | 173 | * ocfs2 never allocates in this function - the only time we |
| 169 | * need to use BH_New is when we're extending i_size on a file | 174 | * need to use BH_New is when we're extending i_size on a file |
| @@ -178,6 +183,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
| 178 | if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) | 183 | if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) |
| 179 | map_bh(bh_result, inode->i_sb, p_blkno); | 184 | map_bh(bh_result, inode->i_sb, p_blkno); |
| 180 | 185 | ||
| 186 | bh_result->b_size = count << inode->i_blkbits; | ||
| 187 | |||
| 181 | if (!ocfs2_sparse_alloc(osb)) { | 188 | if (!ocfs2_sparse_alloc(osb)) { |
| 182 | if (p_blkno == 0) { | 189 | if (p_blkno == 0) { |
| 183 | err = -EIO; | 190 | err = -EIO; |
| @@ -210,7 +217,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, | |||
| 210 | struct buffer_head *di_bh) | 217 | struct buffer_head *di_bh) |
| 211 | { | 218 | { |
| 212 | void *kaddr; | 219 | void *kaddr; |
| 213 | unsigned int size; | 220 | loff_t size; |
| 214 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 221 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; |
| 215 | 222 | ||
| 216 | if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) { | 223 | if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) { |
| @@ -224,8 +231,9 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, | |||
| 224 | if (size > PAGE_CACHE_SIZE || | 231 | if (size > PAGE_CACHE_SIZE || |
| 225 | size > ocfs2_max_inline_data(inode->i_sb)) { | 232 | size > ocfs2_max_inline_data(inode->i_sb)) { |
| 226 | ocfs2_error(inode->i_sb, | 233 | ocfs2_error(inode->i_sb, |
| 227 | "Inode %llu has with inline data has bad size: %u", | 234 | "Inode %llu has with inline data has bad size: %Lu", |
| 228 | (unsigned long long)OCFS2_I(inode)->ip_blkno, size); | 235 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
| 236 | (unsigned long long)size); | ||
| 229 | return -EROFS; | 237 | return -EROFS; |
| 230 | } | 238 | } |
| 231 | 239 | ||
| @@ -275,7 +283,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
| 275 | 283 | ||
| 276 | mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0)); | 284 | mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0)); |
| 277 | 285 | ||
| 278 | ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page); | 286 | ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page); |
| 279 | if (ret != 0) { | 287 | if (ret != 0) { |
| 280 | if (ret == AOP_TRUNCATED_PAGE) | 288 | if (ret == AOP_TRUNCATED_PAGE) |
| 281 | unlock = 0; | 289 | unlock = 0; |
| @@ -285,7 +293,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
| 285 | 293 | ||
| 286 | if (down_read_trylock(&oi->ip_alloc_sem) == 0) { | 294 | if (down_read_trylock(&oi->ip_alloc_sem) == 0) { |
| 287 | ret = AOP_TRUNCATED_PAGE; | 295 | ret = AOP_TRUNCATED_PAGE; |
| 288 | goto out_meta_unlock; | 296 | goto out_inode_unlock; |
| 289 | } | 297 | } |
| 290 | 298 | ||
| 291 | /* | 299 | /* |
| @@ -305,25 +313,16 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
| 305 | goto out_alloc; | 313 | goto out_alloc; |
| 306 | } | 314 | } |
| 307 | 315 | ||
| 308 | ret = ocfs2_data_lock_with_page(inode, 0, page); | ||
| 309 | if (ret != 0) { | ||
| 310 | if (ret == AOP_TRUNCATED_PAGE) | ||
| 311 | unlock = 0; | ||
| 312 | mlog_errno(ret); | ||
| 313 | goto out_alloc; | ||
| 314 | } | ||
| 315 | |||
| 316 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 316 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
| 317 | ret = ocfs2_readpage_inline(inode, page); | 317 | ret = ocfs2_readpage_inline(inode, page); |
| 318 | else | 318 | else |
| 319 | ret = block_read_full_page(page, ocfs2_get_block); | 319 | ret = block_read_full_page(page, ocfs2_get_block); |
| 320 | unlock = 0; | 320 | unlock = 0; |
| 321 | 321 | ||
| 322 | ocfs2_data_unlock(inode, 0); | ||
| 323 | out_alloc: | 322 | out_alloc: |
| 324 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | 323 | up_read(&OCFS2_I(inode)->ip_alloc_sem); |
| 325 | out_meta_unlock: | 324 | out_inode_unlock: |
| 326 | ocfs2_meta_unlock(inode, 0); | 325 | ocfs2_inode_unlock(inode, 0); |
| 327 | out: | 326 | out: |
| 328 | if (unlock) | 327 | if (unlock) |
| 329 | unlock_page(page); | 328 | unlock_page(page); |
| @@ -331,6 +330,62 @@ out: | |||
| 331 | return ret; | 330 | return ret; |
| 332 | } | 331 | } |
| 333 | 332 | ||
| 333 | /* | ||
| 334 | * This is used only for read-ahead. Failures or difficult to handle | ||
| 335 | * situations are safe to ignore. | ||
| 336 | * | ||
| 337 | * Right now, we don't bother with BH_Boundary - in-inode extent lists | ||
| 338 | * are quite large (243 extents on 4k blocks), so most inodes don't | ||
| 339 | * grow out to a tree. If need be, detecting boundary extents could | ||
| 340 | * trivially be added in a future version of ocfs2_get_block(). | ||
| 341 | */ | ||
| 342 | static int ocfs2_readpages(struct file *filp, struct address_space *mapping, | ||
| 343 | struct list_head *pages, unsigned nr_pages) | ||
| 344 | { | ||
| 345 | int ret, err = -EIO; | ||
| 346 | struct inode *inode = mapping->host; | ||
| 347 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 348 | loff_t start; | ||
| 349 | struct page *last; | ||
| 350 | |||
| 351 | /* | ||
| 352 | * Use the nonblocking flag for the dlm code to avoid page | ||
| 353 | * lock inversion, but don't bother with retrying. | ||
| 354 | */ | ||
| 355 | ret = ocfs2_inode_lock_full(inode, NULL, 0, OCFS2_LOCK_NONBLOCK); | ||
| 356 | if (ret) | ||
| 357 | return err; | ||
| 358 | |||
| 359 | if (down_read_trylock(&oi->ip_alloc_sem) == 0) { | ||
| 360 | ocfs2_inode_unlock(inode, 0); | ||
| 361 | return err; | ||
| 362 | } | ||
| 363 | |||
| 364 | /* | ||
| 365 | * Don't bother with inline-data. There isn't anything | ||
| 366 | * to read-ahead in that case anyway... | ||
| 367 | */ | ||
| 368 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) | ||
| 369 | goto out_unlock; | ||
| 370 | |||
| 371 | /* | ||
| 372 | * Check whether a remote node truncated this file - we just | ||
| 373 | * drop out in that case as it's not worth handling here. | ||
| 374 | */ | ||
| 375 | last = list_entry(pages->prev, struct page, lru); | ||
| 376 | start = (loff_t)last->index << PAGE_CACHE_SHIFT; | ||
| 377 | if (start >= i_size_read(inode)) | ||
| 378 | goto out_unlock; | ||
| 379 | |||
| 380 | err = mpage_readpages(mapping, pages, nr_pages, ocfs2_get_block); | ||
| 381 | |||
| 382 | out_unlock: | ||
| 383 | up_read(&oi->ip_alloc_sem); | ||
| 384 | ocfs2_inode_unlock(inode, 0); | ||
| 385 | |||
| 386 | return err; | ||
| 387 | } | ||
| 388 | |||
| 334 | /* Note: Because we don't support holes, our allocation has | 389 | /* Note: Because we don't support holes, our allocation has |
| 335 | * already happened (allocation writes zeros to the file data) | 390 | * already happened (allocation writes zeros to the file data) |
| 336 | * so we don't have to worry about ordered writes in | 391 | * so we don't have to worry about ordered writes in |
| @@ -452,7 +507,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) | |||
| 452 | * accessed concurrently from multiple nodes. | 507 | * accessed concurrently from multiple nodes. |
| 453 | */ | 508 | */ |
| 454 | if (!INODE_JOURNAL(inode)) { | 509 | if (!INODE_JOURNAL(inode)) { |
| 455 | err = ocfs2_meta_lock(inode, NULL, 0); | 510 | err = ocfs2_inode_lock(inode, NULL, 0); |
| 456 | if (err) { | 511 | if (err) { |
| 457 | if (err != -ENOENT) | 512 | if (err != -ENOENT) |
| 458 | mlog_errno(err); | 513 | mlog_errno(err); |
| @@ -467,7 +522,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) | |||
| 467 | 522 | ||
| 468 | if (!INODE_JOURNAL(inode)) { | 523 | if (!INODE_JOURNAL(inode)) { |
| 469 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | 524 | up_read(&OCFS2_I(inode)->ip_alloc_sem); |
| 470 | ocfs2_meta_unlock(inode, 0); | 525 | ocfs2_inode_unlock(inode, 0); |
| 471 | } | 526 | } |
| 472 | 527 | ||
| 473 | if (err) { | 528 | if (err) { |
| @@ -638,34 +693,12 @@ static ssize_t ocfs2_direct_IO(int rw, | |||
| 638 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 693 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
| 639 | return 0; | 694 | return 0; |
| 640 | 695 | ||
| 641 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { | ||
| 642 | /* | ||
| 643 | * We get PR data locks even for O_DIRECT. This | ||
| 644 | * allows concurrent O_DIRECT I/O but doesn't let | ||
| 645 | * O_DIRECT with extending and buffered zeroing writes | ||
| 646 | * race. If they did race then the buffered zeroing | ||
| 647 | * could be written back after the O_DIRECT I/O. It's | ||
| 648 | * one thing to tell people not to mix buffered and | ||
| 649 | * O_DIRECT writes, but expecting them to understand | ||
| 650 | * that file extension is also an implicit buffered | ||
| 651 | * write is too much. By getting the PR we force | ||
| 652 | * writeback of the buffered zeroing before | ||
| 653 | * proceeding. | ||
| 654 | */ | ||
| 655 | ret = ocfs2_data_lock(inode, 0); | ||
| 656 | if (ret < 0) { | ||
| 657 | mlog_errno(ret); | ||
| 658 | goto out; | ||
| 659 | } | ||
| 660 | ocfs2_data_unlock(inode, 0); | ||
| 661 | } | ||
| 662 | |||
| 663 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | 696 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, |
| 664 | inode->i_sb->s_bdev, iov, offset, | 697 | inode->i_sb->s_bdev, iov, offset, |
| 665 | nr_segs, | 698 | nr_segs, |
| 666 | ocfs2_direct_IO_get_blocks, | 699 | ocfs2_direct_IO_get_blocks, |
| 667 | ocfs2_dio_end_io); | 700 | ocfs2_dio_end_io); |
| 668 | out: | 701 | |
| 669 | mlog_exit(ret); | 702 | mlog_exit(ret); |
| 670 | return ret; | 703 | return ret; |
| 671 | } | 704 | } |
| @@ -1754,7 +1787,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, | |||
| 1754 | struct buffer_head *di_bh = NULL; | 1787 | struct buffer_head *di_bh = NULL; |
| 1755 | struct inode *inode = mapping->host; | 1788 | struct inode *inode = mapping->host; |
| 1756 | 1789 | ||
| 1757 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | 1790 | ret = ocfs2_inode_lock(inode, &di_bh, 1); |
| 1758 | if (ret) { | 1791 | if (ret) { |
| 1759 | mlog_errno(ret); | 1792 | mlog_errno(ret); |
| 1760 | return ret; | 1793 | return ret; |
| @@ -1769,30 +1802,22 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, | |||
| 1769 | */ | 1802 | */ |
| 1770 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 1803 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
| 1771 | 1804 | ||
| 1772 | ret = ocfs2_data_lock(inode, 1); | ||
| 1773 | if (ret) { | ||
| 1774 | mlog_errno(ret); | ||
| 1775 | goto out_fail; | ||
| 1776 | } | ||
| 1777 | |||
| 1778 | ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, | 1805 | ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, |
| 1779 | fsdata, di_bh, NULL); | 1806 | fsdata, di_bh, NULL); |
| 1780 | if (ret) { | 1807 | if (ret) { |
| 1781 | mlog_errno(ret); | 1808 | mlog_errno(ret); |
| 1782 | goto out_fail_data; | 1809 | goto out_fail; |
| 1783 | } | 1810 | } |
| 1784 | 1811 | ||
| 1785 | brelse(di_bh); | 1812 | brelse(di_bh); |
| 1786 | 1813 | ||
| 1787 | return 0; | 1814 | return 0; |
| 1788 | 1815 | ||
| 1789 | out_fail_data: | ||
| 1790 | ocfs2_data_unlock(inode, 1); | ||
| 1791 | out_fail: | 1816 | out_fail: |
| 1792 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 1817 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
| 1793 | 1818 | ||
| 1794 | brelse(di_bh); | 1819 | brelse(di_bh); |
| 1795 | ocfs2_meta_unlock(inode, 1); | 1820 | ocfs2_inode_unlock(inode, 1); |
| 1796 | 1821 | ||
| 1797 | return ret; | 1822 | return ret; |
| 1798 | } | 1823 | } |
| @@ -1908,15 +1933,15 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping, | |||
| 1908 | 1933 | ||
| 1909 | ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata); | 1934 | ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata); |
| 1910 | 1935 | ||
| 1911 | ocfs2_data_unlock(inode, 1); | ||
| 1912 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 1936 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
| 1913 | ocfs2_meta_unlock(inode, 1); | 1937 | ocfs2_inode_unlock(inode, 1); |
| 1914 | 1938 | ||
| 1915 | return ret; | 1939 | return ret; |
| 1916 | } | 1940 | } |
| 1917 | 1941 | ||
| 1918 | const struct address_space_operations ocfs2_aops = { | 1942 | const struct address_space_operations ocfs2_aops = { |
| 1919 | .readpage = ocfs2_readpage, | 1943 | .readpage = ocfs2_readpage, |
| 1944 | .readpages = ocfs2_readpages, | ||
| 1920 | .writepage = ocfs2_writepage, | 1945 | .writepage = ocfs2_writepage, |
| 1921 | .write_begin = ocfs2_write_begin, | 1946 | .write_begin = ocfs2_write_begin, |
| 1922 | .write_end = ocfs2_write_end, | 1947 | .write_end = ocfs2_write_end, |
