aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/aops.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/aops.c')
-rw-r--r--fs/ocfs2/aops.c137
1 files changed, 81 insertions, 56 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 56f7790cad46..bc7b4cbbe8ec 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -26,6 +26,7 @@
26#include <asm/byteorder.h> 26#include <asm/byteorder.h>
27#include <linux/swap.h> 27#include <linux/swap.h>
28#include <linux/pipe_fs_i.h> 28#include <linux/pipe_fs_i.h>
29#include <linux/mpage.h>
29 30
30#define MLOG_MASK_PREFIX ML_FILE_IO 31#define MLOG_MASK_PREFIX ML_FILE_IO
31#include <cluster/masklog.h> 32#include <cluster/masklog.h>
@@ -139,7 +140,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
139{ 140{
140 int err = 0; 141 int err = 0;
141 unsigned int ext_flags; 142 unsigned int ext_flags;
142 u64 p_blkno, past_eof; 143 u64 max_blocks = bh_result->b_size >> inode->i_blkbits;
144 u64 p_blkno, count, past_eof;
143 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 145 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
144 146
145 mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, 147 mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode,
@@ -155,7 +157,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
155 goto bail; 157 goto bail;
156 } 158 }
157 159
158 err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, NULL, 160 err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count,
159 &ext_flags); 161 &ext_flags);
160 if (err) { 162 if (err) {
161 mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " 163 mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, "
@@ -164,6 +166,9 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
164 goto bail; 166 goto bail;
165 } 167 }
166 168
169 if (max_blocks < count)
170 count = max_blocks;
171
167 /* 172 /*
168 * ocfs2 never allocates in this function - the only time we 173 * ocfs2 never allocates in this function - the only time we
169 * need to use BH_New is when we're extending i_size on a file 174 * need to use BH_New is when we're extending i_size on a file
@@ -178,6 +183,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
178 if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) 183 if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
179 map_bh(bh_result, inode->i_sb, p_blkno); 184 map_bh(bh_result, inode->i_sb, p_blkno);
180 185
186 bh_result->b_size = count << inode->i_blkbits;
187
181 if (!ocfs2_sparse_alloc(osb)) { 188 if (!ocfs2_sparse_alloc(osb)) {
182 if (p_blkno == 0) { 189 if (p_blkno == 0) {
183 err = -EIO; 190 err = -EIO;
@@ -210,7 +217,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
210 struct buffer_head *di_bh) 217 struct buffer_head *di_bh)
211{ 218{
212 void *kaddr; 219 void *kaddr;
213 unsigned int size; 220 loff_t size;
214 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 221 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
215 222
216 if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) { 223 if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) {
@@ -224,8 +231,9 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
224 if (size > PAGE_CACHE_SIZE || 231 if (size > PAGE_CACHE_SIZE ||
225 size > ocfs2_max_inline_data(inode->i_sb)) { 232 size > ocfs2_max_inline_data(inode->i_sb)) {
226 ocfs2_error(inode->i_sb, 233 ocfs2_error(inode->i_sb,
227 "Inode %llu has with inline data has bad size: %u", 234 "Inode %llu has with inline data has bad size: %Lu",
228 (unsigned long long)OCFS2_I(inode)->ip_blkno, size); 235 (unsigned long long)OCFS2_I(inode)->ip_blkno,
236 (unsigned long long)size);
229 return -EROFS; 237 return -EROFS;
230 } 238 }
231 239
@@ -275,7 +283,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
275 283
276 mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0)); 284 mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0));
277 285
278 ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page); 286 ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page);
279 if (ret != 0) { 287 if (ret != 0) {
280 if (ret == AOP_TRUNCATED_PAGE) 288 if (ret == AOP_TRUNCATED_PAGE)
281 unlock = 0; 289 unlock = 0;
@@ -285,7 +293,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
285 293
286 if (down_read_trylock(&oi->ip_alloc_sem) == 0) { 294 if (down_read_trylock(&oi->ip_alloc_sem) == 0) {
287 ret = AOP_TRUNCATED_PAGE; 295 ret = AOP_TRUNCATED_PAGE;
288 goto out_meta_unlock; 296 goto out_inode_unlock;
289 } 297 }
290 298
291 /* 299 /*
@@ -305,25 +313,16 @@ static int ocfs2_readpage(struct file *file, struct page *page)
305 goto out_alloc; 313 goto out_alloc;
306 } 314 }
307 315
308 ret = ocfs2_data_lock_with_page(inode, 0, page);
309 if (ret != 0) {
310 if (ret == AOP_TRUNCATED_PAGE)
311 unlock = 0;
312 mlog_errno(ret);
313 goto out_alloc;
314 }
315
316 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) 316 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
317 ret = ocfs2_readpage_inline(inode, page); 317 ret = ocfs2_readpage_inline(inode, page);
318 else 318 else
319 ret = block_read_full_page(page, ocfs2_get_block); 319 ret = block_read_full_page(page, ocfs2_get_block);
320 unlock = 0; 320 unlock = 0;
321 321
322 ocfs2_data_unlock(inode, 0);
323out_alloc: 322out_alloc:
324 up_read(&OCFS2_I(inode)->ip_alloc_sem); 323 up_read(&OCFS2_I(inode)->ip_alloc_sem);
325out_meta_unlock: 324out_inode_unlock:
326 ocfs2_meta_unlock(inode, 0); 325 ocfs2_inode_unlock(inode, 0);
327out: 326out:
328 if (unlock) 327 if (unlock)
329 unlock_page(page); 328 unlock_page(page);
@@ -331,6 +330,62 @@ out:
331 return ret; 330 return ret;
332} 331}
333 332
333/*
334 * Used only for read-ahead (wired up as ->readpages), so failures and
335 * hard cases are safe to ignore: every bail-out below just returns -EIO.
336 *
337 * Right now, we don't bother with BH_Boundary - in-inode extent lists
338 * are quite large (243 extents on 4k blocks), so most inodes don't
339 * grow out to a tree. If need be, detecting boundary extents could
340 * trivially be added in a future version of ocfs2_get_block().
341 */
342static int ocfs2_readpages(struct file *filp, struct address_space *mapping,
343 struct list_head *pages, unsigned nr_pages)
344{
345 int ret, err = -EIO;
346 struct inode *inode = mapping->host;
347 struct ocfs2_inode_info *oi = OCFS2_I(inode);
348 loff_t start;
349 struct page *last;
350
351 /*
352 * Take the inode lock with the nonblocking flag so the dlm code
353 * can't cause a page lock inversion; on failure, give up (no retry).
354 */
355 ret = ocfs2_inode_lock_full(inode, NULL, 0, OCFS2_LOCK_NONBLOCK);
356 if (ret)
357 return err;
358
359 if (down_read_trylock(&oi->ip_alloc_sem) == 0) {
360 ocfs2_inode_unlock(inode, 0);
361 return err;
362 }
363
364 /*
365 * Don't bother with inline-data. There isn't anything
366 * to read-ahead in that case anyway (err is still -EIO here).
367 */
368 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
369 goto out_unlock;
370
371 /*
372 * If the page at the tail of the list (pages->prev) starts at or past
373 * i_size - e.g. a remote node truncated this file - just drop out.
374 */
375 last = list_entry(pages->prev, struct page, lru);
376 start = (loff_t)last->index << PAGE_CACHE_SHIFT;
377 if (start >= i_size_read(inode))
378 goto out_unlock;
379
380 err = mpage_readpages(mapping, pages, nr_pages, ocfs2_get_block);
381
382out_unlock:
383 up_read(&oi->ip_alloc_sem);
384 ocfs2_inode_unlock(inode, 0);
385
386 return err;
387}
388
334/* Note: Because we don't support holes, our allocation has 389/* Note: Because we don't support holes, our allocation has
335 * already happened (allocation writes zeros to the file data) 390 * already happened (allocation writes zeros to the file data)
336 * so we don't have to worry about ordered writes in 391 * so we don't have to worry about ordered writes in
@@ -452,7 +507,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
452 * accessed concurrently from multiple nodes. 507 * accessed concurrently from multiple nodes.
453 */ 508 */
454 if (!INODE_JOURNAL(inode)) { 509 if (!INODE_JOURNAL(inode)) {
455 err = ocfs2_meta_lock(inode, NULL, 0); 510 err = ocfs2_inode_lock(inode, NULL, 0);
456 if (err) { 511 if (err) {
457 if (err != -ENOENT) 512 if (err != -ENOENT)
458 mlog_errno(err); 513 mlog_errno(err);
@@ -467,7 +522,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
467 522
468 if (!INODE_JOURNAL(inode)) { 523 if (!INODE_JOURNAL(inode)) {
469 up_read(&OCFS2_I(inode)->ip_alloc_sem); 524 up_read(&OCFS2_I(inode)->ip_alloc_sem);
470 ocfs2_meta_unlock(inode, 0); 525 ocfs2_inode_unlock(inode, 0);
471 } 526 }
472 527
473 if (err) { 528 if (err) {
@@ -638,34 +693,12 @@ static ssize_t ocfs2_direct_IO(int rw,
638 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) 693 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
639 return 0; 694 return 0;
640 695
641 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
642 /*
643 * We get PR data locks even for O_DIRECT. This
644 * allows concurrent O_DIRECT I/O but doesn't let
645 * O_DIRECT with extending and buffered zeroing writes
646 * race. If they did race then the buffered zeroing
647 * could be written back after the O_DIRECT I/O. It's
648 * one thing to tell people not to mix buffered and
649 * O_DIRECT writes, but expecting them to understand
650 * that file extension is also an implicit buffered
651 * write is too much. By getting the PR we force
652 * writeback of the buffered zeroing before
653 * proceeding.
654 */
655 ret = ocfs2_data_lock(inode, 0);
656 if (ret < 0) {
657 mlog_errno(ret);
658 goto out;
659 }
660 ocfs2_data_unlock(inode, 0);
661 }
662
663 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, 696 ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
664 inode->i_sb->s_bdev, iov, offset, 697 inode->i_sb->s_bdev, iov, offset,
665 nr_segs, 698 nr_segs,
666 ocfs2_direct_IO_get_blocks, 699 ocfs2_direct_IO_get_blocks,
667 ocfs2_dio_end_io); 700 ocfs2_dio_end_io);
668out: 701
669 mlog_exit(ret); 702 mlog_exit(ret);
670 return ret; 703 return ret;
671} 704}
@@ -1754,7 +1787,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
1754 struct buffer_head *di_bh = NULL; 1787 struct buffer_head *di_bh = NULL;
1755 struct inode *inode = mapping->host; 1788 struct inode *inode = mapping->host;
1756 1789
1757 ret = ocfs2_meta_lock(inode, &di_bh, 1); 1790 ret = ocfs2_inode_lock(inode, &di_bh, 1);
1758 if (ret) { 1791 if (ret) {
1759 mlog_errno(ret); 1792 mlog_errno(ret);
1760 return ret; 1793 return ret;
@@ -1769,30 +1802,22 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
1769 */ 1802 */
1770 down_write(&OCFS2_I(inode)->ip_alloc_sem); 1803 down_write(&OCFS2_I(inode)->ip_alloc_sem);
1771 1804
1772 ret = ocfs2_data_lock(inode, 1);
1773 if (ret) {
1774 mlog_errno(ret);
1775 goto out_fail;
1776 }
1777
1778 ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, 1805 ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep,
1779 fsdata, di_bh, NULL); 1806 fsdata, di_bh, NULL);
1780 if (ret) { 1807 if (ret) {
1781 mlog_errno(ret); 1808 mlog_errno(ret);
1782 goto out_fail_data; 1809 goto out_fail;
1783 } 1810 }
1784 1811
1785 brelse(di_bh); 1812 brelse(di_bh);
1786 1813
1787 return 0; 1814 return 0;
1788 1815
1789out_fail_data:
1790 ocfs2_data_unlock(inode, 1);
1791out_fail: 1816out_fail:
1792 up_write(&OCFS2_I(inode)->ip_alloc_sem); 1817 up_write(&OCFS2_I(inode)->ip_alloc_sem);
1793 1818
1794 brelse(di_bh); 1819 brelse(di_bh);
1795 ocfs2_meta_unlock(inode, 1); 1820 ocfs2_inode_unlock(inode, 1);
1796 1821
1797 return ret; 1822 return ret;
1798} 1823}
@@ -1908,15 +1933,15 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping,
1908 1933
1909 ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata); 1934 ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata);
1910 1935
1911 ocfs2_data_unlock(inode, 1);
1912 up_write(&OCFS2_I(inode)->ip_alloc_sem); 1936 up_write(&OCFS2_I(inode)->ip_alloc_sem);
1913 ocfs2_meta_unlock(inode, 1); 1937 ocfs2_inode_unlock(inode, 1);
1914 1938
1915 return ret; 1939 return ret;
1916} 1940}
1917 1941
1918const struct address_space_operations ocfs2_aops = { 1942const struct address_space_operations ocfs2_aops = {
1919 .readpage = ocfs2_readpage, 1943 .readpage = ocfs2_readpage,
1944 .readpages = ocfs2_readpages,
1920 .writepage = ocfs2_writepage, 1945 .writepage = ocfs2_writepage,
1921 .write_begin = ocfs2_write_begin, 1946 .write_begin = ocfs2_write_begin,
1922 .write_end = ocfs2_write_end, 1947 .write_end = ocfs2_write_end,