Diffstat (limited to 'mm/filemap.c')

 mm/filemap.c | 344 +++++++-----------------------------------------
 1 file changed, 59 insertions(+), 285 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 27ebc0c9571b..a82fbe4c9e8e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -77,7 +77,7 @@
  *  ->mmap_sem
  *    ->lock_page		(access_process_vm)
  *
- *  ->i_mutex			(generic_file_buffered_write)
+ *  ->i_mutex			(generic_perform_write)
  *    ->mmap_sem		(fault_in_pages_readable->do_page_fault)
  *
  *  bdi->wb.list_lock
@@ -1428,7 +1428,8 @@ static void shrink_readahead_size_eio(struct file *filp,
  * do_generic_file_read - generic file read routine
  * @filp:	the file to read
  * @ppos:	current file position
- * @desc:	read_descriptor
+ * @iter:	data destination
+ * @written:	already copied
  *
  * This is a generic file read routine, and uses the
  * mapping->a_ops->readpage() function for the actual low-level stuff.
@@ -1436,8 +1437,8 @@ static void shrink_readahead_size_eio(struct file *filp,
  * This is really ugly. But the goto's actually try to clarify some
  * of the logic when it comes to error handling etc.
  */
-static void do_generic_file_read(struct file *filp, loff_t *ppos,
-		read_descriptor_t *desc)
+static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
+		struct iov_iter *iter, ssize_t written)
 {
 	struct address_space *mapping = filp->f_mapping;
 	struct inode *inode = mapping->host;
@@ -1447,12 +1448,12 @@ static void do_generic_file_read(struct file *filp, loff_t *ppos,
 	pgoff_t prev_index;
 	unsigned long offset;	/* offset into pagecache page */
 	unsigned int prev_offset;
-	int error;
+	int error = 0;
 
 	index = *ppos >> PAGE_CACHE_SHIFT;
 	prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
 	prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
-	last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
+	last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
 	offset = *ppos & ~PAGE_CACHE_MASK;
 
 	for (;;) {
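
The loop bounds above are plain page arithmetic on the iterator's byte count. A minimal user-space sketch of the same calculation, with PAGE_SHIFT standing in for the kernel's PAGE_CACHE_SHIFT and illustrative values:

	#include <stdio.h>
	#include <stdint.h>

	#define PAGE_SHIFT 12
	#define PAGE_SIZE  (1UL << PAGE_SHIFT)
	#define PAGE_MASK  (~(PAGE_SIZE - 1))

	int main(void)
	{
		uint64_t pos = 5000;	/* current file position (*ppos) */
		uint64_t count = 10000;	/* bytes requested, i.e. iter->count */

		uint64_t index = pos >> PAGE_SHIFT;	/* first page to read */
		uint64_t offset = pos & ~PAGE_MASK;	/* offset within it */
		uint64_t last_index = (pos + count + PAGE_SIZE - 1) >> PAGE_SHIFT;

		/* The read spans pages [index, last_index): here pages 1..3. */
		printf("index=%llu offset=%llu last_index=%llu\n",
		       (unsigned long long)index, (unsigned long long)offset,
		       (unsigned long long)last_index);
		return 0;
	}
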
@@ -1487,7 +1488,7 @@ find_page:
 			if (!page->mapping)
 				goto page_not_up_to_date_locked;
 			if (!mapping->a_ops->is_partially_uptodate(page,
-							desc, offset))
+							offset, iter->count))
 				goto page_not_up_to_date_locked;
 			unlock_page(page);
 		}
@@ -1537,24 +1538,23 @@ page_ok:
 		/*
 		 * Ok, we have the page, and it's up-to-date, so
 		 * now we can copy it to user space...
-		 *
-		 * The file_read_actor routine returns how many bytes were
-		 * actually used..
-		 * NOTE! This may not be the same as how much of a user buffer
-		 * we filled up (we may be padding etc), so we can only update
-		 * "pos" here (the actor routine has to update the user buffer
-		 * pointers and the remaining count).
 		 */
-		ret = file_read_actor(desc, page, offset, nr);
+
+		ret = copy_page_to_iter(page, offset, nr, iter);
 		offset += ret;
 		index += offset >> PAGE_CACHE_SHIFT;
 		offset &= ~PAGE_CACHE_MASK;
 		prev_offset = offset;
 
 		page_cache_release(page);
-		if (ret == nr && desc->count)
-			continue;
-		goto out;
+		written += ret;
+		if (!iov_iter_count(iter))
+			goto out;
+		if (ret < nr) {
+			error = -EFAULT;
+			goto out;
+		}
+		continue;
 
 page_not_up_to_date:
 		/* Get exclusive access to the page ... */
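
The rewritten tail of the copy loop distinguishes three outcomes: the iterator has been drained (success), the copy came up short while bytes are still wanted (the user buffer faulted, so -EFAULT), or neither (move on to the next page). A toy user-space model of that control flow, with a stub standing in for copy_page_to_iter():

	#include <errno.h>
	#include <stdio.h>

	/* Toy iterator: just a remaining-byte count. */
	struct toy_iter { size_t count; };

	/* Bytes the "user buffer" will accept before faulting. */
	static size_t fault_budget = 200;

	/* Stand-in for copy_page_to_iter(): copies up to nr bytes, may come up short. */
	static size_t toy_copy_page_to_iter(size_t nr, struct toy_iter *i)
	{
		size_t done = nr;

		if (done > i->count)
			done = i->count;
		if (done > fault_budget)
			done = fault_budget;
		fault_budget -= done;
		i->count -= done;
		return done;
	}

	int main(void)
	{
		struct toy_iter it = { .count = 300 };
		size_t written = 0;
		int error = 0;

		for (;;) {
			size_t nr = 128;	/* bytes ready in the current page */
			size_t ret = toy_copy_page_to_iter(nr, &it);

			written += ret;
			if (!it.count)
				break;			/* request fully satisfied */
			if (ret < nr) {
				error = -EFAULT;	/* short copy, bytes still wanted */
				break;
			}
			/* page consumed, loop on to the next one */
		}
		printf("written=%zu error=%d\n", written, error);	/* 200, -14 */
		return 0;
	}

Note the ordering, which mirrors the diff: the drained-iterator check comes first, so a short copy that nevertheless satisfies the whole request is still reported as success.
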
@@ -1589,6 +1589,7 @@ readpage:
 		if (unlikely(error)) {
 			if (error == AOP_TRUNCATED_PAGE) {
 				page_cache_release(page);
+				error = 0;
 				goto find_page;
 			}
 			goto readpage_error;
@@ -1619,7 +1620,6 @@ readpage:
 
 readpage_error:
 		/* UHHUH! A synchronous read error occurred. Report it */
-		desc->error = error;
 		page_cache_release(page);
 		goto out;
 
@@ -1630,16 +1630,17 @@ no_cached_page:
 		 */
 		page = page_cache_alloc_cold(mapping);
 		if (!page) {
-			desc->error = -ENOMEM;
+			error = -ENOMEM;
 			goto out;
 		}
 		error = add_to_page_cache_lru(page, mapping,
 						index, GFP_KERNEL);
 		if (error) {
 			page_cache_release(page);
-			if (error == -EEXIST)
+			if (error == -EEXIST) {
+				error = 0;
 				goto find_page;
-			desc->error = error;
+			}
 			goto out;
 		}
 		goto readpage;
@@ -1652,44 +1653,7 @@ out:
 
 	*ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
 	file_accessed(filp);
-}
-
-int file_read_actor(read_descriptor_t *desc, struct page *page,
-			unsigned long offset, unsigned long size)
-{
-	char *kaddr;
-	unsigned long left, count = desc->count;
-
-	if (size > count)
-		size = count;
-
-	/*
-	 * Faults on the destination of a read are common, so do it before
-	 * taking the kmap.
-	 */
-	if (!fault_in_pages_writeable(desc->arg.buf, size)) {
-		kaddr = kmap_atomic(page);
-		left = __copy_to_user_inatomic(desc->arg.buf,
-						kaddr + offset, size);
-		kunmap_atomic(kaddr);
-		if (left == 0)
-			goto success;
-	}
-
-	/* Do it the slow way */
-	kaddr = kmap(page);
-	left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
-	kunmap(page);
-
-	if (left) {
-		size -= left;
-		desc->error = -EFAULT;
-	}
-success:
-	desc->count = count - size;
-	desc->written += size;
-	desc->arg.buf += size;
-	return size;
+	return written ? written : error;
 }
 
 /*
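
file_read_actor() kept its progress in the read_descriptor_t (arg.buf, count, written, error); with copy_page_to_iter() the same state lives partly in the iov_iter and partly in the caller's written/error locals. A small sanity check that the two bookkeeping styles stay step-for-step equivalent (toy structs and step sizes, not the kernel's):

	#include <assert.h>
	#include <stddef.h>
	#include <stdio.h>

	/* Old style: the actor mutated an explicit descriptor. */
	struct toy_desc {
		size_t count;	/* bytes still wanted */
		size_t written;	/* bytes done so far */
	};

	/* New style: the iterator owns the remaining count; caller keeps the total. */
	struct toy_iter {
		size_t count;
	};

	int main(void)
	{
		struct toy_desc d = { .count = 300, .written = 0 };
		struct toy_iter i = { .count = 300 };
		size_t written = 0;
		size_t steps[] = { 128, 128, 44 };

		for (size_t k = 0; k < 3; k++) {
			/* actor-style bookkeeping */
			d.count -= steps[k];
			d.written += steps[k];
			/* iterator-style bookkeeping */
			i.count -= steps[k];
			written += steps[k];
		}
		assert(d.written == written && d.count == i.count);
		printf("both styles agree: written=%zu remaining=%zu\n",
		       written, i.count);
		return 0;
	}
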
@@ -1747,14 +1711,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 {
 	struct file *filp = iocb->ki_filp;
 	ssize_t retval;
-	unsigned long seg = 0;
 	size_t count;
 	loff_t *ppos = &iocb->ki_pos;
+	struct iov_iter i;
 
 	count = 0;
 	retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
 	if (retval)
 		return retval;
+	iov_iter_init(&i, iov, nr_segs, count, 0);
 
 	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
 	if (filp->f_flags & O_DIRECT) {
@@ -1776,6 +1741,11 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 		if (retval > 0) {
 			*ppos = pos + retval;
 			count -= retval;
+			/*
+			 * If we did a short DIO read we need to skip the
+			 * section of the iov that we've already read data into.
+			 */
+			iov_iter_advance(&i, retval);
 		}
 
 		/*
@@ -1792,39 +1762,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 		}
 	}
 
-	count = retval;
-	for (seg = 0; seg < nr_segs; seg++) {
-		read_descriptor_t desc;
-		loff_t offset = 0;
-
-		/*
-		 * If we did a short DIO read we need to skip the section of the
-		 * iov that we've already read data into.
-		 */
-		if (count) {
-			if (count > iov[seg].iov_len) {
-				count -= iov[seg].iov_len;
-				continue;
-			}
-			offset = count;
-			count = 0;
-		}
-
-		desc.written = 0;
-		desc.arg.buf = iov[seg].iov_base + offset;
-		desc.count = iov[seg].iov_len - offset;
-		if (desc.count == 0)
-			continue;
-		desc.error = 0;
-		do_generic_file_read(filp, ppos, &desc);
-		retval += desc.written;
-		if (desc.error) {
-			retval = retval ?: desc.error;
-			break;
-		}
-		if (desc.count > 0)
-			break;
-	}
+	retval = do_generic_file_read(filp, ppos, &i, retval);
 out:
 	return retval;
 }
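
The removed per-segment loop did two jobs: skip the part of the iovec already filled by a short direct-IO read, then feed each remaining segment to do_generic_file_read() through a read_descriptor_t. The skip is now the iov_iter_advance() call added above, and the feed collapses into the single do_generic_file_read(filp, ppos, &i, retval) call. A self-contained sketch of the skip walk over a multi-segment iovec (segment sizes are illustrative):

	#include <stdio.h>
	#include <sys/uio.h>

	/* Walk an iovec array past `skip` bytes, the way the removed loop did. */
	static void advance(const struct iovec **iov, int *nr_segs,
			    size_t *offset, size_t skip)
	{
		while (skip && *nr_segs) {
			size_t seg = (*iov)->iov_len - *offset;

			if (skip < seg) {
				*offset += skip;
				return;
			}
			skip -= seg;
			(*iov)++;
			(*nr_segs)--;
			*offset = 0;
		}
	}

	int main(void)
	{
		char a[100], b[100], c[100];
		struct iovec v[3] = {
			{ a, sizeof(a) }, { b, sizeof(b) }, { c, sizeof(c) },
		};
		const struct iovec *iov = v;
		int nr_segs = 3;
		size_t offset = 0;

		/* A short direct-IO read filled 150 bytes: skip past them. */
		advance(&iov, &nr_segs, &offset, 150);
		printf("resume at segment %td, offset %zu\n", iov - v, offset);
		return 0;
	}
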
@@ -2335,150 +2273,6 @@ struct page *read_cache_page_gfp(struct address_space *mapping,
 }
 EXPORT_SYMBOL(read_cache_page_gfp);
 
-static size_t __iovec_copy_from_user_inatomic(char *vaddr,
-			const struct iovec *iov, size_t base, size_t bytes)
-{
-	size_t copied = 0, left = 0;
-
-	while (bytes) {
-		char __user *buf = iov->iov_base + base;
-		int copy = min(bytes, iov->iov_len - base);
-
-		base = 0;
-		left = __copy_from_user_inatomic(vaddr, buf, copy);
-		copied += copy;
-		bytes -= copy;
-		vaddr += copy;
-		iov++;
-
-		if (unlikely(left))
-			break;
-	}
-	return copied - left;
-}
-
-/*
- * Copy as much as we can into the page and return the number of bytes which
- * were successfully copied. If a fault is encountered then return the number of
- * bytes which were copied.
- */
-size_t iov_iter_copy_from_user_atomic(struct page *page,
-		struct iov_iter *i, unsigned long offset, size_t bytes)
-{
-	char *kaddr;
-	size_t copied;
-
-	BUG_ON(!in_atomic());
-	kaddr = kmap_atomic(page);
-	if (likely(i->nr_segs == 1)) {
-		int left;
-		char __user *buf = i->iov->iov_base + i->iov_offset;
-		left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
-		copied = bytes - left;
-	} else {
-		copied = __iovec_copy_from_user_inatomic(kaddr + offset,
-						i->iov, i->iov_offset, bytes);
-	}
-	kunmap_atomic(kaddr);
-
-	return copied;
-}
-EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
-
-/*
- * This has the same sideeffects and return value as
- * iov_iter_copy_from_user_atomic().
- * The difference is that it attempts to resolve faults.
- * Page must not be locked.
- */
-size_t iov_iter_copy_from_user(struct page *page,
-		struct iov_iter *i, unsigned long offset, size_t bytes)
-{
-	char *kaddr;
-	size_t copied;
-
-	kaddr = kmap(page);
-	if (likely(i->nr_segs == 1)) {
-		int left;
-		char __user *buf = i->iov->iov_base + i->iov_offset;
-		left = __copy_from_user(kaddr + offset, buf, bytes);
-		copied = bytes - left;
-	} else {
-		copied = __iovec_copy_from_user_inatomic(kaddr + offset,
-						i->iov, i->iov_offset, bytes);
-	}
-	kunmap(page);
-	return copied;
-}
-EXPORT_SYMBOL(iov_iter_copy_from_user);
-
-void iov_iter_advance(struct iov_iter *i, size_t bytes)
-{
-	BUG_ON(i->count < bytes);
-
-	if (likely(i->nr_segs == 1)) {
-		i->iov_offset += bytes;
-		i->count -= bytes;
-	} else {
-		const struct iovec *iov = i->iov;
-		size_t base = i->iov_offset;
-		unsigned long nr_segs = i->nr_segs;
-
-		/*
-		 * The !iov->iov_len check ensures we skip over unlikely
-		 * zero-length segments (without overruning the iovec).
-		 */
-		while (bytes || unlikely(i->count && !iov->iov_len)) {
-			int copy;
-
-			copy = min(bytes, iov->iov_len - base);
-			BUG_ON(!i->count || i->count < copy);
-			i->count -= copy;
-			bytes -= copy;
-			base += copy;
-			if (iov->iov_len == base) {
-				iov++;
-				nr_segs--;
-				base = 0;
-			}
-		}
-		i->iov = iov;
-		i->iov_offset = base;
-		i->nr_segs = nr_segs;
-	}
-}
-EXPORT_SYMBOL(iov_iter_advance);
-
-/*
- * Fault in the first iovec of the given iov_iter, to a maximum length
- * of bytes. Returns 0 on success, or non-zero if the memory could not be
- * accessed (ie. because it is an invalid address).
- *
- * writev-intensive code may want this to prefault several iovecs -- that
- * would be possible (callers must not rely on the fact that _only_ the
- * first iovec will be faulted with the current implementation).
- */
-int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
-{
-	char __user *buf = i->iov->iov_base + i->iov_offset;
-	bytes = min(bytes, i->iov->iov_len - i->iov_offset);
-	return fault_in_pages_readable(buf, bytes);
-}
-EXPORT_SYMBOL(iov_iter_fault_in_readable);
-
-/*
- * Return the count of just the current iov_iter segment.
- */
-size_t iov_iter_single_seg_count(const struct iov_iter *i)
-{
-	const struct iovec *iov = i->iov;
-	if (i->nr_segs == 1)
-		return i->count;
-	else
-		return min(i->count, iov->iov_len - i->iov_offset);
-}
-EXPORT_SYMBOL(iov_iter_single_seg_count);
-
 /*
  * Performs necessary checks before doing a write
 *
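
These iov_iter helpers leave filemap.c wholesale; this diffstat is limited to mm/filemap.c, so their new home is not visible in this view (in mainline kernels of this vintage they were consolidated into mm/iov_iter.c). One subtlety worth preserving when modelling them: iov_iter_single_seg_count() caps the current segment's length by the iterator's total remaining count. A compact user-space model:

	#include <stdio.h>
	#include <sys/uio.h>

	struct toy_iter {
		const struct iovec *iov;
		unsigned long nr_segs;
		size_t iov_offset;
		size_t count;
	};

	/* Mirrors iov_iter_single_seg_count(): never report more than remains. */
	static size_t single_seg_count(const struct toy_iter *i)
	{
		if (i->nr_segs == 1)
			return i->count;
		size_t seg = i->iov->iov_len - i->iov_offset;
		return seg < i->count ? seg : i->count;
	}

	int main(void)
	{
		char a[64], b[64];
		struct iovec v[2] = { { a, sizeof(a) }, { b, sizeof(b) } };
		/* 40 bytes consumed from segment 0, only 10 bytes wanted in total. */
		struct toy_iter i = { v, 2, 40, 10 };

		printf("current segment usable: %zu\n", single_seg_count(&i));	/* 10 */
		return 0;
	}
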
@@ -2585,7 +2379,7 @@ EXPORT_SYMBOL(pagecache_write_end);
 
 ssize_t
 generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long *nr_segs, loff_t pos, loff_t *ppos,
+		unsigned long *nr_segs, loff_t pos,
 		size_t count, size_t ocount)
 {
 	struct file	*file = iocb->ki_filp;
@@ -2646,7 +2440,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 			i_size_write(inode, pos);
 			mark_inode_dirty(inode);
 		}
-		*ppos = pos;
+		iocb->ki_pos = pos;
 	}
 out:
 	return written;
@@ -2692,7 +2486,7 @@ found:
 }
 EXPORT_SYMBOL(grab_cache_page_write_begin);
 
-static ssize_t generic_perform_write(struct file *file,
+ssize_t generic_perform_write(struct file *file,
 				struct iov_iter *i, loff_t pos)
 {
 	struct address_space *mapping = file->f_mapping;
@@ -2742,9 +2536,7 @@ again:
 		if (mapping_writably_mapped(mapping))
 			flush_dcache_page(page);
 
-		pagefault_disable();
 		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
-		pagefault_enable();
 		flush_dcache_page(page);
 
 		mark_page_accessed(page);
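
Dropping the explicit pagefault_disable()/pagefault_enable() pair implies the atomic copy helper is responsible for running with pagefaults off itself; kmap_atomic(), which the helper takes internally, already disables pagefaults, so the outer pair was redundant. The shape of the surrounding loop is worth keeping in mind: attempt a no-fault copy, and if it makes no progress, fall back to a sleeping fault-in and retry. A simplified user-space model (the real generic_perform_write() prefaults before each attempt rather than after a failure):

	#include <stdio.h>
	#include <string.h>

	/* Bytes of the source that are currently "resident" (faulted in). */
	static size_t resident;

	/* Stand-in for an atomic copy: cannot fault, so it stops at the boundary. */
	static size_t copy_atomic(char *dst, const char *src, size_t pos, size_t n)
	{
		size_t avail = resident > pos ? resident - pos : 0;
		size_t ok = n < avail ? n : avail;

		memcpy(dst + pos, src + pos, ok);
		return ok;
	}

	/* Stand-in for fault-in: may sleep in real life, which is why it must
	 * happen outside the atomic section. */
	static void fault_in(size_t upto)
	{
		if (resident < upto)
			resident = upto;
	}

	int main(void)
	{
		static const char src[256] = "payload";
		static char dst[256];
		size_t pos = 0, want = sizeof(src);

		while (pos < want) {
			size_t copied = copy_atomic(dst, src, pos, want - pos);

			pos += copied;
			if (copied == 0)
				fault_in(pos + 64);	/* prefault a chunk, retry */
		}
		printf("copied %zu bytes\n", pos);
		return 0;
	}
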
@@ -2782,27 +2574,7 @@ again:
 
 	return written ? written : status;
 }
-
-ssize_t
-generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos, loff_t *ppos,
-		size_t count, ssize_t written)
-{
-	struct file *file = iocb->ki_filp;
-	ssize_t status;
-	struct iov_iter i;
-
-	iov_iter_init(&i, iov, nr_segs, count, written);
-	status = generic_perform_write(file, &i, pos);
-
-	if (likely(status >= 0)) {
-		written += status;
-		*ppos = pos + status;
-	}
-
-	return written ? written : status;
-}
-EXPORT_SYMBOL(generic_file_buffered_write);
+EXPORT_SYMBOL(generic_perform_write);
 
 /**
  * __generic_file_aio_write - write data to a file
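
With the wrapper gone and generic_perform_write() exported, a caller of the old generic_file_buffered_write() is expected to build the iov_iter itself and advance the file position on success, as __generic_file_aio_write() does below. A toy reduction of what the removed wrapper amounted to (stub types and shapes, not the kernel API):

	#include <stdio.h>

	/* Toy stand-ins; shapes are illustrative, not the kernel signatures. */
	struct toy_iter { size_t count; };

	static long toy_perform_write(struct toy_iter *i)
	{
		long done = (long)i->count;	/* pretend it all lands in page cache */

		i->count = 0;
		return done;
	}

	/* Roughly what the removed generic_file_buffered_write() wrapper did. */
	static long buffered_write(long long *ppos, long long pos,
				   size_t count, long written)
	{
		struct toy_iter i = { .count = count };
		long status = toy_perform_write(&i);

		if (status >= 0) {
			written += status;
			*ppos = pos + status;	/* callers now update iocb->ki_pos */
		}
		return written ? written : status;
	}

	int main(void)
	{
		long long fpos = 0;
		long ret = buffered_write(&fpos, 4096, 512, 0);

		printf("ret=%ld new pos=%lld\n", ret, fpos);	/* ret=512 pos=4608 */
		return 0;
	}
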
@@ -2824,16 +2596,18 @@ EXPORT_SYMBOL(generic_file_buffered_write);
  * avoid syncing under i_mutex.
  */
 ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-				 unsigned long nr_segs, loff_t *ppos)
+				 unsigned long nr_segs)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space * mapping = file->f_mapping;
 	size_t ocount;		/* original count */
 	size_t count;		/* after file limit checks */
 	struct inode 	*inode = mapping->host;
-	loff_t		pos;
-	ssize_t		written;
+	loff_t		pos = iocb->ki_pos;
+	ssize_t		written = 0;
 	ssize_t		err;
+	ssize_t		status;
+	struct iov_iter from;
 
 	ocount = 0;
 	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
@@ -2841,12 +2615,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 		return err;
 
 	count = ocount;
-	pos = *ppos;
 
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = mapping->backing_dev_info;
-	written = 0;
-
 	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
 	if (err)
 		goto out;
@@ -2862,45 +2633,47 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (err)
 		goto out;
 
+	iov_iter_init(&from, iov, nr_segs, count, 0);
+
 	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
 	if (unlikely(file->f_flags & O_DIRECT)) {
 		loff_t endbyte;
-		ssize_t written_buffered;
 
-		written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
-							ppos, count, ocount);
+		written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos,
+							count, ocount);
 		if (written < 0 || written == count)
 			goto out;
+		iov_iter_advance(&from, written);
+
 		/*
 		 * direct-io write to a hole: fall through to buffered I/O
 		 * for completing the rest of the request.
 		 */
 		pos += written;
 		count -= written;
-		written_buffered = generic_file_buffered_write(iocb, iov,
-						nr_segs, pos, ppos, count,
-						written);
+
+		status = generic_perform_write(file, &from, pos);
 		/*
-		 * If generic_file_buffered_write() retuned a synchronous error
+		 * If generic_perform_write() returned a synchronous error
 		 * then we want to return the number of bytes which were
 		 * direct-written, or the error code if that was zero. Note
 		 * that this differs from normal direct-io semantics, which
 		 * will return -EFOO even if some bytes were written.
 		 */
-		if (written_buffered < 0) {
-			err = written_buffered;
+		if (unlikely(status < 0) && !written) {
+			err = status;
 			goto out;
 		}
-
+		iocb->ki_pos = pos + status;
 		/*
 		 * We need to ensure that the page cache pages are written to
 		 * disk and invalidated to preserve the expected O_DIRECT
 		 * semantics.
 		 */
-		endbyte = pos + written_buffered - written - 1;
+		endbyte = pos + status - 1;
 		err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
 		if (err == 0) {
-			written = written_buffered;
+			written += status;
 			invalidate_mapping_pages(mapping,
 						 pos >> PAGE_CACHE_SHIFT,
 						 endbyte >> PAGE_CACHE_SHIFT);
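
The bookkeeping in this hunk is easy to misread, so concrete numbers help: if a 64 KiB O_DIRECT write stops at a hole after 16 KiB, `written` is 16384 and pos/count are adjusted by it before generic_perform_write() produces `status` for the remainder. Then iocb->ki_pos = pos + status equals the original position plus all 65536 bytes, endbyte = pos + status - 1 bounds exactly the buffered range that must be flushed and invalidated, and `written += status` happens only after the flush succeeds, so the return value still counts bytes durably written under O_DIRECT semantics. A quick check of that arithmetic:

	#include <stdio.h>

	int main(void)
	{
		long long start = 1 << 20;	/* original file position: 1 MiB */
		long long count = 64 * 1024;	/* bytes requested */
		long long written = 16 * 1024;	/* direct IO stopped at a hole */

		long long pos = start + written;	/* pos += written */
		long long remaining = count - written;	/* count -= written */
		long long status = remaining;		/* buffered part succeeded */

		long long ki_pos  = pos + status;	/* == start + count */
		long long endbyte = pos + status - 1;	/* last buffered byte */

		printf("flush range [%lld, %lld], ki_pos=%lld, return=%lld\n",
		       pos, endbyte, ki_pos, written + status);
		return 0;
	}
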
@@ -2911,8 +2684,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 		 */
 		}
 	} else {
-		written = generic_file_buffered_write(iocb, iov, nr_segs,
-				pos, ppos, count, written);
+		written = generic_perform_write(file, &from, pos);
+		if (likely(written >= 0))
+			iocb->ki_pos = pos + written;
 	}
 out:
 	current->backing_dev_info = NULL;
@@ -2941,7 +2715,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	BUG_ON(iocb->ki_pos != pos);
 
 	mutex_lock(&inode->i_mutex);
-	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+	ret = __generic_file_aio_write(iocb, iov, nr_segs);
 	mutex_unlock(&inode->i_mutex);
 
 	if (ret > 0) {