diff options
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 344 |
1 files changed, 59 insertions, 285 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 27ebc0c9571b..a82fbe4c9e8e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c | |||
@@ -77,7 +77,7 @@ | |||
77 | * ->mmap_sem | 77 | * ->mmap_sem |
78 | * ->lock_page (access_process_vm) | 78 | * ->lock_page (access_process_vm) |
79 | * | 79 | * |
80 | * ->i_mutex (generic_file_buffered_write) | 80 | * ->i_mutex (generic_perform_write) |
81 | * ->mmap_sem (fault_in_pages_readable->do_page_fault) | 81 | * ->mmap_sem (fault_in_pages_readable->do_page_fault) |
82 | * | 82 | * |
83 | * bdi->wb.list_lock | 83 | * bdi->wb.list_lock |
@@ -1428,7 +1428,8 @@ static void shrink_readahead_size_eio(struct file *filp, | |||
1428 | * do_generic_file_read - generic file read routine | 1428 | * do_generic_file_read - generic file read routine |
1429 | * @filp: the file to read | 1429 | * @filp: the file to read |
1430 | * @ppos: current file position | 1430 | * @ppos: current file position |
1431 | * @desc: read_descriptor | 1431 | * @iter: data destination |
1432 | * @written: already copied | ||
1432 | * | 1433 | * |
1433 | * This is a generic file read routine, and uses the | 1434 | * This is a generic file read routine, and uses the |
1434 | * mapping->a_ops->readpage() function for the actual low-level stuff. | 1435 | * mapping->a_ops->readpage() function for the actual low-level stuff. |
@@ -1436,8 +1437,8 @@ static void shrink_readahead_size_eio(struct file *filp, | |||
1436 | * This is really ugly. But the goto's actually try to clarify some | 1437 | * This is really ugly. But the goto's actually try to clarify some |
1437 | * of the logic when it comes to error handling etc. | 1438 | * of the logic when it comes to error handling etc. |
1438 | */ | 1439 | */ |
1439 | static void do_generic_file_read(struct file *filp, loff_t *ppos, | 1440 | static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, |
1440 | read_descriptor_t *desc) | 1441 | struct iov_iter *iter, ssize_t written) |
1441 | { | 1442 | { |
1442 | struct address_space *mapping = filp->f_mapping; | 1443 | struct address_space *mapping = filp->f_mapping; |
1443 | struct inode *inode = mapping->host; | 1444 | struct inode *inode = mapping->host; |
@@ -1447,12 +1448,12 @@ static void do_generic_file_read(struct file *filp, loff_t *ppos, | |||
1447 | pgoff_t prev_index; | 1448 | pgoff_t prev_index; |
1448 | unsigned long offset; /* offset into pagecache page */ | 1449 | unsigned long offset; /* offset into pagecache page */ |
1449 | unsigned int prev_offset; | 1450 | unsigned int prev_offset; |
1450 | int error; | 1451 | int error = 0; |
1451 | 1452 | ||
1452 | index = *ppos >> PAGE_CACHE_SHIFT; | 1453 | index = *ppos >> PAGE_CACHE_SHIFT; |
1453 | prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT; | 1454 | prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT; |
1454 | prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1); | 1455 | prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1); |
1455 | last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; | 1456 | last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; |
1456 | offset = *ppos & ~PAGE_CACHE_MASK; | 1457 | offset = *ppos & ~PAGE_CACHE_MASK; |
1457 | 1458 | ||
1458 | for (;;) { | 1459 | for (;;) { |
@@ -1487,7 +1488,7 @@ find_page: | |||
1487 | if (!page->mapping) | 1488 | if (!page->mapping) |
1488 | goto page_not_up_to_date_locked; | 1489 | goto page_not_up_to_date_locked; |
1489 | if (!mapping->a_ops->is_partially_uptodate(page, | 1490 | if (!mapping->a_ops->is_partially_uptodate(page, |
1490 | desc, offset)) | 1491 | offset, iter->count)) |
1491 | goto page_not_up_to_date_locked; | 1492 | goto page_not_up_to_date_locked; |
1492 | unlock_page(page); | 1493 | unlock_page(page); |
1493 | } | 1494 | } |
@@ -1537,24 +1538,23 @@ page_ok: | |||
1537 | /* | 1538 | /* |
1538 | * Ok, we have the page, and it's up-to-date, so | 1539 | * Ok, we have the page, and it's up-to-date, so |
1539 | * now we can copy it to user space... | 1540 | * now we can copy it to user space... |
1540 | * | ||
1541 | * The file_read_actor routine returns how many bytes were | ||
1542 | * actually used.. | ||
1543 | * NOTE! This may not be the same as how much of a user buffer | ||
1544 | * we filled up (we may be padding etc), so we can only update | ||
1545 | * "pos" here (the actor routine has to update the user buffer | ||
1546 | * pointers and the remaining count). | ||
1547 | */ | 1541 | */ |
1548 | ret = file_read_actor(desc, page, offset, nr); | 1542 | |
1543 | ret = copy_page_to_iter(page, offset, nr, iter); | ||
1549 | offset += ret; | 1544 | offset += ret; |
1550 | index += offset >> PAGE_CACHE_SHIFT; | 1545 | index += offset >> PAGE_CACHE_SHIFT; |
1551 | offset &= ~PAGE_CACHE_MASK; | 1546 | offset &= ~PAGE_CACHE_MASK; |
1552 | prev_offset = offset; | 1547 | prev_offset = offset; |
1553 | 1548 | ||
1554 | page_cache_release(page); | 1549 | page_cache_release(page); |
1555 | if (ret == nr && desc->count) | 1550 | written += ret; |
1556 | continue; | 1551 | if (!iov_iter_count(iter)) |
1557 | goto out; | 1552 | goto out; |
1553 | if (ret < nr) { | ||
1554 | error = -EFAULT; | ||
1555 | goto out; | ||
1556 | } | ||
1557 | continue; | ||
1558 | 1558 | ||
1559 | page_not_up_to_date: | 1559 | page_not_up_to_date: |
1560 | /* Get exclusive access to the page ... */ | 1560 | /* Get exclusive access to the page ... */ |
@@ -1589,6 +1589,7 @@ readpage: | |||
1589 | if (unlikely(error)) { | 1589 | if (unlikely(error)) { |
1590 | if (error == AOP_TRUNCATED_PAGE) { | 1590 | if (error == AOP_TRUNCATED_PAGE) { |
1591 | page_cache_release(page); | 1591 | page_cache_release(page); |
1592 | error = 0; | ||
1592 | goto find_page; | 1593 | goto find_page; |
1593 | } | 1594 | } |
1594 | goto readpage_error; | 1595 | goto readpage_error; |
@@ -1619,7 +1620,6 @@ readpage: | |||
1619 | 1620 | ||
1620 | readpage_error: | 1621 | readpage_error: |
1621 | /* UHHUH! A synchronous read error occurred. Report it */ | 1622 | /* UHHUH! A synchronous read error occurred. Report it */ |
1622 | desc->error = error; | ||
1623 | page_cache_release(page); | 1623 | page_cache_release(page); |
1624 | goto out; | 1624 | goto out; |
1625 | 1625 | ||
@@ -1630,16 +1630,17 @@ no_cached_page: | |||
1630 | */ | 1630 | */ |
1631 | page = page_cache_alloc_cold(mapping); | 1631 | page = page_cache_alloc_cold(mapping); |
1632 | if (!page) { | 1632 | if (!page) { |
1633 | desc->error = -ENOMEM; | 1633 | error = -ENOMEM; |
1634 | goto out; | 1634 | goto out; |
1635 | } | 1635 | } |
1636 | error = add_to_page_cache_lru(page, mapping, | 1636 | error = add_to_page_cache_lru(page, mapping, |
1637 | index, GFP_KERNEL); | 1637 | index, GFP_KERNEL); |
1638 | if (error) { | 1638 | if (error) { |
1639 | page_cache_release(page); | 1639 | page_cache_release(page); |
1640 | if (error == -EEXIST) | 1640 | if (error == -EEXIST) { |
1641 | error = 0; | ||
1641 | goto find_page; | 1642 | goto find_page; |
1642 | desc->error = error; | 1643 | } |
1643 | goto out; | 1644 | goto out; |
1644 | } | 1645 | } |
1645 | goto readpage; | 1646 | goto readpage; |
@@ -1652,44 +1653,7 @@ out: | |||
1652 | 1653 | ||
1653 | *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset; | 1654 | *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset; |
1654 | file_accessed(filp); | 1655 | file_accessed(filp); |
1655 | } | 1656 | return written ? written : error; |
1656 | |||
1657 | int file_read_actor(read_descriptor_t *desc, struct page *page, | ||
1658 | unsigned long offset, unsigned long size) | ||
1659 | { | ||
1660 | char *kaddr; | ||
1661 | unsigned long left, count = desc->count; | ||
1662 | |||
1663 | if (size > count) | ||
1664 | size = count; | ||
1665 | |||
1666 | /* | ||
1667 | * Faults on the destination of a read are common, so do it before | ||
1668 | * taking the kmap. | ||
1669 | */ | ||
1670 | if (!fault_in_pages_writeable(desc->arg.buf, size)) { | ||
1671 | kaddr = kmap_atomic(page); | ||
1672 | left = __copy_to_user_inatomic(desc->arg.buf, | ||
1673 | kaddr + offset, size); | ||
1674 | kunmap_atomic(kaddr); | ||
1675 | if (left == 0) | ||
1676 | goto success; | ||
1677 | } | ||
1678 | |||
1679 | /* Do it the slow way */ | ||
1680 | kaddr = kmap(page); | ||
1681 | left = __copy_to_user(desc->arg.buf, kaddr + offset, size); | ||
1682 | kunmap(page); | ||
1683 | |||
1684 | if (left) { | ||
1685 | size -= left; | ||
1686 | desc->error = -EFAULT; | ||
1687 | } | ||
1688 | success: | ||
1689 | desc->count = count - size; | ||
1690 | desc->written += size; | ||
1691 | desc->arg.buf += size; | ||
1692 | return size; | ||
1693 | } | 1657 | } |
1694 | 1658 | ||
1695 | /* | 1659 | /* |
@@ -1747,14 +1711,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
1747 | { | 1711 | { |
1748 | struct file *filp = iocb->ki_filp; | 1712 | struct file *filp = iocb->ki_filp; |
1749 | ssize_t retval; | 1713 | ssize_t retval; |
1750 | unsigned long seg = 0; | ||
1751 | size_t count; | 1714 | size_t count; |
1752 | loff_t *ppos = &iocb->ki_pos; | 1715 | loff_t *ppos = &iocb->ki_pos; |
1716 | struct iov_iter i; | ||
1753 | 1717 | ||
1754 | count = 0; | 1718 | count = 0; |
1755 | retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); | 1719 | retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); |
1756 | if (retval) | 1720 | if (retval) |
1757 | return retval; | 1721 | return retval; |
1722 | iov_iter_init(&i, iov, nr_segs, count, 0); | ||
1758 | 1723 | ||
1759 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ | 1724 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ |
1760 | if (filp->f_flags & O_DIRECT) { | 1725 | if (filp->f_flags & O_DIRECT) { |
@@ -1776,6 +1741,11 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
1776 | if (retval > 0) { | 1741 | if (retval > 0) { |
1777 | *ppos = pos + retval; | 1742 | *ppos = pos + retval; |
1778 | count -= retval; | 1743 | count -= retval; |
1744 | /* | ||
1745 | * If we did a short DIO read we need to skip the | ||
1746 | * section of the iov that we've already read data into. | ||
1747 | */ | ||
1748 | iov_iter_advance(&i, retval); | ||
1779 | } | 1749 | } |
1780 | 1750 | ||
1781 | /* | 1751 | /* |
@@ -1792,39 +1762,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
1792 | } | 1762 | } |
1793 | } | 1763 | } |
1794 | 1764 | ||
1795 | count = retval; | 1765 | retval = do_generic_file_read(filp, ppos, &i, retval); |
1796 | for (seg = 0; seg < nr_segs; seg++) { | ||
1797 | read_descriptor_t desc; | ||
1798 | loff_t offset = 0; | ||
1799 | |||
1800 | /* | ||
1801 | * If we did a short DIO read we need to skip the section of the | ||
1802 | * iov that we've already read data into. | ||
1803 | */ | ||
1804 | if (count) { | ||
1805 | if (count > iov[seg].iov_len) { | ||
1806 | count -= iov[seg].iov_len; | ||
1807 | continue; | ||
1808 | } | ||
1809 | offset = count; | ||
1810 | count = 0; | ||
1811 | } | ||
1812 | |||
1813 | desc.written = 0; | ||
1814 | desc.arg.buf = iov[seg].iov_base + offset; | ||
1815 | desc.count = iov[seg].iov_len - offset; | ||
1816 | if (desc.count == 0) | ||
1817 | continue; | ||
1818 | desc.error = 0; | ||
1819 | do_generic_file_read(filp, ppos, &desc); | ||
1820 | retval += desc.written; | ||
1821 | if (desc.error) { | ||
1822 | retval = retval ?: desc.error; | ||
1823 | break; | ||
1824 | } | ||
1825 | if (desc.count > 0) | ||
1826 | break; | ||
1827 | } | ||
1828 | out: | 1766 | out: |
1829 | return retval; | 1767 | return retval; |
1830 | } | 1768 | } |
@@ -2335,150 +2273,6 @@ struct page *read_cache_page_gfp(struct address_space *mapping, | |||
2335 | } | 2273 | } |
2336 | EXPORT_SYMBOL(read_cache_page_gfp); | 2274 | EXPORT_SYMBOL(read_cache_page_gfp); |
2337 | 2275 | ||
2338 | static size_t __iovec_copy_from_user_inatomic(char *vaddr, | ||
2339 | const struct iovec *iov, size_t base, size_t bytes) | ||
2340 | { | ||
2341 | size_t copied = 0, left = 0; | ||
2342 | |||
2343 | while (bytes) { | ||
2344 | char __user *buf = iov->iov_base + base; | ||
2345 | int copy = min(bytes, iov->iov_len - base); | ||
2346 | |||
2347 | base = 0; | ||
2348 | left = __copy_from_user_inatomic(vaddr, buf, copy); | ||
2349 | copied += copy; | ||
2350 | bytes -= copy; | ||
2351 | vaddr += copy; | ||
2352 | iov++; | ||
2353 | |||
2354 | if (unlikely(left)) | ||
2355 | break; | ||
2356 | } | ||
2357 | return copied - left; | ||
2358 | } | ||
2359 | |||
2360 | /* | ||
2361 | * Copy as much as we can into the page and return the number of bytes which | ||
2362 | * were successfully copied. If a fault is encountered then return the number of | ||
2363 | * bytes which were copied. | ||
2364 | */ | ||
2365 | size_t iov_iter_copy_from_user_atomic(struct page *page, | ||
2366 | struct iov_iter *i, unsigned long offset, size_t bytes) | ||
2367 | { | ||
2368 | char *kaddr; | ||
2369 | size_t copied; | ||
2370 | |||
2371 | BUG_ON(!in_atomic()); | ||
2372 | kaddr = kmap_atomic(page); | ||
2373 | if (likely(i->nr_segs == 1)) { | ||
2374 | int left; | ||
2375 | char __user *buf = i->iov->iov_base + i->iov_offset; | ||
2376 | left = __copy_from_user_inatomic(kaddr + offset, buf, bytes); | ||
2377 | copied = bytes - left; | ||
2378 | } else { | ||
2379 | copied = __iovec_copy_from_user_inatomic(kaddr + offset, | ||
2380 | i->iov, i->iov_offset, bytes); | ||
2381 | } | ||
2382 | kunmap_atomic(kaddr); | ||
2383 | |||
2384 | return copied; | ||
2385 | } | ||
2386 | EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); | ||
2387 | |||
2388 | /* | ||
2389 | * This has the same sideeffects and return value as | ||
2390 | * iov_iter_copy_from_user_atomic(). | ||
2391 | * The difference is that it attempts to resolve faults. | ||
2392 | * Page must not be locked. | ||
2393 | */ | ||
2394 | size_t iov_iter_copy_from_user(struct page *page, | ||
2395 | struct iov_iter *i, unsigned long offset, size_t bytes) | ||
2396 | { | ||
2397 | char *kaddr; | ||
2398 | size_t copied; | ||
2399 | |||
2400 | kaddr = kmap(page); | ||
2401 | if (likely(i->nr_segs == 1)) { | ||
2402 | int left; | ||
2403 | char __user *buf = i->iov->iov_base + i->iov_offset; | ||
2404 | left = __copy_from_user(kaddr + offset, buf, bytes); | ||
2405 | copied = bytes - left; | ||
2406 | } else { | ||
2407 | copied = __iovec_copy_from_user_inatomic(kaddr + offset, | ||
2408 | i->iov, i->iov_offset, bytes); | ||
2409 | } | ||
2410 | kunmap(page); | ||
2411 | return copied; | ||
2412 | } | ||
2413 | EXPORT_SYMBOL(iov_iter_copy_from_user); | ||
2414 | |||
2415 | void iov_iter_advance(struct iov_iter *i, size_t bytes) | ||
2416 | { | ||
2417 | BUG_ON(i->count < bytes); | ||
2418 | |||
2419 | if (likely(i->nr_segs == 1)) { | ||
2420 | i->iov_offset += bytes; | ||
2421 | i->count -= bytes; | ||
2422 | } else { | ||
2423 | const struct iovec *iov = i->iov; | ||
2424 | size_t base = i->iov_offset; | ||
2425 | unsigned long nr_segs = i->nr_segs; | ||
2426 | |||
2427 | /* | ||
2428 | * The !iov->iov_len check ensures we skip over unlikely | ||
2429 | * zero-length segments (without overruning the iovec). | ||
2430 | */ | ||
2431 | while (bytes || unlikely(i->count && !iov->iov_len)) { | ||
2432 | int copy; | ||
2433 | |||
2434 | copy = min(bytes, iov->iov_len - base); | ||
2435 | BUG_ON(!i->count || i->count < copy); | ||
2436 | i->count -= copy; | ||
2437 | bytes -= copy; | ||
2438 | base += copy; | ||
2439 | if (iov->iov_len == base) { | ||
2440 | iov++; | ||
2441 | nr_segs--; | ||
2442 | base = 0; | ||
2443 | } | ||
2444 | } | ||
2445 | i->iov = iov; | ||
2446 | i->iov_offset = base; | ||
2447 | i->nr_segs = nr_segs; | ||
2448 | } | ||
2449 | } | ||
2450 | EXPORT_SYMBOL(iov_iter_advance); | ||
2451 | |||
2452 | /* | ||
2453 | * Fault in the first iovec of the given iov_iter, to a maximum length | ||
2454 | * of bytes. Returns 0 on success, or non-zero if the memory could not be | ||
2455 | * accessed (ie. because it is an invalid address). | ||
2456 | * | ||
2457 | * writev-intensive code may want this to prefault several iovecs -- that | ||
2458 | * would be possible (callers must not rely on the fact that _only_ the | ||
2459 | * first iovec will be faulted with the current implementation). | ||
2460 | */ | ||
2461 | int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) | ||
2462 | { | ||
2463 | char __user *buf = i->iov->iov_base + i->iov_offset; | ||
2464 | bytes = min(bytes, i->iov->iov_len - i->iov_offset); | ||
2465 | return fault_in_pages_readable(buf, bytes); | ||
2466 | } | ||
2467 | EXPORT_SYMBOL(iov_iter_fault_in_readable); | ||
2468 | |||
2469 | /* | ||
2470 | * Return the count of just the current iov_iter segment. | ||
2471 | */ | ||
2472 | size_t iov_iter_single_seg_count(const struct iov_iter *i) | ||
2473 | { | ||
2474 | const struct iovec *iov = i->iov; | ||
2475 | if (i->nr_segs == 1) | ||
2476 | return i->count; | ||
2477 | else | ||
2478 | return min(i->count, iov->iov_len - i->iov_offset); | ||
2479 | } | ||
2480 | EXPORT_SYMBOL(iov_iter_single_seg_count); | ||
2481 | |||
2482 | /* | 2276 | /* |
2483 | * Performs necessary checks before doing a write | 2277 | * Performs necessary checks before doing a write |
2484 | * | 2278 | * |
@@ -2585,7 +2379,7 @@ EXPORT_SYMBOL(pagecache_write_end); | |||
2585 | 2379 | ||
2586 | ssize_t | 2380 | ssize_t |
2587 | generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | 2381 | generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, |
2588 | unsigned long *nr_segs, loff_t pos, loff_t *ppos, | 2382 | unsigned long *nr_segs, loff_t pos, |
2589 | size_t count, size_t ocount) | 2383 | size_t count, size_t ocount) |
2590 | { | 2384 | { |
2591 | struct file *file = iocb->ki_filp; | 2385 | struct file *file = iocb->ki_filp; |
@@ -2646,7 +2440,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
2646 | i_size_write(inode, pos); | 2440 | i_size_write(inode, pos); |
2647 | mark_inode_dirty(inode); | 2441 | mark_inode_dirty(inode); |
2648 | } | 2442 | } |
2649 | *ppos = pos; | 2443 | iocb->ki_pos = pos; |
2650 | } | 2444 | } |
2651 | out: | 2445 | out: |
2652 | return written; | 2446 | return written; |
@@ -2692,7 +2486,7 @@ found: | |||
2692 | } | 2486 | } |
2693 | EXPORT_SYMBOL(grab_cache_page_write_begin); | 2487 | EXPORT_SYMBOL(grab_cache_page_write_begin); |
2694 | 2488 | ||
2695 | static ssize_t generic_perform_write(struct file *file, | 2489 | ssize_t generic_perform_write(struct file *file, |
2696 | struct iov_iter *i, loff_t pos) | 2490 | struct iov_iter *i, loff_t pos) |
2697 | { | 2491 | { |
2698 | struct address_space *mapping = file->f_mapping; | 2492 | struct address_space *mapping = file->f_mapping; |
@@ -2742,9 +2536,7 @@ again: | |||
2742 | if (mapping_writably_mapped(mapping)) | 2536 | if (mapping_writably_mapped(mapping)) |
2743 | flush_dcache_page(page); | 2537 | flush_dcache_page(page); |
2744 | 2538 | ||
2745 | pagefault_disable(); | ||
2746 | copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); | 2539 | copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); |
2747 | pagefault_enable(); | ||
2748 | flush_dcache_page(page); | 2540 | flush_dcache_page(page); |
2749 | 2541 | ||
2750 | mark_page_accessed(page); | 2542 | mark_page_accessed(page); |
@@ -2782,27 +2574,7 @@ again: | |||
2782 | 2574 | ||
2783 | return written ? written : status; | 2575 | return written ? written : status; |
2784 | } | 2576 | } |
2785 | 2577 | EXPORT_SYMBOL(generic_perform_write); | |
2786 | ssize_t | ||
2787 | generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | ||
2788 | unsigned long nr_segs, loff_t pos, loff_t *ppos, | ||
2789 | size_t count, ssize_t written) | ||
2790 | { | ||
2791 | struct file *file = iocb->ki_filp; | ||
2792 | ssize_t status; | ||
2793 | struct iov_iter i; | ||
2794 | |||
2795 | iov_iter_init(&i, iov, nr_segs, count, written); | ||
2796 | status = generic_perform_write(file, &i, pos); | ||
2797 | |||
2798 | if (likely(status >= 0)) { | ||
2799 | written += status; | ||
2800 | *ppos = pos + status; | ||
2801 | } | ||
2802 | |||
2803 | return written ? written : status; | ||
2804 | } | ||
2805 | EXPORT_SYMBOL(generic_file_buffered_write); | ||
2806 | 2578 | ||
2807 | /** | 2579 | /** |
2808 | * __generic_file_aio_write - write data to a file | 2580 | * __generic_file_aio_write - write data to a file |
@@ -2824,16 +2596,18 @@ EXPORT_SYMBOL(generic_file_buffered_write); | |||
2824 | * avoid syncing under i_mutex. | 2596 | * avoid syncing under i_mutex. |
2825 | */ | 2597 | */ |
2826 | ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | 2598 | ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, |
2827 | unsigned long nr_segs, loff_t *ppos) | 2599 | unsigned long nr_segs) |
2828 | { | 2600 | { |
2829 | struct file *file = iocb->ki_filp; | 2601 | struct file *file = iocb->ki_filp; |
2830 | struct address_space * mapping = file->f_mapping; | 2602 | struct address_space * mapping = file->f_mapping; |
2831 | size_t ocount; /* original count */ | 2603 | size_t ocount; /* original count */ |
2832 | size_t count; /* after file limit checks */ | 2604 | size_t count; /* after file limit checks */ |
2833 | struct inode *inode = mapping->host; | 2605 | struct inode *inode = mapping->host; |
2834 | loff_t pos; | 2606 | loff_t pos = iocb->ki_pos; |
2835 | ssize_t written; | 2607 | ssize_t written = 0; |
2836 | ssize_t err; | 2608 | ssize_t err; |
2609 | ssize_t status; | ||
2610 | struct iov_iter from; | ||
2837 | 2611 | ||
2838 | ocount = 0; | 2612 | ocount = 0; |
2839 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | 2613 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); |
@@ -2841,12 +2615,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2841 | return err; | 2615 | return err; |
2842 | 2616 | ||
2843 | count = ocount; | 2617 | count = ocount; |
2844 | pos = *ppos; | ||
2845 | 2618 | ||
2846 | /* We can write back this queue in page reclaim */ | 2619 | /* We can write back this queue in page reclaim */ |
2847 | current->backing_dev_info = mapping->backing_dev_info; | 2620 | current->backing_dev_info = mapping->backing_dev_info; |
2848 | written = 0; | ||
2849 | |||
2850 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 2621 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
2851 | if (err) | 2622 | if (err) |
2852 | goto out; | 2623 | goto out; |
@@ -2862,45 +2633,47 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2862 | if (err) | 2633 | if (err) |
2863 | goto out; | 2634 | goto out; |
2864 | 2635 | ||
2636 | iov_iter_init(&from, iov, nr_segs, count, 0); | ||
2637 | |||
2865 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ | 2638 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ |
2866 | if (unlikely(file->f_flags & O_DIRECT)) { | 2639 | if (unlikely(file->f_flags & O_DIRECT)) { |
2867 | loff_t endbyte; | 2640 | loff_t endbyte; |
2868 | ssize_t written_buffered; | ||
2869 | 2641 | ||
2870 | written = generic_file_direct_write(iocb, iov, &nr_segs, pos, | 2642 | written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos, |
2871 | ppos, count, ocount); | 2643 | count, ocount); |
2872 | if (written < 0 || written == count) | 2644 | if (written < 0 || written == count) |
2873 | goto out; | 2645 | goto out; |
2646 | iov_iter_advance(&from, written); | ||
2647 | |||
2874 | /* | 2648 | /* |
2875 | * direct-io write to a hole: fall through to buffered I/O | 2649 | * direct-io write to a hole: fall through to buffered I/O |
2876 | * for completing the rest of the request. | 2650 | * for completing the rest of the request. |
2877 | */ | 2651 | */ |
2878 | pos += written; | 2652 | pos += written; |
2879 | count -= written; | 2653 | count -= written; |
2880 | written_buffered = generic_file_buffered_write(iocb, iov, | 2654 | |
2881 | nr_segs, pos, ppos, count, | 2655 | status = generic_perform_write(file, &from, pos); |
2882 | written); | ||
2883 | /* | 2656 | /* |
2884 | * If generic_file_buffered_write() retuned a synchronous error | 2657 | * If generic_perform_write() returned a synchronous error |
2885 | * then we want to return the number of bytes which were | 2658 | * then we want to return the number of bytes which were |
2886 | * direct-written, or the error code if that was zero. Note | 2659 | * direct-written, or the error code if that was zero. Note |
2887 | * that this differs from normal direct-io semantics, which | 2660 | * that this differs from normal direct-io semantics, which |
2888 | * will return -EFOO even if some bytes were written. | 2661 | * will return -EFOO even if some bytes were written. |
2889 | */ | 2662 | */ |
2890 | if (written_buffered < 0) { | 2663 | if (unlikely(status < 0) && !written) { |
2891 | err = written_buffered; | 2664 | err = status; |
2892 | goto out; | 2665 | goto out; |
2893 | } | 2666 | } |
2894 | 2667 | iocb->ki_pos = pos + status; | |
2895 | /* | 2668 | /* |
2896 | * We need to ensure that the page cache pages are written to | 2669 | * We need to ensure that the page cache pages are written to |
2897 | * disk and invalidated to preserve the expected O_DIRECT | 2670 | * disk and invalidated to preserve the expected O_DIRECT |
2898 | * semantics. | 2671 | * semantics. |
2899 | */ | 2672 | */ |
2900 | endbyte = pos + written_buffered - written - 1; | 2673 | endbyte = pos + status - 1; |
2901 | err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); | 2674 | err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); |
2902 | if (err == 0) { | 2675 | if (err == 0) { |
2903 | written = written_buffered; | 2676 | written += status; |
2904 | invalidate_mapping_pages(mapping, | 2677 | invalidate_mapping_pages(mapping, |
2905 | pos >> PAGE_CACHE_SHIFT, | 2678 | pos >> PAGE_CACHE_SHIFT, |
2906 | endbyte >> PAGE_CACHE_SHIFT); | 2679 | endbyte >> PAGE_CACHE_SHIFT); |
@@ -2911,8 +2684,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2911 | */ | 2684 | */ |
2912 | } | 2685 | } |
2913 | } else { | 2686 | } else { |
2914 | written = generic_file_buffered_write(iocb, iov, nr_segs, | 2687 | written = generic_perform_write(file, &from, pos); |
2915 | pos, ppos, count, written); | 2688 | if (likely(written >= 0)) |
2689 | iocb->ki_pos = pos + written; | ||
2916 | } | 2690 | } |
2917 | out: | 2691 | out: |
2918 | current->backing_dev_info = NULL; | 2692 | current->backing_dev_info = NULL; |
@@ -2941,7 +2715,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2941 | BUG_ON(iocb->ki_pos != pos); | 2715 | BUG_ON(iocb->ki_pos != pos); |
2942 | 2716 | ||
2943 | mutex_lock(&inode->i_mutex); | 2717 | mutex_lock(&inode->i_mutex); |
2944 | ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); | 2718 | ret = __generic_file_aio_write(iocb, iov, nr_segs); |
2945 | mutex_unlock(&inode->i_mutex); | 2719 | mutex_unlock(&inode->i_mutex); |
2946 | 2720 | ||
2947 | if (ret > 0) { | 2721 | if (ret > 0) { |