about summary refs log tree commit diff stats
path: root/fs/aio.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-06-28 12:43:44 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-06-28 13:40:47 -0400
commita11e1d432b51f63ba698d044441284a661f01144 (patch)
tree9f3c5a10bf0d7f9a342d5fb39c0c35ea14170124 /fs/aio.c
parentf57494321cbf5b1e7769b6135407d2995a369e28 (diff)
Revert changes to convert to ->poll_mask() and aio IOCB_CMD_POLL
The poll() changes were not well thought out, and completely unexplained. They also caused a huge performance regression, because "->poll()" was no longer a trivial file operation that just called down to the underlying file operations, but instead did at least two indirect calls. Indirect calls are sadly slow now with the Spectre mitigation, but the performance problem could at least be largely mitigated by changing the "->get_poll_head()" operation to just have a per-file-descriptor pointer to the poll head instead. That gets rid of one of the new indirections. But that doesn't fix the new complexity that is completely unwarranted for the regular case. The (undocumented) reason for the poll() changes was some alleged AIO poll race fixing, but we don't make the common case slower and more complex for some uncommon special case, so this all really needs way more explanations and most likely a fundamental redesign. [ This revert is a revert of about 30 different commits, not reverted individually because that would just be unnecessarily messy - Linus ] Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Christoph Hellwig <hch@lst.de> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/aio.c')
-rw-r--r--fs/aio.c148
1 file changed, 1 insertion, 147 deletions
diff --git a/fs/aio.c b/fs/aio.c
index e1d20124ec0e..210df9da1283 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -5,7 +5,6 @@
5 * Implements an efficient asynchronous io interface. 5 * Implements an efficient asynchronous io interface.
6 * 6 *
7 * Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved. 7 * Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved.
8 * Copyright 2018 Christoph Hellwig.
9 * 8 *
10 * See ../COPYING for licensing terms. 9 * See ../COPYING for licensing terms.
11 */ 10 */
@@ -165,22 +164,10 @@ struct fsync_iocb {
165 bool datasync; 164 bool datasync;
166}; 165};
167 166
168struct poll_iocb {
169 struct file *file;
170 __poll_t events;
171 struct wait_queue_head *head;
172
173 union {
174 struct wait_queue_entry wait;
175 struct work_struct work;
176 };
177};
178
179struct aio_kiocb { 167struct aio_kiocb {
180 union { 168 union {
181 struct kiocb rw; 169 struct kiocb rw;
182 struct fsync_iocb fsync; 170 struct fsync_iocb fsync;
183 struct poll_iocb poll;
184 }; 171 };
185 172
186 struct kioctx *ki_ctx; 173 struct kioctx *ki_ctx;
@@ -1590,6 +1577,7 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
1590 if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes || 1577 if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
1591 iocb->aio_rw_flags)) 1578 iocb->aio_rw_flags))
1592 return -EINVAL; 1579 return -EINVAL;
1580
1593 req->file = fget(iocb->aio_fildes); 1581 req->file = fget(iocb->aio_fildes);
1594 if (unlikely(!req->file)) 1582 if (unlikely(!req->file))
1595 return -EBADF; 1583 return -EBADF;
@@ -1604,137 +1592,6 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
1604 return 0; 1592 return 0;
1605} 1593}
1606 1594
1607/* need to use list_del_init so we can check if item was present */
1608static inline bool __aio_poll_remove(struct poll_iocb *req)
1609{
1610 if (list_empty(&req->wait.entry))
1611 return false;
1612 list_del_init(&req->wait.entry);
1613 return true;
1614}
1615
1616static inline void __aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
1617{
1618 fput(iocb->poll.file);
1619 aio_complete(iocb, mangle_poll(mask), 0);
1620}
1621
1622static void aio_poll_work(struct work_struct *work)
1623{
1624 struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, poll.work);
1625
1626 if (!list_empty_careful(&iocb->ki_list))
1627 aio_remove_iocb(iocb);
1628 __aio_poll_complete(iocb, iocb->poll.events);
1629}
1630
1631static int aio_poll_cancel(struct kiocb *iocb)
1632{
1633 struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
1634 struct poll_iocb *req = &aiocb->poll;
1635 struct wait_queue_head *head = req->head;
1636 bool found = false;
1637
1638 spin_lock(&head->lock);
1639 found = __aio_poll_remove(req);
1640 spin_unlock(&head->lock);
1641
1642 if (found) {
1643 req->events = 0;
1644 INIT_WORK(&req->work, aio_poll_work);
1645 schedule_work(&req->work);
1646 }
1647 return 0;
1648}
1649
1650static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
1651 void *key)
1652{
1653 struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
1654 struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
1655 struct file *file = req->file;
1656 __poll_t mask = key_to_poll(key);
1657
1658 assert_spin_locked(&req->head->lock);
1659
1660 /* for instances that support it check for an event match first: */
1661 if (mask && !(mask & req->events))
1662 return 0;
1663
1664 mask = file->f_op->poll_mask(file, req->events) & req->events;
1665 if (!mask)
1666 return 0;
1667
1668 __aio_poll_remove(req);
1669
1670 /*
1671 * Try completing without a context switch if we can acquire ctx_lock
1672 * without spinning. Otherwise we need to defer to a workqueue to
1673 * avoid a deadlock due to the lock order.
1674 */
1675 if (spin_trylock(&iocb->ki_ctx->ctx_lock)) {
1676 list_del_init(&iocb->ki_list);
1677 spin_unlock(&iocb->ki_ctx->ctx_lock);
1678
1679 __aio_poll_complete(iocb, mask);
1680 } else {
1681 req->events = mask;
1682 INIT_WORK(&req->work, aio_poll_work);
1683 schedule_work(&req->work);
1684 }
1685
1686 return 1;
1687}
1688
1689static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
1690{
1691 struct kioctx *ctx = aiocb->ki_ctx;
1692 struct poll_iocb *req = &aiocb->poll;
1693 __poll_t mask;
1694
1695 /* reject any unknown events outside the normal event mask. */
1696 if ((u16)iocb->aio_buf != iocb->aio_buf)
1697 return -EINVAL;
1698 /* reject fields that are not defined for poll */
1699 if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
1700 return -EINVAL;
1701
1702 req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
1703 req->file = fget(iocb->aio_fildes);
1704 if (unlikely(!req->file))
1705 return -EBADF;
1706 if (!file_has_poll_mask(req->file))
1707 goto out_fail;
1708
1709 req->head = req->file->f_op->get_poll_head(req->file, req->events);
1710 if (!req->head)
1711 goto out_fail;
1712 if (IS_ERR(req->head)) {
1713 mask = EPOLLERR;
1714 goto done;
1715 }
1716
1717 init_waitqueue_func_entry(&req->wait, aio_poll_wake);
1718 aiocb->ki_cancel = aio_poll_cancel;
1719
1720 spin_lock_irq(&ctx->ctx_lock);
1721 spin_lock(&req->head->lock);
1722 mask = req->file->f_op->poll_mask(req->file, req->events) & req->events;
1723 if (!mask) {
1724 __add_wait_queue(req->head, &req->wait);
1725 list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
1726 }
1727 spin_unlock(&req->head->lock);
1728 spin_unlock_irq(&ctx->ctx_lock);
1729done:
1730 if (mask)
1731 __aio_poll_complete(aiocb, mask);
1732 return 0;
1733out_fail:
1734 fput(req->file);
1735 return -EINVAL; /* same as no support for IOCB_CMD_POLL */
1736}
1737
1738static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1595static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1739 bool compat) 1596 bool compat)
1740{ 1597{
@@ -1808,9 +1665,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1808 case IOCB_CMD_FDSYNC: 1665 case IOCB_CMD_FDSYNC:
1809 ret = aio_fsync(&req->fsync, &iocb, true); 1666 ret = aio_fsync(&req->fsync, &iocb, true);
1810 break; 1667 break;
1811 case IOCB_CMD_POLL:
1812 ret = aio_poll(req, &iocb);
1813 break;
1814 default: 1668 default:
1815 pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode); 1669 pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
1816 ret = -EINVAL; 1670 ret = -EINVAL;