aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2008-11-26 06:03:55 -0500
committerMiklos Szeredi <miklos@szeredi.hu>2008-11-26 06:03:55 -0500
commit59efec7b903987dcb60b9ebc85c7acd4443a11a1 (patch)
tree1287e817201fcb4008917b2bffd378c36540622b
parente9bb09dd6c5b8ec6a971ed6251df5eba3a4c8d3c (diff)
fuse: implement ioctl support
Generic ioctl support is tricky to implement because only the ioctl implementation itself knows which memory regions need to be read and/or written. To support this, fuse client can request retry of ioctl specifying memory regions to read and write. Deep copying (nested pointers) can be implemented by retrying multiple times resolving one depth of dereference at a time. For security and cleanliness considerations, ioctl implementation has restricted mode where the kernel determines data transfer directions and sizes using the _IOC_*() macros on the ioctl command. In this mode, retry is not allowed. For all FUSE servers, restricted mode is enforced. Unrestricted ioctl will be used by CUSE. Please read the comment on top of fs/fuse/file.c::fuse_file_do_ioctl() for more information. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
-rw-r--r--fs/fuse/file.c280
-rw-r--r--include/linux/fuse.h32
2 files changed, 312 insertions, 0 deletions
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 617269803913..baed06ea7622 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1469,6 +1469,282 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1469 return retval; 1469 return retval;
1470} 1470}
1471 1471
1472static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
1473 unsigned int nr_segs, size_t bytes, bool to_user)
1474{
1475 struct iov_iter ii;
1476 int page_idx = 0;
1477
1478 if (!bytes)
1479 return 0;
1480
1481 iov_iter_init(&ii, iov, nr_segs, bytes, 0);
1482
1483 while (iov_iter_count(&ii)) {
1484 struct page *page = pages[page_idx++];
1485 size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii));
1486 void *kaddr, *map;
1487
1488 kaddr = map = kmap(page);
1489
1490 while (todo) {
1491 char __user *uaddr = ii.iov->iov_base + ii.iov_offset;
1492 size_t iov_len = ii.iov->iov_len - ii.iov_offset;
1493 size_t copy = min(todo, iov_len);
1494 size_t left;
1495
1496 if (!to_user)
1497 left = copy_from_user(kaddr, uaddr, copy);
1498 else
1499 left = copy_to_user(uaddr, kaddr, copy);
1500
1501 if (unlikely(left))
1502 return -EFAULT;
1503
1504 iov_iter_advance(&ii, copy);
1505 todo -= copy;
1506 kaddr += copy;
1507 }
1508
1509 kunmap(map);
1510 }
1511
1512 return 0;
1513}
1514
1515/*
1516 * For ioctls, there is no generic way to determine how much memory
1517 * needs to be read and/or written. Furthermore, ioctls are allowed
1518 * to dereference the passed pointer, so the parameter requires deep
1519 * copying but FUSE has no idea whatsoever about what to copy in or
1520 * out.
1521 *
1522 * This is solved by allowing FUSE server to retry ioctl with
1523 * necessary in/out iovecs. Let's assume the ioctl implementation
1524 * needs to read in the following structure.
1525 *
1526 * struct a {
1527 * char *buf;
1528 * size_t buflen;
1529 * }
1530 *
1531 * On the first callout to FUSE server, inarg->in_size and
1532 * inarg->out_size will be NULL; then, the server completes the ioctl
1533 * with FUSE_IOCTL_RETRY set in out->flags, out->in_iovs set to 1 and
1534 * the actual iov array to
1535 *
1536 * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) } }
1537 *
1538 * which tells FUSE to copy in the requested area and retry the ioctl.
1539 * On the second round, the server has access to the structure and
1540 * from that it can tell what to look for next, so on the invocation,
1541 * it sets FUSE_IOCTL_RETRY, out->in_iovs to 2 and iov array to
1542 *
1543 * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) },
1544 * { .iov_base = a.buf, .iov_len = a.buflen } }
1545 *
1546 * FUSE will copy both struct a and the pointed buffer from the
1547 * process doing the ioctl and retry ioctl with both struct a and the
1548 * buffer.
1549 *
1550 * This time, FUSE server has everything it needs and completes ioctl
1551 * without FUSE_IOCTL_RETRY which finishes the ioctl call.
1552 *
1553 * Copying data out works the same way.
1554 *
1555 * Note that if FUSE_IOCTL_UNRESTRICTED is clear, the kernel
1556 * automatically initializes in and out iovs by decoding @cmd with
1557 * _IOC_* macros and the server is not allowed to request RETRY. This
1558 * limits ioctl data transfers to well-formed ioctls and is the forced
1559 * behavior for all FUSE servers.
1560 */
1561static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
1562 unsigned long arg, unsigned int flags)
1563{
1564 struct inode *inode = file->f_dentry->d_inode;
1565 struct fuse_file *ff = file->private_data;
1566 struct fuse_conn *fc = get_fuse_conn(inode);
1567 struct fuse_ioctl_in inarg = {
1568 .fh = ff->fh,
1569 .cmd = cmd,
1570 .arg = arg,
1571 .flags = flags
1572 };
1573 struct fuse_ioctl_out outarg;
1574 struct fuse_req *req = NULL;
1575 struct page **pages = NULL;
1576 struct page *iov_page = NULL;
1577 struct iovec *in_iov = NULL, *out_iov = NULL;
1578 unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
1579 size_t in_size, out_size, transferred;
1580 int err;
1581
1582 /* assume all the iovs returned by client always fits in a page */
1583 BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
1584
1585 if (!fuse_allow_task(fc, current))
1586 return -EACCES;
1587
1588 err = -EIO;
1589 if (is_bad_inode(inode))
1590 goto out;
1591
1592 err = -ENOMEM;
1593 pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
1594 iov_page = alloc_page(GFP_KERNEL);
1595 if (!pages || !iov_page)
1596 goto out;
1597
1598 /*
1599 * If restricted, initialize IO parameters as encoded in @cmd.
1600 * RETRY from server is not allowed.
1601 */
1602 if (!(flags & FUSE_IOCTL_UNRESTRICTED)) {
1603 struct iovec *iov = page_address(iov_page);
1604
1605 iov->iov_base = (void *)arg;
1606 iov->iov_len = _IOC_SIZE(cmd);
1607
1608 if (_IOC_DIR(cmd) & _IOC_WRITE) {
1609 in_iov = iov;
1610 in_iovs = 1;
1611 }
1612
1613 if (_IOC_DIR(cmd) & _IOC_READ) {
1614 out_iov = iov;
1615 out_iovs = 1;
1616 }
1617 }
1618
1619 retry:
1620 inarg.in_size = in_size = iov_length(in_iov, in_iovs);
1621 inarg.out_size = out_size = iov_length(out_iov, out_iovs);
1622
1623 /*
1624 * Out data can be used either for actual out data or iovs,
1625 * make sure there always is at least one page.
1626 */
1627 out_size = max_t(size_t, out_size, PAGE_SIZE);
1628 max_pages = DIV_ROUND_UP(max(in_size, out_size), PAGE_SIZE);
1629
1630 /* make sure there are enough buffer pages and init request with them */
1631 err = -ENOMEM;
1632 if (max_pages > FUSE_MAX_PAGES_PER_REQ)
1633 goto out;
1634 while (num_pages < max_pages) {
1635 pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
1636 if (!pages[num_pages])
1637 goto out;
1638 num_pages++;
1639 }
1640
1641 req = fuse_get_req(fc);
1642 if (IS_ERR(req)) {
1643 err = PTR_ERR(req);
1644 req = NULL;
1645 goto out;
1646 }
1647 memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
1648 req->num_pages = num_pages;
1649
1650 /* okay, let's send it to the client */
1651 req->in.h.opcode = FUSE_IOCTL;
1652 req->in.h.nodeid = get_node_id(inode);
1653 req->in.numargs = 1;
1654 req->in.args[0].size = sizeof(inarg);
1655 req->in.args[0].value = &inarg;
1656 if (in_size) {
1657 req->in.numargs++;
1658 req->in.args[1].size = in_size;
1659 req->in.argpages = 1;
1660
1661 err = fuse_ioctl_copy_user(pages, in_iov, in_iovs, in_size,
1662 false);
1663 if (err)
1664 goto out;
1665 }
1666
1667 req->out.numargs = 2;
1668 req->out.args[0].size = sizeof(outarg);
1669 req->out.args[0].value = &outarg;
1670 req->out.args[1].size = out_size;
1671 req->out.argpages = 1;
1672 req->out.argvar = 1;
1673
1674 request_send(fc, req);
1675 err = req->out.h.error;
1676 transferred = req->out.args[1].size;
1677 fuse_put_request(fc, req);
1678 req = NULL;
1679 if (err)
1680 goto out;
1681
1682 /* did it ask for retry? */
1683 if (outarg.flags & FUSE_IOCTL_RETRY) {
1684 char *vaddr;
1685
1686 /* no retry if in restricted mode */
1687 err = -EIO;
1688 if (!(flags & FUSE_IOCTL_UNRESTRICTED))
1689 goto out;
1690
1691 in_iovs = outarg.in_iovs;
1692 out_iovs = outarg.out_iovs;
1693
1694 /*
1695 * Make sure things are in boundary, separate checks
1696 * are to protect against overflow.
1697 */
1698 err = -ENOMEM;
1699 if (in_iovs > FUSE_IOCTL_MAX_IOV ||
1700 out_iovs > FUSE_IOCTL_MAX_IOV ||
1701 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
1702 goto out;
1703
1704 err = -EIO;
1705 if ((in_iovs + out_iovs) * sizeof(struct iovec) != transferred)
1706 goto out;
1707
1708 /* okay, copy in iovs and retry */
1709 vaddr = kmap_atomic(pages[0], KM_USER0);
1710 memcpy(page_address(iov_page), vaddr, transferred);
1711 kunmap_atomic(vaddr, KM_USER0);
1712
1713 in_iov = page_address(iov_page);
1714 out_iov = in_iov + in_iovs;
1715
1716 goto retry;
1717 }
1718
1719 err = -EIO;
1720 if (transferred > inarg.out_size)
1721 goto out;
1722
1723 err = fuse_ioctl_copy_user(pages, out_iov, out_iovs, transferred, true);
1724 out:
1725 if (req)
1726 fuse_put_request(fc, req);
1727 if (iov_page)
1728 __free_page(iov_page);
1729 while (num_pages)
1730 __free_page(pages[--num_pages]);
1731 kfree(pages);
1732
1733 return err ? err : outarg.result;
1734}
1735
/*
 * Native ioctl entry point: hands the call to fuse_file_do_ioctl() with
 * no FUSE_IOCTL_* flags set.
 */
static long fuse_file_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	const unsigned int ioctl_flags = 0;

	return fuse_file_do_ioctl(file, cmd, arg, ioctl_flags);
}
1741
1742static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
1743 unsigned long arg)
1744{
1745 return fuse_file_do_ioctl(file, cmd, arg, FUSE_IOCTL_COMPAT);
1746}
1747
1472static const struct file_operations fuse_file_operations = { 1748static const struct file_operations fuse_file_operations = {
1473 .llseek = fuse_file_llseek, 1749 .llseek = fuse_file_llseek,
1474 .read = do_sync_read, 1750 .read = do_sync_read,
@@ -1483,6 +1759,8 @@ static const struct file_operations fuse_file_operations = {
1483 .lock = fuse_file_lock, 1759 .lock = fuse_file_lock,
1484 .flock = fuse_file_flock, 1760 .flock = fuse_file_flock,
1485 .splice_read = generic_file_splice_read, 1761 .splice_read = generic_file_splice_read,
1762 .unlocked_ioctl = fuse_file_ioctl,
1763 .compat_ioctl = fuse_file_compat_ioctl,
1486}; 1764};
1487 1765
1488static const struct file_operations fuse_direct_io_file_operations = { 1766static const struct file_operations fuse_direct_io_file_operations = {
@@ -1495,6 +1773,8 @@ static const struct file_operations fuse_direct_io_file_operations = {
1495 .fsync = fuse_fsync, 1773 .fsync = fuse_fsync,
1496 .lock = fuse_file_lock, 1774 .lock = fuse_file_lock,
1497 .flock = fuse_file_flock, 1775 .flock = fuse_file_flock,
1776 .unlocked_ioctl = fuse_file_ioctl,
1777 .compat_ioctl = fuse_file_compat_ioctl,
1498 /* no mmap and splice_read */ 1778 /* no mmap and splice_read */
1499}; 1779};
1500 1780
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 7caa473306e4..608e300ae883 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -148,6 +148,21 @@ struct fuse_file_lock {
148 */ 148 */
149#define FUSE_READ_LOCKOWNER (1 << 1) 149#define FUSE_READ_LOCKOWNER (1 << 1)
150 150
151/**
152 * Ioctl flags
153 *
154 * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
155 * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
156 * FUSE_IOCTL_RETRY: retry with new iovecs
157 *
158 * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
159 */
160#define FUSE_IOCTL_COMPAT (1 << 0) /* request flag: passed in fuse_ioctl_in.flags by the compat ioctl path */
161#define FUSE_IOCTL_UNRESTRICTED (1 << 1) /* request flag: when clear, the kernel derives the iovecs from the ioctl command */
162#define FUSE_IOCTL_RETRY (1 << 2) /* reply flag: checked in fuse_ioctl_out.flags */
163
164#define FUSE_IOCTL_MAX_IOV 256 /* limit on in_iovs + out_iovs accepted from a retry reply */
165
151enum fuse_opcode { 166enum fuse_opcode {
152 FUSE_LOOKUP = 1, 167 FUSE_LOOKUP = 1,
153 FUSE_FORGET = 2, /* no reply */ 168 FUSE_FORGET = 2, /* no reply */
@@ -185,6 +200,7 @@ enum fuse_opcode {
185 FUSE_INTERRUPT = 36, 200 FUSE_INTERRUPT = 36,
186 FUSE_BMAP = 37, 201 FUSE_BMAP = 37,
187 FUSE_DESTROY = 38, 202 FUSE_DESTROY = 38,
203 FUSE_IOCTL = 39,
188}; 204};
189 205
190/* The read buffer is required to be at least 8k, but may be much larger */ 206/* The read buffer is required to be at least 8k, but may be much larger */
@@ -385,6 +401,22 @@ struct fuse_bmap_out {
385 __u64 block; 401 __u64 block;
386}; 402};
387 403
404struct fuse_ioctl_in { /* argument of a FUSE_IOCTL request, filled in by fuse_file_do_ioctl() */
405 __u64 fh; /* file handle of the open file the ioctl was issued on */
406 __u32 flags; /* FUSE_IOCTL_* request flags */
407 __u32 cmd; /* ioctl command number */
408 __u64 arg; /* ioctl argument as passed by the caller */
409 __u32 in_size; /* total length in bytes of the input iovecs */
410 __u32 out_size; /* total length in bytes of the output iovecs */
411};
412
413struct fuse_ioctl_out { /* fixed part of the reply to a FUSE_IOCTL request */
414 __s32 result; /* value returned to the ioctl caller when no error occurred */
415 __u32 flags; /* reply flags; FUSE_IOCTL_RETRY asks for a retry with new iovecs */
416 __u32 in_iovs; /* on retry: number of input iovecs in the reply payload */
417 __u32 out_iovs; /* on retry: number of output iovecs following the input ones */
418};
419
388struct fuse_in_header { 420struct fuse_in_header {
389 __u32 len; 421 __u32 len;
390 __u32 opcode; 422 __u32 opcode;