summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author: David Herrmann <dh.herrmann@gmail.com> 2014-08-08 17:25:27 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-08-08 18:57:31 -0400
commit: 40e041a2c858b3caefc757e26cb85bfceae5062b (patch)
tree: b55bfe2e25e490c94dba630b6299c60342d49cd8 /mm
parent: 4bb5f5d9395bc112d93a134d8f5b05611eddc9c0 (diff)
shm: add sealing API
If two processes share a common memory region, they usually want some guarantees to allow safe access. This often includes:
- one side cannot overwrite data while the other reads it
- one side cannot shrink the buffer while the other accesses it
- one side cannot grow the buffer beyond previously set boundaries

If there is a trust-relationship between both parties, there is no need for policy enforcement. However, if there's no trust relationship (e.g., for general-purpose IPC) sharing memory-regions is highly fragile and often not possible without local copies. Look at the following two use-cases:

1) A graphics client wants to share its rendering-buffer with a graphics-server. The memory-region is allocated by the client for read/write access and a second FD is passed to the server. While scanning out from the memory region, the server has no guarantee that the client doesn't shrink the buffer at any time, requiring rather cumbersome SIGBUS handling.
2) A process wants to perform an RPC on another process. To avoid huge bandwidth consumption, zero-copy is preferred. After a message is assembled in-memory and an FD is passed to the remote side, both sides want to be sure that neither modifies this shared copy anymore. The source may have put sensitive data into the message without a separate copy and the target may want to parse the message inline, to avoid a local copy.

While SIGBUS handling, POSIX mandatory locking and MAP_DENYWRITE provide ways to achieve most of this, the first one is disproportionately ugly to use in libraries and the latter two are broken/racy or even disabled due to denial of service attacks.

This patch introduces the concept of SEALING. If you seal a file, a specific set of operations is blocked on that file forever. Unlike locks, seals can only be set, never removed. Hence, once you have verified that a specific set of seals is set, you're guaranteed that no one can perform the blocked operations on this file anymore.
An initial set of SEALS is introduced by this patch:
- SHRINK: If SEAL_SHRINK is set, the file in question cannot be reduced in size. This affects ftruncate() and open(O_TRUNC).
- GROW: If SEAL_GROW is set, the file in question cannot be increased in size. This affects ftruncate(), fallocate() and write().
- WRITE: If SEAL_WRITE is set, no write operations (besides resizing) are possible. This affects fallocate(PUNCH_HOLE), mmap() and write().
- SEAL: If SEAL_SEAL is set, no further seals can be added to a file. This basically prevents the F_ADD_SEALS operation on a file and can be set to prevent others from adding further seals that you don't want.

The described use-cases can easily use these seals to provide safe use without any trust-relationship:

1) The graphics server can verify that a passed file-descriptor has SEAL_SHRINK set. This allows safe scanout, while the client is allowed to increase buffer size for window-resizing on-the-fly. Concurrent writes are explicitly allowed.
2) For general-purpose IPC, both processes can verify that SEAL_SHRINK, SEAL_GROW and SEAL_WRITE are set. This guarantees that neither process can modify the data while the other side parses it. Furthermore, it guarantees that even with writable FDs passed to the peer, it cannot increase the size to hit memory-limits of the source process (in case the file-storage is accounted to the source).

The new API is an extension to fcntl(), adding two new commands:

F_GET_SEALS: Return a bitset describing the seals on the file. This can be called on any FD if the underlying file supports sealing.
F_ADD_SEALS: Change the seals of a given file. This requires WRITE access to the file and F_SEAL_SEAL may not already be set. Furthermore, the underlying file must support sealing and there may not be any existing shared mapping of that file. Otherwise, EBADF/EPERM is returned. The given seals are _added_ to the existing set of seals on the file. You cannot remove seals again.
The fcntl() handler is currently specific to shmem and disabled on all files. A file needs to explicitly support sealing for this interface to work. A separate syscall is added in a follow-up, which creates files that support sealing. There is no intention to support this on other file-systems. Semantics are unclear for non-volatile files and we lack any use-case right now. Therefore, the implementation is specific to shmem.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Ryan Lortie <desrt@desrt.ca>
Cc: Lennart Poettering <lennart@poettering.net>
Cc: Daniel Mack <zonque@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/shmem.c 143
1 files changed, 143 insertions, 0 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index 6dc80d298f9d..8b43bb7a4efe 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -66,6 +66,7 @@ static struct vfsmount *shm_mnt;
66#include <linux/highmem.h> 66#include <linux/highmem.h>
67#include <linux/seq_file.h> 67#include <linux/seq_file.h>
68#include <linux/magic.h> 68#include <linux/magic.h>
69#include <linux/fcntl.h>
69 70
70#include <asm/uaccess.h> 71#include <asm/uaccess.h>
71#include <asm/pgtable.h> 72#include <asm/pgtable.h>
@@ -547,6 +548,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
547static int shmem_setattr(struct dentry *dentry, struct iattr *attr) 548static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
548{ 549{
549 struct inode *inode = dentry->d_inode; 550 struct inode *inode = dentry->d_inode;
551 struct shmem_inode_info *info = SHMEM_I(inode);
550 int error; 552 int error;
551 553
552 error = inode_change_ok(inode, attr); 554 error = inode_change_ok(inode, attr);
@@ -557,6 +559,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
557 loff_t oldsize = inode->i_size; 559 loff_t oldsize = inode->i_size;
558 loff_t newsize = attr->ia_size; 560 loff_t newsize = attr->ia_size;
559 561
562 /* protected by i_mutex */
563 if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
564 (newsize > oldsize && (info->seals & F_SEAL_GROW)))
565 return -EPERM;
566
560 if (newsize != oldsize) { 567 if (newsize != oldsize) {
561 error = shmem_reacct_size(SHMEM_I(inode)->flags, 568 error = shmem_reacct_size(SHMEM_I(inode)->flags,
562 oldsize, newsize); 569 oldsize, newsize);
@@ -1412,6 +1419,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
1412 info = SHMEM_I(inode); 1419 info = SHMEM_I(inode);
1413 memset(info, 0, (char *)inode - (char *)info); 1420 memset(info, 0, (char *)inode - (char *)info);
1414 spin_lock_init(&info->lock); 1421 spin_lock_init(&info->lock);
1422 info->seals = F_SEAL_SEAL;
1415 info->flags = flags & VM_NORESERVE; 1423 info->flags = flags & VM_NORESERVE;
1416 INIT_LIST_HEAD(&info->swaplist); 1424 INIT_LIST_HEAD(&info->swaplist);
1417 simple_xattrs_init(&info->xattrs); 1425 simple_xattrs_init(&info->xattrs);
@@ -1470,7 +1478,17 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
1470 struct page **pagep, void **fsdata) 1478 struct page **pagep, void **fsdata)
1471{ 1479{
1472 struct inode *inode = mapping->host; 1480 struct inode *inode = mapping->host;
1481 struct shmem_inode_info *info = SHMEM_I(inode);
1473 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 1482 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
1483
1484 /* i_mutex is held by caller */
1485 if (unlikely(info->seals)) {
1486 if (info->seals & F_SEAL_WRITE)
1487 return -EPERM;
1488 if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
1489 return -EPERM;
1490 }
1491
1474 return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); 1492 return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
1475} 1493}
1476 1494
@@ -1808,11 +1826,125 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
1808 return offset; 1826 return offset;
1809} 1827}
1810 1828
/*
 * shmem_wait_for_pins - hook called by shmem_add_seals() after writable
 * mappings have been denied and before F_SEAL_WRITE is recorded.
 * @mapping: address space of the file being sealed (not inspected here)
 *
 * This version is a stub: it does no work and always reports success.
 *
 * Return: 0 always.
 */
static int shmem_wait_for_pins(struct address_space *mapping)
{
	/* Nothing to wait for in this implementation. */
	return 0;
}
1833
/* Mask of every seal bit that shmem_add_seals() accepts from callers. */
#define F_ALL_SEALS \
	(F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE)
1838
1839int shmem_add_seals(struct file *file, unsigned int seals)
1840{
1841 struct inode *inode = file_inode(file);
1842 struct shmem_inode_info *info = SHMEM_I(inode);
1843 int error;
1844
1845 /*
1846 * SEALING
1847 * Sealing allows multiple parties to share a shmem-file but restrict
1848 * access to a specific subset of file operations. Seals can only be
1849 * added, but never removed. This way, mutually untrusted parties can
1850 * share common memory regions with a well-defined policy. A malicious
1851 * peer can thus never perform unwanted operations on a shared object.
1852 *
1853 * Seals are only supported on special shmem-files and always affect
1854 * the whole underlying inode. Once a seal is set, it may prevent some
1855 * kinds of access to the file. Currently, the following seals are
1856 * defined:
1857 * SEAL_SEAL: Prevent further seals from being set on this file
1858 * SEAL_SHRINK: Prevent the file from shrinking
1859 * SEAL_GROW: Prevent the file from growing
1860 * SEAL_WRITE: Prevent write access to the file
1861 *
1862 * As we don't require any trust relationship between two parties, we
1863 * must prevent seals from being removed. Therefore, sealing a file
1864 * only adds a given set of seals to the file, it never touches
1865 * existing seals. Furthermore, the "setting seals"-operation can be
1866 * sealed itself, which basically prevents any further seal from being
1867 * added.
1868 *
1869 * Semantics of sealing are only defined on volatile files. Only
1870 * anonymous shmem files support sealing. More importantly, seals are
1871 * never written to disk. Therefore, there's no plan to support it on
1872 * other file types.
1873 */
1874
1875 if (file->f_op != &shmem_file_operations)
1876 return -EINVAL;
1877 if (!(file->f_mode & FMODE_WRITE))
1878 return -EPERM;
1879 if (seals & ~(unsigned int)F_ALL_SEALS)
1880 return -EINVAL;
1881
1882 mutex_lock(&inode->i_mutex);
1883
1884 if (info->seals & F_SEAL_SEAL) {
1885 error = -EPERM;
1886 goto unlock;
1887 }
1888
1889 if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
1890 error = mapping_deny_writable(file->f_mapping);
1891 if (error)
1892 goto unlock;
1893
1894 error = shmem_wait_for_pins(file->f_mapping);
1895 if (error) {
1896 mapping_allow_writable(file->f_mapping);
1897 goto unlock;
1898 }
1899 }
1900
1901 info->seals |= seals;
1902 error = 0;
1903
1904unlock:
1905 mutex_unlock(&inode->i_mutex);
1906 return error;
1907}
1908EXPORT_SYMBOL_GPL(shmem_add_seals);
1909
1910int shmem_get_seals(struct file *file)
1911{
1912 if (file->f_op != &shmem_file_operations)
1913 return -EINVAL;
1914
1915 return SHMEM_I(file_inode(file))->seals;
1916}
1917EXPORT_SYMBOL_GPL(shmem_get_seals);
1918
1919long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
1920{
1921 long error;
1922
1923 switch (cmd) {
1924 case F_ADD_SEALS:
1925 /* disallow upper 32bit */
1926 if (arg > UINT_MAX)
1927 return -EINVAL;
1928
1929 error = shmem_add_seals(file, arg);
1930 break;
1931 case F_GET_SEALS:
1932 error = shmem_get_seals(file);
1933 break;
1934 default:
1935 error = -EINVAL;
1936 break;
1937 }
1938
1939 return error;
1940}
1941
1811static long shmem_fallocate(struct file *file, int mode, loff_t offset, 1942static long shmem_fallocate(struct file *file, int mode, loff_t offset,
1812 loff_t len) 1943 loff_t len)
1813{ 1944{
1814 struct inode *inode = file_inode(file); 1945 struct inode *inode = file_inode(file);
1815 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 1946 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
1947 struct shmem_inode_info *info = SHMEM_I(inode);
1816 struct shmem_falloc shmem_falloc; 1948 struct shmem_falloc shmem_falloc;
1817 pgoff_t start, index, end; 1949 pgoff_t start, index, end;
1818 int error; 1950 int error;
@@ -1828,6 +1960,12 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
1828 loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; 1960 loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
1829 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq); 1961 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
1830 1962
1963 /* protected by i_mutex */
1964 if (info->seals & F_SEAL_WRITE) {
1965 error = -EPERM;
1966 goto out;
1967 }
1968
1831 shmem_falloc.waitq = &shmem_falloc_waitq; 1969 shmem_falloc.waitq = &shmem_falloc_waitq;
1832 shmem_falloc.start = unmap_start >> PAGE_SHIFT; 1970 shmem_falloc.start = unmap_start >> PAGE_SHIFT;
1833 shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; 1971 shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
@@ -1854,6 +1992,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
1854 if (error) 1992 if (error)
1855 goto out; 1993 goto out;
1856 1994
1995 if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
1996 error = -EPERM;
1997 goto out;
1998 }
1999
1857 start = offset >> PAGE_CACHE_SHIFT; 2000 start = offset >> PAGE_CACHE_SHIFT;
1858 end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 2001 end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1859 /* Try to avoid a swapstorm if len is impossible to satisfy */ 2002 /* Try to avoid a swapstorm if len is impossible to satisfy */