aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- fs/inode.c | 36
-rw-r--r-- fs/splice.c | 105
-rw-r--r-- include/linux/fs.h | 7
-rw-r--r-- mm/filemap.c | 30
4 files changed, 148 insertions, 30 deletions
diff --git a/fs/inode.c b/fs/inode.c
index d9a21d122926..26cdb115ce67 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1306,6 +1306,42 @@ void wake_up_inode(struct inode *inode)
1306 wake_up_bit(&inode->i_state, __I_LOCK); 1306 wake_up_bit(&inode->i_state, __I_LOCK);
1307} 1307}
1308 1308
1309/*
1310 * We rarely want to lock two inodes that do not have a parent/child
1311 * relationship (such as directory, child inode) simultaneously. The
1312 * vast majority of file systems should be able to get along fine
1313 * without this. Do not use these functions except as a last resort.
 *
 * inode_double_lock() takes i_mutex on up to two inodes:
 *  - if either pointer is NULL, or both point at the same inode, only the
 *    single non-NULL inode is locked (nothing is locked if both are NULL);
 *  - otherwise both are locked, lowest address first, so that every caller
 *    acquires a given pair in the same order regardless of argument order.
1314 */
1315void inode_double_lock(struct inode *inode1, struct inode *inode2)
1316{
 /* Degenerate cases: there is at most one distinct inode to lock. */
1317 if (inode1 == NULL || inode2 == NULL || inode1 == inode2) {
1318 if (inode1)
1319 mutex_lock(&inode1->i_mutex);
1320 else if (inode2)
1321 mutex_lock(&inode2->i_mutex);
1322 return;
1323 }
1324
 /*
  * Two distinct inodes: order by address. The first mutex is taken
  * with lock class I_MUTEX_PARENT and the second with I_MUTEX_CHILD.
  */
1325 if (inode1 < inode2) {
1326 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
1327 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
1328 } else {
1329 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
1330 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
1331 }
1332}
1333EXPORT_SYMBOL(inode_double_lock);
1334
/*
 * Release the i_mutex of inode1 and inode2. Either pointer may be NULL,
 * in which case it is ignored; if both point at the same inode its
 * mutex is unlocked only once.
 */
1335void inode_double_unlock(struct inode *inode1, struct inode *inode2)
1336{
1337 if (inode1)
1338 mutex_unlock(&inode1->i_mutex);
1339
 /* Skip inode2 when it is the inode already unlocked above. */
1340 if (inode2 && inode2 != inode1)
1341 mutex_unlock(&inode2->i_mutex);
1342}
1343EXPORT_SYMBOL(inode_double_unlock);
1344
1309static __initdata unsigned long ihash_entries; 1345static __initdata unsigned long ihash_entries;
1310static int __init set_ihash_entries(char *str) 1346static int __init set_ihash_entries(char *str)
1311{ 1347{
diff --git a/fs/splice.c b/fs/splice.c
index a567010b62ac..49fb9f129938 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -707,9 +707,9 @@ out_ret:
707 * key here is the 'actor' worker passed in that actually moves the data 707 * key here is the 'actor' worker passed in that actually moves the data
708 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 708 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
709 */ 709 */
710ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, 710static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
711 loff_t *ppos, size_t len, unsigned int flags, 711 struct file *out, loff_t *ppos, size_t len,
712 splice_actor *actor) 712 unsigned int flags, splice_actor *actor)
713{ 713{
714 int ret, do_wakeup, err; 714 int ret, do_wakeup, err;
715 struct splice_desc sd; 715 struct splice_desc sd;
@@ -722,9 +722,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
722 sd.file = out; 722 sd.file = out;
723 sd.pos = *ppos; 723 sd.pos = *ppos;
724 724
725 if (pipe->inode)
726 mutex_lock(&pipe->inode->i_mutex);
727
728 for (;;) { 725 for (;;) {
729 if (pipe->nrbufs) { 726 if (pipe->nrbufs) {
730 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; 727 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
@@ -797,9 +794,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
797 pipe_wait(pipe); 794 pipe_wait(pipe);
798 } 795 }
799 796
800 if (pipe->inode)
801 mutex_unlock(&pipe->inode->i_mutex);
802
803 if (do_wakeup) { 797 if (do_wakeup) {
804 smp_mb(); 798 smp_mb();
805 if (waitqueue_active(&pipe->wait)) 799 if (waitqueue_active(&pipe->wait))
@@ -810,6 +804,73 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
810 return ret; 804 return ret;
811} 805}
812 806
/*
 * Locked wrapper around __splice_from_pipe(): takes i_mutex on both the
 * inode backing @out's mapping and the pipe inode, runs the splice with
 * the caller's arguments unchanged, drops both mutexes and returns
 * whatever __splice_from_pipe() returned.
 */
807ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
808 loff_t *ppos, size_t len, unsigned int flags,
809 splice_actor *actor)
810{
811 ssize_t ret;
812 struct inode *inode = out->f_mapping->host;
813
814 /*
815 * The actor worker might be calling ->prepare_write and
816 * ->commit_write. Most of the time, these expect i_mutex to
817 * be held. Since this may result in an ABBA deadlock with
818 * pipe->inode, we have to order lock acquisition here.
819 */
820 inode_double_lock(inode, pipe->inode);
821 ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor);
822 inode_double_unlock(inode, pipe->inode);
823
824 return ret;
825}
826
827/**
828 * generic_file_splice_write_nolock - generic_file_splice_write without mutexes
829 * @pipe: pipe info
830 * @out: file to write to
 * @ppos: in/out position in @out; advanced by the number of bytes spliced
831 * @len: number of bytes to splice
832 * @flags: splice modifier flags
833 *
834 * Will either move or copy pages (determined by @flags options) from
835 * the given pipe inode to the given file. The caller is responsible
836 * for acquiring i_mutex on both inodes.
837 *
 * Returns the non-zero result of remove_suid() if that fails; otherwise
 * the result of __splice_from_pipe(), except that when data was written
 * to an O_SYNC file or IS_SYNC inode and generic_osync_inode() fails,
 * that error is returned instead (after @ppos has been advanced).
838 */
839ssize_t
840generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
841 loff_t *ppos, size_t len, unsigned int flags)
842{
843 struct address_space *mapping = out->f_mapping;
844 struct inode *inode = mapping->host;
845 ssize_t ret;
846 int err;
847
 /* Handle setuid/setgid removal before any data is moved. */
848 err = remove_suid(out->f_dentry);
849 if (unlikely(err))
850 return err;
851
 /* Unlocked splice: no i_mutex is taken here, see the header above. */
852 ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
853 if (ret > 0) {
854 *ppos += ret;
855
856 /*
857 * If file or inode is SYNC and we actually wrote some data,
858 * sync it.
859 */
860 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
861 err = generic_osync_inode(inode, mapping,
862 OSYNC_METADATA|OSYNC_DATA);
863
 /* A sync failure replaces the byte count as the return value. */
864 if (err)
865 ret = err;
866 }
867 }
868
869 return ret;
870}
871
872EXPORT_SYMBOL(generic_file_splice_write_nolock);
873
813/** 874/**
814 * generic_file_splice_write - splice data from a pipe to a file 875 * generic_file_splice_write - splice data from a pipe to a file
815 * @pipe: pipe info 876 * @pipe: pipe info
@@ -826,12 +887,21 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
826 loff_t *ppos, size_t len, unsigned int flags) 887 loff_t *ppos, size_t len, unsigned int flags)
827{ 888{
828 struct address_space *mapping = out->f_mapping; 889 struct address_space *mapping = out->f_mapping;
890 struct inode *inode = mapping->host;
829 ssize_t ret; 891 ssize_t ret;
892 int err;
893
894 err = should_remove_suid(out->f_dentry);
895 if (unlikely(err)) {
896 mutex_lock(&inode->i_mutex);
897 err = __remove_suid(out->f_dentry, err);
898 mutex_unlock(&inode->i_mutex);
899 if (err)
900 return err;
901 }
830 902
831 ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); 903 ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
832 if (ret > 0) { 904 if (ret > 0) {
833 struct inode *inode = mapping->host;
834
835 *ppos += ret; 905 *ppos += ret;
836 906
837 /* 907 /*
@@ -839,8 +909,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
839 * sync it. 909 * sync it.
840 */ 910 */
841 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { 911 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
842 int err;
843
844 mutex_lock(&inode->i_mutex); 912 mutex_lock(&inode->i_mutex);
845 err = generic_osync_inode(inode, mapping, 913 err = generic_osync_inode(inode, mapping,
846 OSYNC_METADATA|OSYNC_DATA); 914 OSYNC_METADATA|OSYNC_DATA);
@@ -1400,13 +1468,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1400 * grabbing by inode address. Otherwise two different processes 1468 * grabbing by inode address. Otherwise two different processes
1401 * could deadlock (one doing tee from A -> B, the other from B -> A). 1469 * could deadlock (one doing tee from A -> B, the other from B -> A).
1402 */ 1470 */
1403 if (ipipe->inode < opipe->inode) { 1471 inode_double_lock(ipipe->inode, opipe->inode);
1404 mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT);
1405 mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD);
1406 } else {
1407 mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT);
1408 mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD);
1409 }
1410 1472
1411 do { 1473 do {
1412 if (!opipe->readers) { 1474 if (!opipe->readers) {
@@ -1450,8 +1512,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1450 i++; 1512 i++;
1451 } while (len); 1513 } while (len);
1452 1514
1453 mutex_unlock(&ipipe->inode->i_mutex); 1515 inode_double_unlock(ipipe->inode, opipe->inode);
1454 mutex_unlock(&opipe->inode->i_mutex);
1455 1516
1456 /* 1517 /*
1457 * If we put data in the output pipe, wakeup any potential readers. 1518 * If we put data in the output pipe, wakeup any potential readers.
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 661c7c572149..2fe6e3f900ba 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -623,6 +623,9 @@ enum inode_i_mutex_lock_class
623 I_MUTEX_QUOTA 623 I_MUTEX_QUOTA
624}; 624};
625 625
626extern void inode_double_lock(struct inode *inode1, struct inode *inode2);
627extern void inode_double_unlock(struct inode *inode1, struct inode *inode2);
628
626/* 629/*
627 * NOTE: in a 32bit arch with a preemptable kernel and 630 * NOTE: in a 32bit arch with a preemptable kernel and
628 * an UP compile the i_size_read/write must be atomic 631 * an UP compile the i_size_read/write must be atomic
@@ -1709,6 +1712,8 @@ extern void __iget(struct inode * inode);
1709extern void clear_inode(struct inode *); 1712extern void clear_inode(struct inode *);
1710extern void destroy_inode(struct inode *); 1713extern void destroy_inode(struct inode *);
1711extern struct inode *new_inode(struct super_block *); 1714extern struct inode *new_inode(struct super_block *);
1715extern int __remove_suid(struct dentry *, int);
1716extern int should_remove_suid(struct dentry *);
1712extern int remove_suid(struct dentry *); 1717extern int remove_suid(struct dentry *);
1713extern void remove_dquot_ref(struct super_block *, int, struct list_head *); 1718extern void remove_dquot_ref(struct super_block *, int, struct list_head *);
1714 1719
@@ -1755,6 +1760,8 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *,
1755 struct pipe_inode_info *, size_t, unsigned int); 1760 struct pipe_inode_info *, size_t, unsigned int);
1756extern ssize_t generic_file_splice_write(struct pipe_inode_info *, 1761extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
1757 struct file *, loff_t *, size_t, unsigned int); 1762 struct file *, loff_t *, size_t, unsigned int);
1763extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *,
1764 struct file *, loff_t *, size_t, unsigned int);
1758extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, 1765extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
1759 struct file *out, loff_t *, size_t len, unsigned int flags); 1766 struct file *out, loff_t *, size_t len, unsigned int flags);
1760extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, 1767extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
diff --git a/mm/filemap.c b/mm/filemap.c
index 8558732e85c1..cb26e33fd0ff 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1884,11 +1884,10 @@ repeat:
1884 * if suid or (sgid and xgrp) 1884 * if suid or (sgid and xgrp)
1885 * remove privs 1885 * remove privs
1886 */ 1886 */
1887int remove_suid(struct dentry *dentry) 1887int should_remove_suid(struct dentry *dentry)
1888{ 1888{
1889 mode_t mode = dentry->d_inode->i_mode; 1889 mode_t mode = dentry->d_inode->i_mode;
1890 int kill = 0; 1890 int kill = 0;
1891 int result = 0;
1892 1891
1893 /* suid always must be killed */ 1892 /* suid always must be killed */
1894 if (unlikely(mode & S_ISUID)) 1893 if (unlikely(mode & S_ISUID))
@@ -1901,13 +1900,28 @@ int remove_suid(struct dentry *dentry)
1901 if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) 1900 if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
1902 kill |= ATTR_KILL_SGID; 1901 kill |= ATTR_KILL_SGID;
1903 1902
1904 if (unlikely(kill && !capable(CAP_FSETID))) { 1903 if (unlikely(kill && !capable(CAP_FSETID)))
1905 struct iattr newattrs; 1904 return kill;
1906 1905
1907 newattrs.ia_valid = ATTR_FORCE | kill; 1906 return 0;
1908 result = notify_change(dentry, &newattrs); 1907}
1909 } 1908
1910 return result; 1909int __remove_suid(struct dentry *dentry, int kill)
1910{
1911 struct iattr newattrs;
1912
1913 newattrs.ia_valid = ATTR_FORCE | kill;
1914 return notify_change(dentry, &newattrs);
1915}
1916
/*
 * Ask should_remove_suid() for the kill mask of @dentry; when it is
 * non-zero, pass it to __remove_suid() and return that result,
 * otherwise return 0.
 */
1917int remove_suid(struct dentry *dentry)
1918{
1919 int kill = should_remove_suid(dentry);
1920
1921 if (unlikely(kill))
1922 return __remove_suid(dentry, kill);
1923
1924 return 0;
1911} 1925}
1912EXPORT_SYMBOL(remove_suid); 1926EXPORT_SYMBOL(remove_suid);
1913 1927