diff options
author | Linus Torvalds <torvalds@g5.osdl.org> | 2006-10-21 13:01:52 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-10-21 13:01:52 -0400 |
commit | 7b7fc708b568a258595e1fa911b930a75ac07b48 (patch) | |
tree | 5c77a5397d01c91aaa59ee5517af28afee32afcb | |
parent | c7a3bd177f248d01ee18a01d22048c80e071c331 (diff) | |
parent | 8c34e2d63231d4bf4852bac8521883944d770fe3 (diff) |
Merge branch 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block
* 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block:
[PATCH] Remove SUID when splicing into an inode
[PATCH] Add lockless helpers for remove_suid()
[PATCH] Introduce generic_file_splice_write_nolock()
[PATCH] Take i_mutex in splice_from_pipe()
-rw-r--r-- | fs/inode.c | 36 | ||||
-rw-r--r-- | fs/splice.c | 105 | ||||
-rw-r--r-- | include/linux/fs.h | 7 | ||||
-rw-r--r-- | mm/filemap.c | 30 |
4 files changed, 148 insertions, 30 deletions
diff --git a/fs/inode.c b/fs/inode.c
index d9a21d122926..26cdb115ce67 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1306,6 +1306,42 @@ void wake_up_inode(struct inode *inode) | |||
1306 | wake_up_bit(&inode->i_state, __I_LOCK); | 1306 | wake_up_bit(&inode->i_state, __I_LOCK); |
1307 | } | 1307 | } |
1308 | 1308 | ||
1309 | /* | ||
1310 | * We rarely want to lock two inodes that do not have a parent/child | ||
1311 | * relationship (such as directory, child inode) simultaneously. The | ||
1312 | * vast majority of file systems should be able to get along fine | ||
1313 | * without this. Do not use these functions except as a last resort. | ||
1314 | */ | ||
1315 | void inode_double_lock(struct inode *inode1, struct inode *inode2) | ||
1316 | { | ||
1317 | if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { | ||
1318 | if (inode1) | ||
1319 | mutex_lock(&inode1->i_mutex); | ||
1320 | else if (inode2) | ||
1321 | mutex_lock(&inode2->i_mutex); | ||
1322 | return; | ||
1323 | } | ||
1324 | |||
1325 | if (inode1 < inode2) { | ||
1326 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); | ||
1327 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); | ||
1328 | } else { | ||
1329 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); | ||
1330 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); | ||
1331 | } | ||
1332 | } | ||
1333 | EXPORT_SYMBOL(inode_double_lock); | ||
1334 | |||
1335 | void inode_double_unlock(struct inode *inode1, struct inode *inode2) | ||
1336 | { | ||
1337 | if (inode1) | ||
1338 | mutex_unlock(&inode1->i_mutex); | ||
1339 | |||
1340 | if (inode2 && inode2 != inode1) | ||
1341 | mutex_unlock(&inode2->i_mutex); | ||
1342 | } | ||
1343 | EXPORT_SYMBOL(inode_double_unlock); | ||
1344 | |||
1309 | static __initdata unsigned long ihash_entries; | 1345 | static __initdata unsigned long ihash_entries; |
1310 | static int __init set_ihash_entries(char *str) | 1346 | static int __init set_ihash_entries(char *str) |
1311 | { | 1347 | { |
diff --git a/fs/splice.c b/fs/splice.c
index a567010b62ac..49fb9f129938 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -707,9 +707,9 @@ out_ret: | |||
707 | * key here is the 'actor' worker passed in that actually moves the data | 707 | * key here is the 'actor' worker passed in that actually moves the data |
708 | * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. | 708 | * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. |
709 | */ | 709 | */ |
710 | ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | 710 | static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, |
711 | loff_t *ppos, size_t len, unsigned int flags, | 711 | struct file *out, loff_t *ppos, size_t len, |
712 | splice_actor *actor) | 712 | unsigned int flags, splice_actor *actor) |
713 | { | 713 | { |
714 | int ret, do_wakeup, err; | 714 | int ret, do_wakeup, err; |
715 | struct splice_desc sd; | 715 | struct splice_desc sd; |
@@ -722,9 +722,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
722 | sd.file = out; | 722 | sd.file = out; |
723 | sd.pos = *ppos; | 723 | sd.pos = *ppos; |
724 | 724 | ||
725 | if (pipe->inode) | ||
726 | mutex_lock(&pipe->inode->i_mutex); | ||
727 | |||
728 | for (;;) { | 725 | for (;;) { |
729 | if (pipe->nrbufs) { | 726 | if (pipe->nrbufs) { |
730 | struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; | 727 | struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; |
@@ -797,9 +794,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
797 | pipe_wait(pipe); | 794 | pipe_wait(pipe); |
798 | } | 795 | } |
799 | 796 | ||
800 | if (pipe->inode) | ||
801 | mutex_unlock(&pipe->inode->i_mutex); | ||
802 | |||
803 | if (do_wakeup) { | 797 | if (do_wakeup) { |
804 | smp_mb(); | 798 | smp_mb(); |
805 | if (waitqueue_active(&pipe->wait)) | 799 | if (waitqueue_active(&pipe->wait)) |
@@ -810,6 +804,73 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
810 | return ret; | 804 | return ret; |
811 | } | 805 | } |
812 | 806 | ||
807 | ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | ||
808 | loff_t *ppos, size_t len, unsigned int flags, | ||
809 | splice_actor *actor) | ||
810 | { | ||
811 | ssize_t ret; | ||
812 | struct inode *inode = out->f_mapping->host; | ||
813 | |||
814 | /* | ||
815 | * The actor worker might be calling ->prepare_write and | ||
816 | * ->commit_write. Most of the time, these expect i_mutex to | ||
817 | * be held. Since this may result in an ABBA deadlock with | ||
818 | * pipe->inode, we have to order lock acquisition here. | ||
819 | */ | ||
820 | inode_double_lock(inode, pipe->inode); | ||
821 | ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor); | ||
822 | inode_double_unlock(inode, pipe->inode); | ||
823 | |||
824 | return ret; | ||
825 | } | ||
826 | |||
827 | /** | ||
828 | * generic_file_splice_write_nolock - generic_file_splice_write without mutexes | ||
829 | * @pipe: pipe info | ||
830 | * @out: file to write to | ||
831 | * @len: number of bytes to splice | ||
832 | * @flags: splice modifier flags | ||
833 | * | ||
834 | * Will either move or copy pages (determined by @flags options) from | ||
835 | * the given pipe inode to the given file. The caller is responsible | ||
836 | * for acquiring i_mutex on both inodes. | ||
837 | * | ||
838 | */ | ||
839 | ssize_t | ||
840 | generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, | ||
841 | loff_t *ppos, size_t len, unsigned int flags) | ||
842 | { | ||
843 | struct address_space *mapping = out->f_mapping; | ||
844 | struct inode *inode = mapping->host; | ||
845 | ssize_t ret; | ||
846 | int err; | ||
847 | |||
848 | err = remove_suid(out->f_dentry); | ||
849 | if (unlikely(err)) | ||
850 | return err; | ||
851 | |||
852 | ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); | ||
853 | if (ret > 0) { | ||
854 | *ppos += ret; | ||
855 | |||
856 | /* | ||
857 | * If file or inode is SYNC and we actually wrote some data, | ||
858 | * sync it. | ||
859 | */ | ||
860 | if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
861 | err = generic_osync_inode(inode, mapping, | ||
862 | OSYNC_METADATA|OSYNC_DATA); | ||
863 | |||
864 | if (err) | ||
865 | ret = err; | ||
866 | } | ||
867 | } | ||
868 | |||
869 | return ret; | ||
870 | } | ||
871 | |||
872 | EXPORT_SYMBOL(generic_file_splice_write_nolock); | ||
873 | |||
813 | /** | 874 | /** |
814 | * generic_file_splice_write - splice data from a pipe to a file | 875 | * generic_file_splice_write - splice data from a pipe to a file |
815 | * @pipe: pipe info | 876 | * @pipe: pipe info |
@@ -826,12 +887,21 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
826 | loff_t *ppos, size_t len, unsigned int flags) | 887 | loff_t *ppos, size_t len, unsigned int flags) |
827 | { | 888 | { |
828 | struct address_space *mapping = out->f_mapping; | 889 | struct address_space *mapping = out->f_mapping; |
890 | struct inode *inode = mapping->host; | ||
829 | ssize_t ret; | 891 | ssize_t ret; |
892 | int err; | ||
893 | |||
894 | err = should_remove_suid(out->f_dentry); | ||
895 | if (unlikely(err)) { | ||
896 | mutex_lock(&inode->i_mutex); | ||
897 | err = __remove_suid(out->f_dentry, err); | ||
898 | mutex_unlock(&inode->i_mutex); | ||
899 | if (err) | ||
900 | return err; | ||
901 | } | ||
830 | 902 | ||
831 | ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); | 903 | ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); |
832 | if (ret > 0) { | 904 | if (ret > 0) { |
833 | struct inode *inode = mapping->host; | ||
834 | |||
835 | *ppos += ret; | 905 | *ppos += ret; |
836 | 906 | ||
837 | /* | 907 | /* |
@@ -839,8 +909,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
839 | * sync it. | 909 | * sync it. |
840 | */ | 910 | */ |
841 | if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { | 911 | if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { |
842 | int err; | ||
843 | |||
844 | mutex_lock(&inode->i_mutex); | 912 | mutex_lock(&inode->i_mutex); |
845 | err = generic_osync_inode(inode, mapping, | 913 | err = generic_osync_inode(inode, mapping, |
846 | OSYNC_METADATA|OSYNC_DATA); | 914 | OSYNC_METADATA|OSYNC_DATA); |
@@ -1400,13 +1468,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
1400 | * grabbing by inode address. Otherwise two different processes | 1468 | * grabbing by inode address. Otherwise two different processes |
1401 | * could deadlock (one doing tee from A -> B, the other from B -> A). | 1469 | * could deadlock (one doing tee from A -> B, the other from B -> A). |
1402 | */ | 1470 | */ |
1403 | if (ipipe->inode < opipe->inode) { | 1471 | inode_double_lock(ipipe->inode, opipe->inode); |
1404 | mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT); | ||
1405 | mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD); | ||
1406 | } else { | ||
1407 | mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT); | ||
1408 | mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD); | ||
1409 | } | ||
1410 | 1472 | ||
1411 | do { | 1473 | do { |
1412 | if (!opipe->readers) { | 1474 | if (!opipe->readers) { |
@@ -1450,8 +1512,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
1450 | i++; | 1512 | i++; |
1451 | } while (len); | 1513 | } while (len); |
1452 | 1514 | ||
1453 | mutex_unlock(&ipipe->inode->i_mutex); | 1515 | inode_double_unlock(ipipe->inode, opipe->inode); |
1454 | mutex_unlock(&opipe->inode->i_mutex); | ||
1455 | 1516 | ||
1456 | /* | 1517 | /* |
1457 | * If we put data in the output pipe, wakeup any potential readers. | 1518 | * If we put data in the output pipe, wakeup any potential readers. |
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 661c7c572149..2fe6e3f900ba 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -623,6 +623,9 @@ enum inode_i_mutex_lock_class | |||
623 | I_MUTEX_QUOTA | 623 | I_MUTEX_QUOTA |
624 | }; | 624 | }; |
625 | 625 | ||
626 | extern void inode_double_lock(struct inode *inode1, struct inode *inode2); | ||
627 | extern void inode_double_unlock(struct inode *inode1, struct inode *inode2); | ||
628 | |||
626 | /* | 629 | /* |
627 | * NOTE: in a 32bit arch with a preemptable kernel and | 630 | * NOTE: in a 32bit arch with a preemptable kernel and |
628 | * an UP compile the i_size_read/write must be atomic | 631 | * an UP compile the i_size_read/write must be atomic |
@@ -1709,6 +1712,8 @@ extern void __iget(struct inode * inode); | |||
1709 | extern void clear_inode(struct inode *); | 1712 | extern void clear_inode(struct inode *); |
1710 | extern void destroy_inode(struct inode *); | 1713 | extern void destroy_inode(struct inode *); |
1711 | extern struct inode *new_inode(struct super_block *); | 1714 | extern struct inode *new_inode(struct super_block *); |
1715 | extern int __remove_suid(struct dentry *, int); | ||
1716 | extern int should_remove_suid(struct dentry *); | ||
1712 | extern int remove_suid(struct dentry *); | 1717 | extern int remove_suid(struct dentry *); |
1713 | extern void remove_dquot_ref(struct super_block *, int, struct list_head *); | 1718 | extern void remove_dquot_ref(struct super_block *, int, struct list_head *); |
1714 | 1719 | ||
@@ -1755,6 +1760,8 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *, | |||
1755 | struct pipe_inode_info *, size_t, unsigned int); | 1760 | struct pipe_inode_info *, size_t, unsigned int); |
1756 | extern ssize_t generic_file_splice_write(struct pipe_inode_info *, | 1761 | extern ssize_t generic_file_splice_write(struct pipe_inode_info *, |
1757 | struct file *, loff_t *, size_t, unsigned int); | 1762 | struct file *, loff_t *, size_t, unsigned int); |
1763 | extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *, | ||
1764 | struct file *, loff_t *, size_t, unsigned int); | ||
1758 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, | 1765 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, |
1759 | struct file *out, loff_t *, size_t len, unsigned int flags); | 1766 | struct file *out, loff_t *, size_t len, unsigned int flags); |
1760 | extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | 1767 | extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, |
diff --git a/mm/filemap.c b/mm/filemap.c
index 8558732e85c1..cb26e33fd0ff 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1884,11 +1884,10 @@ repeat: | |||
1884 | * if suid or (sgid and xgrp) | 1884 | * if suid or (sgid and xgrp) |
1885 | * remove privs | 1885 | * remove privs |
1886 | */ | 1886 | */ |
1887 | int remove_suid(struct dentry *dentry) | 1887 | int should_remove_suid(struct dentry *dentry) |
1888 | { | 1888 | { |
1889 | mode_t mode = dentry->d_inode->i_mode; | 1889 | mode_t mode = dentry->d_inode->i_mode; |
1890 | int kill = 0; | 1890 | int kill = 0; |
1891 | int result = 0; | ||
1892 | 1891 | ||
1893 | /* suid always must be killed */ | 1892 | /* suid always must be killed */ |
1894 | if (unlikely(mode & S_ISUID)) | 1893 | if (unlikely(mode & S_ISUID)) |
@@ -1901,13 +1900,28 @@ int remove_suid(struct dentry *dentry) | |||
1901 | if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) | 1900 | if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) |
1902 | kill |= ATTR_KILL_SGID; | 1901 | kill |= ATTR_KILL_SGID; |
1903 | 1902 | ||
1904 | if (unlikely(kill && !capable(CAP_FSETID))) { | 1903 | if (unlikely(kill && !capable(CAP_FSETID))) |
1905 | struct iattr newattrs; | 1904 | return kill; |
1906 | 1905 | ||
1907 | newattrs.ia_valid = ATTR_FORCE | kill; | 1906 | return 0; |
1908 | result = notify_change(dentry, &newattrs); | 1907 | } |
1909 | } | 1908 | |
1910 | return result; | 1909 | int __remove_suid(struct dentry *dentry, int kill) |
1910 | { | ||
1911 | struct iattr newattrs; | ||
1912 | |||
1913 | newattrs.ia_valid = ATTR_FORCE | kill; | ||
1914 | return notify_change(dentry, &newattrs); | ||
1915 | } | ||
1916 | |||
1917 | int remove_suid(struct dentry *dentry) | ||
1918 | { | ||
1919 | int kill = should_remove_suid(dentry); | ||
1920 | |||
1921 | if (unlikely(kill)) | ||
1922 | return __remove_suid(dentry, kill); | ||
1923 | |||
1924 | return 0; | ||
1911 | } | 1925 | } |
1912 | EXPORT_SYMBOL(remove_suid); | 1926 | EXPORT_SYMBOL(remove_suid); |
1913 | 1927 | ||