aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWilly Tarreau <w@1wt.eu>2016-01-18 10:36:09 -0500
committerAl Viro <viro@zeniv.linux.org.uk>2016-01-19 19:25:21 -0500
commit759c01142a5d0f364a462346168a56de28a80f52 (patch)
treede5edd048f10fdec85e7867d46db094e1575b69c
parent558041d8d21b48287224dd0e32cf19563c77607c (diff)
pipe: limit the per-user amount of pages allocated in pipes
On not-so-small systems, it is possible for a single process to cause an OOM condition by filling large pipes with data that are never read. A typical process filling 4000 pipes with 1 MB of data will use 4 GB of memory. On small systems it may be tricky to set the pipe max size to prevent this from happening. This patch makes it possible to enforce a per-user soft limit above which new pipes will be limited to a single page, effectively limiting them to 4 kB each, as well as a hard limit above which no new pipes may be created for this user. This has the effect of protecting the system against memory abuse without hurting other users, and still allowing pipes to work correctly though with less data at once. The limits are controlled by two new sysctls: pipe-user-pages-soft, and pipe-user-pages-hard. Both may be disabled by setting them to zero. The default soft limit allows the default number of FDs per process (1024) to create pipes of the default size (64kB), thus reaching a limit of 64MB before starting to create only smaller pipes. With 256 processes limited to 1024 FDs each, this results in 1024*64kB + (256*1024 - 1024) * 4kB = 1084 MB of memory allocated for a user. The hard limit is disabled by default to avoid breaking existing applications that make intensive use of pipes (e.g. for splicing). Reported-by: socketpair@gmail.com Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Mitigates: CVE-2013-4312 (Linux 2.0+) Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Willy Tarreau <w@1wt.eu> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--Documentation/sysctl/fs.txt23
-rw-r--r--fs/pipe.c47
-rw-r--r--include/linux/pipe_fs_i.h4
-rw-r--r--include/linux/sched.h1
-rw-r--r--kernel/sysctl.c14
5 files changed, 87 insertions, 2 deletions
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 88152f214f48..302b5ed616a6 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/fs:
32- nr_open 32- nr_open
33- overflowuid 33- overflowuid
34- overflowgid 34- overflowgid
35- pipe-user-pages-hard
36- pipe-user-pages-soft
35- protected_hardlinks 37- protected_hardlinks
36- protected_symlinks 38- protected_symlinks
37- suid_dumpable 39- suid_dumpable
@@ -159,6 +161,27 @@ The default is 65534.
159 161
160============================================================== 162==============================================================
161 163
164pipe-user-pages-hard:
165
166Maximum total number of pages a non-privileged user may allocate for pipes.
167Once this limit is reached, no new pipes may be allocated until usage goes
168below the limit again. When set to 0, no limit is applied, which is the default
169setting.
170
171==============================================================
172
173pipe-user-pages-soft:
174
175Maximum total number of pages a non-privileged user may allocate for pipes
176before the pipe size gets limited to a single page. Once this limit is reached,
177new pipes will be limited to a single page in size for this user in order to
178limit total memory usage, and trying to increase them using fcntl() will be
179denied until usage goes below the limit again. The default value allows
180allocating up to 1024 pipes at their default size. When set to 0, no limit is
181applied.
182
183==============================================================
184
162protected_hardlinks: 185protected_hardlinks:
163 186
164A long-standing class of security issues is the hardlink-based 187A long-standing class of security issues is the hardlink-based
diff --git a/fs/pipe.c b/fs/pipe.c
index 42cf8ddf0e55..ab8dad3ccb6a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -38,6 +38,12 @@ unsigned int pipe_max_size = 1048576;
38 */ 38 */
39unsigned int pipe_min_size = PAGE_SIZE; 39unsigned int pipe_min_size = PAGE_SIZE;
40 40
41/* Maximum allocatable pages per user. Hard limit is unset by default, soft
42 * matches default values.
43 */
44unsigned long pipe_user_pages_hard;
45unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
46
41/* 47/*
42 * We use a start+len construction, which provides full use of the 48 * We use a start+len construction, which provides full use of the
43 * allocated memory. 49 * allocated memory.
@@ -583,20 +589,49 @@ pipe_fasync(int fd, struct file *filp, int on)
583 return retval; 589 return retval;
584} 590}
585 591
592static void account_pipe_buffers(struct pipe_inode_info *pipe,
593 unsigned long old, unsigned long new)
594{
595 atomic_long_add(new - old, &pipe->user->pipe_bufs);
596}
597
598static bool too_many_pipe_buffers_soft(struct user_struct *user)
599{
600 return pipe_user_pages_soft &&
601 atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_soft;
602}
603
604static bool too_many_pipe_buffers_hard(struct user_struct *user)
605{
606 return pipe_user_pages_hard &&
607 atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_hard;
608}
609
586struct pipe_inode_info *alloc_pipe_info(void) 610struct pipe_inode_info *alloc_pipe_info(void)
587{ 611{
588 struct pipe_inode_info *pipe; 612 struct pipe_inode_info *pipe;
589 613
590 pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); 614 pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
591 if (pipe) { 615 if (pipe) {
592 pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL); 616 unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
617 struct user_struct *user = get_current_user();
618
619 if (!too_many_pipe_buffers_hard(user)) {
620 if (too_many_pipe_buffers_soft(user))
621 pipe_bufs = 1;
622 pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * pipe_bufs, GFP_KERNEL);
623 }
624
593 if (pipe->bufs) { 625 if (pipe->bufs) {
594 init_waitqueue_head(&pipe->wait); 626 init_waitqueue_head(&pipe->wait);
595 pipe->r_counter = pipe->w_counter = 1; 627 pipe->r_counter = pipe->w_counter = 1;
596 pipe->buffers = PIPE_DEF_BUFFERS; 628 pipe->buffers = pipe_bufs;
629 pipe->user = user;
630 account_pipe_buffers(pipe, 0, pipe_bufs);
597 mutex_init(&pipe->mutex); 631 mutex_init(&pipe->mutex);
598 return pipe; 632 return pipe;
599 } 633 }
634 free_uid(user);
600 kfree(pipe); 635 kfree(pipe);
601 } 636 }
602 637
@@ -607,6 +642,8 @@ void free_pipe_info(struct pipe_inode_info *pipe)
607{ 642{
608 int i; 643 int i;
609 644
645 account_pipe_buffers(pipe, pipe->buffers, 0);
646 free_uid(pipe->user);
610 for (i = 0; i < pipe->buffers; i++) { 647 for (i = 0; i < pipe->buffers; i++) {
611 struct pipe_buffer *buf = pipe->bufs + i; 648 struct pipe_buffer *buf = pipe->bufs + i;
612 if (buf->ops) 649 if (buf->ops)
@@ -998,6 +1035,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
998 memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer)); 1035 memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
999 } 1036 }
1000 1037
1038 account_pipe_buffers(pipe, pipe->buffers, nr_pages);
1001 pipe->curbuf = 0; 1039 pipe->curbuf = 0;
1002 kfree(pipe->bufs); 1040 kfree(pipe->bufs);
1003 pipe->bufs = bufs; 1041 pipe->bufs = bufs;
@@ -1069,6 +1107,11 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
1069 if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) { 1107 if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
1070 ret = -EPERM; 1108 ret = -EPERM;
1071 goto out; 1109 goto out;
1110 } else if ((too_many_pipe_buffers_hard(pipe->user) ||
1111 too_many_pipe_buffers_soft(pipe->user)) &&
1112 !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
1113 ret = -EPERM;
1114 goto out;
1072 } 1115 }
1073 ret = pipe_set_size(pipe, nr_pages); 1116 ret = pipe_set_size(pipe, nr_pages);
1074 break; 1117 break;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index eb8b8ac6df3c..24f5470d3944 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -42,6 +42,7 @@ struct pipe_buffer {
42 * @fasync_readers: reader side fasync 42 * @fasync_readers: reader side fasync
43 * @fasync_writers: writer side fasync 43 * @fasync_writers: writer side fasync
44 * @bufs: the circular array of pipe buffers 44 * @bufs: the circular array of pipe buffers
45 * @user: the user who created this pipe
45 **/ 46 **/
46struct pipe_inode_info { 47struct pipe_inode_info {
47 struct mutex mutex; 48 struct mutex mutex;
@@ -57,6 +58,7 @@ struct pipe_inode_info {
57 struct fasync_struct *fasync_readers; 58 struct fasync_struct *fasync_readers;
58 struct fasync_struct *fasync_writers; 59 struct fasync_struct *fasync_writers;
59 struct pipe_buffer *bufs; 60 struct pipe_buffer *bufs;
61 struct user_struct *user;
60}; 62};
61 63
62/* 64/*
@@ -123,6 +125,8 @@ void pipe_unlock(struct pipe_inode_info *);
123void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); 125void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
124 126
125extern unsigned int pipe_max_size, pipe_min_size; 127extern unsigned int pipe_max_size, pipe_min_size;
128extern unsigned long pipe_user_pages_hard;
129extern unsigned long pipe_user_pages_soft;
126int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *); 130int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *);
127 131
128 132
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 61aa9bbea871..1589ddc88e38 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -835,6 +835,7 @@ struct user_struct {
835#endif 835#endif
836 unsigned long locked_shm; /* How many pages of mlocked shm ? */ 836 unsigned long locked_shm; /* How many pages of mlocked shm ? */
837 unsigned long unix_inflight; /* How many files in flight in unix sockets */ 837 unsigned long unix_inflight; /* How many files in flight in unix sockets */
838 atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */
838 839
839#ifdef CONFIG_KEYS 840#ifdef CONFIG_KEYS
840 struct key *uid_keyring; /* UID specific keyring */ 841 struct key *uid_keyring; /* UID specific keyring */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c810f8afdb7f..f6fd236429bd 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1757,6 +1757,20 @@ static struct ctl_table fs_table[] = {
1757 .proc_handler = &pipe_proc_fn, 1757 .proc_handler = &pipe_proc_fn,
1758 .extra1 = &pipe_min_size, 1758 .extra1 = &pipe_min_size,
1759 }, 1759 },
1760 {
1761 .procname = "pipe-user-pages-hard",
1762 .data = &pipe_user_pages_hard,
1763 .maxlen = sizeof(pipe_user_pages_hard),
1764 .mode = 0644,
1765 .proc_handler = proc_doulongvec_minmax,
1766 },
1767 {
1768 .procname = "pipe-user-pages-soft",
1769 .data = &pipe_user_pages_soft,
1770 .maxlen = sizeof(pipe_user_pages_soft),
1771 .mode = 0644,
1772 .proc_handler = proc_doulongvec_minmax,
1773 },
1760 { } 1774 { }
1761}; 1775};
1762 1776