aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Eric Dumazet <dada1@cosmosbay.com> 2008-02-06 04:37:16 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-02-06 13:41:06 -0500
commit 9cfe015aa424b3c003baba3841a60dd9b5ad319b (patch)
tree 5575e06efcf91018f860f2db43979e8e91aba1c3
parent 774ed22c21ab95d582dfff38560f11cf290baeb4 (diff)
get rid of NR_OPEN and introduce a sysctl_nr_open
NR_OPEN (historically set to 1024*1024) actually forbids processes from opening more than 1024*1024 handles. Unfortunately, some production servers hit the not so 'ridiculously high value' of 1024*1024 file descriptors per process. Changing NR_OPEN is not considered safe because of potential vmalloc space exhaustion. This patch introduces a new sysctl (/proc/sys/fs/nr_open) which defaults to 1024*1024, so that admins can decide to change this limit if their workload needs it. [akpm@linux-foundation.org: export it for sparc64] Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Cc: Alan Cox <alan@lxorguk.ukuu.org.uk> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: "David S. Miller" <davem@davemloft.net> Cc: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/filesystems/proc.txt8
-rw-r--r--Documentation/sysctl/fs.txt10
-rw-r--r--arch/alpha/kernel/osf_sys.c2
-rw-r--r--arch/mips/kernel/sysirix.c2
-rw-r--r--arch/sparc64/kernel/sparc64_ksyms.c1
-rw-r--r--arch/sparc64/solaris/fs.c2
-rw-r--r--arch/sparc64/solaris/timod.c6
-rw-r--r--fs/file.c8
-rw-r--r--include/linux/fs.h2
-rw-r--r--kernel/sys.c2
-rw-r--r--kernel/sysctl.c8
11 files changed, 41 insertions, 10 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index e2799b5fafea..5681e2fa1496 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1029,6 +1029,14 @@ nr_inodes
1029Denotes the number of inodes the system has allocated. This number will 1029Denotes the number of inodes the system has allocated. This number will
1030grow and shrink dynamically. 1030grow and shrink dynamically.
1031 1031
1032nr_open
1033-------
1034
1035Denotes the maximum number of file-handles a process can
1036allocate. Default value is 1024*1024 (1048576) which should be
1037enough for most machines. Actual limit depends on RLIMIT_NOFILE
1038resource limit.
1039
1032nr_free_inodes 1040nr_free_inodes
1033-------------- 1041--------------
1034 1042
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index aa986a35e994..f99254327ae5 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -23,6 +23,7 @@ Currently, these files are in /proc/sys/fs:
23- inode-max 23- inode-max
24- inode-nr 24- inode-nr
25- inode-state 25- inode-state
26- nr_open
26- overflowuid 27- overflowuid
27- overflowgid 28- overflowgid
28- suid_dumpable 29- suid_dumpable
@@ -91,6 +92,15 @@ usage of file handles and you don't need to increase the maximum.
91 92
92============================================================== 93==============================================================
93 94
95nr_open:
96
97This denotes the maximum number of file-handles a process can
98allocate. Default value is 1024*1024 (1048576) which should be
99enough for most machines. Actual limit depends on RLIMIT_NOFILE
100resource limit.
101
102==============================================================
103
94inode-max, inode-nr & inode-state: 104inode-max, inode-nr & inode-state:
95 105
96As with file handles, the kernel allocates the inode structures 106As with file handles, the kernel allocates the inode structures
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 6413c5f23226..72f9a619a66d 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -430,7 +430,7 @@ sys_getpagesize(void)
430asmlinkage unsigned long 430asmlinkage unsigned long
431sys_getdtablesize(void) 431sys_getdtablesize(void)
432{ 432{
433 return NR_OPEN; 433 return sysctl_nr_open;
434} 434}
435 435
436/* 436/*
diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c
index 4c477c7ff74a..22fd41e946b2 100644
--- a/arch/mips/kernel/sysirix.c
+++ b/arch/mips/kernel/sysirix.c
@@ -356,7 +356,7 @@ asmlinkage int irix_syssgi(struct pt_regs *regs)
356 retval = NGROUPS_MAX; 356 retval = NGROUPS_MAX;
357 goto out; 357 goto out;
358 case 5: 358 case 5:
359 retval = NR_OPEN; 359 retval = sysctl_nr_open;
360 goto out; 360 goto out;
361 case 6: 361 case 6:
362 retval = 1; 362 retval = 1;
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 60765e314bd8..8649635d6d74 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -277,6 +277,7 @@ EXPORT_SYMBOL(sys_getpid);
277EXPORT_SYMBOL(sys_geteuid); 277EXPORT_SYMBOL(sys_geteuid);
278EXPORT_SYMBOL(sys_getuid); 278EXPORT_SYMBOL(sys_getuid);
279EXPORT_SYMBOL(sys_getegid); 279EXPORT_SYMBOL(sys_getegid);
280EXPORT_SYMBOL(sysctl_nr_open);
280EXPORT_SYMBOL(sys_getgid); 281EXPORT_SYMBOL(sys_getgid);
281EXPORT_SYMBOL(svr4_getcontext); 282EXPORT_SYMBOL(svr4_getcontext);
282EXPORT_SYMBOL(svr4_setcontext); 283EXPORT_SYMBOL(svr4_setcontext);
diff --git a/arch/sparc64/solaris/fs.c b/arch/sparc64/solaris/fs.c
index 61be597bf430..9311bfe4f2f7 100644
--- a/arch/sparc64/solaris/fs.c
+++ b/arch/sparc64/solaris/fs.c
@@ -624,7 +624,7 @@ asmlinkage int solaris_ulimit(int cmd, int val)
624 case 3: /* UL_GMEMLIM */ 624 case 3: /* UL_GMEMLIM */
625 return current->signal->rlim[RLIMIT_DATA].rlim_cur; 625 return current->signal->rlim[RLIMIT_DATA].rlim_cur;
626 case 4: /* UL_GDESLIM */ 626 case 4: /* UL_GDESLIM */
627 return NR_OPEN; 627 return sysctl_nr_open;
628 } 628 }
629 return -EINVAL; 629 return -EINVAL;
630} 630}
diff --git a/arch/sparc64/solaris/timod.c b/arch/sparc64/solaris/timod.c
index a9d32ceabf26..f53123c02c2b 100644
--- a/arch/sparc64/solaris/timod.c
+++ b/arch/sparc64/solaris/timod.c
@@ -859,7 +859,8 @@ asmlinkage int solaris_getmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3)
859 859
860 SOLD("entry"); 860 SOLD("entry");
861 lock_kernel(); 861 lock_kernel();
862 if(fd >= NR_OPEN) goto out; 862 if (fd >= sysctl_nr_open)
863 goto out;
863 864
864 fdt = files_fdtable(current->files); 865 fdt = files_fdtable(current->files);
865 filp = fdt->fd[fd]; 866 filp = fdt->fd[fd];
@@ -927,7 +928,8 @@ asmlinkage int solaris_putmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3)
927 928
928 SOLD("entry"); 929 SOLD("entry");
929 lock_kernel(); 930 lock_kernel();
930 if(fd >= NR_OPEN) goto out; 931 if (fd >= sysctl_nr_open)
932 goto out;
931 933
932 fdt = files_fdtable(current->files); 934 fdt = files_fdtable(current->files);
933 filp = fdt->fd[fd]; 935 filp = fdt->fd[fd];
diff --git a/fs/file.c b/fs/file.c
index c5575de01113..5110acb1c9ef 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -24,6 +24,8 @@ struct fdtable_defer {
24 struct fdtable *next; 24 struct fdtable *next;
25}; 25};
26 26
27int sysctl_nr_open __read_mostly = 1024*1024;
28
27/* 29/*
28 * We use this list to defer free fdtables that have vmalloced 30 * We use this list to defer free fdtables that have vmalloced
29 * sets/arrays. By keeping a per-cpu list, we avoid having to embed 31 * sets/arrays. By keeping a per-cpu list, we avoid having to embed
@@ -147,8 +149,8 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
147 nr /= (1024 / sizeof(struct file *)); 149 nr /= (1024 / sizeof(struct file *));
148 nr = roundup_pow_of_two(nr + 1); 150 nr = roundup_pow_of_two(nr + 1);
149 nr *= (1024 / sizeof(struct file *)); 151 nr *= (1024 / sizeof(struct file *));
150 if (nr > NR_OPEN) 152 if (nr > sysctl_nr_open)
151 nr = NR_OPEN; 153 nr = sysctl_nr_open;
152 154
153 fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); 155 fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
154 if (!fdt) 156 if (!fdt)
@@ -233,7 +235,7 @@ int expand_files(struct files_struct *files, int nr)
233 if (nr < fdt->max_fds) 235 if (nr < fdt->max_fds)
234 return 0; 236 return 0;
235 /* Can we expand? */ 237 /* Can we expand? */
236 if (nr >= NR_OPEN) 238 if (nr >= sysctl_nr_open)
237 return -EMFILE; 239 return -EMFILE;
238 240
239 /* All good, so we try */ 241 /* All good, so we try */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 19aab50c3b8e..109734bf6377 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -21,7 +21,7 @@
21 21
22/* Fixed constants first: */ 22/* Fixed constants first: */
23#undef NR_OPEN 23#undef NR_OPEN
24#define NR_OPEN (1024*1024) /* Absolute upper limit on fd num */ 24extern int sysctl_nr_open;
25#define INR_OPEN 1024 /* Initial setting for nfile rlimits */ 25#define INR_OPEN 1024 /* Initial setting for nfile rlimits */
26 26
27#define BLOCK_SIZE_BITS 10 27#define BLOCK_SIZE_BITS 10
diff --git a/kernel/sys.c b/kernel/sys.c
index 53de35fc8245..2b8e2daa9d95 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1472,7 +1472,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
1472 if ((new_rlim.rlim_max > old_rlim->rlim_max) && 1472 if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
1473 !capable(CAP_SYS_RESOURCE)) 1473 !capable(CAP_SYS_RESOURCE))
1474 return -EPERM; 1474 return -EPERM;
1475 if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN) 1475 if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
1476 return -EPERM; 1476 return -EPERM;
1477 1477
1478 retval = security_task_setrlimit(resource, &new_rlim); 1478 retval = security_task_setrlimit(resource, &new_rlim);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5e2ad5bf88e2..86daaa26d120 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1203,6 +1203,14 @@ static struct ctl_table fs_table[] = {
1203 .proc_handler = &proc_dointvec, 1203 .proc_handler = &proc_dointvec,
1204 }, 1204 },
1205 { 1205 {
1206 .ctl_name = CTL_UNNUMBERED,
1207 .procname = "nr_open",
1208 .data = &sysctl_nr_open,
1209 .maxlen = sizeof(int),
1210 .mode = 0644,
1211 .proc_handler = &proc_dointvec,
1212 },
1213 {
1206 .ctl_name = FS_DENTRY, 1214 .ctl_name = FS_DENTRY,
1207 .procname = "dentry-state", 1215 .procname = "dentry-state",
1208 .data = &dentry_stat, 1216 .data = &dentry_stat,