diff options
author | Eric Dumazet <dada1@cosmosbay.com> | 2008-02-06 04:37:16 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-06 13:41:06 -0500 |
commit | 9cfe015aa424b3c003baba3841a60dd9b5ad319b (patch) | |
tree | 5575e06efcf91018f860f2db43979e8e91aba1c3 | |
parent | 774ed22c21ab95d582dfff38560f11cf290baeb4 (diff) |
get rid of NR_OPEN and introduce a sysctl_nr_open
NR_OPEN (historically set to 1024*1024) actually forbids processes from opening
more than 1024*1024 handles.
Unfortunately, some production servers hit the not so 'ridiculously high
value' of 1024*1024 file descriptors per process.
Changing NR_OPEN is not considered safe because of potential vmalloc space
exhaustion.
This patch introduces a new sysctl (/proc/sys/fs/nr_open) which defaults to
1024*1024, so that admins can decide to change this limit if their workload
needs it.
[akpm@linux-foundation.org: export it for sparc64]
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/filesystems/proc.txt | 8 | ||||
-rw-r--r-- | Documentation/sysctl/fs.txt | 10 | ||||
-rw-r--r-- | arch/alpha/kernel/osf_sys.c | 2 | ||||
-rw-r--r-- | arch/mips/kernel/sysirix.c | 2 | ||||
-rw-r--r-- | arch/sparc64/kernel/sparc64_ksyms.c | 1 | ||||
-rw-r--r-- | arch/sparc64/solaris/fs.c | 2 | ||||
-rw-r--r-- | arch/sparc64/solaris/timod.c | 6 | ||||
-rw-r--r-- | fs/file.c | 8 | ||||
-rw-r--r-- | include/linux/fs.h | 2 | ||||
-rw-r--r-- | kernel/sys.c | 2 | ||||
-rw-r--r-- | kernel/sysctl.c | 8 |
11 files changed, 41 insertions, 10 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index e2799b5fafea..5681e2fa1496 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -1029,6 +1029,14 @@ nr_inodes | |||
1029 | Denotes the number of inodes the system has allocated. This number will | 1029 | Denotes the number of inodes the system has allocated. This number will |
1030 | grow and shrink dynamically. | 1030 | grow and shrink dynamically. |
1031 | 1031 | ||
1032 | nr_open | ||
1033 | ------- | ||
1034 | |||
1035 | Denotes the maximum number of file-handles a process can | ||
1036 | allocate. Default value is 1024*1024 (1048576) which should be | ||
1037 | enough for most machines. Actual limit depends on RLIMIT_NOFILE | ||
1038 | resource limit. | ||
1039 | |||
1032 | nr_free_inodes | 1040 | nr_free_inodes |
1033 | -------------- | 1041 | -------------- |
1034 | 1042 | ||
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index aa986a35e994..f99254327ae5 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt | |||
@@ -23,6 +23,7 @@ Currently, these files are in /proc/sys/fs: | |||
23 | - inode-max | 23 | - inode-max |
24 | - inode-nr | 24 | - inode-nr |
25 | - inode-state | 25 | - inode-state |
26 | - nr_open | ||
26 | - overflowuid | 27 | - overflowuid |
27 | - overflowgid | 28 | - overflowgid |
28 | - suid_dumpable | 29 | - suid_dumpable |
@@ -91,6 +92,15 @@ usage of file handles and you don't need to increase the maximum. | |||
91 | 92 | ||
92 | ============================================================== | 93 | ============================================================== |
93 | 94 | ||
95 | nr_open: | ||
96 | |||
97 | This denotes the maximum number of file-handles a process can | ||
98 | allocate. Default value is 1024*1024 (1048576) which should be | ||
99 | enough for most machines. Actual limit depends on RLIMIT_NOFILE | ||
100 | resource limit. | ||
101 | |||
102 | ============================================================== | ||
103 | |||
94 | inode-max, inode-nr & inode-state: | 104 | inode-max, inode-nr & inode-state: |
95 | 105 | ||
96 | As with file handles, the kernel allocates the inode structures | 106 | As with file handles, the kernel allocates the inode structures |
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 6413c5f23226..72f9a619a66d 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c | |||
@@ -430,7 +430,7 @@ sys_getpagesize(void) | |||
430 | asmlinkage unsigned long | 430 | asmlinkage unsigned long |
431 | sys_getdtablesize(void) | 431 | sys_getdtablesize(void) |
432 | { | 432 | { |
433 | return NR_OPEN; | 433 | return sysctl_nr_open; |
434 | } | 434 | } |
435 | 435 | ||
436 | /* | 436 | /* |
diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c index 4c477c7ff74a..22fd41e946b2 100644 --- a/arch/mips/kernel/sysirix.c +++ b/arch/mips/kernel/sysirix.c | |||
@@ -356,7 +356,7 @@ asmlinkage int irix_syssgi(struct pt_regs *regs) | |||
356 | retval = NGROUPS_MAX; | 356 | retval = NGROUPS_MAX; |
357 | goto out; | 357 | goto out; |
358 | case 5: | 358 | case 5: |
359 | retval = NR_OPEN; | 359 | retval = sysctl_nr_open; |
360 | goto out; | 360 | goto out; |
361 | case 6: | 361 | case 6: |
362 | retval = 1; | 362 | retval = 1; |
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 60765e314bd8..8649635d6d74 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c | |||
@@ -277,6 +277,7 @@ EXPORT_SYMBOL(sys_getpid); | |||
277 | EXPORT_SYMBOL(sys_geteuid); | 277 | EXPORT_SYMBOL(sys_geteuid); |
278 | EXPORT_SYMBOL(sys_getuid); | 278 | EXPORT_SYMBOL(sys_getuid); |
279 | EXPORT_SYMBOL(sys_getegid); | 279 | EXPORT_SYMBOL(sys_getegid); |
280 | EXPORT_SYMBOL(sysctl_nr_open); | ||
280 | EXPORT_SYMBOL(sys_getgid); | 281 | EXPORT_SYMBOL(sys_getgid); |
281 | EXPORT_SYMBOL(svr4_getcontext); | 282 | EXPORT_SYMBOL(svr4_getcontext); |
282 | EXPORT_SYMBOL(svr4_setcontext); | 283 | EXPORT_SYMBOL(svr4_setcontext); |
diff --git a/arch/sparc64/solaris/fs.c b/arch/sparc64/solaris/fs.c index 61be597bf430..9311bfe4f2f7 100644 --- a/arch/sparc64/solaris/fs.c +++ b/arch/sparc64/solaris/fs.c | |||
@@ -624,7 +624,7 @@ asmlinkage int solaris_ulimit(int cmd, int val) | |||
624 | case 3: /* UL_GMEMLIM */ | 624 | case 3: /* UL_GMEMLIM */ |
625 | return current->signal->rlim[RLIMIT_DATA].rlim_cur; | 625 | return current->signal->rlim[RLIMIT_DATA].rlim_cur; |
626 | case 4: /* UL_GDESLIM */ | 626 | case 4: /* UL_GDESLIM */ |
627 | return NR_OPEN; | 627 | return sysctl_nr_open; |
628 | } | 628 | } |
629 | return -EINVAL; | 629 | return -EINVAL; |
630 | } | 630 | } |
diff --git a/arch/sparc64/solaris/timod.c b/arch/sparc64/solaris/timod.c index a9d32ceabf26..f53123c02c2b 100644 --- a/arch/sparc64/solaris/timod.c +++ b/arch/sparc64/solaris/timod.c | |||
@@ -859,7 +859,8 @@ asmlinkage int solaris_getmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3) | |||
859 | 859 | ||
860 | SOLD("entry"); | 860 | SOLD("entry"); |
861 | lock_kernel(); | 861 | lock_kernel(); |
862 | if(fd >= NR_OPEN) goto out; | 862 | if (fd >= sysctl_nr_open) |
863 | goto out; | ||
863 | 864 | ||
864 | fdt = files_fdtable(current->files); | 865 | fdt = files_fdtable(current->files); |
865 | filp = fdt->fd[fd]; | 866 | filp = fdt->fd[fd]; |
@@ -927,7 +928,8 @@ asmlinkage int solaris_putmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3) | |||
927 | 928 | ||
928 | SOLD("entry"); | 929 | SOLD("entry"); |
929 | lock_kernel(); | 930 | lock_kernel(); |
930 | if(fd >= NR_OPEN) goto out; | 931 | if (fd >= sysctl_nr_open) |
932 | goto out; | ||
931 | 933 | ||
932 | fdt = files_fdtable(current->files); | 934 | fdt = files_fdtable(current->files); |
933 | filp = fdt->fd[fd]; | 935 | filp = fdt->fd[fd]; |
@@ -24,6 +24,8 @@ struct fdtable_defer { | |||
24 | struct fdtable *next; | 24 | struct fdtable *next; |
25 | }; | 25 | }; |
26 | 26 | ||
27 | int sysctl_nr_open __read_mostly = 1024*1024; | ||
28 | |||
27 | /* | 29 | /* |
28 | * We use this list to defer free fdtables that have vmalloced | 30 | * We use this list to defer free fdtables that have vmalloced |
29 | * sets/arrays. By keeping a per-cpu list, we avoid having to embed | 31 | * sets/arrays. By keeping a per-cpu list, we avoid having to embed |
@@ -147,8 +149,8 @@ static struct fdtable * alloc_fdtable(unsigned int nr) | |||
147 | nr /= (1024 / sizeof(struct file *)); | 149 | nr /= (1024 / sizeof(struct file *)); |
148 | nr = roundup_pow_of_two(nr + 1); | 150 | nr = roundup_pow_of_two(nr + 1); |
149 | nr *= (1024 / sizeof(struct file *)); | 151 | nr *= (1024 / sizeof(struct file *)); |
150 | if (nr > NR_OPEN) | 152 | if (nr > sysctl_nr_open) |
151 | nr = NR_OPEN; | 153 | nr = sysctl_nr_open; |
152 | 154 | ||
153 | fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); | 155 | fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); |
154 | if (!fdt) | 156 | if (!fdt) |
@@ -233,7 +235,7 @@ int expand_files(struct files_struct *files, int nr) | |||
233 | if (nr < fdt->max_fds) | 235 | if (nr < fdt->max_fds) |
234 | return 0; | 236 | return 0; |
235 | /* Can we expand? */ | 237 | /* Can we expand? */ |
236 | if (nr >= NR_OPEN) | 238 | if (nr >= sysctl_nr_open) |
237 | return -EMFILE; | 239 | return -EMFILE; |
238 | 240 | ||
239 | /* All good, so we try */ | 241 | /* All good, so we try */ |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 19aab50c3b8e..109734bf6377 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -21,7 +21,7 @@ | |||
21 | 21 | ||
22 | /* Fixed constants first: */ | 22 | /* Fixed constants first: */ |
23 | #undef NR_OPEN | 23 | #undef NR_OPEN |
24 | #define NR_OPEN (1024*1024) /* Absolute upper limit on fd num */ | 24 | extern int sysctl_nr_open; |
25 | #define INR_OPEN 1024 /* Initial setting for nfile rlimits */ | 25 | #define INR_OPEN 1024 /* Initial setting for nfile rlimits */ |
26 | 26 | ||
27 | #define BLOCK_SIZE_BITS 10 | 27 | #define BLOCK_SIZE_BITS 10 |
diff --git a/kernel/sys.c b/kernel/sys.c index 53de35fc8245..2b8e2daa9d95 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1472,7 +1472,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) | |||
1472 | if ((new_rlim.rlim_max > old_rlim->rlim_max) && | 1472 | if ((new_rlim.rlim_max > old_rlim->rlim_max) && |
1473 | !capable(CAP_SYS_RESOURCE)) | 1473 | !capable(CAP_SYS_RESOURCE)) |
1474 | return -EPERM; | 1474 | return -EPERM; |
1475 | if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN) | 1475 | if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open) |
1476 | return -EPERM; | 1476 | return -EPERM; |
1477 | 1477 | ||
1478 | retval = security_task_setrlimit(resource, &new_rlim); | 1478 | retval = security_task_setrlimit(resource, &new_rlim); |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5e2ad5bf88e2..86daaa26d120 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -1203,6 +1203,14 @@ static struct ctl_table fs_table[] = { | |||
1203 | .proc_handler = &proc_dointvec, | 1203 | .proc_handler = &proc_dointvec, |
1204 | }, | 1204 | }, |
1205 | { | 1205 | { |
1206 | .ctl_name = CTL_UNNUMBERED, | ||
1207 | .procname = "nr_open", | ||
1208 | .data = &sysctl_nr_open, | ||
1209 | .maxlen = sizeof(int), | ||
1210 | .mode = 0644, | ||
1211 | .proc_handler = &proc_dointvec, | ||
1212 | }, | ||
1213 | { | ||
1206 | .ctl_name = FS_DENTRY, | 1214 | .ctl_name = FS_DENTRY, |
1207 | .procname = "dentry-state", | 1215 | .procname = "dentry-state", |
1208 | .data = &dentry_stat, | 1216 | .data = &dentry_stat, |