diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-10-05 03:32:55 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2010-10-25 21:18:20 -0400 |
commit | 7e360c38abe2c70eae3ba5a8a17f17671d8b77c5 (patch) | |
tree | 319034360c667ac704bce87b1a0856657bf67e4b /include/linux/fs.h | |
parent | fde214d414218fb6cace35708730986bcc94fb53 (diff) |
fs: allow for more than 2^31 files
Andrew,
Could you please review this patch? You are probably the right guy to
take it, because it crosses the fs and net trees.
Note: /proc/sys/fs/file-nr is a read-only file, so this patch doesn't
depend on the previous patch (sysctl: fix min/max handling in
__do_proc_doulongvec_minmax()).
Thanks !
[PATCH V4] fs: allow for more than 2^31 files
Robin Holt tried to boot a 16TB system and found af_unix was overflowing
a 32-bit value:
<quote>
We were seeing a failure which prevented boot. The kernel was incapable
of creating either a named pipe or unix domain socket. This comes down
to a common kernel function called unix_create1() which does:
atomic_inc(&unix_nr_socks);
if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
goto out;
The function get_max_files() is a simple return of files_stat.max_files.
files_stat.max_files is a signed integer and is computed in
fs/file_table.c's files_init().
n = (mempages * (PAGE_SIZE / 1024)) / 10;
files_stat.max_files = n;
In our case, mempages (total_ram_pages) is approx 3,758,096,384
(0xe0000000). That leaves max_files at approximately 1,503,238,553.
This causes 2 * get_max_files() to integer overflow.
</quote>
The fix is to let /proc/sys/fs/file-nr & /proc/sys/fs/file-max use long
integers, and to change af_unix to use an atomic_long_t instead of an
atomic_t.
get_max_files() is changed to return an unsigned long.
get_nr_files() is changed to return a long.
unix_nr_socks is changed from atomic_t to atomic_long_t, although this is
not strictly needed to address Robin's problem.
Before patch (on a 64-bit kernel):
# echo 2147483648 >/proc/sys/fs/file-max
# cat /proc/sys/fs/file-max
-18446744071562067968
After patch:
# echo 2147483648 >/proc/sys/fs/file-max
# cat /proc/sys/fs/file-max
2147483648
# cat /proc/sys/fs/file-nr
704 0 2147483648
Reported-by: Robin Holt <holt@sgi.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: David Miller <davem@davemloft.net>
Reviewed-by: Robin Holt <holt@sgi.com>
Tested-by: Robin Holt <holt@sgi.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'include/linux/fs.h')
-rw-r--r-- | include/linux/fs.h | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h index 0a5d83633884..0cd6821013a0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -34,9 +34,9 @@ | |||
34 | 34 | ||
35 | /* And dynamically-tunable limits and defaults: */ | 35 | /* And dynamically-tunable limits and defaults: */ |
36 | struct files_stat_struct { | 36 | struct files_stat_struct { |
37 | int nr_files; /* read only */ | 37 | unsigned long nr_files; /* read only */ |
38 | int nr_free_files; /* read only */ | 38 | unsigned long nr_free_files; /* read only */ |
39 | int max_files; /* tunable */ | 39 | unsigned long max_files; /* tunable */ |
40 | }; | 40 | }; |
41 | 41 | ||
42 | struct inodes_stat_t { | 42 | struct inodes_stat_t { |
@@ -400,7 +400,7 @@ extern void __init inode_init_early(void); | |||
400 | extern void __init files_init(unsigned long); | 400 | extern void __init files_init(unsigned long); |
401 | 401 | ||
402 | extern struct files_stat_struct files_stat; | 402 | extern struct files_stat_struct files_stat; |
403 | extern int get_max_files(void); | 403 | extern unsigned long get_max_files(void); |
404 | extern int sysctl_nr_open; | 404 | extern int sysctl_nr_open; |
405 | extern struct inodes_stat_t inodes_stat; | 405 | extern struct inodes_stat_t inodes_stat; |
406 | extern int leases_enable, lease_break_time; | 406 | extern int leases_enable, lease_break_time; |