diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-10-05 03:32:55 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2010-10-25 21:18:20 -0400 |
commit | 7e360c38abe2c70eae3ba5a8a17f17671d8b77c5 (patch) | |
tree | 319034360c667ac704bce87b1a0856657bf67e4b /net | |
parent | fde214d414218fb6cace35708730986bcc94fb53 (diff) |
fs: allow for more than 2^31 files
Andrew,
Could you please review this patch, you probably are the right guy to
take it, because it crosses fs and net trees.
Note : /proc/sys/fs/file-nr is a read-only file, so this patch doesnt
depend on previous patch (sysctl: fix min/max handling in
__do_proc_doulongvec_minmax())
Thanks !
[PATCH V4] fs: allow for more than 2^31 files
Robin Holt tried to boot a 16TB system and found af_unix was overflowing
a 32bit value :
<quote>
We were seeing a failure which prevented boot. The kernel was incapable
of creating either a named pipe or unix domain socket. This comes down
to a common kernel function called unix_create1() which does:
atomic_inc(&unix_nr_socks);
if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
goto out;
The function get_max_files() is a simple return of files_stat.max_files.
files_stat.max_files is a signed integer and is computed in
fs/file_table.c's files_init().
n = (mempages * (PAGE_SIZE / 1024)) / 10;
files_stat.max_files = n;
In our case, mempages (total_ram_pages) is approx 3,758,096,384
(0xe0000000). That leaves max_files at approximately 1,503,238,553.
This causes 2 * get_max_files() to integer overflow.
</quote>
Fix is to let /proc/sys/fs/file-nr & /proc/sys/fs/file-max use long
integers, and change af_unix to use an atomic_long_t instead of
atomic_t.
get_max_files() is changed to return an unsigned long.
get_nr_files() is changed to return a long.
unix_nr_socks is changed from atomic_t to atomic_long_t, while not
strictly needed to address Robin problem.
Before patch (on a 64bit kernel) :
# echo 2147483648 >/proc/sys/fs/file-max
# cat /proc/sys/fs/file-max
-18446744071562067968
After patch:
# echo 2147483648 >/proc/sys/fs/file-max
# cat /proc/sys/fs/file-max
2147483648
# cat /proc/sys/fs/file-nr
704 0 2147483648
Reported-by: Robin Holt <holt@sgi.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: David Miller <davem@davemloft.net>
Reviewed-by: Robin Holt <holt@sgi.com>
Tested-by: Robin Holt <holt@sgi.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'net')
-rw-r--r-- | net/unix/af_unix.c | 14 |
1 files changed, 7 insertions, 7 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0ebc777a6660..3c95304a0817 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
@@ -117,7 +117,7 @@ | |||
117 | 117 | ||
118 | static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; | 118 | static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; |
119 | static DEFINE_SPINLOCK(unix_table_lock); | 119 | static DEFINE_SPINLOCK(unix_table_lock); |
120 | static atomic_t unix_nr_socks = ATOMIC_INIT(0); | 120 | static atomic_long_t unix_nr_socks; |
121 | 121 | ||
122 | #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) | 122 | #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) |
123 | 123 | ||
@@ -360,13 +360,13 @@ static void unix_sock_destructor(struct sock *sk) | |||
360 | if (u->addr) | 360 | if (u->addr) |
361 | unix_release_addr(u->addr); | 361 | unix_release_addr(u->addr); |
362 | 362 | ||
363 | atomic_dec(&unix_nr_socks); | 363 | atomic_long_dec(&unix_nr_socks); |
364 | local_bh_disable(); | 364 | local_bh_disable(); |
365 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | 365 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
366 | local_bh_enable(); | 366 | local_bh_enable(); |
367 | #ifdef UNIX_REFCNT_DEBUG | 367 | #ifdef UNIX_REFCNT_DEBUG |
368 | printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, | 368 | printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk, |
369 | atomic_read(&unix_nr_socks)); | 369 | atomic_long_read(&unix_nr_socks)); |
370 | #endif | 370 | #endif |
371 | } | 371 | } |
372 | 372 | ||
@@ -606,8 +606,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) | |||
606 | struct sock *sk = NULL; | 606 | struct sock *sk = NULL; |
607 | struct unix_sock *u; | 607 | struct unix_sock *u; |
608 | 608 | ||
609 | atomic_inc(&unix_nr_socks); | 609 | atomic_long_inc(&unix_nr_socks); |
610 | if (atomic_read(&unix_nr_socks) > 2 * get_max_files()) | 610 | if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) |
611 | goto out; | 611 | goto out; |
612 | 612 | ||
613 | sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto); | 613 | sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto); |
@@ -632,7 +632,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) | |||
632 | unix_insert_socket(unix_sockets_unbound, sk); | 632 | unix_insert_socket(unix_sockets_unbound, sk); |
633 | out: | 633 | out: |
634 | if (sk == NULL) | 634 | if (sk == NULL) |
635 | atomic_dec(&unix_nr_socks); | 635 | atomic_long_dec(&unix_nr_socks); |
636 | else { | 636 | else { |
637 | local_bh_disable(); | 637 | local_bh_disable(); |
638 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 638 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |