aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/inet_diag.c
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2013-10-03 03:22:02 -0400
committer: David S. Miller <davem@davemloft.net> 2013-10-08 23:19:24 -0400
commit: 05dbc7b59481ca891bbcfe6799a562d48159fbf7 (patch)
tree: f398ddbc5d2a72b3c3b7b16aed8a34b153491341 /net/ipv4/inet_diag.c
parent: 53af53ae83fe960ceb9ef74cac7915e9088f4266 (diff)
tcp/dccp: remove twchain
TCP listener refactoring, part 3:

Our goal is to hash SYN_RECV sockets into the main ehash for fast lookup and parallel SYN processing.

The current inet_ehash_bucket contains two chains: one for ESTABLISHED (and related states) sockets, and another for TIME_WAIT sockets only.

As the hash table is sized to get at most one socket per bucket, it makes little sense to have a separate twchain, as it makes the lookup slightly more complicated and doubles hash table memory usage.

If we make sure all socket types have the lookup keys at the same offsets, we can use a generic and faster lookup. It turns out TIME_WAIT and ESTABLISHED sockets already have common lookup fields for IPv4.

[ INET_TW_MATCH() is no longer needed ]

I'll provide a follow-up to factorize the IPv6 lookup as well, to remove INET6_TW_MATCH().

This way, SYN_RECV pseudo sockets will be supported the same way.

A new sock_gen_put() helper is added, doing either a sock_put() or an inet_twsk_put() [ and will support SYN_RECV later ].

Note that this helper should only be called in the real slow path, when an RCU lookup found a socket that was moved to another identity (freed/reused immediately), but it could eventually be used in other contexts, like sock_edemux().

Before patch:

dmesg | grep "TCP established"
TCP established hash table entries: 524288 (order: 11, 8388608 bytes)

After patch:

TCP established hash table entries: 524288 (order: 10, 4194304 bytes)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/inet_diag.c')
-rw-r--r-- net/ipv4/inet_diag.c 48
1 files changed, 13 insertions, 35 deletions
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 22000279efc8..8e1e40653357 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -635,12 +635,14 @@ static int inet_csk_diag_dump(struct sock *sk,
635 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); 635 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
636} 636}
637 637
638static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, 638static int inet_twsk_diag_dump(struct sock *sk,
639 struct sk_buff *skb, 639 struct sk_buff *skb,
640 struct netlink_callback *cb, 640 struct netlink_callback *cb,
641 struct inet_diag_req_v2 *r, 641 struct inet_diag_req_v2 *r,
642 const struct nlattr *bc) 642 const struct nlattr *bc)
643{ 643{
644 struct inet_timewait_sock *tw = inet_twsk(sk);
645
644 if (bc != NULL) { 646 if (bc != NULL) {
645 struct inet_diag_entry entry; 647 struct inet_diag_entry entry;
646 648
@@ -911,8 +913,7 @@ skip_listen_ht:
911 913
912 num = 0; 914 num = 0;
913 915
914 if (hlist_nulls_empty(&head->chain) && 916 if (hlist_nulls_empty(&head->chain))
915 hlist_nulls_empty(&head->twchain))
916 continue; 917 continue;
917 918
918 if (i > s_i) 919 if (i > s_i)
@@ -920,7 +921,7 @@ skip_listen_ht:
920 921
921 spin_lock_bh(lock); 922 spin_lock_bh(lock);
922 sk_nulls_for_each(sk, node, &head->chain) { 923 sk_nulls_for_each(sk, node, &head->chain) {
923 struct inet_sock *inet = inet_sk(sk); 924 int res;
924 925
925 if (!net_eq(sock_net(sk), net)) 926 if (!net_eq(sock_net(sk), net))
926 continue; 927 continue;
@@ -929,15 +930,19 @@ skip_listen_ht:
929 if (!(r->idiag_states & (1 << sk->sk_state))) 930 if (!(r->idiag_states & (1 << sk->sk_state)))
930 goto next_normal; 931 goto next_normal;
931 if (r->sdiag_family != AF_UNSPEC && 932 if (r->sdiag_family != AF_UNSPEC &&
932 sk->sk_family != r->sdiag_family) 933 sk->sk_family != r->sdiag_family)
933 goto next_normal; 934 goto next_normal;
934 if (r->id.idiag_sport != inet->inet_sport && 935 if (r->id.idiag_sport != htons(sk->sk_num) &&
935 r->id.idiag_sport) 936 r->id.idiag_sport)
936 goto next_normal; 937 goto next_normal;
937 if (r->id.idiag_dport != inet->inet_dport && 938 if (r->id.idiag_dport != sk->sk_dport &&
938 r->id.idiag_dport) 939 r->id.idiag_dport)
939 goto next_normal; 940 goto next_normal;
940 if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { 941 if (sk->sk_state == TCP_TIME_WAIT)
942 res = inet_twsk_diag_dump(sk, skb, cb, r, bc);
943 else
944 res = inet_csk_diag_dump(sk, skb, cb, r, bc);
945 if (res < 0) {
941 spin_unlock_bh(lock); 946 spin_unlock_bh(lock);
942 goto done; 947 goto done;
943 } 948 }
@@ -945,33 +950,6 @@ next_normal:
945 ++num; 950 ++num;
946 } 951 }
947 952
948 if (r->idiag_states & TCPF_TIME_WAIT) {
949 struct inet_timewait_sock *tw;
950
951 inet_twsk_for_each(tw, node,
952 &head->twchain) {
953 if (!net_eq(twsk_net(tw), net))
954 continue;
955
956 if (num < s_num)
957 goto next_dying;
958 if (r->sdiag_family != AF_UNSPEC &&
959 tw->tw_family != r->sdiag_family)
960 goto next_dying;
961 if (r->id.idiag_sport != tw->tw_sport &&
962 r->id.idiag_sport)
963 goto next_dying;
964 if (r->id.idiag_dport != tw->tw_dport &&
965 r->id.idiag_dport)
966 goto next_dying;
967 if (inet_twsk_diag_dump(tw, skb, cb, r, bc) < 0) {
968 spin_unlock_bh(lock);
969 goto done;
970 }
971next_dying:
972 ++num;
973 }
974 }
975 spin_unlock_bh(lock); 953 spin_unlock_bh(lock);
976 } 954 }
977 955