about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- include/net/inet_hashtables.h 2
-rw-r--r-- net/dccp/ipv4.c 32
-rw-r--r-- net/ipv4/inet_connection_sock.c 14
-rw-r--r-- net/ipv4/tcp.c 1
-rw-r--r-- net/ipv4/tcp_ipv4.c 2
-rw-r--r-- net/ipv6/tcp_ipv6.c 15
6 files changed, 10 insertions, 56 deletions
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index f50f95968340..07840baa9341 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -125,9 +125,7 @@ struct inet_hashinfo {
125 rwlock_t lhash_lock ____cacheline_aligned; 125 rwlock_t lhash_lock ____cacheline_aligned;
126 atomic_t lhash_users; 126 atomic_t lhash_users;
127 wait_queue_head_t lhash_wait; 127 wait_queue_head_t lhash_wait;
128 spinlock_t portalloc_lock;
129 kmem_cache_t *bind_bucket_cachep; 128 kmem_cache_t *bind_bucket_cachep;
130 int port_rover;
131}; 129};
132 130
133static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport, 131static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 6298cf58ff9e..4b9bc81ae1a3 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -31,8 +31,6 @@ struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
31 .lhash_lock = RW_LOCK_UNLOCKED, 31 .lhash_lock = RW_LOCK_UNLOCKED,
32 .lhash_users = ATOMIC_INIT(0), 32 .lhash_users = ATOMIC_INIT(0),
33 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), 33 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
34 .portalloc_lock = SPIN_LOCK_UNLOCKED,
35 .port_rover = 1024 - 1,
36}; 34};
37 35
38EXPORT_SYMBOL_GPL(dccp_hashinfo); 36EXPORT_SYMBOL_GPL(dccp_hashinfo);
@@ -125,36 +123,15 @@ static int dccp_v4_hash_connect(struct sock *sk)
125 int ret; 123 int ret;
126 124
127 if (snum == 0) { 125 if (snum == 0) {
128 int rover;
129 int low = sysctl_local_port_range[0]; 126 int low = sysctl_local_port_range[0];
130 int high = sysctl_local_port_range[1]; 127 int high = sysctl_local_port_range[1];
131 int remaining = (high - low) + 1; 128 int remaining = (high - low) + 1;
129 int rover = net_random() % (high - low) + low;
132 struct hlist_node *node; 130 struct hlist_node *node;
133 struct inet_timewait_sock *tw = NULL; 131 struct inet_timewait_sock *tw = NULL;
134 132
135 local_bh_disable(); 133 local_bh_disable();
136
137 /* TODO. Actually it is not so bad idea to remove
138 * dccp_hashinfo.portalloc_lock before next submission to
139 * Linus.
140 * As soon as we touch this place at all it is time to think.
141 *
142 * Now it protects single _advisory_ variable
143 * dccp_hashinfo.port_rover, hence it is mostly useless.
144 * Code will work nicely if we just delete it, but
145 * I am afraid in contented case it will work not better or
146 * even worse: another cpu just will hit the same bucket
147 * and spin there.
148 * So some cpu salt could remove both contention and
149 * memory pingpong. Any ideas how to do this in a nice way?
150 */
151 spin_lock(&dccp_hashinfo.portalloc_lock);
152 rover = dccp_hashinfo.port_rover;
153
154 do { 134 do {
155 rover++;
156 if ((rover < low) || (rover > high))
157 rover = low;
158 head = &dccp_hashinfo.bhash[inet_bhashfn(rover, 135 head = &dccp_hashinfo.bhash[inet_bhashfn(rover,
159 dccp_hashinfo.bhash_size)]; 136 dccp_hashinfo.bhash_size)];
160 spin_lock(&head->lock); 137 spin_lock(&head->lock);
@@ -187,9 +164,9 @@ static int dccp_v4_hash_connect(struct sock *sk)
187 164
188 next_port: 165 next_port:
189 spin_unlock(&head->lock); 166 spin_unlock(&head->lock);
167 if (++rover > high)
168 rover = low;
190 } while (--remaining > 0); 169 } while (--remaining > 0);
191 dccp_hashinfo.port_rover = rover;
192 spin_unlock(&dccp_hashinfo.portalloc_lock);
193 170
194 local_bh_enable(); 171 local_bh_enable();
195 172
@@ -197,9 +174,6 @@ static int dccp_v4_hash_connect(struct sock *sk)
197 174
198ok: 175ok:
199 /* All locks still held and bhs disabled */ 176 /* All locks still held and bhs disabled */
200 dccp_hashinfo.port_rover = rover;
201 spin_unlock(&dccp_hashinfo.portalloc_lock);
202
203 inet_bind_hash(sk, tb, rover); 177 inet_bind_hash(sk, tb, rover);
204 if (sk_unhashed(sk)) { 178 if (sk_unhashed(sk)) {
205 inet_sk(sk)->sport = htons(rover); 179 inet_sk(sk)->sport = htons(rover);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 94468a76c5b4..3fe021f1a566 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -78,17 +78,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
78 int low = sysctl_local_port_range[0]; 78 int low = sysctl_local_port_range[0];
79 int high = sysctl_local_port_range[1]; 79 int high = sysctl_local_port_range[1];
80 int remaining = (high - low) + 1; 80 int remaining = (high - low) + 1;
81 int rover; 81 int rover = net_random() % (high - low) + low;
82 82
83 spin_lock(&hashinfo->portalloc_lock);
84 if (hashinfo->port_rover < low)
85 rover = low;
86 else
87 rover = hashinfo->port_rover;
88 do { 83 do {
89 rover++;
90 if (rover > high)
91 rover = low;
92 head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; 84 head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
93 spin_lock(&head->lock); 85 spin_lock(&head->lock);
94 inet_bind_bucket_for_each(tb, node, &head->chain) 86 inet_bind_bucket_for_each(tb, node, &head->chain)
@@ -97,9 +89,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
97 break; 89 break;
98 next: 90 next:
99 spin_unlock(&head->lock); 91 spin_unlock(&head->lock);
92 if (++rover > high)
93 rover = low;
100 } while (--remaining > 0); 94 } while (--remaining > 0);
101 hashinfo->port_rover = rover;
102 spin_unlock(&hashinfo->portalloc_lock);
103 95
104 /* Exhausted local port range during search? It is not 96 /* Exhausted local port range during search? It is not
105 * possible for us to be holding one of the bind hash 97 * possible for us to be holding one of the bind hash
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f3f0013a9580..72b7c22e1ea5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2112,7 +2112,6 @@ void __init tcp_init(void)
2112 sysctl_tcp_max_orphans >>= (3 - order); 2112 sysctl_tcp_max_orphans >>= (3 - order);
2113 sysctl_max_syn_backlog = 128; 2113 sysctl_max_syn_backlog = 128;
2114 } 2114 }
2115 tcp_hashinfo.port_rover = sysctl_local_port_range[0] - 1;
2116 2115
2117 sysctl_tcp_mem[0] = 768 << order; 2116 sysctl_tcp_mem[0] = 768 << order;
2118 sysctl_tcp_mem[1] = 1024 << order; 2117 sysctl_tcp_mem[1] = 1024 << order;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c85819d8474b..49d67cd75edd 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -93,8 +93,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
93 .lhash_lock = RW_LOCK_UNLOCKED, 93 .lhash_lock = RW_LOCK_UNLOCKED,
94 .lhash_users = ATOMIC_INIT(0), 94 .lhash_users = ATOMIC_INIT(0),
95 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), 95 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
96 .portalloc_lock = SPIN_LOCK_UNLOCKED,
97 .port_rover = 1024 - 1,
98}; 96};
99 97
100static int tcp_v4_get_port(struct sock *sk, unsigned short snum) 98static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d693cb988b78..d746d3b27efb 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -114,16 +114,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
114 int low = sysctl_local_port_range[0]; 114 int low = sysctl_local_port_range[0];
115 int high = sysctl_local_port_range[1]; 115 int high = sysctl_local_port_range[1];
116 int remaining = (high - low) + 1; 116 int remaining = (high - low) + 1;
117 int rover; 117 int rover = net_random() % (high - low) + low;
118 118
119 spin_lock(&tcp_hashinfo.portalloc_lock); 119 do {
120 if (tcp_hashinfo.port_rover < low)
121 rover = low;
122 else
123 rover = tcp_hashinfo.port_rover;
124 do { rover++;
125 if (rover > high)
126 rover = low;
127 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; 120 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
128 spin_lock(&head->lock); 121 spin_lock(&head->lock);
129 inet_bind_bucket_for_each(tb, node, &head->chain) 122 inet_bind_bucket_for_each(tb, node, &head->chain)
@@ -132,9 +125,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
132 break; 125 break;
133 next: 126 next:
134 spin_unlock(&head->lock); 127 spin_unlock(&head->lock);
128 if (++rover > high)
129 rover = low;
135 } while (--remaining > 0); 130 } while (--remaining > 0);
136 tcp_hashinfo.port_rover = rover;
137 spin_unlock(&tcp_hashinfo.portalloc_lock);
138 131
139 /* Exhausted local port range during search? It is not 132 /* Exhausted local port range during search? It is not
140 * possible for us to be holding one of the bind hash 133 * possible for us to be holding one of the bind hash