author    Eric Dumazet <edumazet@google.com>        2014-11-11 08:54:28 -0500
committer David S. Miller <davem@davemloft.net>     2014-11-11 13:00:06 -0500
commit    2c8c56e15df3d4c2af3d656e44feb18789f75837 (patch)
tree      e3c81c868a7c14ca2bac7efd69b6b21e25c355d4 /net
parent    3d97379a67486bc481ab5b8f7aa5b7ceb6154a95 (diff)
net: introduce SO_INCOMING_CPU
An alternative to RPS/RFS is to use hardware support for multiple queues,
and then split a set of millions of sockets into worker threads, each one
using epoll() to manage events on its own socket pool.

Ideally, we want one thread per RX/TX queue/cpu, but we have no way to
know after accept() or connect() on which queue/cpu a socket is managed.

We normally use one cpu per RX queue (IRQ smp_affinity being properly
set), so remembering on the socket structure which cpu delivered the last
packet is enough to solve the problem.

After accept(), connect(), or even file descriptor passing around
processes, applications can use:

    int cpu;
    socklen_t len = sizeof(cpu);

    getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len);

and use this information to put the socket into the right silo for
optimal performance, as the whole networking stack should then run on the
appropriate cpu, without the need to send IPIs (RPS/RFS).

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
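To make the intended usage concrete, here is a hedged userspace sketch (not part of
the patch): after accept(), the listener asks the kernel which cpu is handling the
socket's RX traffic and hands the fd to the epoll worker pinned to that cpu. The
worker pool, MAX_CPUS, and dispatch_to_worker() are hypothetical illustration; only
the getsockopt(SO_INCOMING_CPU) call reflects the API added by this patch.

    #include <sys/socket.h>
    #include <sys/epoll.h>
    #include <stdio.h>

    #ifndef SO_INCOMING_CPU
    #define SO_INCOMING_CPU 49   /* asm-generic value from this series; other arches may differ */
    #endif

    #define MAX_CPUS 64          /* hypothetical upper bound for the sketch */

    struct worker {
        int epfd;                /* epoll instance drained by a thread pinned to one cpu */
    };

    static struct worker workers[MAX_CPUS];   /* hypothetical pool, created at startup */

    /* After accept()/connect(), park the socket in the silo of the cpu that
     * the kernel reports as handling its incoming packets. */
    static int dispatch_to_worker(int fd)
    {
        int cpu = 0;
        socklen_t len = sizeof(cpu);
        struct epoll_event ev = { .events = EPOLLIN, .data.fd = fd };

        if (getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len) < 0) {
            perror("getsockopt(SO_INCOMING_CPU)");
            cpu = 0;                     /* fall back to an arbitrary silo */
        }
        if (cpu < 0 || cpu >= MAX_CPUS)
            cpu = 0;

        /* The worker thread pinned to 'cpu' will now see all events for this
         * socket, so the stack keeps running there without RPS/RFS IPIs. */
        return epoll_ctl(workers[cpu].epfd, EPOLL_CTL_ADD, fd, &ev);
    }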
Diffstat (limited to 'net')
-rw-r--r--   net/core/sock.c       5
-rw-r--r--   net/ipv4/tcp_ipv4.c   1
-rw-r--r--   net/ipv4/udp.c        1
-rw-r--r--   net/ipv6/tcp_ipv6.c   1
-rw-r--r--   net/ipv6/udp.c        1
-rw-r--r--   net/sctp/ulpqueue.c   5
6 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/net/core/sock.c b/net/core/sock.c
index ac56dd06c306..0725cf0cb685 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1213,6 +1213,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_max_pacing_rate;
 		break;
 
+	case SO_INCOMING_CPU:
+		v.val = sk->sk_incoming_cpu;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -1517,6 +1521,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 
 	newsk->sk_err = 0;
 	newsk->sk_priority = 0;
+	newsk->sk_incoming_cpu = raw_smp_processor_id();
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
 	 * (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8893598a4124..2c6a955fd5c3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1663,6 +1663,7 @@ process:
 	if (sk_filter(sk, skb))
 		goto discard_and_relse;
 
+	sk_incoming_cpu_update(sk);
 	skb->dev = NULL;
 
 	bh_lock_sock_nested(sk);
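Note: sk_incoming_cpu_update(), called here and in the receive paths below, is
introduced by the parent commit (3d97379a67486), not by this diff. A minimal
sketch of the behaviour it presumably has, consistent with the net/core/sock.c
changes above (the exact definition lives in include/net/sock.h):

    /* Sketch only: record which cpu is running the receive path, so that
     * getsockopt(SO_INCOMING_CPU) can report it to the application later. */
    static inline void sk_incoming_cpu_update(struct sock *sk)
    {
        sk->sk_incoming_cpu = raw_smp_processor_id();
    }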
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5d0fdca8e965..d13751685f44 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1445,6 +1445,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (inet_sk(sk)->inet_daddr) {
 		sock_rps_save_rxhash(sk, skb);
 		sk_mark_napi_id(sk, skb);
+		sk_incoming_cpu_update(sk);
 	}
 
 	rc = sock_queue_rcv_skb(sk, skb);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index fd8e50b380e7..1985b4933a6b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1456,6 +1456,7 @@ process:
 	if (sk_filter(sk, skb))
 		goto discard_and_relse;
 
+	sk_incoming_cpu_update(sk);
 	skb->dev = NULL;
 
 	bh_lock_sock_nested(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b756355e9739..d1fe36274906 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -577,6 +577,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
 		sock_rps_save_rxhash(sk, skb);
 		sk_mark_napi_id(sk, skb);
+		sk_incoming_cpu_update(sk);
 	}
 
 	rc = sock_queue_rcv_skb(sk, skb);
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index d49dc2ed30ad..ce469d648ffb 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -205,9 +205,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
 	if (sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN))
 		goto out_free;
 
-	if (!sctp_ulpevent_is_notification(event))
+	if (!sctp_ulpevent_is_notification(event)) {
 		sk_mark_napi_id(sk, skb);
-
+		sk_incoming_cpu_update(sk);
+	}
 	/* Check if the user wishes to receive this event. */
 	if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe))
 		goto out_free;