aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2014-11-11 08:54:28 -0500
committerDavid S. Miller <davem@davemloft.net>2014-11-11 13:00:06 -0500
commit2c8c56e15df3d4c2af3d656e44feb18789f75837 (patch)
treee3c81c868a7c14ca2bac7efd69b6b21e25c355d4
parent3d97379a67486bc481ab5b8f7aa5b7ceb6154a95 (diff)
net: introduce SO_INCOMING_CPU
An alternative to RPS/RFS is to use hardware support for multiple queues. Then split a set of millions of sockets across worker threads, each one using epoll() to manage events on its own socket pool. Ideally, we want one thread per RX/TX queue/cpu, but we have no way to know after accept() or connect() on which queue/cpu a socket is managed. We normally use one cpu per RX queue (IRQ smp_affinity being properly set), so remembering in the socket structure which cpu delivered the last packet is enough to solve the problem. After accept(), connect(), or even file descriptor passing between processes, applications can use: int cpu; socklen_t len = sizeof(cpu); getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len); and use this information to put the socket into the right silo for optimal performance, as all of the networking stack should then run on the appropriate cpu, without the need to send IPIs (RPS/RFS). Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--arch/alpha/include/uapi/asm/socket.h2
-rw-r--r--arch/avr32/include/uapi/asm/socket.h2
-rw-r--r--arch/cris/include/uapi/asm/socket.h2
-rw-r--r--arch/frv/include/uapi/asm/socket.h2
-rw-r--r--arch/ia64/include/uapi/asm/socket.h2
-rw-r--r--arch/m32r/include/uapi/asm/socket.h2
-rw-r--r--arch/mips/include/uapi/asm/socket.h2
-rw-r--r--arch/mn10300/include/uapi/asm/socket.h2
-rw-r--r--arch/parisc/include/uapi/asm/socket.h2
-rw-r--r--arch/powerpc/include/uapi/asm/socket.h2
-rw-r--r--arch/s390/include/uapi/asm/socket.h2
-rw-r--r--arch/sparc/include/uapi/asm/socket.h2
-rw-r--r--arch/xtensa/include/uapi/asm/socket.h2
-rw-r--r--include/net/sock.h12
-rw-r--r--include/uapi/asm-generic/socket.h2
-rw-r--r--net/core/sock.c5
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv4/udp.c1
-rw-r--r--net/ipv6/tcp_ipv6.c1
-rw-r--r--net/ipv6/udp.c1
-rw-r--r--net/sctp/ulpqueue.c5
21 files changed, 52 insertions, 2 deletions
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 3de1394bcab8..e2fe0700b3b4 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -87,4 +87,6 @@
87 87
88#define SO_BPF_EXTENSIONS 48 88#define SO_BPF_EXTENSIONS 48
89 89
90#define SO_INCOMING_CPU 49
91
90#endif /* _UAPI_ASM_SOCKET_H */ 92#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h
index 6e6cd159924b..92121b0f5b98 100644
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -80,4 +80,6 @@
80 80
81#define SO_BPF_EXTENSIONS 48 81#define SO_BPF_EXTENSIONS 48
82 82
83#define SO_INCOMING_CPU 49
84
83#endif /* _UAPI__ASM_AVR32_SOCKET_H */ 85#endif /* _UAPI__ASM_AVR32_SOCKET_H */
diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h
index ed94e5ed0a23..60f60f5b9b35 100644
--- a/arch/cris/include/uapi/asm/socket.h
+++ b/arch/cris/include/uapi/asm/socket.h
@@ -82,6 +82,8 @@
82 82
83#define SO_BPF_EXTENSIONS 48 83#define SO_BPF_EXTENSIONS 48
84 84
85#define SO_INCOMING_CPU 49
86
85#endif /* _ASM_SOCKET_H */ 87#endif /* _ASM_SOCKET_H */
86 88
87 89
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index ca2c6e6f31c6..2c6890209ea6 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -80,5 +80,7 @@
80 80
81#define SO_BPF_EXTENSIONS 48 81#define SO_BPF_EXTENSIONS 48
82 82
83#define SO_INCOMING_CPU 49
84
83#endif /* _ASM_SOCKET_H */ 85#endif /* _ASM_SOCKET_H */
84 86
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index a1b49bac7951..09a93fb566f6 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -89,4 +89,6 @@
89 89
90#define SO_BPF_EXTENSIONS 48 90#define SO_BPF_EXTENSIONS 48
91 91
92#define SO_INCOMING_CPU 49
93
92#endif /* _ASM_IA64_SOCKET_H */ 94#endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index 6c9a24b3aefa..e8589819c274 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -80,4 +80,6 @@
80 80
81#define SO_BPF_EXTENSIONS 48 81#define SO_BPF_EXTENSIONS 48
82 82
83#define SO_INCOMING_CPU 49
84
83#endif /* _ASM_M32R_SOCKET_H */ 85#endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index a14baa218c76..2e9ee8c55a10 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -98,4 +98,6 @@
98 98
99#define SO_BPF_EXTENSIONS 48 99#define SO_BPF_EXTENSIONS 48
100 100
101#define SO_INCOMING_CPU 49
102
101#endif /* _UAPI_ASM_SOCKET_H */ 103#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index 6aa3ce1854aa..f3492e8c9f70 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -80,4 +80,6 @@
80 80
81#define SO_BPF_EXTENSIONS 48 81#define SO_BPF_EXTENSIONS 48
82 82
83#define SO_INCOMING_CPU 49
84
83#endif /* _ASM_SOCKET_H */ 85#endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index fe35ceacf0e7..7984a1cab3da 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -79,4 +79,6 @@
79 79
80#define SO_BPF_EXTENSIONS 0x4029 80#define SO_BPF_EXTENSIONS 0x4029
81 81
82#define SO_INCOMING_CPU 0x402A
83
82#endif /* _UAPI_ASM_SOCKET_H */ 84#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index a9c3e2e18c05..3474e4ef166d 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -87,4 +87,6 @@
87 87
88#define SO_BPF_EXTENSIONS 48 88#define SO_BPF_EXTENSIONS 48
89 89
90#define SO_INCOMING_CPU 49
91
90#endif /* _ASM_POWERPC_SOCKET_H */ 92#endif /* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index e031332096d7..8457636c33e1 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -86,4 +86,6 @@
86 86
87#define SO_BPF_EXTENSIONS 48 87#define SO_BPF_EXTENSIONS 48
88 88
89#define SO_INCOMING_CPU 49
90
89#endif /* _ASM_SOCKET_H */ 91#endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 54d9608681b6..4a8003a94163 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -76,6 +76,8 @@
76 76
77#define SO_BPF_EXTENSIONS 0x0032 77#define SO_BPF_EXTENSIONS 0x0032
78 78
79#define SO_INCOMING_CPU 0x0033
80
79/* Security levels - as per NRL IPv6 - don't actually do anything */ 81/* Security levels - as per NRL IPv6 - don't actually do anything */
80#define SO_SECURITY_AUTHENTICATION 0x5001 82#define SO_SECURITY_AUTHENTICATION 0x5001
81#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 83#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 39acec0cf0b1..c46f6a696849 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -91,4 +91,6 @@
91 91
92#define SO_BPF_EXTENSIONS 48 92#define SO_BPF_EXTENSIONS 48
93 93
94#define SO_INCOMING_CPU 49
95
94#endif /* _XTENSA_SOCKET_H */ 96#endif /* _XTENSA_SOCKET_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 6767d75ecb17..7789b59c0c40 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -273,6 +273,7 @@ struct cg_proto;
273 * @sk_rcvtimeo: %SO_RCVTIMEO setting 273 * @sk_rcvtimeo: %SO_RCVTIMEO setting
274 * @sk_sndtimeo: %SO_SNDTIMEO setting 274 * @sk_sndtimeo: %SO_SNDTIMEO setting
275 * @sk_rxhash: flow hash received from netif layer 275 * @sk_rxhash: flow hash received from netif layer
276 * @sk_incoming_cpu: record cpu processing incoming packets
276 * @sk_txhash: computed flow hash for use on transmit 277 * @sk_txhash: computed flow hash for use on transmit
277 * @sk_filter: socket filtering instructions 278 * @sk_filter: socket filtering instructions
278 * @sk_protinfo: private area, net family specific, when not using slab 279 * @sk_protinfo: private area, net family specific, when not using slab
@@ -350,6 +351,12 @@ struct sock {
350#ifdef CONFIG_RPS 351#ifdef CONFIG_RPS
351 __u32 sk_rxhash; 352 __u32 sk_rxhash;
352#endif 353#endif
354 u16 sk_incoming_cpu;
355 /* 16bit hole
356 * Warned : sk_incoming_cpu can be set from softirq,
357 * Do not use this hole without fully understanding possible issues.
358 */
359
353 __u32 sk_txhash; 360 __u32 sk_txhash;
354#ifdef CONFIG_NET_RX_BUSY_POLL 361#ifdef CONFIG_NET_RX_BUSY_POLL
355 unsigned int sk_napi_id; 362 unsigned int sk_napi_id;
@@ -833,6 +840,11 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
833 return sk->sk_backlog_rcv(sk, skb); 840 return sk->sk_backlog_rcv(sk, skb);
834} 841}
835 842
843static inline void sk_incoming_cpu_update(struct sock *sk)
844{
845 sk->sk_incoming_cpu = raw_smp_processor_id();
846}
847
836static inline void sock_rps_record_flow_hash(__u32 hash) 848static inline void sock_rps_record_flow_hash(__u32 hash)
837{ 849{
838#ifdef CONFIG_RPS 850#ifdef CONFIG_RPS
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index ea0796bdcf88..f541ccefd4ac 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -82,4 +82,6 @@
82 82
83#define SO_BPF_EXTENSIONS 48 83#define SO_BPF_EXTENSIONS 48
84 84
85#define SO_INCOMING_CPU 49
86
85#endif /* __ASM_GENERIC_SOCKET_H */ 87#endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index ac56dd06c306..0725cf0cb685 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1213,6 +1213,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
1213 v.val = sk->sk_max_pacing_rate; 1213 v.val = sk->sk_max_pacing_rate;
1214 break; 1214 break;
1215 1215
1216 case SO_INCOMING_CPU:
1217 v.val = sk->sk_incoming_cpu;
1218 break;
1219
1216 default: 1220 default:
1217 return -ENOPROTOOPT; 1221 return -ENOPROTOOPT;
1218 } 1222 }
@@ -1517,6 +1521,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1517 1521
1518 newsk->sk_err = 0; 1522 newsk->sk_err = 0;
1519 newsk->sk_priority = 0; 1523 newsk->sk_priority = 0;
1524 newsk->sk_incoming_cpu = raw_smp_processor_id();
1520 /* 1525 /*
1521 * Before updating sk_refcnt, we must commit prior changes to memory 1526 * Before updating sk_refcnt, we must commit prior changes to memory
1522 * (Documentation/RCU/rculist_nulls.txt for details) 1527 * (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8893598a4124..2c6a955fd5c3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1663,6 +1663,7 @@ process:
1663 if (sk_filter(sk, skb)) 1663 if (sk_filter(sk, skb))
1664 goto discard_and_relse; 1664 goto discard_and_relse;
1665 1665
1666 sk_incoming_cpu_update(sk);
1666 skb->dev = NULL; 1667 skb->dev = NULL;
1667 1668
1668 bh_lock_sock_nested(sk); 1669 bh_lock_sock_nested(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5d0fdca8e965..d13751685f44 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1445,6 +1445,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1445 if (inet_sk(sk)->inet_daddr) { 1445 if (inet_sk(sk)->inet_daddr) {
1446 sock_rps_save_rxhash(sk, skb); 1446 sock_rps_save_rxhash(sk, skb);
1447 sk_mark_napi_id(sk, skb); 1447 sk_mark_napi_id(sk, skb);
1448 sk_incoming_cpu_update(sk);
1448 } 1449 }
1449 1450
1450 rc = sock_queue_rcv_skb(sk, skb); 1451 rc = sock_queue_rcv_skb(sk, skb);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index fd8e50b380e7..1985b4933a6b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1456,6 +1456,7 @@ process:
1456 if (sk_filter(sk, skb)) 1456 if (sk_filter(sk, skb))
1457 goto discard_and_relse; 1457 goto discard_and_relse;
1458 1458
1459 sk_incoming_cpu_update(sk);
1459 skb->dev = NULL; 1460 skb->dev = NULL;
1460 1461
1461 bh_lock_sock_nested(sk); 1462 bh_lock_sock_nested(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b756355e9739..d1fe36274906 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -577,6 +577,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
577 if (!ipv6_addr_any(&sk->sk_v6_daddr)) { 577 if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
578 sock_rps_save_rxhash(sk, skb); 578 sock_rps_save_rxhash(sk, skb);
579 sk_mark_napi_id(sk, skb); 579 sk_mark_napi_id(sk, skb);
580 sk_incoming_cpu_update(sk);
580 } 581 }
581 582
582 rc = sock_queue_rcv_skb(sk, skb); 583 rc = sock_queue_rcv_skb(sk, skb);
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index d49dc2ed30ad..ce469d648ffb 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -205,9 +205,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
205 if (sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN)) 205 if (sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN))
206 goto out_free; 206 goto out_free;
207 207
208 if (!sctp_ulpevent_is_notification(event)) 208 if (!sctp_ulpevent_is_notification(event)) {
209 sk_mark_napi_id(sk, skb); 209 sk_mark_napi_id(sk, skb);
210 210 sk_incoming_cpu_update(sk);
211 }
211 /* Check if the user wishes to receive this event. */ 212 /* Check if the user wishes to receive this event. */
212 if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe)) 213 if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe))
213 goto out_free; 214 goto out_free;