diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-07-23 06:59:36 -0400 |
---|---|---|
committer | Patrick McHardy <kaber@trash.net> | 2010-07-23 06:59:36 -0400 |
commit | e8648a1fdb54da1f683784b36a17aa65ea56e931 (patch) | |
tree | 66fd69f1987e4aba0025429b581d394e6db28af0 /net | |
parent | 7f1c407579519e71a0dcadc05614fd98acec585e (diff) |
netfilter: add xt_cpu match
In some situations a CPU match permits a better spreading of
connections, or selecting targets only for a given cpu.
With Receive Packet Steering or multiqueue NIC and appropriate IRQ
affinities, we can distribute traffic on available cpus, per session.
(all RX packets for a given flow are handled by a given cpu)
Since some legacy applications are not SMP friendly, one way to scale a
server is to run multiple copies of them.
Instead of randomly choosing an instance, we can use the cpu number as a
key so that softirq handler for a whole instance is running on a single
cpu, maximizing cache effects in TCP/UDP stacks.
Using NAT for example, a four-way machine might run four copies of
the server application, using a separate listening port for each instance,
but still presenting a unique external port :
iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 0 \
-j REDIRECT --to-port 8080
iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 1 \
-j REDIRECT --to-port 8081
iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 2 \
-j REDIRECT --to-port 8082
iptables -t nat -A PREROUTING -p tcp --dport 80 -m cpu --cpu 3 \
-j REDIRECT --to-port 8083
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/netfilter/Kconfig | 9 | ||||
-rw-r--r-- | net/netfilter/Makefile | 1 | ||||
-rw-r--r-- | net/netfilter/xt_cpu.c | 63 |
3 files changed, 73 insertions, 0 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 551b58419df9..43288259f4a1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig | |||
@@ -663,6 +663,15 @@ config NETFILTER_XT_MATCH_CONNTRACK | |||
663 | 663 | ||
664 | To compile it as a module, choose M here. If unsure, say N. | 664 | To compile it as a module, choose M here. If unsure, say N. |
665 | 665 | ||
666 | config NETFILTER_XT_MATCH_CPU | ||
667 | tristate '"cpu" match support' | ||
668 | depends on NETFILTER_ADVANCED | ||
669 | help | ||
670 | CPU matching allows you to match packets based on the CPU | ||
671 | currently handling the packet. | ||
672 | |||
673 | To compile it as a module, choose M here. If unsure, say N. | ||
674 | |||
666 | config NETFILTER_XT_MATCH_DCCP | 675 | config NETFILTER_XT_MATCH_DCCP |
667 | tristate '"dccp" protocol match support' | 676 | tristate '"dccp" protocol match support' |
668 | depends on NETFILTER_ADVANCED | 677 | depends on NETFILTER_ADVANCED |
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 4366c79a6683..441050f31111 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile | |||
@@ -70,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o | |||
70 | obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o | 70 | obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o |
71 | obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o | 71 | obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o |
72 | obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o | 72 | obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o |
73 | obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o | ||
73 | obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o | 74 | obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o |
74 | obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o | 75 | obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o |
75 | obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o | 76 | obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o |
diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c new file mode 100644 index 000000000000..b39db8a5cbae --- /dev/null +++ b/net/netfilter/xt_cpu.c | |||
@@ -0,0 +1,63 @@ | |||
1 | /* Kernel module to match running CPU */ | ||
2 | |||
3 | /* | ||
4 | * Might be used to distribute connections on several daemons, if | ||
5 | * RPS (Receive Packet Steering) is enabled or NIC is multiqueue capable, | ||
6 | * each RX queue IRQ affined to one CPU (1:1 mapping) | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | /* (C) 2010 Eric Dumazet | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or modify | ||
13 | * it under the terms of the GNU General Public License version 2 as | ||
14 | * published by the Free Software Foundation. | ||
15 | */ | ||
16 | |||
17 | #include <linux/module.h> | ||
18 | #include <linux/skbuff.h> | ||
19 | #include <linux/netfilter/xt_cpu.h> | ||
20 | #include <linux/netfilter/x_tables.h> | ||
21 | |||
22 | MODULE_LICENSE("GPL"); | ||
23 | MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>"); | ||
24 | MODULE_DESCRIPTION("Xtables: CPU match"); | ||
25 | |||
26 | static int cpu_mt_check(const struct xt_mtchk_param *par) | ||
27 | { | ||
28 | const struct xt_cpu_info *info = par->matchinfo; | ||
29 | |||
30 | if (info->invert & ~1) | ||
31 | return -EINVAL; | ||
32 | return 0; | ||
33 | } | ||
34 | |||
35 | static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par) | ||
36 | { | ||
37 | const struct xt_cpu_info *info = par->matchinfo; | ||
38 | |||
39 | return (info->cpu == smp_processor_id()) ^ info->invert; | ||
40 | } | ||
41 | |||
42 | static struct xt_match cpu_mt_reg __read_mostly = { | ||
43 | .name = "cpu", | ||
44 | .revision = 0, | ||
45 | .family = NFPROTO_UNSPEC, | ||
46 | .checkentry = cpu_mt_check, | ||
47 | .match = cpu_mt, | ||
48 | .matchsize = sizeof(struct xt_cpu_info), | ||
49 | .me = THIS_MODULE, | ||
50 | }; | ||
51 | |||
52 | static int __init cpu_mt_init(void) | ||
53 | { | ||
54 | return xt_register_match(&cpu_mt_reg); | ||
55 | } | ||
56 | |||
57 | static void __exit cpu_mt_exit(void) | ||
58 | { | ||
59 | xt_unregister_match(&cpu_mt_reg); | ||
60 | } | ||
61 | |||
62 | module_init(cpu_mt_init); | ||
63 | module_exit(cpu_mt_exit); | ||