 include/linux/netdevice.h  |  17 ++
 net/Kconfig                |  12 ++
 net/core/dev.c             |  48 +++-
 net/core/net-procfs.c      |  16 ++-
 net/core/sysctl_net_core.c | 104 +++++++
 5 files changed, 194 insertions(+), 3 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a94a5a0ab122..7dd535d4b41e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1778,6 +1778,19 @@ static inline int unregister_gifconf(unsigned int family)
 	return register_gifconf(family, NULL);
 }
 
+#ifdef CONFIG_NET_FLOW_LIMIT
+#define FLOW_LIMIT_HISTORY	(1 << 8)	/* must be ^2 */
+struct sd_flow_limit {
+	u64		count;
+	unsigned int	num_buckets;
+	unsigned int	history_head;
+	u16		history[FLOW_LIMIT_HISTORY];
+	u8		buckets[];
+};
+
+extern int netdev_flow_limit_table_len;
+#endif /* CONFIG_NET_FLOW_LIMIT */
+
 /*
  * Incoming packets are placed on per-cpu queues
  */
@@ -1807,6 +1820,10 @@ struct softnet_data {
 	unsigned int		dropped;
 	struct sk_buff_head	input_pkt_queue;
 	struct napi_struct	backlog;
+
+#ifdef CONFIG_NET_FLOW_LIMIT
+	struct sd_flow_limit	*flow_limit;
+#endif
 };
 
 static inline void input_queue_head_incr(struct softnet_data *sd)
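The footprint of this structure is modest: with FLOW_LIMIT_HISTORY = 256, the fixed fields take 8 + 4 + 4 + 512 = 528 bytes, and the flexible buckets[] array adds one byte per bucket, netdev_flow_limit_table_len bytes in total (4096 by default, set in net/core/dev.c below). A standalone sketch of that arithmetic, re-declaring the struct with stdint types so it compiles outside the kernel:

#include <stdint.h>
#include <stdio.h>

/* Standalone re-declaration of struct sd_flow_limit above (u64/u16/u8
 * spelled as stdint types) to show the per-CPU cost at default sizes. */
#define FLOW_LIMIT_HISTORY (1 << 8)

struct sd_flow_limit {
        uint64_t count;
        unsigned int num_buckets;
        unsigned int history_head;
        uint16_t history[FLOW_LIMIT_HISTORY];
        uint8_t buckets[];
};

int main(void)
{
        size_t table_len = 1 << 12;  /* default netdev_flow_limit_table_len */

        /* 528 fixed bytes + 4096 bucket bytes ~= 4.5 KiB per enabled CPU */
        printf("%zu bytes\n", sizeof(struct sd_flow_limit) + table_len);
        return 0;
}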
diff --git a/net/Kconfig b/net/Kconfig
index 2ddc9046868e..08de901415ee 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -259,6 +259,18 @@ config BPF_JIT
 	  packet sniffing (libpcap/tcpdump). Note : Admin should enable
 	  this feature changing /proc/sys/net/core/bpf_jit_enable
 
+config NET_FLOW_LIMIT
+	boolean
+	depends on RPS
+	default y
+	---help---
+	  The network stack has to drop packets when a receive processing CPU's
+	  backlog reaches netdev_max_backlog. If a few out of many active flows
+	  generate the vast majority of load, drop their traffic earlier to
+	  maintain capacity for the other flows. This feature provides servers
+	  with many clients some protection against DoS by a single (spoofed)
+	  flow that greatly exceeds average workload.
+
 menu "Network testing"
 
 config NET_PKTGEN
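In concrete terms (the thresholds appear in the net/core/dev.c changes below): flow limiting only engages once a CPU's input queue has grown to at least half of netdev_max_backlog, and a flow is then shed only while it accounts for more than half of the last 256 packets enqueued on that CPU, so the many well-behaved flows keep the remaining queue capacity.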
diff --git a/net/core/dev.c b/net/core/dev.c
index 18e9730cc4be..7229bc30e509 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3064,6 +3064,46 @@ static int rps_ipi_queued(struct softnet_data *sd)
 	return 0;
 }
 
+#ifdef CONFIG_NET_FLOW_LIMIT
+int netdev_flow_limit_table_len __read_mostly = (1 << 12);
+#endif
+
+static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
+{
+#ifdef CONFIG_NET_FLOW_LIMIT
+	struct sd_flow_limit *fl;
+	struct softnet_data *sd;
+	unsigned int old_flow, new_flow;
+
+	if (qlen < (netdev_max_backlog >> 1))
+		return false;
+
+	sd = &__get_cpu_var(softnet_data);
+
+	rcu_read_lock();
+	fl = rcu_dereference(sd->flow_limit);
+	if (fl) {
+		new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1);
+		old_flow = fl->history[fl->history_head];
+		fl->history[fl->history_head] = new_flow;
+
+		fl->history_head++;
+		fl->history_head &= FLOW_LIMIT_HISTORY - 1;
+
+		if (likely(fl->buckets[old_flow]))
+			fl->buckets[old_flow]--;
+
+		if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
+			fl->count++;
+			rcu_read_unlock();
+			return true;
+		}
+	}
+	rcu_read_unlock();
+#endif
+	return false;
+}
+
 /*
  * enqueue_to_backlog is called to queue an skb to a per CPU backlog
 * queue (may be a remote CPU queue).
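The loop above is a fixed-size heavy-hitter filter: history is a ring buffer holding the hash bucket of each of the last FLOW_LIMIT_HISTORY (256) enqueued packets, and buckets counts how many packets in that window fell into each bucket; a packet is dropped while its flow's bucket covers more than half the window. A minimal userspace model of the same scheme, using the patch defaults and hypothetical names (flow_over_limit is not a kernel function):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HISTORY     256     /* FLOW_LIMIT_HISTORY */
#define NUM_BUCKETS 4096    /* default netdev_flow_limit_table_len */

struct flow_limit {
        unsigned int history_head;
        uint16_t history[HISTORY];     /* bucket of each recent packet */
        uint8_t buckets[NUM_BUCKETS];  /* per-bucket packet count */
};

/* Mirrors the kernel loop: expire the oldest sample, record the newest,
 * and flag the flow once it owns more than half of the window. */
static bool flow_over_limit(struct flow_limit *fl, uint32_t rxhash)
{
        unsigned int new_flow = rxhash & (NUM_BUCKETS - 1);
        unsigned int old_flow = fl->history[fl->history_head];

        fl->history[fl->history_head] = new_flow;
        fl->history_head = (fl->history_head + 1) & (HISTORY - 1);

        if (fl->buckets[old_flow])     /* guard against stale zero entries */
                fl->buckets[old_flow]--;

        return ++fl->buckets[new_flow] > HISTORY / 2;
}

int main(void)
{
        static struct flow_limit fl;   /* zero-initialized, like kzalloc() */
        uint32_t i;

        /* One flow sending every packet crosses the threshold as soon as
         * its bucket count exceeds HISTORY / 2 = 128. */
        for (i = 1; i <= HISTORY; i++) {
                if (flow_over_limit(&fl, 0x12345678)) {
                        printf("first drop at packet %u\n", i); /* 129 */
                        break;
                }
        }
        return 0;
}

Because one sample expires for every sample added, the filter forgets a hot flow within one window (256 packets) once it slows down. Detection keys on skb_get_rxhash(), so distinct flows can collide in a bucket; growing the table via the sysctl added below makes such collisions rarer.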
@@ -3073,13 +3113,15 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 {
 	struct softnet_data *sd;
 	unsigned long flags;
+	unsigned int qlen;
 
 	sd = &per_cpu(softnet_data, cpu);
 
 	local_irq_save(flags);
 
 	rps_lock(sd);
-	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
+	qlen = skb_queue_len(&sd->input_pkt_queue);
+	if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
 		if (skb_queue_len(&sd->input_pkt_queue)) {
 enqueue:
 			__skb_queue_tail(&sd->input_pkt_queue, skb);
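Note that qlen is sampled once under rps_lock and reused for both checks, so the backlog bound and the flow-limit decision see a consistent queue length; and because && short-circuits, skb_flow_limit() is not consulted at all once the queue has passed netdev_max_backlog, where every packet is dropped anyway.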
@@ -6269,6 +6311,10 @@ static int __init net_dev_init(void)
 		sd->backlog.weight = weight_p;
 		sd->backlog.gro_list = NULL;
 		sd->backlog.gro_count = 0;
+
+#ifdef CONFIG_NET_FLOW_LIMIT
+		sd->flow_limit = NULL;
+#endif
 	}
 
 	dev_boot_phase = 0;
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 569d355fec3e..2bf83299600a 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -146,11 +146,23 @@ static void softnet_seq_stop(struct seq_file *seq, void *v)
 static int softnet_seq_show(struct seq_file *seq, void *v)
 {
 	struct softnet_data *sd = v;
+	unsigned int flow_limit_count = 0;
 
-	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+#ifdef CONFIG_NET_FLOW_LIMIT
+	struct sd_flow_limit *fl;
+
+	rcu_read_lock();
+	fl = rcu_dereference(sd->flow_limit);
+	if (fl)
+		flow_limit_count = fl->count;
+	rcu_read_unlock();
+#endif
+
+	seq_printf(seq,
+		   "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
 		   sd->processed, sd->dropped, sd->time_squeeze, 0,
 		   0, 0, 0, 0, /* was fastroute */
-		   sd->cpu_collision, sd->received_rps);
+		   sd->cpu_collision, sd->received_rps, flow_limit_count);
 	return 0;
 }
 
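After this change each row of /proc/net/softnet_stat carries eleven 8-digit hex fields rather than ten; the new final column is that CPU's cumulative count of packets dropped by the flow limit (fl->count), or zero where the feature is compiled out or disabled. Scripts that parse the file positionally should key on the field count.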
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cfdb46ab3a7f..741db5fc7806 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -87,6 +87,96 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
 }
 #endif /* CONFIG_RPS */
 
+#ifdef CONFIG_NET_FLOW_LIMIT
+static DEFINE_MUTEX(flow_limit_update_mutex);
+
+static int flow_limit_cpu_sysctl(ctl_table *table, int write,
+				 void __user *buffer, size_t *lenp,
+				 loff_t *ppos)
+{
+	struct sd_flow_limit *cur;
+	struct softnet_data *sd;
+	cpumask_var_t mask;
+	int i, len, ret = 0;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	if (write) {
+		ret = cpumask_parse_user(buffer, *lenp, mask);
+		if (ret)
+			goto done;
+
+		mutex_lock(&flow_limit_update_mutex);
+		len = sizeof(*cur) + netdev_flow_limit_table_len;
+		for_each_possible_cpu(i) {
+			sd = &per_cpu(softnet_data, i);
+			cur = rcu_dereference_protected(sd->flow_limit,
+				     lockdep_is_held(&flow_limit_update_mutex));
+			if (cur && !cpumask_test_cpu(i, mask)) {
+				RCU_INIT_POINTER(sd->flow_limit, NULL);
+				synchronize_rcu();
+				kfree(cur);
+			} else if (!cur && cpumask_test_cpu(i, mask)) {
+				cur = kzalloc(len, GFP_KERNEL);
+				if (!cur) {
+					/* not unwinding previous changes */
+					ret = -ENOMEM;
+					goto write_unlock;
+				}
+				cur->num_buckets = netdev_flow_limit_table_len;
+				rcu_assign_pointer(sd->flow_limit, cur);
+			}
+		}
+write_unlock:
+		mutex_unlock(&flow_limit_update_mutex);
+	} else {
+		if (*ppos || !*lenp) {
+			*lenp = 0;
+			goto done;
+		}
+
+		cpumask_clear(mask);
+		rcu_read_lock();
+		for_each_possible_cpu(i) {
+			sd = &per_cpu(softnet_data, i);
+			if (rcu_dereference(sd->flow_limit))
+				cpumask_set_cpu(i, mask);
+		}
+		rcu_read_unlock();
+
+		len = cpumask_scnprintf(buffer, *lenp, mask);
+		*lenp = len + 1;
+		*ppos += len + 1;
+	}
+
+done:
+	free_cpumask_var(mask);
+	return ret;
+}
+
+static int flow_limit_table_len_sysctl(ctl_table *table, int write,
+				       void __user *buffer, size_t *lenp,
+				       loff_t *ppos)
+{
+	unsigned int old, *ptr;
+	int ret;
+
+	mutex_lock(&flow_limit_update_mutex);
+
+	ptr = table->data;
+	old = *ptr;
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (!ret && write && !is_power_of_2(*ptr)) {
+		*ptr = old;
+		ret = -EINVAL;
+	}
+
+	mutex_unlock(&flow_limit_update_mutex);
+	return ret;
+}
+#endif /* CONFIG_NET_FLOW_LIMIT */
+
 static struct ctl_table net_core_table[] = {
 #ifdef CONFIG_NET
 	{
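The writer side above follows the standard RCU publish/retire pattern: updates serialize on flow_limit_update_mutex, a new table is published with rcu_assign_pointer(), and a retired table is first unlinked with RCU_INIT_POINTER(..., NULL) and only kfree()d after synchronize_rcu(), so a CPU concurrently reading the pointer in skb_flow_limit() can never touch freed memory. One consequence worth noting: flow_limit_table_len_sysctl() only changes the integer, so a new length applies only to tables allocated afterwards; resizing an already-enabled CPU means clearing and re-setting its bit in the bitmap.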
@@ -180,6 +270,20 @@ static struct ctl_table net_core_table[] = {
 		.proc_handler	= rps_sock_flow_sysctl
 	},
 #endif
+#ifdef CONFIG_NET_FLOW_LIMIT
+	{
+		.procname	= "flow_limit_cpu_bitmap",
+		.mode		= 0644,
+		.proc_handler	= flow_limit_cpu_sysctl
+	},
+	{
+		.procname	= "flow_limit_table_len",
+		.data		= &netdev_flow_limit_table_len,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= flow_limit_table_len_sysctl
+	},
+#endif /* CONFIG_NET_FLOW_LIMIT */
 #endif /* CONFIG_NET */
 	{
 		.procname	= "netdev_budget",
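Taken together, the knobs are used as follows (an illustrative invocation, not part of the patch): write a hex CPU mask to /proc/sys/net/core/flow_limit_cpu_bitmap, e.g. "f" to enable flow limiting on CPUs 0-3 (the mask is parsed by cpumask_parse_user() above), after optionally setting /proc/sys/net/core/flow_limit_table_len. The length must be a power of two because skb_flow_limit() selects buckets with hash & (num_buckets - 1); flow_limit_table_len_sysctl() rejects anything else with -EINVAL.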