 include/linux/netdevice.h  |  17 ++++
 net/Kconfig                |  12 +++
 net/core/dev.c             |  48 ++++++++-
 net/core/net-procfs.c      |  16 +++-
 net/core/sysctl_net_core.c | 117 +++++++++++++++++
 5 files changed, 207 insertions(+), 3 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a94a5a0ab122..7dd535d4b41e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1778,6 +1778,19 @@ static inline int unregister_gifconf(unsigned int family)
 	return register_gifconf(family, NULL);
 }
 
+#ifdef CONFIG_NET_FLOW_LIMIT
+#define FLOW_LIMIT_HISTORY	(1 << 7)  /* must be ^2 and !overflow buckets */
+struct sd_flow_limit {
+	u64			count;
+	unsigned int		num_buckets;
+	unsigned int		history_head;
+	u16			history[FLOW_LIMIT_HISTORY];
+	u8			buckets[];
+};
+
+extern int netdev_flow_limit_table_len;
+#endif /* CONFIG_NET_FLOW_LIMIT */
+
 /*
  * Incoming packets are placed on per-cpu queues
  */
@@ -1807,6 +1820,10 @@ struct softnet_data {
 	unsigned int		dropped;
 	struct sk_buff_head	input_pkt_queue;
 	struct napi_struct	backlog;
+
+#ifdef CONFIG_NET_FLOW_LIMIT
+	struct sd_flow_limit __rcu *flow_limit;
+#endif
 };
 
 static inline void input_queue_head_incr(struct softnet_data *sd)
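
For a sense of scale, a minimal userspace model of this layout (plain C, not kernel code) shows how the flexible buckets[] member is sized; the 1 << 12 table length mirrors the netdev_flow_limit_table_len default introduced in the net/core/dev.c hunk below:

#include <stdio.h>
#include <stdint.h>

#define FLOW_LIMIT_HISTORY (1 << 7)

struct sd_flow_limit {
	uint64_t count;
	unsigned int num_buckets;
	unsigned int history_head;
	uint16_t history[FLOW_LIMIT_HISTORY];
	uint8_t buckets[];	/* one occupancy counter per hash bucket */
};

int main(void)
{
	size_t table_len = 1 << 12;	/* netdev_flow_limit_table_len default */

	/* Mirrors kzalloc(sizeof(*cur) + netdev_flow_limit_table_len) in
	 * the sysctl handler below: a fixed header plus one u8 per bucket,
	 * roughly 4.3 KB per enabled CPU with the defaults. */
	printf("per-cpu footprint: %zu bytes\n",
	       sizeof(struct sd_flow_limit) + table_len);
	return 0;
}
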
diff --git a/net/Kconfig b/net/Kconfig
index 2ddc9046868e..08de901415ee 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -259,6 +259,18 @@ config BPF_JIT
 	  packet sniffing (libpcap/tcpdump). Note : Admin should enable
 	  this feature changing /proc/sys/net/core/bpf_jit_enable
 
+config NET_FLOW_LIMIT
+	boolean
+	depends on RPS
+	default y
+	---help---
+	  The network stack has to drop packets when a receive processing CPU's
+	  backlog reaches netdev_max_backlog. If a few out of many active flows
+	  generate the vast majority of load, drop their traffic earlier to
+	  maintain capacity for the other flows. This feature provides servers
+	  with many clients some protection against DoS by a single (spoofed)
+	  flow that greatly exceeds average workload.
+
 menu "Network testing"
 
 config NET_PKTGEN
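
Concretely, with the constants involved (the 128-entry history from the netdevice.h hunk above, and assuming the stock netdev_max_backlog default of 1000), the two thresholds work out as in this small sketch:

#include <stdio.h>

#define FLOW_LIMIT_HISTORY (1 << 7)

int main(void)
{
	int netdev_max_backlog = 1000;	/* stock kernel default */

	/* skb_flow_limit(), added in net/core/dev.c below, is a no-op
	 * until the per-CPU backlog queue is at least half full... */
	printf("limiter engages at qlen >= %d packets\n",
	       netdev_max_backlog >> 1);

	/* ...and then drops a packet only if its flow accounts for more
	 * than half of the recent enqueue history. */
	printf("a flow is dropped above %d of the last %d enqueues\n",
	       FLOW_LIMIT_HISTORY >> 1, FLOW_LIMIT_HISTORY);
	return 0;
}
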
diff --git a/net/core/dev.c b/net/core/dev.c
index 18e9730cc4be..7229bc30e509 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3064,6 +3064,46 @@ static int rps_ipi_queued(struct softnet_data *sd)
 	return 0;
 }
 
+#ifdef CONFIG_NET_FLOW_LIMIT
+int netdev_flow_limit_table_len __read_mostly = (1 << 12);
+#endif
+
+static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
+{
+#ifdef CONFIG_NET_FLOW_LIMIT
+	struct sd_flow_limit *fl;
+	struct softnet_data *sd;
+	unsigned int old_flow, new_flow;
+
+	if (qlen < (netdev_max_backlog >> 1))
+		return false;
+
+	sd = &__get_cpu_var(softnet_data);
+
+	rcu_read_lock();
+	fl = rcu_dereference(sd->flow_limit);
+	if (fl) {
+		new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1);
+		old_flow = fl->history[fl->history_head];
+		fl->history[fl->history_head] = new_flow;
+
+		fl->history_head++;
+		fl->history_head &= FLOW_LIMIT_HISTORY - 1;
+
+		if (likely(fl->buckets[old_flow]))
+			fl->buckets[old_flow]--;
+
+		if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
+			fl->count++;
+			rcu_read_unlock();
+			return true;
+		}
+	}
+	rcu_read_unlock();
+#endif
+	return false;
+}
+
 /*
  * enqueue_to_backlog is called to queue an skb to a per CPU backlog
  * queue (may be a remote CPU queue).
@@ -3073,13 +3113,15 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 {
 	struct softnet_data *sd;
 	unsigned long flags;
+	unsigned int qlen;
 
 	sd = &per_cpu(softnet_data, cpu);
 
 	local_irq_save(flags);
 
 	rps_lock(sd);
-	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
+	qlen = skb_queue_len(&sd->input_pkt_queue);
+	if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
 		if (skb_queue_len(&sd->input_pkt_queue)) {
 enqueue:
 			__skb_queue_tail(&sd->input_pkt_queue, skb);
@@ -6269,6 +6311,10 @@ static int __init net_dev_init(void)
 		sd->backlog.weight = weight_p;
 		sd->backlog.gro_list = NULL;
 		sd->backlog.gro_count = 0;
+
+#ifdef CONFIG_NET_FLOW_LIMIT
+		sd->flow_limit = NULL;
+#endif
 	}
 
 	dev_boot_phase = 0;
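
The accounting in skb_flow_limit() is small enough to replay in userspace. The sketch below is a model, not kernel code: fixed hash values stand in for skb_get_rxhash(), and the table length is hard-coded. It feeds a 90%-dominant flow through the same history ring and bucket counters and confirms that only that flow crosses the drop threshold:

#include <stdio.h>
#include <stdint.h>

#define FLOW_LIMIT_HISTORY (1 << 7)
#define NUM_BUCKETS (1 << 12)

static uint16_t history[FLOW_LIMIT_HISTORY];
static unsigned int history_head;
static uint8_t buckets[NUM_BUCKETS];

/* Same scheme as skb_flow_limit(): evict the oldest history entry,
 * record the new flow, drop when one bucket holds more than half of
 * the recent history. */
static int flow_limit(uint32_t rxhash)
{
	unsigned int old_flow, new_flow;

	new_flow = rxhash & (NUM_BUCKETS - 1);
	old_flow = history[history_head];
	history[history_head] = new_flow;
	history_head = (history_head + 1) & (FLOW_LIMIT_HISTORY - 1);

	if (buckets[old_flow])
		buckets[old_flow]--;

	return ++buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1);
}

int main(void)
{
	int i, dropped = 0;

	for (i = 0; i < 10000; i++) {
		/* 9 of 10 packets belong to one (e.g. spoofed) flow */
		uint32_t hash = (i % 10) ? 42 : 1000 + i;

		dropped += flow_limit(hash);
	}
	printf("dropped %d of 10000 packets, all from the heavy flow\n",
	       dropped);
	return 0;
}

Because every enqueue decrements the bucket of the evicted history entry, each counter stays bounded by the history size, which is why u8 buckets are safe as long as FLOW_LIMIT_HISTORY stays below 256.
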
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 569d355fec3e..2bf83299600a 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -146,11 +146,23 @@ static void softnet_seq_stop(struct seq_file *seq, void *v)
 static int softnet_seq_show(struct seq_file *seq, void *v)
 {
 	struct softnet_data *sd = v;
+	unsigned int flow_limit_count = 0;
 
-	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+#ifdef CONFIG_NET_FLOW_LIMIT
+	struct sd_flow_limit *fl;
+
+	rcu_read_lock();
+	fl = rcu_dereference(sd->flow_limit);
+	if (fl)
+		flow_limit_count = fl->count;
+	rcu_read_unlock();
+#endif
+
+	seq_printf(seq,
+		   "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
 		   sd->processed, sd->dropped, sd->time_squeeze, 0,
 		   0, 0, 0, 0, /* was fastroute */
-		   sd->cpu_collision, sd->received_rps);
+		   sd->cpu_collision, sd->received_rps, flow_limit_count);
 	return 0;
 }
 
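
With the patch applied, the new counter shows up as an 11th hex column in /proc/net/softnet_stat, one row per CPU. A minimal reader, assuming exactly this 11-column format (unpatched kernels print only 10 columns and would misalign):

#include <stdio.h>

int main(void)
{
	unsigned int col[11];
	int cpu = 0;
	FILE *f = fopen("/proc/net/softnet_stat", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* columns: processed dropped time_squeeze 0 0 0 0 0
	 * cpu_collision received_rps flow_limit_count */
	while (fscanf(f, "%x %x %x %x %x %x %x %x %x %x %x",
		      &col[0], &col[1], &col[2], &col[3], &col[4], &col[5],
		      &col[6], &col[7], &col[8], &col[9], &col[10]) == 11)
		printf("cpu %d: flow_limit_count %u\n", cpu++, col[10]);
	fclose(f);
	return 0;
}
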
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cfdb46ab3a7f..741db5fc7806 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -87,6 +87,109 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
 }
 #endif /* CONFIG_RPS */
 
+#ifdef CONFIG_NET_FLOW_LIMIT
+static DEFINE_MUTEX(flow_limit_update_mutex);
+
+static int flow_limit_cpu_sysctl(ctl_table *table, int write,
+				 void __user *buffer, size_t *lenp,
+				 loff_t *ppos)
+{
+	struct sd_flow_limit *cur;
+	struct softnet_data *sd;
+	cpumask_var_t mask;
+	int i, len, ret = 0;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	if (write) {
+		ret = cpumask_parse_user(buffer, *lenp, mask);
+		if (ret)
+			goto done;
+
+		mutex_lock(&flow_limit_update_mutex);
+		len = sizeof(*cur) + netdev_flow_limit_table_len;
+		for_each_possible_cpu(i) {
+			sd = &per_cpu(softnet_data, i);
+			cur = rcu_dereference_protected(sd->flow_limit,
+				     lockdep_is_held(&flow_limit_update_mutex));
+			if (cur && !cpumask_test_cpu(i, mask)) {
+				RCU_INIT_POINTER(sd->flow_limit, NULL);
+				synchronize_rcu();
+				kfree(cur);
+			} else if (!cur && cpumask_test_cpu(i, mask)) {
+				cur = kzalloc(len, GFP_KERNEL);
+				if (!cur) {
+					/* not unwinding previous changes */
+					ret = -ENOMEM;
+					goto write_unlock;
+				}
+				cur->num_buckets = netdev_flow_limit_table_len;
+				rcu_assign_pointer(sd->flow_limit, cur);
+			}
+		}
+write_unlock:
+		mutex_unlock(&flow_limit_update_mutex);
+	} else {
+		char kbuf[128];
+
+		if (*ppos || !*lenp) {
+			*lenp = 0;
+			goto done;
+		}
+
+		cpumask_clear(mask);
+		rcu_read_lock();
+		for_each_possible_cpu(i) {
+			sd = &per_cpu(softnet_data, i);
+			if (rcu_dereference(sd->flow_limit))
+				cpumask_set_cpu(i, mask);
+		}
+		rcu_read_unlock();
+
+		len = min(sizeof(kbuf) - 1, *lenp);
+		len = cpumask_scnprintf(kbuf, len, mask);
+		if (!len) {
+			*lenp = 0;
+			goto done;
+		}
+		if (len < *lenp)
+			kbuf[len++] = '\n';
+		if (copy_to_user(buffer, kbuf, len)) {
+			ret = -EFAULT;
+			goto done;
+		}
+		*lenp = len;
+		*ppos += len;
+	}
+
+done:
+	free_cpumask_var(mask);
+	return ret;
+}
+
+static int flow_limit_table_len_sysctl(ctl_table *table, int write,
+				       void __user *buffer, size_t *lenp,
+				       loff_t *ppos)
+{
+	unsigned int old, *ptr;
+	int ret;
+
+	mutex_lock(&flow_limit_update_mutex);
+
+	ptr = table->data;
+	old = *ptr;
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (!ret && write && !is_power_of_2(*ptr)) {
+		*ptr = old;
+		ret = -EINVAL;
+	}
+
+	mutex_unlock(&flow_limit_update_mutex);
+	return ret;
+}
+#endif /* CONFIG_NET_FLOW_LIMIT */
+
 static struct ctl_table net_core_table[] = {
 #ifdef CONFIG_NET
 	{
@@ -180,6 +283,20 @@ static struct ctl_table net_core_table[] = {
 		.proc_handler	= rps_sock_flow_sysctl
 	},
 #endif
+#ifdef CONFIG_NET_FLOW_LIMIT
+	{
+		.procname	= "flow_limit_cpu_bitmap",
+		.mode		= 0644,
+		.proc_handler	= flow_limit_cpu_sysctl
+	},
+	{
+		.procname	= "flow_limit_table_len",
+		.data		= &netdev_flow_limit_table_len,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= flow_limit_table_len_sysctl
+	},
+#endif /* CONFIG_NET_FLOW_LIMIT */
 #endif /* CONFIG_NET */
 	{
 		.procname	= "netdev_budget",
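
Taken together, the two knobs are driven like any other procfs sysctl. A minimal usage sketch (requires root; the paths are the ones registered above): write flow_limit_table_len first if a non-default size is wanted, since each per-CPU table captures num_buckets at allocation time, then write a CPU bitmap to flow_limit_cpu_bitmap to allocate the tables:

#include <stdio.h>
#include <stdlib.h>

static void write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f || fputs(val, f) == EOF || fclose(f) != 0) {
		perror(path);
		exit(1);
	}
}

int main(void)
{
	/* must be a power of two, or the handler returns -EINVAL */
	write_str("/proc/sys/net/core/flow_limit_table_len", "8192");

	/* enable flow limiting on CPUs 0-3 (hex bitmap, as parsed by
	 * cpumask_parse_user) */
	write_str("/proc/sys/net/core/flow_limit_cpu_bitmap", "f");
	return 0;
}
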