aboutsummaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig24
-rw-r--r--net/sched/Makefile2
-rw-r--r--net/sched/act_bpf.c208
-rw-r--r--net/sched/act_connmark.c192
-rw-r--r--net/sched/act_csum.c2
-rw-r--r--net/sched/cls_api.c7
-rw-r--r--net/sched/cls_basic.c7
-rw-r--r--net/sched/cls_bpf.c33
-rw-r--r--net/sched/cls_flow.c8
-rw-r--r--net/sched/em_ipset.c2
-rw-r--r--net/sched/em_meta.c4
-rw-r--r--net/sched/sch_api.c2
-rw-r--r--net/sched/sch_dsmark.c6
-rw-r--r--net/sched/sch_fq.c43
-rw-r--r--net/sched/sch_teql.c11
15 files changed, 506 insertions, 45 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 706af73c969f..2274e723a3df 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -698,6 +698,30 @@ config NET_ACT_VLAN
698 To compile this code as a module, choose M here: the 698 To compile this code as a module, choose M here: the
699 module will be called act_vlan. 699 module will be called act_vlan.
700 700
701config NET_ACT_BPF
702 tristate "BPF based action"
703 depends on NET_CLS_ACT
704 ---help---
705 Say Y here to execute BPF code on packets. The BPF code will decide
706 if the packet should be dropped or not.
707
708 If unsure, say N.
709
710 To compile this code as a module, choose M here: the
711 module will be called act_bpf.
712
713config NET_ACT_CONNMARK
714 tristate "Netfilter Connection Mark Retriever"
715 depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
716 depends on NF_CONNTRACK && NF_CONNTRACK_MARK
717 ---help---
718 Say Y here to allow retrieving the netfilter connection mark into the skb mark.
719
720 If unsure, say N.
721
722 To compile this code as a module, choose M here: the
723 module will be called act_connmark.
724
701config NET_CLS_IND 725config NET_CLS_IND
702 bool "Incoming device classification" 726 bool "Incoming device classification"
703 depends on NET_CLS_U32 || NET_CLS_FW 727 depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 679f24ae7f93..7ca7f4c1b8c2 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -17,6 +17,8 @@ obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
17obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o 17obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
18obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o 18obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
19obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o 19obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o
20obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o
21obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o
20obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o 22obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
21obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o 23obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
22obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o 24obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
new file mode 100644
index 000000000000..82c5d7fc1988
--- /dev/null
+++ b/net/sched/act_bpf.c
@@ -0,0 +1,208 @@
1/*
2 * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 */
9
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/kernel.h>
13#include <linux/skbuff.h>
14#include <linux/rtnetlink.h>
15#include <linux/filter.h>
16#include <net/netlink.h>
17#include <net/pkt_sched.h>
18
19#include <linux/tc_act/tc_bpf.h>
20#include <net/tc_act/tc_bpf.h>
21
22#define BPF_TAB_MASK 15
23
24static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a,
25 struct tcf_result *res)
26{
27 struct tcf_bpf *b = a->priv;
28 int action;
29 int filter_res;
30
31 spin_lock(&b->tcf_lock);
32 b->tcf_tm.lastuse = jiffies;
33 bstats_update(&b->tcf_bstats, skb);
34 action = b->tcf_action;
35
36 filter_res = BPF_PROG_RUN(b->filter, skb);
37 if (filter_res == 0) {
38 /* Return code 0 from the BPF program
39 * is being interpreted as a drop here.
40 */
41 action = TC_ACT_SHOT;
42 b->tcf_qstats.drops++;
43 }
44
45 spin_unlock(&b->tcf_lock);
46 return action;
47}
48
49static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a,
50 int bind, int ref)
51{
52 unsigned char *tp = skb_tail_pointer(skb);
53 struct tcf_bpf *b = a->priv;
54 struct tc_act_bpf opt = {
55 .index = b->tcf_index,
56 .refcnt = b->tcf_refcnt - ref,
57 .bindcnt = b->tcf_bindcnt - bind,
58 .action = b->tcf_action,
59 };
60 struct tcf_t t;
61 struct nlattr *nla;
62
63 if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt))
64 goto nla_put_failure;
65
66 if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, b->bpf_num_ops))
67 goto nla_put_failure;
68
69 nla = nla_reserve(skb, TCA_ACT_BPF_OPS, b->bpf_num_ops *
70 sizeof(struct sock_filter));
71 if (!nla)
72 goto nla_put_failure;
73
74 memcpy(nla_data(nla), b->bpf_ops, nla_len(nla));
75
76 t.install = jiffies_to_clock_t(jiffies - b->tcf_tm.install);
77 t.lastuse = jiffies_to_clock_t(jiffies - b->tcf_tm.lastuse);
78 t.expires = jiffies_to_clock_t(b->tcf_tm.expires);
79 if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(t), &t))
80 goto nla_put_failure;
81 return skb->len;
82
83nla_put_failure:
84 nlmsg_trim(skb, tp);
85 return -1;
86}
87
88static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = {
89 [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) },
90 [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 },
91 [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY,
92 .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
93};
94
95static int tcf_bpf_init(struct net *net, struct nlattr *nla,
96 struct nlattr *est, struct tc_action *a,
97 int ovr, int bind)
98{
99 struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
100 struct tc_act_bpf *parm;
101 struct tcf_bpf *b;
102 u16 bpf_size, bpf_num_ops;
103 struct sock_filter *bpf_ops;
104 struct sock_fprog_kern tmp;
105 struct bpf_prog *fp;
106 int ret;
107
108 if (!nla)
109 return -EINVAL;
110
111 ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy);
112 if (ret < 0)
113 return ret;
114
115 if (!tb[TCA_ACT_BPF_PARMS] ||
116 !tb[TCA_ACT_BPF_OPS_LEN] || !tb[TCA_ACT_BPF_OPS])
117 return -EINVAL;
118 parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
119
120 bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]);
121 if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0)
122 return -EINVAL;
123
124 bpf_size = bpf_num_ops * sizeof(*bpf_ops);
125 if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS]))
126 return -EINVAL;
127
128 bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
129 if (!bpf_ops)
130 return -ENOMEM;
131
132 memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size);
133
134 tmp.len = bpf_num_ops;
135 tmp.filter = bpf_ops;
136
137 ret = bpf_prog_create(&fp, &tmp);
138 if (ret)
139 goto free_bpf_ops;
140
141 if (!tcf_hash_check(parm->index, a, bind)) {
142 ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind);
143 if (ret)
144 goto destroy_fp;
145
146 ret = ACT_P_CREATED;
147 } else {
148 if (bind)
149 goto destroy_fp;
150 tcf_hash_release(a, bind);
151 if (!ovr) {
152 ret = -EEXIST;
153 goto destroy_fp;
154 }
155 }
156
157 b = to_bpf(a);
158 spin_lock_bh(&b->tcf_lock);
159 b->tcf_action = parm->action;
160 b->bpf_num_ops = bpf_num_ops;
161 b->bpf_ops = bpf_ops;
162 b->filter = fp;
163 spin_unlock_bh(&b->tcf_lock);
164
165 if (ret == ACT_P_CREATED)
166 tcf_hash_insert(a);
167 return ret;
168
169destroy_fp:
170 bpf_prog_destroy(fp);
171free_bpf_ops:
172 kfree(bpf_ops);
173 return ret;
174}
175
176static void tcf_bpf_cleanup(struct tc_action *a, int bind)
177{
178 struct tcf_bpf *b = a->priv;
179
180 bpf_prog_destroy(b->filter);
181}
182
183static struct tc_action_ops act_bpf_ops = {
184 .kind = "bpf",
185 .type = TCA_ACT_BPF,
186 .owner = THIS_MODULE,
187 .act = tcf_bpf,
188 .dump = tcf_bpf_dump,
189 .cleanup = tcf_bpf_cleanup,
190 .init = tcf_bpf_init,
191};
192
193static int __init bpf_init_module(void)
194{
195 return tcf_register_action(&act_bpf_ops, BPF_TAB_MASK);
196}
197
198static void __exit bpf_cleanup_module(void)
199{
200 tcf_unregister_action(&act_bpf_ops);
201}
202
203module_init(bpf_init_module);
204module_exit(bpf_cleanup_module);
205
206MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
207MODULE_DESCRIPTION("TC BPF based action");
208MODULE_LICENSE("GPL v2");
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
new file mode 100644
index 000000000000..8e472518f9f6
--- /dev/null
+++ b/net/sched/act_connmark.c
@@ -0,0 +1,192 @@
1/*
2 * net/sched/act_connmark.c netfilter connmark retriever action
3 * skb mark is over-written
4 *
5 * Copyright (c) 2011 Felix Fietkau <nbd@openwrt.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11*/
12
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/kernel.h>
16#include <linux/skbuff.h>
17#include <linux/rtnetlink.h>
18#include <linux/pkt_cls.h>
19#include <linux/ip.h>
20#include <linux/ipv6.h>
21#include <net/netlink.h>
22#include <net/pkt_sched.h>
23#include <net/act_api.h>
24#include <uapi/linux/tc_act/tc_connmark.h>
25#include <net/tc_act/tc_connmark.h>
26
27#include <net/netfilter/nf_conntrack.h>
28#include <net/netfilter/nf_conntrack_core.h>
29#include <net/netfilter/nf_conntrack_zones.h>
30
31#define CONNMARK_TAB_MASK 3
32
33static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
34 struct tcf_result *res)
35{
36 const struct nf_conntrack_tuple_hash *thash;
37 struct nf_conntrack_tuple tuple;
38 enum ip_conntrack_info ctinfo;
39 struct tcf_connmark_info *ca = a->priv;
40 struct nf_conn *c;
41 int proto;
42
43 spin_lock(&ca->tcf_lock);
44 ca->tcf_tm.lastuse = jiffies;
45 bstats_update(&ca->tcf_bstats, skb);
46
47 if (skb->protocol == htons(ETH_P_IP)) {
48 if (skb->len < sizeof(struct iphdr))
49 goto out;
50
51 proto = NFPROTO_IPV4;
52 } else if (skb->protocol == htons(ETH_P_IPV6)) {
53 if (skb->len < sizeof(struct ipv6hdr))
54 goto out;
55
56 proto = NFPROTO_IPV6;
57 } else {
58 goto out;
59 }
60
61 c = nf_ct_get(skb, &ctinfo);
62 if (c) {
63 skb->mark = c->mark;
64 /* using overlimits stats to count how many packets marked */
65 ca->tcf_qstats.overlimits++;
66 nf_ct_put(c);
67 goto out;
68 }
69
70 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
71 proto, &tuple))
72 goto out;
73
74 thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple);
75 if (!thash)
76 goto out;
77
78 c = nf_ct_tuplehash_to_ctrack(thash);
79 /* using overlimits stats to count how many packets marked */
80 ca->tcf_qstats.overlimits++;
81 skb->mark = c->mark;
82 nf_ct_put(c);
83
84out:
85 skb->nfct = NULL;
86 spin_unlock(&ca->tcf_lock);
87 return ca->tcf_action;
88}
89
90static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
91 [TCA_CONNMARK_PARMS] = { .len = sizeof(struct tc_connmark) },
92};
93
94static int tcf_connmark_init(struct net *net, struct nlattr *nla,
95 struct nlattr *est, struct tc_action *a,
96 int ovr, int bind)
97{
98 struct nlattr *tb[TCA_CONNMARK_MAX + 1];
99 struct tcf_connmark_info *ci;
100 struct tc_connmark *parm;
101 int ret = 0;
102
103 if (!nla)
104 return -EINVAL;
105
106 ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy);
107 if (ret < 0)
108 return ret;
109
110 parm = nla_data(tb[TCA_CONNMARK_PARMS]);
111
112 if (!tcf_hash_check(parm->index, a, bind)) {
113 ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), bind);
114 if (ret)
115 return ret;
116
117 ci = to_connmark(a);
118 ci->tcf_action = parm->action;
119 ci->zone = parm->zone;
120
121 tcf_hash_insert(a);
122 ret = ACT_P_CREATED;
123 } else {
124 ci = to_connmark(a);
125 if (bind)
126 return 0;
127 tcf_hash_release(a, bind);
128 if (!ovr)
129 return -EEXIST;
130 /* replacing action and zone */
131 ci->tcf_action = parm->action;
132 ci->zone = parm->zone;
133 }
134
135 return ret;
136}
137
138static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
139 int bind, int ref)
140{
141 unsigned char *b = skb_tail_pointer(skb);
142 struct tcf_connmark_info *ci = a->priv;
143
144 struct tc_connmark opt = {
145 .index = ci->tcf_index,
146 .refcnt = ci->tcf_refcnt - ref,
147 .bindcnt = ci->tcf_bindcnt - bind,
148 .action = ci->tcf_action,
149 .zone = ci->zone,
150 };
151 struct tcf_t t;
152
153 if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt))
154 goto nla_put_failure;
155
156 t.install = jiffies_to_clock_t(jiffies - ci->tcf_tm.install);
157 t.lastuse = jiffies_to_clock_t(jiffies - ci->tcf_tm.lastuse);
158 t.expires = jiffies_to_clock_t(ci->tcf_tm.expires);
159 if (nla_put(skb, TCA_CONNMARK_TM, sizeof(t), &t))
160 goto nla_put_failure;
161
162 return skb->len;
163nla_put_failure:
164 nlmsg_trim(skb, b);
165 return -1;
166}
167
168static struct tc_action_ops act_connmark_ops = {
169 .kind = "connmark",
170 .type = TCA_ACT_CONNMARK,
171 .owner = THIS_MODULE,
172 .act = tcf_connmark,
173 .dump = tcf_connmark_dump,
174 .init = tcf_connmark_init,
175};
176
177static int __init connmark_init_module(void)
178{
179 return tcf_register_action(&act_connmark_ops, CONNMARK_TAB_MASK);
180}
181
182static void __exit connmark_cleanup_module(void)
183{
184 tcf_unregister_action(&act_connmark_ops);
185}
186
187module_init(connmark_init_module);
188module_exit(connmark_cleanup_module);
189MODULE_AUTHOR("Felix Fietkau <nbd@openwrt.org>");
190MODULE_DESCRIPTION("Connection tracking mark restoring");
191MODULE_LICENSE("GPL");
192
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index edbf40dac709..4cd5cf1aedf8 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -509,7 +509,7 @@ static int tcf_csum(struct sk_buff *skb,
509 if (unlikely(action == TC_ACT_SHOT)) 509 if (unlikely(action == TC_ACT_SHOT))
510 goto drop; 510 goto drop;
511 511
512 switch (skb->protocol) { 512 switch (tc_skb_protocol(skb)) {
513 case cpu_to_be16(ETH_P_IP): 513 case cpu_to_be16(ETH_P_IP):
514 if (!tcf_csum_ipv4(skb, update_flags)) 514 if (!tcf_csum_ipv4(skb, update_flags))
515 goto drop; 515 goto drop;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index aad6a679fb13..baef987fe2c0 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -556,8 +556,9 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
556} 556}
557EXPORT_SYMBOL(tcf_exts_change); 557EXPORT_SYMBOL(tcf_exts_change);
558 558
559#define tcf_exts_first_act(ext) \ 559#define tcf_exts_first_act(ext) \
560 list_first_entry(&(exts)->actions, struct tc_action, list) 560 list_first_entry_or_null(&(exts)->actions, \
561 struct tc_action, list)
561 562
562int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) 563int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
563{ 564{
@@ -603,7 +604,7 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
603{ 604{
604#ifdef CONFIG_NET_CLS_ACT 605#ifdef CONFIG_NET_CLS_ACT
605 struct tc_action *a = tcf_exts_first_act(exts); 606 struct tc_action *a = tcf_exts_first_act(exts);
606 if (tcf_action_copy_stats(skb, a, 1) < 0) 607 if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
607 return -1; 608 return -1;
608#endif 609#endif
609 return 0; 610 return 0;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 5aed341406c2..fc399db86f11 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -65,9 +65,12 @@ static unsigned long basic_get(struct tcf_proto *tp, u32 handle)
65 if (head == NULL) 65 if (head == NULL)
66 return 0UL; 66 return 0UL;
67 67
68 list_for_each_entry(f, &head->flist, link) 68 list_for_each_entry(f, &head->flist, link) {
69 if (f->handle == handle) 69 if (f->handle == handle) {
70 l = (unsigned long) f; 70 l = (unsigned long) f;
71 break;
72 }
73 }
71 74
72 return l; 75 return l;
73} 76}
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 84c8219c3e1c..5f3ee9e4b5bf 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -37,7 +37,7 @@ struct cls_bpf_prog {
37 struct tcf_result res; 37 struct tcf_result res;
38 struct list_head link; 38 struct list_head link;
39 u32 handle; 39 u32 handle;
40 u16 bpf_len; 40 u16 bpf_num_ops;
41 struct tcf_proto *tp; 41 struct tcf_proto *tp;
42 struct rcu_head rcu; 42 struct rcu_head rcu;
43}; 43};
@@ -160,7 +160,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
160 struct tcf_exts exts; 160 struct tcf_exts exts;
161 struct sock_fprog_kern tmp; 161 struct sock_fprog_kern tmp;
162 struct bpf_prog *fp; 162 struct bpf_prog *fp;
163 u16 bpf_size, bpf_len; 163 u16 bpf_size, bpf_num_ops;
164 u32 classid; 164 u32 classid;
165 int ret; 165 int ret;
166 166
@@ -173,13 +173,18 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
173 return ret; 173 return ret;
174 174
175 classid = nla_get_u32(tb[TCA_BPF_CLASSID]); 175 classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
176 bpf_len = nla_get_u16(tb[TCA_BPF_OPS_LEN]); 176 bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
177 if (bpf_len > BPF_MAXINSNS || bpf_len == 0) { 177 if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) {
178 ret = -EINVAL;
179 goto errout;
180 }
181
182 bpf_size = bpf_num_ops * sizeof(*bpf_ops);
183 if (bpf_size != nla_len(tb[TCA_BPF_OPS])) {
178 ret = -EINVAL; 184 ret = -EINVAL;
179 goto errout; 185 goto errout;
180 } 186 }
181 187
182 bpf_size = bpf_len * sizeof(*bpf_ops);
183 bpf_ops = kzalloc(bpf_size, GFP_KERNEL); 188 bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
184 if (bpf_ops == NULL) { 189 if (bpf_ops == NULL) {
185 ret = -ENOMEM; 190 ret = -ENOMEM;
@@ -188,14 +193,14 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
188 193
189 memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size); 194 memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
190 195
191 tmp.len = bpf_len; 196 tmp.len = bpf_num_ops;
192 tmp.filter = bpf_ops; 197 tmp.filter = bpf_ops;
193 198
194 ret = bpf_prog_create(&fp, &tmp); 199 ret = bpf_prog_create(&fp, &tmp);
195 if (ret) 200 if (ret)
196 goto errout_free; 201 goto errout_free;
197 202
198 prog->bpf_len = bpf_len; 203 prog->bpf_num_ops = bpf_num_ops;
199 prog->bpf_ops = bpf_ops; 204 prog->bpf_ops = bpf_ops;
200 prog->filter = fp; 205 prog->filter = fp;
201 prog->res.classid = classid; 206 prog->res.classid = classid;
@@ -215,15 +220,21 @@ static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
215 struct cls_bpf_head *head) 220 struct cls_bpf_head *head)
216{ 221{
217 unsigned int i = 0x80000000; 222 unsigned int i = 0x80000000;
223 u32 handle;
218 224
219 do { 225 do {
220 if (++head->hgen == 0x7FFFFFFF) 226 if (++head->hgen == 0x7FFFFFFF)
221 head->hgen = 1; 227 head->hgen = 1;
222 } while (--i > 0 && cls_bpf_get(tp, head->hgen)); 228 } while (--i > 0 && cls_bpf_get(tp, head->hgen));
223 if (i == 0) 229
230 if (unlikely(i == 0)) {
224 pr_err("Insufficient number of handles\n"); 231 pr_err("Insufficient number of handles\n");
232 handle = 0;
233 } else {
234 handle = head->hgen;
235 }
225 236
226 return i; 237 return handle;
227} 238}
228 239
229static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, 240static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
@@ -303,10 +314,10 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
303 314
304 if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid)) 315 if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
305 goto nla_put_failure; 316 goto nla_put_failure;
306 if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_len)) 317 if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops))
307 goto nla_put_failure; 318 goto nla_put_failure;
308 319
309 nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_len * 320 nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops *
310 sizeof(struct sock_filter)); 321 sizeof(struct sock_filter));
311 if (nla == NULL) 322 if (nla == NULL)
312 goto nla_put_failure; 323 goto nla_put_failure;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 15d68f24a521..461410394d08 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -77,7 +77,7 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
77{ 77{
78 if (flow->dst) 78 if (flow->dst)
79 return ntohl(flow->dst); 79 return ntohl(flow->dst);
80 return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; 80 return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
81} 81}
82 82
83static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) 83static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
@@ -98,7 +98,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys
98 if (flow->ports) 98 if (flow->ports)
99 return ntohs(flow->port16[1]); 99 return ntohs(flow->port16[1]);
100 100
101 return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; 101 return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
102} 102}
103 103
104static u32 flow_get_iif(const struct sk_buff *skb) 104static u32 flow_get_iif(const struct sk_buff *skb)
@@ -144,7 +144,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
144 144
145static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) 145static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow)
146{ 146{
147 switch (skb->protocol) { 147 switch (tc_skb_protocol(skb)) {
148 case htons(ETH_P_IP): 148 case htons(ETH_P_IP):
149 return ntohl(CTTUPLE(skb, src.u3.ip)); 149 return ntohl(CTTUPLE(skb, src.u3.ip));
150 case htons(ETH_P_IPV6): 150 case htons(ETH_P_IPV6):
@@ -156,7 +156,7 @@ fallback:
156 156
157static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) 157static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow)
158{ 158{
159 switch (skb->protocol) { 159 switch (tc_skb_protocol(skb)) {
160 case htons(ETH_P_IP): 160 case htons(ETH_P_IP):
161 return ntohl(CTTUPLE(skb, dst.u3.ip)); 161 return ntohl(CTTUPLE(skb, dst.u3.ip));
162 case htons(ETH_P_IPV6): 162 case htons(ETH_P_IPV6):
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index 5b4a4efe468c..a3d79c8bf3b8 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
59 struct net_device *dev, *indev = NULL; 59 struct net_device *dev, *indev = NULL;
60 int ret, network_offset; 60 int ret, network_offset;
61 61
62 switch (skb->protocol) { 62 switch (tc_skb_protocol(skb)) {
63 case htons(ETH_P_IP): 63 case htons(ETH_P_IP):
64 acpar.family = NFPROTO_IPV4; 64 acpar.family = NFPROTO_IPV4;
65 if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) 65 if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index c8f8c399b99a..b5294ce20cd4 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -176,7 +176,7 @@ META_COLLECTOR(int_vlan_tag)
176{ 176{
177 unsigned short tag; 177 unsigned short tag;
178 178
179 tag = vlan_tx_tag_get(skb); 179 tag = skb_vlan_tag_get(skb);
180 if (!tag && __vlan_get_tag(skb, &tag)) 180 if (!tag && __vlan_get_tag(skb, &tag))
181 *err = -1; 181 *err = -1;
182 else 182 else
@@ -197,7 +197,7 @@ META_COLLECTOR(int_priority)
197META_COLLECTOR(int_protocol) 197META_COLLECTOR(int_protocol)
198{ 198{
199 /* Let userspace take care of the byte ordering */ 199 /* Let userspace take care of the byte ordering */
200 dst->value = skb->protocol; 200 dst->value = tc_skb_protocol(skb);
201} 201}
202 202
203META_COLLECTOR(int_pkttype) 203META_COLLECTOR(int_pkttype)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 76f402e05bd6..243b7d169d61 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1807,7 +1807,7 @@ done:
1807int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, 1807int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
1808 struct tcf_result *res) 1808 struct tcf_result *res)
1809{ 1809{
1810 __be16 protocol = skb->protocol; 1810 __be16 protocol = tc_skb_protocol(skb);
1811 int err; 1811 int err;
1812 1812
1813 for (; tp; tp = rcu_dereference_bh(tp->next)) { 1813 for (; tp; tp = rcu_dereference_bh(tp->next)) {
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 227114f27f94..66700a6116aa 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -203,7 +203,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
203 pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); 203 pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
204 204
205 if (p->set_tc_index) { 205 if (p->set_tc_index) {
206 switch (skb->protocol) { 206 switch (tc_skb_protocol(skb)) {
207 case htons(ETH_P_IP): 207 case htons(ETH_P_IP):
208 if (skb_cow_head(skb, sizeof(struct iphdr))) 208 if (skb_cow_head(skb, sizeof(struct iphdr)))
209 goto drop; 209 goto drop;
@@ -289,7 +289,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
289 index = skb->tc_index & (p->indices - 1); 289 index = skb->tc_index & (p->indices - 1);
290 pr_debug("index %d->%d\n", skb->tc_index, index); 290 pr_debug("index %d->%d\n", skb->tc_index, index);
291 291
292 switch (skb->protocol) { 292 switch (tc_skb_protocol(skb)) {
293 case htons(ETH_P_IP): 293 case htons(ETH_P_IP):
294 ipv4_change_dsfield(ip_hdr(skb), p->mask[index], 294 ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
295 p->value[index]); 295 p->value[index]);
@@ -306,7 +306,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
306 */ 306 */
307 if (p->mask[index] != 0xff || p->value[index]) 307 if (p->mask[index] != 0xff || p->value[index])
308 pr_warn("%s: unsupported protocol %d\n", 308 pr_warn("%s: unsupported protocol %d\n",
309 __func__, ntohs(skb->protocol)); 309 __func__, ntohs(tc_skb_protocol(skb)));
310 break; 310 break;
311 } 311 }
312 312
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 9b05924cc386..dfcea20e3171 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing) 2 * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing)
3 * 3 *
4 * Copyright (C) 2013 Eric Dumazet <edumazet@google.com> 4 * Copyright (C) 2013-2015 Eric Dumazet <edumazet@google.com>
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License 7 * modify it under the terms of the GNU General Public License
@@ -52,6 +52,7 @@
52#include <net/pkt_sched.h> 52#include <net/pkt_sched.h>
53#include <net/sock.h> 53#include <net/sock.h>
54#include <net/tcp_states.h> 54#include <net/tcp_states.h>
55#include <net/tcp.h>
55 56
56/* 57/*
57 * Per flow structure, dynamically allocated 58 * Per flow structure, dynamically allocated
@@ -92,6 +93,7 @@ struct fq_sched_data {
92 u32 flow_refill_delay; 93 u32 flow_refill_delay;
93 u32 flow_max_rate; /* optional max rate per flow */ 94 u32 flow_max_rate; /* optional max rate per flow */
94 u32 flow_plimit; /* max packets per flow */ 95 u32 flow_plimit; /* max packets per flow */
96 u32 orphan_mask; /* mask for orphaned skb */
95 struct rb_root *fq_root; 97 struct rb_root *fq_root;
96 u8 rate_enable; 98 u8 rate_enable;
97 u8 fq_trees_log; 99 u8 fq_trees_log;
@@ -222,11 +224,20 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
222 if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) 224 if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL))
223 return &q->internal; 225 return &q->internal;
224 226
225 if (unlikely(!sk)) { 227 /* SYNACK messages are attached to a listener socket.
228 * 1) They are not part of a 'flow' yet
229 * 2) We do not want to rate limit them (eg SYNFLOOD attack),
230 * especially if the listener set SO_MAX_PACING_RATE
231 * 3) We pretend they are orphaned
232 */
233 if (!sk || sk->sk_state == TCP_LISTEN) {
234 unsigned long hash = skb_get_hash(skb) & q->orphan_mask;
235
226 /* By forcing low order bit to 1, we make sure to not 236 /* By forcing low order bit to 1, we make sure to not
227 * collide with a local flow (socket pointers are word aligned) 237 * collide with a local flow (socket pointers are word aligned)
228 */ 238 */
229 sk = (struct sock *)(skb_get_hash(skb) | 1L); 239 sk = (struct sock *)((hash << 1) | 1UL);
240 skb_orphan(skb);
230 } 241 }
231 242
232 root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)]; 243 root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)];
@@ -445,7 +456,9 @@ begin:
445 goto begin; 456 goto begin;
446 } 457 }
447 458
448 if (unlikely(f->head && now < f->time_next_packet)) { 459 skb = f->head;
460 if (unlikely(skb && now < f->time_next_packet &&
461 !skb_is_tcp_pure_ack(skb))) {
449 head->first = f->next; 462 head->first = f->next;
450 fq_flow_set_throttled(q, f); 463 fq_flow_set_throttled(q, f);
451 goto begin; 464 goto begin;
@@ -464,14 +477,17 @@ begin:
464 goto begin; 477 goto begin;
465 } 478 }
466 prefetch(&skb->end); 479 prefetch(&skb->end);
467 f->time_next_packet = now;
468 f->credit -= qdisc_pkt_len(skb); 480 f->credit -= qdisc_pkt_len(skb);
469 481
470 if (f->credit > 0 || !q->rate_enable) 482 if (f->credit > 0 || !q->rate_enable)
471 goto out; 483 goto out;
472 484
485 /* Do not pace locally generated ack packets */
486 if (skb_is_tcp_pure_ack(skb))
487 goto out;
488
473 rate = q->flow_max_rate; 489 rate = q->flow_max_rate;
474 if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) 490 if (skb->sk)
475 rate = min(skb->sk->sk_pacing_rate, rate); 491 rate = min(skb->sk->sk_pacing_rate, rate);
476 492
477 if (rate != ~0U) { 493 if (rate != ~0U) {
@@ -670,8 +686,14 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
670 if (tb[TCA_FQ_FLOW_PLIMIT]) 686 if (tb[TCA_FQ_FLOW_PLIMIT])
671 q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]); 687 q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]);
672 688
673 if (tb[TCA_FQ_QUANTUM]) 689 if (tb[TCA_FQ_QUANTUM]) {
674 q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); 690 u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
691
692 if (quantum > 0)
693 q->quantum = quantum;
694 else
695 err = -EINVAL;
696 }
675 697
676 if (tb[TCA_FQ_INITIAL_QUANTUM]) 698 if (tb[TCA_FQ_INITIAL_QUANTUM])
677 q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); 699 q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
@@ -698,6 +720,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
698 q->flow_refill_delay = usecs_to_jiffies(usecs_delay); 720 q->flow_refill_delay = usecs_to_jiffies(usecs_delay);
699 } 721 }
700 722
723 if (tb[TCA_FQ_ORPHAN_MASK])
724 q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]);
725
701 if (!err) { 726 if (!err) {
702 sch_tree_unlock(sch); 727 sch_tree_unlock(sch);
703 err = fq_resize(sch, fq_log); 728 err = fq_resize(sch, fq_log);
@@ -743,6 +768,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
743 q->delayed = RB_ROOT; 768 q->delayed = RB_ROOT;
744 q->fq_root = NULL; 769 q->fq_root = NULL;
745 q->fq_trees_log = ilog2(1024); 770 q->fq_trees_log = ilog2(1024);
771 q->orphan_mask = 1024 - 1;
746 qdisc_watchdog_init(&q->watchdog, sch); 772 qdisc_watchdog_init(&q->watchdog, sch);
747 773
748 if (opt) 774 if (opt)
@@ -772,6 +798,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
772 nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) || 798 nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
773 nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY, 799 nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
774 jiffies_to_usecs(q->flow_refill_delay)) || 800 jiffies_to_usecs(q->flow_refill_delay)) ||
801 nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
775 nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) 802 nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
776 goto nla_put_failure; 803 goto nla_put_failure;
777 804
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 6ada42396a24..e02687185a59 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -122,13 +122,6 @@ teql_peek(struct Qdisc *sch)
122 return NULL; 122 return NULL;
123} 123}
124 124
125static inline void
126teql_neigh_release(struct neighbour *n)
127{
128 if (n)
129 neigh_release(n);
130}
131
132static void 125static void
133teql_reset(struct Qdisc *sch) 126teql_reset(struct Qdisc *sch)
134{ 127{
@@ -249,8 +242,8 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
249 char haddr[MAX_ADDR_LEN]; 242 char haddr[MAX_ADDR_LEN];
250 243
251 neigh_ha_snapshot(haddr, n, dev); 244 neigh_ha_snapshot(haddr, n, dev);
252 err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr, 245 err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)),
253 NULL, skb->len); 246 haddr, NULL, skb->len);
254 247
255 if (err < 0) 248 if (err < 0)
256 err = -EINVAL; 249 err = -EINVAL;