aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2019-04-23 12:36:35 -0400
committerDaniel Borkmann <daniel@iogearbox.net>2019-04-23 12:36:36 -0400
commit2aad32613c353b1e05da5994324bc5f20d0dd55a (patch)
treeda9f76ad09a765c5818de87d3b4e5401725ed49d
parent7e6e185c74dd8a8dc539300c079adc6bc27045d6 (diff)
parent02ee0658362d3713421851bb7487af77a4098bb5 (diff)
Merge branch 'bpf-eth-get-headlen'
Stanislav Fomichev says: ==================== Currently, when eth_get_headlen calls flow dissector, it doesn't pass any skb. Because we use passed skb to lookup associated networking namespace to find whether we have a BPF program attached or not, we always use C-based flow dissector in this case. The goal of this patch series is to add new networking namespace argument to the eth_get_headlen and make BPF flow dissector programs be able to work in the skb-less case. The series goes like this: * use new kernel context (struct bpf_flow_dissector) for flow dissector programs; this makes it easy to distinguish between skb and no-skb case and supports calling BPF flow dissector on a chunk of raw data * convert BPF_PROG_TEST_RUN to use raw data * plumb network namespace into __skb_flow_dissect from all callers * handle no-skb case in __skb_flow_dissect * update eth_get_headlen to include net namespace argument and convert all existing users * add selftest to make sure bpf_skb_load_bytes is not allowed in the no-skb mode * extend test_progs to exercise skb-less flow dissection as well * stop adjusting nhoff/thoff by ETH_HLEN in BPF_PROG_TEST_RUN v6: * more suggestions by Alexei: * eth_get_headlen now takes net dev, not net namespace * test skb-less case via tun eth_get_headlen * fix return errors in bpf_flow_load * don't adjust nhoff/thoff by ETH_HLEN v5: * API changes have been submitted via bpf/stable tree v4: * prohibit access to vlan fields as well (otherwise, inconsistent between skb/skb-less cases) * drop extra unneeded check for skb->vlan_present in bpf_flow.c v3: * new kernel xdp_buff-like context per Alexei suggestion * drop skb_net helper * properly clamp flow_keys->nhoff v2: * moved temporary skb from stack into percpu (avoids memset of ~200 bytes per packet) * tightened down access to __sk_buff fields from flow dissector programs to avoid touching shinfo (whitelist only relevant fields) * addressed suggestions from Willem ====================
Acked-by: Eric Dumazet <edumazet@google.com> Acked-by: Willem de Bruijn <willemb@google.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ring.c3
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_enet.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c2
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_main.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c3
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_txrx.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c2
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c2
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c2
-rw-r--r--drivers/net/tun.c3
-rw-r--r--include/linux/etherdevice.h2
-rw-r--r--include/linux/skbuff.h28
-rw-r--r--include/net/flow_dissector.h7
-rw-r--r--include/net/sch_generic.h11
-rw-r--r--net/bpf/test_run.c48
-rw-r--r--net/core/filter.c105
-rw-r--r--net/core/flow_dissector.c90
-rw-r--r--net/ethernet/eth.c8
-rw-r--r--tools/testing/selftests/bpf/flow_dissector_load.c2
-rw-r--r--tools/testing/selftests/bpf/flow_dissector_load.h24
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c113
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c48
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_flow.c79
27 files changed, 411 insertions, 186 deletions
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index c64e2fb5a4f1..350e385528fd 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -354,7 +354,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
354 354
355 hdr_len = buff->len; 355 hdr_len = buff->len;
356 if (hdr_len > AQ_CFG_RX_HDR_SIZE) 356 if (hdr_len > AQ_CFG_RX_HDR_SIZE)
357 hdr_len = eth_get_headlen(aq_buf_vaddr(&buff->rxdata), 357 hdr_len = eth_get_headlen(skb->dev,
358 aq_buf_vaddr(&buff->rxdata),
358 AQ_CFG_RX_HDR_SIZE); 359 AQ_CFG_RX_HDR_SIZE);
359 360
360 memcpy(__skb_put(skb, hdr_len), aq_buf_vaddr(&buff->rxdata), 361 memcpy(__skb_put(skb, hdr_len), aq_buf_vaddr(&buff->rxdata),
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 6528a597367b..526f36dcb204 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -899,7 +899,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
899 DMA_ATTR_WEAK_ORDERING); 899 DMA_ATTR_WEAK_ORDERING);
900 900
901 if (unlikely(!payload)) 901 if (unlikely(!payload))
902 payload = eth_get_headlen(data_ptr, len); 902 payload = eth_get_headlen(bp->dev, data_ptr, len);
903 903
904 skb = napi_alloc_skb(&rxr->bnapi->napi, payload); 904 skb = napi_alloc_skb(&rxr->bnapi->napi, payload);
905 if (!skb) { 905 if (!skb) {
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 297b95c1b3c1..65b985acae38 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -598,7 +598,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
598 } else { 598 } else {
599 ring->stats.seg_pkt_cnt++; 599 ring->stats.seg_pkt_cnt++;
600 600
601 pull_len = eth_get_headlen(va, HNS_RX_HEAD_SIZE); 601 pull_len = eth_get_headlen(ndev, va, HNS_RX_HEAD_SIZE);
602 memcpy(__skb_put(skb, pull_len), va, 602 memcpy(__skb_put(skb, pull_len), va,
603 ALIGN(pull_len, sizeof(long))); 603 ALIGN(pull_len, sizeof(long)));
604 604
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 176d4b965709..5f7b51c6ee91 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2580,7 +2580,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, int length,
2580 ring->stats.seg_pkt_cnt++; 2580 ring->stats.seg_pkt_cnt++;
2581 u64_stats_update_end(&ring->syncp); 2581 u64_stats_update_end(&ring->syncp);
2582 2582
2583 ring->pull_len = eth_get_headlen(va, HNS3_RX_HEAD_SIZE); 2583 ring->pull_len = eth_get_headlen(netdev, va, HNS3_RX_HEAD_SIZE);
2584 __skb_put(skb, ring->pull_len); 2584 __skb_put(skb, ring->pull_len);
2585 hns3_nic_reuse_page(skb, ring->frag_num++, ring, ring->pull_len, 2585 hns3_nic_reuse_page(skb, ring->frag_num++, ring, ring->pull_len,
2586 desc_cb); 2586 desc_cb);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 2325cee76211..b4d970e44163 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -280,7 +280,7 @@ static bool fm10k_add_rx_frag(struct fm10k_rx_buffer *rx_buffer,
280 /* we need the header to contain the greater of either ETH_HLEN or 280 /* we need the header to contain the greater of either ETH_HLEN or
281 * 60 bytes if the skb->len is less than 60 for skb_pad. 281 * 60 bytes if the skb->len is less than 60 for skb_pad.
282 */ 282 */
283 pull_len = eth_get_headlen(va, FM10K_RX_HDR_LEN); 283 pull_len = eth_get_headlen(skb->dev, va, FM10K_RX_HDR_LEN);
284 284
285 /* align pull length to size of long to optimize memcpy performance */ 285 /* align pull length to size of long to optimize memcpy performance */
286 memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); 286 memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 1a95223c9f99..e1931701cd7e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2035,7 +2035,8 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
2035 /* Determine available headroom for copy */ 2035 /* Determine available headroom for copy */
2036 headlen = size; 2036 headlen = size;
2037 if (headlen > I40E_RX_HDR_SIZE) 2037 if (headlen > I40E_RX_HDR_SIZE)
2038 headlen = eth_get_headlen(xdp->data, I40E_RX_HDR_SIZE); 2038 headlen = eth_get_headlen(skb->dev, xdp->data,
2039 I40E_RX_HDR_SIZE);
2039 2040
2040 /* align pull length to size of long to optimize memcpy performance */ 2041 /* align pull length to size of long to optimize memcpy performance */
2041 memcpy(__skb_put(skb, headlen), xdp->data, 2042 memcpy(__skb_put(skb, headlen), xdp->data,
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
index b64187753ad6..cf8be63a8a4f 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -1315,7 +1315,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring,
1315 /* Determine available headroom for copy */ 1315 /* Determine available headroom for copy */
1316 headlen = size; 1316 headlen = size;
1317 if (headlen > IAVF_RX_HDR_SIZE) 1317 if (headlen > IAVF_RX_HDR_SIZE)
1318 headlen = eth_get_headlen(va, IAVF_RX_HDR_SIZE); 1318 headlen = eth_get_headlen(skb->dev, va, IAVF_RX_HDR_SIZE);
1319 1319
1320 /* align pull length to size of long to optimize memcpy performance */ 1320 /* align pull length to size of long to optimize memcpy performance */
1321 memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); 1321 memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 79043fec0187..259f118c7d8b 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -699,7 +699,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
699 /* Determine available headroom for copy */ 699 /* Determine available headroom for copy */
700 headlen = size; 700 headlen = size;
701 if (headlen > ICE_RX_HDR_SIZE) 701 if (headlen > ICE_RX_HDR_SIZE)
702 headlen = eth_get_headlen(va, ICE_RX_HDR_SIZE); 702 headlen = eth_get_headlen(skb->dev, va, ICE_RX_HDR_SIZE);
703 703
704 /* align pull length to size of long to optimize memcpy performance */ 704 /* align pull length to size of long to optimize memcpy performance */
705 memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); 705 memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index acbb5b4f333d..9b8a4bb25327 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -8051,7 +8051,7 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
8051 /* Determine available headroom for copy */ 8051 /* Determine available headroom for copy */
8052 headlen = size; 8052 headlen = size;
8053 if (headlen > IGB_RX_HDR_LEN) 8053 if (headlen > IGB_RX_HDR_LEN)
8054 headlen = eth_get_headlen(va, IGB_RX_HDR_LEN); 8054 headlen = eth_get_headlen(skb->dev, va, IGB_RX_HDR_LEN);
8055 8055
8056 /* align pull length to size of long to optimize memcpy performance */ 8056 /* align pull length to size of long to optimize memcpy performance */
8057 memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); 8057 memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index f79728381e8a..e58a6e0dc4d9 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1199,7 +1199,7 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
1199 /* Determine available headroom for copy */ 1199 /* Determine available headroom for copy */
1200 headlen = size; 1200 headlen = size;
1201 if (headlen > IGC_RX_HDR_LEN) 1201 if (headlen > IGC_RX_HDR_LEN)
1202 headlen = eth_get_headlen(va, IGC_RX_HDR_LEN); 1202 headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN);
1203 1203
1204 /* align pull length to size of long to optimize memcpy performance */ 1204 /* align pull length to size of long to optimize memcpy performance */
1205 memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); 1205 memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 60cec3540dd7..7b903206b534 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1800,7 +1800,7 @@ static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring,
1800 * we need the header to contain the greater of either ETH_HLEN or 1800 * we need the header to contain the greater of either ETH_HLEN or
1801 * 60 bytes if the skb->len is less than 60 for skb_pad. 1801 * 60 bytes if the skb->len is less than 60 for skb_pad.
1802 */ 1802 */
1803 pull_len = eth_get_headlen(va, IXGBE_RX_HDR_SIZE); 1803 pull_len = eth_get_headlen(skb->dev, va, IXGBE_RX_HDR_SIZE);
1804 1804
1805 /* align pull length to size of long to optimize memcpy performance */ 1805 /* align pull length to size of long to optimize memcpy performance */
1806 skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); 1806 skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 49e23afa05a2..d189ed247665 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -895,7 +895,8 @@ struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
895 /* Determine available headroom for copy */ 895 /* Determine available headroom for copy */
896 headlen = size; 896 headlen = size;
897 if (headlen > IXGBEVF_RX_HDR_SIZE) 897 if (headlen > IXGBEVF_RX_HDR_SIZE)
898 headlen = eth_get_headlen(xdp->data, IXGBEVF_RX_HDR_SIZE); 898 headlen = eth_get_headlen(skb->dev, xdp->data,
899 IXGBEVF_RX_HDR_SIZE);
899 900
900 /* align pull length to size of long to optimize memcpy performance */ 901 /* align pull length to size of long to optimize memcpy performance */
901 memcpy(__skb_put(skb, headlen), xdp->data, 902 memcpy(__skb_put(skb, headlen), xdp->data,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 40f3f98aa279..7b61126fcec9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -163,7 +163,7 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
163 case MLX5_INLINE_MODE_NONE: 163 case MLX5_INLINE_MODE_NONE:
164 return 0; 164 return 0;
165 case MLX5_INLINE_MODE_TCP_UDP: 165 case MLX5_INLINE_MODE_TCP_UDP:
166 hlen = eth_get_headlen(skb->data, skb_headlen(skb)); 166 hlen = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb));
167 if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb)) 167 if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb))
168 hlen += VLAN_HLEN; 168 hlen += VLAN_HLEN;
169 break; 169 break;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 24d0220b9ba0..9d72f8c76c15 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1965,7 +1965,8 @@ drop:
1965 1965
1966 if (frags) { 1966 if (frags) {
1967 /* Exercise flow dissector code path. */ 1967 /* Exercise flow dissector code path. */
1968 u32 headlen = eth_get_headlen(skb->data, skb_headlen(skb)); 1968 u32 headlen = eth_get_headlen(tun->dev, skb->data,
1969 skb_headlen(skb));
1969 1970
1970 if (unlikely(headlen > skb_headlen(skb))) { 1971 if (unlikely(headlen > skb_headlen(skb))) {
1971 this_cpu_inc(tun->pcpu_stats->rx_dropped); 1972 this_cpu_inc(tun->pcpu_stats->rx_dropped);
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index e2f3b21cd72a..c6c1930e28a0 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -33,7 +33,7 @@ struct device;
33int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr); 33int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr);
34unsigned char *arch_get_platform_mac_address(void); 34unsigned char *arch_get_platform_mac_address(void);
35int nvmem_get_mac_address(struct device *dev, void *addrbuf); 35int nvmem_get_mac_address(struct device *dev, void *addrbuf);
36u32 eth_get_headlen(void *data, unsigned int max_len); 36u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len);
37__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); 37__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
38extern const struct header_ops eth_header_ops; 38extern const struct header_ops eth_header_ops;
39 39
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6f42942a443b..998256c2820b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1275,12 +1275,12 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
1275} 1275}
1276#endif 1276#endif
1277 1277
1278struct bpf_flow_keys; 1278struct bpf_flow_dissector;
1279bool __skb_flow_bpf_dissect(struct bpf_prog *prog, 1279bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
1280 const struct sk_buff *skb, 1280 __be16 proto, int nhoff, int hlen);
1281 struct flow_dissector *flow_dissector, 1281
1282 struct bpf_flow_keys *flow_keys); 1282bool __skb_flow_dissect(const struct net *net,
1283bool __skb_flow_dissect(const struct sk_buff *skb, 1283 const struct sk_buff *skb,
1284 struct flow_dissector *flow_dissector, 1284 struct flow_dissector *flow_dissector,
1285 void *target_container, 1285 void *target_container,
1286 void *data, __be16 proto, int nhoff, int hlen, 1286 void *data, __be16 proto, int nhoff, int hlen,
@@ -1290,8 +1290,8 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb,
1290 struct flow_dissector *flow_dissector, 1290 struct flow_dissector *flow_dissector,
1291 void *target_container, unsigned int flags) 1291 void *target_container, unsigned int flags)
1292{ 1292{
1293 return __skb_flow_dissect(skb, flow_dissector, target_container, 1293 return __skb_flow_dissect(NULL, skb, flow_dissector,
1294 NULL, 0, 0, 0, flags); 1294 target_container, NULL, 0, 0, 0, flags);
1295} 1295}
1296 1296
1297static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, 1297static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
@@ -1299,18 +1299,19 @@ static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
1299 unsigned int flags) 1299 unsigned int flags)
1300{ 1300{
1301 memset(flow, 0, sizeof(*flow)); 1301 memset(flow, 0, sizeof(*flow));
1302 return __skb_flow_dissect(skb, &flow_keys_dissector, flow, 1302 return __skb_flow_dissect(NULL, skb, &flow_keys_dissector,
1303 NULL, 0, 0, 0, flags); 1303 flow, NULL, 0, 0, 0, flags);
1304} 1304}
1305 1305
1306static inline bool 1306static inline bool
1307skb_flow_dissect_flow_keys_basic(const struct sk_buff *skb, 1307skb_flow_dissect_flow_keys_basic(const struct net *net,
1308 const struct sk_buff *skb,
1308 struct flow_keys_basic *flow, void *data, 1309 struct flow_keys_basic *flow, void *data,
1309 __be16 proto, int nhoff, int hlen, 1310 __be16 proto, int nhoff, int hlen,
1310 unsigned int flags) 1311 unsigned int flags)
1311{ 1312{
1312 memset(flow, 0, sizeof(*flow)); 1313 memset(flow, 0, sizeof(*flow));
1313 return __skb_flow_dissect(skb, &flow_keys_basic_dissector, flow, 1314 return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow,
1314 data, proto, nhoff, hlen, flags); 1315 data, proto, nhoff, hlen, flags);
1315} 1316}
1316 1317
@@ -2488,7 +2489,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb)
2488 if (skb_transport_header_was_set(skb)) 2489 if (skb_transport_header_was_set(skb))
2489 return; 2490 return;
2490 2491
2491 if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0)) 2492 if (skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
2493 NULL, 0, 0, 0, 0))
2492 skb_set_transport_header(skb, keys.control.thoff); 2494 skb_set_transport_header(skb, keys.control.thoff);
2493} 2495}
2494 2496
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 2b26979efb48..7c5a8d9a8d2a 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -305,4 +305,11 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec
305 return ((char *)target_container) + flow_dissector->offset[key_id]; 305 return ((char *)target_container) + flow_dissector->offset[key_id];
306} 306}
307 307
308struct bpf_flow_dissector {
309 struct bpf_flow_keys *flow_keys;
310 const struct sk_buff *skb;
311 void *data;
312 void *data_end;
313};
314
308#endif 315#endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e8f85cd2afce..21f434f3ac9e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -364,13 +364,10 @@ struct tcf_proto {
364}; 364};
365 365
366struct qdisc_skb_cb { 366struct qdisc_skb_cb {
367 union { 367 struct {
368 struct { 368 unsigned int pkt_len;
369 unsigned int pkt_len; 369 u16 slave_dev_queue_mapping;
370 u16 slave_dev_queue_mapping; 370 u16 tc_classid;
371 u16 tc_classid;
372 };
373 struct bpf_flow_keys *flow_keys;
374 }; 371 };
375#define QDISC_CB_PRIV_LEN 20 372#define QDISC_CB_PRIV_LEN 20
376 unsigned char data[QDISC_CB_PRIV_LEN]; 373 unsigned char data[QDISC_CB_PRIV_LEN];
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2221573dacdb..8606e5aef0b6 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -379,13 +379,12 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
379 union bpf_attr __user *uattr) 379 union bpf_attr __user *uattr)
380{ 380{
381 u32 size = kattr->test.data_size_in; 381 u32 size = kattr->test.data_size_in;
382 struct bpf_flow_dissector ctx = {};
382 u32 repeat = kattr->test.repeat; 383 u32 repeat = kattr->test.repeat;
383 struct bpf_flow_keys flow_keys; 384 struct bpf_flow_keys flow_keys;
384 u64 time_start, time_spent = 0; 385 u64 time_start, time_spent = 0;
385 struct bpf_skb_data_end *cb; 386 const struct ethhdr *eth;
386 u32 retval, duration; 387 u32 retval, duration;
387 struct sk_buff *skb;
388 struct sock *sk;
389 void *data; 388 void *data;
390 int ret; 389 int ret;
391 u32 i; 390 u32 i;
@@ -396,46 +395,28 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
396 if (kattr->test.ctx_in || kattr->test.ctx_out) 395 if (kattr->test.ctx_in || kattr->test.ctx_out)
397 return -EINVAL; 396 return -EINVAL;
398 397
399 data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN, 398 if (size < ETH_HLEN)
400 SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); 399 return -EINVAL;
400
401 data = bpf_test_init(kattr, size, 0, 0);
401 if (IS_ERR(data)) 402 if (IS_ERR(data))
402 return PTR_ERR(data); 403 return PTR_ERR(data);
403 404
404 sk = kzalloc(sizeof(*sk), GFP_USER); 405 eth = (struct ethhdr *)data;
405 if (!sk) {
406 kfree(data);
407 return -ENOMEM;
408 }
409 sock_net_set(sk, current->nsproxy->net_ns);
410 sock_init_data(NULL, sk);
411
412 skb = build_skb(data, 0);
413 if (!skb) {
414 kfree(data);
415 kfree(sk);
416 return -ENOMEM;
417 }
418 skb->sk = sk;
419
420 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
421 __skb_put(skb, size);
422 skb->protocol = eth_type_trans(skb,
423 current->nsproxy->net_ns->loopback_dev);
424 skb_reset_network_header(skb);
425
426 cb = (struct bpf_skb_data_end *)skb->cb;
427 cb->qdisc_cb.flow_keys = &flow_keys;
428 406
429 if (!repeat) 407 if (!repeat)
430 repeat = 1; 408 repeat = 1;
431 409
410 ctx.flow_keys = &flow_keys;
411 ctx.data = data;
412 ctx.data_end = (__u8 *)data + size;
413
432 rcu_read_lock(); 414 rcu_read_lock();
433 preempt_disable(); 415 preempt_disable();
434 time_start = ktime_get_ns(); 416 time_start = ktime_get_ns();
435 for (i = 0; i < repeat; i++) { 417 for (i = 0; i < repeat; i++) {
436 retval = __skb_flow_bpf_dissect(prog, skb, 418 retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
437 &flow_keys_dissector, 419 size);
438 &flow_keys);
439 420
440 if (signal_pending(current)) { 421 if (signal_pending(current)) {
441 preempt_enable(); 422 preempt_enable();
@@ -468,7 +449,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
468 retval, duration); 449 retval, duration);
469 450
470out: 451out:
471 kfree_skb(skb); 452 kfree(data);
472 kfree(sk);
473 return ret; 453 return ret;
474} 454}
diff --git a/net/core/filter.c b/net/core/filter.c
index fa8fb0548217..edb3a7c22f6c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1730,6 +1730,40 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
1730 .arg4_type = ARG_CONST_SIZE, 1730 .arg4_type = ARG_CONST_SIZE,
1731}; 1731};
1732 1732
1733BPF_CALL_4(bpf_flow_dissector_load_bytes,
1734 const struct bpf_flow_dissector *, ctx, u32, offset,
1735 void *, to, u32, len)
1736{
1737 void *ptr;
1738
1739 if (unlikely(offset > 0xffff))
1740 goto err_clear;
1741
1742 if (unlikely(!ctx->skb))
1743 goto err_clear;
1744
1745 ptr = skb_header_pointer(ctx->skb, offset, len, to);
1746 if (unlikely(!ptr))
1747 goto err_clear;
1748 if (ptr != to)
1749 memcpy(to, ptr, len);
1750
1751 return 0;
1752err_clear:
1753 memset(to, 0, len);
1754 return -EFAULT;
1755}
1756
1757static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = {
1758 .func = bpf_flow_dissector_load_bytes,
1759 .gpl_only = false,
1760 .ret_type = RET_INTEGER,
1761 .arg1_type = ARG_PTR_TO_CTX,
1762 .arg2_type = ARG_ANYTHING,
1763 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
1764 .arg4_type = ARG_CONST_SIZE,
1765};
1766
1733BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb, 1767BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
1734 u32, offset, void *, to, u32, len, u32, start_header) 1768 u32, offset, void *, to, u32, len, u32, start_header)
1735{ 1769{
@@ -6121,7 +6155,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6121{ 6155{
6122 switch (func_id) { 6156 switch (func_id) {
6123 case BPF_FUNC_skb_load_bytes: 6157 case BPF_FUNC_skb_load_bytes:
6124 return &bpf_skb_load_bytes_proto; 6158 return &bpf_flow_dissector_load_bytes_proto;
6125 default: 6159 default:
6126 return bpf_base_func_proto(func_id); 6160 return bpf_base_func_proto(func_id);
6127 } 6161 }
@@ -6248,9 +6282,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
6248 return false; 6282 return false;
6249 break; 6283 break;
6250 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): 6284 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
6251 if (size != sizeof(__u64)) 6285 return false;
6252 return false;
6253 break;
6254 case bpf_ctx_range(struct __sk_buff, tstamp): 6286 case bpf_ctx_range(struct __sk_buff, tstamp):
6255 if (size != sizeof(__u64)) 6287 if (size != sizeof(__u64))
6256 return false; 6288 return false;
@@ -6285,7 +6317,6 @@ static bool sk_filter_is_valid_access(int off, int size,
6285 case bpf_ctx_range(struct __sk_buff, data): 6317 case bpf_ctx_range(struct __sk_buff, data):
6286 case bpf_ctx_range(struct __sk_buff, data_meta): 6318 case bpf_ctx_range(struct __sk_buff, data_meta):
6287 case bpf_ctx_range(struct __sk_buff, data_end): 6319 case bpf_ctx_range(struct __sk_buff, data_end):
6288 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
6289 case bpf_ctx_range_till(struct __sk_buff, family, local_port): 6320 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
6290 case bpf_ctx_range(struct __sk_buff, tstamp): 6321 case bpf_ctx_range(struct __sk_buff, tstamp):
6291 case bpf_ctx_range(struct __sk_buff, wire_len): 6322 case bpf_ctx_range(struct __sk_buff, wire_len):
@@ -6312,7 +6343,6 @@ static bool cg_skb_is_valid_access(int off, int size,
6312 switch (off) { 6343 switch (off) {
6313 case bpf_ctx_range(struct __sk_buff, tc_classid): 6344 case bpf_ctx_range(struct __sk_buff, tc_classid):
6314 case bpf_ctx_range(struct __sk_buff, data_meta): 6345 case bpf_ctx_range(struct __sk_buff, data_meta):
6315 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
6316 case bpf_ctx_range(struct __sk_buff, wire_len): 6346 case bpf_ctx_range(struct __sk_buff, wire_len):
6317 return false; 6347 return false;
6318 case bpf_ctx_range(struct __sk_buff, data): 6348 case bpf_ctx_range(struct __sk_buff, data):
@@ -6358,7 +6388,6 @@ static bool lwt_is_valid_access(int off, int size,
6358 case bpf_ctx_range(struct __sk_buff, tc_classid): 6388 case bpf_ctx_range(struct __sk_buff, tc_classid):
6359 case bpf_ctx_range_till(struct __sk_buff, family, local_port): 6389 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
6360 case bpf_ctx_range(struct __sk_buff, data_meta): 6390 case bpf_ctx_range(struct __sk_buff, data_meta):
6361 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
6362 case bpf_ctx_range(struct __sk_buff, tstamp): 6391 case bpf_ctx_range(struct __sk_buff, tstamp):
6363 case bpf_ctx_range(struct __sk_buff, wire_len): 6392 case bpf_ctx_range(struct __sk_buff, wire_len):
6364 return false; 6393 return false;
@@ -6601,7 +6630,6 @@ static bool tc_cls_act_is_valid_access(int off, int size,
6601 case bpf_ctx_range(struct __sk_buff, data_end): 6630 case bpf_ctx_range(struct __sk_buff, data_end):
6602 info->reg_type = PTR_TO_PACKET_END; 6631 info->reg_type = PTR_TO_PACKET_END;
6603 break; 6632 break;
6604 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
6605 case bpf_ctx_range_till(struct __sk_buff, family, local_port): 6633 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
6606 return false; 6634 return false;
6607 } 6635 }
@@ -6803,7 +6831,6 @@ static bool sk_skb_is_valid_access(int off, int size,
6803 switch (off) { 6831 switch (off) {
6804 case bpf_ctx_range(struct __sk_buff, tc_classid): 6832 case bpf_ctx_range(struct __sk_buff, tc_classid):
6805 case bpf_ctx_range(struct __sk_buff, data_meta): 6833 case bpf_ctx_range(struct __sk_buff, data_meta):
6806 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
6807 case bpf_ctx_range(struct __sk_buff, tstamp): 6834 case bpf_ctx_range(struct __sk_buff, tstamp):
6808 case bpf_ctx_range(struct __sk_buff, wire_len): 6835 case bpf_ctx_range(struct __sk_buff, wire_len):
6809 return false; 6836 return false;
@@ -6877,24 +6904,65 @@ static bool flow_dissector_is_valid_access(int off, int size,
6877 const struct bpf_prog *prog, 6904 const struct bpf_prog *prog,
6878 struct bpf_insn_access_aux *info) 6905 struct bpf_insn_access_aux *info)
6879{ 6906{
6907 const int size_default = sizeof(__u32);
6908
6909 if (off < 0 || off >= sizeof(struct __sk_buff))
6910 return false;
6911
6880 if (type == BPF_WRITE) 6912 if (type == BPF_WRITE)
6881 return false; 6913 return false;
6882 6914
6883 switch (off) { 6915 switch (off) {
6884 case bpf_ctx_range(struct __sk_buff, data): 6916 case bpf_ctx_range(struct __sk_buff, data):
6917 if (size != size_default)
6918 return false;
6885 info->reg_type = PTR_TO_PACKET; 6919 info->reg_type = PTR_TO_PACKET;
6886 break; 6920 return true;
6887 case bpf_ctx_range(struct __sk_buff, data_end): 6921 case bpf_ctx_range(struct __sk_buff, data_end):
6922 if (size != size_default)
6923 return false;
6888 info->reg_type = PTR_TO_PACKET_END; 6924 info->reg_type = PTR_TO_PACKET_END;
6889 break; 6925 return true;
6890 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): 6926 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
6927 if (size != sizeof(__u64))
6928 return false;
6891 info->reg_type = PTR_TO_FLOW_KEYS; 6929 info->reg_type = PTR_TO_FLOW_KEYS;
6892 break; 6930 return true;
6893 default: 6931 default:
6894 return false; 6932 return false;
6895 } 6933 }
6934}
6896 6935
6897 return bpf_skb_is_valid_access(off, size, type, prog, info); 6936static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
6937 const struct bpf_insn *si,
6938 struct bpf_insn *insn_buf,
6939 struct bpf_prog *prog,
6940 u32 *target_size)
6941
6942{
6943 struct bpf_insn *insn = insn_buf;
6944
6945 switch (si->off) {
6946 case offsetof(struct __sk_buff, data):
6947 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data),
6948 si->dst_reg, si->src_reg,
6949 offsetof(struct bpf_flow_dissector, data));
6950 break;
6951
6952 case offsetof(struct __sk_buff, data_end):
6953 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data_end),
6954 si->dst_reg, si->src_reg,
6955 offsetof(struct bpf_flow_dissector, data_end));
6956 break;
6957
6958 case offsetof(struct __sk_buff, flow_keys):
6959 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, flow_keys),
6960 si->dst_reg, si->src_reg,
6961 offsetof(struct bpf_flow_dissector, flow_keys));
6962 break;
6963 }
6964
6965 return insn - insn_buf;
6898} 6966}
6899 6967
6900static u32 bpf_convert_ctx_access(enum bpf_access_type type, 6968static u32 bpf_convert_ctx_access(enum bpf_access_type type,
@@ -7201,15 +7269,6 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
7201 skc_num, 2, target_size)); 7269 skc_num, 2, target_size));
7202 break; 7270 break;
7203 7271
7204 case offsetof(struct __sk_buff, flow_keys):
7205 off = si->off;
7206 off -= offsetof(struct __sk_buff, flow_keys);
7207 off += offsetof(struct sk_buff, cb);
7208 off += offsetof(struct qdisc_skb_cb, flow_keys);
7209 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
7210 si->src_reg, off);
7211 break;
7212
7213 case offsetof(struct __sk_buff, tstamp): 7272 case offsetof(struct __sk_buff, tstamp):
7214 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8); 7273 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
7215 7274
@@ -8214,7 +8273,7 @@ const struct bpf_prog_ops sk_msg_prog_ops = {
8214const struct bpf_verifier_ops flow_dissector_verifier_ops = { 8273const struct bpf_verifier_ops flow_dissector_verifier_ops = {
8215 .get_func_proto = flow_dissector_func_proto, 8274 .get_func_proto = flow_dissector_func_proto,
8216 .is_valid_access = flow_dissector_is_valid_access, 8275 .is_valid_access = flow_dissector_is_valid_access,
8217 .convert_ctx_access = bpf_convert_ctx_access, 8276 .convert_ctx_access = flow_dissector_convert_ctx_access,
8218}; 8277};
8219 8278
8220const struct bpf_prog_ops flow_dissector_prog_ops = { 8279const struct bpf_prog_ops flow_dissector_prog_ops = {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 795449713ba4..fac712cee9d5 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -683,50 +683,30 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
683 } 683 }
684} 684}
685 685
686bool __skb_flow_bpf_dissect(struct bpf_prog *prog, 686bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
687 const struct sk_buff *skb, 687 __be16 proto, int nhoff, int hlen)
688 struct flow_dissector *flow_dissector,
689 struct bpf_flow_keys *flow_keys)
690{ 688{
691 struct bpf_skb_data_end cb_saved; 689 struct bpf_flow_keys *flow_keys = ctx->flow_keys;
692 struct bpf_skb_data_end *cb;
693 u32 result; 690 u32 result;
694 691
695 /* Note that even though the const qualifier is discarded
696 * throughout the execution of the BPF program, all changes(the
697 * control block) are reverted after the BPF program returns.
698 * Therefore, __skb_flow_dissect does not alter the skb.
699 */
700
701 cb = (struct bpf_skb_data_end *)skb->cb;
702
703 /* Save Control Block */
704 memcpy(&cb_saved, cb, sizeof(cb_saved));
705 memset(cb, 0, sizeof(*cb));
706
707 /* Pass parameters to the BPF program */ 692 /* Pass parameters to the BPF program */
708 memset(flow_keys, 0, sizeof(*flow_keys)); 693 memset(flow_keys, 0, sizeof(*flow_keys));
709 cb->qdisc_cb.flow_keys = flow_keys; 694 flow_keys->n_proto = proto;
710 flow_keys->n_proto = skb->protocol; 695 flow_keys->nhoff = nhoff;
711 flow_keys->nhoff = skb_network_offset(skb);
712 flow_keys->thoff = flow_keys->nhoff; 696 flow_keys->thoff = flow_keys->nhoff;
713 697
714 bpf_compute_data_pointers((struct sk_buff *)skb); 698 result = BPF_PROG_RUN(prog, ctx);
715 result = BPF_PROG_RUN(prog, skb);
716
717 /* Restore state */
718 memcpy(cb, &cb_saved, sizeof(cb_saved));
719 699
720 flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, 700 flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen);
721 skb_network_offset(skb), skb->len);
722 flow_keys->thoff = clamp_t(u16, flow_keys->thoff, 701 flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
723 flow_keys->nhoff, skb->len); 702 flow_keys->nhoff, hlen);
724 703
725 return result == BPF_OK; 704 return result == BPF_OK;
726} 705}
727 706
728/** 707/**
729 * __skb_flow_dissect - extract the flow_keys struct and return it 708 * __skb_flow_dissect - extract the flow_keys struct and return it
709 * @net: associated network namespace, derived from @skb if NULL
730 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified 710 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
731 * @flow_dissector: list of keys to dissect 711 * @flow_dissector: list of keys to dissect
732 * @target_container: target structure to put dissected values into 712 * @target_container: target structure to put dissected values into
@@ -743,7 +723,8 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
743 * 723 *
744 * Caller must take care of zeroing target container memory. 724 * Caller must take care of zeroing target container memory.
745 */ 725 */
746bool __skb_flow_dissect(const struct sk_buff *skb, 726bool __skb_flow_dissect(const struct net *net,
727 const struct sk_buff *skb,
747 struct flow_dissector *flow_dissector, 728 struct flow_dissector *flow_dissector,
748 void *target_container, 729 void *target_container,
749 void *data, __be16 proto, int nhoff, int hlen, 730 void *data, __be16 proto, int nhoff, int hlen,
@@ -756,6 +737,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
756 struct flow_dissector_key_icmp *key_icmp; 737 struct flow_dissector_key_icmp *key_icmp;
757 struct flow_dissector_key_tags *key_tags; 738 struct flow_dissector_key_tags *key_tags;
758 struct flow_dissector_key_vlan *key_vlan; 739 struct flow_dissector_key_vlan *key_vlan;
740 struct bpf_prog *attached = NULL;
759 enum flow_dissect_ret fdret; 741 enum flow_dissect_ret fdret;
760 enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; 742 enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
761 int num_hdrs = 0; 743 int num_hdrs = 0;
@@ -798,22 +780,39 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
798 target_container); 780 target_container);
799 781
800 if (skb) { 782 if (skb) {
801 struct bpf_flow_keys flow_keys; 783 if (!net) {
802 struct bpf_prog *attached = NULL; 784 if (skb->dev)
785 net = dev_net(skb->dev);
786 else if (skb->sk)
787 net = sock_net(skb->sk);
788 }
789 }
803 790
791 WARN_ON_ONCE(!net);
792 if (net) {
804 rcu_read_lock(); 793 rcu_read_lock();
805 794 attached = rcu_dereference(net->flow_dissector_prog);
806 if (skb->dev)
807 attached = rcu_dereference(dev_net(skb->dev)->flow_dissector_prog);
808 else if (skb->sk)
809 attached = rcu_dereference(sock_net(skb->sk)->flow_dissector_prog);
810 else
811 WARN_ON_ONCE(1);
812 795
813 if (attached) { 796 if (attached) {
814 ret = __skb_flow_bpf_dissect(attached, skb, 797 struct bpf_flow_keys flow_keys;
815 flow_dissector, 798 struct bpf_flow_dissector ctx = {
816 &flow_keys); 799 .flow_keys = &flow_keys,
800 .data = data,
801 .data_end = data + hlen,
802 };
803 __be16 n_proto = proto;
804
805 if (skb) {
806 ctx.skb = skb;
807 /* we can't use 'proto' in the skb case
808 * because it might be set to skb->vlan_proto
809 * which has been pulled from the data
810 */
811 n_proto = skb->protocol;
812 }
813
814 ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff,
815 hlen);
817 __skb_flow_bpf_to_target(&flow_keys, flow_dissector, 816 __skb_flow_bpf_to_target(&flow_keys, flow_dissector,
818 target_container); 817 target_container);
819 rcu_read_unlock(); 818 rcu_read_unlock();
@@ -1410,8 +1409,8 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
1410 __flow_hash_secret_init(); 1409 __flow_hash_secret_init();
1411 1410
1412 memset(&keys, 0, sizeof(keys)); 1411 memset(&keys, 0, sizeof(keys));
1413 __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, 1412 __skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric,
1414 NULL, 0, 0, 0, 1413 &keys, NULL, 0, 0, 0,
1415 FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); 1414 FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
1416 1415
1417 return __flow_hash_from_keys(&keys, hashrnd); 1416 return __flow_hash_from_keys(&keys, hashrnd);
@@ -1512,7 +1511,8 @@ u32 skb_get_poff(const struct sk_buff *skb)
1512{ 1511{
1513 struct flow_keys_basic keys; 1512 struct flow_keys_basic keys;
1514 1513
1515 if (!skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0)) 1514 if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
1515 NULL, 0, 0, 0, 0))
1516 return 0; 1516 return 0;
1517 1517
1518 return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); 1518 return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index f7a3d7a171c7..0f9863dc4d44 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -119,13 +119,14 @@ EXPORT_SYMBOL(eth_header);
119 119
120/** 120/**
121 * eth_get_headlen - determine the length of header for an ethernet frame 121 * eth_get_headlen - determine the length of header for an ethernet frame
122 * @dev: pointer to network device
122 * @data: pointer to start of frame 123 * @data: pointer to start of frame
123 * @len: total length of frame 124 * @len: total length of frame
124 * 125 *
125 * Make a best effort attempt to pull the length for all of the headers for 126 * Make a best effort attempt to pull the length for all of the headers for
126 * a given frame in a linear buffer. 127 * a given frame in a linear buffer.
127 */ 128 */
128u32 eth_get_headlen(void *data, unsigned int len) 129u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len)
129{ 130{
130 const unsigned int flags = FLOW_DISSECTOR_F_PARSE_1ST_FRAG; 131 const unsigned int flags = FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
131 const struct ethhdr *eth = (const struct ethhdr *)data; 132 const struct ethhdr *eth = (const struct ethhdr *)data;
@@ -136,8 +137,9 @@ u32 eth_get_headlen(void *data, unsigned int len)
136 return len; 137 return len;
137 138
138 /* parse any remaining L2/L3 headers, check for L4 */ 139 /* parse any remaining L2/L3 headers, check for L4 */
139 if (!skb_flow_dissect_flow_keys_basic(NULL, &keys, data, eth->h_proto, 140 if (!skb_flow_dissect_flow_keys_basic(dev_net(dev), NULL, &keys, data,
140 sizeof(*eth), len, flags)) 141 eth->h_proto, sizeof(*eth),
142 len, flags))
141 return max_t(u32, keys.control.thoff, sizeof(*eth)); 143 return max_t(u32, keys.control.thoff, sizeof(*eth));
142 144
143 /* parse for any L4 headers */ 145 /* parse for any L4 headers */
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
index 7136ab9ffa73..3fd83b9dc1bf 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.c
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -26,7 +26,7 @@ static void load_and_attach_program(void)
26 struct bpf_object *obj; 26 struct bpf_object *obj;
27 27
28 ret = bpf_flow_load(&obj, cfg_path_name, cfg_section_name, 28 ret = bpf_flow_load(&obj, cfg_path_name, cfg_section_name,
29 cfg_map_name, &prog_fd); 29 cfg_map_name, NULL, &prog_fd, NULL);
30 if (ret) 30 if (ret)
31 error(1, 0, "bpf_flow_load %s", cfg_path_name); 31 error(1, 0, "bpf_flow_load %s", cfg_path_name);
32 32
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h
index 41dd6959feb0..daeaeb518894 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.h
+++ b/tools/testing/selftests/bpf/flow_dissector_load.h
@@ -9,10 +9,12 @@ static inline int bpf_flow_load(struct bpf_object **obj,
9 const char *path, 9 const char *path,
10 const char *section_name, 10 const char *section_name,
11 const char *map_name, 11 const char *map_name,
12 int *prog_fd) 12 const char *keys_map_name,
13 int *prog_fd,
14 int *keys_fd)
13{ 15{
14 struct bpf_program *prog, *main_prog; 16 struct bpf_program *prog, *main_prog;
15 struct bpf_map *prog_array; 17 struct bpf_map *prog_array, *keys;
16 int prog_array_fd; 18 int prog_array_fd;
17 int ret, fd, i; 19 int ret, fd, i;
18 20
@@ -23,19 +25,29 @@ static inline int bpf_flow_load(struct bpf_object **obj,
23 25
24 main_prog = bpf_object__find_program_by_title(*obj, section_name); 26 main_prog = bpf_object__find_program_by_title(*obj, section_name);
25 if (!main_prog) 27 if (!main_prog)
26 return ret; 28 return -1;
27 29
28 *prog_fd = bpf_program__fd(main_prog); 30 *prog_fd = bpf_program__fd(main_prog);
29 if (*prog_fd < 0) 31 if (*prog_fd < 0)
30 return ret; 32 return -1;
31 33
32 prog_array = bpf_object__find_map_by_name(*obj, map_name); 34 prog_array = bpf_object__find_map_by_name(*obj, map_name);
33 if (!prog_array) 35 if (!prog_array)
34 return ret; 36 return -1;
35 37
36 prog_array_fd = bpf_map__fd(prog_array); 38 prog_array_fd = bpf_map__fd(prog_array);
37 if (prog_array_fd < 0) 39 if (prog_array_fd < 0)
38 return ret; 40 return -1;
41
42 if (keys_map_name && keys_fd) {
43 keys = bpf_object__find_map_by_name(*obj, keys_map_name);
44 if (!keys)
45 return -1;
46
47 *keys_fd = bpf_map__fd(keys);
48 if (*keys_fd < 0)
49 return -1;
50 }
39 51
40 i = 0; 52 i = 0;
41 bpf_object__for_each_program(prog, *obj) { 53 bpf_object__for_each_program(prog, *obj) {
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 126319f9a97c..8b54adfd6264 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -1,5 +1,8 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <test_progs.h> 2#include <test_progs.h>
3#include <error.h>
4#include <linux/if.h>
5#include <linux/if_tun.h>
3 6
4#define CHECK_FLOW_KEYS(desc, got, expected) \ 7#define CHECK_FLOW_KEYS(desc, got, expected) \
5 CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0, \ 8 CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0, \
@@ -79,8 +82,8 @@ struct test tests[] = {
79 .tcp.doff = 5, 82 .tcp.doff = 5,
80 }, 83 },
81 .keys = { 84 .keys = {
82 .nhoff = 0, 85 .nhoff = ETH_HLEN,
83 .thoff = sizeof(struct iphdr), 86 .thoff = ETH_HLEN + sizeof(struct iphdr),
84 .addr_proto = ETH_P_IP, 87 .addr_proto = ETH_P_IP,
85 .ip_proto = IPPROTO_TCP, 88 .ip_proto = IPPROTO_TCP,
86 .n_proto = __bpf_constant_htons(ETH_P_IP), 89 .n_proto = __bpf_constant_htons(ETH_P_IP),
@@ -95,8 +98,8 @@ struct test tests[] = {
95 .tcp.doff = 5, 98 .tcp.doff = 5,
96 }, 99 },
97 .keys = { 100 .keys = {
98 .nhoff = 0, 101 .nhoff = ETH_HLEN,
99 .thoff = sizeof(struct ipv6hdr), 102 .thoff = ETH_HLEN + sizeof(struct ipv6hdr),
100 .addr_proto = ETH_P_IPV6, 103 .addr_proto = ETH_P_IPV6,
101 .ip_proto = IPPROTO_TCP, 104 .ip_proto = IPPROTO_TCP,
102 .n_proto = __bpf_constant_htons(ETH_P_IPV6), 105 .n_proto = __bpf_constant_htons(ETH_P_IPV6),
@@ -113,8 +116,8 @@ struct test tests[] = {
113 .tcp.doff = 5, 116 .tcp.doff = 5,
114 }, 117 },
115 .keys = { 118 .keys = {
116 .nhoff = VLAN_HLEN, 119 .nhoff = ETH_HLEN + VLAN_HLEN,
117 .thoff = VLAN_HLEN + sizeof(struct iphdr), 120 .thoff = ETH_HLEN + VLAN_HLEN + sizeof(struct iphdr),
118 .addr_proto = ETH_P_IP, 121 .addr_proto = ETH_P_IP,
119 .ip_proto = IPPROTO_TCP, 122 .ip_proto = IPPROTO_TCP,
120 .n_proto = __bpf_constant_htons(ETH_P_IP), 123 .n_proto = __bpf_constant_htons(ETH_P_IP),
@@ -131,8 +134,9 @@ struct test tests[] = {
131 .tcp.doff = 5, 134 .tcp.doff = 5,
132 }, 135 },
133 .keys = { 136 .keys = {
134 .nhoff = VLAN_HLEN * 2, 137 .nhoff = ETH_HLEN + VLAN_HLEN * 2,
135 .thoff = VLAN_HLEN * 2 + sizeof(struct ipv6hdr), 138 .thoff = ETH_HLEN + VLAN_HLEN * 2 +
139 sizeof(struct ipv6hdr),
136 .addr_proto = ETH_P_IPV6, 140 .addr_proto = ETH_P_IPV6,
137 .ip_proto = IPPROTO_TCP, 141 .ip_proto = IPPROTO_TCP,
138 .n_proto = __bpf_constant_htons(ETH_P_IPV6), 142 .n_proto = __bpf_constant_htons(ETH_P_IPV6),
@@ -140,13 +144,73 @@ struct test tests[] = {
140 }, 144 },
141}; 145};
142 146
147static int create_tap(const char *ifname)
148{
149 struct ifreq ifr = {
150 .ifr_flags = IFF_TAP | IFF_NO_PI | IFF_NAPI | IFF_NAPI_FRAGS,
151 };
152 int fd, ret;
153
154 strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
155
156 fd = open("/dev/net/tun", O_RDWR);
157 if (fd < 0)
158 return -1;
159
160 ret = ioctl(fd, TUNSETIFF, &ifr);
161 if (ret)
162 return -1;
163
164 return fd;
165}
166
167static int tx_tap(int fd, void *pkt, size_t len)
168{
169 struct iovec iov[] = {
170 {
171 .iov_len = len,
172 .iov_base = pkt,
173 },
174 };
175 return writev(fd, iov, ARRAY_SIZE(iov));
176}
177
178static int ifup(const char *ifname)
179{
180 struct ifreq ifr = {};
181 int sk, ret;
182
183 strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
184
185 sk = socket(PF_INET, SOCK_DGRAM, 0);
186 if (sk < 0)
187 return -1;
188
189 ret = ioctl(sk, SIOCGIFFLAGS, &ifr);
190 if (ret) {
191 close(sk);
192 return -1;
193 }
194
195 ifr.ifr_flags |= IFF_UP;
196 ret = ioctl(sk, SIOCSIFFLAGS, &ifr);
197 if (ret) {
198 close(sk);
199 return -1;
200 }
201
202 close(sk);
203 return 0;
204}
205
143void test_flow_dissector(void) 206void test_flow_dissector(void)
144{ 207{
208 int i, err, prog_fd, keys_fd = -1, tap_fd;
145 struct bpf_object *obj; 209 struct bpf_object *obj;
146 int i, err, prog_fd; 210 __u32 duration = 0;
147 211
148 err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector", 212 err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector",
149 "jmp_table", &prog_fd); 213 "jmp_table", "last_dissection", &prog_fd, &keys_fd);
150 if (err) { 214 if (err) {
151 error_cnt++; 215 error_cnt++;
152 return; 216 return;
@@ -171,5 +235,34 @@ void test_flow_dissector(void)
171 CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); 235 CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
172 } 236 }
173 237
238 /* Do the same tests but for skb-less flow dissector.
239 * We use a known path in the net/tun driver that calls
240 * eth_get_headlen and we manually export bpf_flow_keys
241 * via BPF map in this case.
242 */
243
244 err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
245 CHECK(err, "bpf_prog_attach", "err %d errno %d", err, errno);
246
247 tap_fd = create_tap("tap0");
248 CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d", tap_fd, errno);
249 err = ifup("tap0");
250 CHECK(err, "ifup", "err %d errno %d", err, errno);
251
252 for (i = 0; i < ARRAY_SIZE(tests); i++) {
253 struct bpf_flow_keys flow_keys = {};
254 struct bpf_prog_test_run_attr tattr = {};
255 __u32 key = 0;
256
257 err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
258 CHECK(err < 0, "tx_tap", "err %d errno %d", err, errno);
259
260 err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
261 CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
262
263 CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
264 CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
265 }
266
174 bpf_object__close(obj); 267 bpf_object__close(obj);
175} 268}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
new file mode 100644
index 000000000000..dc5ef155ec28
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
@@ -0,0 +1,48 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <test_progs.h>
3
4void test_flow_dissector_load_bytes(void)
5{
6 struct bpf_flow_keys flow_keys;
7 __u32 duration = 0, retval, size;
8 struct bpf_insn prog[] = {
9 // BPF_REG_1 - 1st argument: context
10 // BPF_REG_2 - 2nd argument: offset, start at first byte
11 BPF_MOV64_IMM(BPF_REG_2, 0),
12 // BPF_REG_3 - 3rd argument: destination, reserve byte on stack
13 BPF_ALU64_REG(BPF_MOV, BPF_REG_3, BPF_REG_10),
14 BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -1),
15 // BPF_REG_4 - 4th argument: copy one byte
16 BPF_MOV64_IMM(BPF_REG_4, 1),
17 // bpf_skb_load_bytes(ctx, sizeof(pkt_v4), ptr, 1)
18 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
19 BPF_FUNC_skb_load_bytes),
20 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
21 // if (ret == 0) return BPF_DROP (2)
22 BPF_MOV64_IMM(BPF_REG_0, BPF_DROP),
23 BPF_EXIT_INSN(),
24 // if (ret != 0) return BPF_OK (0)
25 BPF_MOV64_IMM(BPF_REG_0, BPF_OK),
26 BPF_EXIT_INSN(),
27 };
28 int fd, err;
29
30 /* make sure bpf_skb_load_bytes is not allowed from skb-less context
31 */
32 fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
33 ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
34 CHECK(fd < 0,
35 "flow_dissector-bpf_skb_load_bytes-load",
36 "fd %d errno %d\n",
37 fd, errno);
38
39 err = bpf_prog_test_run(fd, 1, &pkt_v4, sizeof(pkt_v4),
40 &flow_keys, &size, &retval, &duration);
41 CHECK(size != sizeof(flow_keys) || err || retval != 1,
42 "flow_dissector-bpf_skb_load_bytes",
43 "err %d errno %d retval %d duration %d size %u/%zu\n",
44 err, errno, retval, duration, size, sizeof(flow_keys));
45
46 if (fd >= -1)
47 close(fd);
48}
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 75b17cada539..81ad9a0b29d0 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -64,6 +64,25 @@ struct bpf_map_def SEC("maps") jmp_table = {
64 .max_entries = 8 64 .max_entries = 8
65}; 65};
66 66
67struct bpf_map_def SEC("maps") last_dissection = {
68 .type = BPF_MAP_TYPE_ARRAY,
69 .key_size = sizeof(__u32),
70 .value_size = sizeof(struct bpf_flow_keys),
71 .max_entries = 1,
72};
73
74static __always_inline int export_flow_keys(struct bpf_flow_keys *keys,
75 int ret)
76{
77 struct bpf_flow_keys *val;
78 __u32 key = 0;
79
80 val = bpf_map_lookup_elem(&last_dissection, &key);
81 if (val)
82 memcpy(val, keys, sizeof(*val));
83 return ret;
84}
85
67static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, 86static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
68 __u16 hdr_size, 87 __u16 hdr_size,
69 void *buffer) 88 void *buffer)
@@ -109,10 +128,10 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
109 break; 128 break;
110 default: 129 default:
111 /* Protocol not supported */ 130 /* Protocol not supported */
112 return BPF_DROP; 131 return export_flow_keys(keys, BPF_DROP);
113 } 132 }
114 133
115 return BPF_DROP; 134 return export_flow_keys(keys, BPF_DROP);
116} 135}
117 136
118SEC("flow_dissector") 137SEC("flow_dissector")
@@ -139,8 +158,8 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
139 case IPPROTO_ICMP: 158 case IPPROTO_ICMP:
140 icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp); 159 icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
141 if (!icmp) 160 if (!icmp)
142 return BPF_DROP; 161 return export_flow_keys(keys, BPF_DROP);
143 return BPF_OK; 162 return export_flow_keys(keys, BPF_OK);
144 case IPPROTO_IPIP: 163 case IPPROTO_IPIP:
145 keys->is_encap = true; 164 keys->is_encap = true;
146 return parse_eth_proto(skb, bpf_htons(ETH_P_IP)); 165 return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
@@ -150,11 +169,11 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
150 case IPPROTO_GRE: 169 case IPPROTO_GRE:
151 gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre); 170 gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
152 if (!gre) 171 if (!gre)
153 return BPF_DROP; 172 return export_flow_keys(keys, BPF_DROP);
154 173
155 if (bpf_htons(gre->flags & GRE_VERSION)) 174 if (bpf_htons(gre->flags & GRE_VERSION))
156 /* Only inspect standard GRE packets with version 0 */ 175 /* Only inspect standard GRE packets with version 0 */
157 return BPF_OK; 176 return export_flow_keys(keys, BPF_OK);
158 177
159 keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */ 178 keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
160 if (GRE_IS_CSUM(gre->flags)) 179 if (GRE_IS_CSUM(gre->flags))
@@ -170,7 +189,7 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
170 eth = bpf_flow_dissect_get_header(skb, sizeof(*eth), 189 eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
171 &_eth); 190 &_eth);
172 if (!eth) 191 if (!eth)
173 return BPF_DROP; 192 return export_flow_keys(keys, BPF_DROP);
174 193
175 keys->thoff += sizeof(*eth); 194 keys->thoff += sizeof(*eth);
176 195
@@ -181,31 +200,31 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
181 case IPPROTO_TCP: 200 case IPPROTO_TCP:
182 tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp); 201 tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
183 if (!tcp) 202 if (!tcp)
184 return BPF_DROP; 203 return export_flow_keys(keys, BPF_DROP);
185 204
186 if (tcp->doff < 5) 205 if (tcp->doff < 5)
187 return BPF_DROP; 206 return export_flow_keys(keys, BPF_DROP);
188 207
189 if ((__u8 *)tcp + (tcp->doff << 2) > data_end) 208 if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
190 return BPF_DROP; 209 return export_flow_keys(keys, BPF_DROP);
191 210
192 keys->sport = tcp->source; 211 keys->sport = tcp->source;
193 keys->dport = tcp->dest; 212 keys->dport = tcp->dest;
194 return BPF_OK; 213 return export_flow_keys(keys, BPF_OK);
195 case IPPROTO_UDP: 214 case IPPROTO_UDP:
196 case IPPROTO_UDPLITE: 215 case IPPROTO_UDPLITE:
197 udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp); 216 udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
198 if (!udp) 217 if (!udp)
199 return BPF_DROP; 218 return export_flow_keys(keys, BPF_DROP);
200 219
201 keys->sport = udp->source; 220 keys->sport = udp->source;
202 keys->dport = udp->dest; 221 keys->dport = udp->dest;
203 return BPF_OK; 222 return export_flow_keys(keys, BPF_OK);
204 default: 223 default:
205 return BPF_DROP; 224 return export_flow_keys(keys, BPF_DROP);
206 } 225 }
207 226
208 return BPF_DROP; 227 return export_flow_keys(keys, BPF_DROP);
209} 228}
210 229
211static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) 230static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
@@ -225,7 +244,7 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
225 return parse_ip_proto(skb, nexthdr); 244 return parse_ip_proto(skb, nexthdr);
226 } 245 }
227 246
228 return BPF_DROP; 247 return export_flow_keys(keys, BPF_DROP);
229} 248}
230 249
231PROG(IP)(struct __sk_buff *skb) 250PROG(IP)(struct __sk_buff *skb)
@@ -238,11 +257,11 @@ PROG(IP)(struct __sk_buff *skb)
238 257
239 iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph); 258 iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
240 if (!iph) 259 if (!iph)
241 return BPF_DROP; 260 return export_flow_keys(keys, BPF_DROP);
242 261
243 /* IP header cannot be smaller than 20 bytes */ 262 /* IP header cannot be smaller than 20 bytes */
244 if (iph->ihl < 5) 263 if (iph->ihl < 5)
245 return BPF_DROP; 264 return export_flow_keys(keys, BPF_DROP);
246 265
247 keys->addr_proto = ETH_P_IP; 266 keys->addr_proto = ETH_P_IP;
248 keys->ipv4_src = iph->saddr; 267 keys->ipv4_src = iph->saddr;
@@ -250,7 +269,7 @@ PROG(IP)(struct __sk_buff *skb)
250 269
251 keys->thoff += iph->ihl << 2; 270 keys->thoff += iph->ihl << 2;
252 if (data + keys->thoff > data_end) 271 if (data + keys->thoff > data_end)
253 return BPF_DROP; 272 return export_flow_keys(keys, BPF_DROP);
254 273
255 if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) { 274 if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
256 keys->is_frag = true; 275 keys->is_frag = true;
@@ -264,7 +283,7 @@ PROG(IP)(struct __sk_buff *skb)
264 } 283 }
265 284
266 if (done) 285 if (done)
267 return BPF_OK; 286 return export_flow_keys(keys, BPF_OK);
268 287
269 return parse_ip_proto(skb, iph->protocol); 288 return parse_ip_proto(skb, iph->protocol);
270} 289}
@@ -276,7 +295,7 @@ PROG(IPV6)(struct __sk_buff *skb)
276 295
277 ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); 296 ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
278 if (!ip6h) 297 if (!ip6h)
279 return BPF_DROP; 298 return export_flow_keys(keys, BPF_DROP);
280 299
281 keys->addr_proto = ETH_P_IPV6; 300 keys->addr_proto = ETH_P_IPV6;
282 memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); 301 memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
@@ -288,11 +307,12 @@ PROG(IPV6)(struct __sk_buff *skb)
288 307
289PROG(IPV6OP)(struct __sk_buff *skb) 308PROG(IPV6OP)(struct __sk_buff *skb)
290{ 309{
310 struct bpf_flow_keys *keys = skb->flow_keys;
291 struct ipv6_opt_hdr *ip6h, _ip6h; 311 struct ipv6_opt_hdr *ip6h, _ip6h;
292 312
293 ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); 313 ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
294 if (!ip6h) 314 if (!ip6h)
295 return BPF_DROP; 315 return export_flow_keys(keys, BPF_DROP);
296 316
297 /* hlen is in 8-octets and does not include the first 8 bytes 317 /* hlen is in 8-octets and does not include the first 8 bytes
298 * of the header 318 * of the header
@@ -309,7 +329,7 @@ PROG(IPV6FR)(struct __sk_buff *skb)
309 329
310 fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh); 330 fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
311 if (!fragh) 331 if (!fragh)
312 return BPF_DROP; 332 return export_flow_keys(keys, BPF_DROP);
313 333
314 keys->thoff += sizeof(*fragh); 334 keys->thoff += sizeof(*fragh);
315 keys->is_frag = true; 335 keys->is_frag = true;
@@ -321,13 +341,14 @@ PROG(IPV6FR)(struct __sk_buff *skb)
321 341
322PROG(MPLS)(struct __sk_buff *skb) 342PROG(MPLS)(struct __sk_buff *skb)
323{ 343{
344 struct bpf_flow_keys *keys = skb->flow_keys;
324 struct mpls_label *mpls, _mpls; 345 struct mpls_label *mpls, _mpls;
325 346
326 mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls); 347 mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
327 if (!mpls) 348 if (!mpls)
328 return BPF_DROP; 349 return export_flow_keys(keys, BPF_DROP);
329 350
330 return BPF_OK; 351 return export_flow_keys(keys, BPF_OK);
331} 352}
332 353
333PROG(VLAN)(struct __sk_buff *skb) 354PROG(VLAN)(struct __sk_buff *skb)
@@ -339,10 +360,10 @@ PROG(VLAN)(struct __sk_buff *skb)
339 if (keys->n_proto == bpf_htons(ETH_P_8021AD)) { 360 if (keys->n_proto == bpf_htons(ETH_P_8021AD)) {
340 vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); 361 vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
341 if (!vlan) 362 if (!vlan)
342 return BPF_DROP; 363 return export_flow_keys(keys, BPF_DROP);
343 364
344 if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) 365 if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
345 return BPF_DROP; 366 return export_flow_keys(keys, BPF_DROP);
346 367
347 keys->nhoff += sizeof(*vlan); 368 keys->nhoff += sizeof(*vlan);
348 keys->thoff += sizeof(*vlan); 369 keys->thoff += sizeof(*vlan);
@@ -350,14 +371,14 @@ PROG(VLAN)(struct __sk_buff *skb)
350 371
351 vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); 372 vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
352 if (!vlan) 373 if (!vlan)
353 return BPF_DROP; 374 return export_flow_keys(keys, BPF_DROP);
354 375
355 keys->nhoff += sizeof(*vlan); 376 keys->nhoff += sizeof(*vlan);
356 keys->thoff += sizeof(*vlan); 377 keys->thoff += sizeof(*vlan);
357 /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ 378 /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
358 if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || 379 if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
359 vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) 380 vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
360 return BPF_DROP; 381 return export_flow_keys(keys, BPF_DROP);
361 382
362 keys->n_proto = vlan->h_vlan_encapsulated_proto; 383 keys->n_proto = vlan->h_vlan_encapsulated_proto;
363 return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto); 384 return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);