diff options
author | Daniel Borkmann <daniel@iogearbox.net> | 2016-06-28 06:18:27 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-06-30 05:54:40 -0400 |
commit | 6578171a7ff0c31dc73258f93da7407510abf085 (patch) | |
tree | 9bd1cbf0fa5de109e750dd482472beaa188951a5 | |
parent | 80b48c445797a634d869c7e5a53e182ba2688931 (diff) |
bpf: add bpf_skb_change_proto helper
This patch adds a minimal helper for doing the groundwork of changing
the skb->protocol in a controlled way. Currently supported is v4 to
v6 and vice versa transitions, which allows, for example, for a minimal,
static NAT64 implementation where applications in containers that still
require IPv4 can be transparently operated in an IPv6-only environment.
For example, host facing veth of the container can transparently do
the transitions in a programmatic way with the help of clsact qdisc
and cls_bpf.
The idea is to separate concerns so as to keep the helper's complexity
lower: the programs utilize bpf_skb_change_proto(),
bpf_skb_store_bytes() and bpf_lX_csum_replace() to get the job done,
instead of doing everything in a single helper (and thus partially
duplicating helper functionality). Also, bpf_skb_change_proto()
shouldn't need to deal with raw packet data as this is done by other
helpers.
bpf_skb_proto_6_to_4() and bpf_skb_proto_4_to_6() unclone the skb to
operate on a private one, push or pop additionally required header
space and migrate the gso/gro meta data from the shared info. We do
mark the gso type as dodgy so that headers are checked and segs
recalculated by the gso/gro engine. The gso_size target is adapted
as well. The flags argument added is currently reserved and can be
used for future extensions.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/uapi/linux/bpf.h | 14 | ||||
-rw-r--r-- | net/core/filter.c | 200 |
2 files changed, 214 insertions, 0 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 58df2da3e9bf..66cd738a937a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h | |||
@@ -313,6 +313,20 @@ enum bpf_func_id { | |||
313 | */ | 313 | */ |
314 | BPF_FUNC_skb_get_tunnel_opt, | 314 | BPF_FUNC_skb_get_tunnel_opt, |
315 | BPF_FUNC_skb_set_tunnel_opt, | 315 | BPF_FUNC_skb_set_tunnel_opt, |
316 | |||
317 | /** | ||
318 | * bpf_skb_change_proto(skb, proto, flags) | ||
319 | * Change protocol of the skb. Currently supported is | ||
320 | * v4 -> v6, v6 -> v4 transitions. The helper will also | ||
321 | * resize the skb. eBPF program is expected to fill the | ||
322 | * new headers via skb_store_bytes and lX_csum_replace. | ||
323 | * @skb: pointer to skb | ||
324 | * @proto: new skb->protocol type | ||
325 | * @flags: reserved | ||
326 | * Return: 0 on success or negative error | ||
327 | */ | ||
328 | BPF_FUNC_skb_change_proto, | ||
329 | |||
316 | __BPF_FUNC_MAX_ID, | 330 | __BPF_FUNC_MAX_ID, |
317 | }; | 331 | }; |
318 | 332 | ||
diff --git a/net/core/filter.c b/net/core/filter.c index 46c88d9cec5c..d983e765787a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -1783,6 +1783,202 @@ const struct bpf_func_proto bpf_skb_vlan_pop_proto = { | |||
1783 | }; | 1783 | }; |
1784 | EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto); | 1784 | EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto); |
1785 | 1785 | ||
1786 | static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) | ||
1787 | { | ||
1788 | /* Caller already did skb_cow() with len as headroom, | ||
1789 | * so no need to do it here. | ||
1790 | */ | ||
1791 | skb_push(skb, len); | ||
1792 | memmove(skb->data, skb->data + len, off); | ||
1793 | memset(skb->data + off, 0, len); | ||
1794 | |||
1795 | /* No skb_postpush_rcsum(skb, skb->data + off, len) | ||
1796 | * needed here as it does not change the skb->csum | ||
1797 | * result for checksum complete when summing over | ||
1798 | * zeroed blocks. | ||
1799 | */ | ||
1800 | return 0; | ||
1801 | } | ||
1802 | |||
1803 | static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len) | ||
1804 | { | ||
1805 | /* skb_ensure_writable() is not needed here, as we're | ||
1806 | * already working on an uncloned skb. | ||
1807 | */ | ||
1808 | if (unlikely(!pskb_may_pull(skb, off + len))) | ||
1809 | return -ENOMEM; | ||
1810 | |||
1811 | skb_postpull_rcsum(skb, skb->data + off, len); | ||
1812 | memmove(skb->data + len, skb->data, off); | ||
1813 | __skb_pull(skb, len); | ||
1814 | |||
1815 | return 0; | ||
1816 | } | ||
1817 | |||
1818 | static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len) | ||
1819 | { | ||
1820 | bool trans_same = skb->transport_header == skb->network_header; | ||
1821 | int ret; | ||
1822 | |||
1823 | /* There's no need for __skb_push()/__skb_pull() pair to | ||
1824 | * get to the start of the mac header as we're guaranteed | ||
1825 | * to always start from here under eBPF. | ||
1826 | */ | ||
1827 | ret = bpf_skb_generic_push(skb, off, len); | ||
1828 | if (likely(!ret)) { | ||
1829 | skb->mac_header -= len; | ||
1830 | skb->network_header -= len; | ||
1831 | if (trans_same) | ||
1832 | skb->transport_header = skb->network_header; | ||
1833 | } | ||
1834 | |||
1835 | return ret; | ||
1836 | } | ||
1837 | |||
1838 | static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len) | ||
1839 | { | ||
1840 | bool trans_same = skb->transport_header == skb->network_header; | ||
1841 | int ret; | ||
1842 | |||
1843 | /* Same here, __skb_push()/__skb_pull() pair not needed. */ | ||
1844 | ret = bpf_skb_generic_pop(skb, off, len); | ||
1845 | if (likely(!ret)) { | ||
1846 | skb->mac_header += len; | ||
1847 | skb->network_header += len; | ||
1848 | if (trans_same) | ||
1849 | skb->transport_header = skb->network_header; | ||
1850 | } | ||
1851 | |||
1852 | return ret; | ||
1853 | } | ||
1854 | |||
1855 | static int bpf_skb_proto_4_to_6(struct sk_buff *skb) | ||
1856 | { | ||
1857 | const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); | ||
1858 | u32 off = skb->network_header - skb->mac_header; | ||
1859 | int ret; | ||
1860 | |||
1861 | ret = skb_cow(skb, len_diff); | ||
1862 | if (unlikely(ret < 0)) | ||
1863 | return ret; | ||
1864 | |||
1865 | ret = bpf_skb_net_hdr_push(skb, off, len_diff); | ||
1866 | if (unlikely(ret < 0)) | ||
1867 | return ret; | ||
1868 | |||
1869 | if (skb_is_gso(skb)) { | ||
1870 | /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to | ||
1871 | * be changed into SKB_GSO_TCPV6. | ||
1872 | */ | ||
1873 | if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { | ||
1874 | skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4; | ||
1875 | skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; | ||
1876 | } | ||
1877 | |||
1878 | /* Due to IPv6 header, MSS needs to be downgraded. */ | ||
1879 | skb_shinfo(skb)->gso_size -= len_diff; | ||
1880 | /* Header must be checked, and gso_segs recomputed. */ | ||
1881 | skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; | ||
1882 | skb_shinfo(skb)->gso_segs = 0; | ||
1883 | } | ||
1884 | |||
1885 | skb->protocol = htons(ETH_P_IPV6); | ||
1886 | skb_clear_hash(skb); | ||
1887 | |||
1888 | return 0; | ||
1889 | } | ||
1890 | |||
1891 | static int bpf_skb_proto_6_to_4(struct sk_buff *skb) | ||
1892 | { | ||
1893 | const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); | ||
1894 | u32 off = skb->network_header - skb->mac_header; | ||
1895 | int ret; | ||
1896 | |||
1897 | ret = skb_unclone(skb, GFP_ATOMIC); | ||
1898 | if (unlikely(ret < 0)) | ||
1899 | return ret; | ||
1900 | |||
1901 | ret = bpf_skb_net_hdr_pop(skb, off, len_diff); | ||
1902 | if (unlikely(ret < 0)) | ||
1903 | return ret; | ||
1904 | |||
1905 | if (skb_is_gso(skb)) { | ||
1906 | /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to | ||
1907 | * be changed into SKB_GSO_TCPV4. | ||
1908 | */ | ||
1909 | if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { | ||
1910 | skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6; | ||
1911 | skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; | ||
1912 | } | ||
1913 | |||
1914 | /* Due to IPv4 header, MSS can be upgraded. */ | ||
1915 | skb_shinfo(skb)->gso_size += len_diff; | ||
1916 | /* Header must be checked, and gso_segs recomputed. */ | ||
1917 | skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; | ||
1918 | skb_shinfo(skb)->gso_segs = 0; | ||
1919 | } | ||
1920 | |||
1921 | skb->protocol = htons(ETH_P_IP); | ||
1922 | skb_clear_hash(skb); | ||
1923 | |||
1924 | return 0; | ||
1925 | } | ||
1926 | |||
1927 | static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto) | ||
1928 | { | ||
1929 | __be16 from_proto = skb->protocol; | ||
1930 | |||
1931 | if (from_proto == htons(ETH_P_IP) && | ||
1932 | to_proto == htons(ETH_P_IPV6)) | ||
1933 | return bpf_skb_proto_4_to_6(skb); | ||
1934 | |||
1935 | if (from_proto == htons(ETH_P_IPV6) && | ||
1936 | to_proto == htons(ETH_P_IP)) | ||
1937 | return bpf_skb_proto_6_to_4(skb); | ||
1938 | |||
1939 | return -ENOTSUPP; | ||
1940 | } | ||
1941 | |||
1942 | static u64 bpf_skb_change_proto(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) | ||
1943 | { | ||
1944 | struct sk_buff *skb = (struct sk_buff *) (long) r1; | ||
1945 | __be16 proto = (__force __be16) r2; | ||
1946 | int ret; | ||
1947 | |||
1948 | if (unlikely(flags)) | ||
1949 | return -EINVAL; | ||
1950 | |||
1951 | /* General idea is that this helper does the basic groundwork | ||
1952 | * needed for changing the protocol, and eBPF program fills the | ||
1953 | * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace() | ||
1954 | * and other helpers, rather than passing a raw buffer here. | ||
1955 | * | ||
1956 | * The rationale is to keep this minimal and without a need to | ||
1957 | * deal with raw packet data. F.e. even if we would pass buffers | ||
1958 | * here, the program still needs to call the bpf_lX_csum_replace() | ||
1959 | * helpers anyway. Plus, this way we keep also separation of | ||
1960 | * concerns, since f.e. bpf_skb_store_bytes() should only take | ||
1961 | * care of stores. | ||
1962 | * | ||
1963 | * Currently, additional options and extension header space are | ||
1964 | * not supported, but flags register is reserved so we can adapt | ||
1965 | * that. For offloads, we mark packet as dodgy, so that headers | ||
1966 | * need to be verified first. | ||
1967 | */ | ||
1968 | ret = bpf_skb_proto_xlat(skb, proto); | ||
1969 | bpf_compute_data_end(skb); | ||
1970 | return ret; | ||
1971 | } | ||
1972 | |||
1973 | static const struct bpf_func_proto bpf_skb_change_proto_proto = { | ||
1974 | .func = bpf_skb_change_proto, | ||
1975 | .gpl_only = false, | ||
1976 | .ret_type = RET_INTEGER, | ||
1977 | .arg1_type = ARG_PTR_TO_CTX, | ||
1978 | .arg2_type = ARG_ANYTHING, | ||
1979 | .arg3_type = ARG_ANYTHING, | ||
1980 | }; | ||
1981 | |||
1786 | bool bpf_helper_changes_skb_data(void *func) | 1982 | bool bpf_helper_changes_skb_data(void *func) |
1787 | { | 1983 | { |
1788 | if (func == bpf_skb_vlan_push) | 1984 | if (func == bpf_skb_vlan_push) |
@@ -1791,6 +1987,8 @@ bool bpf_helper_changes_skb_data(void *func) | |||
1791 | return true; | 1987 | return true; |
1792 | if (func == bpf_skb_store_bytes) | 1988 | if (func == bpf_skb_store_bytes) |
1793 | return true; | 1989 | return true; |
1990 | if (func == bpf_skb_change_proto) | ||
1991 | return true; | ||
1794 | if (func == bpf_l3_csum_replace) | 1992 | if (func == bpf_l3_csum_replace) |
1795 | return true; | 1993 | return true; |
1796 | if (func == bpf_l4_csum_replace) | 1994 | if (func == bpf_l4_csum_replace) |
@@ -2078,6 +2276,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) | |||
2078 | return &bpf_skb_vlan_push_proto; | 2276 | return &bpf_skb_vlan_push_proto; |
2079 | case BPF_FUNC_skb_vlan_pop: | 2277 | case BPF_FUNC_skb_vlan_pop: |
2080 | return &bpf_skb_vlan_pop_proto; | 2278 | return &bpf_skb_vlan_pop_proto; |
2279 | case BPF_FUNC_skb_change_proto: | ||
2280 | return &bpf_skb_change_proto_proto; | ||
2081 | case BPF_FUNC_skb_get_tunnel_key: | 2281 | case BPF_FUNC_skb_get_tunnel_key: |
2082 | return &bpf_skb_get_tunnel_key_proto; | 2282 | return &bpf_skb_get_tunnel_key_proto; |
2083 | case BPF_FUNC_skb_set_tunnel_key: | 2283 | case BPF_FUNC_skb_set_tunnel_key: |