aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Daniel Borkmann <daniel@iogearbox.net> 2016-06-28 06:18:27 -0400
committer David S. Miller <davem@davemloft.net> 2016-06-30 05:54:40 -0400
commit 6578171a7ff0c31dc73258f93da7407510abf085 (patch)
tree 9bd1cbf0fa5de109e750dd482472beaa188951a5
parent 80b48c445797a634d869c7e5a53e182ba2688931 (diff)
bpf: add bpf_skb_change_proto helper
This patch adds a minimal helper for doing the groundwork of changing the skb->protocol in a controlled way. Currently supported are v4 to v6 transitions and vice versa, which allows, for example, a minimal, static nat64 implementation where applications in containers that still require IPv4 can be transparently operated in an IPv6-only environment. For example, the host-facing veth of the container can transparently do the transitions in a programmatic way with the help of clsact qdisc and cls_bpf.

The idea is to separate concerns in order to keep the complexity of the helper lower, which means that the programs utilize bpf_skb_change_proto(), bpf_skb_store_bytes() and bpf_lX_csum_replace() to get the job done, instead of doing everything in a single helper (and thus partially duplicating helper functionality). Also, bpf_skb_change_proto() shouldn't need to deal with raw packet data as this is done by other helpers.

bpf_skb_proto_6_to_4() and bpf_skb_proto_4_to_6() unclone the skb to operate on a private one, push or pop additionally required header space and migrate the gso/gro meta data from the shared info. We do mark the gso type as dodgy so that headers are checked and segs recalculated by the gso/gro engine. The gso_size target is adapted as well. The flags argument added is currently reserved and can be used for future extensions.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/uapi/linux/bpf.h 14
-rw-r--r-- net/core/filter.c 200
2 files changed, 214 insertions, 0 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 58df2da3e9bf..66cd738a937a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -313,6 +313,20 @@ enum bpf_func_id {
313 */ 313 */
314 BPF_FUNC_skb_get_tunnel_opt, 314 BPF_FUNC_skb_get_tunnel_opt,
315 BPF_FUNC_skb_set_tunnel_opt, 315 BPF_FUNC_skb_set_tunnel_opt,
316
317 /**
318 * bpf_skb_change_proto(skb, proto, flags)
319 * Change protocol of the skb. Currently supported is
320 * v4 -> v6, v6 -> v4 transitions. The helper will also
321 * resize the skb. eBPF program is expected to fill the
322 * new headers via skb_store_bytes and lX_csum_replace.
323 * @skb: pointer to skb
324 * @proto: new skb->protocol type
325 * @flags: reserved
326 * Return: 0 on success or negative error
327 */
328 BPF_FUNC_skb_change_proto,
329
316 __BPF_FUNC_MAX_ID, 330 __BPF_FUNC_MAX_ID,
317}; 331};
318 332
diff --git a/net/core/filter.c b/net/core/filter.c
index 46c88d9cec5c..d983e765787a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1783,6 +1783,202 @@ const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
1783}; 1783};
1784EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto); 1784EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto);
1785 1785
1786static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
1787{
1788 /* Caller already did skb_cow() with len as headroom,
1789 * so no need to do it here.
1790 */
1791 skb_push(skb, len);
1792 memmove(skb->data, skb->data + len, off);
1793 memset(skb->data + off, 0, len);
1794
1795 /* No skb_postpush_rcsum(skb, skb->data + off, len)
1796 * needed here as it does not change the skb->csum
1797 * result for checksum complete when summing over
1798 * zeroed blocks.
1799 */
1800 return 0;
1801}
1802
1803static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
1804{
1805 /* skb_ensure_writable() is not needed here, as we're
1806 * already working on an uncloned skb.
1807 */
1808 if (unlikely(!pskb_may_pull(skb, off + len)))
1809 return -ENOMEM;
1810
1811 skb_postpull_rcsum(skb, skb->data + off, len);
1812 memmove(skb->data + len, skb->data, off);
1813 __skb_pull(skb, len);
1814
1815 return 0;
1816}
1817
1818static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
1819{
1820 bool trans_same = skb->transport_header == skb->network_header;
1821 int ret;
1822
1823 /* There's no need for __skb_push()/__skb_pull() pair to
1824 * get to the start of the mac header as we're guaranteed
1825 * to always start from here under eBPF.
1826 */
1827 ret = bpf_skb_generic_push(skb, off, len);
1828 if (likely(!ret)) {
1829 skb->mac_header -= len;
1830 skb->network_header -= len;
1831 if (trans_same)
1832 skb->transport_header = skb->network_header;
1833 }
1834
1835 return ret;
1836}
1837
1838static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
1839{
1840 bool trans_same = skb->transport_header == skb->network_header;
1841 int ret;
1842
1843 /* Same here, __skb_push()/__skb_pull() pair not needed. */
1844 ret = bpf_skb_generic_pop(skb, off, len);
1845 if (likely(!ret)) {
1846 skb->mac_header += len;
1847 skb->network_header += len;
1848 if (trans_same)
1849 skb->transport_header = skb->network_header;
1850 }
1851
1852 return ret;
1853}
1854
1855static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
1856{
1857 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
1858 u32 off = skb->network_header - skb->mac_header;
1859 int ret;
1860
1861 ret = skb_cow(skb, len_diff);
1862 if (unlikely(ret < 0))
1863 return ret;
1864
1865 ret = bpf_skb_net_hdr_push(skb, off, len_diff);
1866 if (unlikely(ret < 0))
1867 return ret;
1868
1869 if (skb_is_gso(skb)) {
1870 /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to
1871 * be changed into SKB_GSO_TCPV6.
1872 */
1873 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
1874 skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
1875 skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
1876 }
1877
1878 /* Due to IPv6 header, MSS needs to be downgraded. */
1879 skb_shinfo(skb)->gso_size -= len_diff;
1880 /* Header must be checked, and gso_segs recomputed. */
1881 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1882 skb_shinfo(skb)->gso_segs = 0;
1883 }
1884
1885 skb->protocol = htons(ETH_P_IPV6);
1886 skb_clear_hash(skb);
1887
1888 return 0;
1889}
1890
1891static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
1892{
1893 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
1894 u32 off = skb->network_header - skb->mac_header;
1895 int ret;
1896
1897 ret = skb_unclone(skb, GFP_ATOMIC);
1898 if (unlikely(ret < 0))
1899 return ret;
1900
1901 ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
1902 if (unlikely(ret < 0))
1903 return ret;
1904
1905 if (skb_is_gso(skb)) {
1906 /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to
1907 * be changed into SKB_GSO_TCPV4.
1908 */
1909 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
1910 skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
1911 skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
1912 }
1913
1914 /* Due to IPv4 header, MSS can be upgraded. */
1915 skb_shinfo(skb)->gso_size += len_diff;
1916 /* Header must be checked, and gso_segs recomputed. */
1917 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1918 skb_shinfo(skb)->gso_segs = 0;
1919 }
1920
1921 skb->protocol = htons(ETH_P_IP);
1922 skb_clear_hash(skb);
1923
1924 return 0;
1925}
1926
1927static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
1928{
1929 __be16 from_proto = skb->protocol;
1930
1931 if (from_proto == htons(ETH_P_IP) &&
1932 to_proto == htons(ETH_P_IPV6))
1933 return bpf_skb_proto_4_to_6(skb);
1934
1935 if (from_proto == htons(ETH_P_IPV6) &&
1936 to_proto == htons(ETH_P_IP))
1937 return bpf_skb_proto_6_to_4(skb);
1938
1939 return -ENOTSUPP;
1940}
1941
1942static u64 bpf_skb_change_proto(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
1943{
1944 struct sk_buff *skb = (struct sk_buff *) (long) r1;
1945 __be16 proto = (__force __be16) r2;
1946 int ret;
1947
1948 if (unlikely(flags))
1949 return -EINVAL;
1950
1951 /* General idea is that this helper does the basic groundwork
1952 * needed for changing the protocol, and eBPF program fills the
1953 * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace()
1954 * and other helpers, rather than passing a raw buffer here.
1955 *
1956 * The rationale is to keep this minimal and without a need to
1957 * deal with raw packet data. F.e. even if we would pass buffers
1958 * here, the program still needs to call the bpf_lX_csum_replace()
1959 * helpers anyway. Plus, this way we keep also separation of
1960 * concerns, since f.e. bpf_skb_store_bytes() should only take
1961 * care of stores.
1962 *
1963 * Currently, additional options and extension header space are
1964 * not supported, but flags register is reserved so we can adapt
1965 * that. For offloads, we mark packet as dodgy, so that headers
1966 * need to be verified first.
1967 */
1968 ret = bpf_skb_proto_xlat(skb, proto);
1969 bpf_compute_data_end(skb);
1970 return ret;
1971}
1972
1973static const struct bpf_func_proto bpf_skb_change_proto_proto = {
1974 .func = bpf_skb_change_proto,
1975 .gpl_only = false,
1976 .ret_type = RET_INTEGER,
1977 .arg1_type = ARG_PTR_TO_CTX,
1978 .arg2_type = ARG_ANYTHING,
1979 .arg3_type = ARG_ANYTHING,
1980};
1981
1786bool bpf_helper_changes_skb_data(void *func) 1982bool bpf_helper_changes_skb_data(void *func)
1787{ 1983{
1788 if (func == bpf_skb_vlan_push) 1984 if (func == bpf_skb_vlan_push)
@@ -1791,6 +1987,8 @@ bool bpf_helper_changes_skb_data(void *func)
1791 return true; 1987 return true;
1792 if (func == bpf_skb_store_bytes) 1988 if (func == bpf_skb_store_bytes)
1793 return true; 1989 return true;
1990 if (func == bpf_skb_change_proto)
1991 return true;
1794 if (func == bpf_l3_csum_replace) 1992 if (func == bpf_l3_csum_replace)
1795 return true; 1993 return true;
1796 if (func == bpf_l4_csum_replace) 1994 if (func == bpf_l4_csum_replace)
@@ -2078,6 +2276,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
2078 return &bpf_skb_vlan_push_proto; 2276 return &bpf_skb_vlan_push_proto;
2079 case BPF_FUNC_skb_vlan_pop: 2277 case BPF_FUNC_skb_vlan_pop:
2080 return &bpf_skb_vlan_pop_proto; 2278 return &bpf_skb_vlan_pop_proto;
2279 case BPF_FUNC_skb_change_proto:
2280 return &bpf_skb_change_proto_proto;
2081 case BPF_FUNC_skb_get_tunnel_key: 2281 case BPF_FUNC_skb_get_tunnel_key:
2082 return &bpf_skb_get_tunnel_key_proto; 2282 return &bpf_skb_get_tunnel_key_proto;
2083 case BPF_FUNC_skb_set_tunnel_key: 2283 case BPF_FUNC_skb_set_tunnel_key: