diff options
author | Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> | 2018-05-28 06:37:49 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-05-29 11:00:14 -0400 |
commit | 6547e387d7f52f2ba681a229de3c13e5b9e01ee1 (patch) | |
tree | 723d0e9104475ba48ce6cc53098e356e446137ee | |
parent | d2c2725c2cdbcc108a191f50953d31c7b6556761 (diff) |
tun: Fix NULL pointer dereference in XDP redirect
XDP redirection must be called with bottom halves (bh) disabled. Otherwise
a softirq can run another XDP program and its redirection functions, and
the percpu static variable ri->map can then be overwritten with NULL.
The following oops shows the generic XDP case called from tun:
[ 3535.736058] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
[ 3535.743974] PGD 0 P4D 0
[ 3535.746530] Oops: 0000 [#1] SMP PTI
[ 3535.750049] Modules linked in: vhost_net vhost tap tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter sunrpc vfat fat ext4 mbcache jbd2 intel_rapl skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm ipmi_ssif irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc ses aesni_intel crypto_simd cryptd enclosure hpwdt hpilo glue_helper ipmi_si pcspkr wmi mei_me ioatdma mei ipmi_devintf shpchp dca ipmi_msghandler lpc_ich acpi_power_meter sch_fq_codel ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm smartpqi i40e crc32c_intel scsi_transport_sas tg3 i2c_core ptp pps_core
[ 3535.813456] CPU: 5 PID: 1630 Comm: vhost-1614 Not tainted 4.17.0-rc4 #2
[ 3535.820127] Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 11/14/2017
[ 3535.828732] RIP: 0010:__xdp_map_lookup_elem+0x5/0x30
[ 3535.833740] RSP: 0018:ffffb4bc47bf7c58 EFLAGS: 00010246
[ 3535.839009] RAX: ffff9fdfcfea1c40 RBX: 0000000000000000 RCX: ffff9fdf27fe3100
[ 3535.846205] RDX: ffff9fdfca769200 RSI: 0000000000000000 RDI: 0000000000000000
[ 3535.853402] RBP: ffffb4bc491d9000 R08: 00000000000045ad R09: 0000000000000ec0
[ 3535.860597] R10: 0000000000000001 R11: ffff9fdf26c3ce4e R12: ffff9fdf9e72c000
[ 3535.867794] R13: 0000000000000000 R14: fffffffffffffff2 R15: ffff9fdfc82cdd00
[ 3535.874990] FS: 0000000000000000(0000) GS:ffff9fdfcfe80000(0000) knlGS:0000000000000000
[ 3535.883152] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 3535.888948] CR2: 0000000000000018 CR3: 0000000bde724004 CR4: 00000000007626e0
[ 3535.896145] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 3535.903342] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 3535.910538] PKRU: 55555554
[ 3535.913267] Call Trace:
[ 3535.915736] xdp_do_generic_redirect+0x7a/0x310
[ 3535.920310] do_xdp_generic.part.117+0x285/0x370
[ 3535.924970] tun_get_user+0x5b9/0x1260 [tun]
[ 3535.929279] tun_sendmsg+0x52/0x70 [tun]
[ 3535.933237] handle_tx+0x2ad/0x5f0 [vhost_net]
[ 3535.937721] vhost_worker+0xa5/0x100 [vhost]
[ 3535.942030] kthread+0xf5/0x130
[ 3535.945198] ? vhost_dev_ioctl+0x3b0/0x3b0 [vhost]
[ 3535.950031] ? kthread_bind+0x10/0x10
[ 3535.953727] ret_from_fork+0x35/0x40
[ 3535.957334] Code: 0e 74 15 83 f8 10 75 05 e9 49 aa b3 ff f3 c3 0f 1f 80 00 00 00 00 f3 c3 e9 29 9d b3 ff 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <8b> 47 18 83 f8 0e 74 0d 83 f8 10 75 05 e9 49 a9 b3 ff 31 c0 c3
[ 3535.976387] RIP: __xdp_map_lookup_elem+0x5/0x30 RSP: ffffb4bc47bf7c58
[ 3535.982883] CR2: 0000000000000018
[ 3535.987096] ---[ end trace 383b299dd1430240 ]---
[ 3536.131325] Kernel panic - not syncing: Fatal exception
[ 3536.137484] Kernel Offset: 0x26a00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[ 3536.281406] ---[ end Kernel panic - not syncing: Fatal exception ]---
Even with the generic case fixed, the kernel still panics in the tun
driver's XDP redirect path, because that path disabled only preemption, not bh:
[ 2055.128746] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
[ 2055.136662] PGD 0 P4D 0
[ 2055.139219] Oops: 0000 [#1] SMP PTI
[ 2055.142736] Modules linked in: vhost_net vhost tap tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter sunrpc vfat fat ext4 mbcache jbd2 intel_rapl skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc ses aesni_intel ipmi_ssif crypto_simd enclosure cryptd hpwdt glue_helper ioatdma hpilo wmi dca pcspkr ipmi_si acpi_power_meter ipmi_devintf shpchp mei_me ipmi_msghandler mei lpc_ich sch_fq_codel ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm i40e smartpqi tg3 scsi_transport_sas crc32c_intel i2c_core ptp pps_core
[ 2055.206142] CPU: 6 PID: 1693 Comm: vhost-1683 Tainted: G W 4.17.0-rc5-fix-tun+ #1
[ 2055.215011] Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 11/14/2017
[ 2055.223617] RIP: 0010:__xdp_map_lookup_elem+0x5/0x30
[ 2055.228624] RSP: 0018:ffff998b07607cc0 EFLAGS: 00010246
[ 2055.233892] RAX: ffff8dbd8e235700 RBX: ffff8dbd8ff21c40 RCX: 0000000000000004
[ 2055.241089] RDX: ffff998b097a9000 RSI: 0000000000000000 RDI: 0000000000000000
[ 2055.248286] RBP: 0000000000000000 R08: 00000000000065a8 R09: 0000000000005d80
[ 2055.255483] R10: 0000000000000040 R11: ffff8dbcf0100000 R12: ffff998b097a9000
[ 2055.262681] R13: ffff8dbd8c98c000 R14: 0000000000000000 R15: ffff998b07607d78
[ 2055.269879] FS: 0000000000000000(0000) GS:ffff8dbd8ff00000(0000) knlGS:0000000000000000
[ 2055.278039] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2055.283834] CR2: 0000000000000018 CR3: 0000000c0c8cc005 CR4: 00000000007626e0
[ 2055.291030] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 2055.298227] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 2055.305424] PKRU: 55555554
[ 2055.308153] Call Trace:
[ 2055.310624] xdp_do_redirect+0x7b/0x380
[ 2055.314499] tun_get_user+0x10fe/0x12a0 [tun]
[ 2055.318895] tun_sendmsg+0x52/0x70 [tun]
[ 2055.322852] handle_tx+0x2ad/0x5f0 [vhost_net]
[ 2055.327337] vhost_worker+0xa5/0x100 [vhost]
[ 2055.331646] kthread+0xf5/0x130
[ 2055.334813] ? vhost_dev_ioctl+0x3b0/0x3b0 [vhost]
[ 2055.339646] ? kthread_bind+0x10/0x10
[ 2055.343343] ret_from_fork+0x35/0x40
[ 2055.346950] Code: 0e 74 15 83 f8 10 75 05 e9 e9 aa b3 ff f3 c3 0f 1f 80 00 00 00 00 f3 c3 e9 c9 9d b3 ff 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <8b> 47 18 83 f8 0e 74 0d 83 f8 10 75 05 e9 e9 a9 b3 ff 31 c0 c3
[ 2055.366004] RIP: __xdp_map_lookup_elem+0x5/0x30 RSP: ffff998b07607cc0
[ 2055.372500] CR2: 0000000000000018
[ 2055.375856] ---[ end trace 2a2dcc5e9e174268 ]---
[ 2055.523626] Kernel panic - not syncing: Fatal exception
[ 2055.529796] Kernel Offset: 0x2e000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[ 2055.677539] ---[ end Kernel panic - not syncing: Fatal exception ]---
v2:
- Removed preempt_disable/enable, since local_bh_disable also prevents
preemption (feedback from Jason Wang).
Fixes: 761876c857cb ("tap: XDP support")
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/tun.c | 15 |
1 files changed, 9 insertions, 6 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 45d807796a18..23e9eb66197f 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c | |||
@@ -1650,7 +1650,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, | |||
1650 | else | 1650 | else |
1651 | *skb_xdp = 0; | 1651 | *skb_xdp = 0; |
1652 | 1652 | ||
1653 | preempt_disable(); | 1653 | local_bh_disable(); |
1654 | rcu_read_lock(); | 1654 | rcu_read_lock(); |
1655 | xdp_prog = rcu_dereference(tun->xdp_prog); | 1655 | xdp_prog = rcu_dereference(tun->xdp_prog); |
1656 | if (xdp_prog && !*skb_xdp) { | 1656 | if (xdp_prog && !*skb_xdp) { |
@@ -1675,7 +1675,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, | |||
1675 | if (err) | 1675 | if (err) |
1676 | goto err_redirect; | 1676 | goto err_redirect; |
1677 | rcu_read_unlock(); | 1677 | rcu_read_unlock(); |
1678 | preempt_enable(); | 1678 | local_bh_enable(); |
1679 | return NULL; | 1679 | return NULL; |
1680 | case XDP_TX: | 1680 | case XDP_TX: |
1681 | get_page(alloc_frag->page); | 1681 | get_page(alloc_frag->page); |
@@ -1684,7 +1684,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, | |||
1684 | goto err_redirect; | 1684 | goto err_redirect; |
1685 | tun_xdp_flush(tun->dev); | 1685 | tun_xdp_flush(tun->dev); |
1686 | rcu_read_unlock(); | 1686 | rcu_read_unlock(); |
1687 | preempt_enable(); | 1687 | local_bh_enable(); |
1688 | return NULL; | 1688 | return NULL; |
1689 | case XDP_PASS: | 1689 | case XDP_PASS: |
1690 | delta = orig_data - xdp.data; | 1690 | delta = orig_data - xdp.data; |
@@ -1703,7 +1703,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, | |||
1703 | skb = build_skb(buf, buflen); | 1703 | skb = build_skb(buf, buflen); |
1704 | if (!skb) { | 1704 | if (!skb) { |
1705 | rcu_read_unlock(); | 1705 | rcu_read_unlock(); |
1706 | preempt_enable(); | 1706 | local_bh_enable(); |
1707 | return ERR_PTR(-ENOMEM); | 1707 | return ERR_PTR(-ENOMEM); |
1708 | } | 1708 | } |
1709 | 1709 | ||
@@ -1713,7 +1713,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, | |||
1713 | alloc_frag->offset += buflen; | 1713 | alloc_frag->offset += buflen; |
1714 | 1714 | ||
1715 | rcu_read_unlock(); | 1715 | rcu_read_unlock(); |
1716 | preempt_enable(); | 1716 | local_bh_enable(); |
1717 | 1717 | ||
1718 | return skb; | 1718 | return skb; |
1719 | 1719 | ||
@@ -1721,7 +1721,7 @@ err_redirect: | |||
1721 | put_page(alloc_frag->page); | 1721 | put_page(alloc_frag->page); |
1722 | err_xdp: | 1722 | err_xdp: |
1723 | rcu_read_unlock(); | 1723 | rcu_read_unlock(); |
1724 | preempt_enable(); | 1724 | local_bh_enable(); |
1725 | this_cpu_inc(tun->pcpu_stats->rx_dropped); | 1725 | this_cpu_inc(tun->pcpu_stats->rx_dropped); |
1726 | return NULL; | 1726 | return NULL; |
1727 | } | 1727 | } |
@@ -1917,16 +1917,19 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, | |||
1917 | struct bpf_prog *xdp_prog; | 1917 | struct bpf_prog *xdp_prog; |
1918 | int ret; | 1918 | int ret; |
1919 | 1919 | ||
1920 | local_bh_disable(); | ||
1920 | rcu_read_lock(); | 1921 | rcu_read_lock(); |
1921 | xdp_prog = rcu_dereference(tun->xdp_prog); | 1922 | xdp_prog = rcu_dereference(tun->xdp_prog); |
1922 | if (xdp_prog) { | 1923 | if (xdp_prog) { |
1923 | ret = do_xdp_generic(xdp_prog, skb); | 1924 | ret = do_xdp_generic(xdp_prog, skb); |
1924 | if (ret != XDP_PASS) { | 1925 | if (ret != XDP_PASS) { |
1925 | rcu_read_unlock(); | 1926 | rcu_read_unlock(); |
1927 | local_bh_enable(); | ||
1926 | return total_len; | 1928 | return total_len; |
1927 | } | 1929 | } |
1928 | } | 1930 | } |
1929 | rcu_read_unlock(); | 1931 | rcu_read_unlock(); |
1932 | local_bh_enable(); | ||
1930 | } | 1933 | } |
1931 | 1934 | ||
1932 | rcu_read_lock(); | 1935 | rcu_read_lock(); |