diff options
author | Jesper Dangaard Brouer <brouer@redhat.com> | 2019-03-29 05:18:00 -0400 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2019-03-29 15:15:02 -0400 |
commit | 676e4a6fe703f2dae699ee9d56f14516f9ada4ea (patch) | |
tree | 6208dde17376d516f3da061414afc34159ab7775 /kernel | |
parent | 8543e437807970166c2b66b79935c9f4b0e6d1f9 (diff) |
xdp: fix cpumap redirect SKB creation bug
We want to avoid leaking pointer info from xdp_frame (which is placed at
the top of the frame), like commit 6dfb970d3dbd ("xdp: avoid leaking info stored in
frame data on page reuse") and followup commit 97e19cce05e5 ("bpf:
reserve xdp_frame size in xdp headroom"), which reserves this headroom.
These changes also affected how cpumap constructed SKBs: as the xdpf->headroom
size changed, the skb data starting point was in effect shifted by 32
bytes (sizeof xdp_frame). This was still okay, as the cpumap frame_size
calculation also included xdpf->headroom, which was reduced by the same amount.
A bug was introduced in commit 77ea5f4cbe20 ("bpf/cpumap: make sure
frame_size for build_skb is aligned if headroom isn't"), where the
xdpf->headroom became part of the SKB_DATA_ALIGN rounding up. This
round-up to find the frame_size is in principle still correct, as it does
not exceed the 2048-byte frame_size (which is the max for ixgbe and i40e),
but the 32-byte offset of pkt_data_start puts this over the 2048-byte
limit. This causes skb_shared_info to spill into the next frame. It is a little
hard to trigger, as the SKB needs to use more than 15 skb_shinfo->frags[], as
far as I can calculate. This does happen in practice for TCP streams when
skb_try_coalesce() kicks in.
KASAN can be used to detect these wrong memory accesses, I've seen:
BUG: KASAN: use-after-free in skb_try_coalesce+0x3cb/0x760
BUG: KASAN: wild-memory-access in skb_release_data+0xe2/0x250
The veth driver also constructs an SKB from an xdp_frame in this way, but is not
affected, as it doesn't reserve/deduct the room (used by xdp_frame) from
the SKB headroom. Instead it clears the pointers via xdp_scrub_frame(),
and allows the SKB to use this area.
The fix in this patch is to do as veth does and instead allow the SKB to (re)use
the area occupied by xdp_frame, clearing it via xdp_scrub_frame(). (This
does kill the idea of the SKB being able to access (mem) info from this
area, but I guess it was a bad idea anyhow, and it was already killed by
the veth changes.)
Fixes: 77ea5f4cbe20 ("bpf/cpumap: make sure frame_size for build_skb is aligned if headroom isn't")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/cpumap.c | 13 |
1 files changed, 10 insertions, 3 deletions
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 8974b3755670..3c18260403dd 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c | |||
@@ -162,10 +162,14 @@ static void cpu_map_kthread_stop(struct work_struct *work) | |||
162 | static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, | 162 | static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, |
163 | struct xdp_frame *xdpf) | 163 | struct xdp_frame *xdpf) |
164 | { | 164 | { |
165 | unsigned int hard_start_headroom; | ||
165 | unsigned int frame_size; | 166 | unsigned int frame_size; |
166 | void *pkt_data_start; | 167 | void *pkt_data_start; |
167 | struct sk_buff *skb; | 168 | struct sk_buff *skb; |
168 | 169 | ||
170 | /* Part of headroom was reserved to xdpf */ | ||
171 | hard_start_headroom = sizeof(struct xdp_frame) + xdpf->headroom; | ||
172 | |||
169 | /* build_skb need to place skb_shared_info after SKB end, and | 173 | /* build_skb need to place skb_shared_info after SKB end, and |
170 | * also want to know the memory "truesize". Thus, need to | 174 | * also want to know the memory "truesize". Thus, need to |
171 | * know the memory frame size backing xdp_buff. | 175 | * know the memory frame size backing xdp_buff. |
@@ -183,15 +187,15 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, | |||
183 | * is not at a fixed memory location, with mixed length | 187 | * is not at a fixed memory location, with mixed length |
184 | * packets, which is bad for cache-line hotness. | 188 | * packets, which is bad for cache-line hotness. |
185 | */ | 189 | */ |
186 | frame_size = SKB_DATA_ALIGN(xdpf->len + xdpf->headroom) + | 190 | frame_size = SKB_DATA_ALIGN(xdpf->len + hard_start_headroom) + |
187 | SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); | 191 | SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
188 | 192 | ||
189 | pkt_data_start = xdpf->data - xdpf->headroom; | 193 | pkt_data_start = xdpf->data - hard_start_headroom; |
190 | skb = build_skb(pkt_data_start, frame_size); | 194 | skb = build_skb(pkt_data_start, frame_size); |
191 | if (!skb) | 195 | if (!skb) |
192 | return NULL; | 196 | return NULL; |
193 | 197 | ||
194 | skb_reserve(skb, xdpf->headroom); | 198 | skb_reserve(skb, hard_start_headroom); |
195 | __skb_put(skb, xdpf->len); | 199 | __skb_put(skb, xdpf->len); |
196 | if (xdpf->metasize) | 200 | if (xdpf->metasize) |
197 | skb_metadata_set(skb, xdpf->metasize); | 201 | skb_metadata_set(skb, xdpf->metasize); |
@@ -205,6 +209,9 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, | |||
205 | * - RX ring dev queue index (skb_record_rx_queue) | 209 | * - RX ring dev queue index (skb_record_rx_queue) |
206 | */ | 210 | */ |
207 | 211 | ||
212 | /* Allow SKB to reuse area used by xdp_frame */ | ||
213 | xdp_scrub_frame(xdpf); | ||
214 | |||
208 | return skb; | 215 | return skb; |
209 | } | 216 | } |
210 | 217 | ||