author		Zoltan Kiss <zoltan.kiss@citrix.com>	2014-03-06 16:48:29 -0500
committer	David S. Miller <davem@davemloft.net>	2014-03-07 15:56:35 -0500
commit		e3377f36ca20a034dce56335dc9b89f41094d845
tree		2a2c0d898afae8176b1561a4480f454893fc24ef
parent		1bb332af4cd889e4b64dacbf4a793ceb3a70445d
xen-netback: Handle guests with too many frags
The Xen network protocol had an implicit dependency on MAX_SKB_FRAGS. Netback has to
handle guests sending up to XEN_NETBK_LEGACY_SLOTS_MAX slots. To achieve that:
- create a new skb
- assign the leftover slots to its frags (no linear buffer here!)
- chain it to the previous skb through skb_shinfo(skb)->frag_list
- grant-map the frags of both skbs
- copy and coalesce all the frags into a brand new skb and send that to the stack
- unmap the two old skbs' pages
It also introduces a new stat counter, tx_frag_overflow, which helps determine how
often a guest sends packets with more than MAX_SKB_FRAGS frags.
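
For illustration, a minimal userspace sketch of the copy-and-coalesce step (not part
of the commit; struct frag, coalesce(), PAGE_SZ and FRAG_MAX are hypothetical
stand-ins for the kernel's skb_frag_t, xenvif_handle_frag_list(), PAGE_SIZE and
MAX_SKB_FRAGS):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SZ  4096	/* stand-in for PAGE_SIZE */
#define FRAG_MAX 17	/* stand-in for MAX_SKB_FRAGS */

struct frag {
	unsigned char *data;
	size_t len;
};

/* Copy `total` bytes scattered over the source frags into a fresh array of
 * page-sized frags, as the kernel code does after chaining the second skb.
 * `total` must equal the sum of the source frag lengths. Returns the number
 * of new frags, or -1 if a page allocation fails (everything is undone).
 */
static int coalesce(const struct frag *src, int nr_src,
		    struct frag *dst, size_t total)
{
	size_t offset = 0, src_off = 0;
	int i = 0, s = 0;

	while (offset < total && i < FRAG_MAX) {
		size_t len = total - offset < PAGE_SZ ? total - offset
						      : PAGE_SZ;
		size_t copied = 0;

		dst[i].data = malloc(PAGE_SZ);
		if (!dst[i].data) {
			while (i--)
				free(dst[i].data);
			return -1;
		}
		/* Fill this page from as many source frags as needed. */
		while (copied < len && s < nr_src) {
			size_t chunk = src[s].len - src_off;

			if (chunk > len - copied)
				chunk = len - copied;
			memcpy(dst[i].data + copied, src[s].data + src_off,
			       chunk);
			copied += chunk;
			src_off += chunk;
			if (src_off == src[s].len) {
				s++;
				src_off = 0;
			}
		}
		dst[i].len = copied;
		offset += copied;
		i++;
	}
	return i;
}

int main(void)
{
	/* Two "skbs" worth of fragment data: 3000 + 6000 bytes. */
	static unsigned char a[3000], b[6000];
	struct frag src[2] = { { a, sizeof(a) }, { b, sizeof(b) } };
	struct frag dst[FRAG_MAX];
	int n = coalesce(src, 2, dst, sizeof(a) + sizeof(b));

	printf("coalesced into %d page-sized frags\n", n);	/* prints 3 */
	while (n-- > 0)
		free(dst[n].data);
	return 0;
}

The kernel version below additionally works on grant-mapped pages, so once the copy
is done it must fire the zerocopy callback to release them; that is why the diff
flags the old skbs with SKBTX_DEV_ZEROCOPY before freeing them.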
NOTE: if bisect brought you here, you should apply the series up until
"xen-netback: Timeout packets in RX path", otherwise malicious guests can block
other guests by not releasing their sent packets.
Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--	drivers/net/xen-netback/common.h	|   1
-rw-r--r--	drivers/net/xen-netback/interface.c	|   7
-rw-r--r--	drivers/net/xen-netback/netback.c	| 164
3 files changed, 162 insertions, 10 deletions
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 683d30160a7c..f2f8a02afc36 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -182,6 +182,7 @@ struct xenvif {
 	unsigned long tx_zerocopy_sent;
 	unsigned long tx_zerocopy_success;
 	unsigned long tx_zerocopy_fail;
+	unsigned long tx_frag_overflow;
 
 	/* Miscellaneous private stuff. */
 	struct net_device *dev;
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 44df8581b4d7..b646039e539b 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -253,6 +253,13 @@ static const struct xenvif_stat {
 		"tx_zerocopy_fail",
 		offsetof(struct xenvif, tx_zerocopy_fail)
 	},
+	/* Number of packets exceeding MAX_SKB_FRAG slots. You should use
+	 * a guest with the same MAX_SKB_FRAG
+	 */
+	{
+		"tx_frag_overflow",
+		offsetof(struct xenvif, tx_frag_overflow)
+	},
 };
 
 static int xenvif_get_sset_count(struct net_device *dev, int string_set)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 3cb586357df7..58effc49f526 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -37,6 +37,7 @@
 #include <linux/kthread.h>
 #include <linux/if_vlan.h>
 #include <linux/udp.h>
+#include <linux/highmem.h>
 
 #include <net/tcp.h>
 
@@ -801,6 +802,23 @@ static inline void xenvif_tx_create_gop(struct xenvif *vif,
 	       sizeof(*txp));
 }
 
+static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
+{
+	struct sk_buff *skb =
+		alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN,
+			  GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(skb == NULL))
+		return NULL;
+
+	/* Packets passed to netif_rx() must have some headroom. */
+	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+
+	/* Initialize it here to avoid later surprises */
+	skb_shinfo(skb)->destructor_arg = NULL;
+
+	return skb;
+}
+
 static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif,
 							struct sk_buff *skb,
 							struct xen_netif_tx_request *txp,
@@ -811,11 +829,16 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif,
 	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
 	int start;
 	pending_ring_idx_t index;
-	unsigned int nr_slots;
+	unsigned int nr_slots, frag_overflow = 0;
 
 	/* At this point shinfo->nr_frags is in fact the number of
 	 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
 	 */
+	if (shinfo->nr_frags > MAX_SKB_FRAGS) {
+		frag_overflow = shinfo->nr_frags - MAX_SKB_FRAGS;
+		BUG_ON(frag_overflow > MAX_SKB_FRAGS);
+		shinfo->nr_frags = MAX_SKB_FRAGS;
+	}
 	nr_slots = shinfo->nr_frags;
 
 	/* Skip first skb fragment if it is on same page as header fragment. */
@@ -829,7 +852,29 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif,
 		frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
 	}
 
-	BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);
+	if (frag_overflow) {
+		struct sk_buff *nskb = xenvif_alloc_skb(0);
+		if (unlikely(nskb == NULL)) {
+			if (net_ratelimit())
+				netdev_err(vif->dev,
+					   "Can't allocate the frag_list skb.\n");
+			return NULL;
+		}
+
+		shinfo = skb_shinfo(nskb);
+		frags = shinfo->frags;
+
+		for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
+		     shinfo->nr_frags++, txp++, gop++) {
+			index = pending_index(vif->pending_cons++);
+			pending_idx = vif->pending_ring[index];
+			xenvif_tx_create_gop(vif, pending_idx, txp, gop);
+			frag_set_pending_idx(&frags[shinfo->nr_frags],
+					     pending_idx);
+		}
+
+		skb_shinfo(skb)->frag_list = nskb;
+	}
 
 	return gop;
 }
@@ -871,6 +916,7 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
 	struct pending_tx_info *tx_info;
 	int nr_frags = shinfo->nr_frags;
 	int i, err, start;
+	struct sk_buff *first_skb = NULL;
 
 	/* Check status of header. */
 	err = gop->status;
@@ -882,6 +928,7 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
 	/* Skip first skb fragment if it is on same page as header fragment. */
 	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
 
+check_frags:
 	for (i = start; i < nr_frags; i++) {
 		int j, newerr;
 
@@ -905,9 +952,11 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
 		/* Not the first error? Preceding frags already invalidated. */
 		if (err)
 			continue;
-
 		/* First error: invalidate header and preceding fragments. */
-		pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+		if (!first_skb)
+			pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+		else
+			pending_idx = XENVIF_TX_CB(first_skb)->pending_idx;
 		xenvif_idx_unmap(vif, pending_idx);
 		for (j = start; j < i; j++) {
 			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
@@ -918,6 +967,30 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
 		err = newerr;
 	}
 
+	if (skb_has_frag_list(skb)) {
+		first_skb = skb;
+		skb = shinfo->frag_list;
+		shinfo = skb_shinfo(skb);
+		nr_frags = shinfo->nr_frags;
+		start = 0;
+
+		goto check_frags;
+	}
+
+	/* There was a mapping error in the frag_list skb. We have to unmap
+	 * the first skb's frags
+	 */
+	if (first_skb && err) {
+		int j;
+		shinfo = skb_shinfo(first_skb);
+		pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+		start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
+		for (j = start; j < shinfo->nr_frags; j++) {
+			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
+			xenvif_idx_unmap(vif, pending_idx);
+		}
+	}
+
 	*gopp = gop + 1;
 	return err;
 }
@@ -1169,8 +1242,7 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
 			       ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
 			PKT_PROT_LEN : txreq.size;
 
-		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
-				GFP_ATOMIC | __GFP_NOWARN);
+		skb = xenvif_alloc_skb(data_len);
 		if (unlikely(skb == NULL)) {
 			netdev_dbg(vif->dev,
 				   "Can't allocate a skb in start_xmit.\n");
@@ -1178,9 +1250,6 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
 			break;
 		}
 
-		/* Packets passed to netif_rx() must have some headroom. */
-		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
-
 		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
 			struct xen_netif_extra_info *gso;
 			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
@@ -1231,6 +1300,71 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
 	return gop - vif->tx_map_ops;
 }
 
+/* Consolidate skb with a frag_list into a brand new one with local pages on
+ * frags. Returns 0 or -ENOMEM if can't allocate new pages.
+ */
+static int xenvif_handle_frag_list(struct xenvif *vif, struct sk_buff *skb)
+{
+	unsigned int offset = skb_headlen(skb);
+	skb_frag_t frags[MAX_SKB_FRAGS];
+	int i;
+	struct ubuf_info *uarg;
+	struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
+
+	vif->tx_zerocopy_sent += 2;
+	vif->tx_frag_overflow++;
+
+	xenvif_fill_frags(vif, nskb);
+	/* Subtract frags size, we will correct it later */
+	skb->truesize -= skb->data_len;
+	skb->len += nskb->len;
+	skb->data_len += nskb->len;
+
+	/* create a brand new frags array and coalesce there */
+	for (i = 0; offset < skb->len; i++) {
+		struct page *page;
+		unsigned int len;
+
+		BUG_ON(i >= MAX_SKB_FRAGS);
+		page = alloc_page(GFP_ATOMIC|__GFP_COLD);
+		if (!page) {
+			int j;
+			skb->truesize += skb->data_len;
+			for (j = 0; j < i; j++)
+				put_page(frags[j].page.p);
+			return -ENOMEM;
+		}
+
+		if (offset + PAGE_SIZE < skb->len)
+			len = PAGE_SIZE;
+		else
+			len = skb->len - offset;
+		if (skb_copy_bits(skb, offset, page_address(page), len))
+			BUG();
+
+		offset += len;
+		frags[i].page.p = page;
+		frags[i].page_offset = 0;
+		skb_frag_size_set(&frags[i], len);
+	}
+	/* swap out with old one */
+	memcpy(skb_shinfo(skb)->frags,
+	       frags,
+	       i * sizeof(skb_frag_t));
+	skb_shinfo(skb)->nr_frags = i;
+	skb->truesize += i * PAGE_SIZE;
+
+	/* remove traces of mapped pages and frag_list */
+	skb_frag_list_init(skb);
+	uarg = skb_shinfo(skb)->destructor_arg;
+	uarg->callback(uarg, true);
+	skb_shinfo(skb)->destructor_arg = NULL;
+
+	skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+	kfree_skb(nskb);
+
+	return 0;
+}
 
 static int xenvif_tx_submit(struct xenvif *vif)
 {
@@ -1267,7 +1401,6 @@ static int xenvif_tx_submit(struct xenvif *vif)
 				&vif->pending_tx_info[pending_idx].callback_struct;
 		} else {
 			/* Schedule a response immediately. */
-			skb_shinfo(skb)->destructor_arg = NULL;
 			xenvif_idx_unmap(vif, pending_idx);
 		}
 
@@ -1278,6 +1411,17 @@ static int xenvif_tx_submit(struct xenvif *vif)
 
 		xenvif_fill_frags(vif, skb);
 
+		if (unlikely(skb_has_frag_list(skb))) {
+			if (xenvif_handle_frag_list(vif, skb)) {
+				if (net_ratelimit())
+					netdev_err(vif->dev,
+						   "Not enough memory to consolidate frag_list!\n");
+				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+				kfree_skb(skb);
+				continue;
+			}
+		}
+
 		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
 			int target = min_t(int, skb->len, PKT_PROT_LEN);
 			__pskb_pull_tail(skb, target - skb_headlen(skb));