author     Wei Liu <wei.liu2@citrix.com>          2013-04-21 22:20:42 -0400
committer  David S. Miller <davem@davemloft.net>  2013-04-22 15:37:01 -0400
commit     2810e5b9a7731ca5fce22bfbe12c96e16ac44b6f
tree       766b484584a4d28ecffa745f5a1cc21864abd099
parent     9ecd1a75d977e2e8c48139c7d3efed183f898d94
xen-netback: coalesce slots in TX path and fix regressions
This patch tries to coalesce tx requests when constructing grant copy
structures. It enables netback to deal with the situation in which the
frontend's MAX_SKB_FRAGS is larger than the backend's MAX_SKB_FRAGS.

With the help of coalescing, this patch addresses two regressions while
avoiding reopening the security hole closed by XSA-39.

Regression 1: the reduction of the number of supported ring entries
(slots) per packet (from 18 to 17). This regression had been around for
some time but went unnoticed until the XSA-39 security fix. It is fixed
by coalescing slots.

Regression 2: the XSA-39 security fix turned "too many frags" errors
from simply dropping the packet into a fatal error that disables the
VIF. This is fixed by coalescing slots (handling 18 slots when the
backend's MAX_SKB_FRAGS is 17), which rules out the false positive
(using 18 slots is legitimate), and by dropping packets that use 19 to
max_skb_slots slots.

To avoid reopening the security hole closed by XSA-39, a frontend
sending a packet using more than max_skb_slots slots is considered
malicious. The behavior of netback for a packet is thus:

    1 to 18 slots:              valid
    19 to max_skb_slots slots:  drop and respond with an error
    more than max_skb_slots:    fatal error

max_skb_slots is configurable by the admin; the default value is 20.

Also change the variable name from "frags" to "slots" in
netbk_count_requests.

Please note that the RX path still has a dependency on MAX_SKB_FRAGS.
This will be fixed with a separate patch.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
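A minimal sketch of this slot policy, for illustration only; the helper
name and enum below are hypothetical and not part of this patch:

/* Classify a packet by the number of ring slots it consumes. */
#define XEN_NETIF_NR_SLOTS_MIN 18	/* historical MAX_SKB_FRAGS */

enum slot_verdict { SLOT_VALID, SLOT_DROP, SLOT_FATAL };

static enum slot_verdict classify_packet(unsigned int slots,
					 unsigned int max_skb_slots)
{
	if (slots <= XEN_NETIF_NR_SLOTS_MIN)
		return SLOT_VALID;	/* 1-18 slots: valid */
	if (slots <= max_skb_slots)
		return SLOT_DROP;	/* drop and respond with an error */
	return SLOT_FATAL;		/* considered malicious; VIF disabled */
}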
-rw-r--r--  drivers/net/xen-netback/netback.c | 275
-rw-r--r--  include/xen/interface/io/netif.h  |  18
2 files changed, 242 insertions(+), 51 deletions(-)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 9f7184404263..d9292c59789b 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -47,11 +47,25 @@
 #include <asm/xen/hypercall.h>
 #include <asm/xen/page.h>
 
+/*
+ * This is the maximum slots a skb can have. If a guest sends a skb
+ * which exceeds this limit it is considered malicious.
+ */
+#define MAX_SKB_SLOTS_DEFAULT 20
+static unsigned int max_skb_slots = MAX_SKB_SLOTS_DEFAULT;
+module_param(max_skb_slots, uint, 0444);
+
+typedef unsigned int pending_ring_idx_t;
+#define INVALID_PENDING_RING_IDX (~0U)
+
 struct pending_tx_info {
-	struct xen_netif_tx_request req;
+	struct xen_netif_tx_request req; /* coalesced tx request */
 	struct xenvif *vif;
+	pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX
+				  * if it is head of one or more tx
+				  * reqs
+				  */
 };
-typedef unsigned int pending_ring_idx_t;
 
 struct netbk_rx_meta {
 	int id;
@@ -102,7 +116,11 @@ struct xen_netbk {
 	atomic_t netfront_count;
 
 	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
-	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
+	/* Coalescing tx requests before copying makes number of grant
+	 * copy ops greater or equal to number of slots required. In
+	 * worst case a tx request consumes 2 gnttab_copy.
+	 */
+	struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
 
 	u16 pending_ring[MAX_PENDING_REQS];
 
@@ -118,6 +136,16 @@ struct xen_netbk {
 static struct xen_netbk *xen_netbk;
 static int xen_netbk_group_nr;
 
+/*
+ * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of
+ * one or more merged tx requests, otherwise it is the continuation of
+ * previous tx request.
+ */
+static inline int pending_tx_is_head(struct xen_netbk *netbk, RING_IDX idx)
+{
+	return netbk->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX;
+}
+
 void xen_netbk_add_xenvif(struct xenvif *vif)
 {
 	int i;
@@ -250,6 +278,7 @@ static int max_required_rx_slots(struct xenvif *vif)
 {
 	int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
 
+	/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
 	if (vif->can_sg || vif->gso || vif->gso_prefix)
 		max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
 
@@ -657,6 +686,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
 		__skb_queue_tail(&rxq, skb);
 
 		/* Filled the batch queue? */
+		/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
 		if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
 			break;
 	}
@@ -898,47 +928,78 @@ static void netbk_fatal_tx_err(struct xenvif *vif)
 
 static int netbk_count_requests(struct xenvif *vif,
 				struct xen_netif_tx_request *first,
+				RING_IDX first_idx,
 				struct xen_netif_tx_request *txp,
 				int work_to_do)
 {
 	RING_IDX cons = vif->tx.req_cons;
-	int frags = 0;
+	int slots = 0;
+	int drop_err = 0;
 
 	if (!(first->flags & XEN_NETTXF_more_data))
 		return 0;
 
 	do {
-		if (frags >= work_to_do) {
-			netdev_err(vif->dev, "Need more frags\n");
+		if (slots >= work_to_do) {
+			netdev_err(vif->dev,
+				   "Asked for %d slots but exceeds this limit\n",
+				   work_to_do);
 			netbk_fatal_tx_err(vif);
 			return -ENODATA;
 		}
 
-		if (unlikely(frags >= MAX_SKB_FRAGS)) {
-			netdev_err(vif->dev, "Too many frags\n");
+		/* This guest is really using too many slots and
+		 * considered malicious.
+		 */
+		if (unlikely(slots >= max_skb_slots)) {
+			netdev_err(vif->dev,
+				   "Malicious frontend using %d slots, threshold %u\n",
+				   slots, max_skb_slots);
 			netbk_fatal_tx_err(vif);
 			return -E2BIG;
 		}
 
-		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags),
+		/* Xen network protocol had implicit dependency on
+		 * MAX_SKB_FRAGS. XEN_NETIF_NR_SLOTS_MIN is set to the
+		 * historical MAX_SKB_FRAGS value 18 to honor the same
+		 * behavior as before. Any packet using more than 18
+		 * slots but less than max_skb_slots slots is dropped
+		 */
+		if (!drop_err && slots >= XEN_NETIF_NR_SLOTS_MIN) {
+			if (net_ratelimit())
+				netdev_dbg(vif->dev,
+					   "Too many slots (%d) exceeding limit (%d), dropping packet\n",
+					   slots, XEN_NETIF_NR_SLOTS_MIN);
+			drop_err = -E2BIG;
+		}
+
+		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
 		       sizeof(*txp));
 		if (txp->size > first->size) {
-			netdev_err(vif->dev, "Frag is bigger than frame.\n");
+			netdev_err(vif->dev,
+				   "Invalid tx request, slot size %u > remaining size %u\n",
+				   txp->size, first->size);
 			netbk_fatal_tx_err(vif);
 			return -EIO;
 		}
 
 		first->size -= txp->size;
-		frags++;
+		slots++;
 
 		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
-			netdev_err(vif->dev, "txp->offset: %x, size: %u\n",
+			netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
 				   txp->offset, txp->size);
 			netbk_fatal_tx_err(vif);
 			return -EINVAL;
 		}
 	} while ((txp++)->flags & XEN_NETTXF_more_data);
-	return frags;
+
+	if (drop_err) {
+		netbk_tx_err(vif, first, first_idx + slots);
+		return drop_err;
+	}
+
+	return slots;
 }
 
 static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
@@ -962,48 +1023,114 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	skb_frag_t *frags = shinfo->frags;
 	u16 pending_idx = *((u16 *)skb->data);
-	int i, start;
+	u16 head_idx = 0;
+	int slot, start;
+	struct page *page;
+	pending_ring_idx_t index, start_idx = 0;
+	uint16_t dst_offset;
+	unsigned int nr_slots;
+	struct pending_tx_info *first = NULL;
+
+	/* At this point shinfo->nr_frags is in fact the number of
+	 * slots, which can be as large as XEN_NETIF_NR_SLOTS_MIN.
+	 */
+	nr_slots = shinfo->nr_frags;
 
 	/* Skip first skb fragment if it is on same page as header fragment. */
 	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
 
-	for (i = start; i < shinfo->nr_frags; i++, txp++) {
-		struct page *page;
-		pending_ring_idx_t index;
+	/* Coalesce tx requests, at this point the packet passed in
+	 * should be <= 64K. Any packets larger than 64K have been
+	 * handled in netbk_count_requests().
+	 */
+	for (shinfo->nr_frags = slot = start; slot < nr_slots;
+	     shinfo->nr_frags++) {
 		struct pending_tx_info *pending_tx_info =
 			netbk->pending_tx_info;
 
-		index = pending_index(netbk->pending_cons++);
-		pending_idx = netbk->pending_ring[index];
-		page = xen_netbk_alloc_page(netbk, pending_idx);
+		page = alloc_page(GFP_KERNEL|__GFP_COLD);
 		if (!page)
 			goto err;
 
-		gop->source.u.ref = txp->gref;
-		gop->source.domid = vif->domid;
-		gop->source.offset = txp->offset;
-
-		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
-		gop->dest.domid = DOMID_SELF;
-		gop->dest.offset = txp->offset;
-
-		gop->len = txp->size;
-		gop->flags = GNTCOPY_source_gref;
+		dst_offset = 0;
+		first = NULL;
+		while (dst_offset < PAGE_SIZE && slot < nr_slots) {
+			gop->flags = GNTCOPY_source_gref;
+
+			gop->source.u.ref = txp->gref;
+			gop->source.domid = vif->domid;
+			gop->source.offset = txp->offset;
+
+			gop->dest.domid = DOMID_SELF;
+
+			gop->dest.offset = dst_offset;
+			gop->dest.u.gmfn = virt_to_mfn(page_address(page));
+
+			if (dst_offset + txp->size > PAGE_SIZE) {
+				/* This page can only merge a portion
+				 * of tx request. Do not increment any
+				 * pointer / counter here. The txp
+				 * will be dealt with in future
+				 * rounds, eventually hitting the
+				 * `else` branch.
+				 */
+				gop->len = PAGE_SIZE - dst_offset;
+				txp->offset += gop->len;
+				txp->size -= gop->len;
+				dst_offset += gop->len; /* quit loop */
+			} else {
+				/* This tx request can be merged in the page */
+				gop->len = txp->size;
+				dst_offset += gop->len;
+
+				index = pending_index(netbk->pending_cons++);
+
+				pending_idx = netbk->pending_ring[index];
+
+				memcpy(&pending_tx_info[pending_idx].req, txp,
+				       sizeof(*txp));
+				xenvif_get(vif);
+
+				pending_tx_info[pending_idx].vif = vif;
+
+				/* Poison these fields, corresponding
+				 * fields for head tx req will be set
+				 * to correct values after the loop.
+				 */
+				netbk->mmap_pages[pending_idx] = (void *)(~0UL);
+				pending_tx_info[pending_idx].head =
+					INVALID_PENDING_RING_IDX;
+
+				if (!first) {
+					first = &pending_tx_info[pending_idx];
+					start_idx = index;
+					head_idx = pending_idx;
+				}
+
+				txp++;
+				slot++;
+			}
 
-		gop++;
+			gop++;
+		}
 
-		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
-		xenvif_get(vif);
-		pending_tx_info[pending_idx].vif = vif;
-		frag_set_pending_idx(&frags[i], pending_idx);
+		first->req.offset = 0;
+		first->req.size = dst_offset;
+		first->head = start_idx;
+		set_page_ext(page, netbk, head_idx);
+		netbk->mmap_pages[head_idx] = page;
+		frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
 	}
 
+	BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);
+
 	return gop;
 err:
 	/* Unwind, freeing all pages and sending error responses. */
-	while (i-- > start) {
-		xen_netbk_idx_release(netbk, frag_get_pending_idx(&frags[i]),
-				      XEN_NETIF_RSP_ERROR);
+	while (shinfo->nr_frags-- > start) {
+		xen_netbk_idx_release(netbk,
+				      frag_get_pending_idx(&frags[shinfo->nr_frags]),
+				      XEN_NETIF_RSP_ERROR);
 	}
 	/* The head too, if necessary. */
 	if (start)
@@ -1019,8 +1146,10 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
 	struct gnttab_copy *gop = *gopp;
 	u16 pending_idx = *((u16 *)skb->data);
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
+	struct pending_tx_info *tx_info;
 	int nr_frags = shinfo->nr_frags;
 	int i, err, start;
+	u16 peek; /* peek into next tx request */
 
 	/* Check status of header. */
 	err = gop->status;
@@ -1032,11 +1161,20 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
 
 	for (i = start; i < nr_frags; i++) {
 		int j, newerr;
+		pending_ring_idx_t head;
 
 		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
+		tx_info = &netbk->pending_tx_info[pending_idx];
+		head = tx_info->head;
 
 		/* Check error status: if okay then remember grant handle. */
-		newerr = (++gop)->status;
+		do {
+			newerr = (++gop)->status;
+			if (newerr)
+				break;
+			peek = netbk->pending_ring[pending_index(++head)];
+		} while (!pending_tx_is_head(netbk, peek));
+
 		if (likely(!newerr)) {
 			/* Had a previous error? Invalidate this fragment. */
 			if (unlikely(err))
@@ -1256,11 +1394,12 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
 	struct sk_buff *skb;
 	int ret;
 
-	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+	while ((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN
+		< MAX_PENDING_REQS) &&
 		!list_empty(&netbk->net_schedule_list)) {
 		struct xenvif *vif;
 		struct xen_netif_tx_request txreq;
-		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+		struct xen_netif_tx_request txfrags[max_skb_slots];
 		struct page *page;
 		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
 		u16 pending_idx;
@@ -1321,7 +1460,8 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
 			continue;
 		}
 
-		ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
+		ret = netbk_count_requests(vif, &txreq, idx,
+					   txfrags, work_to_do);
 		if (unlikely(ret < 0))
 			continue;
 
@@ -1348,7 +1488,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
 		pending_idx = netbk->pending_ring[index];
 
 		data_len = (txreq.size > PKT_PROT_LEN &&
-			    ret < MAX_SKB_FRAGS) ?
+			    ret < XEN_NETIF_NR_SLOTS_MIN) ?
 			PKT_PROT_LEN : txreq.size;
 
 		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
@@ -1398,6 +1538,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
 		memcpy(&netbk->pending_tx_info[pending_idx].req,
 		       &txreq, sizeof(txreq));
 		netbk->pending_tx_info[pending_idx].vif = vif;
+		netbk->pending_tx_info[pending_idx].head = index;
 		*((u16 *)skb->data) = pending_idx;
 
 		__skb_put(skb, data_len);
@@ -1528,7 +1669,10 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
 {
 	struct xenvif *vif;
 	struct pending_tx_info *pending_tx_info;
-	pending_ring_idx_t index;
+	pending_ring_idx_t head;
+	u16 peek; /* peek into next tx request */
+
+	BUG_ON(netbk->mmap_pages[pending_idx] == (void *)(~0UL));
 
 	/* Already complete? */
 	if (netbk->mmap_pages[pending_idx] == NULL)
@@ -1537,19 +1681,40 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
 	pending_tx_info = &netbk->pending_tx_info[pending_idx];
 
 	vif = pending_tx_info->vif;
+	head = pending_tx_info->head;
 
-	make_tx_response(vif, &pending_tx_info->req, status);
+	BUG_ON(!pending_tx_is_head(netbk, head));
+	BUG_ON(netbk->pending_ring[pending_index(head)] != pending_idx);
 
-	index = pending_index(netbk->pending_prod++);
-	netbk->pending_ring[index] = pending_idx;
+	do {
+		pending_ring_idx_t index;
+		pending_ring_idx_t idx = pending_index(head);
+		u16 info_idx = netbk->pending_ring[idx];
 
-	xenvif_put(vif);
+		pending_tx_info = &netbk->pending_tx_info[info_idx];
+		make_tx_response(vif, &pending_tx_info->req, status);
+
+		/* Setting any number other than
+		 * INVALID_PENDING_RING_IDX indicates this slot is
+		 * starting a new packet / ending a previous packet.
+		 */
+		pending_tx_info->head = 0;
+
+		index = pending_index(netbk->pending_prod++);
+		netbk->pending_ring[index] = netbk->pending_ring[info_idx];
 
-	netbk->mmap_pages[pending_idx]->mapping = NULL;
+		xenvif_put(vif);
+
+		peek = netbk->pending_ring[pending_index(++head)];
+
+	} while (!pending_tx_is_head(netbk, peek));
+
+	netbk->mmap_pages[pending_idx]->mapping = 0;
 	put_page(netbk->mmap_pages[pending_idx]);
 	netbk->mmap_pages[pending_idx] = NULL;
 }
 
+
 static void make_tx_response(struct xenvif *vif,
 			     struct xen_netif_tx_request *txp,
 			     s8 st)
@@ -1602,8 +1767,9 @@ static inline int rx_work_todo(struct xen_netbk *netbk)
 static inline int tx_work_todo(struct xen_netbk *netbk)
 {
 
-	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
-	    !list_empty(&netbk->net_schedule_list))
+	if ((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN
+	     < MAX_PENDING_REQS) &&
+	    !list_empty(&netbk->net_schedule_list))
 		return 1;
 
 	return 0;
@@ -1686,6 +1852,13 @@ static int __init netback_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
+	if (max_skb_slots < XEN_NETIF_NR_SLOTS_MIN) {
+		printk(KERN_INFO
+		       "xen-netback: max_skb_slots too small (%d), bump it to XEN_NETIF_NR_SLOTS_MIN (%d)\n",
+		       max_skb_slots, XEN_NETIF_NR_SLOTS_MIN);
+		max_skb_slots = XEN_NETIF_NR_SLOTS_MIN;
+	}
+
 	xen_netbk_group_nr = num_online_cpus();
 	xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
 	if (!xen_netbk)
diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
index 58fadcac33a1..3ef3fe05ee99 100644
--- a/include/xen/interface/io/netif.h
+++ b/include/xen/interface/io/netif.h
@@ -13,6 +13,24 @@
 #include <xen/interface/grant_table.h>
 
 /*
+ * Older implementation of Xen network frontend / backend has an
+ * implicit dependency on the MAX_SKB_FRAGS as the maximum number of
+ * ring slots a skb can use. Netfront / netback may not work as
+ * expected when frontend and backend have different MAX_SKB_FRAGS.
+ *
+ * A better approach is to add mechanism for netfront / netback to
+ * negotiate this value. However we cannot fix all possible
+ * frontends, so we need to define a value which states the minimum
+ * slots backend must support.
+ *
+ * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS
+ * (18), which is proved to work with most frontends. Any new backend
+ * which doesn't negotiate with frontend should expect frontend to
+ * send a valid packet using slots up to this value.
+ */
+#define XEN_NETIF_NR_SLOTS_MIN 18
+
+/*
  * Notifications after enqueuing any type of message should be conditional on
  * the appropriate req_event or rsp_event field in the shared ring.
  * If the client sends notification for rx requests then it should specify
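
The coalescing loop in xen_netbk_get_requests above packs slot-sized
chunks into freshly allocated backend pages, splitting any request that
would cross a destination page boundary into two grant copies; that is
why tx_copy_ops is sized 2*MAX_PENDING_REQS. A minimal standalone sketch
of that packing arithmetic, assuming hypothetical names and userland I/O
rather than driver code:

#include <stdio.h>

#define PAGE_SIZE 4096u

struct copy_op {
	unsigned int src_slot, src_off;	/* which slot, offset within it */
	unsigned int dst_page, dst_off;	/* destination page and offset */
	unsigned int len;
};

/*
 * Pack slot-sized chunks (each <= PAGE_SIZE, as netbk_count_requests
 * enforces) into destination pages. A chunk that would cross the
 * destination page boundary is split in two, so the result never
 * exceeds two copy operations per slot.
 */
static unsigned int pack(const unsigned int *slot_len, unsigned int nr_slots,
			 struct copy_op *ops)
{
	unsigned int slot = 0, src_off = 0, dst_page = 0, dst_off = 0, n = 0;

	while (slot < nr_slots) {
		unsigned int remaining = slot_len[slot] - src_off;
		unsigned int space = PAGE_SIZE - dst_off;
		unsigned int len = remaining < space ? remaining : space;

		ops[n].src_slot = slot;
		ops[n].src_off = src_off;
		ops[n].dst_page = dst_page;
		ops[n].dst_off = dst_off;
		ops[n].len = len;
		n++;

		src_off += len;
		dst_off += len;
		if (src_off == slot_len[slot]) {	/* slot fully copied */
			slot++;
			src_off = 0;
		}
		if (dst_off == PAGE_SIZE) {		/* destination page full */
			dst_page++;
			dst_off = 0;
		}
	}
	return n;
}

int main(void)
{
	/* hypothetical slot sizes for one packet */
	unsigned int slots[] = { 4096, 100, 4096, 2000 };
	struct copy_op ops[2 * 4];	/* worst case: two ops per slot */
	unsigned int i, n = pack(slots, 4, ops);

	for (i = 0; i < n; i++)
		printf("slot %u+%u -> page %u+%u, %u bytes\n",
		       ops[i].src_slot, ops[i].src_off,
		       ops[i].dst_page, ops[i].dst_off, ops[i].len);
	return 0;
}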