aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author: Shreyas Bhatewara <sbhatewara@vmware.com> 2011-07-05 10:34:05 -0400
committer: David S. Miller <davem@davemloft.net> 2011-07-05 21:39:40 -0400
commit: 5318d809d7b4975ce5e5303e8508f89a5458c2b6 (patch)
tree: 2e676330331419d9bb2e3d17f2f97462df7ce548 /drivers
parent: 44661462ee1ee3c922754fc1f246867f0d01e7ea (diff)
vmxnet3: fix starving rx ring when alloc_skb fails
If the rx ring is completely empty, then the device may never fire an rx interrupt. Unfortunately, the rx interrupt is what triggers populating the rx ring with fresh buffers, so this will cause networking to lock up. This patch replenishes the skb in the recv descriptor as soon as it is peeled off while processing rx completions. If the skb/buffer allocation fails, the existing one is recycled and the packet in hand is dropped. This way no RX descriptor is ever left empty, thus avoiding starvation.
Signed-off-by: Scott J. Goldman <scottjg@vmware.com>
Signed-off-by: Shreyas N Bhatewara <sbhatewara@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/net/vmxnet3/vmxnet3_drv.c 135
-rw-r--r-- drivers/net/vmxnet3/vmxnet3_int.h 4
2 files changed, 96 insertions, 43 deletions
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index fa6e2ac7475a..45a23b2599f3 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -575,7 +575,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
575 struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx]; 575 struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx];
576 u32 val; 576 u32 val;
577 577
578 while (num_allocated < num_to_alloc) { 578 while (num_allocated <= num_to_alloc) {
579 struct vmxnet3_rx_buf_info *rbi; 579 struct vmxnet3_rx_buf_info *rbi;
580 union Vmxnet3_GenericDesc *gd; 580 union Vmxnet3_GenericDesc *gd;
581 581
@@ -621,9 +621,15 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
621 621
622 BUG_ON(rbi->dma_addr == 0); 622 BUG_ON(rbi->dma_addr == 0);
623 gd->rxd.addr = cpu_to_le64(rbi->dma_addr); 623 gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
624 gd->dword[2] = cpu_to_le32((ring->gen << VMXNET3_RXD_GEN_SHIFT) 624 gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
625 | val | rbi->len); 625 | val | rbi->len);
626 626
627 /* Fill the last buffer but don't mark it ready, or else the
628 * device will think that the queue is full */
629 if (num_allocated == num_to_alloc)
630 break;
631
632 gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT);
627 num_allocated++; 633 num_allocated++;
628 vmxnet3_cmd_ring_adv_next2fill(ring); 634 vmxnet3_cmd_ring_adv_next2fill(ring);
629 } 635 }
@@ -1140,6 +1146,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1140 VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2 1146 VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2
1141 }; 1147 };
1142 u32 num_rxd = 0; 1148 u32 num_rxd = 0;
1149 bool skip_page_frags = false;
1143 struct Vmxnet3_RxCompDesc *rcd; 1150 struct Vmxnet3_RxCompDesc *rcd;
1144 struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx; 1151 struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
1145#ifdef __BIG_ENDIAN_BITFIELD 1152#ifdef __BIG_ENDIAN_BITFIELD
@@ -1150,11 +1157,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1150 &rxComp); 1157 &rxComp);
1151 while (rcd->gen == rq->comp_ring.gen) { 1158 while (rcd->gen == rq->comp_ring.gen) {
1152 struct vmxnet3_rx_buf_info *rbi; 1159 struct vmxnet3_rx_buf_info *rbi;
1153 struct sk_buff *skb; 1160 struct sk_buff *skb, *new_skb = NULL;
1161 struct page *new_page = NULL;
1154 int num_to_alloc; 1162 int num_to_alloc;
1155 struct Vmxnet3_RxDesc *rxd; 1163 struct Vmxnet3_RxDesc *rxd;
1156 u32 idx, ring_idx; 1164 u32 idx, ring_idx;
1157 1165 struct vmxnet3_cmd_ring *ring = NULL;
1158 if (num_rxd >= quota) { 1166 if (num_rxd >= quota) {
1159 /* we may stop even before we see the EOP desc of 1167 /* we may stop even before we see the EOP desc of
1160 * the current pkt 1168 * the current pkt
@@ -1165,6 +1173,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1165 BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2); 1173 BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2);
1166 idx = rcd->rxdIdx; 1174 idx = rcd->rxdIdx;
1167 ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1; 1175 ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1;
1176 ring = rq->rx_ring + ring_idx;
1168 vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd, 1177 vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
1169 &rxCmdDesc); 1178 &rxCmdDesc);
1170 rbi = rq->buf_info[ring_idx] + idx; 1179 rbi = rq->buf_info[ring_idx] + idx;
@@ -1193,37 +1202,80 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1193 goto rcd_done; 1202 goto rcd_done;
1194 } 1203 }
1195 1204
1205 skip_page_frags = false;
1196 ctx->skb = rbi->skb; 1206 ctx->skb = rbi->skb;
1197 rbi->skb = NULL; 1207 new_skb = dev_alloc_skb(rbi->len + NET_IP_ALIGN);
1208 if (new_skb == NULL) {
1209 /* Skb allocation failed, do not handover this
1210 * skb to stack. Reuse it. Drop the existing pkt
1211 */
1212 rq->stats.rx_buf_alloc_failure++;
1213 ctx->skb = NULL;
1214 rq->stats.drop_total++;
1215 skip_page_frags = true;
1216 goto rcd_done;
1217 }
1198 1218
1199 pci_unmap_single(adapter->pdev, rbi->dma_addr, rbi->len, 1219 pci_unmap_single(adapter->pdev, rbi->dma_addr, rbi->len,
1200 PCI_DMA_FROMDEVICE); 1220 PCI_DMA_FROMDEVICE);
1201 1221
1202 skb_put(ctx->skb, rcd->len); 1222 skb_put(ctx->skb, rcd->len);
1223
1224 /* Immediate refill */
1225 new_skb->dev = adapter->netdev;
1226 skb_reserve(new_skb, NET_IP_ALIGN);
1227 rbi->skb = new_skb;
1228 rbi->dma_addr = pci_map_single(adapter->pdev,
1229 rbi->skb->data, rbi->len,
1230 PCI_DMA_FROMDEVICE);
1231 rxd->addr = cpu_to_le64(rbi->dma_addr);
1232 rxd->len = rbi->len;
1233
1203 } else { 1234 } else {
1204 BUG_ON(ctx->skb == NULL); 1235 BUG_ON(ctx->skb == NULL && !skip_page_frags);
1236
1205 /* non SOP buffer must be type 1 in most cases */ 1237 /* non SOP buffer must be type 1 in most cases */
1206 if (rbi->buf_type == VMXNET3_RX_BUF_PAGE) { 1238 BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE);
1207 BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY); 1239 BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);
1208 1240
1209 if (rcd->len) { 1241 /* If an sop buffer was dropped, skip all
1210 pci_unmap_page(adapter->pdev, 1242 * following non-sop fragments. They will be reused.
1211 rbi->dma_addr, rbi->len, 1243 */
1212 PCI_DMA_FROMDEVICE); 1244 if (skip_page_frags)
1245 goto rcd_done;
1213 1246
1214 vmxnet3_append_frag(ctx->skb, rcd, rbi); 1247 new_page = alloc_page(GFP_ATOMIC);
1215 rbi->page = NULL; 1248 if (unlikely(new_page == NULL)) {
1216 } 1249 /* Replacement page frag could not be allocated.
1217 } else { 1250 * Reuse this page. Drop the pkt and free the
1218 /* 1251 * skb which contained this page as a frag. Skip
1219 * The only time a non-SOP buffer is type 0 is 1252 * processing all the following non-sop frags.
1220 * when it's EOP and error flag is raised, which
1221 * has already been handled.
1222 */ 1253 */
1223 BUG_ON(true); 1254 rq->stats.rx_buf_alloc_failure++;
1255 dev_kfree_skb(ctx->skb);
1256 ctx->skb = NULL;
1257 skip_page_frags = true;
1258 goto rcd_done;
1259 }
1260
1261 if (rcd->len) {
1262 pci_unmap_page(adapter->pdev,
1263 rbi->dma_addr, rbi->len,
1264 PCI_DMA_FROMDEVICE);
1265
1266 vmxnet3_append_frag(ctx->skb, rcd, rbi);
1224 } 1267 }
1268
1269 /* Immediate refill */
1270 rbi->page = new_page;
1271 rbi->dma_addr = pci_map_page(adapter->pdev, rbi->page,
1272 0, PAGE_SIZE,
1273 PCI_DMA_FROMDEVICE);
1274 rxd->addr = cpu_to_le64(rbi->dma_addr);
1275 rxd->len = rbi->len;
1225 } 1276 }
1226 1277
1278
1227 skb = ctx->skb; 1279 skb = ctx->skb;
1228 if (rcd->eop) { 1280 if (rcd->eop) {
1229 skb->len += skb->data_len; 1281 skb->len += skb->data_len;
@@ -1244,26 +1296,27 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1244 } 1296 }
1245 1297
1246rcd_done: 1298rcd_done:
1247 /* device may skip some rx descs */ 1299 /* device may have skipped some rx descs */
1248 rq->rx_ring[ring_idx].next2comp = idx; 1300 ring->next2comp = idx;
1249 VMXNET3_INC_RING_IDX_ONLY(rq->rx_ring[ring_idx].next2comp, 1301 num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
1250 rq->rx_ring[ring_idx].size); 1302 ring = rq->rx_ring + ring_idx;
1251 1303 while (num_to_alloc) {
1252 /* refill rx buffers frequently to avoid starving the h/w */ 1304 vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
1253 num_to_alloc = vmxnet3_cmd_ring_desc_avail(rq->rx_ring + 1305 &rxCmdDesc);
1254 ring_idx); 1306 BUG_ON(!rxd->addr);
1255 if (unlikely(num_to_alloc > VMXNET3_RX_ALLOC_THRESHOLD(rq, 1307
1256 ring_idx, adapter))) { 1308 /* Recv desc is ready to be used by the device */
1257 vmxnet3_rq_alloc_rx_buf(rq, ring_idx, num_to_alloc, 1309 rxd->gen = ring->gen;
1258 adapter); 1310 vmxnet3_cmd_ring_adv_next2fill(ring);
1259 1311 num_to_alloc--;
1260 /* if needed, update the register */ 1312 }
1261 if (unlikely(rq->shared->updateRxProd)) { 1313
1262 VMXNET3_WRITE_BAR0_REG(adapter, 1314 /* if needed, update the register */
1263 rxprod_reg[ring_idx] + rq->qid * 8, 1315 if (unlikely(rq->shared->updateRxProd)) {
1264 rq->rx_ring[ring_idx].next2fill); 1316 VMXNET3_WRITE_BAR0_REG(adapter,
1265 rq->uncommitted[ring_idx] = 0; 1317 rxprod_reg[ring_idx] + rq->qid * 8,
1266 } 1318 ring->next2fill);
1319 rq->uncommitted[ring_idx] = 0;
1267 } 1320 }
1268 1321
1269 vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring); 1322 vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring);
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index f50d36fdf405..8db7ecf5bcab 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -68,10 +68,10 @@
68/* 68/*
69 * Version numbers 69 * Version numbers
70 */ 70 */
71#define VMXNET3_DRIVER_VERSION_STRING "1.1.9.0-k" 71#define VMXNET3_DRIVER_VERSION_STRING "1.1.14.0-k"
72 72
73/* a 32-bit int, each byte encodes a version number in VMXNET3_DRIVER_VERSION */ 73/* a 32-bit int, each byte encodes a version number in VMXNET3_DRIVER_VERSION */
74#define VMXNET3_DRIVER_VERSION_NUM 0x01010900 74#define VMXNET3_DRIVER_VERSION_NUM 0x01010E00
75 75
76#if defined(CONFIG_PCI_MSI) 76#if defined(CONFIG_PCI_MSI)
77 /* RSS only makes sense if MSI-X is supported. */ 77 /* RSS only makes sense if MSI-X is supported. */