author	Alexander Duyck <alexander.h.duyck@intel.com>	2011-08-26 03:45:09 -0400
committer	Jeff Kirsher <jeffrey.t.kirsher@intel.com>	2011-10-07 03:09:20 -0400
commit	ebe42d169bd0b4c3e2e355374d07ba7d51744601 (patch)
tree	d728a01ea4917f2c4036697ffd588d9cc71e6e31 /drivers/net
parent	2bbfebe2db3453f9ad5a3de56b77d383b91a7829 (diff)
igb: consolidate creation of Tx buffer info and data descriptor
This change will combine the writes of tx_buffer_info and the Tx data
descriptors into a single function. The advantage of this is that we can
avoid needless memory reads from the buffer info struct and speed things up
by keeping the accesses to the local registers.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
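For readers skimming the patch, the idea can be shown with a small, simplified sketch; this is not the igb code itself, and every type and name in it (buf_info, data_desc, map_then_queue, map_and_queue) is hypothetical. The "before" shape fills a buffer-info array in one pass and then re-reads it from memory in a second pass to build the hardware descriptors; the "after" shape writes the buffer info and the matching descriptor together, so the length and DMA address stay in local variables (and therefore registers) between the two stores.

#include <stdio.h>

/* Illustrative sketch only -- a simplified model of the consolidation,
 * not the driver code.  All names and types here are hypothetical. */
struct buf_info  { unsigned long long dma; unsigned int len; };
struct data_desc { unsigned long long addr; unsigned int cmd_len; };

/* Before: pass 1 fills the buffer-info array, pass 2 reads it back from
 * memory to build the data descriptors. */
static void map_then_queue(struct buf_info *bi, struct data_desc *d,
			   const unsigned long long *dma,
			   const unsigned int *len, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		bi[i].dma = dma[i];
		bi[i].len = len[i];
	}
	for (i = 0; i < n; i++) {
		d[i].addr    = bi[i].dma;	/* re-reads bi[] from memory */
		d[i].cmd_len = bi[i].len;
	}
}

/* After: a single pass writes both, so the values never leave locals
 * between the bookkeeping store and the descriptor store. */
static void map_and_queue(struct buf_info *bi, struct data_desc *d,
			  const unsigned long long *dma,
			  const unsigned int *len, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		unsigned long long a = dma[i];
		unsigned int l = len[i];

		bi[i].dma = a;			/* bookkeeping for unmap/cleanup */
		bi[i].len = l;
		d[i].addr    = a;		/* descriptor built from locals */
		d[i].cmd_len = l;
	}
}

int main(void)
{
	unsigned long long dma[2] = { 0x1000, 0x2000 };
	unsigned int len[2] = { 256, 512 };
	struct buf_info bi[2];
	struct data_desc d[2];

	map_then_queue(bi, d, dma, len, 2);
	map_and_queue(bi, d, dma, len, 2);
	printf("desc0: addr=0x%llx len=%u\n", d[0].addr, d[0].cmd_len);
	return 0;
}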
Diffstat (limited to 'drivers/net')
-rw-r--r--	drivers/net/ethernet/intel/igb/igb.h	|   8
-rw-r--r--	drivers/net/ethernet/intel/igb/igb_main.c	| 318
2 files changed, 184 insertions(+), 142 deletions(-)
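The new igb_tx_map() in the diff below also caps what a single data descriptor carries at IGB_MAX_DATA_PER_TXD (32K, to keep a power-of-two size) and carves larger mappings into chunks. A rough sketch of that splitting idea follows; it is not the driver code, and write_desc() is a made-up stand-in for programming a descriptor.

#include <stdio.h>

/* Sketch only -- mirrors the "size > IGB_MAX_DATA_PER_TXD" loop in the
 * patch below under hypothetical names. */
#define MAX_PER_DESC	(1u << 15)	/* 32K */

static void write_desc(unsigned long long dma, unsigned int len)
{
	printf("desc: addr=0x%llx len=%u\n", dma, len);	/* stand-in for a HW descriptor write */
}

static void split_across_descs(unsigned long long dma, unsigned int size)
{
	while (size > MAX_PER_DESC) {
		write_desc(dma, MAX_PER_DESC);	/* full 32K chunk */
		dma  += MAX_PER_DESC;
		size -= MAX_PER_DESC;
	}
	write_desc(dma, size);			/* remaining tail */
}

int main(void)
{
	split_across_descs(0x1000, 70000);	/* 70000 bytes -> 32768 + 32768 + 4464 */
	return 0;
}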
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index b71d1863e551..77793a9debcc 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -135,7 +135,6 @@ struct vf_data_storage {
 #define IGB_TX_FLAGS_TSO		0x00000004
 #define IGB_TX_FLAGS_IPV4		0x00000008
 #define IGB_TX_FLAGS_TSTAMP		0x00000010
-#define IGB_TX_FLAGS_MAPPED_AS_PAGE	0x00000020
 #define IGB_TX_FLAGS_VLAN_MASK		0xffff0000
 #define IGB_TX_FLAGS_VLAN_SHIFT		16
 
@@ -144,13 +143,12 @@ struct vf_data_storage {
 struct igb_tx_buffer {
 	union e1000_adv_tx_desc *next_to_watch;
 	unsigned long time_stamp;
-	dma_addr_t dma;
-	u32 length;
-	u32 tx_flags;
 	struct sk_buff *skb;
 	unsigned int bytecount;
 	u16 gso_segs;
-	u8 mapped_as_page;
+	dma_addr_t dma;
+	u32 length;
+	u32 tx_flags;
 };
 
 struct igb_rx_buffer {
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index dc93d64cf165..862dd7c0cc70 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -3139,29 +3139,26 @@ static void igb_free_all_tx_resources(struct igb_adapter *adapter)
 		igb_free_tx_resources(adapter->tx_ring[i]);
 }
 
-void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
-				    struct igb_tx_buffer *buffer_info)
-{
-	if (buffer_info->dma) {
-		if (buffer_info->tx_flags & IGB_TX_FLAGS_MAPPED_AS_PAGE)
-			dma_unmap_page(tx_ring->dev,
-				       buffer_info->dma,
-				       buffer_info->length,
-				       DMA_TO_DEVICE);
-		else
-			dma_unmap_single(tx_ring->dev,
-					 buffer_info->dma,
-					 buffer_info->length,
-					 DMA_TO_DEVICE);
-		buffer_info->dma = 0;
-	}
-	if (buffer_info->skb) {
-		dev_kfree_skb_any(buffer_info->skb);
-		buffer_info->skb = NULL;
-	}
-	buffer_info->time_stamp = 0;
-	buffer_info->length = 0;
-	buffer_info->next_to_watch = NULL;
+void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
+				    struct igb_tx_buffer *tx_buffer)
+{
+	if (tx_buffer->skb) {
+		dev_kfree_skb_any(tx_buffer->skb);
+		if (tx_buffer->dma)
+			dma_unmap_single(ring->dev,
+					 tx_buffer->dma,
+					 tx_buffer->length,
+					 DMA_TO_DEVICE);
+	} else if (tx_buffer->dma) {
+		dma_unmap_page(ring->dev,
+			       tx_buffer->dma,
+			       tx_buffer->length,
+			       DMA_TO_DEVICE);
+	}
+	tx_buffer->next_to_watch = NULL;
+	tx_buffer->skb = NULL;
+	tx_buffer->dma = 0;
+	/* buffer_info must be completely set up in the transmit path */
 }
 
 /**
@@ -4138,124 +4135,153 @@ static __le32 igb_tx_olinfo_status(u32 tx_flags, unsigned int paylen,
 	return cpu_to_le32(olinfo_status);
 }
 
-#define IGB_MAX_TXD_PWR	16
-#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
+/*
+ * The largest size we can write to the descriptor is 65535.  In order to
+ * maintain a power of two alignment we have to limit ourselves to 32K.
+ */
+#define IGB_MAX_TXD_PWR	15
+#define IGB_MAX_DATA_PER_TXD	(1 << IGB_MAX_TXD_PWR)
 
-static inline int igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb,
-			     struct igb_tx_buffer *first, u32 tx_flags)
+static void igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb,
+		       struct igb_tx_buffer *first, u32 tx_flags,
+		       const u8 hdr_len)
 {
-	struct igb_tx_buffer *buffer_info;
-	struct device *dev = tx_ring->dev;
-	unsigned int hlen = skb_headlen(skb);
-	unsigned int count = 0, i;
-	unsigned int f;
-	u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
-
-	i = tx_ring->next_to_use;
-
-	buffer_info = &tx_ring->tx_buffer_info[i];
-	BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
-	buffer_info->length = hlen;
-	buffer_info->tx_flags = tx_flags;
-	buffer_info->dma = dma_map_single(dev, skb->data, hlen,
-					  DMA_TO_DEVICE);
-	if (dma_mapping_error(dev, buffer_info->dma))
+	struct igb_tx_buffer *tx_buffer_info;
+	union e1000_adv_tx_desc *tx_desc;
+	dma_addr_t dma;
+	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+	unsigned int data_len = skb->data_len;
+	unsigned int size = skb_headlen(skb);
+	unsigned int paylen = skb->len - hdr_len;
+	__le32 cmd_type;
+	u16 i = tx_ring->next_to_use;
+	u16 gso_segs;
+
+	if (tx_flags & IGB_TX_FLAGS_TSO)
+		gso_segs = skb_shinfo(skb)->gso_segs;
+	else
+		gso_segs = 1;
+
+	/* multiply data chunks by size of headers */
+	first->bytecount = paylen + (gso_segs * hdr_len);
+	first->gso_segs = gso_segs;
+	first->skb = skb;
+
+	tx_desc = IGB_TX_DESC(tx_ring, i);
+
+	tx_desc->read.olinfo_status =
+		igb_tx_olinfo_status(tx_flags, paylen, tx_ring);
+
+	cmd_type = igb_tx_cmd_type(tx_flags);
+
+	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(tx_ring->dev, dma))
 		goto dma_error;
 
-	tx_flags |= IGB_TX_FLAGS_MAPPED_AS_PAGE;
+	/* record length, and DMA address */
+	first->length = size;
+	first->dma = dma;
+	first->tx_flags = tx_flags;
+	tx_desc->read.buffer_addr = cpu_to_le64(dma);
 
-	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
-		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
-		unsigned int len = frag->size;
+	for (;;) {
+		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
+			tx_desc->read.cmd_type_len =
+				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
+
+			i++;
+			tx_desc++;
+			if (i == tx_ring->count) {
+				tx_desc = IGB_TX_DESC(tx_ring, 0);
+				i = 0;
+			}
+
+			dma += IGB_MAX_DATA_PER_TXD;
+			size -= IGB_MAX_DATA_PER_TXD;
+
+			tx_desc->read.olinfo_status = 0;
+			tx_desc->read.buffer_addr = cpu_to_le64(dma);
+		}
+
+		if (likely(!data_len))
+			break;
+
+		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
 
-		count++;
 		i++;
-		if (i == tx_ring->count)
+		tx_desc++;
+		if (i == tx_ring->count) {
+			tx_desc = IGB_TX_DESC(tx_ring, 0);
 			i = 0;
+		}
 
-		buffer_info = &tx_ring->tx_buffer_info[i];
-		BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
-		buffer_info->length = len;
-		buffer_info->tx_flags = tx_flags;
-		buffer_info->dma = skb_frag_dma_map(dev, frag, 0, len,
-						    DMA_TO_DEVICE);
-		if (dma_mapping_error(dev, buffer_info->dma))
+		size = frag->size;
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
+				       size, DMA_TO_DEVICE);
+		if (dma_mapping_error(tx_ring->dev, dma))
 			goto dma_error;
 
+		tx_buffer_info = &tx_ring->tx_buffer_info[i];
+		tx_buffer_info->length = size;
+		tx_buffer_info->dma = dma;
+
+		tx_desc->read.olinfo_status = 0;
+		tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+		frag++;
 	}
 
-	buffer_info->skb = skb;
-	/* multiply data chunks by size of headers */
-	buffer_info->bytecount = ((gso_segs - 1) * hlen) + skb->len;
-	buffer_info->gso_segs = gso_segs;
+	/* write last descriptor with RS and EOP bits */
+	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
+	tx_desc->read.cmd_type_len = cmd_type;
 
 	/* set the timestamp */
 	first->time_stamp = jiffies;
 
+	/*
+	 * Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.  (Only applicable for weak-ordered
+	 * memory model archs, such as IA-64).
+	 *
+	 * We also need this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
 	/* set next_to_watch value indicating a packet is present */
-	first->next_to_watch = IGB_TX_DESC(tx_ring, i);
+	first->next_to_watch = tx_desc;
 
-	return ++count;
+	i++;
+	if (i == tx_ring->count)
+		i = 0;
 
-dma_error:
-	dev_err(dev, "TX DMA map failed\n");
+	tx_ring->next_to_use = i;
+
+	writel(i, tx_ring->tail);
 
-	/* clear timestamp and dma mappings for failed buffer_info mapping */
-	buffer_info->dma = 0;
-	buffer_info->time_stamp = 0;
-	buffer_info->length = 0;
+	/* we need this if more than one processor can write to our tail
+	 * at a time, it syncronizes IO on IA64/Altix systems */
+	mmiowb();
+
+	return;
+
+dma_error:
+	dev_err(tx_ring->dev, "TX DMA map failed\n");
 
-	/* clear timestamp and dma mappings for remaining portion of packet */
-	while (count--) {
+	/* clear dma mappings for failed tx_buffer_info map */
+	for (;;) {
+		tx_buffer_info = &tx_ring->tx_buffer_info[i];
+		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
+		if (tx_buffer_info == first)
+			break;
 		if (i == 0)
 			i = tx_ring->count;
 		i--;
-		buffer_info = &tx_ring->tx_buffer_info[i];
-		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
 	}
 
-	return 0;
-}
-
-static inline void igb_tx_queue(struct igb_ring *tx_ring,
-				u32 tx_flags, int count, u32 paylen,
-				u8 hdr_len)
-{
-	union e1000_adv_tx_desc *tx_desc;
-	struct igb_tx_buffer *buffer_info;
-	__le32 olinfo_status, cmd_type;
-	unsigned int i = tx_ring->next_to_use;
-
-	cmd_type = igb_tx_cmd_type(tx_flags);
-	olinfo_status = igb_tx_olinfo_status(tx_flags,
-					     paylen - hdr_len,
-					     tx_ring);
-
-	do {
-		buffer_info = &tx_ring->tx_buffer_info[i];
-		tx_desc = IGB_TX_DESC(tx_ring, i);
-		tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
-		tx_desc->read.cmd_type_len = cmd_type |
-					     cpu_to_le32(buffer_info->length);
-		tx_desc->read.olinfo_status = olinfo_status;
-		count--;
-		i++;
-		if (i == tx_ring->count)
-			i = 0;
-	} while (count > 0);
-
-	tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_TXD_DCMD);
-	/* Force memory writes to complete before letting h/w
-	 * know there are new descriptors to fetch.  (Only
-	 * applicable for weak-ordered memory model archs,
-	 * such as IA-64). */
-	wmb();
-
 	tx_ring->next_to_use = i;
-	writel(i, tx_ring->tail);
-	/* we need this if more than one processor can write to our tail
-	 * at a time, it syncronizes IO on IA64/Altix systems */
-	mmiowb();
 }
 
 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
@@ -4295,7 +4321,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
 				  struct igb_ring *tx_ring)
 {
 	struct igb_tx_buffer *first;
-	int tso, count;
+	int tso;
 	u32 tx_flags = 0;
 	__be16 protocol = vlan_get_protocol(skb);
 	u8 hdr_len = 0;
@@ -4335,19 +4361,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
 		tx_flags |= IGB_TX_FLAGS_CSUM;
 	}
 
-	/*
-	 * count reflects descriptors mapped, if 0 or less then mapping error
-	 * has occurred and we need to rewind the descriptor queue
-	 */
-	count = igb_tx_map(tx_ring, skb, first, tx_flags);
-	if (!count) {
-		dev_kfree_skb_any(skb);
-		first->time_stamp = 0;
-		tx_ring->next_to_use = first - tx_ring->tx_buffer_info;
-		return NETDEV_TX_OK;
-	}
-
-	igb_tx_queue(tx_ring, tx_flags, count, skb->len, hdr_len);
+	igb_tx_map(tx_ring, skb, first, tx_flags, hdr_len);
 
 	/* Make sure there is space in the ring for the next send. */
 	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
@@ -5609,17 +5623,26 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 		/* clear next_to_watch to prevent false hangs */
 		tx_buffer->next_to_watch = NULL;
 
-		do {
-			tx_desc->wb.status = 0;
-			if (likely(tx_desc == eop_desc)) {
-				eop_desc = NULL;
+		/* update the statistics for this packet */
+		total_bytes += tx_buffer->bytecount;
+		total_packets += tx_buffer->gso_segs;
 
-				total_bytes += tx_buffer->bytecount;
-				total_packets += tx_buffer->gso_segs;
-				igb_tx_hwtstamp(q_vector, tx_buffer);
-			}
+		/* retrieve hardware timestamp */
+		igb_tx_hwtstamp(q_vector, tx_buffer);
+
+		/* free the skb */
+		dev_kfree_skb_any(tx_buffer->skb);
+		tx_buffer->skb = NULL;
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 tx_buffer->dma,
+				 tx_buffer->length,
+				 DMA_TO_DEVICE);
 
-			igb_unmap_and_free_tx_resource(tx_ring, tx_buffer);
+		/* clear last DMA location and unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buffer->dma = 0;
 
 			tx_buffer++;
 			tx_desc++;
@@ -5629,7 +5652,28 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 				tx_buffer = tx_ring->tx_buffer_info;
 				tx_desc = IGB_TX_DESC(tx_ring, 0);
 			}
-		} while (eop_desc);
+
+			/* unmap any remaining paged data */
+			if (tx_buffer->dma) {
+				dma_unmap_page(tx_ring->dev,
+					       tx_buffer->dma,
+					       tx_buffer->length,
+					       DMA_TO_DEVICE);
+			}
+		}
+
+		/* clear last DMA location */
+		tx_buffer->dma = 0;
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buffer++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buffer = tx_ring->tx_buffer_info;
+			tx_desc = IGB_TX_DESC(tx_ring, 0);
+		}
 	}
 
 	i += tx_ring->count;