about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2009-06-05 00:04:16 -0400
committerDavid S. Miller <davem@davemloft.net>2009-06-08 03:21:48 -0400
commit042a53a9e437feaf2230dd2cadcecfae9c7bfe05 (patch)
treeae9078f61e390a3014aecb3fe80d3438ab25ee51
parenteae3f29cc73f83cc3f1891d3ad40021b5172c630 (diff)
net: skb_shared_info optimization
skb_dma_unmap() is quite expensive for small packets, because we use two different cache lines from skb_shared_info: one to access nr_frags, one to access dma_maps[0]. Instead of dma_maps being an array of MAX_SKB_FRAGS + 1 elements, keep the head mapping alone in a new dma_head field, close to nr_frags, to reduce cache line misses. Tested on my dev machine (bnx2 & tg3 adapters), nice speedup! Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/bnx2.c6
-rw-r--r--drivers/net/e1000/e1000_main.c4
-rw-r--r--drivers/net/e1000e/netdev.c4
-rw-r--r--drivers/net/igb/igb_main.c5
-rw-r--r--drivers/net/igbvf/netdev.c5
-rw-r--r--drivers/net/ixgb/ixgb_main.c4
-rw-r--r--drivers/net/ixgbe/ixgbe_main.c4
-rw-r--r--drivers/net/tg3.c10
-rw-r--r--include/linux/skbuff.h5
-rw-r--r--net/core/skb_dma_map.c12
10 files changed, 30 insertions(+), 29 deletions(-)
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index f53017250e09..391d2d47089c 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -5487,7 +5487,7 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
5487 dev_kfree_skb(skb); 5487 dev_kfree_skb(skb);
5488 return -EIO; 5488 return -EIO;
5489 } 5489 }
5490 map = skb_shinfo(skb)->dma_maps[0]; 5490 map = skb_shinfo(skb)->dma_head;
5491 5491
5492 REG_WR(bp, BNX2_HC_COMMAND, 5492 REG_WR(bp, BNX2_HC_COMMAND,
5493 bp->hc_cmd | BNX2_HC_COMMAND_COAL_NOW_WO_INT); 5493 bp->hc_cmd | BNX2_HC_COMMAND_COAL_NOW_WO_INT);
@@ -6167,7 +6167,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
6167 } 6167 }
6168 6168
6169 sp = skb_shinfo(skb); 6169 sp = skb_shinfo(skb);
6170 mapping = sp->dma_maps[0]; 6170 mapping = sp->dma_head;
6171 6171
6172 tx_buf = &txr->tx_buf_ring[ring_prod]; 6172 tx_buf = &txr->tx_buf_ring[ring_prod];
6173 tx_buf->skb = skb; 6173 tx_buf->skb = skb;
@@ -6191,7 +6191,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
6191 txbd = &txr->tx_desc_ring[ring_prod]; 6191 txbd = &txr->tx_desc_ring[ring_prod];
6192 6192
6193 len = frag->size; 6193 len = frag->size;
6194 mapping = sp->dma_maps[i + 1]; 6194 mapping = sp->dma_maps[i];
6195 6195
6196 txbd->tx_bd_haddr_hi = (u64) mapping >> 32; 6196 txbd->tx_bd_haddr_hi = (u64) mapping >> 32;
6197 txbd->tx_bd_haddr_lo = (u64) mapping & 0xffffffff; 6197 txbd->tx_bd_haddr_lo = (u64) mapping & 0xffffffff;
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 05e87a59f1c6..8d36743c8140 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2998,7 +2998,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
2998 size -= 4; 2998 size -= 4;
2999 2999
3000 buffer_info->length = size; 3000 buffer_info->length = size;
3001 buffer_info->dma = map[0] + offset; 3001 buffer_info->dma = skb_shinfo(skb)->dma_head + offset;
3002 buffer_info->time_stamp = jiffies; 3002 buffer_info->time_stamp = jiffies;
3003 buffer_info->next_to_watch = i; 3003 buffer_info->next_to_watch = i;
3004 3004
@@ -3039,7 +3039,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
3039 size -= 4; 3039 size -= 4;
3040 3040
3041 buffer_info->length = size; 3041 buffer_info->length = size;
3042 buffer_info->dma = map[f + 1] + offset; 3042 buffer_info->dma = map[f] + offset;
3043 buffer_info->time_stamp = jiffies; 3043 buffer_info->time_stamp = jiffies;
3044 buffer_info->next_to_watch = i; 3044 buffer_info->next_to_watch = i;
3045 3045
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 38694c79edcc..9043f1b845fe 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -3916,7 +3916,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
3916 buffer_info->length = size; 3916 buffer_info->length = size;
3917 buffer_info->time_stamp = jiffies; 3917 buffer_info->time_stamp = jiffies;
3918 buffer_info->next_to_watch = i; 3918 buffer_info->next_to_watch = i;
3919 buffer_info->dma = map[0] + offset; 3919 buffer_info->dma = skb_shinfo(skb)->dma_head + offset;
3920 count++; 3920 count++;
3921 3921
3922 len -= size; 3922 len -= size;
@@ -3947,7 +3947,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
3947 buffer_info->length = size; 3947 buffer_info->length = size;
3948 buffer_info->time_stamp = jiffies; 3948 buffer_info->time_stamp = jiffies;
3949 buffer_info->next_to_watch = i; 3949 buffer_info->next_to_watch = i;
3950 buffer_info->dma = map[f + 1] + offset; 3950 buffer_info->dma = map[f] + offset;
3951 3951
3952 len -= size; 3952 len -= size;
3953 offset += size; 3953 offset += size;
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 958b2879da48..ea17319624aa 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -3139,8 +3139,7 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
3139 /* set time_stamp *before* dma to help avoid a possible race */ 3139 /* set time_stamp *before* dma to help avoid a possible race */
3140 buffer_info->time_stamp = jiffies; 3140 buffer_info->time_stamp = jiffies;
3141 buffer_info->next_to_watch = i; 3141 buffer_info->next_to_watch = i;
3142 buffer_info->dma = map[count]; 3142 buffer_info->dma = skb_shinfo(skb)->dma_head;
3143 count++;
3144 3143
3145 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) { 3144 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3146 struct skb_frag_struct *frag; 3145 struct skb_frag_struct *frag;
@@ -3164,7 +3163,7 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
3164 tx_ring->buffer_info[i].skb = skb; 3163 tx_ring->buffer_info[i].skb = skb;
3165 tx_ring->buffer_info[first].next_to_watch = i; 3164 tx_ring->buffer_info[first].next_to_watch = i;
3166 3165
3167 return count; 3166 return count + 1;
3168} 3167}
3169 3168
3170static inline void igb_tx_queue_adv(struct igb_adapter *adapter, 3169static inline void igb_tx_queue_adv(struct igb_adapter *adapter,
diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c
index 5f7ba1a4990b..22aadb7884fa 100644
--- a/drivers/net/igbvf/netdev.c
+++ b/drivers/net/igbvf/netdev.c
@@ -2119,8 +2119,7 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
2119 /* set time_stamp *before* dma to help avoid a possible race */ 2119 /* set time_stamp *before* dma to help avoid a possible race */
2120 buffer_info->time_stamp = jiffies; 2120 buffer_info->time_stamp = jiffies;
2121 buffer_info->next_to_watch = i; 2121 buffer_info->next_to_watch = i;
2122 buffer_info->dma = map[count]; 2122 buffer_info->dma = skb_shinfo(skb)->dma_head;
2123 count++;
2124 2123
2125 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) { 2124 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
2126 struct skb_frag_struct *frag; 2125 struct skb_frag_struct *frag;
@@ -2144,7 +2143,7 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
2144 tx_ring->buffer_info[i].skb = skb; 2143 tx_ring->buffer_info[i].skb = skb;
2145 tx_ring->buffer_info[first].next_to_watch = i; 2144 tx_ring->buffer_info[first].next_to_watch = i;
2146 2145
2147 return count; 2146 return count + 1;
2148} 2147}
2149 2148
2150static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter, 2149static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter,
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index 6eb7f37a113b..9c897cf86b9f 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1300,7 +1300,7 @@ ixgb_tx_map(struct ixgb_adapter *adapter, struct sk_buff *skb,
1300 buffer_info->length = size; 1300 buffer_info->length = size;
1301 WARN_ON(buffer_info->dma != 0); 1301 WARN_ON(buffer_info->dma != 0);
1302 buffer_info->time_stamp = jiffies; 1302 buffer_info->time_stamp = jiffies;
1303 buffer_info->dma = map[0] + offset; 1303 buffer_info->dma = skb_shinfo(skb)->dma_head + offset;
1304 pci_map_single(adapter->pdev, 1304 pci_map_single(adapter->pdev,
1305 skb->data + offset, 1305 skb->data + offset,
1306 size, 1306 size,
@@ -1340,7 +1340,7 @@ ixgb_tx_map(struct ixgb_adapter *adapter, struct sk_buff *skb,
1340 1340
1341 buffer_info->length = size; 1341 buffer_info->length = size;
1342 buffer_info->time_stamp = jiffies; 1342 buffer_info->time_stamp = jiffies;
1343 buffer_info->dma = map[f + 1] + offset; 1343 buffer_info->dma = map[f] + offset;
1344 buffer_info->next_to_watch = 0; 1344 buffer_info->next_to_watch = 0;
1345 1345
1346 len -= size; 1346 len -= size;
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index d36003cbb6d4..09994e920d5d 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -4837,7 +4837,7 @@ static int ixgbe_tx_map(struct ixgbe_adapter *adapter,
4837 size = min(len, (uint)IXGBE_MAX_DATA_PER_TXD); 4837 size = min(len, (uint)IXGBE_MAX_DATA_PER_TXD);
4838 4838
4839 tx_buffer_info->length = size; 4839 tx_buffer_info->length = size;
4840 tx_buffer_info->dma = map[0] + offset; 4840 tx_buffer_info->dma = skb_shinfo(skb)->dma_head + offset;
4841 tx_buffer_info->time_stamp = jiffies; 4841 tx_buffer_info->time_stamp = jiffies;
4842 tx_buffer_info->next_to_watch = i; 4842 tx_buffer_info->next_to_watch = i;
4843 4843
@@ -4869,7 +4869,7 @@ static int ixgbe_tx_map(struct ixgbe_adapter *adapter,
4869 size = min(len, (uint)IXGBE_MAX_DATA_PER_TXD); 4869 size = min(len, (uint)IXGBE_MAX_DATA_PER_TXD);
4870 4870
4871 tx_buffer_info->length = size; 4871 tx_buffer_info->length = size;
4872 tx_buffer_info->dma = map[f + 1] + offset; 4872 tx_buffer_info->dma = map[f] + offset;
4873 tx_buffer_info->time_stamp = jiffies; 4873 tx_buffer_info->time_stamp = jiffies;
4874 tx_buffer_info->next_to_watch = i; 4874 tx_buffer_info->next_to_watch = i;
4875 4875
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index a39b534fb43e..46a3f86125be 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -5021,7 +5021,7 @@ static int tigon3_dma_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb,
5021 /* New SKB is guaranteed to be linear. */ 5021 /* New SKB is guaranteed to be linear. */
5022 entry = *start; 5022 entry = *start;
5023 ret = skb_dma_map(&tp->pdev->dev, new_skb, DMA_TO_DEVICE); 5023 ret = skb_dma_map(&tp->pdev->dev, new_skb, DMA_TO_DEVICE);
5024 new_addr = skb_shinfo(new_skb)->dma_maps[0]; 5024 new_addr = skb_shinfo(new_skb)->dma_head;
5025 5025
5026 /* Make sure new skb does not cross any 4G boundaries. 5026 /* Make sure new skb does not cross any 4G boundaries.
5027 * Drop the packet if it does. 5027 * Drop the packet if it does.
@@ -5155,7 +5155,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
5155 5155
5156 sp = skb_shinfo(skb); 5156 sp = skb_shinfo(skb);
5157 5157
5158 mapping = sp->dma_maps[0]; 5158 mapping = sp->dma_head;
5159 5159
5160 tp->tx_buffers[entry].skb = skb; 5160 tp->tx_buffers[entry].skb = skb;
5161 5161
@@ -5173,7 +5173,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
5173 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 5173 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
5174 5174
5175 len = frag->size; 5175 len = frag->size;
5176 mapping = sp->dma_maps[i + 1]; 5176 mapping = sp->dma_maps[i];
5177 tp->tx_buffers[entry].skb = NULL; 5177 tp->tx_buffers[entry].skb = NULL;
5178 5178
5179 tg3_set_txd(tp, entry, mapping, len, 5179 tg3_set_txd(tp, entry, mapping, len,
@@ -5331,7 +5331,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
5331 5331
5332 sp = skb_shinfo(skb); 5332 sp = skb_shinfo(skb);
5333 5333
5334 mapping = sp->dma_maps[0]; 5334 mapping = sp->dma_head;
5335 5335
5336 tp->tx_buffers[entry].skb = skb; 5336 tp->tx_buffers[entry].skb = skb;
5337 5337
@@ -5356,7 +5356,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
5356 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 5356 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
5357 5357
5358 len = frag->size; 5358 len = frag->size;
5359 mapping = sp->dma_maps[i + 1]; 5359 mapping = sp->dma_maps[i];
5360 5360
5361 tp->tx_buffers[entry].skb = NULL; 5361 tp->tx_buffers[entry].skb = NULL;
5362 5362
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7485058125e3..aad484cd5863 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -189,6 +189,9 @@ struct skb_shared_info {
189 atomic_t dataref; 189 atomic_t dataref;
190 unsigned short nr_frags; 190 unsigned short nr_frags;
191 unsigned short gso_size; 191 unsigned short gso_size;
192#ifdef CONFIG_HAS_DMA
193 dma_addr_t dma_head;
194#endif
192 /* Warning: this field is not always filled in (UFO)! */ 195 /* Warning: this field is not always filled in (UFO)! */
193 unsigned short gso_segs; 196 unsigned short gso_segs;
194 unsigned short gso_type; 197 unsigned short gso_type;
@@ -198,7 +201,7 @@ struct skb_shared_info {
198 struct skb_shared_hwtstamps hwtstamps; 201 struct skb_shared_hwtstamps hwtstamps;
199 skb_frag_t frags[MAX_SKB_FRAGS]; 202 skb_frag_t frags[MAX_SKB_FRAGS];
200#ifdef CONFIG_HAS_DMA 203#ifdef CONFIG_HAS_DMA
201 dma_addr_t dma_maps[MAX_SKB_FRAGS + 1]; 204 dma_addr_t dma_maps[MAX_SKB_FRAGS];
202#endif 205#endif
203 /* Intermediate layers must ensure that destructor_arg 206 /* Intermediate layers must ensure that destructor_arg
204 * remains valid until skb destructor */ 207 * remains valid until skb destructor */
diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c
index 7adb623ef664..79687dfd6957 100644
--- a/net/core/skb_dma_map.c
+++ b/net/core/skb_dma_map.c
@@ -20,7 +20,7 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb,
20 if (dma_mapping_error(dev, map)) 20 if (dma_mapping_error(dev, map))
21 goto out_err; 21 goto out_err;
22 22
23 sp->dma_maps[0] = map; 23 sp->dma_head = map;
24 for (i = 0; i < sp->nr_frags; i++) { 24 for (i = 0; i < sp->nr_frags; i++) {
25 skb_frag_t *fp = &sp->frags[i]; 25 skb_frag_t *fp = &sp->frags[i];
26 26
@@ -28,7 +28,7 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb,
28 fp->size, dir); 28 fp->size, dir);
29 if (dma_mapping_error(dev, map)) 29 if (dma_mapping_error(dev, map))
30 goto unwind; 30 goto unwind;
31 sp->dma_maps[i + 1] = map; 31 sp->dma_maps[i] = map;
32 } 32 }
33 33
34 return 0; 34 return 0;
@@ -37,10 +37,10 @@ unwind:
37 while (--i >= 0) { 37 while (--i >= 0) {
38 skb_frag_t *fp = &sp->frags[i]; 38 skb_frag_t *fp = &sp->frags[i];
39 39
40 dma_unmap_page(dev, sp->dma_maps[i + 1], 40 dma_unmap_page(dev, sp->dma_maps[i],
41 fp->size, dir); 41 fp->size, dir);
42 } 42 }
43 dma_unmap_single(dev, sp->dma_maps[0], 43 dma_unmap_single(dev, sp->dma_head,
44 skb_headlen(skb), dir); 44 skb_headlen(skb), dir);
45out_err: 45out_err:
46 return -ENOMEM; 46 return -ENOMEM;
@@ -53,12 +53,12 @@ void skb_dma_unmap(struct device *dev, struct sk_buff *skb,
53 struct skb_shared_info *sp = skb_shinfo(skb); 53 struct skb_shared_info *sp = skb_shinfo(skb);
54 int i; 54 int i;
55 55
56 dma_unmap_single(dev, sp->dma_maps[0], 56 dma_unmap_single(dev, sp->dma_head,
57 skb_headlen(skb), dir); 57 skb_headlen(skb), dir);
58 for (i = 0; i < sp->nr_frags; i++) { 58 for (i = 0; i < sp->nr_frags; i++) {
59 skb_frag_t *fp = &sp->frags[i]; 59 skb_frag_t *fp = &sp->frags[i];
60 60
61 dma_unmap_page(dev, sp->dma_maps[i + 1], 61 dma_unmap_page(dev, sp->dma_maps[i],
62 fp->size, dir); 62 fp->size, dir);
63 } 63 }
64} 64}