aboutsummaryrefslogtreecommitdiffstats
path: root/include/net
diff options
context:
space:
mode:
author Zhang Yanmin <yanmin.zhang@intel.com> 2008-03-13 01:52:37 -0400
committer David S. Miller <davem@davemloft.net> 2008-03-13 01:52:37 -0400
commit f1dd9c379cac7d5a76259e7dffcd5f8edc697d17 (patch)
tree ea1870f5720d842ae8968922226a424f3cb28726 /include/net
parent 22626216c46f2ec86287e75ea86dd9ac3df54265 (diff)
[NET]: Fix tbench regression in 2.6.25-rc1
Compared with kernel 2.6.24, the tbench result shows a regression with 2.6.25-rc1. 1) On a 2 quad-core processor stoakley: 4%. 2) On a 4 quad-core processor tigerton: more than 30%. Bisection located the patch below. b4ce92775c2e7ff9cf79cca4e0a19c8c5fd6287b is first bad commit commit b4ce92775c2e7ff9cf79cca4e0a19c8c5fd6287b Author: Herbert Xu <herbert@gondor.apana.org.au> Date: Tue Nov 13 21:33:32 2007 -0800 [IPV6]: Move nfheader_len into rt6_info The dst member nfheader_len is only used by IPv6. It's also currently creating a rather ugly alignment hole in struct dst. Therefore this patch moves it from there into struct rt6_info. The above patch changes the cache line alignment, especially that of member __refcnt. I tested this by adding 2 unsigned long padding members before lastuse, so that the 3 members lastuse/__refcnt/__use are moved to the next cache line. With that change the performance is recovered. I created a patch to rearrange the members in struct dst_entry. Following Eric's and Valdis Kletnieks's suggestions, I made a finer arrangement. 1) Move tclassid under ops in case CONFIG_NET_CLS_ROUTE=y, so sizeof(dst_entry)=200 regardless of whether CONFIG_NET_CLS_ROUTE=y or n. I tested many patches on my 16-core tigerton by moving tclassid to different places. It looks like the position of tclassid can also have an impact on performance. If tclassid is moved before metrics, or not moved at all, the performance isn't good. So I moved it behind metrics. 2) Add comments before __refcnt. On the 16-core tigerton: if CONFIG_NET_CLS_ROUTE=y, the result with the patch below is about 18% better than the one without the patch; if CONFIG_NET_CLS_ROUTE=n, the result with the patch below is about 30% better than the one without the patch. With 32-bit 2.6.25-rc1 on the 8-core stoakley, the new patch doesn't introduce a regression. Thanks to Eric, Valdis, and David! Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com> Acked-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net')
-rw-r--r--include/net/dst.h23
1 files changed, 14 insertions, 9 deletions
diff --git a/include/net/dst.h b/include/net/dst.h
index e3ac7d0fc4e1..ae13370e8484 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -52,15 +52,10 @@ struct dst_entry
52 unsigned short header_len; /* more space at head required */ 52 unsigned short header_len; /* more space at head required */
53 unsigned short trailer_len; /* space to reserve at tail */ 53 unsigned short trailer_len; /* space to reserve at tail */
54 54
55 u32 metrics[RTAX_MAX];
56 struct dst_entry *path;
57
58 unsigned long rate_last; /* rate limiting for ICMP */
59 unsigned int rate_tokens; 55 unsigned int rate_tokens;
56 unsigned long rate_last; /* rate limiting for ICMP */
60 57
61#ifdef CONFIG_NET_CLS_ROUTE 58 struct dst_entry *path;
62 __u32 tclassid;
63#endif
64 59
65 struct neighbour *neighbour; 60 struct neighbour *neighbour;
66 struct hh_cache *hh; 61 struct hh_cache *hh;
@@ -70,10 +65,20 @@ struct dst_entry
70 int (*output)(struct sk_buff*); 65 int (*output)(struct sk_buff*);
71 66
72 struct dst_ops *ops; 67 struct dst_ops *ops;
73 68
74 unsigned long lastuse; 69 u32 metrics[RTAX_MAX];
70
71#ifdef CONFIG_NET_CLS_ROUTE
72 __u32 tclassid;
73#endif
74
75 /*
76 * __refcnt wants to be on a different cache line from
77 * input/output/ops or performance tanks badly
78 */
75 atomic_t __refcnt; /* client references */ 79 atomic_t __refcnt; /* client references */
76 int __use; 80 int __use;
81 unsigned long lastuse;
77 union { 82 union {
78 struct dst_entry *next; 83 struct dst_entry *next;
79 struct rtable *rt_next; 84 struct rtable *rt_next;