 net/openvswitch/flow.c       | 119
 net/openvswitch/flow.h       |  10
 net/openvswitch/flow_table.c |  46
 net/openvswitch/flow_table.h |   2
 4 files changed, 122 insertions(+), 55 deletions(-)
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index aad7a8da70b1..432f04d5c896 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -65,8 +65,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
 {
 	struct flow_stats *stats;
 	__be16 tcp_flags = 0;
+	int node = numa_node_id();
 
-	stats = this_cpu_ptr(flow->stats);
+	stats = rcu_dereference(flow->stats[node]);
 
 	if ((flow->key.eth.type == htons(ETH_P_IP) ||
 	     flow->key.eth.type == htons(ETH_P_IPV6)) &&
@@ -76,68 +77,102 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
 		tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb));
 	}
 
-	spin_lock(&stats->lock);
+	/* Check if already have node-specific stats. */
+	if (likely(stats)) {
+		spin_lock(&stats->lock);
+		/* Mark if we write on the pre-allocated stats. */
+		if (node == 0 && unlikely(flow->stats_last_writer != node))
+			flow->stats_last_writer = node;
+	} else {
+		stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */
+		spin_lock(&stats->lock);
+
+		/* If the current NUMA-node is the only writer on the
+		 * pre-allocated stats keep using them.
+		 */
+		if (unlikely(flow->stats_last_writer != node)) {
+			/* A previous locker may have already allocated the
+			 * stats, so we need to check again.  If node-specific
+			 * stats were already allocated, we update the pre-
+			 * allocated stats as we have already locked them.
+			 */
+			if (likely(flow->stats_last_writer != NUMA_NO_NODE)
+			    && likely(!rcu_dereference(flow->stats[node]))) {
+				/* Try to allocate node-specific stats. */
+				struct flow_stats *new_stats;
+
+				new_stats =
+					kmem_cache_alloc_node(flow_stats_cache,
+							      GFP_THISNODE |
+							      __GFP_NOMEMALLOC,
+							      node);
+				if (likely(new_stats)) {
+					new_stats->used = jiffies;
+					new_stats->packet_count = 1;
+					new_stats->byte_count = skb->len;
+					new_stats->tcp_flags = tcp_flags;
+					spin_lock_init(&new_stats->lock);
+
+					rcu_assign_pointer(flow->stats[node],
+							   new_stats);
+					goto unlock;
+				}
+			}
+			flow->stats_last_writer = node;
+		}
+	}
+
 	stats->used = jiffies;
 	stats->packet_count++;
 	stats->byte_count += skb->len;
 	stats->tcp_flags |= tcp_flags;
-	spin_unlock(&stats->lock);
-}
-
-static void stats_read(struct flow_stats *stats,
-		       struct ovs_flow_stats *ovs_stats,
-		       unsigned long *used, __be16 *tcp_flags)
-{
-	spin_lock(&stats->lock);
-	if (!*used || time_after(stats->used, *used))
-		*used = stats->used;
-	*tcp_flags |= stats->tcp_flags;
-	ovs_stats->n_packets += stats->packet_count;
-	ovs_stats->n_bytes += stats->byte_count;
+unlock:
 	spin_unlock(&stats->lock);
 }
 
 void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
 			unsigned long *used, __be16 *tcp_flags)
 {
-	int cpu;
+	int node;
 
 	*used = 0;
 	*tcp_flags = 0;
 	memset(ovs_stats, 0, sizeof(*ovs_stats));
 
-	local_bh_disable();
-
-	for_each_possible_cpu(cpu) {
-		struct flow_stats *stats;
+	for_each_node(node) {
+		struct flow_stats *stats = rcu_dereference(flow->stats[node]);
 
-		stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
-		stats_read(stats, ovs_stats, used, tcp_flags);
+		if (stats) {
+			/* Local CPU may write on non-local stats, so we must
+			 * block bottom-halves here.
+			 */
+			spin_lock_bh(&stats->lock);
+			if (!*used || time_after(stats->used, *used))
+				*used = stats->used;
+			*tcp_flags |= stats->tcp_flags;
+			ovs_stats->n_packets += stats->packet_count;
+			ovs_stats->n_bytes += stats->byte_count;
+			spin_unlock_bh(&stats->lock);
+		}
 	}
-
-	local_bh_enable();
-}
-
-static void stats_reset(struct flow_stats *stats)
-{
-	spin_lock(&stats->lock);
-	stats->used = 0;
-	stats->packet_count = 0;
-	stats->byte_count = 0;
-	stats->tcp_flags = 0;
-	spin_unlock(&stats->lock);
 }
 
 void ovs_flow_stats_clear(struct sw_flow *flow)
 {
-	int cpu;
+	int node;
 
-	local_bh_disable();
-
-	for_each_possible_cpu(cpu)
-		stats_reset(per_cpu_ptr(flow->stats, cpu));
-
-	local_bh_enable();
+	for_each_node(node) {
+		struct flow_stats *stats = rcu_dereference(flow->stats[node]);
+
+		if (stats) {
+			spin_lock_bh(&stats->lock);
+			stats->used = 0;
+			stats->packet_count = 0;
+			stats->byte_count = 0;
+			stats->tcp_flags = 0;
+			spin_unlock_bh(&stats->lock);
+		}
+	}
 }
 
 static int check_header(struct sk_buff *skb, int len)
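
Note: the rewritten ovs_flow_stats_update() above is the heart of the change. Every writer first tries its node-local stats[node]; if that is still NULL it falls back to the shared, pre-allocated stats[0], and a node-local copy is allocated only once a second NUMA node is observed writing (stats_last_writer records the last node that used stats[0]). Below is a minimal, single-threaded userspace sketch of that control flow, with plain pointers and calloc() standing in for the kernel's RCU accessors, spinlocks, and kmem caches; MAX_NODES and NO_NODE are illustrative stand-ins for the node count and NUMA_NO_NODE.

  #include <stdio.h>
  #include <stdlib.h>

  #define MAX_NODES 4
  #define NO_NODE   (-1)		/* stand-in for NUMA_NO_NODE */

  struct flow_stats {
  	unsigned long packet_count;
  	unsigned long byte_count;
  };

  struct sw_flow {
  	int stats_last_writer;			/* last node writing stats[0] */
  	struct flow_stats *stats[MAX_NODES];	/* stats[0] pre-allocated */
  };

  static void stats_update(struct sw_flow *flow, int node, unsigned int len)
  {
  	struct flow_stats *stats = flow->stats[node];

  	if (!stats) {
  		/* No node-local stats yet: fall back to shared stats[0]. */
  		stats = flow->stats[0];

  		if (flow->stats_last_writer != node) {
  			struct flow_stats *new_stats = NULL;

  			/* A second node is writing: try a node-local copy,
  			 * unless this is the very first writer. */
  			if (flow->stats_last_writer != NO_NODE)
  				new_stats = calloc(1, sizeof(*new_stats));

  			if (new_stats)
  				flow->stats[node] = stats = new_stats;
  			else
  				flow->stats_last_writer = node;
  		}
  	}

  	stats->packet_count++;
  	stats->byte_count += len;
  }

  int main(void)
  {
  	struct sw_flow flow = { .stats_last_writer = NO_NODE };
  	int node;

  	flow.stats[0] = calloc(1, sizeof(struct flow_stats));

  	stats_update(&flow, 1, 64);	/* first writer claims stats[0] */
  	stats_update(&flow, 1, 64);	/* same node keeps using stats[0] */
  	stats_update(&flow, 2, 128);	/* second node gets its own stats[2] */

  	for (node = 0; node < MAX_NODES; node++)
  		if (flow.stats[node])
  			printf("node %d: %lu pkts, %lu bytes\n", node,
  			       flow.stats[node]->packet_count,
  			       flow.stats[node]->byte_count);
  	return 0;
  }

A flow only ever touched from one node therefore never allocates beyond stats[0]; node-local copies appear only when cross-node contention is actually observed, and a failed allocation simply keeps accounting in stats[0].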
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 9c0dd8aa3117..ddcebc53224f 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -159,12 +159,18 @@ struct sw_flow {
 	struct rcu_head rcu;
 	struct hlist_node hash_node[2];
 	u32 hash;
-
+	int stats_last_writer;		/* NUMA-node id of the last writer on
+					 * 'stats[0]'.
+					 */
 	struct sw_flow_key key;
 	struct sw_flow_key unmasked_key;
 	struct sw_flow_mask *mask;
 	struct sw_flow_actions __rcu *sf_acts;
-	struct flow_stats __percpu *stats;
+	struct flow_stats __rcu *stats[]; /* One for each NUMA node.  First one
+					   * is allocated at flow creation time,
+					   * the rest are allocated on demand
+					   * while holding the 'stats[0].lock'.
+					   */
 };
 
 struct arp_eth_header {
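
Note: stats[] is now a flexible array member at the tail of struct sw_flow, so the flow and its per-node pointer slots come from a single allocation; this is why ovs_flow_init() below sizes the "sw_flow" cache as sizeof(struct sw_flow) + num_possible_nodes() * sizeof(struct flow_stats *). A minimal userspace sketch of that layout, assuming calloc() in place of the slab cache and nr_nodes in place of num_possible_nodes():

  #include <stdio.h>
  #include <stdlib.h>

  struct flow_stats {
  	unsigned long packet_count;
  	unsigned long byte_count;
  };

  struct sw_flow {
  	unsigned int hash;
  	/* ... key, mask, actions elided ... */
  	struct flow_stats *stats[];	/* one slot per possible NUMA node */
  };

  static struct sw_flow *flow_alloc(int nr_nodes)
  {
  	/* One allocation covers the flow header plus all pointer slots. */
  	struct sw_flow *flow = calloc(1, sizeof(*flow) +
  				      nr_nodes * sizeof(struct flow_stats *));

  	if (!flow)
  		return NULL;

  	/* Slot 0 is always populated at creation time; calloc() already
  	 * left the remaining slots NULL. */
  	flow->stats[0] = calloc(1, sizeof(struct flow_stats));
  	if (!flow->stats[0]) {
  		free(flow);
  		return NULL;
  	}
  	return flow;
  }

  int main(void)
  {
  	struct sw_flow *flow = flow_alloc(4);

  	if (!flow)
  		return 1;
  	printf("header %zu bytes, stats[0] at %p\n",
  	       sizeof(struct sw_flow), (void *)flow->stats[0]);
  	free(flow->stats[0]);
  	free(flow);
  	return 0;
  }

Zeroing the allocation leaves stats[1..] NULL, which matches the RCU_INIT_POINTER(flow->stats[node], NULL) loop in ovs_flow_alloc().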
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index aa92da23053d..d8ef37b937bd 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -48,6 +48,7 @@
 #define REHASH_INTERVAL		(10 * 60 * HZ)
 
 static struct kmem_cache *flow_cache;
+struct kmem_cache *flow_stats_cache __read_mostly;
 
 static u16 range_n_bytes(const struct sw_flow_key_range *range)
 {
@@ -75,7 +76,8 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
 struct sw_flow *ovs_flow_alloc(void)
 {
 	struct sw_flow *flow;
-	int cpu;
+	struct flow_stats *stats;
+	int node;
 
 	flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
 	if (!flow)
@@ -83,17 +85,22 @@ struct sw_flow *ovs_flow_alloc(void)
 
 	flow->sf_acts = NULL;
 	flow->mask = NULL;
+	flow->stats_last_writer = NUMA_NO_NODE;
 
-	flow->stats = alloc_percpu(struct flow_stats);
-	if (!flow->stats)
+	/* Initialize the default stat node. */
+	stats = kmem_cache_alloc_node(flow_stats_cache,
+				      GFP_KERNEL | __GFP_ZERO, 0);
+	if (!stats)
 		goto err;
 
-	for_each_possible_cpu(cpu) {
-		struct flow_stats *cpu_stats;
+	spin_lock_init(&stats->lock);
+
+	RCU_INIT_POINTER(flow->stats[0], stats);
+
+	for_each_node(node)
+		if (node != 0)
+			RCU_INIT_POINTER(flow->stats[node], NULL);
 
-		cpu_stats = per_cpu_ptr(flow->stats, cpu);
-		spin_lock_init(&cpu_stats->lock);
-	}
 	return flow;
 err:
 	kmem_cache_free(flow_cache, flow);
@@ -130,8 +137,13 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets)
 
 static void flow_free(struct sw_flow *flow)
 {
+	int node;
+
 	kfree((struct sf_flow_acts __force *)flow->sf_acts);
-	free_percpu(flow->stats);
+	for_each_node(node)
+		if (flow->stats[node])
+			kmem_cache_free(flow_stats_cache,
+					(struct flow_stats __force *)flow->stats[node]);
 	kmem_cache_free(flow_cache, flow);
 }
 
137 149
@@ -586,16 +598,28 @@ int ovs_flow_init(void)
 	BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
 	BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
 
-	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
-					0, NULL);
+	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
+				       + (num_possible_nodes()
+					  * sizeof(struct flow_stats *)),
+				       0, 0, NULL);
 	if (flow_cache == NULL)
 		return -ENOMEM;
 
+	flow_stats_cache
+		= kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats),
+				    0, SLAB_HWCACHE_ALIGN, NULL);
+	if (flow_stats_cache == NULL) {
+		kmem_cache_destroy(flow_cache);
+		flow_cache = NULL;
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
 /* Uninitializes the flow module. */
 void ovs_flow_exit(void)
 {
+	kmem_cache_destroy(flow_stats_cache);
 	kmem_cache_destroy(flow_cache);
 }
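
Note: node-local stats are published with rcu_assign_pointer() and read with rcu_dereference(), so a reader that observes a non-NULL stats[node] is guaranteed to see its fields fully initialized. A minimal userspace analogue of that publish/read pairing, using C11 release/acquire atomics as stand-ins for the RCU primitives (the names here are illustrative, not kernel API):

  #include <stdatomic.h>
  #include <stdio.h>
  #include <stdlib.h>

  struct flow_stats {
  	unsigned long packet_count;
  	unsigned long byte_count;
  };

  /* One per-node slot; static storage starts out NULL. */
  static _Atomic(struct flow_stats *) stats_slot;

  static void publish_stats(unsigned long packets, unsigned long bytes)
  {
  	struct flow_stats *s = malloc(sizeof(*s));

  	if (!s)
  		return;
  	s->packet_count = packets;	/* initialize fully first... */
  	s->byte_count = bytes;
  	/* ...then publish: the release store orders the writes above it
  	 * before the pointer becomes visible, as rcu_assign_pointer()
  	 * does in the kernel. */
  	atomic_store_explicit(&stats_slot, s, memory_order_release);
  }

  static void read_stats(void)
  {
  	/* Acquire load pairs with the release store, standing in for
  	 * rcu_dereference(). */
  	struct flow_stats *s =
  		atomic_load_explicit(&stats_slot, memory_order_acquire);

  	if (s)
  		printf("%lu packets, %lu bytes\n",
  		       s->packet_count, s->byte_count);
  }

  int main(void)
  {
  	publish_stats(1, 64);
  	read_stats();
  	free(atomic_load(&stats_slot));
  	return 0;
  }

The free side differs in the kernel: flow_free() may cast away __rcu and release the per-node stats immediately only because it runs once the flow can no longer be reached by readers, after an RCU grace period.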
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index c26c59a7ab57..ca8a5820f615 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -52,6 +52,8 @@ struct flow_table {
 	unsigned int count;
 };
 
+extern struct kmem_cache *flow_stats_cache;
+
 int ovs_flow_init(void);
 void ovs_flow_exit(void);
 