-rw-r--r--  net/openvswitch/flow.c        | 119
-rw-r--r--  net/openvswitch/flow.h        |  10
-rw-r--r--  net/openvswitch/flow_table.c  |  46
-rw-r--r--  net/openvswitch/flow_table.h  |   2
4 files changed, 122 insertions, 55 deletions
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index aad7a8da70b1..432f04d5c896 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -65,8 +65,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
 {
         struct flow_stats *stats;
         __be16 tcp_flags = 0;
+        int node = numa_node_id();
 
-        stats = this_cpu_ptr(flow->stats);
+        stats = rcu_dereference(flow->stats[node]);
 
         if ((flow->key.eth.type == htons(ETH_P_IP) ||
              flow->key.eth.type == htons(ETH_P_IPV6)) &&
@@ -76,68 +77,102 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
                 tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb));
         }
 
-        spin_lock(&stats->lock);
+        /* Check if already have node-specific stats. */
+        if (likely(stats)) {
+                spin_lock(&stats->lock);
+                /* Mark if we write on the pre-allocated stats. */
+                if (node == 0 && unlikely(flow->stats_last_writer != node))
+                        flow->stats_last_writer = node;
+        } else {
+                stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */
+                spin_lock(&stats->lock);
+
+                /* If the current NUMA-node is the only writer on the
+                 * pre-allocated stats keep using them.
+                 */
+                if (unlikely(flow->stats_last_writer != node)) {
+                        /* A previous locker may have already allocated the
+                         * stats, so we need to check again. If node-specific
+                         * stats were already allocated, we update the pre-
+                         * allocated stats as we have already locked them.
+                         */
+                        if (likely(flow->stats_last_writer != NUMA_NO_NODE)
+                            && likely(!rcu_dereference(flow->stats[node]))) {
+                                /* Try to allocate node-specific stats. */
+                                struct flow_stats *new_stats;
+
+                                new_stats =
+                                        kmem_cache_alloc_node(flow_stats_cache,
+                                                              GFP_THISNODE |
+                                                              __GFP_NOMEMALLOC,
+                                                              node);
+                                if (likely(new_stats)) {
+                                        new_stats->used = jiffies;
+                                        new_stats->packet_count = 1;
+                                        new_stats->byte_count = skb->len;
+                                        new_stats->tcp_flags = tcp_flags;
+                                        spin_lock_init(&new_stats->lock);
+
+                                        rcu_assign_pointer(flow->stats[node],
+                                                           new_stats);
+                                        goto unlock;
+                                }
+                        }
+                        flow->stats_last_writer = node;
+                }
+        }
+
         stats->used = jiffies;
         stats->packet_count++;
         stats->byte_count += skb->len;
         stats->tcp_flags |= tcp_flags;
-        spin_unlock(&stats->lock);
-}
-
-static void stats_read(struct flow_stats *stats,
-                       struct ovs_flow_stats *ovs_stats,
-                       unsigned long *used, __be16 *tcp_flags)
-{
-        spin_lock(&stats->lock);
-        if (!*used || time_after(stats->used, *used))
-                *used = stats->used;
-        *tcp_flags |= stats->tcp_flags;
-        ovs_stats->n_packets += stats->packet_count;
-        ovs_stats->n_bytes += stats->byte_count;
+unlock:
         spin_unlock(&stats->lock);
 }
 
 void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
                         unsigned long *used, __be16 *tcp_flags)
 {
-        int cpu;
+        int node;
 
         *used = 0;
         *tcp_flags = 0;
         memset(ovs_stats, 0, sizeof(*ovs_stats));
 
-        local_bh_disable();
-
-        for_each_possible_cpu(cpu) {
-                struct flow_stats *stats;
+        for_each_node(node) {
+                struct flow_stats *stats = rcu_dereference(flow->stats[node]);
 
-                stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
-                stats_read(stats, ovs_stats, used, tcp_flags);
+                if (stats) {
+                        /* Local CPU may write on non-local stats, so we must
+                         * block bottom-halves here.
+                         */
+                        spin_lock_bh(&stats->lock);
+                        if (!*used || time_after(stats->used, *used))
+                                *used = stats->used;
+                        *tcp_flags |= stats->tcp_flags;
+                        ovs_stats->n_packets += stats->packet_count;
+                        ovs_stats->n_bytes += stats->byte_count;
+                        spin_unlock_bh(&stats->lock);
+                }
         }
-
-        local_bh_enable();
-}
-
-static void stats_reset(struct flow_stats *stats)
-{
-        spin_lock(&stats->lock);
-        stats->used = 0;
-        stats->packet_count = 0;
-        stats->byte_count = 0;
-        stats->tcp_flags = 0;
-        spin_unlock(&stats->lock);
 }
 
 void ovs_flow_stats_clear(struct sw_flow *flow)
 {
-        int cpu;
+        int node;
 
-        local_bh_disable();
-
-        for_each_possible_cpu(cpu)
-                stats_reset(per_cpu_ptr(flow->stats, cpu));
-
-        local_bh_enable();
+        for_each_node(node) {
+                struct flow_stats *stats = rcu_dereference(flow->stats[node]);
+
+                if (stats) {
+                        spin_lock_bh(&stats->lock);
+                        stats->used = 0;
+                        stats->packet_count = 0;
+                        stats->byte_count = 0;
+                        stats->tcp_flags = 0;
+                        spin_unlock_bh(&stats->lock);
+                }
+        }
 }
 
 static int check_header(struct sk_buff *skb, int len)
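Note on the update path above: it is a double-checked allocation scheme. The common case dereferences an existing node-local stats block; a miss falls back to the pre-allocated stats[0], and only the second distinct writer node triggers allocation of a node-local block (GFP_THISNODE | __GFP_NOMEMALLOC, so an allocation failure just means staying on the shared block). Below is a minimal userspace model of the writer and reader sides, not kernel code: C11 atomics stand in for RCU pointer publication (no grace-period semantics are modeled), pthread spinlocks for the kernel spinlocks, and MAX_NODES, NODE_NONE, flow_stats_update() and flow_stats_sum() are illustrative names.

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

#define MAX_NODES 4             /* stand-in for num_possible_nodes() */
#define NODE_NONE (-1)          /* stand-in for NUMA_NO_NODE */

struct stats {
        pthread_spinlock_t lock;
        unsigned long packets, bytes;
};

struct flow {
        int last_writer;                        /* last node to write slot 0 */
        struct stats *_Atomic slot[MAX_NODES];  /* slot[0] always exists */
};

/* Writer side: models ovs_flow_stats_update(). */
static void flow_stats_update(struct flow *f, int node, unsigned long len)
{
        struct stats *s = atomic_load(&f->slot[node]);

        if (s) {                                /* fast path: node-local slot */
                pthread_spin_lock(&s->lock);
                if (node == 0 && f->last_writer != 0)
                        f->last_writer = 0;     /* mark writes on slot 0 */
        } else {
                s = atomic_load(&f->slot[0]);   /* fall back to shared slot */
                pthread_spin_lock(&s->lock);
                if (f->last_writer != node) {
                        /* Second distinct writer node: re-check under the
                         * lock, then try to give 'node' its own slot. */
                        if (f->last_writer != NODE_NONE &&
                            !atomic_load(&f->slot[node])) {
                                struct stats *n = calloc(1, sizeof(*n));

                                if (n) {
                                        pthread_spin_init(&n->lock,
                                                PTHREAD_PROCESS_PRIVATE);
                                        n->packets = 1; /* counts this packet */
                                        n->bytes = len;
                                        atomic_store(&f->slot[node], n);
                                        goto unlock;
                                }
                        }
                        f->last_writer = node;
                }
        }
        s->packets++;
        s->bytes += len;
unlock:
        pthread_spin_unlock(&s->lock);
}

/* Reader side: models ovs_flow_stats_get(), summing all node slots. */
static void flow_stats_sum(struct flow *f,
                           unsigned long *packets, unsigned long *bytes)
{
        *packets = *bytes = 0;
        for (int node = 0; node < MAX_NODES; node++) {
                struct stats *s = atomic_load(&f->slot[node]);

                if (!s)
                        continue;       /* node never touched this flow */
                pthread_spin_lock(&s->lock);
                *packets += s->packets;
                *bytes += s->bytes;
                pthread_spin_unlock(&s->lock);
        }
}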
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 9c0dd8aa3117..ddcebc53224f 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -159,12 +159,18 @@ struct sw_flow {
         struct rcu_head rcu;
         struct hlist_node hash_node[2];
         u32 hash;
-
+        int stats_last_writer;          /* NUMA-node id of the last writer on
+                                         * 'stats[0]'.
+                                         */
         struct sw_flow_key key;
         struct sw_flow_key unmasked_key;
         struct sw_flow_mask *mask;
         struct sw_flow_actions __rcu *sf_acts;
-        struct flow_stats __percpu *stats;
+        struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one
+                                           * is allocated at flow creation time,
+                                           * the rest are allocated on demand
+                                           * while holding the 'stats[0].lock'.
+                                           */
 };
 
 struct arp_eth_header {
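Because 'stats[]' above is a C99 flexible array member, sizeof(struct sw_flow) does not include the per-node pointer slots; that is why ovs_flow_init() in flow_table.c below creates flow_cache with sizeof(struct sw_flow) + num_possible_nodes() * sizeof(struct flow_stats *). A small stand-alone sketch of that sizing, using toy types (sw_flow_model and the fixed node count are hypothetical):

#include <stdio.h>
#include <stdlib.h>

struct flow_stats;                      /* opaque for layout purposes */

struct sw_flow_model {                  /* toy stand-in for struct sw_flow */
        unsigned int hash;
        int stats_last_writer;
        struct flow_stats *stats[];     /* flexible array member: contributes
                                         * nothing to sizeof() on its own */
};

int main(void)
{
        int nodes = 4;                  /* stand-in for num_possible_nodes() */
        size_t obj = sizeof(struct sw_flow_model)
                     + nodes * sizeof(struct flow_stats *);
        struct sw_flow_model *f = calloc(1, obj);       /* all slots NULL */

        if (!f)
                return 1;
        printf("base %zu bytes, %d slots -> %zu bytes per object\n",
               sizeof(struct sw_flow_model), nodes, obj);
        free(f);
        return 0;
}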
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index aa92da23053d..d8ef37b937bd 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -48,6 +48,7 @@
 #define REHASH_INTERVAL (10 * 60 * HZ)
 
 static struct kmem_cache *flow_cache;
+struct kmem_cache *flow_stats_cache __read_mostly;
 
 static u16 range_n_bytes(const struct sw_flow_key_range *range)
 {
@@ -75,7 +76,8 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
 struct sw_flow *ovs_flow_alloc(void)
 {
         struct sw_flow *flow;
-        int cpu;
+        struct flow_stats *stats;
+        int node;
 
         flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
         if (!flow)
@@ -83,17 +85,22 @@ struct sw_flow *ovs_flow_alloc(void)
 
         flow->sf_acts = NULL;
         flow->mask = NULL;
+        flow->stats_last_writer = NUMA_NO_NODE;
 
-        flow->stats = alloc_percpu(struct flow_stats);
-        if (!flow->stats)
+        /* Initialize the default stat node. */
+        stats = kmem_cache_alloc_node(flow_stats_cache,
+                                      GFP_KERNEL | __GFP_ZERO, 0);
+        if (!stats)
                 goto err;
 
-        for_each_possible_cpu(cpu) {
-                struct flow_stats *cpu_stats;
+        spin_lock_init(&stats->lock);
+
+        RCU_INIT_POINTER(flow->stats[0], stats);
+
+        for_each_node(node)
+                if (node != 0)
+                        RCU_INIT_POINTER(flow->stats[node], NULL);
 
-                cpu_stats = per_cpu_ptr(flow->stats, cpu);
-                spin_lock_init(&cpu_stats->lock);
-        }
         return flow;
 err:
         kmem_cache_free(flow_cache, flow);
@@ -130,8 +137,13 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets)
 
 static void flow_free(struct sw_flow *flow)
 {
+        int node;
+
         kfree((struct sf_flow_acts __force *)flow->sf_acts);
-        free_percpu(flow->stats);
+        for_each_node(node)
+                if (flow->stats[node])
+                        kmem_cache_free(flow_stats_cache,
+                                        (struct flow_stats __force *)flow->stats[node]);
         kmem_cache_free(flow_cache, flow);
 }
 
@@ -586,16 +598,28 @@ int ovs_flow_init(void)
         BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
         BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
 
-        flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
-                                       0, NULL);
+        flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
+                                       + (num_possible_nodes()
+                                          * sizeof(struct flow_stats *)),
+                                       0, 0, NULL);
         if (flow_cache == NULL)
                 return -ENOMEM;
 
+        flow_stats_cache
+                = kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats),
+                                    0, SLAB_HWCACHE_ALIGN, NULL);
+        if (flow_stats_cache == NULL) {
+                kmem_cache_destroy(flow_cache);
+                flow_cache = NULL;
+                return -ENOMEM;
+        }
+
         return 0;
 }
 
 /* Uninitializes the flow module. */
 void ovs_flow_exit(void)
 {
+        kmem_cache_destroy(flow_stats_cache);
         kmem_cache_destroy(flow_cache);
 }
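The allocation and teardown above pair up: stats[0] is created together with the flow, the other slots start NULL, and flow_free() releases only the slots that writers populated. A userspace sketch of the same lifecycle, reusing the struct flow / struct stats model from the sketch after flow.c (flow_alloc() and flow_free() are hypothetical stand-ins; free(NULL) being a no-op replaces the kernel's explicit NULL check):

/* Create a flow with only the default slot 0 populated. */
static struct flow *flow_alloc(void)
{
        struct flow *f = calloc(1, sizeof(*f));        /* slots start NULL */
        struct stats *s;

        if (!f)
                return NULL;
        f->last_writer = NODE_NONE;             /* no writer seen yet */

        s = calloc(1, sizeof(*s));              /* default slot, always present */
        if (!s) {
                free(f);
                return NULL;
        }
        pthread_spin_init(&s->lock, PTHREAD_PROCESS_PRIVATE);
        atomic_store(&f->slot[0], s);
        return f;
}

/* Release every slot a writer populated, then the flow itself. */
static void flow_free(struct flow *f)
{
        for (int node = 0; node < MAX_NODES; node++)
                free(atomic_load(&f->slot[node]));      /* free(NULL): no-op */
        free(f);
}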
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index c26c59a7ab57..ca8a5820f615 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -52,6 +52,8 @@ struct flow_table {
         unsigned int count;
 };
 
+extern struct kmem_cache *flow_stats_cache;
+
 int ovs_flow_init(void);
 void ovs_flow_exit(void);
 