diff options
author | Amir Vadai <amirva@mellanox.com> | 2016-05-13 08:55:41 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-05-16 13:43:51 -0400 |
commit | 43a335e055bb7ebdc8a68ce7362ef26ef5bda92b (patch) | |
tree | 0441f14934b5e341f12df45bc86bbca9c3dc3d03 | |
parent | bd5251dbf156b6bc0661a9409d46e47160df61dd (diff) |
net/mlx5_core: Flow counters infrastructure
If a counter has the aging flag set when created, it is added to a list
of counters that will be queried periodically from a workqueue. The query
result and last-use timestamp are cached.
Adding and deleting counters must be very efficient, since thousands of
such operations might be issued per second.
There is only a single reference to a counter without aging, so no
locks are needed for it.
But, counters with aging enabled are stored in a list. In order to make
code as lockless as possible, all the list manipulation and access to
hardware is done from a single context - the periodic counters query
thread.
The hardware supports multiple counters per FTE, however currently we
are using one counter for each FTE.
Signed-off-by: Amir Vadai <amirva@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 7 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c | 226 | ||||
-rw-r--r-- | include/linux/mlx5/driver.h | 14 | ||||
-rw-r--r-- | include/linux/mlx5/fs.h | 5 |
6 files changed, 255 insertions, 2 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index b531d4f3c00b..9ea7b583096a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile | |||
@@ -2,7 +2,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o | |||
2 | 2 | ||
3 | mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ | 3 | mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ |
4 | health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ | 4 | health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ |
5 | mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o | 5 | mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o fs_counters.o |
6 | 6 | ||
7 | mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \ | 7 | mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \ |
8 | en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ | 8 | en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ |
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 9420def3a2fe..8b5f0b2c0d5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | |||
@@ -1771,6 +1771,7 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) | |||
1771 | cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); | 1771 | cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); |
1772 | cleanup_single_prio_root_ns(dev, dev->priv.esw_egress_root_ns); | 1772 | cleanup_single_prio_root_ns(dev, dev->priv.esw_egress_root_ns); |
1773 | cleanup_single_prio_root_ns(dev, dev->priv.esw_ingress_root_ns); | 1773 | cleanup_single_prio_root_ns(dev, dev->priv.esw_ingress_root_ns); |
1774 | mlx5_cleanup_fc_stats(dev); | ||
1774 | } | 1775 | } |
1775 | 1776 | ||
1776 | static int init_fdb_root_ns(struct mlx5_core_dev *dev) | 1777 | static int init_fdb_root_ns(struct mlx5_core_dev *dev) |
@@ -1827,10 +1828,14 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) | |||
1827 | { | 1828 | { |
1828 | int err = 0; | 1829 | int err = 0; |
1829 | 1830 | ||
1831 | err = mlx5_init_fc_stats(dev); | ||
1832 | if (err) | ||
1833 | return err; | ||
1834 | |||
1830 | if (MLX5_CAP_GEN(dev, nic_flow_table)) { | 1835 | if (MLX5_CAP_GEN(dev, nic_flow_table)) { |
1831 | err = init_root_ns(dev); | 1836 | err = init_root_ns(dev); |
1832 | if (err) | 1837 | if (err) |
1833 | return err; | 1838 | goto err; |
1834 | } | 1839 | } |
1835 | if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { | 1840 | if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { |
1836 | err = init_fdb_root_ns(dev); | 1841 | err = init_fdb_root_ns(dev); |
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 1989048ebdfd..aa41a7314691 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | |||
@@ -169,6 +169,9 @@ struct mlx5_flow_root_namespace { | |||
169 | struct mutex chain_lock; | 169 | struct mutex chain_lock; |
170 | }; | 170 | }; |
171 | 171 | ||
172 | int mlx5_init_fc_stats(struct mlx5_core_dev *dev); | ||
173 | void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev); | ||
174 | |||
172 | int mlx5_init_fs(struct mlx5_core_dev *dev); | 175 | int mlx5_init_fs(struct mlx5_core_dev *dev); |
173 | void mlx5_cleanup_fs(struct mlx5_core_dev *dev); | 176 | void mlx5_cleanup_fs(struct mlx5_core_dev *dev); |
174 | 177 | ||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c new file mode 100644 index 000000000000..164dc37fda72 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c | |||
@@ -0,0 +1,226 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, Mellanox Technologies. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/mlx5/driver.h> | ||
34 | #include <linux/mlx5/fs.h> | ||
35 | #include "mlx5_core.h" | ||
36 | #include "fs_core.h" | ||
37 | #include "fs_cmd.h" | ||
38 | |||
39 | #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) | ||
40 | |||
41 | /* locking scheme: | ||
42 | * | ||
43 | * It is the responsibility of the user to prevent concurrent calls or bad | ||
44 | * ordering to mlx5_fc_create(), mlx5_fc_destroy() and accessing a reference | ||
45 | * to struct mlx5_fc. | ||
46 | * e.g en_tc.c is protected by RTNL lock of its caller, and will never call a | ||
47 | * dump (access to struct mlx5_fc) after a counter is destroyed. | ||
48 | * | ||
49 | * access to counter list: | ||
50 | * - create (user context) | ||
51 | * - mlx5_fc_create() only adds to an addlist to be used by | ||
52 | * mlx5_fc_stats_work(). addlist is protected by a spinlock. | ||
53 | * - schedule the work to do the actual add to the counter list | ||
54 | * | ||
55 | * - destroy (user context) | ||
56 | * - mark a counter as deleted | ||
57 | * - spawn thread to do the actual del | ||
58 | * | ||
59 | * - dump (user context) | ||
60 | * user should not call dump after destroy | ||
61 | * | ||
62 | * - query (single thread workqueue context) | ||
63 | * destroy/dump - no conflict (see destroy) | ||
64 | * query/dump - packets and bytes might be inconsistent (since update is not | ||
65 | * atomic) | ||
66 | * query/create - no conflict (see create) | ||
67 | * Since every create/destroy schedules the work, the thread queries the | ||
68 | * hardware only after the necessary time has elapsed. | ||
69 | */ | ||
70 | |||
/*
 * mlx5_fc_stats_work() - delayed-work handler that maintains the list of
 * aging flow counters and refreshes their cached statistics.
 *
 * @work: the work_struct embedded in dev->priv.fc_stats.work; the owning
 *        mlx5_core_dev is recovered from it with container_of().
 *
 * On every run the handler:
 *   - moves newly created counters from addlist to the tail of list and,
 *     if list is not empty, re-queues itself after MLX5_FC_STATS_PERIOD
 *     (both done while holding addlist_lock);
 *   - unlinks and frees (hardware id and memory) every counter whose
 *     'deleted' flag is set;
 *   - if next_query has been reached, queries each remaining counter and
 *     updates its cache when the packet count changed.
 */
71 | static void mlx5_fc_stats_work(struct work_struct *work) | ||
72 | { | ||
73 | struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, | ||
74 | priv.fc_stats.work.work); | ||
75 | struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; | ||
/* Snapshot of jiffies; used for every next_query comparison in this run. */
76 | unsigned long now = jiffies; | ||
77 | struct mlx5_fc *counter; | ||
78 | struct mlx5_fc *tmp; | ||
79 | int err = 0; | ||
80 | | ||
81 | spin_lock(&fc_stats->addlist_lock); | ||
82 | | ||
/* Take over all pending additions; addlist is left empty. */
83 | list_splice_tail_init(&fc_stats->addlist, &fc_stats->list); | ||
84 | | ||
/* Keep polling only while there is at least one counter to track. */
85 | if (!list_empty(&fc_stats->list)) | ||
86 | queue_delayed_work(fc_stats->wq, &fc_stats->work, MLX5_FC_STATS_PERIOD); | ||
87 | | ||
88 | spin_unlock(&fc_stats->addlist_lock); | ||
89 | | ||
/* The _safe iterator is needed because entries may be removed below. */
90 | list_for_each_entry_safe(counter, tmp, &fc_stats->list, list) { | ||
91 | struct mlx5_fc_cache *c = &counter->cache; | ||
92 | u64 packets; | ||
93 | u64 bytes; | ||
94 | | ||
/* Deferred destroy requested via the 'deleted' flag. */
95 | if (counter->deleted) { | ||
96 | list_del(&counter->list); | ||
97 | | ||
98 | mlx5_cmd_fc_free(dev, counter->id); | ||
99 | | ||
100 | kfree(counter); | ||
101 | continue; | ||
102 | } | ||
103 | | ||
/* Too early for a hardware query; this run only handles add/delete. */
104 | if (time_before(now, fc_stats->next_query)) | ||
105 | continue; | ||
106 | | ||
107 | err = mlx5_cmd_fc_query(dev, counter->id, &packets, &bytes); | ||
108 | if (err) { | ||
109 | pr_err("Error querying stats for counter id %d\n", | ||
110 | counter->id); | ||
111 | continue; | ||
112 | } | ||
113 | | ||
/* No new packets: leave the cache, including lastuse, untouched. */
114 | if (packets == c->packets) | ||
115 | continue; | ||
116 | | ||
117 | c->lastuse = jiffies; | ||
118 | c->packets = packets; | ||
119 | c->bytes = bytes; | ||
120 | } | ||
121 | | ||
/* A query pass was due in this run; schedule the next one a period from 'now'. */
122 | if (time_after_eq(now, fc_stats->next_query)) | ||
123 | fc_stats->next_query = now + MLX5_FC_STATS_PERIOD; | ||
124 | } | ||
125 | |||
/*
 * mlx5_fc_create() - allocate a flow counter.
 *
 * @dev:   device on which the counter id is allocated.
 * @aging: when true, the counter is queued on fc_stats->addlist and the
 *         stats work is kicked with zero delay.
 *
 * Return: pointer to the new, zero-initialized counter on success;
 *         ERR_PTR(-ENOMEM) if the memory allocation fails, or ERR_PTR(err)
 *         with the error returned by mlx5_cmd_fc_alloc().
 */
126 | struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) | ||
127 | { | ||
128 | struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; | ||
129 | struct mlx5_fc *counter; | ||
130 | int err; | ||
131 | | ||
132 | counter = kzalloc(sizeof(*counter), GFP_KERNEL); | ||
133 | if (!counter) | ||
134 | return ERR_PTR(-ENOMEM); | ||
135 | | ||
136 | err = mlx5_cmd_fc_alloc(dev, &counter->id); | ||
137 | if (err) | ||
138 | goto err_out; | ||
139 | | ||
140 | if (aging) { | ||
141 | counter->aging = true; | ||
142 | | ||
/* Only addlist is touched here, under addlist_lock. */
143 | spin_lock(&fc_stats->addlist_lock); | ||
144 | list_add(&counter->list, &fc_stats->addlist); | ||
145 | spin_unlock(&fc_stats->addlist_lock); | ||
146 | | ||
/* Run the stats work immediately (delay 0). */
147 | mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); | ||
148 | } | ||
149 | | ||
150 | return counter; | ||
151 | | ||
152 | err_out: | ||
153 | kfree(counter); | ||
154 | | ||
155 | return ERR_PTR(err); | ||
156 | } | ||
157 | |||
/*
 * mlx5_fc_destroy() - release a flow counter.
 *
 * @dev:     device the counter belongs to.
 * @counter: counter to release; NULL is accepted and ignored.
 *
 * A counter created with aging is only flagged as deleted here and the
 * stats work is kicked with zero delay; nothing is freed in this function
 * for such a counter. A counter without aging has its hardware id released
 * and its memory freed immediately.
 */
158 | void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) | ||
159 | { | ||
160 | struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; | ||
161 | | ||
162 | if (!counter) | ||
163 | return; | ||
164 | | ||
165 | if (counter->aging) { | ||
/* Deferred path: no list manipulation and no free in this context. */
166 | counter->deleted = true; | ||
167 | mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); | ||
168 | return; | ||
169 | } | ||
170 | | ||
171 | mlx5_cmd_fc_free(dev, counter->id); | ||
172 | kfree(counter); | ||
173 | } | ||
174 | |||
/*
 * mlx5_init_fc_stats() - initialize the per-device flow counter state.
 *
 * @dev: device whose priv.fc_stats is initialized.
 *
 * Initializes both counter lists and the addlist spinlock, creates the
 * single-threaded "mlx5_fc" workqueue and binds the delayed work to
 * mlx5_fc_stats_work(). The work is not queued here.
 *
 * Return: 0 on success, -ENOMEM if the workqueue cannot be created.
 */
175 | int mlx5_init_fc_stats(struct mlx5_core_dev *dev) | ||
176 | { | ||
177 | struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; | ||
178 | | ||
179 | INIT_LIST_HEAD(&fc_stats->list); | ||
180 | INIT_LIST_HEAD(&fc_stats->addlist); | ||
181 | spin_lock_init(&fc_stats->addlist_lock); | ||
182 | | ||
183 | fc_stats->wq = create_singlethread_workqueue("mlx5_fc"); | ||
184 | if (!fc_stats->wq) | ||
185 | return -ENOMEM; | ||
186 | | ||
187 | INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work); | ||
188 | | ||
189 | return 0; | ||
190 | } | ||
191 | |||
/*
 * mlx5_cleanup_fc_stats() - tear down the per-device flow counter state.
 *
 * @dev: device whose priv.fc_stats is torn down.
 *
 * Cancels the stats work (waiting for a running instance), destroys the
 * workqueue and clears the wq pointer. Every counter still on addlist or
 * list is then unlinked, its hardware id released and its memory freed,
 * whether or not its 'deleted' flag is set.
 */
192 | void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) | ||
193 | { | ||
194 | struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; | ||
195 | struct mlx5_fc *counter; | ||
196 | struct mlx5_fc *tmp; | ||
197 | | ||
198 | cancel_delayed_work_sync(&dev->priv.fc_stats.work); | ||
199 | destroy_workqueue(dev->priv.fc_stats.wq); | ||
200 | dev->priv.fc_stats.wq = NULL; | ||
201 | | ||
/*
 * addlist_lock is not taken for this splice.
 * NOTE(review): presumably no mlx5_fc_create() can run concurrently at
 * this point - confirm against callers.
 */
202 | list_splice_tail_init(&fc_stats->addlist, &fc_stats->list); | ||
203 | | ||
204 | list_for_each_entry_safe(counter, tmp, &fc_stats->list, list) { | ||
205 | list_del(&counter->list); | ||
206 | | ||
207 | mlx5_cmd_fc_free(dev, counter->id); | ||
208 | | ||
209 | kfree(counter); | ||
210 | } | ||
211 | } | ||
212 | |||
/*
 * mlx5_fc_query_cached() - report cached counter statistics as deltas.
 *
 * @counter: counter to read; its lastbytes/lastpackets fields are updated.
 * @bytes:   out: cached byte count minus the value recorded by the
 *           previous call.
 * @packets: out: cached packet count minus the value recorded by the
 *           previous call.
 * @lastuse: out: cached lastuse value (a jiffies timestamp written by
 *           mlx5_fc_stats_work()).
 *
 * No hardware access is performed; only the cache is read. Note the
 * parameter order: bytes comes before packets.
 */
213 | void mlx5_fc_query_cached(struct mlx5_fc *counter, | ||
214 | u64 *bytes, u64 *packets, u64 *lastuse) | ||
215 | { | ||
216 | struct mlx5_fc_cache c; | ||
217 | | ||
/* Work on a local copy of the cache for all computations below. */
218 | c = counter->cache; | ||
219 | | ||
220 | *bytes = c.bytes - counter->lastbytes; | ||
221 | *packets = c.packets - counter->lastpackets; | ||
222 | *lastuse = c.lastuse; | ||
223 | | ||
/* Remember what was reported so the next call returns only the delta. */
224 | counter->lastbytes = c.bytes; | ||
225 | counter->lastpackets = c.packets; | ||
226 | } | ||
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9613143f0561..07b504f7eb84 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/slab.h> | 41 | #include <linux/slab.h> |
42 | #include <linux/vmalloc.h> | 42 | #include <linux/vmalloc.h> |
43 | #include <linux/radix-tree.h> | 43 | #include <linux/radix-tree.h> |
44 | #include <linux/workqueue.h> | ||
44 | 45 | ||
45 | #include <linux/mlx5/device.h> | 46 | #include <linux/mlx5/device.h> |
46 | #include <linux/mlx5/doorbell.h> | 47 | #include <linux/mlx5/doorbell.h> |
@@ -457,6 +458,17 @@ struct mlx5_irq_info { | |||
457 | char name[MLX5_MAX_IRQ_NAME]; | 458 | char name[MLX5_MAX_IRQ_NAME]; |
458 | }; | 459 | }; |
459 | 460 | ||
/*
 * Per-device state for flow counters with aging, embedded in struct
 * mlx5_priv as 'fc_stats'.
 *
 * list:         counters tracked by the stats work.
 * addlist:      newly created counters waiting to be spliced onto 'list'.
 * addlist_lock: see the comment on the field below.
 * wq:           single-threaded workqueue on which 'work' is queued.
 * work:         delayed work whose handler is mlx5_fc_stats_work().
 * next_query:   jiffies value before which the work skips hardware queries.
 */
461 | struct mlx5_fc_stats { | ||
462 | struct list_head list; | ||
463 | struct list_head addlist; | ||
464 | /* protect addlist add/splice operations */ | ||
465 | spinlock_t addlist_lock; | ||
466 | | ||
467 | struct workqueue_struct *wq; | ||
468 | struct delayed_work work; | ||
469 | unsigned long next_query; | ||
470 | }; | ||
471 | |||
460 | struct mlx5_eswitch; | 472 | struct mlx5_eswitch; |
461 | 473 | ||
462 | struct mlx5_priv { | 474 | struct mlx5_priv { |
@@ -520,6 +532,8 @@ struct mlx5_priv { | |||
520 | struct mlx5_flow_root_namespace *fdb_root_ns; | 532 | struct mlx5_flow_root_namespace *fdb_root_ns; |
521 | struct mlx5_flow_root_namespace *esw_egress_root_ns; | 533 | struct mlx5_flow_root_namespace *esw_egress_root_ns; |
522 | struct mlx5_flow_root_namespace *esw_ingress_root_ns; | 534 | struct mlx5_flow_root_namespace *esw_ingress_root_ns; |
535 | |||
536 | struct mlx5_fc_stats fc_stats; | ||
523 | }; | 537 | }; |
524 | 538 | ||
525 | enum mlx5_device_state { | 539 | enum mlx5_device_state { |
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index c8b9ede1c20a..4b7a107d9c19 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h | |||
@@ -127,4 +127,9 @@ int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, | |||
127 | struct mlx5_flow_destination *dest); | 127 | struct mlx5_flow_destination *dest); |
128 | 128 | ||
129 | struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule); | 129 | struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule); |
130 | struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); | ||
131 | void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); | ||
132 | void mlx5_fc_query_cached(struct mlx5_fc *counter, | ||
133 | u64 *bytes, u64 *packets, u64 *lastuse); | ||
134 | |||
130 | #endif | 135 | #endif |