author		John Fastabend <john.fastabend@gmail.com>	2017-07-17 12:29:40 -0400
committer	David S. Miller <davem@davemloft.net>	2017-07-17 12:48:06 -0400
commit		11393cc9b9be2a1f61559e6fb9c27bc8fa20b1ff (patch)
tree		9983db89954e9d34d04d16fa023e5df752efd90d /kernel/bpf/devmap.c
parent		97f91a7cf04ff605845c20948b8a80e54cbd3376 (diff)
xdp: Add batching support to redirect map
For performance reasons we want to avoid updating the tail pointer in the
driver tx ring as much as possible. To accomplish this we add batching
support to the redirect path in XDP. This adds another ndo op "xdp_flush"
that is used to inform the driver that it should bump the tail pointer on
the TX ring.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/bpf/devmap.c')
-rw-r--r--	kernel/bpf/devmap.c	84
1 file changed, 83 insertions(+), 1 deletion(-)
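To make the mechanism concrete before the diff: the new ndo_xdp_flush hook lets a driver perform a single tail-pointer (doorbell) write for a whole batch of redirected frames instead of one write per packet. Below is a rough, hypothetical driver-side sketch; the mydrv_* names, ring layout, and tail register are illustrative only and are not part of this patch.

/* Hypothetical driver implementation of the new ndo_xdp_flush hook.
 * Frames queued by the driver's XDP transmit path are only made visible
 * to the hardware here, with one tail-pointer write per batch.
 */
static void mydrv_xdp_flush(struct net_device *dev)
{
	struct mydrv_priv *priv = netdev_priv(dev);      /* illustrative */
	struct mydrv_tx_ring *ring = priv->xdp_tx_ring;  /* illustrative */

	/* Make descriptor writes visible before ringing the doorbell. */
	wmb();
	writel(ring->next_to_use, ring->tail);
}

static const struct net_device_ops mydrv_netdev_ops = {
	/* ... existing ops ... */
	.ndo_xdp_flush	= mydrv_xdp_flush,
};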
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 36dc13deb2e1..b2ef04a1c86a 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -53,6 +53,7 @@ struct bpf_dtab_netdev {
 struct bpf_dtab {
 	struct bpf_map map;
 	struct bpf_dtab_netdev **netdev_map;
+	unsigned long int __percpu *flush_needed;
 };
 
 static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
@@ -87,6 +88,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 
 	/* make sure page count doesn't overflow */
 	cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
+	cost += BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
 	if (cost >= U32_MAX - PAGE_SIZE)
 		goto free_dtab;
 
@@ -97,6 +99,14 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	if (err)
 		goto free_dtab;
 
+	/* A per cpu bitfield with a bit per possible net device */
+	dtab->flush_needed = __alloc_percpu(
+				BITS_TO_LONGS(attr->max_entries) *
+				sizeof(unsigned long),
+				__alignof__(unsigned long));
+	if (!dtab->flush_needed)
+		goto free_dtab;
+
 	dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
 					      sizeof(struct bpf_dtab_netdev *));
 	if (!dtab->netdev_map)
@@ -105,6 +115,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	return &dtab->map;
 
 free_dtab:
+	free_percpu(dtab->flush_needed);
 	kfree(dtab);
 	return ERR_PTR(err);
 }
@@ -112,7 +123,7 @@ free_dtab:
 static void dev_map_free(struct bpf_map *map)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	int i;
+	int i, cpu;
 
 	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
 	 * so the programs (can be more than one that used this map) were
@@ -123,6 +134,18 @@ static void dev_map_free(struct bpf_map *map)
 	 */
 	synchronize_rcu();
 
+	/* To ensure all pending flush operations have completed wait for flush
+	 * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
+	 * Because the above synchronize_rcu() ensures the map is disconnected
+	 * from the program we can assume no new bits will be set.
+	 */
+	for_each_online_cpu(cpu) {
+		unsigned long *bitmap = per_cpu_ptr(dtab->flush_needed, cpu);
+
+		while (!bitmap_empty(bitmap, dtab->map.max_entries))
+			cpu_relax();
+	}
+
 	for (i = 0; i < dtab->map.max_entries; i++) {
 		struct bpf_dtab_netdev *dev;
 
@@ -137,6 +160,7 @@ static void dev_map_free(struct bpf_map *map)
 	/* At this point bpf program is detached and all pending operations
 	 * _must_ be complete
 	 */
+	free_percpu(dtab->flush_needed);
 	bpf_map_area_free(dtab->netdev_map);
 	kfree(dtab);
 }
@@ -159,6 +183,14 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return 0;
 }
 
+void __dev_map_insert_ctx(struct bpf_map *map, u32 key)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
+
+	__set_bit(key, bitmap);
+}
+
 struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
@@ -171,6 +203,39 @@ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 	return dev ? dev->dev : NULL;
 }
 
+/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
+ * from the driver before returning from its napi->poll() routine. The poll()
+ * routine is called either from busy_poll context or net_rx_action signaled
+ * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the
+ * net device can be torn down. On devmap tear down we ensure the ctx bitmap
+ * is zeroed before completing to ensure all flush operations have completed.
+ */
+void __dev_map_flush(struct bpf_map *map)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
+	u32 bit;
+
+	for_each_set_bit(bit, bitmap, map->max_entries) {
+		struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
+		struct net_device *netdev;
+
+		/* This is possible if the dev entry is removed by user space
+		 * between xdp redirect and flush op.
+		 */
+		if (unlikely(!dev))
+			continue;
+
+		netdev = dev->dev;
+
+		__clear_bit(bit, bitmap);
+		if (unlikely(!netdev || !netdev->netdev_ops->ndo_xdp_flush))
+			continue;
+
+		netdev->netdev_ops->ndo_xdp_flush(netdev);
+	}
+}
+
 /* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
  * update happens in parallel here a dev_put wont happen until after reading the
  * ifindex.
@@ -188,11 +253,28 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 	return dev ? &dev->dev->ifindex : NULL;
 }
 
+static void dev_map_flush_old(struct bpf_dtab_netdev *old_dev)
+{
+	if (old_dev->dev->netdev_ops->ndo_xdp_flush) {
+		struct net_device *fl = old_dev->dev;
+		unsigned long *bitmap;
+		int cpu;
+
+		for_each_online_cpu(cpu) {
+			bitmap = per_cpu_ptr(old_dev->dtab->flush_needed, cpu);
+			__clear_bit(old_dev->key, bitmap);
+
+			fl->netdev_ops->ndo_xdp_flush(old_dev->dev);
+		}
+	}
+}
+
 static void __dev_map_entry_free(struct rcu_head *rcu)
 {
 	struct bpf_dtab_netdev *old_dev;
 
 	old_dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
+	dev_map_flush_old(old_dev);
 	dev_put(old_dev->dev);
 	kfree(old_dev);
 }