Diffstat (limited to 'kernel/bpf/devmap.c')
 kernel/bpf/devmap.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 83 insertions(+), 1 deletion(-)
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 36dc13deb2e1..b2ef04a1c86a 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -53,6 +53,7 @@ struct bpf_dtab_netdev {
 struct bpf_dtab {
        struct bpf_map map;
        struct bpf_dtab_netdev **netdev_map;
+       unsigned long int __percpu *flush_needed;
 };
 
 static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
@@ -87,6 +88,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 
        /* make sure page count doesn't overflow */
        cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
+       cost += BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
        if (cost >= U32_MAX - PAGE_SIZE)
                goto free_dtab;
 
@@ -97,6 +99,14 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
        if (err)
                goto free_dtab;
 
+       /* A per cpu bitfield with a bit per possible net device */
+       dtab->flush_needed = __alloc_percpu(
+                               BITS_TO_LONGS(attr->max_entries) *
+                               sizeof(unsigned long),
+                               __alignof__(unsigned long));
+       if (!dtab->flush_needed)
+               goto free_dtab;
+
        dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
                                              sizeof(struct bpf_dtab_netdev *));
        if (!dtab->netdev_map)
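The cost bump and the __alloc_percpu() size above use the same arithmetic: one flush_needed bit per possible map entry, rounded up to whole unsigned longs. Below is a minimal user-space sketch of that sizing; the BITS_PER_LONG and BITS_TO_LONGS macros are illustrative stand-ins for the kernel's own definitions, and the entry count is an arbitrary example.

#include <limits.h>
#include <stdio.h>

/* Stand-ins for the kernel macros of the same name (illustrative only). */
#define BITS_PER_LONG     (sizeof(unsigned long) * CHAR_BIT)
#define BITS_TO_LONGS(nr) (((nr) + BITS_PER_LONG - 1) / BITS_PER_LONG)

int main(void)
{
        unsigned int max_entries = 100; /* example devmap size */

        /* One flush_needed bit per entry, rounded up to whole longs;
         * this is the per-CPU size added to the map cost above.
         */
        size_t longs = BITS_TO_LONGS(max_entries);
        size_t bytes = longs * sizeof(unsigned long);

        printf("%u entries -> %zu longs, %zu bytes per CPU\n",
               max_entries, longs, bytes);
        return 0;
}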
@@ -105,6 +115,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
        return &dtab->map;
 
 free_dtab:
+       free_percpu(dtab->flush_needed);
        kfree(dtab);
        return ERR_PTR(err);
 }
@@ -112,7 +123,7 @@ free_dtab:
 static void dev_map_free(struct bpf_map *map)
 {
        struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-       int i;
+       int i, cpu;
 
        /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
         * so the programs (can be more than one that used this map) were
@@ -123,6 +134,18 @@ static void dev_map_free(struct bpf_map *map)
         */
        synchronize_rcu();
 
+       /* To ensure all pending flush operations have completed wait for flush
+        * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
+        * Because the above synchronize_rcu() ensures the map is disconnected
+        * from the program we can assume no new bits will be set.
+        */
+       for_each_online_cpu(cpu) {
+               unsigned long *bitmap = per_cpu_ptr(dtab->flush_needed, cpu);
+
+               while (!bitmap_empty(bitmap, dtab->map.max_entries))
+                       cpu_relax();
+       }
+
        for (i = 0; i < dtab->map.max_entries; i++) {
                struct bpf_dtab_netdev *dev;
 
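The teardown loop above spins until every CPU's flush_needed bitmap reads all-zero, relying on the earlier synchronize_rcu() to guarantee no new bits can be set. A rough single-threaded user-space model of that drain check follows; the fixed CPU count, the plain 2-D array standing in for the per-cpu area, and the simplified bitmap_empty() are all assumptions of the sketch, not kernel code.

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define BITS_PER_LONG     (sizeof(unsigned long) * CHAR_BIT)
#define BITS_TO_LONGS(nr) (((nr) + BITS_PER_LONG - 1) / BITS_PER_LONG)

#define NR_CPUS     4   /* assumed CPU count for the model */
#define MAX_ENTRIES 64  /* example devmap size */

/* One "per-CPU" bitmap per CPU; in the kernel this is the
 * __alloc_percpu() area, here it is just a 2-D array.
 */
static unsigned long flush_needed[NR_CPUS][BITS_TO_LONGS(MAX_ENTRIES)];

/* Simplified stand-in for the kernel's bitmap_empty(). */
static bool bitmap_empty(const unsigned long *bitmap, unsigned int nbits)
{
        for (unsigned int i = 0; i < BITS_TO_LONGS(nbits); i++)
                if (bitmap[i])
                        return false;
        return true;
}

int main(void)
{
        /* Teardown: wait until every CPU has drained its pending flushes.
         * In the kernel the other CPUs clear their bits from napi->poll();
         * in this single-threaded model the bitmaps are already zero, so
         * each loop exits immediately.
         */
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                while (!bitmap_empty(flush_needed[cpu], MAX_ENTRIES))
                        /* cpu_relax() in the kernel */ ;

        printf("all per-CPU flush bitmaps drained\n");
        return 0;
}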
@@ -137,6 +160,7 @@ static void dev_map_free(struct bpf_map *map)
        /* At this point bpf program is detached and all pending operations
         * _must_ be complete
         */
+       free_percpu(dtab->flush_needed);
        bpf_map_area_free(dtab->netdev_map);
        kfree(dtab);
 }
@@ -159,6 +183,14 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
        return 0;
 }
 
+void __dev_map_insert_ctx(struct bpf_map *map, u32 key)
+{
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+       unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
+
+       __set_bit(key, bitmap);
+}
+
 struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 {
        struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
@@ -171,6 +203,39 @@ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
        return dev ? dev->dev : NULL;
 }
 
+/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
+ * from the driver before returning from its napi->poll() routine. The poll()
+ * routine is called either from busy_poll context or net_rx_action signaled
+ * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the
+ * net device can be torn down. On devmap tear down we ensure the ctx bitmap
+ * is zeroed before completing to ensure all flush operations have completed.
+ */
+void __dev_map_flush(struct bpf_map *map)
+{
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+       unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
+       u32 bit;
+
+       for_each_set_bit(bit, bitmap, map->max_entries) {
+               struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
+               struct net_device *netdev;
+
+               /* This is possible if the dev entry is removed by user space
+                * between xdp redirect and flush op.
+                */
+               if (unlikely(!dev))
+                       continue;
+
+               netdev = dev->dev;
+
+               __clear_bit(bit, bitmap);
+               if (unlikely(!netdev || !netdev->netdev_ops->ndo_xdp_flush))
+                       continue;
+
+               netdev->netdev_ops->ndo_xdp_flush(netdev);
+       }
+}
+
 /* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
  * update happens in parallel here a dev_put wont happen until after reading the
  * ifindex.
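Taken together, __dev_map_insert_ctx() sets the entry's bit in the current CPU's bitmap at redirect time, and __dev_map_flush() later walks the set bits, clears each one, and invokes that device's ndo_xdp_flush(). A compact user-space model of this mark-then-flush cycle follows; the bit helpers, the flush callback, and the map size are simplified stand-ins for the kernel primitives, not the kernel code itself.

#include <limits.h>
#include <stdio.h>

#define BITS_PER_LONG     (sizeof(unsigned long) * CHAR_BIT)
#define BITS_TO_LONGS(nr) (((nr) + BITS_PER_LONG - 1) / BITS_PER_LONG)

#define MAX_ENTRIES 64  /* example devmap size */

/* The current CPU's flush_needed bitmap (this_cpu_ptr() in the kernel). */
static unsigned long flush_needed[BITS_TO_LONGS(MAX_ENTRIES)];

/* Non-atomic bit helpers, mirroring __set_bit()/__clear_bit()/test_bit(). */
static void set_bit_np(unsigned int nr, unsigned long *bitmap)
{
        bitmap[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}

static void clear_bit_np(unsigned int nr, unsigned long *bitmap)
{
        bitmap[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG));
}

static int test_bit_np(unsigned int nr, const unsigned long *bitmap)
{
        return (bitmap[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG)) & 1;
}

/* Stand-in for netdev->netdev_ops->ndo_xdp_flush(netdev). */
static void xdp_flush(unsigned int key)
{
        printf("flushing device at map index %u\n", key);
}

/* Model of __dev_map_insert_ctx(): called on each redirect. */
static void insert_ctx(unsigned int key)
{
        set_bit_np(key, flush_needed);
}

/* Model of __dev_map_flush(): called once per napi->poll() pass;
 * walks the set bits, clears each one, then flushes that device.
 */
static void flush_map(void)
{
        for (unsigned int bit = 0; bit < MAX_ENTRIES; bit++) {
                if (!test_bit_np(bit, flush_needed))
                        continue;
                clear_bit_np(bit, flush_needed);
                xdp_flush(bit);
        }
}

int main(void)
{
        /* Two packets redirected to index 3, one to index 7, in one poll. */
        insert_ctx(3);
        insert_ctx(3);
        insert_ctx(7);

        /* End of the poll pass: a single flush per marked device. */
        flush_map();
        return 0;
}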
@@ -188,11 +253,28 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
        return dev ? &dev->dev->ifindex : NULL;
 }
 
+static void dev_map_flush_old(struct bpf_dtab_netdev *old_dev)
+{
+       if (old_dev->dev->netdev_ops->ndo_xdp_flush) {
+               struct net_device *fl = old_dev->dev;
+               unsigned long *bitmap;
+               int cpu;
+
+               for_each_online_cpu(cpu) {
+                       bitmap = per_cpu_ptr(old_dev->dtab->flush_needed, cpu);
+                       __clear_bit(old_dev->key, bitmap);
+
+                       fl->netdev_ops->ndo_xdp_flush(old_dev->dev);
+               }
+       }
+}
+
 static void __dev_map_entry_free(struct rcu_head *rcu)
 {
        struct bpf_dtab_netdev *old_dev;
 
        old_dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
+       dev_map_flush_old(old_dev);
        dev_put(old_dev->dev);
        kfree(old_dev);
 }