Diffstat (limited to 'kernel/bpf/devmap.c')
-rw-r--r--	kernel/bpf/devmap.c | 84
1 file changed, 83 insertions(+), 1 deletion(-)
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 36dc13deb2e1..b2ef04a1c86a 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -53,6 +53,7 @@ struct bpf_dtab_netdev {
 struct bpf_dtab {
 	struct bpf_map map;
 	struct bpf_dtab_netdev **netdev_map;
+	unsigned long int __percpu *flush_needed;
 };
 
 static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
@@ -87,6 +88,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 
 	/* make sure page count doesn't overflow */
 	cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
+	cost += BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
 	if (cost >= U32_MAX - PAGE_SIZE)
 		goto free_dtab;
 
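The added line above charges the flush bitmap against the map's memory accounting. As a rough worked example (the 4096-entry map is hypothetical, assuming 64-bit unsigned long):

	/* Illustration only, not part of the patch:
	 *   BITS_TO_LONGS(4096) = DIV_ROUND_UP(4096, 64) = 64 longs
	 *   64 * sizeof(unsigned long) = 64 * 8 = 512 bytes added to cost
	 */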
@@ -97,6 +99,14 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	if (err)
 		goto free_dtab;
 
+	/* A per cpu bitfield with a bit per possible net device */
+	dtab->flush_needed = __alloc_percpu(
+				BITS_TO_LONGS(attr->max_entries) *
+				sizeof(unsigned long),
+				__alignof__(unsigned long));
+	if (!dtab->flush_needed)
+		goto free_dtab;
+
 	dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
 					      sizeof(struct bpf_dtab_netdev *));
 	if (!dtab->netdev_map)
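Note that __alloc_percpu() hands out one private copy of this bitmap per possible CPU, so the memory actually consumed scales with the CPU count. Continuing the hypothetical 4096-entry example (the CPU count is chosen only for illustration):

	/* Illustration only:
	 *   per-CPU chunk = BITS_TO_LONGS(4096) * sizeof(unsigned long) = 512 bytes
	 *   with 8 possible CPUs the allocator backs 8 * 512 = 4096 bytes in
	 *   total, one independent flush_needed bitmap per CPU
	 */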
@@ -105,6 +115,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	return &dtab->map;
 
 free_dtab:
+	free_percpu(dtab->flush_needed);
 	kfree(dtab);
 	return ERR_PTR(err);
 }
@@ -112,7 +123,7 @@ free_dtab:
 static void dev_map_free(struct bpf_map *map)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	int i;
+	int i, cpu;
 
 	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
 	 * so the programs (can be more than one that used this map) were
@@ -123,6 +134,18 @@ static void dev_map_free(struct bpf_map *map)
 	 */
 	synchronize_rcu();
 
+	/* To ensure all pending flush operations have completed wait for flush
+	 * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
+	 * Because the above synchronize_rcu() ensures the map is disconnected
+	 * from the program we can assume no new bits will be set.
+	 */
+	for_each_online_cpu(cpu) {
+		unsigned long *bitmap = per_cpu_ptr(dtab->flush_needed, cpu);
+
+		while (!bitmap_empty(bitmap, dtab->map.max_entries))
+			cpu_relax();
+	}
+
 	for (i = 0; i < dtab->map.max_entries; i++) {
 		struct bpf_dtab_netdev *dev;
 
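The busy-wait above terminates because flush bits are only set from a napi poll that is already in flight, and that poll must flush before it returns. A simplified timeline of the ordering the comment describes (illustration only, not kernel code):

	/*
	 *   CPU X (napi poll)                     CPU Y (dev_map_free)
	 *   ---------------------------------     -------------------------------
	 *   XDP prog returns XDP_REDIRECT
	 *   __dev_map_insert_ctx() sets bit
	 *                                          synchronize_rcu() - after this
	 *                                          no program can use the map, so
	 *                                          no new bits are set
	 *   xdp_do_flush_map()
	 *     -> __dev_map_flush() clears bit
	 *                                          bitmap_empty() becomes true and
	 *                                          the cpu_relax() loop exits
	 */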
@@ -137,6 +160,7 @@ static void dev_map_free(struct bpf_map *map)
 	/* At this point bpf program is detached and all pending operations
 	 * _must_ be complete
 	 */
+	free_percpu(dtab->flush_needed);
 	bpf_map_area_free(dtab->netdev_map);
 	kfree(dtab);
 }
@@ -159,6 +183,14 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return 0;
 }
 
+void __dev_map_insert_ctx(struct bpf_map *map, u32 key)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
+
+	__set_bit(key, bitmap);
+}
+
 struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
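__dev_map_insert_ctx() only records, in the current CPU's bitmap, that the entry at key has pending redirected frames; the actual device kick is deferred to __dev_map_flush() below. A hypothetical driver-side sketch of where the two ends meet (every example_* name is made up; only xdp_do_flush_map() and the devmap helpers come from this series):

	/* Hypothetical driver poll routine, for illustration only */
	static int example_napi_poll(struct napi_struct *napi, int budget)
	{
		int work = 0;

		while (work < budget && example_rx_frame_pending(napi)) {
			/* Run the XDP program: on XDP_REDIRECT into a devmap the
			 * redirect path calls __dev_map_insert_ctx(), which just
			 * sets this CPU's flush_needed bit for the target entry -
			 * no ndo_xdp_flush() yet.
			 */
			example_run_xdp(napi);
			work++;
		}

		/* Before poll() returns, kick every device marked in the per-cpu
		 * bitmap; xdp_do_flush_map() ends up in __dev_map_flush(), so one
		 * ndo_xdp_flush() per device covers the whole batch of packets.
		 */
		xdp_do_flush_map();

		return work;
	}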
@@ -171,6 +203,39 @@ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 	return dev ? dev->dev : NULL;
 }
 
+/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
+ * from the driver before returning from its napi->poll() routine. The poll()
+ * routine is called either from busy_poll context or net_rx_action signaled
+ * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the
+ * net device can be torn down. On devmap tear down we ensure the ctx bitmap
+ * is zeroed before completing to ensure all flush operations have completed.
+ */
+void __dev_map_flush(struct bpf_map *map)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
+	u32 bit;
+
+	for_each_set_bit(bit, bitmap, map->max_entries) {
+		struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
+		struct net_device *netdev;
+
+		/* This is possible if the dev entry is removed by user space
+		 * between xdp redirect and flush op.
+		 */
+		if (unlikely(!dev))
+			continue;
+
+		netdev = dev->dev;
+
+		__clear_bit(bit, bitmap);
+		if (unlikely(!netdev || !netdev->netdev_ops->ndo_xdp_flush))
+			continue;
+
+		netdev->netdev_ops->ndo_xdp_flush(netdev);
+	}
+}
+
 /* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
  * update happens in parallel here a dev_put wont happen until after reading the
  * ifindex.
@@ -188,11 +253,28 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 	return dev ? &dev->dev->ifindex : NULL;
 }
 
+static void dev_map_flush_old(struct bpf_dtab_netdev *old_dev)
+{
+	if (old_dev->dev->netdev_ops->ndo_xdp_flush) {
+		struct net_device *fl = old_dev->dev;
+		unsigned long *bitmap;
+		int cpu;
+
+		for_each_online_cpu(cpu) {
+			bitmap = per_cpu_ptr(old_dev->dtab->flush_needed, cpu);
+			__clear_bit(old_dev->key, bitmap);
+
+			fl->netdev_ops->ndo_xdp_flush(old_dev->dev);
+		}
+	}
+}
+
 static void __dev_map_entry_free(struct rcu_head *rcu)
 {
 	struct bpf_dtab_netdev *old_dev;
 
 	old_dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
+	dev_map_flush_old(old_dev);
 	dev_put(old_dev->dev);
 	kfree(old_dev);
 }
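dev_map_flush_old() runs from the RCU callback, so by the time it executes a full grace period has passed since the entry was unlinked from the map. A simplified sketch of how the existing update/delete paths reach it (based on the surrounding devmap code, shown only for orientation):

	/* Simplified, for orientation only:
	 *
	 *   dev_map_delete_elem() / dev_map_update_elem()
	 *     old_dev = xchg(&dtab->netdev_map[i], new_entry_or_NULL);
	 *     if (old_dev)
	 *       call_rcu(&old_dev->rcu, __dev_map_entry_free);
	 *         -> after a grace period:
	 *            dev_map_flush_old(old_dev);  (clear stale bits, final flush)
	 *            dev_put(old_dev->dev);
	 *            kfree(old_dev);
	 */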