summaryrefslogtreecommitdiffstats
path: root/kernel/bpf/devmap.c
diff options
context:
space:
mode:
authorRoman Gushchin <guro@fb.com>2019-05-29 21:03:58 -0400
committerAlexei Starovoitov <ast@kernel.org>2019-05-31 19:52:56 -0400
commitb936ca643ade11f265fa10e5fb71c20d9c5243f1 (patch)
treefab6cc685363f4f9901fecf6335a8d8e6974f8b3 /kernel/bpf/devmap.c
parent3539b96e041c06e4317082816d90ec09160aeb11 (diff)
bpf: rework memlock-based memory accounting for maps
In order to unify the existing memlock charging code with the memcg-based memory accounting, which will be added later, let's rework the current scheme. Currently the following design is used: 1) .alloc() callback optionally checks if the allocation will likely succeed using bpf_map_precharge_memlock() 2) .alloc() performs actual allocations 3) .alloc() callback calculates map cost and sets map.memory.pages 4) map_create() calls bpf_map_init_memlock() which sets map.memory.user and performs actual charging; in case of failure the map is destroyed <map is in use> 1) bpf_map_free_deferred() calls bpf_map_release_memlock(), which performs uncharge and releases the user 2) .map_free() callback releases the memory The scheme can be simplified and made more robust: 1) .alloc() calculates map cost and calls bpf_map_charge_init() 2) bpf_map_charge_init() sets map.memory.user and performs actual charge 3) .alloc() performs actual allocations <map is in use> 1) .map_free() callback releases the memory 2) bpf_map_charge_finish() performs uncharge and releases the user The new scheme also allows to reuse bpf_map_charge_init()/finish() functions for memcg-based accounting. Because charges are performed before actual allocations and uncharges after freeing the memory, no bogus memory pressure can be created. In cases when the map structure is not available (e.g. it's not created yet, or is already destroyed), on-stack bpf_map_memory structure is used. The charge can be transferred with the bpf_map_charge_move() function. Signed-off-by: Roman Gushchin <guro@fb.com> Acked-by: Song Liu <songliubraving@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel/bpf/devmap.c')
-rw-r--r--kernel/bpf/devmap.c13
1 file changed, 7 insertions, 6 deletions
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index f6c57efb1d0d..371bd880ed58 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -111,10 +111,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	if (cost >= U32_MAX - PAGE_SIZE)
 		goto free_dtab;
 
-	dtab->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
-	/* if map size is larger than memlock limit, reject it early */
-	err = bpf_map_precharge_memlock(dtab->map.memory.pages);
+	/* if map size is larger than memlock limit, reject it */
+	err = bpf_map_charge_init(&dtab->map.memory,
+				  round_up(cost, PAGE_SIZE) >> PAGE_SHIFT);
 	if (err)
 		goto free_dtab;
 
@@ -125,19 +124,21 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 					      __alignof__(unsigned long),
 					      GFP_KERNEL | __GFP_NOWARN);
 	if (!dtab->flush_needed)
-		goto free_dtab;
+		goto free_charge;
 
 	dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
 					      sizeof(struct bpf_dtab_netdev *),
 					      dtab->map.numa_node);
 	if (!dtab->netdev_map)
-		goto free_dtab;
+		goto free_charge;
 
 	spin_lock(&dev_map_lock);
 	list_add_tail_rcu(&dtab->list, &dev_map_list);
 	spin_unlock(&dev_map_lock);
 
 	return &dtab->map;
+free_charge:
+	bpf_map_charge_finish(&dtab->map.memory);
 free_dtab:
 	free_percpu(dtab->flush_needed);
 	kfree(dtab);