author		David S. Miller <davem@davemloft.net>	2019-02-06 19:56:20 -0500
committer	David S. Miller <davem@davemloft.net>	2019-02-06 19:56:20 -0500
commit		e90b1fd83c94d536375d8b9f4916afd15f4db0ed (patch)
tree		ba50688cc9a6712575aa861ff37b1db53dc472b8 /tools/lib/bpf/btf.c
parent		907bea9cb8e9b7c4cb6a8042c164f3c24f141006 (diff)
parent		dd9cef43c222df7c0d76d34451808e789952379d (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:

====================
pull-request: bpf-next 2019-02-07

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Add a riscv64 JIT for BPF, from Björn.

2) Implement BTF deduplication algorithm for libbpf which takes BTF type
   information containing duplicate per-compilation unit information and
   reduces it to an equivalent set of BTF types with no duplication and
   without loss of information, from Andrii.

3) Offloaded and native BPF XDP programs can coexist today, enable also
   offloaded and generic ones as well, from Jakub.

4) Expose various BTF related helper functions in libbpf as API which
   are in particular helpful for JITed programs, from Yonghong.

5) Fix the recently added JMP32 code emission in s390x JIT, from Heiko.

6) Fix BPF kselftests' tcp_{server,client}.py to be able to run inside
   a network namespace, also add a fix for libbpf to get libbpf_print()
   working, from Stanislav.

7) Fixes for bpftool documentation, from Prashant.

8) Type cleanup in BPF kselftests' test_maps.c to silence a gcc8
   warning, from Breno.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
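As a quick orientation for the libbpf API changes in this file, here is a minimal, hypothetical caller sketch. It is not part of the commit: error handling is abbreviated, the tools/include copy of linux/err.h is assumed for IS_ERR()/PTR_ERR(), and `data`/`size` are assumed to hold raw .BTF section bytes obtained elsewhere.

	#include <linux/err.h>
	#include "btf.h"

	/* Load raw .BTF bytes and deduplicate them in place. */
	static int load_and_dedup_btf(__u8 *data, __u32 size)
	{
		struct btf_dedup_opts opts = { .dont_resolve_fwds = false };
		struct btf *btf;
		int err;

		btf = btf__new(data, size);	/* no btf_print_fn_t argument anymore */
		if (IS_ERR(btf))
			return PTR_ERR(btf);

		/* dedup happens in place; the .BTF.ext argument is optional (NULL here) */
		err = btf__dedup(btf, NULL, &opts);

		btf__free(btf);
		return err;
	}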
Diffstat (limited to 'tools/lib/bpf/btf.c')
-rw-r--r--	tools/lib/bpf/btf.c	2032
1 file changed, 1924 insertions(+), 108 deletions(-)
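Among the new helpers added below, btf__get_map_kv_tids() expects BTF to contain a `____btf_map_<map_name>` container struct whose first two members carry the map's key and value types. A rough sketch of what a BPF program defines to satisfy it (mirroring the BPF_ANNOTATE_KV_PAIR convention from the selftests; the map name "my_map" and member types here are illustrative, not from this commit):

	struct pair {
		long long first;
		long long second;
	};

	/* Dummy container type: member 0 describes the key, member 1 the
	 * value. With BTF emitted for it, a loader can resolve and verify
	 * the map's key/value type IDs via:
	 *
	 *   btf__get_map_kv_tids(btf, "my_map", sizeof(int),
	 *			  sizeof(struct pair), &key_tid, &value_tid);
	 */
	struct ____btf_map_my_map {
		int key;
		struct pair value;
	};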
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index d682d3b8f7b9..ab6528c935a1 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 /* Copyright (c) 2018 Facebook */
 
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
@@ -9,8 +10,9 @@
 #include <linux/btf.h>
 #include "btf.h"
 #include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_util.h"
 
-#define elog(fmt, ...) { if (err_log) err_log(fmt, ##__VA_ARGS__); }
 #define max(a, b) ((a) > (b) ? (a) : (b))
 #define min(a, b) ((a) < (b) ? (a) : (b))
 
@@ -107,54 +109,54 @@ static int btf_add_type(struct btf *btf, struct btf_type *t)
 	return 0;
 }
 
-static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log)
+static int btf_parse_hdr(struct btf *btf)
 {
 	const struct btf_header *hdr = btf->hdr;
 	__u32 meta_left;
 
 	if (btf->data_size < sizeof(struct btf_header)) {
-		elog("BTF header not found\n");
+		pr_debug("BTF header not found\n");
 		return -EINVAL;
 	}
 
 	if (hdr->magic != BTF_MAGIC) {
-		elog("Invalid BTF magic:%x\n", hdr->magic);
+		pr_debug("Invalid BTF magic:%x\n", hdr->magic);
 		return -EINVAL;
 	}
 
 	if (hdr->version != BTF_VERSION) {
-		elog("Unsupported BTF version:%u\n", hdr->version);
+		pr_debug("Unsupported BTF version:%u\n", hdr->version);
 		return -ENOTSUP;
 	}
 
 	if (hdr->flags) {
-		elog("Unsupported BTF flags:%x\n", hdr->flags);
+		pr_debug("Unsupported BTF flags:%x\n", hdr->flags);
 		return -ENOTSUP;
 	}
 
 	meta_left = btf->data_size - sizeof(*hdr);
 	if (!meta_left) {
-		elog("BTF has no data\n");
+		pr_debug("BTF has no data\n");
 		return -EINVAL;
 	}
 
 	if (meta_left < hdr->type_off) {
-		elog("Invalid BTF type section offset:%u\n", hdr->type_off);
+		pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off);
 		return -EINVAL;
 	}
 
 	if (meta_left < hdr->str_off) {
-		elog("Invalid BTF string section offset:%u\n", hdr->str_off);
+		pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off);
 		return -EINVAL;
 	}
 
 	if (hdr->type_off >= hdr->str_off) {
-		elog("BTF type section offset >= string section offset. No type?\n");
+		pr_debug("BTF type section offset >= string section offset. No type?\n");
 		return -EINVAL;
 	}
 
 	if (hdr->type_off & 0x02) {
-		elog("BTF type section is not aligned to 4 bytes\n");
+		pr_debug("BTF type section is not aligned to 4 bytes\n");
 		return -EINVAL;
 	}
 
@@ -163,7 +165,7 @@ static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log)
 	return 0;
 }
 
-static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log)
+static int btf_parse_str_sec(struct btf *btf)
 {
 	const struct btf_header *hdr = btf->hdr;
 	const char *start = btf->nohdr_data + hdr->str_off;
@@ -171,7 +173,7 @@ static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log)
 
 	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
 	    start[0] || end[-1]) {
-		elog("Invalid BTF string section\n");
+		pr_debug("Invalid BTF string section\n");
 		return -EINVAL;
 	}
 
@@ -180,7 +182,38 @@ static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log)
 	return 0;
 }
 
-static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log)
+static int btf_type_size(struct btf_type *t)
+{
+	int base_size = sizeof(struct btf_type);
+	__u16 vlen = BTF_INFO_VLEN(t->info);
+
+	switch (BTF_INFO_KIND(t->info)) {
+	case BTF_KIND_FWD:
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_FUNC:
+		return base_size;
+	case BTF_KIND_INT:
+		return base_size + sizeof(__u32);
+	case BTF_KIND_ENUM:
+		return base_size + vlen * sizeof(struct btf_enum);
+	case BTF_KIND_ARRAY:
+		return base_size + sizeof(struct btf_array);
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+		return base_size + vlen * sizeof(struct btf_member);
+	case BTF_KIND_FUNC_PROTO:
+		return base_size + vlen * sizeof(struct btf_param);
+	default:
+		pr_debug("Unsupported BTF_KIND:%u\n", BTF_INFO_KIND(t->info));
+		return -EINVAL;
+	}
+}
+
+static int btf_parse_type_sec(struct btf *btf)
 {
 	struct btf_header *hdr = btf->hdr;
 	void *nohdr_data = btf->nohdr_data;
@@ -189,41 +222,13 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log)
 
 	while (next_type < end_type) {
 		struct btf_type *t = next_type;
-		__u16 vlen = BTF_INFO_VLEN(t->info);
+		int type_size;
 		int err;
 
-		next_type += sizeof(*t);
-		switch (BTF_INFO_KIND(t->info)) {
-		case BTF_KIND_INT:
-			next_type += sizeof(int);
-			break;
-		case BTF_KIND_ARRAY:
-			next_type += sizeof(struct btf_array);
-			break;
-		case BTF_KIND_STRUCT:
-		case BTF_KIND_UNION:
-			next_type += vlen * sizeof(struct btf_member);
-			break;
-		case BTF_KIND_ENUM:
-			next_type += vlen * sizeof(struct btf_enum);
-			break;
-		case BTF_KIND_FUNC_PROTO:
-			next_type += vlen * sizeof(struct btf_param);
-			break;
-		case BTF_KIND_FUNC:
-		case BTF_KIND_TYPEDEF:
-		case BTF_KIND_PTR:
-		case BTF_KIND_FWD:
-		case BTF_KIND_VOLATILE:
-		case BTF_KIND_CONST:
-		case BTF_KIND_RESTRICT:
-			break;
-		default:
-			elog("Unsupported BTF_KIND:%u\n",
-			     BTF_INFO_KIND(t->info));
-			return -EINVAL;
-		}
-
+		type_size = btf_type_size(t);
+		if (type_size < 0)
+			return type_size;
+		next_type += type_size;
 		err = btf_add_type(btf, t);
 		if (err)
 			return err;
@@ -232,6 +237,11 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log)
 	return 0;
 }
 
+__u32 btf__get_nr_types(const struct btf *btf)
+{
+	return btf->nr_types;
+}
+
 const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
 {
 	if (type_id > btf->nr_types)
@@ -250,21 +260,6 @@ static bool btf_type_is_void_or_null(const struct btf_type *t)
 	return !t || btf_type_is_void(t);
 }
 
-static __s64 btf_type_size(const struct btf_type *t)
-{
-	switch (BTF_INFO_KIND(t->info)) {
-	case BTF_KIND_INT:
-	case BTF_KIND_STRUCT:
-	case BTF_KIND_UNION:
-	case BTF_KIND_ENUM:
-		return t->size;
-	case BTF_KIND_PTR:
-		return sizeof(void *);
-	default:
-		return -EINVAL;
-	}
-}
-
 #define MAX_RESOLVE_DEPTH 32
 
 __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
@@ -278,11 +273,16 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
 	t = btf__type_by_id(btf, type_id);
 	for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
 	     i++) {
-		size = btf_type_size(t);
-		if (size >= 0)
-			break;
-
 		switch (BTF_INFO_KIND(t->info)) {
+		case BTF_KIND_INT:
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+		case BTF_KIND_ENUM:
+			size = t->size;
+			goto done;
+		case BTF_KIND_PTR:
+			size = sizeof(void *);
+			goto done;
 		case BTF_KIND_TYPEDEF:
 		case BTF_KIND_VOLATILE:
 		case BTF_KIND_CONST:
@@ -306,6 +306,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
 	if (size < 0)
 		return -EINVAL;
 
+done:
 	if (nelems && size > UINT32_MAX / nelems)
 		return -E2BIG;
 
@@ -363,7 +364,7 @@ void btf__free(struct btf *btf)
 	free(btf);
 }
 
-struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log)
+struct btf *btf__new(__u8 *data, __u32 size)
 {
 	__u32 log_buf_size = 0;
 	char *log_buf = NULL;
@@ -376,16 +377,15 @@ struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log)
 
 	btf->fd = -1;
 
-	if (err_log) {
-		log_buf = malloc(BPF_LOG_BUF_SIZE);
-		if (!log_buf) {
-			err = -ENOMEM;
-			goto done;
-		}
-		*log_buf = 0;
-		log_buf_size = BPF_LOG_BUF_SIZE;
+	log_buf = malloc(BPF_LOG_BUF_SIZE);
+	if (!log_buf) {
+		err = -ENOMEM;
+		goto done;
 	}
 
+	*log_buf = 0;
+	log_buf_size = BPF_LOG_BUF_SIZE;
+
 	btf->data = malloc(size);
 	if (!btf->data) {
 		err = -ENOMEM;
@@ -400,21 +400,21 @@ struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log)
 
 	if (btf->fd == -1) {
 		err = -errno;
-		elog("Error loading BTF: %s(%d)\n", strerror(errno), errno);
+		pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno);
 		if (log_buf && *log_buf)
-			elog("%s\n", log_buf);
+			pr_warning("%s\n", log_buf);
 		goto done;
 	}
 
-	err = btf_parse_hdr(btf, err_log);
+	err = btf_parse_hdr(btf);
 	if (err)
 		goto done;
 
-	err = btf_parse_str_sec(btf, err_log);
+	err = btf_parse_str_sec(btf);
 	if (err)
 		goto done;
 
-	err = btf_parse_type_sec(btf, err_log);
+	err = btf_parse_type_sec(btf);
 
 done:
 	free(log_buf);
@@ -432,6 +432,13 @@ int btf__fd(const struct btf *btf)
 	return btf->fd;
 }
 
+void btf__get_strings(const struct btf *btf, const char **strings,
+		      __u32 *str_len)
+{
+	*strings = btf->strings;
+	*str_len = btf->hdr->str_len;
+}
+
 const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
 {
 	if (offset < btf->hdr->str_len)
@@ -491,7 +498,7 @@ int btf__get_from_id(__u32 id, struct btf **btf)
 		goto exit_free;
 	}
 
-	*btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size, NULL);
+	*btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size);
 	if (IS_ERR(*btf)) {
 		err = PTR_ERR(*btf);
 		*btf = NULL;
@@ -504,6 +511,78 @@ exit_free:
 	return err;
 }
 
+int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
+			 __u32 expected_key_size, __u32 expected_value_size,
+			 __u32 *key_type_id, __u32 *value_type_id)
+{
+	const struct btf_type *container_type;
+	const struct btf_member *key, *value;
+	const size_t max_name = 256;
+	char container_name[max_name];
+	__s64 key_size, value_size;
+	__s32 container_id;
+
+	if (snprintf(container_name, max_name, "____btf_map_%s", map_name) ==
+	    max_name) {
+		pr_warning("map:%s length of '____btf_map_%s' is too long\n",
+			   map_name, map_name);
+		return -EINVAL;
+	}
+
+	container_id = btf__find_by_name(btf, container_name);
+	if (container_id < 0) {
+		pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
+			 map_name, container_name);
+		return container_id;
+	}
+
+	container_type = btf__type_by_id(btf, container_id);
+	if (!container_type) {
+		pr_warning("map:%s cannot find BTF type for container_id:%u\n",
+			   map_name, container_id);
+		return -EINVAL;
+	}
+
+	if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT ||
+	    BTF_INFO_VLEN(container_type->info) < 2) {
+		pr_warning("map:%s container_name:%s is an invalid container struct\n",
+			   map_name, container_name);
+		return -EINVAL;
+	}
+
+	key = (struct btf_member *)(container_type + 1);
+	value = key + 1;
+
+	key_size = btf__resolve_size(btf, key->type);
+	if (key_size < 0) {
+		pr_warning("map:%s invalid BTF key_type_size\n", map_name);
+		return key_size;
+	}
+
+	if (expected_key_size != key_size) {
+		pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
+			   map_name, (__u32)key_size, expected_key_size);
+		return -EINVAL;
+	}
+
+	value_size = btf__resolve_size(btf, value->type);
+	if (value_size < 0) {
+		pr_warning("map:%s invalid BTF value_type_size\n", map_name);
+		return value_size;
+	}
+
+	if (expected_value_size != value_size) {
+		pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
+			   map_name, (__u32)value_size, expected_value_size);
+		return -EINVAL;
+	}
+
+	*key_type_id = key->type;
+	*value_type_id = value->type;
+
+	return 0;
+}
+
 struct btf_ext_sec_copy_param {
 	__u32 off;
 	__u32 len;
@@ -514,8 +593,7 @@ struct btf_ext_sec_copy_param {
 
 static int btf_ext_copy_info(struct btf_ext *btf_ext,
 			     __u8 *data, __u32 data_size,
-			     struct btf_ext_sec_copy_param *ext_sec,
-			     btf_print_fn_t err_log)
+			     struct btf_ext_sec_copy_param *ext_sec)
 {
 	const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
 	const struct btf_ext_info_sec *sinfo;
@@ -529,14 +607,14 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext,
 	data_size -= hdr->hdr_len;
 
 	if (ext_sec->off & 0x03) {
-		elog(".BTF.ext %s section is not aligned to 4 bytes\n",
+		pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n",
 		     ext_sec->desc);
 		return -EINVAL;
 	}
 
 	if (data_size < ext_sec->off ||
 	    ext_sec->len > data_size - ext_sec->off) {
-		elog("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n",
+		pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n",
 		     ext_sec->desc, ext_sec->off, ext_sec->len);
 		return -EINVAL;
 	}
@@ -546,7 +624,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext,
 
 	/* At least a record size */
 	if (info_left < sizeof(__u32)) {
-		elog(".BTF.ext %s record size not found\n", ext_sec->desc);
+		pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc);
 		return -EINVAL;
 	}
 
@@ -554,7 +632,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext,
 	record_size = *(__u32 *)info;
 	if (record_size < ext_sec->min_rec_size ||
 	    record_size & 0x03) {
-		elog("%s section in .BTF.ext has invalid record size %u\n",
+		pr_debug("%s section in .BTF.ext has invalid record size %u\n",
 		     ext_sec->desc, record_size);
 		return -EINVAL;
 	}
@@ -564,7 +642,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext,
 
 	/* If no records, return failure now so .BTF.ext won't be used. */
 	if (!info_left) {
-		elog("%s section in .BTF.ext has no records", ext_sec->desc);
+		pr_debug("%s section in .BTF.ext has no records", ext_sec->desc);
 		return -EINVAL;
 	}
 
@@ -574,14 +652,14 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext,
 		__u32 num_records;
 
 		if (info_left < sec_hdrlen) {
-			elog("%s section header is not found in .BTF.ext\n",
+			pr_debug("%s section header is not found in .BTF.ext\n",
 			     ext_sec->desc);
 			return -EINVAL;
 		}
 
 		num_records = sinfo->num_info;
 		if (num_records == 0) {
-			elog("%s section has incorrect num_records in .BTF.ext\n",
+			pr_debug("%s section has incorrect num_records in .BTF.ext\n",
 			     ext_sec->desc);
 			return -EINVAL;
 		}
@@ -589,7 +667,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext,
 		total_record_size = sec_hdrlen +
 				    (__u64)num_records * record_size;
 		if (info_left < total_record_size) {
-			elog("%s section has incorrect num_records in .BTF.ext\n",
+			pr_debug("%s section has incorrect num_records in .BTF.ext\n",
 			     ext_sec->desc);
 			return -EINVAL;
 		}
@@ -610,8 +688,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext,
 }
 
 static int btf_ext_copy_func_info(struct btf_ext *btf_ext,
-				  __u8 *data, __u32 data_size,
-				  btf_print_fn_t err_log)
+				  __u8 *data, __u32 data_size)
 {
 	const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
 	struct btf_ext_sec_copy_param param = {
@@ -622,12 +699,11 @@ static int btf_ext_copy_func_info(struct btf_ext *btf_ext,
 		.desc = "func_info"
 	};
 
-	return btf_ext_copy_info(btf_ext, data, data_size, &param, err_log);
+	return btf_ext_copy_info(btf_ext, data, data_size, &param);
 }
 
 static int btf_ext_copy_line_info(struct btf_ext *btf_ext,
-				  __u8 *data, __u32 data_size,
-				  btf_print_fn_t err_log)
+				  __u8 *data, __u32 data_size)
 {
 	const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
 	struct btf_ext_sec_copy_param param = {
@@ -638,37 +714,36 @@ static int btf_ext_copy_line_info(struct btf_ext *btf_ext,
 		.desc = "line_info",
 	};
 
-	return btf_ext_copy_info(btf_ext, data, data_size, &param, err_log);
+	return btf_ext_copy_info(btf_ext, data, data_size, &param);
 }
 
-static int btf_ext_parse_hdr(__u8 *data, __u32 data_size,
-			     btf_print_fn_t err_log)
+static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
 {
 	const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
 
 	if (data_size < offsetof(struct btf_ext_header, func_info_off) ||
 	    data_size < hdr->hdr_len) {
-		elog("BTF.ext header not found");
+		pr_debug("BTF.ext header not found");
 		return -EINVAL;
 	}
 
 	if (hdr->magic != BTF_MAGIC) {
-		elog("Invalid BTF.ext magic:%x\n", hdr->magic);
+		pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic);
 		return -EINVAL;
 	}
 
 	if (hdr->version != BTF_VERSION) {
-		elog("Unsupported BTF.ext version:%u\n", hdr->version);
+		pr_debug("Unsupported BTF.ext version:%u\n", hdr->version);
 		return -ENOTSUP;
 	}
 
 	if (hdr->flags) {
-		elog("Unsupported BTF.ext flags:%x\n", hdr->flags);
+		pr_debug("Unsupported BTF.ext flags:%x\n", hdr->flags);
 		return -ENOTSUP;
 	}
 
 	if (data_size == hdr->hdr_len) {
-		elog("BTF.ext has no data\n");
+		pr_debug("BTF.ext has no data\n");
 		return -EINVAL;
 	}
 
@@ -685,12 +760,12 @@ void btf_ext__free(struct btf_ext *btf_ext)
 	free(btf_ext);
 }
 
-struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log)
+struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
 {
 	struct btf_ext *btf_ext;
 	int err;
 
-	err = btf_ext_parse_hdr(data, size, err_log);
+	err = btf_ext_parse_hdr(data, size);
 	if (err)
 		return ERR_PTR(err);
 
@@ -698,13 +773,13 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log)
 	if (!btf_ext)
 		return ERR_PTR(-ENOMEM);
 
-	err = btf_ext_copy_func_info(btf_ext, data, size, err_log);
+	err = btf_ext_copy_func_info(btf_ext, data, size);
 	if (err) {
 		btf_ext__free(btf_ext);
 		return ERR_PTR(err);
 	}
 
-	err = btf_ext_copy_line_info(btf_ext, data, size, err_log);
+	err = btf_ext_copy_line_info(btf_ext, data, size);
 	if (err) {
 		btf_ext__free(btf_ext);
 		return ERR_PTR(err);
@@ -786,3 +861,1744 @@ __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext)
 {
 	return btf_ext->line_info.rec_size;
 }
+
+struct btf_dedup;
+
+static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
+				       const struct btf_dedup_opts *opts);
+static void btf_dedup_free(struct btf_dedup *d);
+static int btf_dedup_strings(struct btf_dedup *d);
+static int btf_dedup_prim_types(struct btf_dedup *d);
+static int btf_dedup_struct_types(struct btf_dedup *d);
+static int btf_dedup_ref_types(struct btf_dedup *d);
+static int btf_dedup_compact_types(struct btf_dedup *d);
+static int btf_dedup_remap_types(struct btf_dedup *d);
+/*
+ * Deduplicate BTF types and strings.
+ *
+ * BTF dedup algorithm takes as an input `struct btf` representing `.BTF` ELF
+ * section with all BTF type descriptors and string data. It overwrites that
+ * memory in-place with deduplicated types and strings without any loss of
+ * information. If optional `struct btf_ext` representing '.BTF.ext' ELF section
+ * is provided, all the strings referenced from .BTF.ext section are honored
+ * and updated to point to the right offsets after deduplication.
+ *
+ * If function returns with error, type/string data might be garbled and should
+ * be discarded.
+ *
+ * More verbose and detailed description of both the problem btf_dedup is
+ * solving and the solution can be found at:
+ * https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html
+ *
+ * Problem description and justification
+ * =====================================
+ *
+ * BTF type information is typically emitted either as a result of conversion
+ * from DWARF to BTF or directly by the compiler. In both cases, each
+ * compilation unit contains information about a subset of all the types that
+ * are used in an application. These subsets are frequently overlapping and
+ * contain a lot of duplicated information when later concatenated together
+ * into a single binary. This algorithm ensures that each unique type is
+ * represented by a single BTF type descriptor, greatly reducing the resulting
+ * size of BTF data.
+ *
+ * Compilation unit isolation and subsequent duplication of data is not the
+ * only problem. The same type hierarchy (e.g., a struct and all the types that
+ * the struct references) in different compilation units can be represented in
+ * BTF to various degrees of completeness (or, rather, incompleteness) due to
+ * struct/union forward declarations.
+ *
+ * Let's take a look at an example that we'll use to better understand the
+ * problem (and solution). Suppose we have two compilation units, each using
+ * the same `struct S`, but each of them having incomplete type information
+ * about the struct's fields:
+ *
+ * // CU #1:
+ * struct S;
+ * struct A {
+ *	int a;
+ *	struct A* self;
+ *	struct S* parent;
+ * };
+ * struct B;
+ * struct S {
+ *	struct A* a_ptr;
+ *	struct B* b_ptr;
+ * };
+ *
+ * // CU #2:
+ * struct S;
+ * struct A;
+ * struct B {
+ *	int b;
+ *	struct B* self;
+ *	struct S* parent;
+ * };
+ * struct S {
+ *	struct A* a_ptr;
+ *	struct B* b_ptr;
+ * };
+ *
+ * In case of CU #1, BTF data will know only that `struct B` exists (but no
+ * more), but will know the complete type information about `struct A`. While
+ * for CU #2, it will know full type information about `struct B`, but will
+ * only know about the forward declaration of `struct A` (in BTF terms, it will
+ * have a `BTF_KIND_FWD` type descriptor with name `A`).
+ *
+ * This compilation unit isolation means that it's possible that there is no
+ * single CU with complete type information describing structs `S`, `A`, and
+ * `B`. Also, we might get tons of duplicated and redundant type information.
+ *
+ * Additional complication we need to keep in mind comes from the fact that
+ * types, in general, can form graphs containing cycles, not just DAGs.
+ *
+ * While the algorithm does deduplication, it also merges and resolves type
+ * information (unless disabled through `struct btf_dedup_opts`), whenever
+ * possible. E.g., in the example above with two compilation units having
+ * partial type information for structs `A` and `B`, the algorithm will emit
+ * a single copy of each BTF type that describes structs `A`, `B`, and `S`
+ * (as well as type information for `int` and pointers), as if they were defined
+ * in a single compilation unit as:
+ *
+ * struct A {
+ *	int a;
+ *	struct A* self;
+ *	struct S* parent;
+ * };
+ * struct B {
+ *	int b;
+ *	struct B* self;
+ *	struct S* parent;
+ * };
+ * struct S {
+ *	struct A* a_ptr;
+ *	struct B* b_ptr;
+ * };
+ *
+ * Algorithm summary
+ * =================
+ *
+ * Algorithm completes its work in 6 separate passes:
+ *
+ * 1. Strings deduplication.
+ * 2. Primitive types deduplication (int, enum, fwd).
+ * 3. Struct/union types deduplication.
+ * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func
+ *    protos, and const/volatile/restrict modifiers).
+ * 5. Types compaction.
+ * 6. Types remapping.
+ *
+ * The algorithm determines a canonical type descriptor, which is a single
+ * representative type for each truly unique type. This canonical type is the
+ * one that will go into the final deduplicated BTF type information. For
+ * struct/unions, it is also the type that the algorithm will merge additional
+ * type information into (while resolving FWDs), as it discovers it from data
+ * in other CUs. Each input BTF type eventually gets either mapped to itself,
+ * if that type is canonical, or to some other type, if that type is equivalent
+ * and was chosen as canonical representative. This mapping is stored in
+ * `btf_dedup->map` array. This map is also used to record the STRUCT/UNION
+ * that a FWD type got resolved to.
+ *
+ * To facilitate fast discovery of canonical types, we also maintain a canonical
+ * index (`btf_dedup->dedup_table`), which maps a type descriptor's signature
+ * hash (i.e., hashed kind, name, size, fields, etc) into a list of canonical
+ * types that match that signature. With a sufficiently good choice of type
+ * signature hashing function, we can limit the number of canonical types for
+ * each unique type signature to a very small number, allowing us to find the
+ * canonical type for any duplicated type very quickly.
+ *
+ * Struct/union deduplication is the most critical part and the algorithm for
+ * deduplicating structs/unions is described in greater detail in comments for
+ * the `btf_dedup_is_equiv` function.
+ */
+int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
+	       const struct btf_dedup_opts *opts)
+{
+	struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts);
+	int err;
+
+	if (IS_ERR(d)) {
+		pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d));
+		return -EINVAL;
+	}
+
+	err = btf_dedup_strings(d);
+	if (err < 0) {
+		pr_debug("btf_dedup_strings failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_prim_types(d);
+	if (err < 0) {
+		pr_debug("btf_dedup_prim_types failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_struct_types(d);
+	if (err < 0) {
+		pr_debug("btf_dedup_struct_types failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_ref_types(d);
+	if (err < 0) {
+		pr_debug("btf_dedup_ref_types failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_compact_types(d);
+	if (err < 0) {
+		pr_debug("btf_dedup_compact_types failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_remap_types(d);
+	if (err < 0) {
+		pr_debug("btf_dedup_remap_types failed:%d\n", err);
+		goto done;
+	}
+
+done:
+	btf_dedup_free(d);
+	return err;
+}
+
+#define BTF_DEDUP_TABLE_SIZE_LOG 14
+#define BTF_DEDUP_TABLE_MOD ((1 << BTF_DEDUP_TABLE_SIZE_LOG) - 1)
+#define BTF_UNPROCESSED_ID ((__u32)-1)
+#define BTF_IN_PROGRESS_ID ((__u32)-2)
+
+struct btf_dedup_node {
+	struct btf_dedup_node *next;
+	__u32 type_id;
+};
+
+struct btf_dedup {
+	/* .BTF section to be deduped in-place */
+	struct btf *btf;
+	/*
+	 * Optional .BTF.ext section. When provided, any strings referenced
+	 * from it will be taken into account when deduping strings
+	 */
+	struct btf_ext *btf_ext;
+	/*
+	 * This is a map from any type's signature hash to a list of possible
+	 * canonical representative type candidates. Hash collisions are
+	 * ignored, so even types of various kinds can share the same list of
+	 * candidates, which is fine because we rely on subsequent
+	 * btf_xxx_equal() checks to authoritatively verify type equality.
+	 */
+	struct btf_dedup_node **dedup_table;
+	/* Canonical types map */
+	__u32 *map;
+	/* Hypothetical mapping, used during type graph equivalence checks */
+	__u32 *hypot_map;
+	__u32 *hypot_list;
+	size_t hypot_cnt;
+	size_t hypot_cap;
+	/* Various options modifying behavior of algorithm */
+	struct btf_dedup_opts opts;
+};
+
+struct btf_str_ptr {
+	const char *str;
+	__u32 new_off;
+	bool used;
+};
+
+struct btf_str_ptrs {
+	struct btf_str_ptr *ptrs;
+	const char *data;
+	__u32 cnt;
+	__u32 cap;
+};
+
+static inline __u32 hash_combine(__u32 h, __u32 value)
+{
+/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
+#define GOLDEN_RATIO_PRIME 0x9e370001UL
+	return h * 37 + value * GOLDEN_RATIO_PRIME;
+#undef GOLDEN_RATIO_PRIME
+}
+
+#define for_each_hash_node(table, hash, node) \
+	for (node = table[hash & BTF_DEDUP_TABLE_MOD]; node; node = node->next)
+
+static int btf_dedup_table_add(struct btf_dedup *d, __u32 hash, __u32 type_id)
+{
+	struct btf_dedup_node *node = malloc(sizeof(struct btf_dedup_node));
+
+	if (!node)
+		return -ENOMEM;
+	node->type_id = type_id;
+	node->next = d->dedup_table[hash & BTF_DEDUP_TABLE_MOD];
+	d->dedup_table[hash & BTF_DEDUP_TABLE_MOD] = node;
+	return 0;
+}
+
+static int btf_dedup_hypot_map_add(struct btf_dedup *d,
+				   __u32 from_id, __u32 to_id)
+{
+	if (d->hypot_cnt == d->hypot_cap) {
+		__u32 *new_list;
+
+		d->hypot_cap += max(16, d->hypot_cap / 2);
+		new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap);
+		if (!new_list)
+			return -ENOMEM;
+		d->hypot_list = new_list;
+	}
+	d->hypot_list[d->hypot_cnt++] = from_id;
+	d->hypot_map[from_id] = to_id;
+	return 0;
+}
+
+static void btf_dedup_clear_hypot_map(struct btf_dedup *d)
+{
+	int i;
+
+	for (i = 0; i < d->hypot_cnt; i++)
+		d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID;
+	d->hypot_cnt = 0;
+}
+
+static void btf_dedup_table_free(struct btf_dedup *d)
+{
+	struct btf_dedup_node *head, *tmp;
+	int i;
+
+	if (!d->dedup_table)
+		return;
+
+	for (i = 0; i < (1 << BTF_DEDUP_TABLE_SIZE_LOG); i++) {
+		while (d->dedup_table[i]) {
+			tmp = d->dedup_table[i];
+			d->dedup_table[i] = tmp->next;
+			free(tmp);
+		}
+
+		head = d->dedup_table[i];
+		while (head) {
+			tmp = head;
+			head = head->next;
+			free(tmp);
+		}
+	}
+
+	free(d->dedup_table);
+	d->dedup_table = NULL;
+}
+
+static void btf_dedup_free(struct btf_dedup *d)
+{
+	btf_dedup_table_free(d);
+
+	free(d->map);
+	d->map = NULL;
+
+	free(d->hypot_map);
+	d->hypot_map = NULL;
+
+	free(d->hypot_list);
+	d->hypot_list = NULL;
+
+	free(d);
+}
+
+static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
+				       const struct btf_dedup_opts *opts)
+{
+	struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
+	int i, err = 0;
+
+	if (!d)
+		return ERR_PTR(-ENOMEM);
+
+	d->btf = btf;
+	d->btf_ext = btf_ext;
+
+	d->dedup_table = calloc(1 << BTF_DEDUP_TABLE_SIZE_LOG,
+				sizeof(struct btf_dedup_node *));
+	if (!d->dedup_table) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	d->map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+	if (!d->map) {
+		err = -ENOMEM;
+		goto done;
+	}
+	/* special BTF "void" type is made canonical immediately */
+	d->map[0] = 0;
+	for (i = 1; i <= btf->nr_types; i++)
+		d->map[i] = BTF_UNPROCESSED_ID;
+
+	d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+	if (!d->hypot_map) {
+		err = -ENOMEM;
+		goto done;
+	}
+	for (i = 0; i <= btf->nr_types; i++)
+		d->hypot_map[i] = BTF_UNPROCESSED_ID;
+
+	d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
+
+done:
+	if (err) {
+		btf_dedup_free(d);
+		return ERR_PTR(err);
+	}
+
+	return d;
+}
+
+typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx);
+
+/*
+ * Iterate over all possible places in .BTF and .BTF.ext that can reference
+ * a string and pass a pointer to it to the provided callback `fn`.
+ */
+static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
+{
+	void *line_data_cur, *line_data_end;
+	int i, j, r, rec_size;
+	struct btf_type *t;
+
+	for (i = 1; i <= d->btf->nr_types; i++) {
+		t = d->btf->types[i];
+		r = fn(&t->name_off, ctx);
+		if (r)
+			return r;
+
+		switch (BTF_INFO_KIND(t->info)) {
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION: {
+			struct btf_member *m = (struct btf_member *)(t + 1);
+			__u16 vlen = BTF_INFO_VLEN(t->info);
+
+			for (j = 0; j < vlen; j++) {
+				r = fn(&m->name_off, ctx);
+				if (r)
+					return r;
+				m++;
+			}
+			break;
+		}
+		case BTF_KIND_ENUM: {
+			struct btf_enum *m = (struct btf_enum *)(t + 1);
+			__u16 vlen = BTF_INFO_VLEN(t->info);
+
+			for (j = 0; j < vlen; j++) {
+				r = fn(&m->name_off, ctx);
+				if (r)
+					return r;
+				m++;
+			}
+			break;
+		}
+		case BTF_KIND_FUNC_PROTO: {
+			struct btf_param *m = (struct btf_param *)(t + 1);
+			__u16 vlen = BTF_INFO_VLEN(t->info);
+
+			for (j = 0; j < vlen; j++) {
+				r = fn(&m->name_off, ctx);
+				if (r)
+					return r;
+				m++;
+			}
+			break;
+		}
+		default:
+			break;
+		}
+	}
+
+	if (!d->btf_ext)
+		return 0;
+
+	line_data_cur = d->btf_ext->line_info.info;
+	line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len;
+	rec_size = d->btf_ext->line_info.rec_size;
+
+	while (line_data_cur < line_data_end) {
+		struct btf_ext_info_sec *sec = line_data_cur;
+		struct bpf_line_info_min *line_info;
+		__u32 num_info = sec->num_info;
+
+		r = fn(&sec->sec_name_off, ctx);
+		if (r)
+			return r;
+
+		line_data_cur += sizeof(struct btf_ext_info_sec);
+		for (i = 0; i < num_info; i++) {
+			line_info = line_data_cur;
+			r = fn(&line_info->file_name_off, ctx);
+			if (r)
+				return r;
+			r = fn(&line_info->line_off, ctx);
+			if (r)
+				return r;
+			line_data_cur += rec_size;
+		}
+	}
+
+	return 0;
+}
+
+static int str_sort_by_content(const void *a1, const void *a2)
+{
+	const struct btf_str_ptr *p1 = a1;
+	const struct btf_str_ptr *p2 = a2;
+
+	return strcmp(p1->str, p2->str);
+}
+
+static int str_sort_by_offset(const void *a1, const void *a2)
+{
+	const struct btf_str_ptr *p1 = a1;
+	const struct btf_str_ptr *p2 = a2;
+
+	if (p1->str != p2->str)
+		return p1->str < p2->str ? -1 : 1;
+	return 0;
+}
+
+static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem)
+{
+	const struct btf_str_ptr *p = pelem;
+
+	if (str_ptr != p->str)
+		return (const char *)str_ptr < p->str ? -1 : 1;
+	return 0;
+}
+
+static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx)
+{
+	struct btf_str_ptrs *strs;
+	struct btf_str_ptr *s;
+
+	if (*str_off_ptr == 0)
+		return 0;
+
+	strs = ctx;
+	s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
+		    sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
+	if (!s)
+		return -EINVAL;
+	s->used = true;
+	return 0;
+}
+
+static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
+{
+	struct btf_str_ptrs *strs;
+	struct btf_str_ptr *s;
+
+	if (*str_off_ptr == 0)
+		return 0;
+
+	strs = ctx;
+	s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
+		    sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
+	if (!s)
+		return -EINVAL;
+	*str_off_ptr = s->new_off;
+	return 0;
+}
+
+/*
+ * Dedup strings and filter out those that are not referenced from either .BTF
+ * or .BTF.ext (if provided) sections.
+ *
+ * This is done by building an index of all strings in BTF's string section,
+ * then iterating over all entities that can reference strings (e.g., type
+ * names, struct field names, .BTF.ext line info, etc) and marking corresponding
+ * strings as used. After that all used strings are deduped and compacted into
+ * a sequential blob of memory and new offsets are calculated. Then all the
+ * string references are iterated again and rewritten using new offsets.
+ */
+static int btf_dedup_strings(struct btf_dedup *d)
+{
+	const struct btf_header *hdr = d->btf->hdr;
+	char *start = (char *)d->btf->nohdr_data + hdr->str_off;
+	char *end = start + d->btf->hdr->str_len;
+	char *p = start, *tmp_strs = NULL;
+	struct btf_str_ptrs strs = {
+		.cnt = 0,
+		.cap = 0,
+		.ptrs = NULL,
+		.data = start,
+	};
+	int i, j, err = 0, grp_idx;
+	bool grp_used;
+
+	/* build index of all strings */
+	while (p < end) {
+		if (strs.cnt + 1 > strs.cap) {
+			struct btf_str_ptr *new_ptrs;
+
+			strs.cap += max(strs.cnt / 2, 16);
+			new_ptrs = realloc(strs.ptrs,
+					   sizeof(strs.ptrs[0]) * strs.cap);
+			if (!new_ptrs) {
+				err = -ENOMEM;
+				goto done;
+			}
+			strs.ptrs = new_ptrs;
+		}
+
+		strs.ptrs[strs.cnt].str = p;
+		strs.ptrs[strs.cnt].used = false;
+
+		p += strlen(p) + 1;
+		strs.cnt++;
+	}
+
+	/* temporary storage for deduplicated strings */
+	tmp_strs = malloc(d->btf->hdr->str_len);
+	if (!tmp_strs) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	/* mark all used strings */
+	strs.ptrs[0].used = true;
+	err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs);
+	if (err)
+		goto done;
+
+	/* sort strings by content, so that we can identify duplicates */
+	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content);
+
+	/*
+	 * iterate groups of equal strings and if any instance in a group was
+	 * referenced, emit single instance and remember new offset
+	 */
+	p = tmp_strs;
+	grp_idx = 0;
+	grp_used = strs.ptrs[0].used;
+	/* iterate past end to avoid code duplication after loop */
+	for (i = 1; i <= strs.cnt; i++) {
+		/*
+		 * when i == strs.cnt, we want to skip string comparison and go
+		 * straight to handling last group of strings (otherwise we'd
+		 * need to handle last group after the loop w/ duplicated code)
+		 */
+		if (i < strs.cnt &&
+		    !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) {
+			grp_used = grp_used || strs.ptrs[i].used;
+			continue;
+		}
+
+		/*
+		 * this check would have been required after the loop to handle
+		 * last group of strings, but due to <= condition in a loop
+		 * we avoid that duplication
+		 */
+		if (grp_used) {
+			int new_off = p - tmp_strs;
+			__u32 len = strlen(strs.ptrs[grp_idx].str);
+
+			memmove(p, strs.ptrs[grp_idx].str, len + 1);
+			for (j = grp_idx; j < i; j++)
+				strs.ptrs[j].new_off = new_off;
+			p += len + 1;
+		}
+
+		if (i < strs.cnt) {
+			grp_idx = i;
+			grp_used = strs.ptrs[i].used;
+		}
+	}
+
+	/* replace original strings with deduped ones */
+	d->btf->hdr->str_len = p - tmp_strs;
+	memmove(start, tmp_strs, d->btf->hdr->str_len);
+	end = start + d->btf->hdr->str_len;
+
+	/* restore original order for further binary search lookups */
+	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset);
+
+	/* remap string offsets */
+	err = btf_for_each_str_off(d, btf_str_remap_offset, &strs);
+	if (err)
+		goto done;
+
+	d->btf->hdr->str_len = end - start;
+
+done:
+	free(tmp_strs);
+	free(strs.ptrs);
+	return err;
+}
+
+static __u32 btf_hash_common(struct btf_type *t)
+{
+	__u32 h;
+
+	h = hash_combine(0, t->name_off);
+	h = hash_combine(h, t->info);
+	h = hash_combine(h, t->size);
+	return h;
+}
+
+static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
+{
+	return t1->name_off == t2->name_off &&
+	       t1->info == t2->info &&
+	       t1->size == t2->size;
+}
+
+/* Calculate type signature hash of INT. */
+static __u32 btf_hash_int(struct btf_type *t)
+{
+	__u32 info = *(__u32 *)(t + 1);
+	__u32 h;
+
+	h = btf_hash_common(t);
+	h = hash_combine(h, info);
+	return h;
+}
+
+/* Check structural equality of two INTs. */
+static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
+{
+	__u32 info1, info2;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+	info1 = *(__u32 *)(t1 + 1);
+	info2 = *(__u32 *)(t2 + 1);
+	return info1 == info2;
+}
+
+/* Calculate type signature hash of ENUM. */
+static __u32 btf_hash_enum(struct btf_type *t)
+{
+	struct btf_enum *member = (struct btf_enum *)(t + 1);
+	__u32 vlen = BTF_INFO_VLEN(t->info);
+	__u32 h = btf_hash_common(t);
+	int i;
+
+	for (i = 0; i < vlen; i++) {
+		h = hash_combine(h, member->name_off);
+		h = hash_combine(h, member->val);
+		member++;
+	}
+	return h;
+}
+
+/* Check structural equality of two ENUMs. */
+static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
+{
+	struct btf_enum *m1, *m2;
+	__u16 vlen;
+	int i;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	vlen = BTF_INFO_VLEN(t1->info);
+	m1 = (struct btf_enum *)(t1 + 1);
+	m2 = (struct btf_enum *)(t2 + 1);
+	for (i = 0; i < vlen; i++) {
+		if (m1->name_off != m2->name_off || m1->val != m2->val)
+			return false;
+		m1++;
+		m2++;
+	}
+	return true;
+}
+
+/*
+ * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs,
+ * as referenced type IDs equivalence is established separately during type
+ * graph equivalence check algorithm.
+ */
+static __u32 btf_hash_struct(struct btf_type *t)
+{
+	struct btf_member *member = (struct btf_member *)(t + 1);
+	__u32 vlen = BTF_INFO_VLEN(t->info);
+	__u32 h = btf_hash_common(t);
+	int i;
+
+	for (i = 0; i < vlen; i++) {
+		h = hash_combine(h, member->name_off);
+		h = hash_combine(h, member->offset);
+		/* no hashing of referenced type ID, it can be unresolved yet */
+		member++;
+	}
+	return h;
+}
+
+/*
+ * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced
+ * type IDs. This check is performed during type graph equivalence check and
+ * referenced types equivalence is checked separately.
+ */
+static bool btf_equal_struct(struct btf_type *t1, struct btf_type *t2)
+{
+	struct btf_member *m1, *m2;
+	__u16 vlen;
+	int i;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	vlen = BTF_INFO_VLEN(t1->info);
+	m1 = (struct btf_member *)(t1 + 1);
+	m2 = (struct btf_member *)(t2 + 1);
+	for (i = 0; i < vlen; i++) {
+		if (m1->name_off != m2->name_off || m1->offset != m2->offset)
+			return false;
+		m1++;
+		m2++;
+	}
+	return true;
+}
+
+/*
+ * Calculate type signature hash of ARRAY, including referenced type IDs,
+ * under assumption that they were already resolved to canonical type IDs and
+ * are not going to change.
+ */
+static __u32 btf_hash_array(struct btf_type *t)
+{
+	struct btf_array *info = (struct btf_array *)(t + 1);
+	__u32 h = btf_hash_common(t);
+
+	h = hash_combine(h, info->type);
+	h = hash_combine(h, info->index_type);
+	h = hash_combine(h, info->nelems);
+	return h;
+}
+
+/*
+ * Check exact equality of two ARRAYs, taking into account referenced
+ * type IDs, under assumption that they were already resolved to canonical
+ * type IDs and are not going to change.
+ * This function is called during reference types deduplication to compare
+ * ARRAY to potential canonical representative.
+ */
+static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2)
+{
+	struct btf_array *info1, *info2;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	info1 = (struct btf_array *)(t1 + 1);
+	info2 = (struct btf_array *)(t2 + 1);
+	return info1->type == info2->type &&
+	       info1->index_type == info2->index_type &&
+	       info1->nelems == info2->nelems;
+}
+
+/*
+ * Check structural compatibility of two ARRAYs, ignoring referenced type
+ * IDs. This check is performed during type graph equivalence check and
+ * referenced types equivalence is checked separately.
+ */
+static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2)
+{
+	struct btf_array *info1, *info2;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	info1 = (struct btf_array *)(t1 + 1);
+	info2 = (struct btf_array *)(t2 + 1);
+	return info1->nelems == info2->nelems;
+}
+
+/*
+ * Calculate type signature hash of FUNC_PROTO, including referenced type IDs,
+ * under assumption that they were already resolved to canonical type IDs and
+ * are not going to change.
+ */
+static inline __u32 btf_hash_fnproto(struct btf_type *t)
+{
+	struct btf_param *member = (struct btf_param *)(t + 1);
+	__u16 vlen = BTF_INFO_VLEN(t->info);
+	__u32 h = btf_hash_common(t);
+	int i;
+
+	for (i = 0; i < vlen; i++) {
+		h = hash_combine(h, member->name_off);
+		h = hash_combine(h, member->type);
+		member++;
+	}
+	return h;
+}
+
+/*
+ * Check exact equality of two FUNC_PROTOs, taking into account referenced
+ * type IDs, under assumption that they were already resolved to canonical
+ * type IDs and are not going to change.
+ * This function is called during reference types deduplication to compare
+ * FUNC_PROTO to potential canonical representative.
+ */
+static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
+{
+	struct btf_param *m1, *m2;
+	__u16 vlen;
+	int i;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	vlen = BTF_INFO_VLEN(t1->info);
+	m1 = (struct btf_param *)(t1 + 1);
+	m2 = (struct btf_param *)(t2 + 1);
+	for (i = 0; i < vlen; i++) {
+		if (m1->name_off != m2->name_off || m1->type != m2->type)
+			return false;
+		m1++;
+		m2++;
+	}
+	return true;
+}
+
+/*
+ * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
+ * IDs. This check is performed during type graph equivalence check and
+ * referenced types equivalence is checked separately.
+ */
+static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
+{
+	struct btf_param *m1, *m2;
+	__u16 vlen;
+	int i;
+
+	/* skip return type ID */
+	if (t1->name_off != t2->name_off || t1->info != t2->info)
+		return false;
+
+	vlen = BTF_INFO_VLEN(t1->info);
+	m1 = (struct btf_param *)(t1 + 1);
+	m2 = (struct btf_param *)(t2 + 1);
+	for (i = 0; i < vlen; i++) {
+		if (m1->name_off != m2->name_off)
+			return false;
+		m1++;
+		m2++;
+	}
+	return true;
+}
+
+/*
+ * Deduplicate primitive types that can't reference other types, by calculating
+ * their type signature hash and comparing them with any possible canonical
+ * candidate. If no canonical candidate matches, the type itself is marked as
+ * canonical and is added into `btf_dedup->dedup_table` as another candidate.
+ */
+static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
+{
+	struct btf_type *t = d->btf->types[type_id];
+	struct btf_type *cand;
+	struct btf_dedup_node *cand_node;
+	/* if we don't find equivalent type, then we are canonical */
+	__u32 new_id = type_id;
+	__u32 h;
+
+	switch (BTF_INFO_KIND(t->info)) {
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_ARRAY:
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+	case BTF_KIND_FUNC:
+	case BTF_KIND_FUNC_PROTO:
+		return 0;
+
+	case BTF_KIND_INT:
+		h = btf_hash_int(t);
+		for_each_hash_node(d->dedup_table, h, cand_node) {
+			cand = d->btf->types[cand_node->type_id];
+			if (btf_equal_int(t, cand)) {
+				new_id = cand_node->type_id;
+				break;
+			}
+		}
+		break;
+
+	case BTF_KIND_ENUM:
+		h = btf_hash_enum(t);
+		for_each_hash_node(d->dedup_table, h, cand_node) {
+			cand = d->btf->types[cand_node->type_id];
+			if (btf_equal_enum(t, cand)) {
+				new_id = cand_node->type_id;
+				break;
+			}
+		}
+		break;
+
+	case BTF_KIND_FWD:
+		h = btf_hash_common(t);
+		for_each_hash_node(d->dedup_table, h, cand_node) {
+			cand = d->btf->types[cand_node->type_id];
+			if (btf_equal_common(t, cand)) {
+				new_id = cand_node->type_id;
+				break;
+			}
+		}
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	d->map[type_id] = new_id;
+	if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int btf_dedup_prim_types(struct btf_dedup *d)
+{
+	int i, err;
+
+	for (i = 1; i <= d->btf->nr_types; i++) {
+		err = btf_dedup_prim_type(d, i);
+		if (err)
+			return err;
+	}
+	return 0;
+}
1868
1869/*
1870 * Check whether type is already mapped into canonical one (could be to itself).
1871 */
1872static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id)
1873{
1874 return d->map[type_id] <= BTF_MAX_TYPE;
1875}
1876
1877/*
1878 * Resolve type ID into its canonical type ID, if any; otherwise return original
1879 * type ID. If type is FWD and is resolved into STRUCT/UNION already, follow
1880 * STRUCT/UNION link and resolve it into canonical type ID as well.
1881 */
1882static inline __u32 resolve_type_id(struct btf_dedup *d, __u32 type_id)
1883{
1884 while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
1885 type_id = d->map[type_id];
1886 return type_id;
1887}
1888
1889/*
1890 * Resolve FWD to underlying STRUCT/UNION, if any; otherwise return original
1891 * type ID.
1892 */
1893static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id)
1894{
1895 __u32 orig_type_id = type_id;
1896
1897 if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD)
1898 return type_id;
1899
1900 while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
1901 type_id = d->map[type_id];
1902
1903 if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD)
1904 return type_id;
1905
1906 return orig_type_id;
1907}
1908
1909
1910static inline __u16 btf_fwd_kind(struct btf_type *t)
1911{
1912 return BTF_INFO_KFLAG(t->info) ? BTF_KIND_UNION : BTF_KIND_STRUCT;
1913}
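/*
 * Note on the kind flag read above (informational, reflecting the BTF format
 * rather than anything dedup-specific): a FWD emitted for a hypothetical
 * 'union u;' carries kflag == 1, so btf_fwd_kind() reports BTF_KIND_UNION;
 * a FWD for 'struct s;' carries kflag == 0 and maps to BTF_KIND_STRUCT.
 */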
1914
1915/*
1916 * Check equivalence of BTF type graph formed by candidate struct/union (we'll
1917 * call it "candidate graph" in this description for brevity) to a type graph
1918 * formed by (potential) canonical struct/union ("canonical graph" for brevity
1919 * here, though keep in mind that not all types in canonical graph are
1920 * necessarily canonical representatives themselves; some of them might be
1921 * duplicates or their uniqueness might not have been established yet).
1922 * Returns:
1923 * - >0, if type graphs are equivalent;
1924 * - 0, if not equivalent;
1925 * - <0, on error.
1926 *
1927 * Algorithm performs side-by-side DFS traversal of both type graphs and checks
1928 * equivalence of BTF types at each step. If at any point BTF types in candidate
1929 * and canonical graphs are not structurally compatible, the whole graphs are
1930 * incompatible. If types are structurally equivalent (i.e., all information
1931 * except referenced type IDs is exactly the same), a mapping from `canon_id` to
1932 * a `cand_id` is recorded in the hypothetical mapping (`btf_dedup->hypot_map`).
1933 * If a type references other types, then those referenced types are checked
1934 * for equivalence recursively.
1935 *
1936 * During DFS traversal, if we find that for current `canon_id` type we
1937 * already have some mapping in hypothetical map, we check for two possible
1938 * situations:
1939 * - `canon_id` is mapped to exactly the same type as `cand_id`. This will
1940 * happen when type graphs have cycles. In this case we assume those two
1941 * types are equivalent.
1942 * - `canon_id` is mapped to a different type. This is a contradiction in our
1943 *   hypothetical mapping, because the same type in the canonical graph would
1944 *   correspond to two different types in the candidate graph, which for
1945 *   equivalent type graphs shouldn't happen. This condition terminates the
1946 *   equivalence check with a negative result.
1947 *
1948 * If the traversal exhausts all types to check and finds no contradiction,
1949 * then the type graphs are equivalent.
1950 *
1951 * When checking types for equivalence, there is one special case: FWD types.
1952 * If FWD type resolution is allowed and one of the types (either from canonical
1953 * or candidate graph) is FWD and other is STRUCT/UNION (depending on FWD's kind
1954 * flag) and their names match, the hypothetical mapping is updated to point from
1955 * FWD to STRUCT/UNION. If the graphs are later determined to be equivalent,
1956 * this mapping will be used to record the FWD -> STRUCT/UNION mapping permanently.
1957 *
1958 * Technically, this could lead to incorrect FWD to STRUCT/UNION resolution,
1959 * if there are two identically named (or anonymous) structs/unions that are
1960 * structurally compatible, one of which has a FWD field, while the other is a
1961 * concrete STRUCT/UNION, but according to C sources they are different
1962 * structs/unions that reference different types with the same name. This is
1963 * extremely unlikely to happen, but the btf_dedup API allows disabling FWD
1964 * resolution if this logic is causing problems.
1965 *
1966 * Doing FWD resolution means that both the candidate and canonical graphs can
1967 * consist of portions that come from multiple compilation units. This is
1968 * because types within a single compilation unit are always deduplicated and
1969 * FWDs are already resolved, if the referenced struct/union definition is
1970 * available. So, if we had an unresolved FWD and found a corresponding
1971 * STRUCT/UNION, they must be from different compilation units. This
1972 * consequently means that when we "link" a FWD to its corresponding
1973 * STRUCT/UNION, the type graph will likely have at least two different BTF
1974 * types that describe the same type (e.g., most probably there will be two
1975 * different BTF types for the same 'int' primitive type) and could even have
1976 * "overlapping" parts of the type graph that describe the same subset of types.
1977 *
1978 * This in turn means that our assumption that each type in the canonical graph
1979 * must correspond to exactly one type in the candidate graph might not hold
1980 * anymore and will make it harder to detect contradictions using the
1981 * hypothetical map. To handle this problem, we follow FWD -> STRUCT/UNION
1982 * resolution only in the canonical graph. FWDs in the candidate graph are never
1983 * resolved. To see why that's OK, let's check all possible situations w.r.t.
1984 * FWDs that can occur:
1985 * - Both types in canonical and candidate graphs are FWDs. If they are
1986 * structurally equivalent, then they can either be both resolved to the
1987 * same STRUCT/UNION or not resolved at all. In both cases they are
1988 * equivalent and there is no need to resolve FWD on candidate side.
1989 * - Both types in canonical and candidate graphs are concrete STRUCT/UNIONs,
1990 *   so there is nothing to resolve; the algorithm will check equivalence anyway.
1991 * - Type in canonical graph is FWD, while type in candidate graph is concrete
1992 *   STRUCT/UNION. In this case the candidate graph comes from a single
1993 *   compilation unit, so there is exactly one BTF type for each unique C type.
1994 *   After resolving FWD into STRUCT/UNION, there might be more than one BTF
1995 *   type in the canonical graph mapping to a single BTF type in the candidate
1996 *   graph, but because the hypothetical mapping maps from canonical to
1997 *   candidate types, that's alright: we still maintain the property of a single
1998 *   `canon_id` mapping to a single `cand_id` (there could be two different
1999 *   `canon_id`s mapped to the same `cand_id`, but that's not contradictory).
2000 * - Type in canonical graph is concrete STRUCT/UNION, while type in candidate
2001 *   graph is FWD. In this case we are just going to check compatibility of the
2002 *   STRUCT/UNION and corresponding FWD, and if they are compatible, we'll
2003 *   assume that whatever STRUCT/UNION the FWD resolves to must be equivalent
2004 *   to the concrete STRUCT/UNION from the canonical graph. If the rest of the
2005 *   type graphs turn out equivalent, we'll resolve the FWD to the concrete
2006 *   STRUCT/UNION from the canonical graph.
2007 */
2008static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
2009 __u32 canon_id)
2010{
2011 struct btf_type *cand_type;
2012 struct btf_type *canon_type;
2013 __u32 hypot_type_id;
2014 __u16 cand_kind;
2015 __u16 canon_kind;
2016 int i, eq;
2017
2018 /* if both resolve to the same canonical, they must be equivalent */
2019 if (resolve_type_id(d, cand_id) == resolve_type_id(d, canon_id))
2020 return 1;
2021
2022 canon_id = resolve_fwd_id(d, canon_id);
2023
2024 hypot_type_id = d->hypot_map[canon_id];
2025 if (hypot_type_id <= BTF_MAX_TYPE)
2026 return hypot_type_id == cand_id;
2027
2028 if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
2029 return -ENOMEM;
2030
2031 cand_type = d->btf->types[cand_id];
2032 canon_type = d->btf->types[canon_id];
2033 cand_kind = BTF_INFO_KIND(cand_type->info);
2034 canon_kind = BTF_INFO_KIND(canon_type->info);
2035
2036 if (cand_type->name_off != canon_type->name_off)
2037 return 0;
2038
2039 /* FWD <--> STRUCT/UNION equivalence check, if enabled */
2040	if (!d->opts.dont_resolve_fwds &&
2041	    (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD) &&
2042	    cand_kind != canon_kind) {
2043 __u16 real_kind;
2044 __u16 fwd_kind;
2045
2046 if (cand_kind == BTF_KIND_FWD) {
2047 real_kind = canon_kind;
2048 fwd_kind = btf_fwd_kind(cand_type);
2049 } else {
2050 real_kind = cand_kind;
2051 fwd_kind = btf_fwd_kind(canon_type);
2052 }
2053 return fwd_kind == real_kind;
2054 }
2055
2056 if (cand_type->info != canon_type->info)
2057 return 0;
2058
2059 switch (cand_kind) {
2060 case BTF_KIND_INT:
2061 return btf_equal_int(cand_type, canon_type);
2062
2063 case BTF_KIND_ENUM:
2064 return btf_equal_enum(cand_type, canon_type);
2065
2066 case BTF_KIND_FWD:
2067 return btf_equal_common(cand_type, canon_type);
2068
2069 case BTF_KIND_CONST:
2070 case BTF_KIND_VOLATILE:
2071 case BTF_KIND_RESTRICT:
2072 case BTF_KIND_PTR:
2073 case BTF_KIND_TYPEDEF:
2074 case BTF_KIND_FUNC:
2075 return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
2076
2077 case BTF_KIND_ARRAY: {
2078 struct btf_array *cand_arr, *canon_arr;
2079
2080 if (!btf_compat_array(cand_type, canon_type))
2081 return 0;
2082 cand_arr = (struct btf_array *)(cand_type + 1);
2083 canon_arr = (struct btf_array *)(canon_type + 1);
2084 eq = btf_dedup_is_equiv(d,
2085 cand_arr->index_type, canon_arr->index_type);
2086 if (eq <= 0)
2087 return eq;
2088 return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type);
2089 }
2090
2091 case BTF_KIND_STRUCT:
2092 case BTF_KIND_UNION: {
2093 struct btf_member *cand_m, *canon_m;
2094 __u16 vlen;
2095
2096 if (!btf_equal_struct(cand_type, canon_type))
2097 return 0;
2098 vlen = BTF_INFO_VLEN(cand_type->info);
2099 cand_m = (struct btf_member *)(cand_type + 1);
2100 canon_m = (struct btf_member *)(canon_type + 1);
2101 for (i = 0; i < vlen; i++) {
2102 eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type);
2103 if (eq <= 0)
2104 return eq;
2105 cand_m++;
2106 canon_m++;
2107 }
2108
2109 return 1;
2110 }
2111
2112 case BTF_KIND_FUNC_PROTO: {
2113 struct btf_param *cand_p, *canon_p;
2114 __u16 vlen;
2115
2116 if (!btf_compat_fnproto(cand_type, canon_type))
2117 return 0;
2118 eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
2119 if (eq <= 0)
2120 return eq;
2121 vlen = BTF_INFO_VLEN(cand_type->info);
2122 cand_p = (struct btf_param *)(cand_type + 1);
2123 canon_p = (struct btf_param *)(canon_type + 1);
2124 for (i = 0; i < vlen; i++) {
2125 eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type);
2126 if (eq <= 0)
2127 return eq;
2128 cand_p++;
2129 canon_p++;
2130 }
2131 return 1;
2132 }
2133
2134 default:
2135 return -EINVAL;
2136 }
2137 return 0;
2138}
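/*
 * To make the FWD special case above concrete, consider this hypothetical
 * C input (illustrative only, not from the original source):
 *
 *   // CU #1:                         // CU #2:
 *   struct s;                         struct s { int x; };
 *   struct t { struct s *ptr; };      struct t { struct s *ptr; };
 *
 * When the two copies of 'struct t' are compared, the side-by-side
 * traversal reaches FWD 'struct s' on one side and the concrete STRUCT
 * 'struct s' on the other. Their names match and btf_fwd_kind() agrees
 * with the real kind, so the graphs are declared equivalent and
 * hypot_map records the FWD <-> STRUCT correspondence for later merging.
 */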
2139
2140/*
2141 * Use hypothetical mapping, produced by successful type graph equivalence
2142 * check, to augment existing struct/union canonical mapping, where possible.
2143 *
2144 * If BTF_KIND_FWD resolution is allowed, this mapping is also used to record
2145 * FWD -> STRUCT/UNION correspondence. FWD resolution is bidirectional: it
2146 * doesn't matter whether the FWD type was part of the canonical graph or the
2147 * candidate one, we record the mapping anyway. As opposed to the carefulness
2148 * required for the struct/union correspondence mapping (described below), for
2149 * FWD resolution it's not important, because by the time a FWD type (a
2150 * reference type) is deduplicated, all structs/unions will already be deduped.
2151 *
2152 * Recording the STRUCT/UNION mapping is purely a performance optimization and
2153 * is not required for correctness. It needs to be done carefully to ensure that
2154 * a struct/union from the candidate's type graph is not mapped to a
2155 * corresponding struct/union from the canonical type graph that itself hasn't
2156 * been resolved into a canonical representative. The only guarantee we have is
2157 * that the canonical struct/union was determined as canonical and that won't
2158 * change. But any types referenced through that struct/union's fields might not
2159 * have been resolved yet, so in such a case it's too early to establish any
2160 * kind of correspondence between structs/unions.
2161 *
2162 * No canonical correspondence is derived for primitive types (they are fully
2163 * deduplicated already anyway) or reference types (they rely on the stability
2164 * of the struct/union canonical relationship for equivalence checks).
2165 */
2166static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
2167{
2168 __u32 cand_type_id, targ_type_id;
2169 __u16 t_kind, c_kind;
2170 __u32 t_id, c_id;
2171 int i;
2172
2173 for (i = 0; i < d->hypot_cnt; i++) {
2174 cand_type_id = d->hypot_list[i];
2175 targ_type_id = d->hypot_map[cand_type_id];
2176 t_id = resolve_type_id(d, targ_type_id);
2177 c_id = resolve_type_id(d, cand_type_id);
2178 t_kind = BTF_INFO_KIND(d->btf->types[t_id]->info);
2179 c_kind = BTF_INFO_KIND(d->btf->types[c_id]->info);
2180 /*
2181 * Resolve FWD into STRUCT/UNION.
2182 * It's ok to resolve FWD into STRUCT/UNION that's not yet
2183 * mapped to canonical representative (as opposed to
2184 * STRUCT/UNION <--> STRUCT/UNION mapping logic below), because
2185 * eventually that struct is going to be mapped and all resolved
2186 * FWDs will automatically resolve to correct canonical
2187 * representative. This will happen before ref type deduping,
2188		 * which critically depends on the stability of these mappings. This
2189 * stability is not a requirement for STRUCT/UNION equivalence
2190 * checks, though.
2191 */
2192 if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD)
2193 d->map[c_id] = t_id;
2194 else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
2195 d->map[t_id] = c_id;
2196
2197 if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) &&
2198 c_kind != BTF_KIND_FWD &&
2199 is_type_mapped(d, c_id) &&
2200 !is_type_mapped(d, t_id)) {
2201 /*
2202		 * as a perf optimization, we can map a struct/union
2203		 * that's part of the type graph we just verified for
2204		 * equivalence. We can only do that for a struct/union
2205		 * that already has a canonical representative, though.
2206 */
2207 d->map[t_id] = c_id;
2208 }
2209 }
2210}
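/*
 * Continuing the hypothetical two-CU example from btf_dedup_is_equiv():
 * after a successful equivalence check, hypot_map might contain an entry
 * pairing FWD 'struct s' with the concrete STRUCT 'struct s', which the
 * loop above turns into a permanent d->map entry. The STRUCT/UNION
 * shortcut, in contrast, only fires for a struct/union whose counterpart
 * already has a settled canonical mapping, as the in-loop comment explains.
 */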
2211
2212/*
2213 * Deduplicate struct/union types.
2214 *
2215 * For each struct/union type its type signature hash is calculated, taking
2216 * into account the type's name, size, and the number, order, and names of its
2217 * fields, but ignoring the type IDs referenced from fields, because those
2218 * might not be deduped completely until after the reference type deduplication
2219 * phase. This type hash is used to iterate over all potential canonical types
2220 * sharing the same hash. For each canonical candidate we check whether the
2221 * type graphs they form (through referenced types in fields and so on) are
2222 * equivalent using the algorithm implemented in `btf_dedup_is_equiv`. If such
2223 * equivalence is found and BTF_KIND_FWD resolution is allowed, then the
2224 * hypothetical mapping (btf_dedup->hypot_map) produced by the aforementioned
2225 * type graph equivalence algorithm is used to record the FWD -> STRUCT/UNION
2226 * mapping. It's also used to potentially map other structs/unions to their
2227 * canonical representatives, if such a relationship hasn't been established
2228 * yet. This speeds up the algorithm by eliminating some duplicate work.
2229 *
2230 * If no matching canonical representative was found, the struct/union is
2231 * marked as its own canonical representative and is added into the
2232 * btf_dedup->dedup_table hash map for further lookups.
2233 */
2234static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
2235{
2236 struct btf_dedup_node *cand_node;
2237 struct btf_type *t;
2238	/* if we don't find an equivalent type, then we are canonical */
2239 __u32 new_id = type_id;
2240 __u16 kind;
2241 __u32 h;
2242
2243	/* already deduped or in the process of deduping (loop detected) */
2244 if (d->map[type_id] <= BTF_MAX_TYPE)
2245 return 0;
2246
2247 t = d->btf->types[type_id];
2248 kind = BTF_INFO_KIND(t->info);
2249
2250 if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
2251 return 0;
2252
2253 h = btf_hash_struct(t);
2254 for_each_hash_node(d->dedup_table, h, cand_node) {
2255 int eq;
2256
2257 btf_dedup_clear_hypot_map(d);
2258 eq = btf_dedup_is_equiv(d, type_id, cand_node->type_id);
2259 if (eq < 0)
2260 return eq;
2261 if (!eq)
2262 continue;
2263 new_id = cand_node->type_id;
2264 btf_dedup_merge_hypot_map(d);
2265 break;
2266 }
2267
2268 d->map[type_id] = new_id;
2269 if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
2270 return -ENOMEM;
2271
2272 return 0;
2273}
2274
2275static int btf_dedup_struct_types(struct btf_dedup *d)
2276{
2277 int i, err;
2278
2279 for (i = 1; i <= d->btf->nr_types; i++) {
2280 err = btf_dedup_struct_type(d, i);
2281 if (err)
2282 return err;
2283 }
2284 return 0;
2285}
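/*
 * Note why the hash must ignore referenced type IDs. In a hypothetical
 * combined BTF, two copies of
 *
 *   struct list_head { struct list_head *next, *prev; };
 *
 * coming from different CUs will have members pointing at *different*
 * PTR type IDs, yet btf_hash_struct() still produces the same hash for
 * both, because it only mixes in the name, size and member layout. The
 * graph equivalence check above then proves the two copies identical.
 */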
2286
2287/*
2288 * Deduplicate reference type.
2289 *
2290 * Once all primitive and struct/union types are deduplicated, we can easily
2291 * deduplicate all other (reference) BTF types. This is done in two steps:
2292 *
2293 * 1. Resolve all referenced type IDs into their canonical type IDs. This
2294 * resolution can be done either immediately for primitive or struct/union types
2295 * (because they were deduped in the previous two phases) or recursively for
2296 * reference types. Recursion will always terminate at either a primitive or a
2297 * struct/union type, at which point we can "unwind" the chain of reference
2298 * types one by one. There is no danger of encountering cycles because in the C
2299 * type system the only way to form a type cycle is through a struct/union, so
2300 * any chain of reference types, even those taking part in a type cycle, will
2301 * inevitably reach a struct/union at some point.
2302 *
2303 * 2. Once all referenced type IDs are resolved into canonical ones, the BTF
2304 * type becomes "stable", in the sense that no further deduplication will cause
2305 * any changes to it. With that, it's now possible to calculate the type's
2306 * signature hash (this time taking into account referenced type IDs) and loop
2307 * over all potential canonical representatives. If no match is found, the
2308 * current type becomes its own canonical representative and is added into
2309 * btf_dedup->dedup_table as another possible canonical representative.
2310 */
2311static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
2312{
2313 struct btf_dedup_node *cand_node;
2314 struct btf_type *t, *cand;
2315	/* if we don't find an equivalent type, then we are the representative type */
2316 __u32 new_id = type_id;
2317	__u32 h;
	/* signed, so error codes returned by the recursion survive the < 0 checks */
	int ref_type_id;
2318
2319 if (d->map[type_id] == BTF_IN_PROGRESS_ID)
2320 return -ELOOP;
2321 if (d->map[type_id] <= BTF_MAX_TYPE)
2322 return resolve_type_id(d, type_id);
2323
2324 t = d->btf->types[type_id];
2325 d->map[type_id] = BTF_IN_PROGRESS_ID;
2326
2327 switch (BTF_INFO_KIND(t->info)) {
2328 case BTF_KIND_CONST:
2329 case BTF_KIND_VOLATILE:
2330 case BTF_KIND_RESTRICT:
2331 case BTF_KIND_PTR:
2332 case BTF_KIND_TYPEDEF:
2333 case BTF_KIND_FUNC:
2334 ref_type_id = btf_dedup_ref_type(d, t->type);
2335 if (ref_type_id < 0)
2336 return ref_type_id;
2337 t->type = ref_type_id;
2338
2339 h = btf_hash_common(t);
2340 for_each_hash_node(d->dedup_table, h, cand_node) {
2341 cand = d->btf->types[cand_node->type_id];
2342 if (btf_equal_common(t, cand)) {
2343 new_id = cand_node->type_id;
2344 break;
2345 }
2346 }
2347 break;
2348
2349 case BTF_KIND_ARRAY: {
2350 struct btf_array *info = (struct btf_array *)(t + 1);
2351
2352 ref_type_id = btf_dedup_ref_type(d, info->type);
2353 if (ref_type_id < 0)
2354 return ref_type_id;
2355 info->type = ref_type_id;
2356
2357 ref_type_id = btf_dedup_ref_type(d, info->index_type);
2358 if (ref_type_id < 0)
2359 return ref_type_id;
2360 info->index_type = ref_type_id;
2361
2362 h = btf_hash_array(t);
2363 for_each_hash_node(d->dedup_table, h, cand_node) {
2364 cand = d->btf->types[cand_node->type_id];
2365 if (btf_equal_array(t, cand)) {
2366 new_id = cand_node->type_id;
2367 break;
2368 }
2369 }
2370 break;
2371 }
2372
2373 case BTF_KIND_FUNC_PROTO: {
2374 struct btf_param *param;
2375 __u16 vlen;
2376 int i;
2377
2378 ref_type_id = btf_dedup_ref_type(d, t->type);
2379 if (ref_type_id < 0)
2380 return ref_type_id;
2381 t->type = ref_type_id;
2382
2383 vlen = BTF_INFO_VLEN(t->info);
2384 param = (struct btf_param *)(t + 1);
2385 for (i = 0; i < vlen; i++) {
2386 ref_type_id = btf_dedup_ref_type(d, param->type);
2387 if (ref_type_id < 0)
2388 return ref_type_id;
2389 param->type = ref_type_id;
2390 param++;
2391 }
2392
2393 h = btf_hash_fnproto(t);
2394 for_each_hash_node(d->dedup_table, h, cand_node) {
2395 cand = d->btf->types[cand_node->type_id];
2396 if (btf_equal_fnproto(t, cand)) {
2397 new_id = cand_node->type_id;
2398 break;
2399 }
2400 }
2401 break;
2402 }
2403
2404 default:
2405 return -EINVAL;
2406 }
2407
2408 d->map[type_id] = new_id;
2409 if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
2410 return -ENOMEM;
2411
2412 return new_id;
2413}
2414
2415static int btf_dedup_ref_types(struct btf_dedup *d)
2416{
2417 int i, err;
2418
2419 for (i = 1; i <= d->btf->nr_types; i++) {
2420 err = btf_dedup_ref_type(d, i);
2421 if (err < 0)
2422 return err;
2423 }
2424 btf_dedup_table_free(d);
2425 return 0;
2426}
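/*
 * As an illustration (hypothetical C input): deduping the chain for a type
 * like 'const int *' proceeds bottom-up. For PTR -> CONST -> INT, the
 * recursion first resolves INT to its canonical ID, then CONST (now
 * referencing the canonical INT) can be hashed and matched against
 * candidates, and finally PTR. Each level returns the canonical ID of the
 * level below it, so the whole chain settles in a single pass.
 */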
2427
2428/*
2429 * Compact types.
2430 *
2431 * After we have established for each type its corresponding canonical
2432 * representative, we can now eliminate types that are not canonical and leave
2433 * only canonical ones laid out sequentially in memory by copying them over
2434 * duplicates. During compaction the btf_dedup->hypot_map array is reused to
2435 * store a map from original type ID to a new compacted type ID, which will be
2436 * used during the next phase to "fix up" type IDs referenced from struct/union
2437 * and reference types.
2438 */
2439static int btf_dedup_compact_types(struct btf_dedup *d)
2440{
2441 struct btf_type **new_types;
2442 __u32 next_type_id = 1;
2443 char *types_start, *p;
2444 int i, len;
2445
2446 /* we are going to reuse hypot_map to store compaction remapping */
2447 d->hypot_map[0] = 0;
2448 for (i = 1; i <= d->btf->nr_types; i++)
2449 d->hypot_map[i] = BTF_UNPROCESSED_ID;
2450
2451 types_start = d->btf->nohdr_data + d->btf->hdr->type_off;
2452 p = types_start;
2453
2454 for (i = 1; i <= d->btf->nr_types; i++) {
2455 if (d->map[i] != i)
2456 continue;
2457
2458 len = btf_type_size(d->btf->types[i]);
2459 if (len < 0)
2460 return len;
2461
2462 memmove(p, d->btf->types[i], len);
2463 d->hypot_map[i] = next_type_id;
2464 d->btf->types[next_type_id] = (struct btf_type *)p;
2465 p += len;
2466 next_type_id++;
2467 }
2468
2469 /* shrink struct btf's internal types index and update btf_header */
2470 d->btf->nr_types = next_type_id - 1;
2471 d->btf->types_size = d->btf->nr_types;
2472 d->btf->hdr->type_len = p - types_start;
2473 new_types = realloc(d->btf->types,
2474 (1 + d->btf->nr_types) * sizeof(struct btf_type *));
2475 if (!new_types)
2476 return -ENOMEM;
2477 d->btf->types = new_types;
2478
2479 /* make sure string section follows type information without gaps */
2480 d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data;
2481 memmove(p, d->btf->strings, d->btf->hdr->str_len);
2482 d->btf->strings = p;
2483 p += d->btf->hdr->str_len;
2484
2485 d->btf->data_size = p - (char *)d->btf->data;
2486 return 0;
2487}
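/*
 * Small hypothetical example of compaction: with 3 original types and
 * d->map = { [1] = 1, [2] = 1, [3] = 3 }, only [1] and [3] survive.
 * [3]'s data is memmove()'d right after [1]'s, nr_types drops to 2 and
 * hypot_map becomes { [1] = 1, [3] = 2 }, ready for the remap phase.
 */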
2488
2489/*
2490 * Figure out the final (deduplicated and compacted) type ID for the provided
2491 * original `type_id` by first resolving it into the corresponding canonical
2492 * type ID and then mapping it to a deduplicated type ID, stored in
2493 * btf_dedup->hypot_map, which is populated during the compaction phase.
2494 */
2495static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
2496{
2497 __u32 resolved_type_id, new_type_id;
2498
2499 resolved_type_id = resolve_type_id(d, type_id);
2500 new_type_id = d->hypot_map[resolved_type_id];
2501 if (new_type_id > BTF_MAX_TYPE)
2502 return -EINVAL;
2503 return new_type_id;
2504}
2505
2506/*
2507 * Remap referenced type IDs into deduped type IDs.
2508 *
2509 * After BTF types are deduplicated and compacted, their final type IDs may
2510 * differ from the original ones. The map from an original type ID to its
2511 * corresponding deduped type ID is stored in btf_dedup->hypot_map and is
2512 * populated during the compaction phase. During the remapping phase we rewrite
2513 * all type IDs referenced from any BTF type (e.g., struct fields, func proto
2514 * args, etc.) to their final deduped type IDs.
2515 */
2516static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
2517{
2518 struct btf_type *t = d->btf->types[type_id];
2519 int i, r;
2520
2521 switch (BTF_INFO_KIND(t->info)) {
2522 case BTF_KIND_INT:
2523 case BTF_KIND_ENUM:
2524 break;
2525
2526 case BTF_KIND_FWD:
2527 case BTF_KIND_CONST:
2528 case BTF_KIND_VOLATILE:
2529 case BTF_KIND_RESTRICT:
2530 case BTF_KIND_PTR:
2531 case BTF_KIND_TYPEDEF:
2532 case BTF_KIND_FUNC:
2533 r = btf_dedup_remap_type_id(d, t->type);
2534 if (r < 0)
2535 return r;
2536 t->type = r;
2537 break;
2538
2539 case BTF_KIND_ARRAY: {
2540 struct btf_array *arr_info = (struct btf_array *)(t + 1);
2541
2542 r = btf_dedup_remap_type_id(d, arr_info->type);
2543 if (r < 0)
2544 return r;
2545 arr_info->type = r;
2546 r = btf_dedup_remap_type_id(d, arr_info->index_type);
2547 if (r < 0)
2548 return r;
2549 arr_info->index_type = r;
2550 break;
2551 }
2552
2553 case BTF_KIND_STRUCT:
2554 case BTF_KIND_UNION: {
2555 struct btf_member *member = (struct btf_member *)(t + 1);
2556 __u16 vlen = BTF_INFO_VLEN(t->info);
2557
2558 for (i = 0; i < vlen; i++) {
2559 r = btf_dedup_remap_type_id(d, member->type);
2560 if (r < 0)
2561 return r;
2562 member->type = r;
2563 member++;
2564 }
2565 break;
2566 }
2567
2568 case BTF_KIND_FUNC_PROTO: {
2569 struct btf_param *param = (struct btf_param *)(t + 1);
2570 __u16 vlen = BTF_INFO_VLEN(t->info);
2571
2572 r = btf_dedup_remap_type_id(d, t->type);
2573 if (r < 0)
2574 return r;
2575 t->type = r;
2576
2577 for (i = 0; i < vlen; i++) {
2578 r = btf_dedup_remap_type_id(d, param->type);
2579 if (r < 0)
2580 return r;
2581 param->type = r;
2582 param++;
2583 }
2584 break;
2585 }
2586
2587 default:
2588 return -EINVAL;
2589 }
2590
2591 return 0;
2592}
2593
2594static int btf_dedup_remap_types(struct btf_dedup *d)
2595{
2596 int i, r;
2597
2598 for (i = 1; i <= d->btf->nr_types; i++) {
2599 r = btf_dedup_remap_type(d, i);
2600 if (r < 0)
2601 return r;
2602 }
2603 return 0;
2604}
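/*
 * Minimal usage sketch tying the phases above together (illustrative only,
 * not part of the original file; assumes the caller already loaded raw BTF
 * data, e.g. via btf__new()):
 */
static int btf_dedup_usage_sketch(struct btf *btf)
{
	struct btf_dedup_opts opts = {
		/* flip to true if FWD -> STRUCT/UNION resolution ever
		 * mis-merges identically named types (see the discussion
		 * above btf_dedup_is_equiv())
		 */
		.dont_resolve_fwds = false,
	};

	/* NULL: there is no .BTF.ext section whose type IDs need adjusting */
	return btf__dedup(btf, NULL, &opts);
}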