aboutsummaryrefslogtreecommitdiffstats
path: root/tools/lib/bpf/btf.c
diff options
context:
space:
mode:
authorAndrii Nakryiko <andriin@fb.com>2019-02-04 20:29:45 -0500
committerDaniel Borkmann <daniel@iogearbox.net>2019-02-05 10:52:57 -0500
commitd5caef5b56555bfa2ac0cf730f075864a023437e (patch)
treeaa8f77aa92737d225574fe536014e9d49ff28b29 /tools/lib/bpf/btf.c
parent69eaab04c675ef2d0127a80b3395aa90dfd1061f (diff)
btf: add BTF types deduplication algorithm
This patch implements BTF types deduplication algorithm. It allows to greatly compress typical output of pahole's DWARF-to-BTF conversion or LLVM's compilation output by detecting and collapsing identical types emitted in isolation per compilation unit. Algorithm also resolves struct/union forward declarations into concrete BTF types representing referenced struct/union. If undesired, this resolution can be disabled through specifying corresponding options. Algorithm itself and its application to Linux kernel's BTF types is described in details at: https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html Signed-off-by: Andrii Nakryiko <andriin@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'tools/lib/bpf/btf.c')
-rw-r--r--tools/lib/bpf/btf.c1741
1 file changed, 1741 insertions, 0 deletions
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 06bd1a625ff4..e5097be16018 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -849,3 +849,1744 @@ __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext)
849{ 849{
850 return btf_ext->line_info.rec_size; 850 return btf_ext->line_info.rec_size;
851} 851}
852
struct btf_dedup;

/*
 * Forward declarations for the deduplication pipeline. Each pass is
 * documented in detail above its definition further below; btf__dedup()
 * invokes them in this exact order.
 */
static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
				       const struct btf_dedup_opts *opts);
static void btf_dedup_free(struct btf_dedup *d);
static int btf_dedup_strings(struct btf_dedup *d);
static int btf_dedup_prim_types(struct btf_dedup *d);
static int btf_dedup_struct_types(struct btf_dedup *d);
static int btf_dedup_ref_types(struct btf_dedup *d);
static int btf_dedup_compact_types(struct btf_dedup *d);
static int btf_dedup_remap_types(struct btf_dedup *d);
/*
 * Deduplicate BTF types and strings.
 *
 * BTF dedup algorithm takes as an input `struct btf` representing `.BTF` ELF
 * section with all BTF type descriptors and string data. It overwrites that
 * memory in-place with deduplicated types and strings without any loss of
 * information. If optional `struct btf_ext` representing '.BTF.ext' ELF section
 * is provided, all the strings referenced from .BTF.ext section are honored
 * and updated to point to the right offsets after deduplication.
 *
 * If function returns with error, type/string data might be garbled and should
 * be discarded.
 *
 * More verbose and detailed description of both problem btf_dedup is solving,
 * as well as solution could be found at:
 * https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html
 *
 * Problem description and justification
 * =====================================
 *
 * BTF type information is typically emitted either as a result of conversion
 * from DWARF to BTF or directly by compiler. In both cases, each compilation
 * unit contains information about a subset of all the types that are used
 * in an application. These subsets are frequently overlapping and contain a lot
 * of duplicated information when later concatenated together into a single
 * binary. This algorithm ensures that each unique type is represented by single
 * BTF type descriptor, greatly reducing resulting size of BTF data.
 *
 * Compilation unit isolation and subsequent duplication of data is not the only
 * problem. The same type hierarchy (e.g., struct and all the type that struct
 * references) in different compilation units can be represented in BTF to
 * various degrees of completeness (or, rather, incompleteness) due to
 * struct/union forward declarations.
 *
 * Let's take a look at an example, that we'll use to better understand the
 * problem (and solution). Suppose we have two compilation units, each using
 * same `struct S`, but each of them having incomplete type information about
 * struct's fields:
 *
 * // CU #1:
 * struct S;
 * struct A {
 *	int a;
 *	struct A* self;
 *	struct S* parent;
 * };
 * struct B;
 * struct S {
 *	struct A* a_ptr;
 *	struct B* b_ptr;
 * };
 *
 * // CU #2:
 * struct S;
 * struct A;
 * struct B {
 *	int b;
 *	struct B* self;
 *	struct S* parent;
 * };
 * struct S {
 *	struct A* a_ptr;
 *	struct B* b_ptr;
 * };
 *
 * In case of CU #1, BTF data will know only that `struct B` exist (but no
 * more), but will know the complete type information about `struct A`. While
 * for CU #2, it will know full type information about `struct B`, but will
 * only know about forward declaration of `struct A` (in BTF terms, it will
 * have `BTF_KIND_FWD` type descriptor with name `A`).
 *
 * This compilation unit isolation means that it's possible that there is no
 * single CU with complete type information describing structs `S`, `A`, and
 * `B`. Also, we might get tons of duplicated and redundant type information.
 *
 * Additional complication we need to keep in mind comes from the fact that
 * types, in general, can form graphs containing cycles, not just DAGs.
 *
 * While algorithm does deduplication, it also merges and resolves type
 * information (unless disabled through `struct btf_dedup_opts`), whenever
 * possible. E.g., in the example above with two compilation units having
 * partial type information for structs `A` and `B`, the output of algorithm
 * will emit a single copy of each BTF type that describes structs `A`, `B`,
 * and `S` (as well as type information for `int` and pointers), as if they
 * were defined in a single compilation unit as:
 *
 * struct A {
 *	int a;
 *	struct A* self;
 *	struct S* parent;
 * };
 * struct B {
 *	int b;
 *	struct B* self;
 *	struct S* parent;
 * };
 * struct S {
 *	struct A* a_ptr;
 *	struct B* b_ptr;
 * };
 *
 * Algorithm summary
 * =================
 *
 * Algorithm completes its work in 6 separate passes:
 *
 * 1. Strings deduplication.
 * 2. Primitive types deduplication (int, enum, fwd).
 * 3. Struct/union types deduplication.
 * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func
 *    protos, and const/volatile/restrict modifiers).
 * 5. Types compaction.
 * 6. Types remapping.
 *
 * Algorithm determines canonical type descriptor, which is a single
 * representative type for each truly unique type. This canonical type is the
 * one that will go into final deduplicated BTF type information. For
 * struct/unions, it is also the type that algorithm will merge additional type
 * information into (while resolving FWDs), as it discovers it from data in
 * other CUs. Each input BTF type eventually gets either mapped to itself, if
 * that type is canonical, or to some other type, if that type is equivalent
 * and was chosen as canonical representative. This mapping is stored in
 * `btf_dedup->map` array. This map is also used to record STRUCT/UNION that
 * FWD type got resolved to.
 *
 * To facilitate fast discovery of canonical types, we also maintain canonical
 * index (`btf_dedup->dedup_table`), which maps type descriptor's signature hash
 * (i.e., hashed kind, name, size, fields, etc) into a list of canonical types
 * that match that signature. With sufficiently good choice of type signature
 * hashing function, we can limit number of canonical types for each unique type
 * signature to a very small number, allowing to find canonical type for any
 * duplicated type very quickly.
 *
 * Struct/union deduplication is the most critical part and algorithm for
 * deduplicating structs/unions is described in greater details in comments for
 * `btf_dedup_is_equiv` function.
 */
int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
	       const struct btf_dedup_opts *opts)
{
	struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts);
	int err;

	if (IS_ERR(d)) {
		pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d));
		return -EINVAL;
	}

	/* run all six passes in order; bail out on first failure */
	err = btf_dedup_strings(d);
	if (err < 0) {
		pr_debug("btf_dedup_strings failed:%d\n", err);
		goto done;
	}
	err = btf_dedup_prim_types(d);
	if (err < 0) {
		pr_debug("btf_dedup_prim_types failed:%d\n", err);
		goto done;
	}
	err = btf_dedup_struct_types(d);
	if (err < 0) {
		pr_debug("btf_dedup_struct_types failed:%d\n", err);
		goto done;
	}
	err = btf_dedup_ref_types(d);
	if (err < 0) {
		pr_debug("btf_dedup_ref_types failed:%d\n", err);
		goto done;
	}
	err = btf_dedup_compact_types(d);
	if (err < 0) {
		pr_debug("btf_dedup_compact_types failed:%d\n", err);
		goto done;
	}
	err = btf_dedup_remap_types(d);
	if (err < 0) {
		pr_debug("btf_dedup_remap_types failed:%d\n", err);
		goto done;
	}

done:
	btf_dedup_free(d);
	return err;
}
1048
/* dedup hash table has 2^14 buckets; MOD mask derives the bucket index */
#define BTF_DEDUP_TABLE_SIZE_LOG 14
#define BTF_DEDUP_TABLE_MOD ((1 << BTF_DEDUP_TABLE_SIZE_LOG) - 1)
/* sentinel map value: type hasn't been processed yet */
#define BTF_UNPROCESSED_ID ((__u32)-1)
/* sentinel map value: type is being processed (guards against cycles) */
#define BTF_IN_PROGRESS_ID ((__u32)-2)

/* singly-linked list node holding one candidate type ID in a hash bucket */
struct btf_dedup_node {
	struct btf_dedup_node *next;
	__u32 type_id;
};
1058
/* All state used by the BTF dedup passes; created by btf_dedup_new() */
struct btf_dedup {
	/* .BTF section to be deduped in-place */
	struct btf *btf;
	/*
	 * Optional .BTF.ext section. When provided, any strings referenced
	 * from it will be taken into account when deduping strings
	 */
	struct btf_ext *btf_ext;
	/*
	 * This is a map from any type's signature hash to a list of possible
	 * canonical representative type candidates. Hash collisions are
	 * ignored, so even types of various kinds can share same list of
	 * candidates, which is fine because we rely on subsequent
	 * btf_xxx_equal() checks to authoritatively verify type equality.
	 */
	struct btf_dedup_node **dedup_table;
	/* Canonical types map: type ID -> canonical type ID (or sentinel) */
	__u32 *map;
	/* Hypothetical mapping, used during type graph equivalence checks */
	__u32 *hypot_map;
	/* list of type IDs touched in hypot_map, for fast reset */
	__u32 *hypot_list;
	/* number of entries currently in hypot_list */
	size_t hypot_cnt;
	/* allocated capacity of hypot_list */
	size_t hypot_cap;
	/* Various option modifying behavior of algorithm */
	struct btf_dedup_opts opts;
};
1085
/* index entry describing one string in the BTF string section */
struct btf_str_ptr {
	const char *str;	/* pointer into original string data */
	__u32 new_off;		/* string's offset after deduplication */
	bool used;		/* true if referenced from .BTF/.BTF.ext */
};

/* growable array of btf_str_ptr entries over the whole string section */
struct btf_str_ptrs {
	struct btf_str_ptr *ptrs;
	const char *data;	/* start of original string section */
	__u32 cnt;
	__u32 cap;
};
1098
1099static inline __u32 hash_combine(__u32 h, __u32 value)
1100{
1101/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
1102#define GOLDEN_RATIO_PRIME 0x9e370001UL
1103 return h * 37 + value * GOLDEN_RATIO_PRIME;
1104#undef GOLDEN_RATIO_PRIME
1105}
1106
/* iterate all candidate type IDs stored in the bucket selected by `hash` */
#define for_each_hash_node(table, hash, node) \
	for (node = table[hash & BTF_DEDUP_TABLE_MOD]; node; node = node->next)

/*
 * Prepend `type_id` to the hash bucket selected by `hash`.
 * Returns 0 on success, -ENOMEM if node allocation fails.
 */
static int btf_dedup_table_add(struct btf_dedup *d, __u32 hash, __u32 type_id)
{
	struct btf_dedup_node *node = malloc(sizeof(struct btf_dedup_node));

	if (!node)
		return -ENOMEM;
	node->type_id = type_id;
	/* newest candidate goes to the head of the bucket list */
	node->next = d->dedup_table[hash & BTF_DEDUP_TABLE_MOD];
	d->dedup_table[hash & BTF_DEDUP_TABLE_MOD] = node;
	return 0;
}
1121
1122static int btf_dedup_hypot_map_add(struct btf_dedup *d,
1123 __u32 from_id, __u32 to_id)
1124{
1125 if (d->hypot_cnt == d->hypot_cap) {
1126 __u32 *new_list;
1127
1128 d->hypot_cap += max(16, d->hypot_cap / 2);
1129 new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap);
1130 if (!new_list)
1131 return -ENOMEM;
1132 d->hypot_list = new_list;
1133 }
1134 d->hypot_list[d->hypot_cnt++] = from_id;
1135 d->hypot_map[from_id] = to_id;
1136 return 0;
1137}
1138
1139static void btf_dedup_clear_hypot_map(struct btf_dedup *d)
1140{
1141 int i;
1142
1143 for (i = 0; i < d->hypot_cnt; i++)
1144 d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID;
1145 d->hypot_cnt = 0;
1146}
1147
1148static void btf_dedup_table_free(struct btf_dedup *d)
1149{
1150 struct btf_dedup_node *head, *tmp;
1151 int i;
1152
1153 if (!d->dedup_table)
1154 return;
1155
1156 for (i = 0; i < (1 << BTF_DEDUP_TABLE_SIZE_LOG); i++) {
1157 while (d->dedup_table[i]) {
1158 tmp = d->dedup_table[i];
1159 d->dedup_table[i] = tmp->next;
1160 free(tmp);
1161 }
1162
1163 head = d->dedup_table[i];
1164 while (head) {
1165 tmp = head;
1166 head = head->next;
1167 free(tmp);
1168 }
1169 }
1170
1171 free(d->dedup_table);
1172 d->dedup_table = NULL;
1173}
1174
1175static void btf_dedup_free(struct btf_dedup *d)
1176{
1177 btf_dedup_table_free(d);
1178
1179 free(d->map);
1180 d->map = NULL;
1181
1182 free(d->hypot_map);
1183 d->hypot_map = NULL;
1184
1185 free(d->hypot_list);
1186 d->hypot_list = NULL;
1187
1188 free(d);
1189}
1190
/*
 * Allocate and initialize dedup state for given .BTF data (and optional
 * .BTF.ext). Returns the new state or ERR_PTR(-ENOMEM) on allocation failure.
 * On failure, any partially-allocated state is released via btf_dedup_free().
 */
static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
				       const struct btf_dedup_opts *opts)
{
	struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
	int i, err = 0;

	if (!d)
		return ERR_PTR(-ENOMEM);

	d->btf = btf;
	d->btf_ext = btf_ext;

	/* fixed-size bucket array for candidate canonical types */
	d->dedup_table = calloc(1 << BTF_DEDUP_TABLE_SIZE_LOG,
				sizeof(struct btf_dedup_node *));
	if (!d->dedup_table) {
		err = -ENOMEM;
		goto done;
	}

	/* one map slot per type, plus slot 0 reserved for "void" */
	d->map = malloc(sizeof(__u32) * (1 + btf->nr_types));
	if (!d->map) {
		err = -ENOMEM;
		goto done;
	}
	/* special BTF "void" type is made canonical immediately */
	d->map[0] = 0;
	for (i = 1; i <= btf->nr_types; i++)
		d->map[i] = BTF_UNPROCESSED_ID;

	d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types));
	if (!d->hypot_map) {
		err = -ENOMEM;
		goto done;
	}
	for (i = 0; i <= btf->nr_types; i++)
		d->hypot_map[i] = BTF_UNPROCESSED_ID;

	d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;

done:
	if (err) {
		btf_dedup_free(d);
		return ERR_PTR(err);
	}

	return d;
}
1238
/* callback invoked for each string-offset field found; ctx is caller state */
typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx);

/*
 * Iterate over all possible places in .BTF and .BTF.ext that can reference
 * string and pass pointer to it to a provided callback `fn`.
 * Stops early and returns the first non-zero value returned by `fn`;
 * returns 0 once all string offsets have been visited.
 */
static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
{
	void *line_data_cur, *line_data_end;
	int i, j, r, rec_size;
	struct btf_type *t;

	for (i = 1; i <= d->btf->nr_types; i++) {
		t = d->btf->types[i];
		/* every type descriptor carries a name offset */
		r = fn(&t->name_off, ctx);
		if (r)
			return r;

		/* some kinds also carry names in their trailing member data */
		switch (BTF_INFO_KIND(t->info)) {
		case BTF_KIND_STRUCT:
		case BTF_KIND_UNION: {
			struct btf_member *m = (struct btf_member *)(t + 1);
			__u16 vlen = BTF_INFO_VLEN(t->info);

			for (j = 0; j < vlen; j++) {
				r = fn(&m->name_off, ctx);
				if (r)
					return r;
				m++;
			}
			break;
		}
		case BTF_KIND_ENUM: {
			struct btf_enum *m = (struct btf_enum *)(t + 1);
			__u16 vlen = BTF_INFO_VLEN(t->info);

			for (j = 0; j < vlen; j++) {
				r = fn(&m->name_off, ctx);
				if (r)
					return r;
				m++;
			}
			break;
		}
		case BTF_KIND_FUNC_PROTO: {
			struct btf_param *m = (struct btf_param *)(t + 1);
			__u16 vlen = BTF_INFO_VLEN(t->info);

			for (j = 0; j < vlen; j++) {
				r = fn(&m->name_off, ctx);
				if (r)
					return r;
				m++;
			}
			break;
		}
		default:
			break;
		}
	}

	if (!d->btf_ext)
		return 0;

	/* walk .BTF.ext line info: section names, file names, line strings */
	line_data_cur = d->btf_ext->line_info.info;
	line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len;
	rec_size = d->btf_ext->line_info.rec_size;

	while (line_data_cur < line_data_end) {
		struct btf_ext_info_sec *sec = line_data_cur;
		struct bpf_line_info_min *line_info;
		__u32 num_info = sec->num_info;

		r = fn(&sec->sec_name_off, ctx);
		if (r)
			return r;

		/* records follow the section header; advance by rec_size to
		 * stay compatible with larger future record layouts
		 */
		line_data_cur += sizeof(struct btf_ext_info_sec);
		for (i = 0; i < num_info; i++) {
			line_info = line_data_cur;
			r = fn(&line_info->file_name_off, ctx);
			if (r)
				return r;
			r = fn(&line_info->line_off, ctx);
			if (r)
				return r;
			line_data_cur += rec_size;
		}
	}

	return 0;
}
1331
1332static int str_sort_by_content(const void *a1, const void *a2)
1333{
1334 const struct btf_str_ptr *p1 = a1;
1335 const struct btf_str_ptr *p2 = a2;
1336
1337 return strcmp(p1->str, p2->str);
1338}
1339
1340static int str_sort_by_offset(const void *a1, const void *a2)
1341{
1342 const struct btf_str_ptr *p1 = a1;
1343 const struct btf_str_ptr *p2 = a2;
1344
1345 if (p1->str != p2->str)
1346 return p1->str < p2->str ? -1 : 1;
1347 return 0;
1348}
1349
1350static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem)
1351{
1352 const struct btf_str_ptr *p = pelem;
1353
1354 if (str_ptr != p->str)
1355 return (const char *)str_ptr < p->str ? -1 : 1;
1356 return 0;
1357}
1358
1359static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx)
1360{
1361 struct btf_str_ptrs *strs;
1362 struct btf_str_ptr *s;
1363
1364 if (*str_off_ptr == 0)
1365 return 0;
1366
1367 strs = ctx;
1368 s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
1369 sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
1370 if (!s)
1371 return -EINVAL;
1372 s->used = true;
1373 return 0;
1374}
1375
1376static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
1377{
1378 struct btf_str_ptrs *strs;
1379 struct btf_str_ptr *s;
1380
1381 if (*str_off_ptr == 0)
1382 return 0;
1383
1384 strs = ctx;
1385 s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
1386 sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
1387 if (!s)
1388 return -EINVAL;
1389 *str_off_ptr = s->new_off;
1390 return 0;
1391}
1392
/*
 * Dedup string and filter out those that are not referenced from either .BTF
 * or .BTF.ext (if provided) sections.
 *
 * This is done by building index of all strings in BTF's string section,
 * then iterating over all entities that can reference strings (e.g., type
 * names, struct field names, .BTF.ext line info, etc) and marking corresponding
 * strings as used. After that all used strings are deduped and compacted into
 * sequential blob of memory and new offsets are calculated. Then all the string
 * references are iterated again and rewritten using new offsets.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int btf_dedup_strings(struct btf_dedup *d)
{
	const struct btf_header *hdr = d->btf->hdr;
	char *start = (char *)d->btf->nohdr_data + hdr->str_off;
	char *end = start + d->btf->hdr->str_len;
	char *p = start, *tmp_strs = NULL;
	struct btf_str_ptrs strs = {
		.cnt = 0,
		.cap = 0,
		.ptrs = NULL,
		.data = start,
	};
	int i, j, err = 0, grp_idx;
	bool grp_used;

	/* build index of all strings */
	while (p < end) {
		if (strs.cnt + 1 > strs.cap) {
			struct btf_str_ptr *new_ptrs;

			/* grow by 50%, but at least by 16 entries */
			strs.cap += max(strs.cnt / 2, 16);
			new_ptrs = realloc(strs.ptrs,
					   sizeof(strs.ptrs[0]) * strs.cap);
			if (!new_ptrs) {
				err = -ENOMEM;
				goto done;
			}
			strs.ptrs = new_ptrs;
		}

		strs.ptrs[strs.cnt].str = p;
		strs.ptrs[strs.cnt].used = false;

		p += strlen(p) + 1;
		strs.cnt++;
	}

	/* temporary storage for deduplicated strings */
	tmp_strs = malloc(d->btf->hdr->str_len);
	if (!tmp_strs) {
		err = -ENOMEM;
		goto done;
	}

	/* mark all used strings */
	strs.ptrs[0].used = true;	/* empty string is always kept */
	err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs);
	if (err)
		goto done;

	/* sort strings by content, so that we can identify duplicates */
	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content);

	/*
	 * iterate groups of equal strings and if any instance in a group was
	 * referenced, emit single instance and remember new offset
	 */
	p = tmp_strs;
	grp_idx = 0;
	grp_used = strs.ptrs[0].used;
	/* iterate past end to avoid code duplication after loop */
	for (i = 1; i <= strs.cnt; i++) {
		/*
		 * when i == strs.cnt, we want to skip string comparison and go
		 * straight to handling last group of strings (otherwise we'd
		 * need to handle last group after the loop w/ duplicated code)
		 */
		if (i < strs.cnt &&
		    !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) {
			grp_used = grp_used || strs.ptrs[i].used;
			continue;
		}

		/*
		 * this check would have been required after the loop to handle
		 * last group of strings, but due to <= condition in a loop
		 * we avoid that duplication
		 */
		if (grp_used) {
			int new_off = p - tmp_strs;
			__u32 len = strlen(strs.ptrs[grp_idx].str);

			memmove(p, strs.ptrs[grp_idx].str, len + 1);
			for (j = grp_idx; j < i; j++)
				strs.ptrs[j].new_off = new_off;
			p += len + 1;
		}

		if (i < strs.cnt) {
			grp_idx = i;
			grp_used = strs.ptrs[i].used;
		}
	}

	/* replace original strings with deduped ones */
	d->btf->hdr->str_len = p - tmp_strs;
	memmove(start, tmp_strs, d->btf->hdr->str_len);
	end = start + d->btf->hdr->str_len;

	/* restore original order for further binary search lookups */
	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset);

	/* remap string offsets */
	err = btf_for_each_str_off(d, btf_str_remap_offset, &strs);
	if (err)
		goto done;

	d->btf->hdr->str_len = end - start;

done:
	free(tmp_strs);
	free(strs.ptrs);
	return err;
}
1518
1519static __u32 btf_hash_common(struct btf_type *t)
1520{
1521 __u32 h;
1522
1523 h = hash_combine(0, t->name_off);
1524 h = hash_combine(h, t->info);
1525 h = hash_combine(h, t->size);
1526 return h;
1527}
1528
1529static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
1530{
1531 return t1->name_off == t2->name_off &&
1532 t1->info == t2->info &&
1533 t1->size == t2->size;
1534}
1535
1536/* Calculate type signature hash of INT. */
1537static __u32 btf_hash_int(struct btf_type *t)
1538{
1539 __u32 info = *(__u32 *)(t + 1);
1540 __u32 h;
1541
1542 h = btf_hash_common(t);
1543 h = hash_combine(h, info);
1544 return h;
1545}
1546
1547/* Check structural equality of two INTs. */
1548static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
1549{
1550 __u32 info1, info2;
1551
1552 if (!btf_equal_common(t1, t2))
1553 return false;
1554 info1 = *(__u32 *)(t1 + 1);
1555 info2 = *(__u32 *)(t2 + 1);
1556 return info1 == info2;
1557}
1558
1559/* Calculate type signature hash of ENUM. */
1560static __u32 btf_hash_enum(struct btf_type *t)
1561{
1562 struct btf_enum *member = (struct btf_enum *)(t + 1);
1563 __u32 vlen = BTF_INFO_VLEN(t->info);
1564 __u32 h = btf_hash_common(t);
1565 int i;
1566
1567 for (i = 0; i < vlen; i++) {
1568 h = hash_combine(h, member->name_off);
1569 h = hash_combine(h, member->val);
1570 member++;
1571 }
1572 return h;
1573}
1574
1575/* Check structural equality of two ENUMs. */
1576static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
1577{
1578 struct btf_enum *m1, *m2;
1579 __u16 vlen;
1580 int i;
1581
1582 if (!btf_equal_common(t1, t2))
1583 return false;
1584
1585 vlen = BTF_INFO_VLEN(t1->info);
1586 m1 = (struct btf_enum *)(t1 + 1);
1587 m2 = (struct btf_enum *)(t2 + 1);
1588 for (i = 0; i < vlen; i++) {
1589 if (m1->name_off != m2->name_off || m1->val != m2->val)
1590 return false;
1591 m1++;
1592 m2++;
1593 }
1594 return true;
1595}
1596
1597/*
1598 * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs,
1599 * as referenced type IDs equivalence is established separately during type
1600 * graph equivalence check algorithm.
1601 */
1602static __u32 btf_hash_struct(struct btf_type *t)
1603{
1604 struct btf_member *member = (struct btf_member *)(t + 1);
1605 __u32 vlen = BTF_INFO_VLEN(t->info);
1606 __u32 h = btf_hash_common(t);
1607 int i;
1608
1609 for (i = 0; i < vlen; i++) {
1610 h = hash_combine(h, member->name_off);
1611 h = hash_combine(h, member->offset);
1612 /* no hashing of referenced type ID, it can be unresolved yet */
1613 member++;
1614 }
1615 return h;
1616}
1617
/*
 * Check structural equality of two STRUCTs/UNIONs, ignoring referenced type
 * IDs. This check is performed during type graph equivalence check and
 * referenced types equivalence is checked separately.
 * (The original comment said "FUNC_PROTOs" — a copy/paste slip; this
 * function compares struct/union members, not function parameters.)
 */
static bool btf_equal_struct(struct btf_type *t1, struct btf_type *t2)
{
	struct btf_member *m1, *m2;
	__u16 vlen;
	int i;

	if (!btf_equal_common(t1, t2))
		return false;

	vlen = BTF_INFO_VLEN(t1->info);
	m1 = (struct btf_member *)(t1 + 1);
	m2 = (struct btf_member *)(t2 + 1);
	for (i = 0; i < vlen; i++) {
		/* member type IDs intentionally not compared here */
		if (m1->name_off != m2->name_off || m1->offset != m2->offset)
			return false;
		m1++;
		m2++;
	}
	return true;
}
1643
1644/*
1645 * Calculate type signature hash of ARRAY, including referenced type IDs,
1646 * under assumption that they were already resolved to canonical type IDs and
1647 * are not going to change.
1648 */
1649static __u32 btf_hash_array(struct btf_type *t)
1650{
1651 struct btf_array *info = (struct btf_array *)(t + 1);
1652 __u32 h = btf_hash_common(t);
1653
1654 h = hash_combine(h, info->type);
1655 h = hash_combine(h, info->index_type);
1656 h = hash_combine(h, info->nelems);
1657 return h;
1658}
1659
1660/*
1661 * Check exact equality of two ARRAYs, taking into account referenced
1662 * type IDs, under assumption that they were already resolved to canonical
1663 * type IDs and are not going to change.
1664 * This function is called during reference types deduplication to compare
1665 * ARRAY to potential canonical representative.
1666 */
1667static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2)
1668{
1669 struct btf_array *info1, *info2;
1670
1671 if (!btf_equal_common(t1, t2))
1672 return false;
1673
1674 info1 = (struct btf_array *)(t1 + 1);
1675 info2 = (struct btf_array *)(t2 + 1);
1676 return info1->type == info2->type &&
1677 info1->index_type == info2->index_type &&
1678 info1->nelems == info2->nelems;
1679}
1680
1681/*
1682 * Check structural compatibility of two ARRAYs, ignoring referenced type
1683 * IDs. This check is performed during type graph equivalence check and
1684 * referenced types equivalence is checked separately.
1685 */
1686static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2)
1687{
1688 struct btf_array *info1, *info2;
1689
1690 if (!btf_equal_common(t1, t2))
1691 return false;
1692
1693 info1 = (struct btf_array *)(t1 + 1);
1694 info2 = (struct btf_array *)(t2 + 1);
1695 return info1->nelems == info2->nelems;
1696}
1697
1698/*
1699 * Calculate type signature hash of FUNC_PROTO, including referenced type IDs,
1700 * under assumption that they were already resolved to canonical type IDs and
1701 * are not going to change.
1702 */
1703static inline __u32 btf_hash_fnproto(struct btf_type *t)
1704{
1705 struct btf_param *member = (struct btf_param *)(t + 1);
1706 __u16 vlen = BTF_INFO_VLEN(t->info);
1707 __u32 h = btf_hash_common(t);
1708 int i;
1709
1710 for (i = 0; i < vlen; i++) {
1711 h = hash_combine(h, member->name_off);
1712 h = hash_combine(h, member->type);
1713 member++;
1714 }
1715 return h;
1716}
1717
1718/*
1719 * Check exact equality of two FUNC_PROTOs, taking into account referenced
1720 * type IDs, under assumption that they were already resolved to canonical
1721 * type IDs and are not going to change.
1722 * This function is called during reference types deduplication to compare
1723 * FUNC_PROTO to potential canonical representative.
1724 */
1725static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
1726{
1727 struct btf_param *m1, *m2;
1728 __u16 vlen;
1729 int i;
1730
1731 if (!btf_equal_common(t1, t2))
1732 return false;
1733
1734 vlen = BTF_INFO_VLEN(t1->info);
1735 m1 = (struct btf_param *)(t1 + 1);
1736 m2 = (struct btf_param *)(t2 + 1);
1737 for (i = 0; i < vlen; i++) {
1738 if (m1->name_off != m2->name_off || m1->type != m2->type)
1739 return false;
1740 m1++;
1741 m2++;
1742 }
1743 return true;
1744}
1745
1746/*
1747 * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
1748 * IDs. This check is performed during type graph equivalence check and
1749 * referenced types equivalence is checked separately.
1750 */
1751static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
1752{
1753 struct btf_param *m1, *m2;
1754 __u16 vlen;
1755 int i;
1756
1757 /* skip return type ID */
1758 if (t1->name_off != t2->name_off || t1->info != t2->info)
1759 return false;
1760
1761 vlen = BTF_INFO_VLEN(t1->info);
1762 m1 = (struct btf_param *)(t1 + 1);
1763 m2 = (struct btf_param *)(t2 + 1);
1764 for (i = 0; i < vlen; i++) {
1765 if (m1->name_off != m2->name_off)
1766 return false;
1767 m1++;
1768 m2++;
1769 }
1770 return true;
1771}
1772
/*
 * Deduplicate primitive types, that can't reference other types, by calculating
 * their type signature hash and comparing them with any possible canonical
 * candidate. If no canonical candidate matches, type itself is marked as
 * canonical and is added into `btf_dedup->dedup_table` as another candidate.
 * Returns 0 on success, -ENOMEM on table insertion failure, -EINVAL on
 * unrecognized BTF kind.
 */
static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
{
	struct btf_type *t = d->btf->types[type_id];
	struct btf_type *cand;
	struct btf_dedup_node *cand_node;
	/* if we don't find equivalent type, then we are canonical */
	__u32 new_id = type_id;
	__u32 h;

	switch (BTF_INFO_KIND(t->info)) {
	/* non-primitive kinds are handled by later dedup passes */
	case BTF_KIND_CONST:
	case BTF_KIND_VOLATILE:
	case BTF_KIND_RESTRICT:
	case BTF_KIND_PTR:
	case BTF_KIND_TYPEDEF:
	case BTF_KIND_ARRAY:
	case BTF_KIND_STRUCT:
	case BTF_KIND_UNION:
	case BTF_KIND_FUNC:
	case BTF_KIND_FUNC_PROTO:
		return 0;

	case BTF_KIND_INT:
		h = btf_hash_int(t);
		for_each_hash_node(d->dedup_table, h, cand_node) {
			cand = d->btf->types[cand_node->type_id];
			if (btf_equal_int(t, cand)) {
				new_id = cand_node->type_id;
				break;
			}
		}
		break;

	case BTF_KIND_ENUM:
		h = btf_hash_enum(t);
		for_each_hash_node(d->dedup_table, h, cand_node) {
			cand = d->btf->types[cand_node->type_id];
			if (btf_equal_enum(t, cand)) {
				new_id = cand_node->type_id;
				break;
			}
		}
		break;

	case BTF_KIND_FWD:
		/* FWD has no extra data, so common fields fully identify it */
		h = btf_hash_common(t);
		for_each_hash_node(d->dedup_table, h, cand_node) {
			cand = d->btf->types[cand_node->type_id];
			if (btf_equal_common(t, cand)) {
				new_id = cand_node->type_id;
				break;
			}
		}
		break;

	default:
		return -EINVAL;
	}

	d->map[type_id] = new_id;
	/* type mapped to itself means it's a new canonical candidate */
	if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
		return -ENOMEM;

	return 0;
}
1844
1845static int btf_dedup_prim_types(struct btf_dedup *d)
1846{
1847 int i, err;
1848
1849 for (i = 1; i <= d->btf->nr_types; i++) {
1850 err = btf_dedup_prim_type(d, i);
1851 if (err)
1852 return err;
1853 }
1854 return 0;
1855}
1856
/*
 * Check whether type is already mapped into canonical one (could be to itself).
 * Unmapped entries hold the BTF_UNPROCESSED_ID/BTF_IN_PROGRESS_ID sentinels,
 * both of which are greater than BTF_MAX_TYPE.
 */
static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id)
{
	return d->map[type_id] <= BTF_MAX_TYPE;
}
1864
1865/*
1866 * Resolve type ID into its canonical type ID, if any; otherwise return original
1867 * type ID. If type is FWD and is resolved into STRUCT/UNION already, follow
1868 * STRUCT/UNION link and resolve it into canonical type ID as well.
1869 */
1870static inline __u32 resolve_type_id(struct btf_dedup *d, __u32 type_id)
1871{
1872 while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
1873 type_id = d->map[type_id];
1874 return type_id;
1875}
1876
1877/*
1878 * Resolve FWD to underlying STRUCT/UNION, if any; otherwise return original
1879 * type ID.
1880 */
1881static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id)
1882{
1883 __u32 orig_type_id = type_id;
1884
1885 if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD)
1886 return type_id;
1887
1888 while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
1889 type_id = d->map[type_id];
1890
1891 if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD)
1892 return type_id;
1893
1894 return orig_type_id;
1895}
1896
1897
1898static inline __u16 btf_fwd_kind(struct btf_type *t)
1899{
1900 return BTF_INFO_KFLAG(t->info) ? BTF_KIND_UNION : BTF_KIND_STRUCT;
1901}
1902
1903/*
1904 * Check equivalence of BTF type graph formed by candidate struct/union (we'll
1905 * call it "candidate graph" in this description for brevity) to a type graph
1906 * formed by (potential) canonical struct/union ("canonical graph" for brevity
1907 * here, though keep in mind that not all types in canonical graph are
1908 * necessarily canonical representatives themselves, some of them might be
1909 * duplicates or its uniqueness might not have been established yet).
1910 * Returns:
1911 * - >0, if type graphs are equivalent;
1912 * - 0, if not equivalent;
1913 * - <0, on error.
1914 *
1915 * Algorithm performs side-by-side DFS traversal of both type graphs and checks
1916 * equivalence of BTF types at each step. If at any point BTF types in candidate
1917 * and canonical graphs are not compatible structurally, whole graphs are
1918 * incompatible. If types are structurally equivalent (i.e., all information
1919 * except referenced type IDs is exactly the same), a mapping from `canon_id` to
1920 * a `cand_id` is recored in hypothetical mapping (`btf_dedup->hypot_map`).
1921 * If a type references other types, then those referenced types are checked
1922 * for equivalence recursively.
1923 *
1924 * During DFS traversal, if we find that for current `canon_id` type we
1925 * already have some mapping in hypothetical map, we check for two possible
1926 * situations:
1927 * - `canon_id` is mapped to exactly the same type as `cand_id`. This will
1928 * happen when type graphs have cycles. In this case we assume those two
1929 * types are equivalent.
1930 * - `canon_id` is mapped to different type. This is contradiction in our
1931 * hypothetical mapping, because same graph in canonical graph corresponds
1932 * to two different types in candidate graph, which for equivalent type
1933 * graphs shouldn't happen. This condition terminates equivalence check
1934 * with negative result.
1935 *
1936 * If type graphs traversal exhausts types to check and find no contradiction,
1937 * then type graphs are equivalent.
1938 *
1939 * When checking types for equivalence, there is one special case: FWD types.
1940 * If FWD type resolution is allowed and one of the types (either from canonical
1941 * or candidate graph) is FWD and other is STRUCT/UNION (depending on FWD's kind
1942 * flag) and their names match, hypothetical mapping is updated to point from
1943 * FWD to STRUCT/UNION. If graphs will be determined as equivalent successfully,
1944 * this mapping will be used to record FWD -> STRUCT/UNION mapping permanently.
1945 *
1946 * Technically, this could lead to incorrect FWD to STRUCT/UNION resolution,
1947 * if there are two exactly named (or anonymous) structs/unions that are
1948 * compatible structurally, one of which has FWD field, while other is concrete
1949 * STRUCT/UNION, but according to C sources they are different structs/unions
1950 * that are referencing different types with the same name. This is extremely
1951 * unlikely to happen, but btf_dedup API allows to disable FWD resolution if
1952 * this logic is causing problems.
1953 *
1954 * Doing FWD resolution means that both candidate and/or canonical graphs can
1955 * consists of portions of the graph that come from multiple compilation units.
1956 * This is due to the fact that types within single compilation unit are always
1957 * deduplicated and FWDs are already resolved, if referenced struct/union
1958 * definiton is available. So, if we had unresolved FWD and found corresponding
1959 * STRUCT/UNION, they will be from different compilation units. This
1960 * consequently means that when we "link" FWD to corresponding STRUCT/UNION,
1961 * type graph will likely have at least two different BTF types that describe
1962 * same type (e.g., most probably there will be two different BTF types for the
1963 * same 'int' primitive type) and could even have "overlapping" parts of type
1964 * graph that describe same subset of types.
1965 *
1966 * This in turn means that our assumption that each type in canonical graph
1967 * must correspond to exactly one type in candidate graph might not hold
1968 * anymore and will make it harder to detect contradictions using hypothetical
1969 * map. To handle this problem, we allow to follow FWD -> STRUCT/UNION
1970 * resolution only in canonical graph. FWDs in candidate graphs are never
1971 * resolved. To see why it's OK, let's check all possible situations w.r.t. FWDs
1972 * that can occur:
1973 * - Both types in canonical and candidate graphs are FWDs. If they are
1974 * structurally equivalent, then they can either be both resolved to the
1975 * same STRUCT/UNION or not resolved at all. In both cases they are
1976 * equivalent and there is no need to resolve FWD on candidate side.
1977 * - Both types in canonical and candidate graphs are concrete STRUCT/UNION,
1978 * so nothing to resolve as well, algorithm will check equivalence anyway.
1979 * - Type in canonical graph is FWD, while type in candidate is concrete
1980 * STRUCT/UNION. In this case candidate graph comes from single compilation
1981 * unit, so there is exactly one BTF type for each unique C type. After
1982 * resolving FWD into STRUCT/UNION, there might be more than one BTF type
1983 * in canonical graph mapping to single BTF type in candidate graph, but
1984 * because hypothetical mapping maps from canonical to candidate types, it's
1985 * alright, and we still maintain the property of having single `canon_id`
1986 * mapping to single `cand_id` (there could be two different `canon_id`
1987 * mapped to the same `cand_id`, but it's not contradictory).
1988 * - Type in canonical graph is concrete STRUCT/UNION, while type in candidate
1989 * graph is FWD. In this case we are just going to check compatibility of
1990 * STRUCT/UNION and corresponding FWD, and if they are compatible, we'll
1991 * assume that whatever STRUCT/UNION FWD resolves to must be equivalent to
1992 * a concrete STRUCT/UNION from canonical graph. If the rest of type graphs
1993 * turn out equivalent, we'll re-resolve FWD to concrete STRUCT/UNION from
1994 * canonical graph.
1995 */
1996static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
1997 __u32 canon_id)
1998{
1999 struct btf_type *cand_type;
2000 struct btf_type *canon_type;
2001 __u32 hypot_type_id;
2002 __u16 cand_kind;
2003 __u16 canon_kind;
2004 int i, eq;
2005
2006 /* if both resolve to the same canonical, they must be equivalent */
2007 if (resolve_type_id(d, cand_id) == resolve_type_id(d, canon_id))
2008 return 1;
2009
2010 canon_id = resolve_fwd_id(d, canon_id);
2011
2012 hypot_type_id = d->hypot_map[canon_id];
2013 if (hypot_type_id <= BTF_MAX_TYPE)
2014 return hypot_type_id == cand_id;
2015
2016 if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
2017 return -ENOMEM;
2018
2019 cand_type = d->btf->types[cand_id];
2020 canon_type = d->btf->types[canon_id];
2021 cand_kind = BTF_INFO_KIND(cand_type->info);
2022 canon_kind = BTF_INFO_KIND(canon_type->info);
2023
2024 if (cand_type->name_off != canon_type->name_off)
2025 return 0;
2026
2027 /* FWD <--> STRUCT/UNION equivalence check, if enabled */
2028 if (!d->opts.dont_resolve_fwds
2029 && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD)
2030 && cand_kind != canon_kind) {
2031 __u16 real_kind;
2032 __u16 fwd_kind;
2033
2034 if (cand_kind == BTF_KIND_FWD) {
2035 real_kind = canon_kind;
2036 fwd_kind = btf_fwd_kind(cand_type);
2037 } else {
2038 real_kind = cand_kind;
2039 fwd_kind = btf_fwd_kind(canon_type);
2040 }
2041 return fwd_kind == real_kind;
2042 }
2043
2044 if (cand_type->info != canon_type->info)
2045 return 0;
2046
2047 switch (cand_kind) {
2048 case BTF_KIND_INT:
2049 return btf_equal_int(cand_type, canon_type);
2050
2051 case BTF_KIND_ENUM:
2052 return btf_equal_enum(cand_type, canon_type);
2053
2054 case BTF_KIND_FWD:
2055 return btf_equal_common(cand_type, canon_type);
2056
2057 case BTF_KIND_CONST:
2058 case BTF_KIND_VOLATILE:
2059 case BTF_KIND_RESTRICT:
2060 case BTF_KIND_PTR:
2061 case BTF_KIND_TYPEDEF:
2062 case BTF_KIND_FUNC:
2063 return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
2064
2065 case BTF_KIND_ARRAY: {
2066 struct btf_array *cand_arr, *canon_arr;
2067
2068 if (!btf_compat_array(cand_type, canon_type))
2069 return 0;
2070 cand_arr = (struct btf_array *)(cand_type + 1);
2071 canon_arr = (struct btf_array *)(canon_type + 1);
2072 eq = btf_dedup_is_equiv(d,
2073 cand_arr->index_type, canon_arr->index_type);
2074 if (eq <= 0)
2075 return eq;
2076 return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type);
2077 }
2078
2079 case BTF_KIND_STRUCT:
2080 case BTF_KIND_UNION: {
2081 struct btf_member *cand_m, *canon_m;
2082 __u16 vlen;
2083
2084 if (!btf_equal_struct(cand_type, canon_type))
2085 return 0;
2086 vlen = BTF_INFO_VLEN(cand_type->info);
2087 cand_m = (struct btf_member *)(cand_type + 1);
2088 canon_m = (struct btf_member *)(canon_type + 1);
2089 for (i = 0; i < vlen; i++) {
2090 eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type);
2091 if (eq <= 0)
2092 return eq;
2093 cand_m++;
2094 canon_m++;
2095 }
2096
2097 return 1;
2098 }
2099
2100 case BTF_KIND_FUNC_PROTO: {
2101 struct btf_param *cand_p, *canon_p;
2102 __u16 vlen;
2103
2104 if (!btf_compat_fnproto(cand_type, canon_type))
2105 return 0;
2106 eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
2107 if (eq <= 0)
2108 return eq;
2109 vlen = BTF_INFO_VLEN(cand_type->info);
2110 cand_p = (struct btf_param *)(cand_type + 1);
2111 canon_p = (struct btf_param *)(canon_type + 1);
2112 for (i = 0; i < vlen; i++) {
2113 eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type);
2114 if (eq <= 0)
2115 return eq;
2116 cand_p++;
2117 canon_p++;
2118 }
2119 return 1;
2120 }
2121
2122 default:
2123 return -EINVAL;
2124 }
2125 return 0;
2126}
2127
2128/*
2129 * Use hypothetical mapping, produced by successful type graph equivalence
2130 * check, to augment existing struct/union canonical mapping, where possible.
2131 *
2132 * If BTF_KIND_FWD resolution is allowed, this mapping is also used to record
2133 * FWD -> STRUCT/UNION correspondence as well. FWD resolution is bidirectional:
2134 * it doesn't matter if FWD type was part of canonical graph or candidate one,
2135 * we are recording the mapping anyway. As opposed to carefulness required
2136 * for struct/union correspondence mapping (described below), for FWD resolution
2137 * it's not important, as by the time that FWD type (reference type) will be
2138 * deduplicated all structs/unions will be deduped already anyway.
2139 *
2140 * Recording STRUCT/UNION mapping is purely a performance optimization and is
2141 * not required for correctness. It needs to be done carefully to ensure that
2142 * struct/union from candidate's type graph is not mapped into corresponding
2143 * struct/union from canonical type graph that itself hasn't been resolved into
2144 * canonical representative. The only guarantee we have is that canonical
2145 * struct/union was determined as canonical and that won't change. But any
2146 * types referenced through that struct/union fields could have been not yet
2147 * resolved, so in case like that it's too early to establish any kind of
2148 * correspondence between structs/unions.
2149 *
2150 * No canonical correspondence is derived for primitive types (they are already
2151 * deduplicated completely already anyway) or reference types (they rely on
2152 * stability of struct/union canonical relationship for equivalence checks).
2153 */
2154static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
2155{
2156 __u32 cand_type_id, targ_type_id;
2157 __u16 t_kind, c_kind;
2158 __u32 t_id, c_id;
2159 int i;
2160
2161 for (i = 0; i < d->hypot_cnt; i++) {
2162 cand_type_id = d->hypot_list[i];
2163 targ_type_id = d->hypot_map[cand_type_id];
2164 t_id = resolve_type_id(d, targ_type_id);
2165 c_id = resolve_type_id(d, cand_type_id);
2166 t_kind = BTF_INFO_KIND(d->btf->types[t_id]->info);
2167 c_kind = BTF_INFO_KIND(d->btf->types[c_id]->info);
2168 /*
2169 * Resolve FWD into STRUCT/UNION.
2170 * It's ok to resolve FWD into STRUCT/UNION that's not yet
2171 * mapped to canonical representative (as opposed to
2172 * STRUCT/UNION <--> STRUCT/UNION mapping logic below), because
2173 * eventually that struct is going to be mapped and all resolved
2174 * FWDs will automatically resolve to correct canonical
2175 * representative. This will happen before ref type deduping,
2176 * which critically depends on stability of these mapping. This
2177 * stability is not a requirement for STRUCT/UNION equivalence
2178 * checks, though.
2179 */
2180 if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD)
2181 d->map[c_id] = t_id;
2182 else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
2183 d->map[t_id] = c_id;
2184
2185 if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) &&
2186 c_kind != BTF_KIND_FWD &&
2187 is_type_mapped(d, c_id) &&
2188 !is_type_mapped(d, t_id)) {
2189 /*
2190 * as a perf optimization, we can map struct/union
2191 * that's part of type graph we just verified for
2192 * equivalence. We can do that for struct/union that has
2193 * canonical representative only, though.
2194 */
2195 d->map[t_id] = c_id;
2196 }
2197 }
2198}
2199
2200/*
2201 * Deduplicate struct/union types.
2202 *
2203 * For each struct/union type its type signature hash is calculated, taking
2204 * into account type's name, size, number, order and names of fields, but
2205 * ignoring type ID's referenced from fields, because they might not be deduped
2206 * completely until after reference types deduplication phase. This type hash
2207 * is used to iterate over all potential canonical types, sharing same hash.
2208 * For each canonical candidate we check whether type graphs that they form
2209 * (through referenced types in fields and so on) are equivalent using algorithm
2210 * implemented in `btf_dedup_is_equiv`. If such equivalence is found and
2211 * BTF_KIND_FWD resolution is allowed, then hypothetical mapping
2212 * (btf_dedup->hypot_map) produced by aforementioned type graph equivalence
2213 * algorithm is used to record FWD -> STRUCT/UNION mapping. It's also used to
2214 * potentially map other structs/unions to their canonical representatives,
2215 * if such relationship hasn't yet been established. This speeds up algorithm
2216 * by eliminating some of the duplicate work.
2217 *
2218 * If no matching canonical representative was found, struct/union is marked
2219 * as canonical for itself and is added into btf_dedup->dedup_table hash map
2220 * for further look ups.
2221 */
2222static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
2223{
2224 struct btf_dedup_node *cand_node;
2225 struct btf_type *t;
2226 /* if we don't find equivalent type, then we are canonical */
2227 __u32 new_id = type_id;
2228 __u16 kind;
2229 __u32 h;
2230
2231 /* already deduped or is in process of deduping (loop detected) */
2232 if (d->map[type_id] <= BTF_MAX_TYPE)
2233 return 0;
2234
2235 t = d->btf->types[type_id];
2236 kind = BTF_INFO_KIND(t->info);
2237
2238 if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
2239 return 0;
2240
2241 h = btf_hash_struct(t);
2242 for_each_hash_node(d->dedup_table, h, cand_node) {
2243 int eq;
2244
2245 btf_dedup_clear_hypot_map(d);
2246 eq = btf_dedup_is_equiv(d, type_id, cand_node->type_id);
2247 if (eq < 0)
2248 return eq;
2249 if (!eq)
2250 continue;
2251 new_id = cand_node->type_id;
2252 btf_dedup_merge_hypot_map(d);
2253 break;
2254 }
2255
2256 d->map[type_id] = new_id;
2257 if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
2258 return -ENOMEM;
2259
2260 return 0;
2261}
2262
2263static int btf_dedup_struct_types(struct btf_dedup *d)
2264{
2265 int i, err;
2266
2267 for (i = 1; i <= d->btf->nr_types; i++) {
2268 err = btf_dedup_struct_type(d, i);
2269 if (err)
2270 return err;
2271 }
2272 return 0;
2273}
2274
2275/*
2276 * Deduplicate reference type.
2277 *
2278 * Once all primitive and struct/union types got deduplicated, we can easily
2279 * deduplicate all other (reference) BTF types. This is done in two steps:
2280 *
2281 * 1. Resolve all referenced type IDs into their canonical type IDs. This
2282 * resolution can be done either immediately for primitive or struct/union types
2283 * (because they were deduped in previous two phases) or recursively for
2284 * reference types. Recursion will always terminate at either primitive or
2285 * struct/union type, at which point we can "unwind" chain of reference types
2286 * one by one. There is no danger of encountering cycles because in C type
2287 * system the only way to form type cycle is through struct/union, so any chain
2288 * of reference types, even those taking part in a type cycle, will inevitably
2289 * reach struct/union at some point.
2290 *
2291 * 2. Once all referenced type IDs are resolved into canonical ones, BTF type
2292 * becomes "stable", in the sense that no further deduplication will cause
2293 * any changes to it. With that, it's now possible to calculate type's signature
2294 * hash (this time taking into account referenced type IDs) and loop over all
2295 * potential canonical representatives. If no match was found, current type
2296 * will become canonical representative of itself and will be added into
2297 * btf_dedup->dedup_table as another possible canonical representative.
2298 */
2299static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
2300{
2301 struct btf_dedup_node *cand_node;
2302 struct btf_type *t, *cand;
2303 /* if we don't find equivalent type, then we are representative type */
2304 __u32 new_id = type_id;
2305 __u32 h, ref_type_id;
2306
2307 if (d->map[type_id] == BTF_IN_PROGRESS_ID)
2308 return -ELOOP;
2309 if (d->map[type_id] <= BTF_MAX_TYPE)
2310 return resolve_type_id(d, type_id);
2311
2312 t = d->btf->types[type_id];
2313 d->map[type_id] = BTF_IN_PROGRESS_ID;
2314
2315 switch (BTF_INFO_KIND(t->info)) {
2316 case BTF_KIND_CONST:
2317 case BTF_KIND_VOLATILE:
2318 case BTF_KIND_RESTRICT:
2319 case BTF_KIND_PTR:
2320 case BTF_KIND_TYPEDEF:
2321 case BTF_KIND_FUNC:
2322 ref_type_id = btf_dedup_ref_type(d, t->type);
2323 if (ref_type_id < 0)
2324 return ref_type_id;
2325 t->type = ref_type_id;
2326
2327 h = btf_hash_common(t);
2328 for_each_hash_node(d->dedup_table, h, cand_node) {
2329 cand = d->btf->types[cand_node->type_id];
2330 if (btf_equal_common(t, cand)) {
2331 new_id = cand_node->type_id;
2332 break;
2333 }
2334 }
2335 break;
2336
2337 case BTF_KIND_ARRAY: {
2338 struct btf_array *info = (struct btf_array *)(t + 1);
2339
2340 ref_type_id = btf_dedup_ref_type(d, info->type);
2341 if (ref_type_id < 0)
2342 return ref_type_id;
2343 info->type = ref_type_id;
2344
2345 ref_type_id = btf_dedup_ref_type(d, info->index_type);
2346 if (ref_type_id < 0)
2347 return ref_type_id;
2348 info->index_type = ref_type_id;
2349
2350 h = btf_hash_array(t);
2351 for_each_hash_node(d->dedup_table, h, cand_node) {
2352 cand = d->btf->types[cand_node->type_id];
2353 if (btf_equal_array(t, cand)) {
2354 new_id = cand_node->type_id;
2355 break;
2356 }
2357 }
2358 break;
2359 }
2360
2361 case BTF_KIND_FUNC_PROTO: {
2362 struct btf_param *param;
2363 __u16 vlen;
2364 int i;
2365
2366 ref_type_id = btf_dedup_ref_type(d, t->type);
2367 if (ref_type_id < 0)
2368 return ref_type_id;
2369 t->type = ref_type_id;
2370
2371 vlen = BTF_INFO_VLEN(t->info);
2372 param = (struct btf_param *)(t + 1);
2373 for (i = 0; i < vlen; i++) {
2374 ref_type_id = btf_dedup_ref_type(d, param->type);
2375 if (ref_type_id < 0)
2376 return ref_type_id;
2377 param->type = ref_type_id;
2378 param++;
2379 }
2380
2381 h = btf_hash_fnproto(t);
2382 for_each_hash_node(d->dedup_table, h, cand_node) {
2383 cand = d->btf->types[cand_node->type_id];
2384 if (btf_equal_fnproto(t, cand)) {
2385 new_id = cand_node->type_id;
2386 break;
2387 }
2388 }
2389 break;
2390 }
2391
2392 default:
2393 return -EINVAL;
2394 }
2395
2396 d->map[type_id] = new_id;
2397 if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
2398 return -ENOMEM;
2399
2400 return new_id;
2401}
2402
2403static int btf_dedup_ref_types(struct btf_dedup *d)
2404{
2405 int i, err;
2406
2407 for (i = 1; i <= d->btf->nr_types; i++) {
2408 err = btf_dedup_ref_type(d, i);
2409 if (err < 0)
2410 return err;
2411 }
2412 btf_dedup_table_free(d);
2413 return 0;
2414}
2415
2416/*
2417 * Compact types.
2418 *
2419 * After we established for each type its corresponding canonical representative
2420 * type, we now can eliminate types that are not canonical and leave only
2421 * canonical ones layed out sequentially in memory by copying them over
2422 * duplicates. During compaction btf_dedup->hypot_map array is reused to store
2423 * a map from original type ID to a new compacted type ID, which will be used
2424 * during next phase to "fix up" type IDs, referenced from struct/union and
2425 * reference types.
2426 */
2427static int btf_dedup_compact_types(struct btf_dedup *d)
2428{
2429 struct btf_type **new_types;
2430 __u32 next_type_id = 1;
2431 char *types_start, *p;
2432 int i, len;
2433
2434 /* we are going to reuse hypot_map to store compaction remapping */
2435 d->hypot_map[0] = 0;
2436 for (i = 1; i <= d->btf->nr_types; i++)
2437 d->hypot_map[i] = BTF_UNPROCESSED_ID;
2438
2439 types_start = d->btf->nohdr_data + d->btf->hdr->type_off;
2440 p = types_start;
2441
2442 for (i = 1; i <= d->btf->nr_types; i++) {
2443 if (d->map[i] != i)
2444 continue;
2445
2446 len = btf_type_size(d->btf->types[i]);
2447 if (len < 0)
2448 return len;
2449
2450 memmove(p, d->btf->types[i], len);
2451 d->hypot_map[i] = next_type_id;
2452 d->btf->types[next_type_id] = (struct btf_type *)p;
2453 p += len;
2454 next_type_id++;
2455 }
2456
2457 /* shrink struct btf's internal types index and update btf_header */
2458 d->btf->nr_types = next_type_id - 1;
2459 d->btf->types_size = d->btf->nr_types;
2460 d->btf->hdr->type_len = p - types_start;
2461 new_types = realloc(d->btf->types,
2462 (1 + d->btf->nr_types) * sizeof(struct btf_type *));
2463 if (!new_types)
2464 return -ENOMEM;
2465 d->btf->types = new_types;
2466
2467 /* make sure string section follows type information without gaps */
2468 d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data;
2469 memmove(p, d->btf->strings, d->btf->hdr->str_len);
2470 d->btf->strings = p;
2471 p += d->btf->hdr->str_len;
2472
2473 d->btf->data_size = p - (char *)d->btf->data;
2474 return 0;
2475}
2476
2477/*
2478 * Figure out final (deduplicated and compacted) type ID for provided original
2479 * `type_id` by first resolving it into corresponding canonical type ID and
2480 * then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map,
2481 * which is populated during compaction phase.
2482 */
2483static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
2484{
2485 __u32 resolved_type_id, new_type_id;
2486
2487 resolved_type_id = resolve_type_id(d, type_id);
2488 new_type_id = d->hypot_map[resolved_type_id];
2489 if (new_type_id > BTF_MAX_TYPE)
2490 return -EINVAL;
2491 return new_type_id;
2492}
2493
2494/*
2495 * Remap referenced type IDs into deduped type IDs.
2496 *
2497 * After BTF types are deduplicated and compacted, their final type IDs may
2498 * differ from original ones. The map from original to a corresponding
2499 * deduped type ID is stored in btf_dedup->hypot_map and is populated during
2500 * compaction phase. During remapping phase we are rewriting all type IDs
2501 * referenced from any BTF type (e.g., struct fields, func proto args, etc) to
2502 * their final deduped type IDs.
2503 */
2504static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
2505{
2506 struct btf_type *t = d->btf->types[type_id];
2507 int i, r;
2508
2509 switch (BTF_INFO_KIND(t->info)) {
2510 case BTF_KIND_INT:
2511 case BTF_KIND_ENUM:
2512 break;
2513
2514 case BTF_KIND_FWD:
2515 case BTF_KIND_CONST:
2516 case BTF_KIND_VOLATILE:
2517 case BTF_KIND_RESTRICT:
2518 case BTF_KIND_PTR:
2519 case BTF_KIND_TYPEDEF:
2520 case BTF_KIND_FUNC:
2521 r = btf_dedup_remap_type_id(d, t->type);
2522 if (r < 0)
2523 return r;
2524 t->type = r;
2525 break;
2526
2527 case BTF_KIND_ARRAY: {
2528 struct btf_array *arr_info = (struct btf_array *)(t + 1);
2529
2530 r = btf_dedup_remap_type_id(d, arr_info->type);
2531 if (r < 0)
2532 return r;
2533 arr_info->type = r;
2534 r = btf_dedup_remap_type_id(d, arr_info->index_type);
2535 if (r < 0)
2536 return r;
2537 arr_info->index_type = r;
2538 break;
2539 }
2540
2541 case BTF_KIND_STRUCT:
2542 case BTF_KIND_UNION: {
2543 struct btf_member *member = (struct btf_member *)(t + 1);
2544 __u16 vlen = BTF_INFO_VLEN(t->info);
2545
2546 for (i = 0; i < vlen; i++) {
2547 r = btf_dedup_remap_type_id(d, member->type);
2548 if (r < 0)
2549 return r;
2550 member->type = r;
2551 member++;
2552 }
2553 break;
2554 }
2555
2556 case BTF_KIND_FUNC_PROTO: {
2557 struct btf_param *param = (struct btf_param *)(t + 1);
2558 __u16 vlen = BTF_INFO_VLEN(t->info);
2559
2560 r = btf_dedup_remap_type_id(d, t->type);
2561 if (r < 0)
2562 return r;
2563 t->type = r;
2564
2565 for (i = 0; i < vlen; i++) {
2566 r = btf_dedup_remap_type_id(d, param->type);
2567 if (r < 0)
2568 return r;
2569 param->type = r;
2570 param++;
2571 }
2572 break;
2573 }
2574
2575 default:
2576 return -EINVAL;
2577 }
2578
2579 return 0;
2580}
2581
2582static int btf_dedup_remap_types(struct btf_dedup *d)
2583{
2584 int i, r;
2585
2586 for (i = 1; i <= d->btf->nr_types; i++) {
2587 r = btf_dedup_remap_type(d, i);
2588 if (r < 0)
2589 return r;
2590 }
2591 return 0;
2592}