diff options
author | Andrii Nakryiko <andriin@fb.com> | 2019-02-04 20:29:45 -0500 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2019-02-05 10:52:57 -0500 |
commit | d5caef5b56555bfa2ac0cf730f075864a023437e (patch) | |
tree | aa8f77aa92737d225574fe536014e9d49ff28b29 /tools/lib/bpf/btf.c | |
parent | 69eaab04c675ef2d0127a80b3395aa90dfd1061f (diff) |
btf: add BTF types deduplication algorithm
This patch implements the BTF types deduplication algorithm. It makes it possible to
greatly compress typical output of pahole's DWARF-to-BTF conversion or
LLVM's compilation output by detecting and collapsing identical types emitted in
isolation per compilation unit. Algorithm also resolves struct/union forward
declarations into concrete BTF types representing referenced struct/union. If
undesired, this resolution can be disabled through specifying corresponding options.
Algorithm itself and its application to Linux kernel's BTF types is
described in details at:
https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'tools/lib/bpf/btf.c')
-rw-r--r-- | tools/lib/bpf/btf.c | 1741 |
1 files changed, 1741 insertions, 0 deletions
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 06bd1a625ff4..e5097be16018 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c | |||
@@ -849,3 +849,1744 @@ __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext) | |||
849 | { | 849 | { |
850 | return btf_ext->line_info.rec_size; | 850 | return btf_ext->line_info.rec_size; |
851 | } | 851 | } |
852 | |||
/* Opaque deduplication state; definition and field docs are below. */
struct btf_dedup;

/* Constructor/destructor for dedup state and the six algorithm passes,
 * declared up front so that btf__dedup() can be defined next to the
 * algorithm overview comment.
 */
static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
				       const struct btf_dedup_opts *opts);
static void btf_dedup_free(struct btf_dedup *d);
static int btf_dedup_strings(struct btf_dedup *d);
static int btf_dedup_prim_types(struct btf_dedup *d);
static int btf_dedup_struct_types(struct btf_dedup *d);
static int btf_dedup_ref_types(struct btf_dedup *d);
static int btf_dedup_compact_types(struct btf_dedup *d);
static int btf_dedup_remap_types(struct btf_dedup *d);
865 | /* | ||
866 | * Deduplicate BTF types and strings. | ||
867 | * | ||
868 | * BTF dedup algorithm takes as an input `struct btf` representing `.BTF` ELF | ||
869 | * section with all BTF type descriptors and string data. It overwrites that | ||
870 | * memory in-place with deduplicated types and strings without any loss of | ||
871 | * information. If optional `struct btf_ext` representing '.BTF.ext' ELF section | ||
872 | * is provided, all the strings referenced from .BTF.ext section are honored | ||
873 | * and updated to point to the right offsets after deduplication. | ||
874 | * | ||
875 | * If function returns with error, type/string data might be garbled and should | ||
876 | * be discarded. | ||
877 | * | ||
878 | * More verbose and detailed description of both problem btf_dedup is solving, | ||
879 | * as well as solution could be found at: | ||
880 | * https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html | ||
881 | * | ||
882 | * Problem description and justification | ||
883 | * ===================================== | ||
884 | * | ||
885 | * BTF type information is typically emitted either as a result of conversion | ||
886 | * from DWARF to BTF or directly by compiler. In both cases, each compilation | ||
887 | * unit contains information about a subset of all the types that are used | ||
888 | * in an application. These subsets are frequently overlapping and contain a lot | ||
889 | * of duplicated information when later concatenated together into a single | ||
890 | * binary. This algorithm ensures that each unique type is represented by single | ||
891 | * BTF type descriptor, greatly reducing resulting size of BTF data. | ||
892 | * | ||
893 | * Compilation unit isolation and subsequent duplication of data is not the only | ||
894 | * problem. The same type hierarchy (e.g., struct and all the type that struct | ||
895 | * references) in different compilation units can be represented in BTF to | ||
896 | * various degrees of completeness (or, rather, incompleteness) due to | ||
897 | * struct/union forward declarations. | ||
898 | * | ||
899 | * Let's take a look at an example, that we'll use to better understand the | ||
900 | * problem (and solution). Suppose we have two compilation units, each using | ||
901 | * same `struct S`, but each of them having incomplete type information about | ||
902 | * struct's fields: | ||
903 | * | ||
904 | * // CU #1: | ||
905 | * struct S; | ||
906 | * struct A { | ||
907 | * int a; | ||
908 | * struct A* self; | ||
909 | * struct S* parent; | ||
910 | * }; | ||
911 | * struct B; | ||
912 | * struct S { | ||
913 | * struct A* a_ptr; | ||
914 | * struct B* b_ptr; | ||
915 | * }; | ||
916 | * | ||
917 | * // CU #2: | ||
918 | * struct S; | ||
919 | * struct A; | ||
920 | * struct B { | ||
921 | * int b; | ||
922 | * struct B* self; | ||
923 | * struct S* parent; | ||
924 | * }; | ||
925 | * struct S { | ||
926 | * struct A* a_ptr; | ||
927 | * struct B* b_ptr; | ||
928 | * }; | ||
929 | * | ||
930 | * In case of CU #1, BTF data will know only that `struct B` exists (but no | ||
931 | * more), but will know the complete type information about `struct A`. While | ||
932 | * for CU #2, it will know full type information about `struct B`, but will | ||
933 | * only know about forward declaration of `struct A` (in BTF terms, it will | ||
934 | * have `BTF_KIND_FWD` type descriptor with name `A`). | ||
935 | * | ||
936 | * This compilation unit isolation means that it's possible that there is no | ||
937 | * single CU with complete type information describing structs `S`, `A`, and | ||
938 | * `B`. Also, we might get tons of duplicated and redundant type information. | ||
939 | * | ||
940 | * Additional complication we need to keep in mind comes from the fact that | ||
941 | * types, in general, can form graphs containing cycles, not just DAGs. | ||
942 | * | ||
943 | * While algorithm does deduplication, it also merges and resolves type | ||
944 | * information (unless disabled through `struct btf_dedup_opts`), whenever possible. | ||
945 | * E.g., in the example above with two compilation units having partial type | ||
946 | * information for structs `A` and `B`, the output of algorithm will emit | ||
947 | * a single copy of each BTF type that describes structs `A`, `B`, and `S` | ||
948 | * (as well as type information for `int` and pointers), as if they were defined | ||
949 | * in a single compilation unit as: | ||
950 | * | ||
951 | * struct A { | ||
952 | * int a; | ||
953 | * struct A* self; | ||
954 | * struct S* parent; | ||
955 | * }; | ||
956 | * struct B { | ||
957 | * int b; | ||
958 | * struct B* self; | ||
959 | * struct S* parent; | ||
960 | * }; | ||
961 | * struct S { | ||
962 | * struct A* a_ptr; | ||
963 | * struct B* b_ptr; | ||
964 | * }; | ||
965 | * | ||
966 | * Algorithm summary | ||
967 | * ================= | ||
968 | * | ||
969 | * Algorithm completes its work in 6 separate passes: | ||
970 | * | ||
971 | * 1. Strings deduplication. | ||
972 | * 2. Primitive types deduplication (int, enum, fwd). | ||
973 | * 3. Struct/union types deduplication. | ||
974 | * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func | ||
975 | * protos, and const/volatile/restrict modifiers). | ||
976 | * 5. Types compaction. | ||
977 | * 6. Types remapping. | ||
978 | * | ||
979 | * Algorithm determines canonical type descriptor, which is a single | ||
980 | * representative type for each truly unique type. This canonical type is the | ||
981 | * one that will go into final deduplicated BTF type information. For | ||
982 | * struct/unions, it is also the type that algorithm will merge additional type | ||
983 | * information into (while resolving FWDs), as it discovers it from data in | ||
984 | * other CUs. Each input BTF type eventually gets either mapped to itself, if | ||
985 | * that type is canonical, or to some other type, if that type is equivalent | ||
986 | * and was chosen as canonical representative. This mapping is stored in | ||
987 | * `btf_dedup->map` array. This map is also used to record STRUCT/UNION that | ||
988 | * FWD type got resolved to. | ||
989 | * | ||
990 | * To facilitate fast discovery of canonical types, we also maintain canonical | ||
991 | * index (`btf_dedup->dedup_table`), which maps type descriptor's signature hash | ||
992 | * (i.e., hashed kind, name, size, fields, etc) into a list of canonical types | ||
993 | * that match that signature. With sufficiently good choice of type signature | ||
994 | * hashing function, we can limit number of canonical types for each unique type | ||
995 | * signature to a very small number, allowing to find canonical type for any | ||
996 | * duplicated type very quickly. | ||
997 | * | ||
998 | * Struct/union deduplication is the most critical part and algorithm for | ||
999 | * deduplicating structs/unions is described in greater details in comments for | ||
1000 | * `btf_dedup_is_equiv` function. | ||
1001 | */ | ||
/*
 * Public entry point: deduplicate types and strings of @btf in-place,
 * updating string offsets referenced from @btf_ext (optional, may be NULL).
 * See the algorithm overview in the comment above.
 *
 * Returns 0 on success, negative error code otherwise. On error, type and
 * string data may be left garbled and should be discarded by the caller.
 */
int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
	       const struct btf_dedup_opts *opts)
{
	struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts);
	int err;

	if (IS_ERR(d)) {
		pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d));
		/* setup failure is reported uniformly as -EINVAL */
		return -EINVAL;
	}

	/* The six passes below must run in exactly this order; each pass
	 * relies on mappings established by the previous ones.
	 */
	err = btf_dedup_strings(d);
	if (err < 0) {
		pr_debug("btf_dedup_strings failed:%d\n", err);
		goto done;
	}
	err = btf_dedup_prim_types(d);
	if (err < 0) {
		pr_debug("btf_dedup_prim_types failed:%d\n", err);
		goto done;
	}
	err = btf_dedup_struct_types(d);
	if (err < 0) {
		pr_debug("btf_dedup_struct_types failed:%d\n", err);
		goto done;
	}
	err = btf_dedup_ref_types(d);
	if (err < 0) {
		pr_debug("btf_dedup_ref_types failed:%d\n", err);
		goto done;
	}
	err = btf_dedup_compact_types(d);
	if (err < 0) {
		pr_debug("btf_dedup_compact_types failed:%d\n", err);
		goto done;
	}
	err = btf_dedup_remap_types(d);
	if (err < 0) {
		pr_debug("btf_dedup_remap_types failed:%d\n", err);
		goto done;
	}

done:
	btf_dedup_free(d);
	return err;
}
1048 | |||
/* log2 of the number of hash buckets in the dedup table (2^14 = 16384) */
#define BTF_DEDUP_TABLE_SIZE_LOG 14
/* mask reducing a 32-bit hash to a bucket index */
#define BTF_DEDUP_TABLE_MOD ((1 << BTF_DEDUP_TABLE_SIZE_LOG) - 1)
/* sentinel map value: type hasn't been processed yet */
#define BTF_UNPROCESSED_ID ((__u32)-1)
/* sentinel map value: type is being processed (used for cycle detection) */
#define BTF_IN_PROGRESS_ID ((__u32)-2)

/* One candidate type ID in a singly-linked hash bucket chain. */
struct btf_dedup_node {
	struct btf_dedup_node *next;
	__u32 type_id;
};
1058 | |||
/* Accumulated state of the whole deduplication algorithm. */
struct btf_dedup {
	/* .BTF section to be deduped in-place */
	struct btf *btf;
	/*
	 * Optional .BTF.ext section. When provided, any strings referenced
	 * from it will be taken into account when deduping strings
	 */
	struct btf_ext *btf_ext;
	/*
	 * This is a map from any type's signature hash to a list of possible
	 * canonical representative type candidates. Hash collisions are
	 * ignored, so even types of various kinds can share same list of
	 * candidates, which is fine because we rely on subsequent
	 * btf_xxx_equal() checks to authoritatively verify type equality.
	 */
	struct btf_dedup_node **dedup_table;
	/* Canonical types map: indexed by original type ID; maps each type
	 * to its canonical representative (or a FWD to its resolved type)
	 */
	__u32 *map;
	/* Hypothetical mapping, used during type graph equivalence checks */
	__u32 *hypot_map;
	/* list of type IDs touched in hypot_map, enabling cheap rollback */
	__u32 *hypot_list;
	/* number of entries currently recorded in hypot_list */
	size_t hypot_cnt;
	/* allocated capacity of hypot_list */
	size_t hypot_cap;
	/* Various option modifying behavior of algorithm */
	struct btf_dedup_opts opts;
};
1085 | |||
/* One entry of the string index built by btf_dedup_strings(). */
struct btf_str_ptr {
	/* points into the original string section blob */
	const char *str;
	/* offset of the deduped copy in the compacted blob */
	__u32 new_off;
	/* true once some .BTF/.BTF.ext entity references this string */
	bool used;
};

/* Growable index over all strings of the BTF string section. */
struct btf_str_ptrs {
	struct btf_str_ptr *ptrs;
	/* start of the original string section blob */
	const char *data;
	__u32 cnt;
	__u32 cap;
};
1098 | |||
/* Fold @value into the running hash @h and return the combined hash. */
static inline __u32 hash_combine(__u32 h, __u32 value)
{
	/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
	const __u32 golden_ratio_prime = 0x9e370001UL;

	return h * 37 + value * golden_ratio_prime;
}
1106 | |||
/* Iterate all candidate nodes in the bucket that @hash maps to. */
#define for_each_hash_node(table, hash, node) \
	for (node = table[hash & BTF_DEDUP_TABLE_MOD]; node; node = node->next)

/*
 * Register @type_id as a canonical-type candidate under signature @hash.
 * Returns 0 on success, -ENOMEM if node allocation fails.
 */
static int btf_dedup_table_add(struct btf_dedup *d, __u32 hash, __u32 type_id)
{
	struct btf_dedup_node *node = malloc(sizeof(struct btf_dedup_node));

	if (!node)
		return -ENOMEM;
	node->type_id = type_id;
	/* push onto the head of the bucket's singly-linked chain */
	node->next = d->dedup_table[hash & BTF_DEDUP_TABLE_MOD];
	d->dedup_table[hash & BTF_DEDUP_TABLE_MOD] = node;
	return 0;
}
1121 | |||
1122 | static int btf_dedup_hypot_map_add(struct btf_dedup *d, | ||
1123 | __u32 from_id, __u32 to_id) | ||
1124 | { | ||
1125 | if (d->hypot_cnt == d->hypot_cap) { | ||
1126 | __u32 *new_list; | ||
1127 | |||
1128 | d->hypot_cap += max(16, d->hypot_cap / 2); | ||
1129 | new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap); | ||
1130 | if (!new_list) | ||
1131 | return -ENOMEM; | ||
1132 | d->hypot_list = new_list; | ||
1133 | } | ||
1134 | d->hypot_list[d->hypot_cnt++] = from_id; | ||
1135 | d->hypot_map[from_id] = to_id; | ||
1136 | return 0; | ||
1137 | } | ||
1138 | |||
1139 | static void btf_dedup_clear_hypot_map(struct btf_dedup *d) | ||
1140 | { | ||
1141 | int i; | ||
1142 | |||
1143 | for (i = 0; i < d->hypot_cnt; i++) | ||
1144 | d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID; | ||
1145 | d->hypot_cnt = 0; | ||
1146 | } | ||
1147 | |||
1148 | static void btf_dedup_table_free(struct btf_dedup *d) | ||
1149 | { | ||
1150 | struct btf_dedup_node *head, *tmp; | ||
1151 | int i; | ||
1152 | |||
1153 | if (!d->dedup_table) | ||
1154 | return; | ||
1155 | |||
1156 | for (i = 0; i < (1 << BTF_DEDUP_TABLE_SIZE_LOG); i++) { | ||
1157 | while (d->dedup_table[i]) { | ||
1158 | tmp = d->dedup_table[i]; | ||
1159 | d->dedup_table[i] = tmp->next; | ||
1160 | free(tmp); | ||
1161 | } | ||
1162 | |||
1163 | head = d->dedup_table[i]; | ||
1164 | while (head) { | ||
1165 | tmp = head; | ||
1166 | head = head->next; | ||
1167 | free(tmp); | ||
1168 | } | ||
1169 | } | ||
1170 | |||
1171 | free(d->dedup_table); | ||
1172 | d->dedup_table = NULL; | ||
1173 | } | ||
1174 | |||
/*
 * Release all dedup state, including @d itself. @d must be a valid pointer
 * (possibly with only partially-initialized members, as happens when
 * btf_dedup_new() fails midway — free(NULL) is a safe no-op).
 */
static void btf_dedup_free(struct btf_dedup *d)
{
	btf_dedup_table_free(d);

	free(d->map);
	d->map = NULL;

	free(d->hypot_map);
	d->hypot_map = NULL;

	free(d->hypot_list);
	d->hypot_list = NULL;

	free(d);
}
1190 | |||
/*
 * Allocate and initialize deduplication state for @btf/@btf_ext:
 * an empty hash table, a canonical map with every type marked unprocessed,
 * and a cleared hypothetical map. @opts may be NULL, meaning defaults.
 * Returns ERR_PTR(-ENOMEM) on allocation failure.
 */
static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
				       const struct btf_dedup_opts *opts)
{
	struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
	int i, err = 0;

	if (!d)
		return ERR_PTR(-ENOMEM);

	d->btf = btf;
	d->btf_ext = btf_ext;

	d->dedup_table = calloc(1 << BTF_DEDUP_TABLE_SIZE_LOG,
				sizeof(struct btf_dedup_node *));
	if (!d->dedup_table) {
		err = -ENOMEM;
		goto done;
	}

	/* +1 slot because type IDs are 1-based; ID 0 is reserved for void */
	d->map = malloc(sizeof(__u32) * (1 + btf->nr_types));
	if (!d->map) {
		err = -ENOMEM;
		goto done;
	}
	/* special BTF "void" type is made canonical immediately */
	d->map[0] = 0;
	for (i = 1; i <= btf->nr_types; i++)
		d->map[i] = BTF_UNPROCESSED_ID;

	d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types));
	if (!d->hypot_map) {
		err = -ENOMEM;
		goto done;
	}
	for (i = 0; i <= btf->nr_types; i++)
		d->hypot_map[i] = BTF_UNPROCESSED_ID;

	/* FWD resolution is on by default; NULL opts means all defaults */
	d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;

done:
	if (err) {
		/* safe: partially-initialized members are NULL from calloc */
		btf_dedup_free(d);
		return ERR_PTR(err);
	}

	return d;
}
1238 | |||
/* Callback invoked for each string offset found; may rewrite *str_off_ptr. */
typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx);

/*
 * Iterate over all possible places in .BTF and .BTF.ext that can reference
 * string and pass pointer to it to a provided callback `fn`.
 * Stops at and returns the first non-zero value returned by `fn`;
 * returns 0 once every string offset has been visited.
 */
static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
{
	void *line_data_cur, *line_data_end;
	int i, j, r, rec_size;
	struct btf_type *t;

	for (i = 1; i <= d->btf->nr_types; i++) {
		t = d->btf->types[i];
		/* every type has a name offset (0 for anonymous types) */
		r = fn(&t->name_off, ctx);
		if (r)
			return r;

		/* only these kinds carry additional named sub-items */
		switch (BTF_INFO_KIND(t->info)) {
		case BTF_KIND_STRUCT:
		case BTF_KIND_UNION: {
			/* member records follow the fixed-size type header */
			struct btf_member *m = (struct btf_member *)(t + 1);
			__u16 vlen = BTF_INFO_VLEN(t->info);

			for (j = 0; j < vlen; j++) {
				r = fn(&m->name_off, ctx);
				if (r)
					return r;
				m++;
			}
			break;
		}
		case BTF_KIND_ENUM: {
			struct btf_enum *m = (struct btf_enum *)(t + 1);
			__u16 vlen = BTF_INFO_VLEN(t->info);

			for (j = 0; j < vlen; j++) {
				r = fn(&m->name_off, ctx);
				if (r)
					return r;
				m++;
			}
			break;
		}
		case BTF_KIND_FUNC_PROTO: {
			struct btf_param *m = (struct btf_param *)(t + 1);
			__u16 vlen = BTF_INFO_VLEN(t->info);

			for (j = 0; j < vlen; j++) {
				r = fn(&m->name_off, ctx);
				if (r)
					return r;
				m++;
			}
			break;
		}
		default:
			break;
		}
	}

	if (!d->btf_ext)
		return 0;

	/* .BTF.ext line_info is the only ext data visited here; its records
	 * reference section name, file name and source line strings.
	 * NOTE(review): func_info records appear to carry no string offsets
	 * of their own - confirm against the .BTF.ext format definition.
	 */
	line_data_cur = d->btf_ext->line_info.info;
	line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len;
	rec_size = d->btf_ext->line_info.rec_size;

	while (line_data_cur < line_data_end) {
		struct btf_ext_info_sec *sec = line_data_cur;
		struct bpf_line_info_min *line_info;
		__u32 num_info = sec->num_info;

		r = fn(&sec->sec_name_off, ctx);
		if (r)
			return r;

		line_data_cur += sizeof(struct btf_ext_info_sec);
		/* records may be larger than we know; advance by rec_size */
		for (i = 0; i < num_info; i++) {
			line_info = line_data_cur;
			r = fn(&line_info->file_name_off, ctx);
			if (r)
				return r;
			r = fn(&line_info->line_off, ctx);
			if (r)
				return r;
			line_data_cur += rec_size;
		}
	}

	return 0;
}
1331 | |||
1332 | static int str_sort_by_content(const void *a1, const void *a2) | ||
1333 | { | ||
1334 | const struct btf_str_ptr *p1 = a1; | ||
1335 | const struct btf_str_ptr *p2 = a2; | ||
1336 | |||
1337 | return strcmp(p1->str, p2->str); | ||
1338 | } | ||
1339 | |||
1340 | static int str_sort_by_offset(const void *a1, const void *a2) | ||
1341 | { | ||
1342 | const struct btf_str_ptr *p1 = a1; | ||
1343 | const struct btf_str_ptr *p2 = a2; | ||
1344 | |||
1345 | if (p1->str != p2->str) | ||
1346 | return p1->str < p2->str ? -1 : 1; | ||
1347 | return 0; | ||
1348 | } | ||
1349 | |||
1350 | static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem) | ||
1351 | { | ||
1352 | const struct btf_str_ptr *p = pelem; | ||
1353 | |||
1354 | if (str_ptr != p->str) | ||
1355 | return (const char *)str_ptr < p->str ? -1 : 1; | ||
1356 | return 0; | ||
1357 | } | ||
1358 | |||
1359 | static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx) | ||
1360 | { | ||
1361 | struct btf_str_ptrs *strs; | ||
1362 | struct btf_str_ptr *s; | ||
1363 | |||
1364 | if (*str_off_ptr == 0) | ||
1365 | return 0; | ||
1366 | |||
1367 | strs = ctx; | ||
1368 | s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, | ||
1369 | sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); | ||
1370 | if (!s) | ||
1371 | return -EINVAL; | ||
1372 | s->used = true; | ||
1373 | return 0; | ||
1374 | } | ||
1375 | |||
1376 | static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx) | ||
1377 | { | ||
1378 | struct btf_str_ptrs *strs; | ||
1379 | struct btf_str_ptr *s; | ||
1380 | |||
1381 | if (*str_off_ptr == 0) | ||
1382 | return 0; | ||
1383 | |||
1384 | strs = ctx; | ||
1385 | s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, | ||
1386 | sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); | ||
1387 | if (!s) | ||
1388 | return -EINVAL; | ||
1389 | *str_off_ptr = s->new_off; | ||
1390 | return 0; | ||
1391 | } | ||
1392 | |||
/*
 * Dedup string and filter out those that are not referenced from either .BTF
 * or .BTF.ext (if provided) sections.
 *
 * This is done by building index of all strings in BTF's string section,
 * then iterating over all entities that can reference strings (e.g., type
 * names, struct field names, .BTF.ext line info, etc) and marking corresponding
 * strings as used. After that all used strings are deduped and compacted into
 * sequential blob of memory and new offsets are calculated. Then all the string
 * references are iterated again and rewritten using new offsets.
 */
static int btf_dedup_strings(struct btf_dedup *d)
{
	const struct btf_header *hdr = d->btf->hdr;
	char *start = (char *)d->btf->nohdr_data + hdr->str_off;
	char *end = start + d->btf->hdr->str_len;
	char *p = start, *tmp_strs = NULL;
	struct btf_str_ptrs strs = {
		.cnt = 0,
		.cap = 0,
		.ptrs = NULL,
		.data = start,
	};
	int i, j, err = 0, grp_idx;
	bool grp_used;

	/* build index of all strings */
	while (p < end) {
		if (strs.cnt + 1 > strs.cap) {
			struct btf_str_ptr *new_ptrs;

			/* grow by half, but at least by 16 entries */
			strs.cap += max(strs.cnt / 2, 16);
			new_ptrs = realloc(strs.ptrs,
					   sizeof(strs.ptrs[0]) * strs.cap);
			if (!new_ptrs) {
				err = -ENOMEM;
				goto done;
			}
			strs.ptrs = new_ptrs;
		}

		strs.ptrs[strs.cnt].str = p;
		strs.ptrs[strs.cnt].used = false;

		/* strings are NUL-terminated and laid out back to back */
		p += strlen(p) + 1;
		strs.cnt++;
	}

	/* temporary storage for deduplicated strings; output can never be
	 * larger than the input, so str_len bytes always suffice
	 */
	tmp_strs = malloc(d->btf->hdr->str_len);
	if (!tmp_strs) {
		err = -ENOMEM;
		goto done;
	}

	/* mark all used strings */
	strs.ptrs[0].used = true; /* empty string at offset 0 is always kept */
	err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs);
	if (err)
		goto done;

	/* sort strings by content, so that we can identify duplicates */
	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content);

	/*
	 * iterate groups of equal strings and if any instance in a group was
	 * referenced, emit single instance and remember new offset
	 */
	p = tmp_strs;
	grp_idx = 0;
	grp_used = strs.ptrs[0].used;
	/* iterate past end to avoid code duplication after loop */
	for (i = 1; i <= strs.cnt; i++) {
		/*
		 * when i == strs.cnt, we want to skip string comparison and go
		 * straight to handling last group of strings (otherwise we'd
		 * need to handle last group after the loop w/ duplicated code)
		 */
		if (i < strs.cnt &&
		    !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) {
			grp_used = grp_used || strs.ptrs[i].used;
			continue;
		}

		/*
		 * this check would have been required after the loop to handle
		 * last group of strings, but due to <= condition in a loop
		 * we avoid that duplication
		 */
		if (grp_used) {
			int new_off = p - tmp_strs;
			__u32 len = strlen(strs.ptrs[grp_idx].str);

			/* emit one copy; point every duplicate at it */
			memmove(p, strs.ptrs[grp_idx].str, len + 1);
			for (j = grp_idx; j < i; j++)
				strs.ptrs[j].new_off = new_off;
			p += len + 1;
		}

		if (i < strs.cnt) {
			grp_idx = i;
			grp_used = strs.ptrs[i].used;
		}
	}

	/* replace original strings with deduped ones */
	d->btf->hdr->str_len = p - tmp_strs;
	memmove(start, tmp_strs, d->btf->hdr->str_len);
	end = start + d->btf->hdr->str_len;

	/* restore original order for further binary search lookups */
	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset);

	/* remap string offsets */
	err = btf_for_each_str_off(d, btf_str_remap_offset, &strs);
	if (err)
		goto done;

	d->btf->hdr->str_len = end - start;

done:
	free(tmp_strs);
	free(strs.ptrs);
	return err;
}
1518 | |||
1519 | static __u32 btf_hash_common(struct btf_type *t) | ||
1520 | { | ||
1521 | __u32 h; | ||
1522 | |||
1523 | h = hash_combine(0, t->name_off); | ||
1524 | h = hash_combine(h, t->info); | ||
1525 | h = hash_combine(h, t->size); | ||
1526 | return h; | ||
1527 | } | ||
1528 | |||
1529 | static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2) | ||
1530 | { | ||
1531 | return t1->name_off == t2->name_off && | ||
1532 | t1->info == t2->info && | ||
1533 | t1->size == t2->size; | ||
1534 | } | ||
1535 | |||
1536 | /* Calculate type signature hash of INT. */ | ||
1537 | static __u32 btf_hash_int(struct btf_type *t) | ||
1538 | { | ||
1539 | __u32 info = *(__u32 *)(t + 1); | ||
1540 | __u32 h; | ||
1541 | |||
1542 | h = btf_hash_common(t); | ||
1543 | h = hash_combine(h, info); | ||
1544 | return h; | ||
1545 | } | ||
1546 | |||
1547 | /* Check structural equality of two INTs. */ | ||
1548 | static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2) | ||
1549 | { | ||
1550 | __u32 info1, info2; | ||
1551 | |||
1552 | if (!btf_equal_common(t1, t2)) | ||
1553 | return false; | ||
1554 | info1 = *(__u32 *)(t1 + 1); | ||
1555 | info2 = *(__u32 *)(t2 + 1); | ||
1556 | return info1 == info2; | ||
1557 | } | ||
1558 | |||
1559 | /* Calculate type signature hash of ENUM. */ | ||
1560 | static __u32 btf_hash_enum(struct btf_type *t) | ||
1561 | { | ||
1562 | struct btf_enum *member = (struct btf_enum *)(t + 1); | ||
1563 | __u32 vlen = BTF_INFO_VLEN(t->info); | ||
1564 | __u32 h = btf_hash_common(t); | ||
1565 | int i; | ||
1566 | |||
1567 | for (i = 0; i < vlen; i++) { | ||
1568 | h = hash_combine(h, member->name_off); | ||
1569 | h = hash_combine(h, member->val); | ||
1570 | member++; | ||
1571 | } | ||
1572 | return h; | ||
1573 | } | ||
1574 | |||
1575 | /* Check structural equality of two ENUMs. */ | ||
1576 | static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) | ||
1577 | { | ||
1578 | struct btf_enum *m1, *m2; | ||
1579 | __u16 vlen; | ||
1580 | int i; | ||
1581 | |||
1582 | if (!btf_equal_common(t1, t2)) | ||
1583 | return false; | ||
1584 | |||
1585 | vlen = BTF_INFO_VLEN(t1->info); | ||
1586 | m1 = (struct btf_enum *)(t1 + 1); | ||
1587 | m2 = (struct btf_enum *)(t2 + 1); | ||
1588 | for (i = 0; i < vlen; i++) { | ||
1589 | if (m1->name_off != m2->name_off || m1->val != m2->val) | ||
1590 | return false; | ||
1591 | m1++; | ||
1592 | m2++; | ||
1593 | } | ||
1594 | return true; | ||
1595 | } | ||
1596 | |||
1597 | /* | ||
1598 | * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs, | ||
1599 | * as referenced type IDs equivalence is established separately during type | ||
1600 | * graph equivalence check algorithm. | ||
1601 | */ | ||
1602 | static __u32 btf_hash_struct(struct btf_type *t) | ||
1603 | { | ||
1604 | struct btf_member *member = (struct btf_member *)(t + 1); | ||
1605 | __u32 vlen = BTF_INFO_VLEN(t->info); | ||
1606 | __u32 h = btf_hash_common(t); | ||
1607 | int i; | ||
1608 | |||
1609 | for (i = 0; i < vlen; i++) { | ||
1610 | h = hash_combine(h, member->name_off); | ||
1611 | h = hash_combine(h, member->offset); | ||
1612 | /* no hashing of referenced type ID, it can be unresolved yet */ | ||
1613 | member++; | ||
1614 | } | ||
1615 | return h; | ||
1616 | } | ||
1617 | |||
/*
 * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced
 * type IDs. This check is performed during type graph equivalence check and
 * referenced types equivalence is checked separately.
 */
static bool btf_equal_struct(struct btf_type *t1, struct btf_type *t2)
{
	struct btf_member *m1, *m2;
	__u16 vlen;
	int i;

	if (!btf_equal_common(t1, t2))
		return false;

	/* equal info (from common check) implies equal member counts */
	vlen = BTF_INFO_VLEN(t1->info);
	m1 = (struct btf_member *)(t1 + 1);
	m2 = (struct btf_member *)(t2 + 1);
	for (i = 0; i < vlen; i++) {
		/* member type IDs deliberately not compared here */
		if (m1->name_off != m2->name_off || m1->offset != m2->offset)
			return false;
		m1++;
		m2++;
	}
	return true;
}
1643 | |||
1644 | /* | ||
1645 | * Calculate type signature hash of ARRAY, including referenced type IDs, | ||
1646 | * under assumption that they were already resolved to canonical type IDs and | ||
1647 | * are not going to change. | ||
1648 | */ | ||
1649 | static __u32 btf_hash_array(struct btf_type *t) | ||
1650 | { | ||
1651 | struct btf_array *info = (struct btf_array *)(t + 1); | ||
1652 | __u32 h = btf_hash_common(t); | ||
1653 | |||
1654 | h = hash_combine(h, info->type); | ||
1655 | h = hash_combine(h, info->index_type); | ||
1656 | h = hash_combine(h, info->nelems); | ||
1657 | return h; | ||
1658 | } | ||
1659 | |||
1660 | /* | ||
1661 | * Check exact equality of two ARRAYs, taking into account referenced | ||
1662 | * type IDs, under assumption that they were already resolved to canonical | ||
1663 | * type IDs and are not going to change. | ||
1664 | * This function is called during reference types deduplication to compare | ||
1665 | * ARRAY to potential canonical representative. | ||
1666 | */ | ||
1667 | static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2) | ||
1668 | { | ||
1669 | struct btf_array *info1, *info2; | ||
1670 | |||
1671 | if (!btf_equal_common(t1, t2)) | ||
1672 | return false; | ||
1673 | |||
1674 | info1 = (struct btf_array *)(t1 + 1); | ||
1675 | info2 = (struct btf_array *)(t2 + 1); | ||
1676 | return info1->type == info2->type && | ||
1677 | info1->index_type == info2->index_type && | ||
1678 | info1->nelems == info2->nelems; | ||
1679 | } | ||
1680 | |||
1681 | /* | ||
1682 | * Check structural compatibility of two ARRAYs, ignoring referenced type | ||
1683 | * IDs. This check is performed during type graph equivalence check and | ||
1684 | * referenced types equivalence is checked separately. | ||
1685 | */ | ||
1686 | static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2) | ||
1687 | { | ||
1688 | struct btf_array *info1, *info2; | ||
1689 | |||
1690 | if (!btf_equal_common(t1, t2)) | ||
1691 | return false; | ||
1692 | |||
1693 | info1 = (struct btf_array *)(t1 + 1); | ||
1694 | info2 = (struct btf_array *)(t2 + 1); | ||
1695 | return info1->nelems == info2->nelems; | ||
1696 | } | ||
1697 | |||
1698 | /* | ||
1699 | * Calculate type signature hash of FUNC_PROTO, including referenced type IDs, | ||
1700 | * under assumption that they were already resolved to canonical type IDs and | ||
1701 | * are not going to change. | ||
1702 | */ | ||
1703 | static inline __u32 btf_hash_fnproto(struct btf_type *t) | ||
1704 | { | ||
1705 | struct btf_param *member = (struct btf_param *)(t + 1); | ||
1706 | __u16 vlen = BTF_INFO_VLEN(t->info); | ||
1707 | __u32 h = btf_hash_common(t); | ||
1708 | int i; | ||
1709 | |||
1710 | for (i = 0; i < vlen; i++) { | ||
1711 | h = hash_combine(h, member->name_off); | ||
1712 | h = hash_combine(h, member->type); | ||
1713 | member++; | ||
1714 | } | ||
1715 | return h; | ||
1716 | } | ||
1717 | |||
1718 | /* | ||
1719 | * Check exact equality of two FUNC_PROTOs, taking into account referenced | ||
1720 | * type IDs, under assumption that they were already resolved to canonical | ||
1721 | * type IDs and are not going to change. | ||
1722 | * This function is called during reference types deduplication to compare | ||
1723 | * FUNC_PROTO to potential canonical representative. | ||
1724 | */ | ||
1725 | static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2) | ||
1726 | { | ||
1727 | struct btf_param *m1, *m2; | ||
1728 | __u16 vlen; | ||
1729 | int i; | ||
1730 | |||
1731 | if (!btf_equal_common(t1, t2)) | ||
1732 | return false; | ||
1733 | |||
1734 | vlen = BTF_INFO_VLEN(t1->info); | ||
1735 | m1 = (struct btf_param *)(t1 + 1); | ||
1736 | m2 = (struct btf_param *)(t2 + 1); | ||
1737 | for (i = 0; i < vlen; i++) { | ||
1738 | if (m1->name_off != m2->name_off || m1->type != m2->type) | ||
1739 | return false; | ||
1740 | m1++; | ||
1741 | m2++; | ||
1742 | } | ||
1743 | return true; | ||
1744 | } | ||
1745 | |||
1746 | /* | ||
1747 | * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type | ||
1748 | * IDs. This check is performed during type graph equivalence check and | ||
1749 | * referenced types equivalence is checked separately. | ||
1750 | */ | ||
1751 | static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) | ||
1752 | { | ||
1753 | struct btf_param *m1, *m2; | ||
1754 | __u16 vlen; | ||
1755 | int i; | ||
1756 | |||
1757 | /* skip return type ID */ | ||
1758 | if (t1->name_off != t2->name_off || t1->info != t2->info) | ||
1759 | return false; | ||
1760 | |||
1761 | vlen = BTF_INFO_VLEN(t1->info); | ||
1762 | m1 = (struct btf_param *)(t1 + 1); | ||
1763 | m2 = (struct btf_param *)(t2 + 1); | ||
1764 | for (i = 0; i < vlen; i++) { | ||
1765 | if (m1->name_off != m2->name_off) | ||
1766 | return false; | ||
1767 | m1++; | ||
1768 | m2++; | ||
1769 | } | ||
1770 | return true; | ||
1771 | } | ||
1772 | |||
1773 | /* | ||
1774 | * Deduplicate primitive types, that can't reference other types, by calculating | ||
1775 | * their type signature hash and comparing them with any possible canonical | ||
1776 | * candidate. If no canonical candidate matches, type itself is marked as | ||
1777 | * canonical and is added into `btf_dedup->dedup_table` as another candidate. | ||
1778 | */ | ||
1779 | static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) | ||
1780 | { | ||
1781 | struct btf_type *t = d->btf->types[type_id]; | ||
1782 | struct btf_type *cand; | ||
1783 | struct btf_dedup_node *cand_node; | ||
1784 | /* if we don't find equivalent type, then we are canonical */ | ||
1785 | __u32 new_id = type_id; | ||
1786 | __u32 h; | ||
1787 | |||
1788 | switch (BTF_INFO_KIND(t->info)) { | ||
1789 | case BTF_KIND_CONST: | ||
1790 | case BTF_KIND_VOLATILE: | ||
1791 | case BTF_KIND_RESTRICT: | ||
1792 | case BTF_KIND_PTR: | ||
1793 | case BTF_KIND_TYPEDEF: | ||
1794 | case BTF_KIND_ARRAY: | ||
1795 | case BTF_KIND_STRUCT: | ||
1796 | case BTF_KIND_UNION: | ||
1797 | case BTF_KIND_FUNC: | ||
1798 | case BTF_KIND_FUNC_PROTO: | ||
1799 | return 0; | ||
1800 | |||
1801 | case BTF_KIND_INT: | ||
1802 | h = btf_hash_int(t); | ||
1803 | for_each_hash_node(d->dedup_table, h, cand_node) { | ||
1804 | cand = d->btf->types[cand_node->type_id]; | ||
1805 | if (btf_equal_int(t, cand)) { | ||
1806 | new_id = cand_node->type_id; | ||
1807 | break; | ||
1808 | } | ||
1809 | } | ||
1810 | break; | ||
1811 | |||
1812 | case BTF_KIND_ENUM: | ||
1813 | h = btf_hash_enum(t); | ||
1814 | for_each_hash_node(d->dedup_table, h, cand_node) { | ||
1815 | cand = d->btf->types[cand_node->type_id]; | ||
1816 | if (btf_equal_enum(t, cand)) { | ||
1817 | new_id = cand_node->type_id; | ||
1818 | break; | ||
1819 | } | ||
1820 | } | ||
1821 | break; | ||
1822 | |||
1823 | case BTF_KIND_FWD: | ||
1824 | h = btf_hash_common(t); | ||
1825 | for_each_hash_node(d->dedup_table, h, cand_node) { | ||
1826 | cand = d->btf->types[cand_node->type_id]; | ||
1827 | if (btf_equal_common(t, cand)) { | ||
1828 | new_id = cand_node->type_id; | ||
1829 | break; | ||
1830 | } | ||
1831 | } | ||
1832 | break; | ||
1833 | |||
1834 | default: | ||
1835 | return -EINVAL; | ||
1836 | } | ||
1837 | |||
1838 | d->map[type_id] = new_id; | ||
1839 | if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) | ||
1840 | return -ENOMEM; | ||
1841 | |||
1842 | return 0; | ||
1843 | } | ||
1844 | |||
1845 | static int btf_dedup_prim_types(struct btf_dedup *d) | ||
1846 | { | ||
1847 | int i, err; | ||
1848 | |||
1849 | for (i = 1; i <= d->btf->nr_types; i++) { | ||
1850 | err = btf_dedup_prim_type(d, i); | ||
1851 | if (err) | ||
1852 | return err; | ||
1853 | } | ||
1854 | return 0; | ||
1855 | } | ||
1856 | |||
1857 | /* | ||
1858 | * Check whether type is already mapped into canonical one (could be to itself). | ||
1859 | */ | ||
1860 | static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id) | ||
1861 | { | ||
1862 | return d->map[type_id] <= BTF_MAX_TYPE; | ||
1863 | } | ||
1864 | |||
1865 | /* | ||
1866 | * Resolve type ID into its canonical type ID, if any; otherwise return original | ||
1867 | * type ID. If type is FWD and is resolved into STRUCT/UNION already, follow | ||
1868 | * STRUCT/UNION link and resolve it into canonical type ID as well. | ||
1869 | */ | ||
1870 | static inline __u32 resolve_type_id(struct btf_dedup *d, __u32 type_id) | ||
1871 | { | ||
1872 | while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) | ||
1873 | type_id = d->map[type_id]; | ||
1874 | return type_id; | ||
1875 | } | ||
1876 | |||
1877 | /* | ||
1878 | * Resolve FWD to underlying STRUCT/UNION, if any; otherwise return original | ||
1879 | * type ID. | ||
1880 | */ | ||
1881 | static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id) | ||
1882 | { | ||
1883 | __u32 orig_type_id = type_id; | ||
1884 | |||
1885 | if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD) | ||
1886 | return type_id; | ||
1887 | |||
1888 | while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) | ||
1889 | type_id = d->map[type_id]; | ||
1890 | |||
1891 | if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD) | ||
1892 | return type_id; | ||
1893 | |||
1894 | return orig_type_id; | ||
1895 | } | ||
1896 | |||
1897 | |||
1898 | static inline __u16 btf_fwd_kind(struct btf_type *t) | ||
1899 | { | ||
1900 | return BTF_INFO_KFLAG(t->info) ? BTF_KIND_UNION : BTF_KIND_STRUCT; | ||
1901 | } | ||
1902 | |||
/*
 * Check equivalence of BTF type graph formed by candidate struct/union (we'll
 * call it "candidate graph" in this description for brevity) to a type graph
 * formed by (potential) canonical struct/union ("canonical graph" for brevity
 * here, though keep in mind that not all types in canonical graph are
 * necessarily canonical representatives themselves, some of them might be
 * duplicates or its uniqueness might not have been established yet).
 * Returns:
 *  - >0, if type graphs are equivalent;
 *  - 0, if not equivalent;
 *  - <0, on error.
 *
 * Algorithm performs side-by-side DFS traversal of both type graphs and checks
 * equivalence of BTF types at each step. If at any point BTF types in candidate
 * and canonical graphs are not compatible structurally, whole graphs are
 * incompatible. If types are structurally equivalent (i.e., all information
 * except referenced type IDs is exactly the same), a mapping from `canon_id` to
 * a `cand_id` is recorded in hypothetical mapping (`btf_dedup->hypot_map`).
 * If a type references other types, then those referenced types are checked
 * for equivalence recursively.
 *
 * During DFS traversal, if we find that for current `canon_id` type we
 * already have some mapping in hypothetical map, we check for two possible
 * situations:
 *   - `canon_id` is mapped to exactly the same type as `cand_id`. This will
 *     happen when type graphs have cycles. In this case we assume those two
 *     types are equivalent.
 *   - `canon_id` is mapped to different type. This is contradiction in our
 *     hypothetical mapping, because same graph in canonical graph corresponds
 *     to two different types in candidate graph, which for equivalent type
 *     graphs shouldn't happen. This condition terminates equivalence check
 *     with negative result.
 *
 * If type graphs traversal exhausts types to check and finds no contradiction,
 * then type graphs are equivalent.
 *
 * When checking types for equivalence, there is one special case: FWD types.
 * If FWD type resolution is allowed and one of the types (either from canonical
 * or candidate graph) is FWD and other is STRUCT/UNION (depending on FWD's kind
 * flag) and their names match, hypothetical mapping is updated to point from
 * FWD to STRUCT/UNION. If graphs will be determined as equivalent successfully,
 * this mapping will be used to record FWD -> STRUCT/UNION mapping permanently.
 *
 * Technically, this could lead to incorrect FWD to STRUCT/UNION resolution,
 * if there are two exactly named (or anonymous) structs/unions that are
 * compatible structurally, one of which has FWD field, while other is concrete
 * STRUCT/UNION, but according to C sources they are different structs/unions
 * that are referencing different types with the same name. This is extremely
 * unlikely to happen, but btf_dedup API allows to disable FWD resolution if
 * this logic is causing problems.
 *
 * Doing FWD resolution means that both candidate and/or canonical graphs can
 * consist of portions of the graph that come from multiple compilation units.
 * This is due to the fact that types within single compilation unit are always
 * deduplicated and FWDs are already resolved, if referenced struct/union
 * definition is available. So, if we had unresolved FWD and found corresponding
 * STRUCT/UNION, they will be from different compilation units. This
 * consequently means that when we "link" FWD to corresponding STRUCT/UNION,
 * type graph will likely have at least two different BTF types that describe
 * same type (e.g., most probably there will be two different BTF types for the
 * same 'int' primitive type) and could even have "overlapping" parts of type
 * graph that describe same subset of types.
 *
 * This in turn means that our assumption that each type in canonical graph
 * must correspond to exactly one type in candidate graph might not hold
 * anymore and will make it harder to detect contradictions using hypothetical
 * map. To handle this problem, we allow to follow FWD -> STRUCT/UNION
 * resolution only in canonical graph. FWDs in candidate graphs are never
 * resolved. To see why it's OK, let's check all possible situations w.r.t. FWDs
 * that can occur:
 *   - Both types in canonical and candidate graphs are FWDs. If they are
 *     structurally equivalent, then they can either be both resolved to the
 *     same STRUCT/UNION or not resolved at all. In both cases they are
 *     equivalent and there is no need to resolve FWD on candidate side.
 *   - Both types in canonical and candidate graphs are concrete STRUCT/UNION,
 *     so nothing to resolve as well, algorithm will check equivalence anyway.
 *   - Type in canonical graph is FWD, while type in candidate is concrete
 *     STRUCT/UNION. In this case candidate graph comes from single compilation
 *     unit, so there is exactly one BTF type for each unique C type. After
 *     resolving FWD into STRUCT/UNION, there might be more than one BTF type
 *     in canonical graph mapping to single BTF type in candidate graph, but
 *     because hypothetical mapping maps from canonical to candidate types, it's
 *     alright, and we still maintain the property of having single `canon_id`
 *     mapping to single `cand_id` (there could be two different `canon_id`
 *     mapped to the same `cand_id`, but it's not contradictory).
 *   - Type in canonical graph is concrete STRUCT/UNION, while type in candidate
 *     graph is FWD. In this case we are just going to check compatibility of
 *     STRUCT/UNION and corresponding FWD, and if they are compatible, we'll
 *     assume that whatever STRUCT/UNION FWD resolves to must be equivalent to
 *     a concrete STRUCT/UNION from canonical graph. If the rest of type graphs
 *     turn out equivalent, we'll re-resolve FWD to concrete STRUCT/UNION from
 *     canonical graph.
 */
static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
			      __u32 canon_id)
{
	struct btf_type *cand_type;
	struct btf_type *canon_type;
	__u32 hypot_type_id;
	__u16 cand_kind;
	__u16 canon_kind;
	int i, eq;

	/* if both resolve to the same canonical, they must be equivalent */
	if (resolve_type_id(d, cand_id) == resolve_type_id(d, canon_id))
		return 1;

	/* FWD resolution is followed only on the canonical side, never on
	 * the candidate side (see the explanation in the big comment above)
	 */
	canon_id = resolve_fwd_id(d, canon_id);

	/* an already-hypothesized canon_id must map to the same cand_id;
	 * matching means a cycle closed consistently, mismatch means
	 * contradiction
	 */
	hypot_type_id = d->hypot_map[canon_id];
	if (hypot_type_id <= BTF_MAX_TYPE)
		return hypot_type_id == cand_id;

	if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
		return -ENOMEM;

	cand_type = d->btf->types[cand_id];
	canon_type = d->btf->types[canon_id];
	cand_kind = BTF_INFO_KIND(cand_type->info);
	canon_kind = BTF_INFO_KIND(canon_type->info);

	if (cand_type->name_off != canon_type->name_off)
		return 0;

	/* FWD <--> STRUCT/UNION equivalence check, if enabled */
	if (!d->opts.dont_resolve_fwds
	    && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD)
	    && cand_kind != canon_kind) {
		__u16 real_kind;
		__u16 fwd_kind;

		if (cand_kind == BTF_KIND_FWD) {
			real_kind = canon_kind;
			fwd_kind = btf_fwd_kind(cand_type);
		} else {
			real_kind = cand_kind;
			fwd_kind = btf_fwd_kind(canon_type);
		}
		/* FWD's declared kind (struct vs union, per kflag) must match
		 * the concrete type's kind
		 */
		return fwd_kind == real_kind;
	}

	/* names already matched; info (kind, vlen, kflag) must match too */
	if (cand_type->info != canon_type->info)
		return 0;

	switch (cand_kind) {
	case BTF_KIND_INT:
		return btf_equal_int(cand_type, canon_type);

	case BTF_KIND_ENUM:
		return btf_equal_enum(cand_type, canon_type);

	case BTF_KIND_FWD:
		return btf_equal_common(cand_type, canon_type);

	case BTF_KIND_CONST:
	case BTF_KIND_VOLATILE:
	case BTF_KIND_RESTRICT:
	case BTF_KIND_PTR:
	case BTF_KIND_TYPEDEF:
	case BTF_KIND_FUNC:
		/* reference types: equivalence reduces to the single
		 * referenced type on each side
		 */
		return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);

	case BTF_KIND_ARRAY: {
		struct btf_array *cand_arr, *canon_arr;

		if (!btf_compat_array(cand_type, canon_type))
			return 0;
		cand_arr = (struct btf_array *)(cand_type + 1);
		canon_arr = (struct btf_array *)(canon_type + 1);
		eq = btf_dedup_is_equiv(d,
			cand_arr->index_type, canon_arr->index_type);
		if (eq <= 0)
			return eq;
		return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type);
	}

	case BTF_KIND_STRUCT:
	case BTF_KIND_UNION: {
		struct btf_member *cand_m, *canon_m;
		__u16 vlen;

		if (!btf_equal_struct(cand_type, canon_type))
			return 0;
		/* shallow check passed; recurse into each member's type */
		vlen = BTF_INFO_VLEN(cand_type->info);
		cand_m = (struct btf_member *)(cand_type + 1);
		canon_m = (struct btf_member *)(canon_type + 1);
		for (i = 0; i < vlen; i++) {
			eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type);
			if (eq <= 0)
				return eq;
			cand_m++;
			canon_m++;
		}

		return 1;
	}

	case BTF_KIND_FUNC_PROTO: {
		struct btf_param *cand_p, *canon_p;
		__u16 vlen;

		if (!btf_compat_fnproto(cand_type, canon_type))
			return 0;
		/* check return type first, then each parameter's type */
		eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
		if (eq <= 0)
			return eq;
		vlen = BTF_INFO_VLEN(cand_type->info);
		cand_p = (struct btf_param *)(cand_type + 1);
		canon_p = (struct btf_param *)(canon_type + 1);
		for (i = 0; i < vlen; i++) {
			eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type);
			if (eq <= 0)
				return eq;
			cand_p++;
			canon_p++;
		}
		return 1;
	}

	default:
		return -EINVAL;
	}
	return 0;
}
2127 | |||
/*
 * Use hypothetical mapping, produced by successful type graph equivalence
 * check, to augment existing struct/union canonical mapping, where possible.
 *
 * If BTF_KIND_FWD resolution is allowed, this mapping is also used to record
 * FWD -> STRUCT/UNION correspondence as well. FWD resolution is bidirectional:
 * it doesn't matter if FWD type was part of canonical graph or candidate one,
 * we are recording the mapping anyway. As opposed to carefulness required
 * for struct/union correspondence mapping (described below), for FWD resolution
 * it's not important, as by the time that FWD type (reference type) will be
 * deduplicated all structs/unions will be deduped already anyway.
 *
 * Recording STRUCT/UNION mapping is purely a performance optimization and is
 * not required for correctness. It needs to be done carefully to ensure that
 * struct/union from candidate's type graph is not mapped into corresponding
 * struct/union from canonical type graph that itself hasn't been resolved into
 * canonical representative. The only guarantee we have is that canonical
 * struct/union was determined as canonical and that won't change. But any
 * types referenced through that struct/union fields could have been not yet
 * resolved, so in case like that it's too early to establish any kind of
 * correspondence between structs/unions.
 *
 * No canonical correspondence is derived for primitive types (they are already
 * deduplicated completely already anyway) or reference types (they rely on
 * stability of struct/union canonical relationship for equivalence checks).
 */
static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
{
	__u32 cand_type_id, targ_type_id;
	__u16 t_kind, c_kind;
	__u32 t_id, c_id;
	int i;

	/* every ID recorded in hypot_list has an entry in hypot_map */
	for (i = 0; i < d->hypot_cnt; i++) {
		cand_type_id = d->hypot_list[i];
		targ_type_id = d->hypot_map[cand_type_id];
		t_id = resolve_type_id(d, targ_type_id);
		c_id = resolve_type_id(d, cand_type_id);
		t_kind = BTF_INFO_KIND(d->btf->types[t_id]->info);
		c_kind = BTF_INFO_KIND(d->btf->types[c_id]->info);
		/*
		 * Resolve FWD into STRUCT/UNION.
		 * It's ok to resolve FWD into STRUCT/UNION that's not yet
		 * mapped to canonical representative (as opposed to
		 * STRUCT/UNION <--> STRUCT/UNION mapping logic below), because
		 * eventually that struct is going to be mapped and all resolved
		 * FWDs will automatically resolve to correct canonical
		 * representative. This will happen before ref type deduping,
		 * which critically depends on stability of these mappings. This
		 * stability is not a requirement for STRUCT/UNION equivalence
		 * checks, though.
		 */
		if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD)
			d->map[c_id] = t_id;
		else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
			d->map[t_id] = c_id;

		if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) &&
		    c_kind != BTF_KIND_FWD &&
		    is_type_mapped(d, c_id) &&
		    !is_type_mapped(d, t_id)) {
			/*
			 * as a perf optimization, we can map struct/union
			 * that's part of type graph we just verified for
			 * equivalence. We can do that for struct/union that has
			 * canonical representative only, though.
			 */
			d->map[t_id] = c_id;
		}
	}
}
2199 | |||
2200 | /* | ||
2201 | * Deduplicate struct/union types. | ||
2202 | * | ||
2203 | * For each struct/union type its type signature hash is calculated, taking | ||
2204 | * into account type's name, size, number, order and names of fields, but | ||
2205 | * ignoring type ID's referenced from fields, because they might not be deduped | ||
2206 | * completely until after reference types deduplication phase. This type hash | ||
2207 | * is used to iterate over all potential canonical types, sharing same hash. | ||
2208 | * For each canonical candidate we check whether type graphs that they form | ||
2209 | * (through referenced types in fields and so on) are equivalent using algorithm | ||
2210 | * implemented in `btf_dedup_is_equiv`. If such equivalence is found and | ||
2211 | * BTF_KIND_FWD resolution is allowed, then hypothetical mapping | ||
2212 | * (btf_dedup->hypot_map) produced by aforementioned type graph equivalence | ||
2213 | * algorithm is used to record FWD -> STRUCT/UNION mapping. It's also used to | ||
2214 | * potentially map other structs/unions to their canonical representatives, | ||
2215 | * if such relationship hasn't yet been established. This speeds up algorithm | ||
2216 | * by eliminating some of the duplicate work. | ||
2217 | * | ||
2218 | * If no matching canonical representative was found, struct/union is marked | ||
2219 | * as canonical for itself and is added into btf_dedup->dedup_table hash map | ||
2220 | * for further look ups. | ||
2221 | */ | ||
2222 | static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) | ||
2223 | { | ||
2224 | struct btf_dedup_node *cand_node; | ||
2225 | struct btf_type *t; | ||
2226 | /* if we don't find equivalent type, then we are canonical */ | ||
2227 | __u32 new_id = type_id; | ||
2228 | __u16 kind; | ||
2229 | __u32 h; | ||
2230 | |||
2231 | /* already deduped or is in process of deduping (loop detected) */ | ||
2232 | if (d->map[type_id] <= BTF_MAX_TYPE) | ||
2233 | return 0; | ||
2234 | |||
2235 | t = d->btf->types[type_id]; | ||
2236 | kind = BTF_INFO_KIND(t->info); | ||
2237 | |||
2238 | if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) | ||
2239 | return 0; | ||
2240 | |||
2241 | h = btf_hash_struct(t); | ||
2242 | for_each_hash_node(d->dedup_table, h, cand_node) { | ||
2243 | int eq; | ||
2244 | |||
2245 | btf_dedup_clear_hypot_map(d); | ||
2246 | eq = btf_dedup_is_equiv(d, type_id, cand_node->type_id); | ||
2247 | if (eq < 0) | ||
2248 | return eq; | ||
2249 | if (!eq) | ||
2250 | continue; | ||
2251 | new_id = cand_node->type_id; | ||
2252 | btf_dedup_merge_hypot_map(d); | ||
2253 | break; | ||
2254 | } | ||
2255 | |||
2256 | d->map[type_id] = new_id; | ||
2257 | if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) | ||
2258 | return -ENOMEM; | ||
2259 | |||
2260 | return 0; | ||
2261 | } | ||
2262 | |||
2263 | static int btf_dedup_struct_types(struct btf_dedup *d) | ||
2264 | { | ||
2265 | int i, err; | ||
2266 | |||
2267 | for (i = 1; i <= d->btf->nr_types; i++) { | ||
2268 | err = btf_dedup_struct_type(d, i); | ||
2269 | if (err) | ||
2270 | return err; | ||
2271 | } | ||
2272 | return 0; | ||
2273 | } | ||
2274 | |||
2275 | /* | ||
2276 | * Deduplicate reference type. | ||
2277 | * | ||
2278 | * Once all primitive and struct/union types got deduplicated, we can easily | ||
2279 | * deduplicate all other (reference) BTF types. This is done in two steps: | ||
2280 | * | ||
2281 | * 1. Resolve all referenced type IDs into their canonical type IDs. This | ||
2282 | * resolution can be done either immediately for primitive or struct/union types | ||
2283 | * (because they were deduped in previous two phases) or recursively for | ||
2284 | * reference types. Recursion will always terminate at either primitive or | ||
2285 | * struct/union type, at which point we can "unwind" chain of reference types | ||
2286 | * one by one. There is no danger of encountering cycles because in C type | ||
2287 | * system the only way to form type cycle is through struct/union, so any chain | ||
2288 | * of reference types, even those taking part in a type cycle, will inevitably | ||
2289 | * reach struct/union at some point. | ||
2290 | * | ||
2291 | * 2. Once all referenced type IDs are resolved into canonical ones, BTF type | ||
2292 | * becomes "stable", in the sense that no further deduplication will cause | ||
2293 | * any changes to it. With that, it's now possible to calculate type's signature | ||
2294 | * hash (this time taking into account referenced type IDs) and loop over all | ||
2295 | * potential canonical representatives. If no match was found, current type | ||
2296 | * will become canonical representative of itself and will be added into | ||
2297 | * btf_dedup->dedup_table as another possible canonical representative. | ||
2298 | */ | ||
2299 | static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) | ||
2300 | { | ||
2301 | struct btf_dedup_node *cand_node; | ||
2302 | struct btf_type *t, *cand; | ||
2303 | /* if we don't find equivalent type, then we are representative type */ | ||
2304 | __u32 new_id = type_id; | ||
2305 | __u32 h, ref_type_id; | ||
2306 | |||
2307 | if (d->map[type_id] == BTF_IN_PROGRESS_ID) | ||
2308 | return -ELOOP; | ||
2309 | if (d->map[type_id] <= BTF_MAX_TYPE) | ||
2310 | return resolve_type_id(d, type_id); | ||
2311 | |||
2312 | t = d->btf->types[type_id]; | ||
2313 | d->map[type_id] = BTF_IN_PROGRESS_ID; | ||
2314 | |||
2315 | switch (BTF_INFO_KIND(t->info)) { | ||
2316 | case BTF_KIND_CONST: | ||
2317 | case BTF_KIND_VOLATILE: | ||
2318 | case BTF_KIND_RESTRICT: | ||
2319 | case BTF_KIND_PTR: | ||
2320 | case BTF_KIND_TYPEDEF: | ||
2321 | case BTF_KIND_FUNC: | ||
2322 | ref_type_id = btf_dedup_ref_type(d, t->type); | ||
2323 | if (ref_type_id < 0) | ||
2324 | return ref_type_id; | ||
2325 | t->type = ref_type_id; | ||
2326 | |||
2327 | h = btf_hash_common(t); | ||
2328 | for_each_hash_node(d->dedup_table, h, cand_node) { | ||
2329 | cand = d->btf->types[cand_node->type_id]; | ||
2330 | if (btf_equal_common(t, cand)) { | ||
2331 | new_id = cand_node->type_id; | ||
2332 | break; | ||
2333 | } | ||
2334 | } | ||
2335 | break; | ||
2336 | |||
2337 | case BTF_KIND_ARRAY: { | ||
2338 | struct btf_array *info = (struct btf_array *)(t + 1); | ||
2339 | |||
2340 | ref_type_id = btf_dedup_ref_type(d, info->type); | ||
2341 | if (ref_type_id < 0) | ||
2342 | return ref_type_id; | ||
2343 | info->type = ref_type_id; | ||
2344 | |||
2345 | ref_type_id = btf_dedup_ref_type(d, info->index_type); | ||
2346 | if (ref_type_id < 0) | ||
2347 | return ref_type_id; | ||
2348 | info->index_type = ref_type_id; | ||
2349 | |||
2350 | h = btf_hash_array(t); | ||
2351 | for_each_hash_node(d->dedup_table, h, cand_node) { | ||
2352 | cand = d->btf->types[cand_node->type_id]; | ||
2353 | if (btf_equal_array(t, cand)) { | ||
2354 | new_id = cand_node->type_id; | ||
2355 | break; | ||
2356 | } | ||
2357 | } | ||
2358 | break; | ||
2359 | } | ||
2360 | |||
2361 | case BTF_KIND_FUNC_PROTO: { | ||
2362 | struct btf_param *param; | ||
2363 | __u16 vlen; | ||
2364 | int i; | ||
2365 | |||
2366 | ref_type_id = btf_dedup_ref_type(d, t->type); | ||
2367 | if (ref_type_id < 0) | ||
2368 | return ref_type_id; | ||
2369 | t->type = ref_type_id; | ||
2370 | |||
2371 | vlen = BTF_INFO_VLEN(t->info); | ||
2372 | param = (struct btf_param *)(t + 1); | ||
2373 | for (i = 0; i < vlen; i++) { | ||
2374 | ref_type_id = btf_dedup_ref_type(d, param->type); | ||
2375 | if (ref_type_id < 0) | ||
2376 | return ref_type_id; | ||
2377 | param->type = ref_type_id; | ||
2378 | param++; | ||
2379 | } | ||
2380 | |||
2381 | h = btf_hash_fnproto(t); | ||
2382 | for_each_hash_node(d->dedup_table, h, cand_node) { | ||
2383 | cand = d->btf->types[cand_node->type_id]; | ||
2384 | if (btf_equal_fnproto(t, cand)) { | ||
2385 | new_id = cand_node->type_id; | ||
2386 | break; | ||
2387 | } | ||
2388 | } | ||
2389 | break; | ||
2390 | } | ||
2391 | |||
2392 | default: | ||
2393 | return -EINVAL; | ||
2394 | } | ||
2395 | |||
2396 | d->map[type_id] = new_id; | ||
2397 | if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) | ||
2398 | return -ENOMEM; | ||
2399 | |||
2400 | return new_id; | ||
2401 | } | ||
2402 | |||
2403 | static int btf_dedup_ref_types(struct btf_dedup *d) | ||
2404 | { | ||
2405 | int i, err; | ||
2406 | |||
2407 | for (i = 1; i <= d->btf->nr_types; i++) { | ||
2408 | err = btf_dedup_ref_type(d, i); | ||
2409 | if (err < 0) | ||
2410 | return err; | ||
2411 | } | ||
2412 | btf_dedup_table_free(d); | ||
2413 | return 0; | ||
2414 | } | ||
2415 | |||
/*
 * Compact types.
 *
 * After we established for each type its corresponding canonical representative
 * type, we now can eliminate types that are not canonical and leave only
 * canonical ones laid out sequentially in memory by copying them over
 * duplicates. During compaction btf_dedup->hypot_map array is reused to store
 * a map from original type ID to a new compacted type ID, which will be used
 * during next phase to "fix up" type IDs, referenced from struct/union and
 * reference types.
 */
static int btf_dedup_compact_types(struct btf_dedup *d)
{
	struct btf_type **new_types;
	__u32 next_type_id = 1;
	char *types_start, *p;
	int i, len;

	/* we are going to reuse hypot_map to store compaction remapping */
	d->hypot_map[0] = 0;	/* type ID 0 (VOID) always maps to itself */
	for (i = 1; i <= d->btf->nr_types; i++)
		d->hypot_map[i] = BTF_UNPROCESSED_ID;

	types_start = d->btf->nohdr_data + d->btf->hdr->type_off;
	p = types_start;

	for (i = 1; i <= d->btf->nr_types; i++) {
		/* only canonical types (those mapping to themselves) survive */
		if (d->map[i] != i)
			continue;

		len = btf_type_size(d->btf->types[i]);
		if (len < 0)
			return len;

		/* memmove, not memcpy: we compact in place, so the source
		 * type record may overlap the destination slot */
		memmove(p, d->btf->types[i], len);
		d->hypot_map[i] = next_type_id;
		d->btf->types[next_type_id] = (struct btf_type *)p;
		p += len;
		next_type_id++;
	}

	/* shrink struct btf's internal types index and update btf_header */
	d->btf->nr_types = next_type_id - 1;
	/* NOTE(review): types_size is set to nr_types while the index array
	 * is reallocated to hold 1 + nr_types slots (slot 0 is VOID) —
	 * confirm this matches the growth logic in btf's type-add path */
	d->btf->types_size = d->btf->nr_types;
	d->btf->hdr->type_len = p - types_start;
	new_types = realloc(d->btf->types,
			    (1 + d->btf->nr_types) * sizeof(struct btf_type *));
	if (!new_types)
		return -ENOMEM;
	d->btf->types = new_types;

	/* make sure string section follows type information without gaps */
	d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data;
	memmove(p, d->btf->strings, d->btf->hdr->str_len);
	d->btf->strings = p;
	p += d->btf->hdr->str_len;

	d->btf->data_size = p - (char *)d->btf->data;
	return 0;
}
2476 | |||
2477 | /* | ||
2478 | * Figure out final (deduplicated and compacted) type ID for provided original | ||
2479 | * `type_id` by first resolving it into corresponding canonical type ID and | ||
2480 | * then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map, | ||
2481 | * which is populated during compaction phase. | ||
2482 | */ | ||
2483 | static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id) | ||
2484 | { | ||
2485 | __u32 resolved_type_id, new_type_id; | ||
2486 | |||
2487 | resolved_type_id = resolve_type_id(d, type_id); | ||
2488 | new_type_id = d->hypot_map[resolved_type_id]; | ||
2489 | if (new_type_id > BTF_MAX_TYPE) | ||
2490 | return -EINVAL; | ||
2491 | return new_type_id; | ||
2492 | } | ||
2493 | |||
2494 | /* | ||
2495 | * Remap referenced type IDs into deduped type IDs. | ||
2496 | * | ||
2497 | * After BTF types are deduplicated and compacted, their final type IDs may | ||
2498 | * differ from original ones. The map from original to a corresponding | ||
2499 | * deduped type ID is stored in btf_dedup->hypot_map and is populated during | ||
2500 | * compaction phase. During remapping phase we are rewriting all type IDs | ||
2501 | * referenced from any BTF type (e.g., struct fields, func proto args, etc) to | ||
2502 | * their final deduped type IDs. | ||
2503 | */ | ||
2504 | static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) | ||
2505 | { | ||
2506 | struct btf_type *t = d->btf->types[type_id]; | ||
2507 | int i, r; | ||
2508 | |||
2509 | switch (BTF_INFO_KIND(t->info)) { | ||
2510 | case BTF_KIND_INT: | ||
2511 | case BTF_KIND_ENUM: | ||
2512 | break; | ||
2513 | |||
2514 | case BTF_KIND_FWD: | ||
2515 | case BTF_KIND_CONST: | ||
2516 | case BTF_KIND_VOLATILE: | ||
2517 | case BTF_KIND_RESTRICT: | ||
2518 | case BTF_KIND_PTR: | ||
2519 | case BTF_KIND_TYPEDEF: | ||
2520 | case BTF_KIND_FUNC: | ||
2521 | r = btf_dedup_remap_type_id(d, t->type); | ||
2522 | if (r < 0) | ||
2523 | return r; | ||
2524 | t->type = r; | ||
2525 | break; | ||
2526 | |||
2527 | case BTF_KIND_ARRAY: { | ||
2528 | struct btf_array *arr_info = (struct btf_array *)(t + 1); | ||
2529 | |||
2530 | r = btf_dedup_remap_type_id(d, arr_info->type); | ||
2531 | if (r < 0) | ||
2532 | return r; | ||
2533 | arr_info->type = r; | ||
2534 | r = btf_dedup_remap_type_id(d, arr_info->index_type); | ||
2535 | if (r < 0) | ||
2536 | return r; | ||
2537 | arr_info->index_type = r; | ||
2538 | break; | ||
2539 | } | ||
2540 | |||
2541 | case BTF_KIND_STRUCT: | ||
2542 | case BTF_KIND_UNION: { | ||
2543 | struct btf_member *member = (struct btf_member *)(t + 1); | ||
2544 | __u16 vlen = BTF_INFO_VLEN(t->info); | ||
2545 | |||
2546 | for (i = 0; i < vlen; i++) { | ||
2547 | r = btf_dedup_remap_type_id(d, member->type); | ||
2548 | if (r < 0) | ||
2549 | return r; | ||
2550 | member->type = r; | ||
2551 | member++; | ||
2552 | } | ||
2553 | break; | ||
2554 | } | ||
2555 | |||
2556 | case BTF_KIND_FUNC_PROTO: { | ||
2557 | struct btf_param *param = (struct btf_param *)(t + 1); | ||
2558 | __u16 vlen = BTF_INFO_VLEN(t->info); | ||
2559 | |||
2560 | r = btf_dedup_remap_type_id(d, t->type); | ||
2561 | if (r < 0) | ||
2562 | return r; | ||
2563 | t->type = r; | ||
2564 | |||
2565 | for (i = 0; i < vlen; i++) { | ||
2566 | r = btf_dedup_remap_type_id(d, param->type); | ||
2567 | if (r < 0) | ||
2568 | return r; | ||
2569 | param->type = r; | ||
2570 | param++; | ||
2571 | } | ||
2572 | break; | ||
2573 | } | ||
2574 | |||
2575 | default: | ||
2576 | return -EINVAL; | ||
2577 | } | ||
2578 | |||
2579 | return 0; | ||
2580 | } | ||
2581 | |||
2582 | static int btf_dedup_remap_types(struct btf_dedup *d) | ||
2583 | { | ||
2584 | int i, r; | ||
2585 | |||
2586 | for (i = 1; i <= d->btf->nr_types; i++) { | ||
2587 | r = btf_dedup_remap_type(d, i); | ||
2588 | if (r < 0) | ||
2589 | return r; | ||
2590 | } | ||
2591 | return 0; | ||
2592 | } | ||