aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-02-27 16:37:23 -0500
committerDavid S. Miller <davem@davemloft.net>2015-02-27 16:37:23 -0500
commit7eb603459caf1de4ddf36c18d7ce3ebef28dde8e (patch)
treeded89f2190deffac52d17be63a99a8920c25da18
parent7705f730372d65f73599f0b49e3249433bba55e8 (diff)
parent79e5ad2ceb00673e5f2d278a892adcbf596a6b5a (diff)
Merge branch 'fib_trie_remove_leaf_info'
Alexander Duyck says: ==================== fib_trie: Remove leaf_info structure This patch set removes the leaf_info structure from the IPv4 fib_trie. The general idea is that the leaf_info structure itself only held about 6 actual bits of data, beyond that it was mostly just waste. As such we can drop the structure, move the 1 byte representing the prefix/suffix length into the fib_alias and just link it all into one list. My testing shows that this saves somewhere between 4 to 10ns depending on the type of test performed. I'm suspecting that this represents 1 to 2 L1 cache misses saved per look-up. One side effect of this change is that semantic_match_miss will now only increment once per leaf instead of once per leaf_info miss. However the stat is already skewed now that we perform a preliminary check on the leaf as a part of the look-up. I also have gone through and addressed a number of ordering issues in the first patch since I had misread the behavior of list_add_tail. I have since run some additional testing and verified the resulting lists are in the same order when combining multiple prefix length and tos values in a single leaf. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/ip_fib.h2
-rw-r--r--net/ipv4/fib_lookup.h3
-rw-r--r--net/ipv4/fib_semantics.c4
-rw-r--r--net/ipv4/fib_trie.c489
4 files changed, 176 insertions, 322 deletions
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 5bd120e4bc0a..cba4b7c32935 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -136,7 +136,7 @@ struct fib_result {
136 u32 tclassid; 136 u32 tclassid;
137 struct fib_info *fi; 137 struct fib_info *fi;
138 struct fib_table *table; 138 struct fib_table *table;
139 struct list_head *fa_head; 139 struct hlist_head *fa_head;
140}; 140};
141 141
142struct fib_result_nl { 142struct fib_result_nl {
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 825981b1049a..ae2e6eede46e 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -6,11 +6,12 @@
6#include <net/ip_fib.h> 6#include <net/ip_fib.h>
7 7
8struct fib_alias { 8struct fib_alias {
9 struct list_head fa_list; 9 struct hlist_node fa_list;
10 struct fib_info *fa_info; 10 struct fib_info *fa_info;
11 u8 fa_tos; 11 u8 fa_tos;
12 u8 fa_type; 12 u8 fa_type;
13 u8 fa_state; 13 u8 fa_state;
14 u8 fa_slen;
14 struct rcu_head rcu; 15 struct rcu_head rcu;
15}; 16};
16 17
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 1e2090ea663e..c6d267442dac 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1163,12 +1163,12 @@ int fib_sync_down_dev(struct net_device *dev, int force)
1163void fib_select_default(struct fib_result *res) 1163void fib_select_default(struct fib_result *res)
1164{ 1164{
1165 struct fib_info *fi = NULL, *last_resort = NULL; 1165 struct fib_info *fi = NULL, *last_resort = NULL;
1166 struct list_head *fa_head = res->fa_head; 1166 struct hlist_head *fa_head = res->fa_head;
1167 struct fib_table *tb = res->table; 1167 struct fib_table *tb = res->table;
1168 int order = -1, last_idx = -1; 1168 int order = -1, last_idx = -1;
1169 struct fib_alias *fa; 1169 struct fib_alias *fa;
1170 1170
1171 list_for_each_entry_rcu(fa, fa_head, fa_list) { 1171 hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
1172 struct fib_info *next_fi = fa->fa_info; 1172 struct fib_info *next_fi = fa->fa_info;
1173 1173
1174 if (next_fi->fib_scope != res->scope || 1174 if (next_fi->fib_scope != res->scope ||
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3daf0224ff2e..f48534577f8d 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -108,18 +108,10 @@ struct tnode {
108 struct tnode __rcu *child[0]; 108 struct tnode __rcu *child[0];
109 }; 109 };
110 /* This list pointer if valid if bits == 0 (LEAF) */ 110 /* This list pointer if valid if bits == 0 (LEAF) */
111 struct hlist_head list; 111 struct hlist_head leaf;
112 }; 112 };
113}; 113};
114 114
115struct leaf_info {
116 struct hlist_node hlist;
117 int plen;
118 u32 mask_plen; /* ntohl(inet_make_mask(plen)) */
119 struct list_head falh;
120 struct rcu_head rcu;
121};
122
123#ifdef CONFIG_IP_FIB_TRIE_STATS 115#ifdef CONFIG_IP_FIB_TRIE_STATS
124struct trie_use_stats { 116struct trie_use_stats {
125 unsigned int gets; 117 unsigned int gets;
@@ -290,11 +282,6 @@ static void __node_free_rcu(struct rcu_head *head)
290 282
291#define node_free(n) call_rcu(&n->rcu, __node_free_rcu) 283#define node_free(n) call_rcu(&n->rcu, __node_free_rcu)
292 284
293static inline void free_leaf_info(struct leaf_info *leaf)
294{
295 kfree_rcu(leaf, rcu);
296}
297
298static struct tnode *tnode_alloc(size_t size) 285static struct tnode *tnode_alloc(size_t size)
299{ 286{
300 if (size <= PAGE_SIZE) 287 if (size <= PAGE_SIZE)
@@ -328,22 +315,11 @@ static struct tnode *leaf_new(t_key key)
328 /* set bits to 0 indicating we are not a tnode */ 315 /* set bits to 0 indicating we are not a tnode */
329 l->bits = 0; 316 l->bits = 0;
330 317
331 INIT_HLIST_HEAD(&l->list); 318 INIT_HLIST_HEAD(&l->leaf);
332 } 319 }
333 return l; 320 return l;
334} 321}
335 322
336static struct leaf_info *leaf_info_new(int plen)
337{
338 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
339 if (li) {
340 li->plen = plen;
341 li->mask_plen = ntohl(inet_make_mask(plen));
342 INIT_LIST_HEAD(&li->falh);
343 }
344 return li;
345}
346
347static struct tnode *tnode_new(t_key key, int pos, int bits) 323static struct tnode *tnode_new(t_key key, int pos, int bits)
348{ 324{
349 size_t sz = offsetof(struct tnode, child[1ul << bits]); 325 size_t sz = offsetof(struct tnode, child[1ul << bits]);
@@ -866,31 +842,6 @@ static void resize(struct trie *t, struct tnode *tn)
866 } 842 }
867} 843}
868 844
869/* readside must use rcu_read_lock currently dump routines
870 via get_fa_head and dump */
871
872static struct leaf_info *find_leaf_info(struct tnode *l, int plen)
873{
874 struct hlist_head *head = &l->list;
875 struct leaf_info *li;
876
877 hlist_for_each_entry_rcu(li, head, hlist)
878 if (li->plen == plen)
879 return li;
880
881 return NULL;
882}
883
884static inline struct list_head *get_fa_head(struct tnode *l, int plen)
885{
886 struct leaf_info *li = find_leaf_info(l, plen);
887
888 if (!li)
889 return NULL;
890
891 return &li->falh;
892}
893
894static void leaf_pull_suffix(struct tnode *l) 845static void leaf_pull_suffix(struct tnode *l)
895{ 846{
896 struct tnode *tp = node_parent(l); 847 struct tnode *tp = node_parent(l);
@@ -915,47 +866,47 @@ static void leaf_push_suffix(struct tnode *l)
915 } 866 }
916} 867}
917 868
918static void remove_leaf_info(struct tnode *l, struct leaf_info *old) 869static void fib_remove_alias(struct tnode *l, struct fib_alias *old)
919{ 870{
920 /* record the location of the previous list_info entry */ 871 /* record the location of the previous list_info entry */
921 struct hlist_node **pprev = old->hlist.pprev; 872 struct hlist_node **pprev = old->fa_list.pprev;
922 struct leaf_info *li = hlist_entry(pprev, typeof(*li), hlist.next); 873 struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next);
923 874
924 /* remove the leaf info from the list */ 875 /* remove the fib_alias from the list */
925 hlist_del_rcu(&old->hlist); 876 hlist_del_rcu(&old->fa_list);
926 877
927 /* only access li if it is pointing at the last valid hlist_node */ 878 /* only access fa if it is pointing at the last valid hlist_node */
928 if (hlist_empty(&l->list) || (*pprev)) 879 if (hlist_empty(&l->leaf) || (*pprev))
929 return; 880 return;
930 881
931 /* update the trie with the latest suffix length */ 882 /* update the trie with the latest suffix length */
932 l->slen = KEYLENGTH - li->plen; 883 l->slen = fa->fa_slen;
933 leaf_pull_suffix(l); 884 leaf_pull_suffix(l);
934} 885}
935 886
936static void insert_leaf_info(struct tnode *l, struct leaf_info *new) 887static void fib_insert_alias(struct tnode *l, struct fib_alias *fa,
888 struct fib_alias *new)
937{ 889{
938 struct hlist_head *head = &l->list; 890 if (fa) {
939 struct leaf_info *li = NULL, *last = NULL; 891 hlist_add_before_rcu(&new->fa_list, &fa->fa_list);
940
941 if (hlist_empty(head)) {
942 hlist_add_head_rcu(&new->hlist, head);
943 } else { 892 } else {
944 hlist_for_each_entry(li, head, hlist) { 893 struct fib_alias *last;
945 if (new->plen > li->plen)
946 break;
947 894
948 last = li; 895 hlist_for_each_entry(last, &l->leaf, fa_list) {
896 if (new->fa_slen < last->fa_slen)
897 break;
898 fa = last;
949 } 899 }
950 if (last) 900
951 hlist_add_behind_rcu(&new->hlist, &last->hlist); 901 if (fa)
902 hlist_add_behind_rcu(&new->fa_list, &fa->fa_list);
952 else 903 else
953 hlist_add_before_rcu(&new->hlist, &li->hlist); 904 hlist_add_head_rcu(&new->fa_list, &l->leaf);
954 } 905 }
955 906
956 /* if we added to the tail node then we need to update slen */ 907 /* if we added to the tail node then we need to update slen */
957 if (l->slen < (KEYLENGTH - new->plen)) { 908 if (l->slen < new->fa_slen) {
958 l->slen = KEYLENGTH - new->plen; 909 l->slen = new->fa_slen;
959 leaf_push_suffix(l); 910 leaf_push_suffix(l);
960 } 911 }
961} 912}
@@ -994,14 +945,19 @@ static struct tnode *fib_find_node(struct trie *t, u32 key)
994/* Return the first fib alias matching TOS with 945/* Return the first fib alias matching TOS with
995 * priority less than or equal to PRIO. 946 * priority less than or equal to PRIO.
996 */ 947 */
997static struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) 948static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
949 u8 tos, u32 prio)
998{ 950{
999 struct fib_alias *fa; 951 struct fib_alias *fa;
1000 952
1001 if (!fah) 953 if (!fah)
1002 return NULL; 954 return NULL;
1003 955
1004 list_for_each_entry(fa, fah, fa_list) { 956 hlist_for_each_entry(fa, fah, fa_list) {
957 if (fa->fa_slen < slen)
958 continue;
959 if (fa->fa_slen != slen)
960 break;
1005 if (fa->fa_tos > tos) 961 if (fa->fa_tos > tos)
1006 continue; 962 continue;
1007 if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) 963 if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
@@ -1027,16 +983,9 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
1027 983
1028/* only used from updater-side */ 984/* only used from updater-side */
1029 985
1030static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) 986static struct tnode *fib_insert_node(struct trie *t, u32 key, int plen)
1031{ 987{
1032 struct list_head *fa_head = NULL;
1033 struct tnode *l, *n, *tp = NULL; 988 struct tnode *l, *n, *tp = NULL;
1034 struct leaf_info *li;
1035
1036 li = leaf_info_new(plen);
1037 if (!li)
1038 return NULL;
1039 fa_head = &li->falh;
1040 989
1041 n = rtnl_dereference(t->trie); 990 n = rtnl_dereference(t->trie);
1042 991
@@ -1067,8 +1016,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1067 /* we have found a leaf. Prefixes have already been compared */ 1016 /* we have found a leaf. Prefixes have already been compared */
1068 if (IS_LEAF(n)) { 1017 if (IS_LEAF(n)) {
1069 /* Case 1: n is a leaf, and prefixes match*/ 1018 /* Case 1: n is a leaf, and prefixes match*/
1070 insert_leaf_info(n, li); 1019 return n;
1071 return fa_head;
1072 } 1020 }
1073 1021
1074 tp = n; 1022 tp = n;
@@ -1076,12 +1024,8 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1076 } 1024 }
1077 1025
1078 l = leaf_new(key); 1026 l = leaf_new(key);
1079 if (!l) { 1027 if (!l)
1080 free_leaf_info(li);
1081 return NULL; 1028 return NULL;
1082 }
1083
1084 insert_leaf_info(l, li);
1085 1029
1086 /* Case 2: n is a LEAF or a TNODE and the key doesn't match. 1030 /* Case 2: n is a LEAF or a TNODE and the key doesn't match.
1087 * 1031 *
@@ -1094,7 +1038,6 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1094 1038
1095 tn = tnode_new(key, __fls(key ^ n->key), 1); 1039 tn = tnode_new(key, __fls(key ^ n->key), 1);
1096 if (!tn) { 1040 if (!tn) {
1097 free_leaf_info(li);
1098 node_free(l); 1041 node_free(l);
1099 return NULL; 1042 return NULL;
1100 } 1043 }
@@ -1120,7 +1063,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1120 rcu_assign_pointer(t->trie, l); 1063 rcu_assign_pointer(t->trie, l);
1121 } 1064 }
1122 1065
1123 return fa_head; 1066 return l;
1124} 1067}
1125 1068
1126/* 1069/*
@@ -1130,15 +1073,15 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1130{ 1073{
1131 struct trie *t = (struct trie *) tb->tb_data; 1074 struct trie *t = (struct trie *) tb->tb_data;
1132 struct fib_alias *fa, *new_fa; 1075 struct fib_alias *fa, *new_fa;
1133 struct list_head *fa_head = NULL;
1134 struct fib_info *fi; 1076 struct fib_info *fi;
1135 int plen = cfg->fc_dst_len; 1077 u8 plen = cfg->fc_dst_len;
1078 u8 slen = KEYLENGTH - plen;
1136 u8 tos = cfg->fc_tos; 1079 u8 tos = cfg->fc_tos;
1137 u32 key, mask; 1080 u32 key, mask;
1138 int err; 1081 int err;
1139 struct tnode *l; 1082 struct tnode *l;
1140 1083
1141 if (plen > 32) 1084 if (plen > KEYLENGTH)
1142 return -EINVAL; 1085 return -EINVAL;
1143 1086
1144 key = ntohl(cfg->fc_dst); 1087 key = ntohl(cfg->fc_dst);
@@ -1150,8 +1093,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1150 if (key & ~mask) 1093 if (key & ~mask)
1151 return -EINVAL; 1094 return -EINVAL;
1152 1095
1153 key = key & mask;
1154
1155 fi = fib_create_info(cfg); 1096 fi = fib_create_info(cfg);
1156 if (IS_ERR(fi)) { 1097 if (IS_ERR(fi)) {
1157 err = PTR_ERR(fi); 1098 err = PTR_ERR(fi);
@@ -1159,22 +1100,15 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1159 } 1100 }
1160 1101
1161 l = fib_find_node(t, key); 1102 l = fib_find_node(t, key);
1162 fa = NULL; 1103 fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority) : NULL;
1163
1164 if (l) {
1165 fa_head = get_fa_head(l, plen);
1166 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1167 }
1168 1104
1169 /* Now fa, if non-NULL, points to the first fib alias 1105 /* Now fa, if non-NULL, points to the first fib alias
1170 * with the same keys [prefix,tos,priority], if such key already 1106 * with the same keys [prefix,tos,priority], if such key already
1171 * exists or to the node before which we will insert new one. 1107 * exists or to the node before which we will insert new one.
1172 * 1108 *
1173 * If fa is NULL, we will need to allocate a new one and 1109 * If fa is NULL, we will need to allocate a new one and
1174 * insert to the head of f. 1110 * insert to the tail of the section matching the suffix length
1175 * 1111 * of the new alias.
1176 * If f is NULL, no fib node matched the destination key
1177 * and we need to allocate a new one of those as well.
1178 */ 1112 */
1179 1113
1180 if (fa && fa->fa_tos == tos && 1114 if (fa && fa->fa_tos == tos &&
@@ -1192,9 +1126,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1192 */ 1126 */
1193 fa_match = NULL; 1127 fa_match = NULL;
1194 fa_first = fa; 1128 fa_first = fa;
1195 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); 1129 hlist_for_each_entry_from(fa, fa_list) {
1196 list_for_each_entry_continue(fa, fa_head, fa_list) { 1130 if ((fa->fa_slen != slen) || (fa->fa_tos != tos))
1197 if (fa->fa_tos != tos)
1198 break; 1131 break;
1199 if (fa->fa_info->fib_priority != fi->fib_priority) 1132 if (fa->fa_info->fib_priority != fi->fib_priority)
1200 break; 1133 break;
@@ -1226,8 +1159,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1226 new_fa->fa_type = cfg->fc_type; 1159 new_fa->fa_type = cfg->fc_type;
1227 state = fa->fa_state; 1160 state = fa->fa_state;
1228 new_fa->fa_state = state & ~FA_S_ACCESSED; 1161 new_fa->fa_state = state & ~FA_S_ACCESSED;
1162 new_fa->fa_slen = fa->fa_slen;
1229 1163
1230 list_replace_rcu(&fa->fa_list, &new_fa->fa_list); 1164 hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1231 alias_free_mem_rcu(fa); 1165 alias_free_mem_rcu(fa);
1232 1166
1233 fib_release_info(fi_drop); 1167 fib_release_info(fi_drop);
@@ -1261,13 +1195,12 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1261 new_fa->fa_tos = tos; 1195 new_fa->fa_tos = tos;
1262 new_fa->fa_type = cfg->fc_type; 1196 new_fa->fa_type = cfg->fc_type;
1263 new_fa->fa_state = 0; 1197 new_fa->fa_state = 0;
1264 /* 1198 new_fa->fa_slen = slen;
1265 * Insert new entry to the list.
1266 */
1267 1199
1268 if (!fa_head) { 1200 /* Insert new entry to the list. */
1269 fa_head = fib_insert_node(t, key, plen); 1201 if (!l) {
1270 if (unlikely(!fa_head)) { 1202 l = fib_insert_node(t, key, plen);
1203 if (unlikely(!l)) {
1271 err = -ENOMEM; 1204 err = -ENOMEM;
1272 goto out_free_new_fa; 1205 goto out_free_new_fa;
1273 } 1206 }
@@ -1276,8 +1209,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1276 if (!plen) 1209 if (!plen)
1277 tb->tb_num_default++; 1210 tb->tb_num_default++;
1278 1211
1279 list_add_tail_rcu(&new_fa->fa_list, 1212 fib_insert_alias(l, fa, new_fa);
1280 (fa ? &fa->fa_list : fa_head));
1281 1213
1282 rt_cache_flush(cfg->fc_nlinfo.nl_net); 1214 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1283 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, 1215 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
@@ -1310,7 +1242,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
1310#endif 1242#endif
1311 const t_key key = ntohl(flp->daddr); 1243 const t_key key = ntohl(flp->daddr);
1312 struct tnode *n, *pn; 1244 struct tnode *n, *pn;
1313 struct leaf_info *li; 1245 struct fib_alias *fa;
1314 t_key cindex; 1246 t_key cindex;
1315 1247
1316 n = rcu_dereference(t->trie); 1248 n = rcu_dereference(t->trie);
@@ -1413,61 +1345,56 @@ backtrace:
1413 1345
1414found: 1346found:
1415 /* Step 3: Process the leaf, if that fails fall back to backtracing */ 1347 /* Step 3: Process the leaf, if that fails fall back to backtracing */
1416 hlist_for_each_entry_rcu(li, &n->list, hlist) { 1348 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
1417 struct fib_alias *fa; 1349 struct fib_info *fi = fa->fa_info;
1418 1350 int nhsel, err;
1419 if ((key ^ n->key) & li->mask_plen)
1420 continue;
1421
1422 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
1423 struct fib_info *fi = fa->fa_info;
1424 int nhsel, err;
1425 1351
1426 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) 1352 if (((key ^ n->key) >= (1ul << fa->fa_slen)) &&
1353 ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH)))
1427 continue; 1354 continue;
1428 if (fi->fib_dead) 1355 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
1429 continue; 1356 continue;
1430 if (fa->fa_info->fib_scope < flp->flowi4_scope) 1357 if (fi->fib_dead)
1431 continue; 1358 continue;
1432 fib_alias_accessed(fa); 1359 if (fa->fa_info->fib_scope < flp->flowi4_scope)
1433 err = fib_props[fa->fa_type].error; 1360 continue;
1434 if (unlikely(err < 0)) { 1361 fib_alias_accessed(fa);
1362 err = fib_props[fa->fa_type].error;
1363 if (unlikely(err < 0)) {
1435#ifdef CONFIG_IP_FIB_TRIE_STATS 1364#ifdef CONFIG_IP_FIB_TRIE_STATS
1436 this_cpu_inc(stats->semantic_match_passed); 1365 this_cpu_inc(stats->semantic_match_passed);
1437#endif 1366#endif
1438 return err; 1367 return err;
1439 } 1368 }
1440 if (fi->fib_flags & RTNH_F_DEAD) 1369 if (fi->fib_flags & RTNH_F_DEAD)
1370 continue;
1371 for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
1372 const struct fib_nh *nh = &fi->fib_nh[nhsel];
1373
1374 if (nh->nh_flags & RTNH_F_DEAD)
1375 continue;
1376 if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
1441 continue; 1377 continue;
1442 for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { 1378
1443 const struct fib_nh *nh = &fi->fib_nh[nhsel]; 1379 if (!(fib_flags & FIB_LOOKUP_NOREF))
1444 1380 atomic_inc(&fi->fib_clntref);
1445 if (nh->nh_flags & RTNH_F_DEAD) 1381
1446 continue; 1382 res->prefixlen = KEYLENGTH - fa->fa_slen;
1447 if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) 1383 res->nh_sel = nhsel;
1448 continue; 1384 res->type = fa->fa_type;
1449 1385 res->scope = fi->fib_scope;
1450 if (!(fib_flags & FIB_LOOKUP_NOREF)) 1386 res->fi = fi;
1451 atomic_inc(&fi->fib_clntref); 1387 res->table = tb;
1452 1388 res->fa_head = &n->leaf;
1453 res->prefixlen = li->plen;
1454 res->nh_sel = nhsel;
1455 res->type = fa->fa_type;
1456 res->scope = fi->fib_scope;
1457 res->fi = fi;
1458 res->table = tb;
1459 res->fa_head = &li->falh;
1460#ifdef CONFIG_IP_FIB_TRIE_STATS 1389#ifdef CONFIG_IP_FIB_TRIE_STATS
1461 this_cpu_inc(stats->semantic_match_passed); 1390 this_cpu_inc(stats->semantic_match_passed);
1462#endif 1391#endif
1463 return err; 1392 return err;
1464 }
1465 } 1393 }
1466 1394 }
1467#ifdef CONFIG_IP_FIB_TRIE_STATS 1395#ifdef CONFIG_IP_FIB_TRIE_STATS
1468 this_cpu_inc(stats->semantic_match_miss); 1396 this_cpu_inc(stats->semantic_match_miss);
1469#endif 1397#endif
1470 }
1471 goto backtrace; 1398 goto backtrace;
1472} 1399}
1473EXPORT_SYMBOL_GPL(fib_table_lookup); 1400EXPORT_SYMBOL_GPL(fib_table_lookup);
@@ -1497,15 +1424,14 @@ static void trie_leaf_remove(struct trie *t, struct tnode *l)
1497int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) 1424int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
1498{ 1425{
1499 struct trie *t = (struct trie *) tb->tb_data; 1426 struct trie *t = (struct trie *) tb->tb_data;
1500 u32 key, mask;
1501 int plen = cfg->fc_dst_len;
1502 u8 tos = cfg->fc_tos;
1503 struct fib_alias *fa, *fa_to_delete; 1427 struct fib_alias *fa, *fa_to_delete;
1504 struct list_head *fa_head; 1428 u8 plen = cfg->fc_dst_len;
1429 u8 tos = cfg->fc_tos;
1430 u8 slen = KEYLENGTH - plen;
1505 struct tnode *l; 1431 struct tnode *l;
1506 struct leaf_info *li; 1432 u32 key, mask;
1507 1433
1508 if (plen > 32) 1434 if (plen > KEYLENGTH)
1509 return -EINVAL; 1435 return -EINVAL;
1510 1436
1511 key = ntohl(cfg->fc_dst); 1437 key = ntohl(cfg->fc_dst);
@@ -1514,19 +1440,11 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
1514 if (key & ~mask) 1440 if (key & ~mask)
1515 return -EINVAL; 1441 return -EINVAL;
1516 1442
1517 key = key & mask;
1518 l = fib_find_node(t, key); 1443 l = fib_find_node(t, key);
1519
1520 if (!l) 1444 if (!l)
1521 return -ESRCH; 1445 return -ESRCH;
1522 1446
1523 li = find_leaf_info(l, plen); 1447 fa = fib_find_alias(&l->leaf, slen, tos, 0);
1524
1525 if (!li)
1526 return -ESRCH;
1527
1528 fa_head = &li->falh;
1529 fa = fib_find_alias(fa_head, tos, 0);
1530 1448
1531 if (!fa) 1449 if (!fa)
1532 return -ESRCH; 1450 return -ESRCH;
@@ -1534,11 +1452,10 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
1534 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); 1452 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
1535 1453
1536 fa_to_delete = NULL; 1454 fa_to_delete = NULL;
1537 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); 1455 hlist_for_each_entry_from(fa, fa_list) {
1538 list_for_each_entry_continue(fa, fa_head, fa_list) {
1539 struct fib_info *fi = fa->fa_info; 1456 struct fib_info *fi = fa->fa_info;
1540 1457
1541 if (fa->fa_tos != tos) 1458 if ((fa->fa_slen != slen) || (fa->fa_tos != tos))
1542 break; 1459 break;
1543 1460
1544 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && 1461 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
@@ -1561,17 +1478,12 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
1561 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, 1478 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
1562 &cfg->fc_nlinfo, 0); 1479 &cfg->fc_nlinfo, 0);
1563 1480
1564 list_del_rcu(&fa->fa_list); 1481 fib_remove_alias(l, fa);
1565 1482
1566 if (!plen) 1483 if (!plen)
1567 tb->tb_num_default--; 1484 tb->tb_num_default--;
1568 1485
1569 if (list_empty(fa_head)) { 1486 if (hlist_empty(&l->leaf))
1570 remove_leaf_info(l, li);
1571 free_leaf_info(li);
1572 }
1573
1574 if (hlist_empty(&l->list))
1575 trie_leaf_remove(t, l); 1487 trie_leaf_remove(t, l);
1576 1488
1577 if (fa->fa_state & FA_S_ACCESSED) 1489 if (fa->fa_state & FA_S_ACCESSED)
@@ -1582,51 +1494,34 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
1582 return 0; 1494 return 0;
1583} 1495}
1584 1496
1585static int trie_flush_list(struct list_head *head) 1497static int trie_flush_leaf(struct tnode *l)
1586{ 1498{
1587 struct fib_alias *fa, *fa_node; 1499 struct hlist_node *tmp;
1500 unsigned char slen = 0;
1501 struct fib_alias *fa;
1588 int found = 0; 1502 int found = 0;
1589 1503
1590 list_for_each_entry_safe(fa, fa_node, head, fa_list) { 1504 hlist_for_each_entry_safe(fa, tmp, &l->leaf, fa_list) {
1591 struct fib_info *fi = fa->fa_info; 1505 struct fib_info *fi = fa->fa_info;
1592 1506
1593 if (fi && (fi->fib_flags & RTNH_F_DEAD)) { 1507 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1594 list_del_rcu(&fa->fa_list); 1508 hlist_del_rcu(&fa->fa_list);
1595 fib_release_info(fa->fa_info); 1509 fib_release_info(fa->fa_info);
1596 alias_free_mem_rcu(fa); 1510 alias_free_mem_rcu(fa);
1597 found++; 1511 found++;
1598 }
1599 }
1600 return found;
1601}
1602 1512
1603static int trie_flush_leaf(struct tnode *l)
1604{
1605 int found = 0;
1606 struct hlist_head *lih = &l->list;
1607 struct hlist_node *tmp;
1608 struct leaf_info *li = NULL;
1609 unsigned char plen = KEYLENGTH;
1610
1611 hlist_for_each_entry_safe(li, tmp, lih, hlist) {
1612 found += trie_flush_list(&li->falh);
1613
1614 if (list_empty(&li->falh)) {
1615 hlist_del_rcu(&li->hlist);
1616 free_leaf_info(li);
1617 continue; 1513 continue;
1618 } 1514 }
1619 1515
1620 plen = li->plen; 1516 slen = fa->fa_slen;
1621 } 1517 }
1622 1518
1623 l->slen = KEYLENGTH - plen; 1519 l->slen = slen;
1624 1520
1625 return found; 1521 return found;
1626} 1522}
1627 1523
1628/* 1524/* Scan for the next right leaf starting at node p->child[idx]
1629 * Scan for the next right leaf starting at node p->child[idx]
1630 * Since we have back pointer, no recursion necessary. 1525 * Since we have back pointer, no recursion necessary.
1631 */ 1526 */
1632static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c) 1527static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c)
@@ -1701,7 +1596,7 @@ int fib_table_flush(struct fib_table *tb)
1701 found += trie_flush_leaf(l); 1596 found += trie_flush_leaf(l);
1702 1597
1703 if (ll) { 1598 if (ll) {
1704 if (hlist_empty(&ll->list)) 1599 if (hlist_empty(&ll->leaf))
1705 trie_leaf_remove(t, ll); 1600 trie_leaf_remove(t, ll);
1706 else 1601 else
1707 leaf_pull_suffix(ll); 1602 leaf_pull_suffix(ll);
@@ -1711,7 +1606,7 @@ int fib_table_flush(struct fib_table *tb)
1711 } 1606 }
1712 1607
1713 if (ll) { 1608 if (ll) {
1714 if (hlist_empty(&ll->list)) 1609 if (hlist_empty(&ll->leaf))
1715 trie_leaf_remove(t, ll); 1610 trie_leaf_remove(t, ll);
1716 else 1611 else
1717 leaf_pull_suffix(ll); 1612 leaf_pull_suffix(ll);
@@ -1731,20 +1626,18 @@ void fib_free_table(struct fib_table *tb)
1731 kfree(tb); 1626 kfree(tb);
1732} 1627}
1733 1628
1734static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, 1629static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
1735 struct fib_table *tb, 1630 struct sk_buff *skb, struct netlink_callback *cb)
1736 struct sk_buff *skb, struct netlink_callback *cb)
1737{ 1631{
1738 int i, s_i; 1632 __be32 xkey = htonl(l->key);
1739 struct fib_alias *fa; 1633 struct fib_alias *fa;
1740 __be32 xkey = htonl(key); 1634 int i, s_i;
1741 1635
1742 s_i = cb->args[5]; 1636 s_i = cb->args[4];
1743 i = 0; 1637 i = 0;
1744 1638
1745 /* rcu_read_lock is hold by caller */ 1639 /* rcu_read_lock is hold by caller */
1746 1640 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
1747 list_for_each_entry_rcu(fa, fah, fa_list) {
1748 if (i < s_i) { 1641 if (i < s_i) {
1749 i++; 1642 i++;
1750 continue; 1643 continue;
@@ -1756,41 +1649,9 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1756 tb->tb_id, 1649 tb->tb_id,
1757 fa->fa_type, 1650 fa->fa_type,
1758 xkey, 1651 xkey,
1759 plen, 1652 KEYLENGTH - fa->fa_slen,
1760 fa->fa_tos, 1653 fa->fa_tos,
1761 fa->fa_info, NLM_F_MULTI) < 0) { 1654 fa->fa_info, NLM_F_MULTI) < 0) {
1762 cb->args[5] = i;
1763 return -1;
1764 }
1765 i++;
1766 }
1767 cb->args[5] = i;
1768 return skb->len;
1769}
1770
1771static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
1772 struct sk_buff *skb, struct netlink_callback *cb)
1773{
1774 struct leaf_info *li;
1775 int i, s_i;
1776
1777 s_i = cb->args[4];
1778 i = 0;
1779
1780 /* rcu_read_lock is hold by caller */
1781 hlist_for_each_entry_rcu(li, &l->list, hlist) {
1782 if (i < s_i) {
1783 i++;
1784 continue;
1785 }
1786
1787 if (i > s_i)
1788 cb->args[5] = 0;
1789
1790 if (list_empty(&li->falh))
1791 continue;
1792
1793 if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
1794 cb->args[4] = i; 1655 cb->args[4] = i;
1795 return -1; 1656 return -1;
1796 } 1657 }
@@ -1850,8 +1711,7 @@ void __init fib_trie_init(void)
1850 0, SLAB_PANIC, NULL); 1711 0, SLAB_PANIC, NULL);
1851 1712
1852 trie_leaf_kmem = kmem_cache_create("ip_fib_trie", 1713 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
1853 max(sizeof(struct tnode), 1714 sizeof(struct tnode),
1854 sizeof(struct leaf_info)),
1855 0, SLAB_PANIC, NULL); 1715 0, SLAB_PANIC, NULL);
1856} 1716}
1857 1717
@@ -1973,14 +1833,14 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
1973 rcu_read_lock(); 1833 rcu_read_lock();
1974 for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { 1834 for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
1975 if (IS_LEAF(n)) { 1835 if (IS_LEAF(n)) {
1976 struct leaf_info *li; 1836 struct fib_alias *fa;
1977 1837
1978 s->leaves++; 1838 s->leaves++;
1979 s->totdepth += iter.depth; 1839 s->totdepth += iter.depth;
1980 if (iter.depth > s->maxdepth) 1840 if (iter.depth > s->maxdepth)
1981 s->maxdepth = iter.depth; 1841 s->maxdepth = iter.depth;
1982 1842
1983 hlist_for_each_entry_rcu(li, &n->list, hlist) 1843 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list)
1984 ++s->prefixes; 1844 ++s->prefixes;
1985 } else { 1845 } else {
1986 s->tnodes++; 1846 s->tnodes++;
@@ -2012,7 +1872,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2012 bytes = sizeof(struct tnode) * stat->leaves; 1872 bytes = sizeof(struct tnode) * stat->leaves;
2013 1873
2014 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); 1874 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
2015 bytes += sizeof(struct leaf_info) * stat->prefixes; 1875 bytes += sizeof(struct fib_alias) * stat->prefixes;
2016 1876
2017 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes); 1877 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
2018 bytes += sizeof(struct tnode) * stat->tnodes; 1878 bytes += sizeof(struct tnode) * stat->tnodes;
@@ -2263,28 +2123,25 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2263 &prf, KEYLENGTH - n->pos - n->bits, n->bits, 2123 &prf, KEYLENGTH - n->pos - n->bits, n->bits,
2264 n->full_children, n->empty_children); 2124 n->full_children, n->empty_children);
2265 } else { 2125 } else {
2266 struct leaf_info *li;
2267 __be32 val = htonl(n->key); 2126 __be32 val = htonl(n->key);
2127 struct fib_alias *fa;
2268 2128
2269 seq_indent(seq, iter->depth); 2129 seq_indent(seq, iter->depth);
2270 seq_printf(seq, " |-- %pI4\n", &val); 2130 seq_printf(seq, " |-- %pI4\n", &val);
2271 2131
2272 hlist_for_each_entry_rcu(li, &n->list, hlist) { 2132 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
2273 struct fib_alias *fa; 2133 char buf1[32], buf2[32];
2274 2134
2275 list_for_each_entry_rcu(fa, &li->falh, fa_list) { 2135 seq_indent(seq, iter->depth + 1);
2276 char buf1[32], buf2[32]; 2136 seq_printf(seq, " /%zu %s %s",
2277 2137 KEYLENGTH - fa->fa_slen,
2278 seq_indent(seq, iter->depth+1); 2138 rtn_scope(buf1, sizeof(buf1),
2279 seq_printf(seq, " /%d %s %s", li->plen, 2139 fa->fa_info->fib_scope),
2280 rtn_scope(buf1, sizeof(buf1), 2140 rtn_type(buf2, sizeof(buf2),
2281 fa->fa_info->fib_scope), 2141 fa->fa_type));
2282 rtn_type(buf2, sizeof(buf2), 2142 if (fa->fa_tos)
2283 fa->fa_type)); 2143 seq_printf(seq, " tos=%d", fa->fa_tos);
2284 if (fa->fa_tos) 2144 seq_putc(seq, '\n');
2285 seq_printf(seq, " tos=%d", fa->fa_tos);
2286 seq_putc(seq, '\n');
2287 }
2288 } 2145 }
2289 } 2146 }
2290 2147
@@ -2412,8 +2269,9 @@ static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info
2412 */ 2269 */
2413static int fib_route_seq_show(struct seq_file *seq, void *v) 2270static int fib_route_seq_show(struct seq_file *seq, void *v)
2414{ 2271{
2272 struct fib_alias *fa;
2415 struct tnode *l = v; 2273 struct tnode *l = v;
2416 struct leaf_info *li; 2274 __be32 prefix;
2417 2275
2418 if (v == SEQ_START_TOKEN) { 2276 if (v == SEQ_START_TOKEN) {
2419 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " 2277 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
@@ -2422,45 +2280,40 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
2422 return 0; 2280 return 0;
2423 } 2281 }
2424 2282
2425 hlist_for_each_entry_rcu(li, &l->list, hlist) { 2283 prefix = htonl(l->key);
2426 struct fib_alias *fa;
2427 __be32 mask, prefix;
2428
2429 mask = inet_make_mask(li->plen);
2430 prefix = htonl(l->key);
2431 2284
2432 list_for_each_entry_rcu(fa, &li->falh, fa_list) { 2285 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2433 const struct fib_info *fi = fa->fa_info; 2286 const struct fib_info *fi = fa->fa_info;
2434 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); 2287 __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
2288 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
2435 2289
2436 if (fa->fa_type == RTN_BROADCAST 2290 if ((fa->fa_type == RTN_BROADCAST) ||
2437 || fa->fa_type == RTN_MULTICAST) 2291 (fa->fa_type == RTN_MULTICAST))
2438 continue; 2292 continue;
2439 2293
2440 seq_setwidth(seq, 127); 2294 seq_setwidth(seq, 127);
2441 2295
2442 if (fi) 2296 if (fi)
2443 seq_printf(seq, 2297 seq_printf(seq,
2444 "%s\t%08X\t%08X\t%04X\t%d\t%u\t" 2298 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
2445 "%d\t%08X\t%d\t%u\t%u", 2299 "%d\t%08X\t%d\t%u\t%u",
2446 fi->fib_dev ? fi->fib_dev->name : "*", 2300 fi->fib_dev ? fi->fib_dev->name : "*",
2447 prefix, 2301 prefix,
2448 fi->fib_nh->nh_gw, flags, 0, 0, 2302 fi->fib_nh->nh_gw, flags, 0, 0,
2449 fi->fib_priority, 2303 fi->fib_priority,
2450 mask, 2304 mask,
2451 (fi->fib_advmss ? 2305 (fi->fib_advmss ?
2452 fi->fib_advmss + 40 : 0), 2306 fi->fib_advmss + 40 : 0),
2453 fi->fib_window, 2307 fi->fib_window,
2454 fi->fib_rtt >> 3); 2308 fi->fib_rtt >> 3);
2455 else 2309 else
2456 seq_printf(seq, 2310 seq_printf(seq,
2457 "*\t%08X\t%08X\t%04X\t%d\t%u\t" 2311 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
2458 "%d\t%08X\t%d\t%u\t%u", 2312 "%d\t%08X\t%d\t%u\t%u",
2459 prefix, 0, flags, 0, 0, 0, 2313 prefix, 0, flags, 0, 0, 0,
2460 mask, 0, 0, 0); 2314 mask, 0, 0, 0);
2461 2315
2462 seq_pad(seq, '\n'); 2316 seq_pad(seq, '\n');
2463 }
2464 } 2317 }
2465 2318
2466 return 0; 2319 return 0;