aboutsummaryrefslogtreecommitdiffstats
path: root/lib/radix-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/radix-tree.c')
-rw-r--r--lib/radix-tree.c83
1 files changed, 58 insertions, 25 deletions
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 6f412ab4c24f..5086bb962b4d 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -82,6 +82,16 @@ struct radix_tree_preload {
82}; 82};
83static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 83static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
84 84
85static inline void *ptr_to_indirect(void *ptr)
86{
87 return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
88}
89
90static inline void *indirect_to_ptr(void *ptr)
91{
92 return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
93}
94
85static inline gfp_t root_gfp_mask(struct radix_tree_root *root) 95static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
86{ 96{
87 return root->gfp_mask & __GFP_BITS_MASK; 97 return root->gfp_mask & __GFP_BITS_MASK;
@@ -265,7 +275,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
265 return -ENOMEM; 275 return -ENOMEM;
266 276
267 /* Increase the height. */ 277 /* Increase the height. */
268 node->slots[0] = radix_tree_indirect_to_ptr(root->rnode); 278 node->slots[0] = indirect_to_ptr(root->rnode);
269 279
270 /* Propagate the aggregated tag info into the new root */ 280 /* Propagate the aggregated tag info into the new root */
271 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 281 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
@@ -276,7 +286,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
276 newheight = root->height+1; 286 newheight = root->height+1;
277 node->height = newheight; 287 node->height = newheight;
278 node->count = 1; 288 node->count = 1;
279 node = radix_tree_ptr_to_indirect(node); 289 node = ptr_to_indirect(node);
280 rcu_assign_pointer(root->rnode, node); 290 rcu_assign_pointer(root->rnode, node);
281 root->height = newheight; 291 root->height = newheight;
282 } while (height > root->height); 292 } while (height > root->height);
@@ -309,7 +319,7 @@ int radix_tree_insert(struct radix_tree_root *root,
309 return error; 319 return error;
310 } 320 }
311 321
312 slot = radix_tree_indirect_to_ptr(root->rnode); 322 slot = indirect_to_ptr(root->rnode);
313 323
314 height = root->height; 324 height = root->height;
315 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 325 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
@@ -325,8 +335,7 @@ int radix_tree_insert(struct radix_tree_root *root,
325 rcu_assign_pointer(node->slots[offset], slot); 335 rcu_assign_pointer(node->slots[offset], slot);
326 node->count++; 336 node->count++;
327 } else 337 } else
328 rcu_assign_pointer(root->rnode, 338 rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
329 radix_tree_ptr_to_indirect(slot));
330 } 339 }
331 340
332 /* Go a level down */ 341 /* Go a level down */
@@ -374,7 +383,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
374 return NULL; 383 return NULL;
375 return is_slot ? (void *)&root->rnode : node; 384 return is_slot ? (void *)&root->rnode : node;
376 } 385 }
377 node = radix_tree_indirect_to_ptr(node); 386 node = indirect_to_ptr(node);
378 387
379 height = node->height; 388 height = node->height;
380 if (index > radix_tree_maxindex(height)) 389 if (index > radix_tree_maxindex(height))
@@ -393,7 +402,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
393 height--; 402 height--;
394 } while (height > 0); 403 } while (height > 0);
395 404
396 return is_slot ? (void *)slot:node; 405 return is_slot ? (void *)slot : indirect_to_ptr(node);
397} 406}
398 407
399/** 408/**
@@ -455,7 +464,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
455 height = root->height; 464 height = root->height;
456 BUG_ON(index > radix_tree_maxindex(height)); 465 BUG_ON(index > radix_tree_maxindex(height));
457 466
458 slot = radix_tree_indirect_to_ptr(root->rnode); 467 slot = indirect_to_ptr(root->rnode);
459 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 468 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
460 469
461 while (height > 0) { 470 while (height > 0) {
@@ -509,7 +518,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
509 518
510 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 519 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
511 pathp->node = NULL; 520 pathp->node = NULL;
512 slot = radix_tree_indirect_to_ptr(root->rnode); 521 slot = indirect_to_ptr(root->rnode);
513 522
514 while (height > 0) { 523 while (height > 0) {
515 int offset; 524 int offset;
@@ -579,7 +588,7 @@ int radix_tree_tag_get(struct radix_tree_root *root,
579 588
580 if (!radix_tree_is_indirect_ptr(node)) 589 if (!radix_tree_is_indirect_ptr(node))
581 return (index == 0); 590 return (index == 0);
582 node = radix_tree_indirect_to_ptr(node); 591 node = indirect_to_ptr(node);
583 592
584 height = node->height; 593 height = node->height;
585 if (index > radix_tree_maxindex(height)) 594 if (index > radix_tree_maxindex(height))
@@ -666,7 +675,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
666 } 675 }
667 676
668 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 677 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
669 slot = radix_tree_indirect_to_ptr(root->rnode); 678 slot = indirect_to_ptr(root->rnode);
670 679
671 /* 680 /*
672 * we fill the path from (root->height - 2) to 0, leaving the index at 681 * we fill the path from (root->height - 2) to 0, leaving the index at
@@ -897,7 +906,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
897 results[0] = node; 906 results[0] = node;
898 return 1; 907 return 1;
899 } 908 }
900 node = radix_tree_indirect_to_ptr(node); 909 node = indirect_to_ptr(node);
901 910
902 max_index = radix_tree_maxindex(node->height); 911 max_index = radix_tree_maxindex(node->height);
903 912
@@ -916,7 +925,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
916 slot = *(((void ***)results)[ret + i]); 925 slot = *(((void ***)results)[ret + i]);
917 if (!slot) 926 if (!slot)
918 continue; 927 continue;
919 results[ret + nr_found] = rcu_dereference_raw(slot); 928 results[ret + nr_found] =
929 indirect_to_ptr(rcu_dereference_raw(slot));
920 nr_found++; 930 nr_found++;
921 } 931 }
922 ret += nr_found; 932 ret += nr_found;
@@ -965,7 +975,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
965 results[0] = (void **)&root->rnode; 975 results[0] = (void **)&root->rnode;
966 return 1; 976 return 1;
967 } 977 }
968 node = radix_tree_indirect_to_ptr(node); 978 node = indirect_to_ptr(node);
969 979
970 max_index = radix_tree_maxindex(node->height); 980 max_index = radix_tree_maxindex(node->height);
971 981
@@ -1090,7 +1100,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
1090 results[0] = node; 1100 results[0] = node;
1091 return 1; 1101 return 1;
1092 } 1102 }
1093 node = radix_tree_indirect_to_ptr(node); 1103 node = indirect_to_ptr(node);
1094 1104
1095 max_index = radix_tree_maxindex(node->height); 1105 max_index = radix_tree_maxindex(node->height);
1096 1106
@@ -1109,7 +1119,8 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
1109 slot = *(((void ***)results)[ret + i]); 1119 slot = *(((void ***)results)[ret + i]);
1110 if (!slot) 1120 if (!slot)
1111 continue; 1121 continue;
1112 results[ret + nr_found] = rcu_dereference_raw(slot); 1122 results[ret + nr_found] =
1123 indirect_to_ptr(rcu_dereference_raw(slot));
1113 nr_found++; 1124 nr_found++;
1114 } 1125 }
1115 ret += nr_found; 1126 ret += nr_found;
@@ -1159,7 +1170,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
1159 results[0] = (void **)&root->rnode; 1170 results[0] = (void **)&root->rnode;
1160 return 1; 1171 return 1;
1161 } 1172 }
1162 node = radix_tree_indirect_to_ptr(node); 1173 node = indirect_to_ptr(node);
1163 1174
1164 max_index = radix_tree_maxindex(node->height); 1175 max_index = radix_tree_maxindex(node->height);
1165 1176
@@ -1195,7 +1206,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1195 void *newptr; 1206 void *newptr;
1196 1207
1197 BUG_ON(!radix_tree_is_indirect_ptr(to_free)); 1208 BUG_ON(!radix_tree_is_indirect_ptr(to_free));
1198 to_free = radix_tree_indirect_to_ptr(to_free); 1209 to_free = indirect_to_ptr(to_free);
1199 1210
1200 /* 1211 /*
1201 * The candidate node has more than one child, or its child 1212 * The candidate node has more than one child, or its child
@@ -1208,16 +1219,39 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1208 1219
1209 /* 1220 /*
1210 * We don't need rcu_assign_pointer(), since we are simply 1221 * We don't need rcu_assign_pointer(), since we are simply
1211 * moving the node from one part of the tree to another. If 1222 * moving the node from one part of the tree to another: if it
1212 * it was safe to dereference the old pointer to it 1223 * was safe to dereference the old pointer to it
1213 * (to_free->slots[0]), it will be safe to dereference the new 1224 * (to_free->slots[0]), it will be safe to dereference the new
1214 * one (root->rnode). 1225 * one (root->rnode) as far as dependent read barriers go.
1215 */ 1226 */
1216 newptr = to_free->slots[0]; 1227 newptr = to_free->slots[0];
1217 if (root->height > 1) 1228 if (root->height > 1)
1218 newptr = radix_tree_ptr_to_indirect(newptr); 1229 newptr = ptr_to_indirect(newptr);
1219 root->rnode = newptr; 1230 root->rnode = newptr;
1220 root->height--; 1231 root->height--;
1232
1233 /*
1234 * We have a dilemma here. The node's slot[0] must not be
1235 * NULLed in case there are concurrent lookups expecting to
1236 * find the item. However if this was a bottom-level node,
1237 * then it may be subject to the slot pointer being visible
1238 * to callers dereferencing it. If item corresponding to
1239 * slot[0] is subsequently deleted, these callers would expect
1240 * their slot to become empty sooner or later.
1241 *
1242 * For example, lockless pagecache will look up a slot, deref
1243 * the page pointer, and if the page is 0 refcount it means it
1244 * was concurrently deleted from pagecache so try the deref
1245 * again. Fortunately there is already a requirement for logic
1246 * to retry the entire slot lookup -- the indirect pointer
1247 * problem (replacing direct root node with an indirect pointer
1248 * also results in a stale slot). So tag the slot as indirect
1249 * to force callers to retry.
1250 */
1251 if (root->height == 0)
1252 *((unsigned long *)&to_free->slots[0]) |=
1253 RADIX_TREE_INDIRECT_PTR;
1254
1221 radix_tree_node_free(to_free); 1255 radix_tree_node_free(to_free);
1222 } 1256 }
1223} 1257}
@@ -1254,7 +1288,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1254 root->rnode = NULL; 1288 root->rnode = NULL;
1255 goto out; 1289 goto out;
1256 } 1290 }
1257 slot = radix_tree_indirect_to_ptr(slot); 1291 slot = indirect_to_ptr(slot);
1258 1292
1259 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 1293 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
1260 pathp->node = NULL; 1294 pathp->node = NULL;
@@ -1296,8 +1330,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1296 radix_tree_node_free(to_free); 1330 radix_tree_node_free(to_free);
1297 1331
1298 if (pathp->node->count) { 1332 if (pathp->node->count) {
1299 if (pathp->node == 1333 if (pathp->node == indirect_to_ptr(root->rnode))
1300 radix_tree_indirect_to_ptr(root->rnode))
1301 radix_tree_shrink(root); 1334 radix_tree_shrink(root);
1302 goto out; 1335 goto out;
1303 } 1336 }