aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFrank Rowand <frank.rowand@sony.com>2018-03-04 19:14:47 -0500
committerRob Herring <robh@kernel.org>2018-03-07 15:50:09 -0500
commit0b3ce78e90fc66c50a944320d6e39fa6fdb46cdf (patch)
tree86c61b714a4c2e6643c964f00444e01a50d59c4e
parent6adb1b9538e6414ce746ade8d170d362a657ca41 (diff)
of: cache phandle nodes to reduce cost of of_find_node_by_phandle()
Create a cache of the nodes that contain a phandle property. Use this cache to find the node for a given phandle value instead of scanning the devicetree to find the node. If the phandle value is not found in the cache, of_find_node_by_phandle() will fall back to the tree scan algorithm. The cache is initialized in of_core_init(). The cache is freed via a late_initcall_sync() if modules are not enabled. If the devicetree is created by the dtc compiler, with all phandle property values auto generated, then the size required by the cache could be 4 * (1 + number of phandles) bytes. This results in an O(1) node lookup cost for a given phandle value. Due to a concern that the phandle property values might not be consistent with what is generated by the dtc compiler, a mask has been added to the cache lookup algorithm. To maintain the O(1) node lookup cost, the size of the cache has been increased by rounding the number of entries up to the next power of two. The overhead of finding the devicetree node containing a given phandle value has been noted by several people in the recent past, in some cases with a patch to add a hashed index of devicetree nodes, based on the phandle value of the node. One concern with this approach is the extra space added to each node. This patch takes advantage of the phandle property values auto generated by the dtc compiler, which begin with one and monotonically increase by one, resulting in a range of 1..n for n phandle values. This implementation should also provide a good reduction of overhead for any range of phandle values that are mostly in a monotonic range. Performance measurements by Chintan Pandya <cpandya@codeaurora.org> of several implementations of patches that are similar to this one suggest an expected reduction of boot time by ~400ms for his test system. If the cache size was decreased to 64 entries, the boot time was reduced by ~340 ms. The measurements were on a 4.9.73 kernel for arch/arm64/boot/dts/qcom/sda670-mtp.dts, contains 2371 nodes and 814 phandle values. Reported-by: Chintan Pandya <cpandya@codeaurora.org> Signed-off-by: Frank Rowand <frank.rowand@sony.com> Signed-off-by: Rob Herring <robh@kernel.org>
-rw-r--r--drivers/of/base.c86
-rw-r--r--drivers/of/of_private.h3
-rw-r--r--drivers/of/resolver.c3
3 files changed, 85 insertions, 7 deletions
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 091aa9449c3a..848f549164cd 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -91,10 +91,72 @@ int __weak of_node_to_nid(struct device_node *np)
91} 91}
92#endif 92#endif
93 93
94static struct device_node **phandle_cache;
95static u32 phandle_cache_mask;
96
97/*
98 * Assumptions behind phandle_cache implementation:
99 * - phandle property values are in a contiguous range of 1..n
100 *
101 * If the assumptions do not hold, then
102 * - the phandle lookup overhead reduction provided by the cache
103 * will likely be less
104 */
105static void of_populate_phandle_cache(void)
106{
107 unsigned long flags;
108 u32 cache_entries;
109 struct device_node *np;
110 u32 phandles = 0;
111
112 raw_spin_lock_irqsave(&devtree_lock, flags);
113
114 kfree(phandle_cache);
115 phandle_cache = NULL;
116
117 for_each_of_allnodes(np)
118 if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL)
119 phandles++;
120
121 cache_entries = roundup_pow_of_two(phandles);
122 phandle_cache_mask = cache_entries - 1;
123
124 phandle_cache = kcalloc(cache_entries, sizeof(*phandle_cache),
125 GFP_ATOMIC);
126 if (!phandle_cache)
127 goto out;
128
129 for_each_of_allnodes(np)
130 if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL)
131 phandle_cache[np->phandle & phandle_cache_mask] = np;
132
133out:
134 raw_spin_unlock_irqrestore(&devtree_lock, flags);
135}
136
137#ifndef CONFIG_MODULES
138static int __init of_free_phandle_cache(void)
139{
140 unsigned long flags;
141
142 raw_spin_lock_irqsave(&devtree_lock, flags);
143
144 kfree(phandle_cache);
145 phandle_cache = NULL;
146
147 raw_spin_unlock_irqrestore(&devtree_lock, flags);
148
149 return 0;
150}
151late_initcall_sync(of_free_phandle_cache);
152#endif
153
94void __init of_core_init(void) 154void __init of_core_init(void)
95{ 155{
96 struct device_node *np; 156 struct device_node *np;
97 157
158 of_populate_phandle_cache();
159
98 /* Create the kset, and register existing nodes */ 160 /* Create the kset, and register existing nodes */
99 mutex_lock(&of_mutex); 161 mutex_lock(&of_mutex);
100 of_kset = kset_create_and_add("devicetree", NULL, firmware_kobj); 162 of_kset = kset_create_and_add("devicetree", NULL, firmware_kobj);
@@ -1021,16 +1083,32 @@ EXPORT_SYMBOL_GPL(of_modalias_node);
1021 */ 1083 */
1022struct device_node *of_find_node_by_phandle(phandle handle) 1084struct device_node *of_find_node_by_phandle(phandle handle)
1023{ 1085{
1024 struct device_node *np; 1086 struct device_node *np = NULL;
1025 unsigned long flags; 1087 unsigned long flags;
1088 phandle masked_handle;
1026 1089
1027 if (!handle) 1090 if (!handle)
1028 return NULL; 1091 return NULL;
1029 1092
1030 raw_spin_lock_irqsave(&devtree_lock, flags); 1093 raw_spin_lock_irqsave(&devtree_lock, flags);
1031 for_each_of_allnodes(np) 1094
1032 if (np->phandle == handle) 1095 masked_handle = handle & phandle_cache_mask;
1033 break; 1096
1097 if (phandle_cache) {
1098 if (phandle_cache[masked_handle] &&
1099 handle == phandle_cache[masked_handle]->phandle)
1100 np = phandle_cache[masked_handle];
1101 }
1102
1103 if (!np) {
1104 for_each_of_allnodes(np)
1105 if (np->phandle == handle) {
1106 if (phandle_cache)
1107 phandle_cache[masked_handle] = np;
1108 break;
1109 }
1110 }
1111
1034 of_node_get(np); 1112 of_node_get(np);
1035 raw_spin_unlock_irqrestore(&devtree_lock, flags); 1113 raw_spin_unlock_irqrestore(&devtree_lock, flags);
1036 return np; 1114 return np;
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index 26bb31beb03e..891d780c076a 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -132,6 +132,9 @@ extern void __of_detach_node_sysfs(struct device_node *np);
132extern void __of_sysfs_remove_bin_file(struct device_node *np, 132extern void __of_sysfs_remove_bin_file(struct device_node *np,
133 struct property *prop); 133 struct property *prop);
134 134
135/* illegal phandle value (set when unresolved) */
136#define OF_PHANDLE_ILLEGAL 0xdeadbeef
137
135/* iterators for transactions, used for overlays */ 138/* iterators for transactions, used for overlays */
136/* forward iterator */ 139/* forward iterator */
137#define for_each_transaction_entry(_oft, _te) \ 140#define for_each_transaction_entry(_oft, _te) \
diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index b2f645187213..65d0b7adfcd4 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -19,9 +19,6 @@
19 19
20#include "of_private.h" 20#include "of_private.h"
21 21
22/* illegal phandle value (set when unresolved) */
23#define OF_PHANDLE_ILLEGAL 0xdeadbeef
24
25static phandle live_tree_max_phandle(void) 22static phandle live_tree_max_phandle(void)
26{ 23{
27 struct device_node *node; 24 struct device_node *node;