author		Mauro Carvalho Chehab <mchehab@s-opensource.com>	2017-05-17 08:10:48 -0400
committer	Jonathan Corbet <corbet@lwn.net>			2017-07-14 15:58:08 -0400
commit		79ab3b0d21ea1ac48ce0e6b44997dd0a8c8f72e6
tree		42fff744b03118065d5af89ecda6a9292c080d6f
parent		aa4d520358ed03ee5c2f54ef66e42f971839e62e
this_cpu_ops.txt: standardize document format
Each text file under Documentation follows a different
format. Some don't even have titles!

Change this file's representation to follow the adopted standard,
using ReST markup so that it can be parsed by Sphinx:

- promote the document title one level;
- mark literal blocks;
- move authorship to the beginning of the file and mark it up as ReST fields.
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
-rw-r--r--	Documentation/this_cpu_ops.txt	49
1 file changed, 28 insertions, 21 deletions
diff --git a/Documentation/this_cpu_ops.txt b/Documentation/this_cpu_ops.txt
index 2cbf71975381..5cb8b883ae83 100644
--- a/Documentation/this_cpu_ops.txt
+++ b/Documentation/this_cpu_ops.txt
@@ -1,5 +1,9 @@
+===================
 this_cpu operations
--------------------
+===================
+
+:Author: Christoph Lameter, August 4th, 2014
+:Author: Pranith Kumar, Aug 2nd, 2014
 
 this_cpu operations are a way of optimizing access to per cpu
 variables associated with the *currently* executing processor. This is
@@ -39,7 +43,7 @@ operations.
 
 The following this_cpu() operations with implied preemption protection
 are defined. These operations can be used without worrying about
-preemption and interrupts.
+preemption and interrupts::
 
 	this_cpu_read(pcp)
 	this_cpu_write(pcp, val)
@@ -67,14 +71,14 @@ to relocate a per cpu relative address to the proper per cpu area for
 the processor. So the relocation to the per cpu base is encoded in the
 instruction via a segment register prefix.
 
-For example:
+For example::
 
 	DEFINE_PER_CPU(int, x);
 	int z;
 
 	z = this_cpu_read(x);
 
-results in a single instruction
+results in a single instruction::
 
 	mov ax, gs:[x]
 
@@ -84,16 +88,16 @@ this_cpu_ops such sequence also required preempt disable/enable to
 prevent the kernel from moving the thread to a different processor
 while the calculation is performed.
 
-Consider the following this_cpu operation:
+Consider the following this_cpu operation::
 
 	this_cpu_inc(x)
 
-The above results in the following single instruction (no lock prefix!)
+The above results in the following single instruction (no lock prefix!)::
 
 	inc gs:[x]
 
 instead of the following operations required if there is no segment
-register:
+register::
 
 	int *y;
 	int cpu;
@@ -121,8 +125,10 @@ has to be paid for this optimization is the need to add up the per cpu
 counters when the value of a counter is needed.
 
 
-Special operations:
--------------------
+Special operations
+------------------
+
+::
 
 	y = this_cpu_ptr(&x)
 
@@ -153,11 +159,15 @@ Therefore the use of x or &x outside of the context of per cpu
 operations is invalid and will generally be treated like a NULL
 pointer dereference.
 
+::
+
 	DEFINE_PER_CPU(int, x);
 
 In the context of per cpu operations the above implies that x is a per
 cpu variable. Most this_cpu operations take a cpu variable.
 
+::
+
 	int __percpu *p = &x;
 
 &x and hence p is the *offset* of a per cpu variable. this_cpu_ptr()
@@ -168,7 +178,7 @@ strange.
 Operations on a field of a per cpu structure
 --------------------------------------------
 
-Let's say we have a percpu structure
+Let's say we have a percpu structure::
 
 	struct s {
 		int n,m;
@@ -177,14 +187,14 @@ Let's say we have a percpu structure
 	DEFINE_PER_CPU(struct s, p);
 
 
-Operations on these fields are straightforward
+Operations on these fields are straightforward::
 
 	this_cpu_inc(p.m)
 
 	z = this_cpu_cmpxchg(p.m, 0, 1);
 
 
-If we have an offset to struct s:
+If we have an offset to struct s::
 
 	struct s __percpu *ps = &p;
 
@@ -194,7 +204,7 @@ If we have an offset to struct s:
 
 
 The calculation of the pointer may require the use of this_cpu_ptr()
-if we do not make use of this_cpu ops later to manipulate fields:
+if we do not make use of this_cpu ops later to manipulate fields::
 
 	struct s *pp;
 
@@ -206,7 +216,7 @@ if we do not make use of this_cpu ops later to manipulate fields:
 
 
 Variants of this_cpu ops
--------------------------
+------------------------
 
 this_cpu ops are interrupt safe. Some architectures do not support
 these per cpu local operations. In that case the operation must be
@@ -222,7 +232,7 @@ preemption. If a per cpu variable is not used in an interrupt context
 and the scheduler cannot preempt, then they are safe. If any interrupts
 still occur while an operation is in progress and if the interrupt too
 modifies the variable, then RMW actions can not be guaranteed to be
-safe.
+safe::
 
 	__this_cpu_read(pcp)
 	__this_cpu_write(pcp, val)
@@ -279,7 +289,7 @@ unless absolutely necessary. Please consider using an IPI to wake up
 the remote CPU and perform the update to its per cpu area.
 
 To access per-cpu data structure remotely, typically the per_cpu_ptr()
-function is used:
+function is used::
 
 
 	DEFINE_PER_CPU(struct data, datap);
@@ -289,7 +299,7 @@ function is used:
 This makes it explicit that we are getting ready to access a percpu
 area remotely.
 
-You can also do the following to convert the datap offset to an address
+You can also do the following to convert the datap offset to an address::
 
 	struct data *p = this_cpu_ptr(&datap);
 
@@ -305,7 +315,7 @@ the following scenario that occurs because two per cpu variables
 share a cache-line but the relaxed synchronization is applied to
 only one process updating the cache-line.
 
-Consider the following example
+Consider the following example::
 
 
 	struct test {
@@ -327,6 +337,3 @@ mind that a remote write will evict the cache line from the processor
 that most likely will access it. If the processor wakes up and finds a
 missing local cache line of a per cpu area, its performance and hence
 the wake up times will be affected.
-
-Christoph Lameter, August 4th, 2014
-Pranith Kumar, Aug 2nd, 2014
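
As a quick illustration for readers of this commit (it is not part of the
patch itself), the pattern the reformatted document describes boils down to
a sketch like the one below. It uses only interfaces the text already
covers (DEFINE_PER_CPU(), this_cpu_inc(), per_cpu()); the counter name
"hits" and the helpers count_hit()/total_hits() are made up for the
example::

	/*
	 * Illustrative sketch only: a per-cpu hit counter updated with
	 * the preemption- and interrupt-safe this_cpu API, then summed
	 * across CPUs when a total is needed.
	 */
	#include <linux/percpu.h>
	#include <linux/cpumask.h>

	static DEFINE_PER_CPU(unsigned long, hits);

	/*
	 * Safe against preemption and interrupts: a single RMW on this
	 * CPU's copy, as described for this_cpu_inc() in the document.
	 */
	static void count_hit(void)
	{
		this_cpu_inc(hits);
	}

	/*
	 * The price of the optimization: a reader has to add up the
	 * per-cpu counters when the total value is needed.
	 */
	static unsigned long total_hits(void)
	{
		unsigned long sum = 0;
		int cpu;

		for_each_possible_cpu(cpu)
			sum += per_cpu(hits, cpu);
		return sum;
	}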