| author | Robert Richter <robert.richter@amd.com> | 2010-10-25 10:28:14 -0400 |
|---|---|---|
| committer | Robert Richter <robert.richter@amd.com> | 2010-10-25 10:29:12 -0400 |
| commit | dbd1e66e04558a582e673bc4a9cd933ce0228d93 (patch) | |
| tree | 85f3633276282cde0a3ac558d988704eaa3e68af | |
| parent | 328b8f1ba50b708a1b3c0acd7c41ee1b356822f6 (diff) | |
| parent | 4a60cfa9457749f7987fd4f3c956dbba5a281129 (diff) | |
Merge commit 'linux-2.6/master' (early part) into oprofile/core
This branch depends on these apic patches:
apic, x86: Use BIOS settings for IBS and MCE threshold interrupt LVT offsets
apic, x86: Check if EILVT APIC registers are available (AMD only)
Signed-off-by: Robert Richter <robert.richter@amd.com>
465 files changed, 16810 insertions, 10045 deletions
diff --git a/Documentation/DocBook/genericirq.tmpl b/Documentation/DocBook/genericirq.tmpl index 1448b33fd222..fb10fd08c05c 100644 --- a/Documentation/DocBook/genericirq.tmpl +++ b/Documentation/DocBook/genericirq.tmpl | |||
| @@ -28,7 +28,7 @@ | |||
| 28 | </authorgroup> | 28 | </authorgroup> |
| 29 | 29 | ||
| 30 | <copyright> | 30 | <copyright> |
| 31 | <year>2005-2006</year> | 31 | <year>2005-2010</year> |
| 32 | <holder>Thomas Gleixner</holder> | 32 | <holder>Thomas Gleixner</holder> |
| 33 | </copyright> | 33 | </copyright> |
| 34 | <copyright> | 34 | <copyright> |
| @@ -100,6 +100,10 @@ | |||
| 100 | <listitem><para>Edge type</para></listitem> | 100 | <listitem><para>Edge type</para></listitem> |
| 101 | <listitem><para>Simple type</para></listitem> | 101 | <listitem><para>Simple type</para></listitem> |
| 102 | </itemizedlist> | 102 | </itemizedlist> |
| 103 | During the implementation we identified another type: | ||
| 104 | <itemizedlist> | ||
| 105 | <listitem><para>Fast EOI type</para></listitem> | ||
| 106 | </itemizedlist> | ||
| 103 | In the SMP world of the __do_IRQ() super-handler another type | 107 | In the SMP world of the __do_IRQ() super-handler another type |
| 104 | was identified: | 108 | was identified: |
| 105 | <itemizedlist> | 109 | <itemizedlist> |
| @@ -153,6 +157,7 @@ | |||
| 153 | is still available. This leads to a kind of duality for the time | 157 | is still available. This leads to a kind of duality for the time |
| 154 | being. Over time the new model should be used in more and more | 158 | being. Over time the new model should be used in more and more |
| 155 | architectures, as it enables smaller and cleaner IRQ subsystems. | 159 | architectures, as it enables smaller and cleaner IRQ subsystems. |
| 160 | It has been deprecated for three years now and is about to be removed. | ||
| 156 | </para> | 161 | </para> |
| 157 | </chapter> | 162 | </chapter> |
| 158 | <chapter id="bugs"> | 163 | <chapter id="bugs"> |
| @@ -217,6 +222,7 @@ | |||
| 217 | <itemizedlist> | 222 | <itemizedlist> |
| 218 | <listitem><para>handle_level_irq</para></listitem> | 223 | <listitem><para>handle_level_irq</para></listitem> |
| 219 | <listitem><para>handle_edge_irq</para></listitem> | 224 | <listitem><para>handle_edge_irq</para></listitem> |
| 225 | <listitem><para>handle_fasteoi_irq</para></listitem> | ||
| 220 | <listitem><para>handle_simple_irq</para></listitem> | 226 | <listitem><para>handle_simple_irq</para></listitem> |
| 221 | <listitem><para>handle_percpu_irq</para></listitem> | 227 | <listitem><para>handle_percpu_irq</para></listitem> |
| 222 | </itemizedlist> | 228 | </itemizedlist> |
| @@ -233,33 +239,33 @@ | |||
| 233 | are used by the default flow implementations. | 239 | are used by the default flow implementations. |
| 234 | The following helper functions are implemented (simplified excerpt): | 240 | The following helper functions are implemented (simplified excerpt): |
| 235 | <programlisting> | 241 | <programlisting> |
| 236 | default_enable(irq) | 242 | default_enable(struct irq_data *data) |
| 237 | { | 243 | { |
| 238 | desc->chip->unmask(irq); | 244 | desc->chip->irq_unmask(data); |
| 239 | } | 245 | } |
| 240 | 246 | ||
| 241 | default_disable(irq) | 247 | default_disable(struct irq_data *data) |
| 242 | { | 248 | { |
| 243 | if (!delay_disable(irq)) | 249 | if (!delay_disable(data)) |
| 244 | desc->chip->mask(irq); | 250 | desc->chip->irq_mask(data); |
| 245 | } | 251 | } |
| 246 | 252 | ||
| 247 | default_ack(irq) | 253 | default_ack(struct irq_data *data) |
| 248 | { | 254 | { |
| 249 | chip->ack(irq); | 255 | chip->irq_ack(data); |
| 250 | } | 256 | } |
| 251 | 257 | ||
| 252 | default_mask_ack(irq) | 258 | default_mask_ack(struct irq_data *data) |
| 253 | { | 259 | { |
| 254 | if (chip->mask_ack) { | 260 | if (chip->irq_mask_ack) { |
| 255 | chip->mask_ack(irq); | 261 | chip->irq_mask_ack(data); |
| 256 | } else { | 262 | } else { |
| 257 | chip->mask(irq); | 263 | chip->irq_mask(data); |
| 258 | chip->ack(irq); | 264 | chip->irq_ack(data); |
| 259 | } | 265 | } |
| 260 | } | 266 | } |
| 261 | 267 | ||
| 262 | noop(irq) | 268 | noop(struct irq_data *data) |
| 263 | { | 269 | { |
| 264 | } | 270 | } |
| 265 | 271 | ||
| @@ -278,12 +284,27 @@ noop(irq) | |||
| 278 | <para> | 284 | <para> |
| 279 | The following control flow is implemented (simplified excerpt): | 285 | The following control flow is implemented (simplified excerpt): |
| 280 | <programlisting> | 286 | <programlisting> |
| 281 | desc->chip->start(); | 287 | desc->chip->irq_mask(); |
| 282 | handle_IRQ_event(desc->action); | 288 | handle_IRQ_event(desc->action); |
| 283 | desc->chip->end(); | 289 | desc->chip->irq_unmask(); |
| 284 | </programlisting> | 290 | </programlisting> |
| 285 | </para> | 291 | </para> |
| 286 | </sect3> | 292 | </sect3> |
| 293 | <sect3 id="Default_FASTEOI_IRQ_flow_handler"> | ||
| 294 | <title>Default Fast EOI IRQ flow handler</title> | ||
| 295 | <para> | ||
| 296 | handle_fasteoi_irq provides a generic implementation | ||
| 297 | for interrupts which only need an EOI at the end of | ||
| 298 | the handler. | ||
| 299 | </para> | ||
| 300 | <para> | ||
| 301 | The following control flow is implemented (simplified excerpt): | ||
| 302 | <programlisting> | ||
| 303 | handle_IRQ_event(desc->action); | ||
| 304 | desc->chip->irq_eoi(); | ||
| 305 | </programlisting> | ||
| 306 | </para> | ||
| 307 | </sect3> | ||
| 287 | <sect3 id="Default_Edge_IRQ_flow_handler"> | 308 | <sect3 id="Default_Edge_IRQ_flow_handler"> |
| 288 | <title>Default Edge IRQ flow handler</title> | 309 | <title>Default Edge IRQ flow handler</title> |
| 289 | <para> | 310 | <para> |
| @@ -294,20 +315,19 @@ desc->chip->end(); | |||
| 294 | The following control flow is implemented (simplified excerpt): | 315 | The following control flow is implemented (simplified excerpt): |
| 295 | <programlisting> | 316 | <programlisting> |
| 296 | if (desc->status & running) { | 317 | if (desc->status & running) { |
| 297 | desc->chip->hold(); | 318 | desc->chip->irq_mask(); |
| 298 | desc->status |= pending | masked; | 319 | desc->status |= pending | masked; |
| 299 | return; | 320 | return; |
| 300 | } | 321 | } |
| 301 | desc->chip->start(); | 322 | desc->chip->irq_ack(); |
| 302 | desc->status |= running; | 323 | desc->status |= running; |
| 303 | do { | 324 | do { |
| 304 | if (desc->status & masked) | 325 | if (desc->status & masked) |
| 305 | desc->chip->enable(); | 326 | desc->chip->irq_unmask(); |
| 306 | desc->status &= ~pending; | 327 | desc->status &= ~pending; |
| 307 | handle_IRQ_event(desc->action); | 328 | handle_IRQ_event(desc->action); |
| 308 | } while (status & pending); | 329 | } while (status & pending); |
| 309 | desc->status &= ~running; | 330 | desc->status &= ~running; |
| 310 | desc->chip->end(); | ||
| 311 | </programlisting> | 331 | </programlisting> |
| 312 | </para> | 332 | </para> |
| 313 | </sect3> | 333 | </sect3> |
| @@ -342,9 +362,9 @@ handle_IRQ_event(desc->action); | |||
| 342 | <para> | 362 | <para> |
| 343 | The following control flow is implemented (simplified excerpt): | 363 | The following control flow is implemented (simplified excerpt): |
| 344 | <programlisting> | 364 | <programlisting> |
| 345 | desc->chip->start(); | ||
| 346 | handle_IRQ_event(desc->action); | 365 | handle_IRQ_event(desc->action); |
| 347 | desc->chip->end(); | 366 | if (desc->chip->irq_eoi) |
| 367 | desc->chip->irq_eoi(); | ||
| 348 | </programlisting> | 368 | </programlisting> |
| 349 | </para> | 369 | </para> |
| 350 | </sect3> | 370 | </sect3> |
| @@ -375,8 +395,7 @@ desc->chip->end(); | |||
| 375 | mechanism. (It's necessary to enable CONFIG_HARDIRQS_SW_RESEND when | 395 | mechanism. (It's necessary to enable CONFIG_HARDIRQS_SW_RESEND when |
| 376 | you want to use the delayed interrupt disable feature and your | 396 | you want to use the delayed interrupt disable feature and your |
| 377 | hardware is not capable of retriggering an interrupt.) | 397 | hardware is not capable of retriggering an interrupt.) |
| 378 | The delayed interrupt disable can be runtime enabled, per interrupt, | 398 | The delayed interrupt disable is not configurable. |
| 379 | by setting the IRQ_DELAYED_DISABLE flag in the irq_desc status field. | ||
| 380 | </para> | 399 | </para> |
| 381 | </sect2> | 400 | </sect2> |
| 382 | </sect1> | 401 | </sect1> |
| @@ -387,13 +406,13 @@ desc->chip->end(); | |||
| 387 | contains all the direct chip relevant functions, which | 406 | contains all the direct chip relevant functions, which |
| 388 | can be utilized by the irq flow implementations. | 407 | can be utilized by the irq flow implementations. |
| 389 | <itemizedlist> | 408 | <itemizedlist> |
| 390 | <listitem><para>ack()</para></listitem> | 409 | <listitem><para>irq_ack()</para></listitem> |
| 391 | <listitem><para>mask_ack() - Optional, recommended for performance</para></listitem> | 410 | <listitem><para>irq_mask_ack() - Optional, recommended for performance</para></listitem> |
| 392 | <listitem><para>mask()</para></listitem> | 411 | <listitem><para>irq_mask()</para></listitem> |
| 393 | <listitem><para>unmask()</para></listitem> | 412 | <listitem><para>irq_unmask()</para></listitem> |
| 394 | <listitem><para>retrigger() - Optional</para></listitem> | 413 | <listitem><para>irq_retrigger() - Optional</para></listitem> |
| 395 | <listitem><para>set_type() - Optional</para></listitem> | 414 | <listitem><para>irq_set_type() - Optional</para></listitem> |
| 396 | <listitem><para>set_wake() - Optional</para></listitem> | 415 | <listitem><para>irq_set_wake() - Optional</para></listitem> |
| 397 | </itemizedlist> | 416 | </itemizedlist> |
| 398 | These primitives are strictly intended to mean what they say: ack means | 417 | These primitives are strictly intended to mean what they say: ack means |
| 399 | ACK, masking means masking of an IRQ line, etc. It is up to the flow | 418 | ACK, masking means masking of an IRQ line, etc. It is up to the flow |
| @@ -458,6 +477,7 @@ desc->chip->end(); | |||
| 458 | <para> | 477 | <para> |
| 459 | This chapter contains the autogenerated documentation of the internal functions. | 478 | This chapter contains the autogenerated documentation of the internal functions. |
| 460 | </para> | 479 | </para> |
| 480 | !Ikernel/irq/irqdesc.c | ||
| 461 | !Ikernel/irq/handle.c | 481 | !Ikernel/irq/handle.c |
| 462 | !Ikernel/irq/chip.c | 482 | !Ikernel/irq/chip.c |
| 463 | </chapter> | 483 | </chapter> |
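The genericirq hunks above describe the new irq_data-based chip callbacks and the fasteoi flow only in prose and simplified excerpts. Below is a minimal sketch, not part of the patch, of how a controller driver could plug into that 2.6.37-era API; the foo_* names, register offsets, and the assumption of at most 32 interrupt lines are all hypothetical.

```c
/*
 * Illustrative only: a controller driver using the irq_data-based
 * callbacks with the fasteoi flow documented above.  foo_* names and
 * register offsets are hypothetical; irq numbers are assumed < 32.
 */
#include <linux/irq.h>
#include <linux/io.h>

static void __iomem *foo_base;	/* hypothetical controller MMIO base */

static void foo_irq_mask(struct irq_data *d)
{
	writel(1 << d->irq, foo_base + 0x10);	/* hypothetical MASK register */
}

static void foo_irq_unmask(struct irq_data *d)
{
	writel(1 << d->irq, foo_base + 0x14);	/* hypothetical UNMASK register */
}

static void foo_irq_eoi(struct irq_data *d)
{
	writel(1 << d->irq, foo_base + 0x18);	/* hypothetical EOI register */
}

static struct irq_chip foo_chip = {
	.name		= "FOO",
	.irq_mask	= foo_irq_mask,
	.irq_unmask	= foo_irq_unmask,
	.irq_eoi	= foo_irq_eoi,
};

static void foo_init_one(unsigned int irq)
{
	/* handle_fasteoi_irq invokes ->irq_eoi() once the handler has run */
	set_irq_chip_and_handler(irq, &foo_chip, handle_fasteoi_irq);
}
```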
diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index a0d479d1e1dd..f66f4df18690 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl | |||
| @@ -1645,7 +1645,9 @@ the amount of locking which needs to be done. | |||
| 1645 | all the readers who were traversing the list when we deleted the | 1645 | all the readers who were traversing the list when we deleted the |
| 1646 | element are finished. We use <function>call_rcu()</function> to | 1646 | element are finished. We use <function>call_rcu()</function> to |
| 1647 | register a callback which will actually destroy the object once | 1647 | register a callback which will actually destroy the object once |
| 1648 | the readers are finished. | 1648 | all pre-existing readers are finished. Alternatively, |
| 1649 | <function>synchronize_rcu()</function> may be used to block until | ||
| 1650 | all pre-existing readers are finished. | ||
| 1649 | </para> | 1651 | </para> |
| 1650 | <para> | 1652 | <para> |
| 1651 | But how does Read Copy Update know when the readers are | 1653 | But how does Read Copy Update know when the readers are |
| @@ -1714,7 +1716,7 @@ the amount of locking which needs to be done. | |||
| 1714 | - object_put(obj); | 1716 | - object_put(obj); |
| 1715 | + list_del_rcu(&obj->list); | 1717 | + list_del_rcu(&obj->list); |
| 1716 | cache_num--; | 1718 | cache_num--; |
| 1717 | + call_rcu(&obj->rcu, cache_delete_rcu, obj); | 1719 | + call_rcu(&obj->rcu, cache_delete_rcu); |
| 1718 | } | 1720 | } |
| 1719 | 1721 | ||
| 1720 | /* Must be holding cache_lock */ | 1722 | /* Must be holding cache_lock */ |
| @@ -1725,14 +1727,6 @@ the amount of locking which needs to be done. | |||
| 1725 | if (++cache_num > MAX_CACHE_SIZE) { | 1727 | if (++cache_num > MAX_CACHE_SIZE) { |
| 1726 | struct object *i, *outcast = NULL; | 1728 | struct object *i, *outcast = NULL; |
| 1727 | list_for_each_entry(i, &cache, list) { | 1729 | list_for_each_entry(i, &cache, list) { |
| 1728 | @@ -85,6 +94,7 @@ | ||
| 1729 | obj->popularity = 0; | ||
| 1730 | atomic_set(&obj->refcnt, 1); /* The cache holds a reference */ | ||
| 1731 | spin_lock_init(&obj->lock); | ||
| 1732 | + INIT_RCU_HEAD(&obj->rcu); | ||
| 1733 | |||
| 1734 | spin_lock_irqsave(&cache_lock, flags); | ||
| 1735 | __cache_add(obj); | ||
| 1736 | @@ -104,12 +114,11 @@ | 1730 | @@ -104,12 +114,11 @@ |
| 1737 | struct object *cache_find(int id) | 1731 | struct object *cache_find(int id) |
| 1738 | { | 1732 | { |
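The kernel-locking hunks above contrast call_rcu() (asynchronous, callback-driven) with synchronize_rcu() (blocks until pre-existing readers finish). A minimal sketch of the two styles, modelled on the document's cache example, follows; struct object's layout and the helper names are assumptions, not code from the patch.

```c
/*
 * Illustrative only: the two RCU deferred-free styles described above.
 * Callers are assumed to hold cache_lock, as in the document's example.
 */
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/list.h>

struct object {
	struct list_head list;
	struct rcu_head rcu;
	int id;
};

static void cache_delete_rcu(struct rcu_head *head)
{
	/* call_rcu() passes only the rcu_head; recover the object */
	struct object *obj = container_of(head, struct object, rcu);

	kfree(obj);
}

/* Asynchronous: returns at once, obj freed after a grace period elapses */
static void obj_del_async(struct object *obj)
{
	list_del_rcu(&obj->list);
	call_rcu(&obj->rcu, cache_delete_rcu);
}

/* Synchronous: blocks until all pre-existing readers are finished */
static void obj_del_sync(struct object *obj)
{
	list_del_rcu(&obj->list);
	synchronize_rcu();
	kfree(obj);
}
```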
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 790d1a812376..0c134f8afc6f 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt | |||
| @@ -218,13 +218,22 @@ over a rather long period of time, but improvements are always welcome! | |||
| 218 | include: | 218 | include: |
| 219 | 219 | ||
| 220 | a. Keeping a count of the number of data-structure elements | 220 | a. Keeping a count of the number of data-structure elements |
| 221 | used by the RCU-protected data structure, including those | 221 | used by the RCU-protected data structure, including |
| 222 | waiting for a grace period to elapse. Enforce a limit | 222 | those waiting for a grace period to elapse. Enforce a |
| 223 | on this number, stalling updates as needed to allow | 223 | limit on this number, stalling updates as needed to allow |
| 224 | previously deferred frees to complete. | 224 | previously deferred frees to complete. Alternatively, |
| 225 | 225 | limit only the number awaiting deferred free rather than | |
| 226 | Alternatively, limit only the number awaiting deferred | 226 | the total number of elements. |
| 227 | free rather than the total number of elements. | 227 | |
| 228 | One way to stall the updates is to acquire the update-side | ||
| 229 | mutex. (Don't try this with a spinlock -- other CPUs | ||
| 230 | spinning on the lock could prevent the grace period | ||
| 231 | from ever ending.) Another way to stall the updates | ||
| 232 | is for the updates to use a wrapper function around | ||
| 233 | the memory allocator, so that this wrapper function | ||
| 234 | simulates OOM when there is too much memory awaiting an | ||
| 235 | RCU grace period. There are of course many other | ||
| 236 | variations on this theme. | ||
| 228 | 237 | ||
| 229 | b. Limiting update rate. For example, if updates occur only | 238 | b. Limiting update rate. For example, if updates occur only |
| 230 | once per hour, then no explicit rate limiting is required, | 239 | once per hour, then no explicit rate limiting is required, |
| @@ -365,3 +374,26 @@ over a rather long period of time, but improvements are always welcome! | |||
| 365 | and the compiler to freely reorder code into and out of RCU | 374 | and the compiler to freely reorder code into and out of RCU |
| 366 | read-side critical sections. It is the responsibility of the | 375 | read-side critical sections. It is the responsibility of the |
| 367 | RCU update-side primitives to deal with this. | 376 | RCU update-side primitives to deal with this. |
| 377 | |||
| 378 | 17. Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and | ||
| 379 | the __rcu sparse checks to validate your RCU code. These | ||
| 380 | can help find problems as follows: | ||
| 381 | |||
| 382 | CONFIG_PROVE_RCU: check that accesses to RCU-protected data | ||
| 383 | structures are carried out under the proper RCU | ||
| 384 | read-side critical section, while holding the right | ||
| 385 | combination of locks, or whatever other conditions | ||
| 386 | are appropriate. | ||
| 387 | |||
| 388 | CONFIG_DEBUG_OBJECTS_RCU_HEAD: check that you don't pass the | ||
| 389 | same object to call_rcu() (or friends) before an RCU | ||
| 390 | grace period has elapsed since the last time that you | ||
| 391 | passed that same object to call_rcu() (or friends). | ||
| 392 | |||
| 393 | __rcu sparse checks: tag the pointer to the RCU-protected data | ||
| 394 | structure with __rcu, and sparse will warn you if you | ||
| 395 | access that pointer without the services of one of the | ||
| 396 | variants of rcu_dereference(). | ||
| 397 | |||
| 398 | These debugging aids can help you find problems that are | ||
| 399 | otherwise extremely difficult to spot. | ||
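A small sketch of the __rcu annotation described in item 17 above; names are illustrative, and sparse (run via "make C=2") can then warn when the tagged pointer is dereferenced without an rcu_dereference() variant.

```c
/* Illustrative only: __rcu-tagged pointer with checked accessors. */
#include <linux/rcupdate.h>

struct foo {
	int a;
};

static struct foo __rcu *gp;	/* RCU-protected global pointer */

int reader(void)
{
	struct foo *p;
	int a = -1;

	rcu_read_lock();
	p = rcu_dereference(gp);	/* checked access; a plain read of gp would warn */
	if (p)
		a = p->a;
	rcu_read_unlock();
	return a;
}

void updater(struct foo *newp)
{
	rcu_assign_pointer(gp, newp);	/* publish with the required barriers */
}
```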
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 44c6dcc93d6d..862c08ef1fde 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt | |||
| @@ -80,6 +80,24 @@ o A CPU looping with bottom halves disabled. This condition can | |||
| 80 | o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel | 80 | o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel |
| 81 | without invoking schedule(). | 81 | without invoking schedule(). |
| 82 | 82 | ||
| 83 | o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might | ||
| 84 | happen to preempt a low-priority task in the middle of an RCU | ||
| 85 | read-side critical section. This is especially damaging if | ||
| 86 | that low-priority task is not permitted to run on any other CPU, | ||
| 87 | in which case the next RCU grace period can never complete, which | ||
| 88 | will eventually cause the system to run out of memory and hang. | ||
| 89 | While the system is in the process of running itself out of | ||
| 90 | memory, you might see stall-warning messages. | ||
| 91 | |||
| 92 | o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that | ||
| 93 | is running at a higher priority than the RCU softirq threads. | ||
| 94 | This will prevent RCU callbacks from ever being invoked, | ||
| 95 | and in a CONFIG_TREE_PREEMPT_RCU kernel will further prevent | ||
| 96 | RCU grace periods from ever completing. Either way, the | ||
| 97 | system will eventually run out of memory and hang. In the | ||
| 98 | CONFIG_TREE_PREEMPT_RCU case, you might see stall-warning | ||
| 99 | messages. | ||
| 100 | |||
| 83 | o A bug in the RCU implementation. | 101 | o A bug in the RCU implementation. |
| 84 | 102 | ||
| 85 | o A hardware failure. This is quite unlikely, but has occurred | 103 | o A hardware failure. This is quite unlikely, but has occurred |
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index efd8cc95c06b..a851118775d8 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt | |||
| @@ -125,6 +125,17 @@ o "b" is the batch limit for this CPU. If more than this number | |||
| 125 | of RCU callbacks is ready to invoke, then the remainder will | 125 | of RCU callbacks is ready to invoke, then the remainder will |
| 126 | be deferred. | 126 | be deferred. |
| 127 | 127 | ||
| 128 | o "ci" is the number of RCU callbacks that have been invoked for | ||
| 129 | this CPU. Note that ci+ql is the number of callbacks that have | ||
| 130 | been registered in absence of CPU-hotplug activity. | ||
| 131 | |||
| 132 | o "co" is the number of RCU callbacks that have been orphaned due to | ||
| 133 | this CPU going offline. | ||
| 134 | |||
| 135 | o "ca" is the number of RCU callbacks that have been adopted due to | ||
| 136 | other CPUs going offline. Note that ci+co-ca+ql is the number of | ||
| 137 | RCU callbacks registered on this CPU. | ||
| 138 | |||
| 128 | There is also an rcu/rcudata.csv file with the same information in | 139 | There is also an rcu/rcudata.csv file with the same information in |
| 129 | comma-separated-variable spreadsheet format. | 140 | comma-separated-variable spreadsheet format. |
| 130 | 141 | ||
| @@ -180,7 +191,7 @@ o "s" is the "signaled" state that drives force_quiescent_state()'s | |||
| 180 | 191 | ||
| 181 | o "jfq" is the number of jiffies remaining for this grace period | 192 | o "jfq" is the number of jiffies remaining for this grace period |
| 182 | before force_quiescent_state() is invoked to help push things | 193 | before force_quiescent_state() is invoked to help push things |
| 183 | along. Note that CPUs in dyntick-idle mode thoughout the grace | 194 | along. Note that CPUs in dyntick-idle mode throughout the grace |
| 184 | period will not report on their own, but rather must be checked by | 195 | period will not report on their own, but rather must be checked by |
| 185 | some other CPU via force_quiescent_state(). | 196 | some other CPU via force_quiescent_state(). |
| 186 | 197 | ||
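As a worked example of the bookkeeping added above (the numbers are invented for illustration): a hypothetical rcudata line showing ql=10, ci=1000, co=2 and ca=5 would imply ci+co-ca+ql = 1000+2-5+10 = 1007 RCU callbacks registered on that CPU.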
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt index f1c5c4bccd3e..902d3151f527 100644 --- a/Documentation/cputopology.txt +++ b/Documentation/cputopology.txt | |||
| @@ -14,25 +14,39 @@ to /proc/cpuinfo. | |||
| 14 | identifier (rather than the kernel's). The actual value is | 14 | identifier (rather than the kernel's). The actual value is |
| 15 | architecture and platform dependent. | 15 | architecture and platform dependent. |
| 16 | 16 | ||
| 17 | 3) /sys/devices/system/cpu/cpuX/topology/thread_siblings: | 17 | 3) /sys/devices/system/cpu/cpuX/topology/book_id: |
| 18 | |||
| 19 | the book ID of cpuX. Typically it is the hardware platform's | ||
| 20 | identifier (rather than the kernel's). The actual value is | ||
| 21 | architecture and platform dependent. | ||
| 22 | |||
| 23 | 4) /sys/devices/system/cpu/cpuX/topology/thread_siblings: | ||
| 18 | 24 | ||
| 19 | internal kernel map of cpuX's hardware threads within the same | 25 | internal kernel map of cpuX's hardware threads within the same |
| 20 | core as cpuX | 26 | core as cpuX |
| 21 | 27 | ||
| 22 | 4) /sys/devices/system/cpu/cpuX/topology/core_siblings: | 28 | 5) /sys/devices/system/cpu/cpuX/topology/core_siblings: |
| 23 | 29 | ||
| 24 | internal kernel map of cpuX's hardware threads within the same | 30 | internal kernel map of cpuX's hardware threads within the same |
| 25 | physical_package_id. | 31 | physical_package_id. |
| 26 | 32 | ||
| 33 | 6) /sys/devices/system/cpu/cpuX/topology/book_siblings: | ||
| 34 | |||
| 35 | internal kernel map of cpuX's hardware threads within the same | ||
| 36 | book_id. | ||
| 37 | |||
| 27 | To implement it in an architecture-neutral way, a new source file, | 38 | To implement it in an architecture-neutral way, a new source file, |
| 28 | drivers/base/topology.c, is to export the 4 attributes. | 39 | drivers/base/topology.c, is to export the 4 or 6 attributes. The two book |
| 40 | related sysfs files will only be created if CONFIG_SCHED_BOOK is selected. | ||
| 29 | 41 | ||
| 30 | For an architecture to support this feature, it must define some of | 42 | For an architecture to support this feature, it must define some of |
| 31 | these macros in include/asm-XXX/topology.h: | 43 | these macros in include/asm-XXX/topology.h: |
| 32 | #define topology_physical_package_id(cpu) | 44 | #define topology_physical_package_id(cpu) |
| 33 | #define topology_core_id(cpu) | 45 | #define topology_core_id(cpu) |
| 46 | #define topology_book_id(cpu) | ||
| 34 | #define topology_thread_cpumask(cpu) | 47 | #define topology_thread_cpumask(cpu) |
| 35 | #define topology_core_cpumask(cpu) | 48 | #define topology_core_cpumask(cpu) |
| 49 | #define topology_book_cpumask(cpu) | ||
| 36 | 50 | ||
| 37 | The type of **_id is int. | 51 | The type of **_id is int. |
| 38 | The type of siblings is (const) struct cpumask *. | 52 | The type of siblings is (const) struct cpumask *. |
| @@ -45,6 +59,9 @@ not defined by include/asm-XXX/topology.h: | |||
| 45 | 3) thread_siblings: just the given CPU | 59 | 3) thread_siblings: just the given CPU |
| 46 | 4) core_siblings: just the given CPU | 60 | 4) core_siblings: just the given CPU |
| 47 | 61 | ||
| 62 | For architectures that don't support books (CONFIG_SCHED_BOOK) there are no | ||
| 63 | default definitions for topology_book_id() and topology_book_cpumask(). | ||
| 64 | |||
| 48 | Additionally, CPU topology information is provided under | 65 | Additionally, CPU topology information is provided under |
| 49 | /sys/devices/system/cpu and includes these files. The internal | 66 | /sys/devices/system/cpu and includes these files. The internal |
| 50 | source for the output is in brackets ("[]"). | 67 | source for the output is in brackets ("[]"). |
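To illustrate the macro set listed above, a hypothetical include/asm-XXX/topology.h for a book-aware (CONFIG_SCHED_BOOK) architecture might look like the following sketch; the cpu_topology array and its field names are assumptions modelled on the documented interface, not code from the patch.

```c
/* Illustrative only: arch wiring for the documented topology macros. */
#include <linux/cpumask.h>

struct cpu_topology_entry {
	int		physical_package_id;
	int		core_id;
	int		book_id;
	cpumask_t	thread_mask;
	cpumask_t	core_mask;
	cpumask_t	book_mask;
};

extern struct cpu_topology_entry cpu_topology[];

#define topology_physical_package_id(cpu) (cpu_topology[cpu].physical_package_id)
#define topology_core_id(cpu)             (cpu_topology[cpu].core_id)
#define topology_book_id(cpu)             (cpu_topology[cpu].book_id)
#define topology_thread_cpumask(cpu)      (&cpu_topology[cpu].thread_mask)
#define topology_core_cpumask(cpu)        (&cpu_topology[cpu].core_mask)
#define topology_book_cpumask(cpu)        (&cpu_topology[cpu].book_mask)
```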
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 842aa9de84a6..5e2bc4ab897a 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
| @@ -386,34 +386,6 @@ Who: Tejun Heo <tj@kernel.org> | |||
| 386 | 386 | ||
| 387 | ---------------------------- | 387 | ---------------------------- |
| 388 | 388 | ||
| 389 | What: Support for VMware's guest paravirtuliazation technique [VMI] will be | ||
| 390 | dropped. | ||
| 391 | When: 2.6.37 or earlier. | ||
| 392 | Why: With the recent innovations in CPU hardware acceleration technologies | ||
| 393 | from Intel and AMD, VMware ran a few experiments to compare these | ||
| 394 | techniques to guest paravirtualization technique on VMware's platform. | ||
| 395 | These hardware assisted virtualization techniques have outperformed the | ||
| 396 | performance benefits provided by VMI in most of the workloads. VMware | ||
| 397 | expects that these hardware features will be ubiquitous in a couple of | ||
| 398 | years, as a result, VMware has started a phased retirement of this | ||
| 399 | feature from the hypervisor. We will be removing this feature from the | ||
| 400 | Kernel too. Right now we are targeting 2.6.37 but can retire earlier if | ||
| 401 | technical reasons (read opportunity to remove major chunk of pvops) | ||
| 402 | arise. | ||
| 403 | |||
| 404 | Please note that VMI has always been an optimization and non-VMI kernels | ||
| 405 | still work fine on VMware's platform. | ||
| 406 | Latest versions of VMware's product which support VMI are, | ||
| 407 | Workstation 7.0 and VSphere 4.0 on ESX side, future maintainence | ||
| 408 | releases for these products will continue supporting VMI. | ||
| 409 | |||
| 410 | For more details about VMI retirement take a look at this, | ||
| 411 | http://blogs.vmware.com/guestosguide/2009/09/vmi-retirement.html | ||
| 412 | |||
| 413 | Who: Alok N Kataria <akataria@vmware.com> | ||
| 414 | |||
| 415 | ---------------------------- | ||
| 416 | |||
| 417 | What: Support for lcd_switch and display_get in asus-laptop driver | 389 | What: Support for lcd_switch and display_get in asus-laptop driver |
| 418 | When: March 2010 | 390 | When: March 2010 |
| 419 | Why: These two features use non-standard interfaces. There are the | 391 | Why: These two features use non-standard interfaces. There are the |
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8dd7248508a9..3a0009e03d14 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
| @@ -455,7 +455,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
| 455 | [ARM] imx_timer1,OSTS,netx_timer,mpu_timer2, | 455 | [ARM] imx_timer1,OSTS,netx_timer,mpu_timer2, |
| 456 | pxa_timer,timer3,32k_counter,timer0_1 | 456 | pxa_timer,timer3,32k_counter,timer0_1 |
| 457 | [AVR32] avr32 | 457 | [AVR32] avr32 |
| 458 | [X86-32] pit,hpet,tsc,vmi-timer; | 458 | [X86-32] pit,hpet,tsc; |
| 459 | scx200_hrt on Geode; cyclone on IBM x440 | 459 | scx200_hrt on Geode; cyclone on IBM x440 |
| 460 | [MIPS] MIPS | 460 | [MIPS] MIPS |
| 461 | [PARISC] cr16 | 461 | [PARISC] cr16 |
| @@ -2153,6 +2153,11 @@ and is between 256 and 4096 characters. It is defined in the file | |||
| 2153 | Reserves a hole at the top of the kernel virtual | 2153 | Reserves a hole at the top of the kernel virtual |
| 2154 | address space. | 2154 | address space. |
| 2155 | 2155 | ||
| 2156 | reservelow= [X86] | ||
| 2157 | Format: nn[K] | ||
| 2158 | Set the amount of memory to reserve for BIOS at | ||
| 2159 | the bottom of the address space. | ||
| 2160 | |||
| 2156 | reset_devices [KNL] Force drivers to reset the underlying device | 2161 | reset_devices [KNL] Force drivers to reset the underlying device |
| 2157 | during initialization. | 2162 | during initialization. |
| 2158 | 2163 | ||
| @@ -2435,6 +2440,10 @@ and is between 256 and 4096 characters. It is defined in the file | |||
| 2435 | disables clocksource verification at runtime. | 2440 | disables clocksource verification at runtime. |
| 2436 | Used to enable high-resolution timer mode on older | 2441 | Used to enable high-resolution timer mode on older |
| 2437 | hardware, and in virtualized environment. | 2442 | hardware, and in virtualized environment. |
| 2443 | [x86] noirqtime: Do not use TSC to do irq accounting. | ||
| 2444 | Used to disable IRQ_TIME_ACCOUNTING at run time on any | ||
| 2445 | platforms where RDTSC is slow and this accounting | ||
| 2446 | can add overhead. | ||
| 2438 | 2447 | ||
| 2439 | turbografx.map[2|3]= [HW,JOY] | 2448 | turbografx.map[2|3]= [HW,JOY] |
| 2440 | TurboGraFX parallel port interface | 2449 | TurboGraFX parallel port interface |
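As an illustration only (the values are invented), a boot command line combining the two options documented above could read:

```
vmlinuz root=/dev/sda1 reservelow=64K tsc=noirqtime
```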
diff --git a/MAINTAINERS b/MAINTAINERS index f2a2b8e647c5..6f5b5b2b528d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
| @@ -1527,6 +1527,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git | |||
| 1527 | S: Supported | 1527 | S: Supported |
| 1528 | F: Documentation/filesystems/ceph.txt | 1528 | F: Documentation/filesystems/ceph.txt |
| 1529 | F: fs/ceph | 1529 | F: fs/ceph |
| 1530 | F: net/ceph | ||
| 1531 | F: include/linux/ceph | ||
| 1530 | 1532 | ||
| 1531 | CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: | 1533 | CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: |
| 1532 | M: David Vrabel <david.vrabel@csr.com> | 1534 | M: David Vrabel <david.vrabel@csr.com> |
| @@ -3239,6 +3241,12 @@ F: drivers/net/irda/ | |||
| 3239 | F: include/net/irda/ | 3241 | F: include/net/irda/ |
| 3240 | F: net/irda/ | 3242 | F: net/irda/ |
| 3241 | 3243 | ||
| 3244 | IRQ SUBSYSTEM | ||
| 3245 | M: Thomas Gleixner <tglx@linutronix.de> | ||
| 3246 | S: Maintained | ||
| 3247 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git irq/core | ||
| 3248 | F: kernel/irq/ | ||
| 3249 | |||
| 3242 | ISAPNP | 3250 | ISAPNP |
| 3243 | M: Jaroslav Kysela <perex@perex.cz> | 3251 | M: Jaroslav Kysela <perex@perex.cz> |
| 3244 | S: Maintained | 3252 | S: Maintained |
| @@ -4805,6 +4813,15 @@ F: fs/qnx4/ | |||
| 4805 | F: include/linux/qnx4_fs.h | 4813 | F: include/linux/qnx4_fs.h |
| 4806 | F: include/linux/qnxtypes.h | 4814 | F: include/linux/qnxtypes.h |
| 4807 | 4815 | ||
| 4816 | RADOS BLOCK DEVICE (RBD) | ||
| 4817 | F: include/linux/qnxtypes.h | ||
| 4818 | M: Yehuda Sadeh <yehuda@hq.newdream.net> | ||
| 4819 | M: Sage Weil <sage@newdream.net> | ||
| 4820 | M: ceph-devel@vger.kernel.org | ||
| 4821 | S: Supported | ||
| 4822 | F: drivers/block/rbd.c | ||
| 4823 | F: drivers/block/rbd_types.h | ||
| 4824 | |||
| 4808 | RADEON FRAMEBUFFER DISPLAY DRIVER | 4825 | RADEON FRAMEBUFFER DISPLAY DRIVER |
| 4809 | M: Benjamin Herrenschmidt <benh@kernel.crashing.org> | 4826 | M: Benjamin Herrenschmidt <benh@kernel.crashing.org> |
| 4810 | L: linux-fbdev@vger.kernel.org | 4827 | L: linux-fbdev@vger.kernel.org |
diff --git a/arch/arm/include/asm/hw_irq.h b/arch/arm/include/asm/hw_irq.h index 90831f6f5f5c..5586b7c8ef6f 100644 --- a/arch/arm/include/asm/hw_irq.h +++ b/arch/arm/include/asm/hw_irq.h | |||
| @@ -24,4 +24,6 @@ void set_irq_flags(unsigned int irq, unsigned int flags); | |||
| 24 | #define IRQF_PROBE (1 << 1) | 24 | #define IRQF_PROBE (1 << 1) |
| 25 | #define IRQF_NOAUTOEN (1 << 2) | 25 | #define IRQF_NOAUTOEN (1 << 2) |
| 26 | 26 | ||
| 27 | #define ARCH_IRQ_INIT_FLAGS (IRQ_NOREQUEST | IRQ_NOPROBE) | ||
| 28 | |||
| 27 | #endif | 29 | #endif |
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index c0d5c3b3a760..36ad3be4692a 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c | |||
| @@ -154,14 +154,6 @@ void set_irq_flags(unsigned int irq, unsigned int iflags) | |||
| 154 | 154 | ||
| 155 | void __init init_IRQ(void) | 155 | void __init init_IRQ(void) |
| 156 | { | 156 | { |
| 157 | struct irq_desc *desc; | ||
| 158 | int irq; | ||
| 159 | |||
| 160 | for (irq = 0; irq < nr_irqs; irq++) { | ||
| 161 | desc = irq_to_desc_alloc_node(irq, 0); | ||
| 162 | desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE; | ||
| 163 | } | ||
| 164 | |||
| 165 | init_arch_irq(); | 157 | init_arch_irq(); |
| 166 | } | 158 | } |
| 167 | 159 | ||
| @@ -169,7 +161,7 @@ void __init init_IRQ(void) | |||
| 169 | int __init arch_probe_nr_irqs(void) | 161 | int __init arch_probe_nr_irqs(void) |
| 170 | { | 162 | { |
| 171 | nr_irqs = arch_nr_irqs ? arch_nr_irqs : NR_IRQS; | 163 | nr_irqs = arch_nr_irqs ? arch_nr_irqs : NR_IRQS; |
| 172 | return 0; | 164 | return nr_irqs; |
| 173 | } | 165 | } |
| 174 | #endif | 166 | #endif |
| 175 | 167 | ||
diff --git a/arch/arm/mach-bcmring/dma.c b/arch/arm/mach-bcmring/dma.c index 29c0a911df26..77eb35c89cd0 100644 --- a/arch/arm/mach-bcmring/dma.c +++ b/arch/arm/mach-bcmring/dma.c | |||
| @@ -691,7 +691,7 @@ int dma_init(void) | |||
| 691 | 691 | ||
| 692 | memset(&gDMA, 0, sizeof(gDMA)); | 692 | memset(&gDMA, 0, sizeof(gDMA)); |
| 693 | 693 | ||
| 694 | init_MUTEX_LOCKED(&gDMA.lock); | 694 | sema_init(&gDMA.lock, 0); |
| 695 | init_waitqueue_head(&gDMA.freeChannelQ); | 695 | init_waitqueue_head(&gDMA.freeChannelQ); |
| 696 | 696 | ||
| 697 | /* Initialize the Hardware */ | 697 | /* Initialize the Hardware */ |
| @@ -1574,7 +1574,7 @@ int dma_init_mem_map(DMA_MemMap_t *memMap) | |||
| 1574 | { | 1574 | { |
| 1575 | memset(memMap, 0, sizeof(*memMap)); | 1575 | memset(memMap, 0, sizeof(*memMap)); |
| 1576 | 1576 | ||
| 1577 | init_MUTEX(&memMap->lock); | 1577 | sema_init(&memMap->lock, 1); |
| 1578 | 1578 | ||
| 1579 | return 0; | 1579 | return 0; |
| 1580 | } | 1580 | } |
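The two dma.c hunks above are mechanical conversions away from the removed init_MUTEX()/init_MUTEX_LOCKED() helpers, which were thin wrappers around sema_init(); the replacement only has to spell out the initial count. A minimal sketch of the equivalence, with hypothetical semaphore names:

```c
/* Illustrative only: the init_MUTEX*() to sema_init() equivalence. */
#include <linux/semaphore.h>

static struct semaphore foo_lock;	/* starts unlocked */
static struct semaphore foo_gate;	/* starts locked   */

static void foo_init(void)
{
	/* was: init_MUTEX(&foo_lock); -- an available (count 1) semaphore */
	sema_init(&foo_lock, 1);

	/* was: init_MUTEX_LOCKED(&foo_gate); -- a held (count 0) semaphore */
	sema_init(&foo_gate, 0);
}
```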
diff --git a/arch/arm/mach-bcmring/irq.c b/arch/arm/mach-bcmring/irq.c index dc1c4939b0ce..e3152631eb37 100644 --- a/arch/arm/mach-bcmring/irq.c +++ b/arch/arm/mach-bcmring/irq.c | |||
| @@ -67,21 +67,21 @@ static void bcmring_unmask_irq2(unsigned int irq) | |||
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | static struct irq_chip bcmring_irq0_chip = { | 69 | static struct irq_chip bcmring_irq0_chip = { |
| 70 | .typename = "ARM-INTC0", | 70 | .name = "ARM-INTC0", |
| 71 | .ack = bcmring_mask_irq0, | 71 | .ack = bcmring_mask_irq0, |
| 72 | .mask = bcmring_mask_irq0, /* mask a specific interrupt, blocking its delivery. */ | 72 | .mask = bcmring_mask_irq0, /* mask a specific interrupt, blocking its delivery. */ |
| 73 | .unmask = bcmring_unmask_irq0, /* unmasks an interrupt */ | 73 | .unmask = bcmring_unmask_irq0, /* unmasks an interrupt */ |
| 74 | }; | 74 | }; |
| 75 | 75 | ||
| 76 | static struct irq_chip bcmring_irq1_chip = { | 76 | static struct irq_chip bcmring_irq1_chip = { |
| 77 | .typename = "ARM-INTC1", | 77 | .name = "ARM-INTC1", |
| 78 | .ack = bcmring_mask_irq1, | 78 | .ack = bcmring_mask_irq1, |
| 79 | .mask = bcmring_mask_irq1, | 79 | .mask = bcmring_mask_irq1, |
| 80 | .unmask = bcmring_unmask_irq1, | 80 | .unmask = bcmring_unmask_irq1, |
| 81 | }; | 81 | }; |
| 82 | 82 | ||
| 83 | static struct irq_chip bcmring_irq2_chip = { | 83 | static struct irq_chip bcmring_irq2_chip = { |
| 84 | .typename = "ARM-SINTC", | 84 | .name = "ARM-SINTC", |
| 85 | .ack = bcmring_mask_irq2, | 85 | .ack = bcmring_mask_irq2, |
| 86 | .mask = bcmring_mask_irq2, | 86 | .mask = bcmring_mask_irq2, |
| 87 | .unmask = bcmring_unmask_irq2, | 87 | .unmask = bcmring_unmask_irq2, |
diff --git a/arch/arm/mach-iop13xx/msi.c b/arch/arm/mach-iop13xx/msi.c index f34b0ed80630..7149fcc16c8a 100644 --- a/arch/arm/mach-iop13xx/msi.c +++ b/arch/arm/mach-iop13xx/msi.c | |||
| @@ -164,10 +164,10 @@ static void iop13xx_msi_nop(unsigned int irq) | |||
| 164 | static struct irq_chip iop13xx_msi_chip = { | 164 | static struct irq_chip iop13xx_msi_chip = { |
| 165 | .name = "PCI-MSI", | 165 | .name = "PCI-MSI", |
| 166 | .ack = iop13xx_msi_nop, | 166 | .ack = iop13xx_msi_nop, |
| 167 | .enable = unmask_msi_irq, | 167 | .irq_enable = unmask_msi_irq, |
| 168 | .disable = mask_msi_irq, | 168 | .irq_disable = mask_msi_irq, |
| 169 | .mask = mask_msi_irq, | 169 | .irq_mask = mask_msi_irq, |
| 170 | .unmask = unmask_msi_irq, | 170 | .irq_unmask = unmask_msi_irq, |
| 171 | }; | 171 | }; |
| 172 | 172 | ||
| 173 | int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) | 173 | int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) |
diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h index d514cd9edb49..8fb7d33a661f 100644 --- a/arch/ia64/include/asm/hardirq.h +++ b/arch/ia64/include/asm/hardirq.h | |||
| @@ -6,12 +6,6 @@ | |||
| 6 | * David Mosberger-Tang <davidm@hpl.hp.com> | 6 | * David Mosberger-Tang <davidm@hpl.hp.com> |
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | |||
| 10 | #include <linux/threads.h> | ||
| 11 | #include <linux/irq.h> | ||
| 12 | |||
| 13 | #include <asm/processor.h> | ||
| 14 | |||
| 15 | /* | 9 | /* |
| 16 | * No irq_cpustat_t for IA-64. The data is held in the per-CPU data structure. | 10 | * No irq_cpustat_t for IA-64. The data is held in the per-CPU data structure. |
| 17 | */ | 11 | */ |
| @@ -20,6 +14,11 @@ | |||
| 20 | 14 | ||
| 21 | #define local_softirq_pending() (local_cpu_data->softirq_pending) | 15 | #define local_softirq_pending() (local_cpu_data->softirq_pending) |
| 22 | 16 | ||
| 17 | #include <linux/threads.h> | ||
| 18 | #include <linux/irq.h> | ||
| 19 | |||
| 20 | #include <asm/processor.h> | ||
| 21 | |||
| 23 | extern void __iomem *ipi_base_addr; | 22 | extern void __iomem *ipi_base_addr; |
| 24 | 23 | ||
| 25 | void ack_bad_irq(unsigned int irq); | 24 | void ack_bad_irq(unsigned int irq); |
diff --git a/arch/ia64/include/asm/system.h b/arch/ia64/include/asm/system.h index 9f342a574ce8..dd028f2b13b3 100644 --- a/arch/ia64/include/asm/system.h +++ b/arch/ia64/include/asm/system.h | |||
| @@ -272,10 +272,6 @@ void cpu_idle_wait(void); | |||
| 272 | 272 | ||
| 273 | void default_idle(void); | 273 | void default_idle(void); |
| 274 | 274 | ||
| 275 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | ||
| 276 | extern void account_system_vtime(struct task_struct *); | ||
| 277 | #endif | ||
| 278 | |||
| 279 | #endif /* __KERNEL__ */ | 275 | #endif /* __KERNEL__ */ |
| 280 | 276 | ||
| 281 | #endif /* __ASSEMBLY__ */ | 277 | #endif /* __ASSEMBLY__ */ |
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 4a746ea838ff..00b19a416eab 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c | |||
| @@ -104,8 +104,8 @@ static int ia64_msi_retrigger_irq(unsigned int irq) | |||
| 104 | */ | 104 | */ |
| 105 | static struct irq_chip ia64_msi_chip = { | 105 | static struct irq_chip ia64_msi_chip = { |
| 106 | .name = "PCI-MSI", | 106 | .name = "PCI-MSI", |
| 107 | .mask = mask_msi_irq, | 107 | .irq_mask = mask_msi_irq, |
| 108 | .unmask = unmask_msi_irq, | 108 | .irq_unmask = unmask_msi_irq, |
| 109 | .ack = ia64_ack_msi_irq, | 109 | .ack = ia64_ack_msi_irq, |
| 110 | #ifdef CONFIG_SMP | 110 | #ifdef CONFIG_SMP |
| 111 | .set_affinity = ia64_set_msi_irq_affinity, | 111 | .set_affinity = ia64_set_msi_irq_affinity, |
| @@ -160,8 +160,8 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
| 160 | 160 | ||
| 161 | static struct irq_chip dmar_msi_type = { | 161 | static struct irq_chip dmar_msi_type = { |
| 162 | .name = "DMAR_MSI", | 162 | .name = "DMAR_MSI", |
| 163 | .unmask = dmar_msi_unmask, | 163 | .irq_unmask = dmar_msi_unmask, |
| 164 | .mask = dmar_msi_mask, | 164 | .irq_mask = dmar_msi_mask, |
| 165 | .ack = ia64_ack_msi_irq, | 165 | .ack = ia64_ack_msi_irq, |
| 166 | #ifdef CONFIG_SMP | 166 | #ifdef CONFIG_SMP |
| 167 | .set_affinity = dmar_msi_set_affinity, | 167 | .set_affinity = dmar_msi_set_affinity, |
diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c index 0c72dd463831..a5e500f02853 100644 --- a/arch/ia64/sn/kernel/msi_sn.c +++ b/arch/ia64/sn/kernel/msi_sn.c | |||
| @@ -228,8 +228,8 @@ static int sn_msi_retrigger_irq(unsigned int irq) | |||
| 228 | 228 | ||
| 229 | static struct irq_chip sn_msi_chip = { | 229 | static struct irq_chip sn_msi_chip = { |
| 230 | .name = "PCI-MSI", | 230 | .name = "PCI-MSI", |
| 231 | .mask = mask_msi_irq, | 231 | .irq_mask = mask_msi_irq, |
| 232 | .unmask = unmask_msi_irq, | 232 | .irq_unmask = unmask_msi_irq, |
| 233 | .ack = sn_ack_msi_irq, | 233 | .ack = sn_ack_msi_irq, |
| 234 | #ifdef CONFIG_SMP | 234 | #ifdef CONFIG_SMP |
| 235 | .set_affinity = sn_set_msi_irq_affinity, | 235 | .set_affinity = sn_set_msi_irq_affinity, |
diff --git a/arch/m32r/kernel/irq.c b/arch/m32r/kernel/irq.c index 3c71f776872c..7db26f1f082d 100644 --- a/arch/m32r/kernel/irq.c +++ b/arch/m32r/kernel/irq.c | |||
| @@ -51,7 +51,7 @@ int show_interrupts(struct seq_file *p, void *v) | |||
| 51 | for_each_online_cpu(j) | 51 | for_each_online_cpu(j) |
| 52 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); | 52 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); |
| 53 | #endif | 53 | #endif |
| 54 | seq_printf(p, " %14s", irq_desc[i].chip->typename); | 54 | seq_printf(p, " %14s", irq_desc[i].chip->name); |
| 55 | seq_printf(p, " %s", action->name); | 55 | seq_printf(p, " %s", action->name); |
| 56 | 56 | ||
| 57 | for (action=action->next; action; action = action->next) | 57 | for (action=action->next; action; action = action->next) |
diff --git a/arch/m32r/platforms/m32104ut/setup.c b/arch/m32r/platforms/m32104ut/setup.c index 922fdfdadeaa..402a59d7219b 100644 --- a/arch/m32r/platforms/m32104ut/setup.c +++ b/arch/m32r/platforms/m32104ut/setup.c | |||
| @@ -65,7 +65,7 @@ static void shutdown_m32104ut_irq(unsigned int irq) | |||
| 65 | 65 | ||
| 66 | static struct irq_chip m32104ut_irq_type = | 66 | static struct irq_chip m32104ut_irq_type = |
| 67 | { | 67 | { |
| 68 | .typename = "M32104UT-IRQ", | 68 | .name = "M32104UT-IRQ", |
| 69 | .startup = startup_m32104ut_irq, | 69 | .startup = startup_m32104ut_irq, |
| 70 | .shutdown = shutdown_m32104ut_irq, | 70 | .shutdown = shutdown_m32104ut_irq, |
| 71 | .enable = enable_m32104ut_irq, | 71 | .enable = enable_m32104ut_irq, |
diff --git a/arch/m32r/platforms/m32700ut/setup.c b/arch/m32r/platforms/m32700ut/setup.c index 9c1bc7487c1e..80b1a026795a 100644 --- a/arch/m32r/platforms/m32700ut/setup.c +++ b/arch/m32r/platforms/m32700ut/setup.c | |||
| @@ -71,7 +71,7 @@ static void shutdown_m32700ut_irq(unsigned int irq) | |||
| 71 | 71 | ||
| 72 | static struct irq_chip m32700ut_irq_type = | 72 | static struct irq_chip m32700ut_irq_type = |
| 73 | { | 73 | { |
| 74 | .typename = "M32700UT-IRQ", | 74 | .name = "M32700UT-IRQ", |
| 75 | .startup = startup_m32700ut_irq, | 75 | .startup = startup_m32700ut_irq, |
| 76 | .shutdown = shutdown_m32700ut_irq, | 76 | .shutdown = shutdown_m32700ut_irq, |
| 77 | .enable = enable_m32700ut_irq, | 77 | .enable = enable_m32700ut_irq, |
| @@ -148,7 +148,7 @@ static void shutdown_m32700ut_pld_irq(unsigned int irq) | |||
| 148 | 148 | ||
| 149 | static struct irq_chip m32700ut_pld_irq_type = | 149 | static struct irq_chip m32700ut_pld_irq_type = |
| 150 | { | 150 | { |
| 151 | .typename = "M32700UT-PLD-IRQ", | 151 | .name = "M32700UT-PLD-IRQ", |
| 152 | .startup = startup_m32700ut_pld_irq, | 152 | .startup = startup_m32700ut_pld_irq, |
| 153 | .shutdown = shutdown_m32700ut_pld_irq, | 153 | .shutdown = shutdown_m32700ut_pld_irq, |
| 154 | .enable = enable_m32700ut_pld_irq, | 154 | .enable = enable_m32700ut_pld_irq, |
| @@ -217,7 +217,7 @@ static void shutdown_m32700ut_lanpld_irq(unsigned int irq) | |||
| 217 | 217 | ||
| 218 | static struct irq_chip m32700ut_lanpld_irq_type = | 218 | static struct irq_chip m32700ut_lanpld_irq_type = |
| 219 | { | 219 | { |
| 220 | .typename = "M32700UT-PLD-LAN-IRQ", | 220 | .name = "M32700UT-PLD-LAN-IRQ", |
| 221 | .startup = startup_m32700ut_lanpld_irq, | 221 | .startup = startup_m32700ut_lanpld_irq, |
| 222 | .shutdown = shutdown_m32700ut_lanpld_irq, | 222 | .shutdown = shutdown_m32700ut_lanpld_irq, |
| 223 | .enable = enable_m32700ut_lanpld_irq, | 223 | .enable = enable_m32700ut_lanpld_irq, |
| @@ -286,7 +286,7 @@ static void shutdown_m32700ut_lcdpld_irq(unsigned int irq) | |||
| 286 | 286 | ||
| 287 | static struct irq_chip m32700ut_lcdpld_irq_type = | 287 | static struct irq_chip m32700ut_lcdpld_irq_type = |
| 288 | { | 288 | { |
| 289 | .typename = "M32700UT-PLD-LCD-IRQ", | 289 | .name = "M32700UT-PLD-LCD-IRQ", |
| 290 | .startup = startup_m32700ut_lcdpld_irq, | 290 | .startup = startup_m32700ut_lcdpld_irq, |
| 291 | .shutdown = shutdown_m32700ut_lcdpld_irq, | 291 | .shutdown = shutdown_m32700ut_lcdpld_irq, |
| 292 | .enable = enable_m32700ut_lcdpld_irq, | 292 | .enable = enable_m32700ut_lcdpld_irq, |
diff --git a/arch/m32r/platforms/mappi/setup.c b/arch/m32r/platforms/mappi/setup.c index fb4b17799b66..ea00c84d6b1b 100644 --- a/arch/m32r/platforms/mappi/setup.c +++ b/arch/m32r/platforms/mappi/setup.c | |||
| @@ -65,7 +65,7 @@ static void shutdown_mappi_irq(unsigned int irq) | |||
| 65 | 65 | ||
| 66 | static struct irq_chip mappi_irq_type = | 66 | static struct irq_chip mappi_irq_type = |
| 67 | { | 67 | { |
| 68 | .typename = "MAPPI-IRQ", | 68 | .name = "MAPPI-IRQ", |
| 69 | .startup = startup_mappi_irq, | 69 | .startup = startup_mappi_irq, |
| 70 | .shutdown = shutdown_mappi_irq, | 70 | .shutdown = shutdown_mappi_irq, |
| 71 | .enable = enable_mappi_irq, | 71 | .enable = enable_mappi_irq, |
diff --git a/arch/m32r/platforms/mappi2/setup.c b/arch/m32r/platforms/mappi2/setup.c index 6a65eda0a056..c049376d0270 100644 --- a/arch/m32r/platforms/mappi2/setup.c +++ b/arch/m32r/platforms/mappi2/setup.c | |||
| @@ -72,7 +72,7 @@ static void shutdown_mappi2_irq(unsigned int irq) | |||
| 72 | 72 | ||
| 73 | static struct irq_chip mappi2_irq_type = | 73 | static struct irq_chip mappi2_irq_type = |
| 74 | { | 74 | { |
| 75 | .typename = "MAPPI2-IRQ", | 75 | .name = "MAPPI2-IRQ", |
| 76 | .startup = startup_mappi2_irq, | 76 | .startup = startup_mappi2_irq, |
| 77 | .shutdown = shutdown_mappi2_irq, | 77 | .shutdown = shutdown_mappi2_irq, |
| 78 | .enable = enable_mappi2_irq, | 78 | .enable = enable_mappi2_irq, |
diff --git a/arch/m32r/platforms/mappi3/setup.c b/arch/m32r/platforms/mappi3/setup.c index 9c337aeac94b..882de25c6e8c 100644 --- a/arch/m32r/platforms/mappi3/setup.c +++ b/arch/m32r/platforms/mappi3/setup.c | |||
| @@ -72,7 +72,7 @@ static void shutdown_mappi3_irq(unsigned int irq) | |||
| 72 | 72 | ||
| 73 | static struct irq_chip mappi3_irq_type = | 73 | static struct irq_chip mappi3_irq_type = |
| 74 | { | 74 | { |
| 75 | .typename = "MAPPI3-IRQ", | 75 | .name = "MAPPI3-IRQ", |
| 76 | .startup = startup_mappi3_irq, | 76 | .startup = startup_mappi3_irq, |
| 77 | .shutdown = shutdown_mappi3_irq, | 77 | .shutdown = shutdown_mappi3_irq, |
| 78 | .enable = enable_mappi3_irq, | 78 | .enable = enable_mappi3_irq, |
diff --git a/arch/m32r/platforms/oaks32r/setup.c b/arch/m32r/platforms/oaks32r/setup.c index ed865741c38d..d11d93bf74f5 100644 --- a/arch/m32r/platforms/oaks32r/setup.c +++ b/arch/m32r/platforms/oaks32r/setup.c | |||
| @@ -63,7 +63,7 @@ static void shutdown_oaks32r_irq(unsigned int irq) | |||
| 63 | 63 | ||
| 64 | static struct irq_chip oaks32r_irq_type = | 64 | static struct irq_chip oaks32r_irq_type = |
| 65 | { | 65 | { |
| 66 | .typename = "OAKS32R-IRQ", | 66 | .name = "OAKS32R-IRQ", |
| 67 | .startup = startup_oaks32r_irq, | 67 | .startup = startup_oaks32r_irq, |
| 68 | .shutdown = shutdown_oaks32r_irq, | 68 | .shutdown = shutdown_oaks32r_irq, |
| 69 | .enable = enable_oaks32r_irq, | 69 | .enable = enable_oaks32r_irq, |
diff --git a/arch/m32r/platforms/opsput/setup.c b/arch/m32r/platforms/opsput/setup.c index 80d680657019..5f3402a2fbaf 100644 --- a/arch/m32r/platforms/opsput/setup.c +++ b/arch/m32r/platforms/opsput/setup.c | |||
| @@ -72,7 +72,7 @@ static void shutdown_opsput_irq(unsigned int irq) | |||
| 72 | 72 | ||
| 73 | static struct irq_chip opsput_irq_type = | 73 | static struct irq_chip opsput_irq_type = |
| 74 | { | 74 | { |
| 75 | .typename = "OPSPUT-IRQ", | 75 | .name = "OPSPUT-IRQ", |
| 76 | .startup = startup_opsput_irq, | 76 | .startup = startup_opsput_irq, |
| 77 | .shutdown = shutdown_opsput_irq, | 77 | .shutdown = shutdown_opsput_irq, |
| 78 | .enable = enable_opsput_irq, | 78 | .enable = enable_opsput_irq, |
| @@ -149,7 +149,7 @@ static void shutdown_opsput_pld_irq(unsigned int irq) | |||
| 149 | 149 | ||
| 150 | static struct irq_chip opsput_pld_irq_type = | 150 | static struct irq_chip opsput_pld_irq_type = |
| 151 | { | 151 | { |
| 152 | .typename = "OPSPUT-PLD-IRQ", | 152 | .name = "OPSPUT-PLD-IRQ", |
| 153 | .startup = startup_opsput_pld_irq, | 153 | .startup = startup_opsput_pld_irq, |
| 154 | .shutdown = shutdown_opsput_pld_irq, | 154 | .shutdown = shutdown_opsput_pld_irq, |
| 155 | .enable = enable_opsput_pld_irq, | 155 | .enable = enable_opsput_pld_irq, |
| @@ -218,7 +218,7 @@ static void shutdown_opsput_lanpld_irq(unsigned int irq) | |||
| 218 | 218 | ||
| 219 | static struct irq_chip opsput_lanpld_irq_type = | 219 | static struct irq_chip opsput_lanpld_irq_type = |
| 220 | { | 220 | { |
| 221 | .typename = "OPSPUT-PLD-LAN-IRQ", | 221 | .name = "OPSPUT-PLD-LAN-IRQ", |
| 222 | .startup = startup_opsput_lanpld_irq, | 222 | .startup = startup_opsput_lanpld_irq, |
| 223 | .shutdown = shutdown_opsput_lanpld_irq, | 223 | .shutdown = shutdown_opsput_lanpld_irq, |
| 224 | .enable = enable_opsput_lanpld_irq, | 224 | .enable = enable_opsput_lanpld_irq, |
diff --git a/arch/m32r/platforms/usrv/setup.c b/arch/m32r/platforms/usrv/setup.c index 757302660af8..1beac7a51ed4 100644 --- a/arch/m32r/platforms/usrv/setup.c +++ b/arch/m32r/platforms/usrv/setup.c | |||
| @@ -63,7 +63,7 @@ static void shutdown_mappi_irq(unsigned int irq) | |||
| 63 | 63 | ||
| 64 | static struct irq_chip mappi_irq_type = | 64 | static struct irq_chip mappi_irq_type = |
| 65 | { | 65 | { |
| 66 | .typename = "M32700-IRQ", | 66 | .name = "M32700-IRQ", |
| 67 | .startup = startup_mappi_irq, | 67 | .startup = startup_mappi_irq, |
| 68 | .shutdown = shutdown_mappi_irq, | 68 | .shutdown = shutdown_mappi_irq, |
| 69 | .enable = enable_mappi_irq, | 69 | .enable = enable_mappi_irq, |
| @@ -136,7 +136,7 @@ static void shutdown_m32700ut_pld_irq(unsigned int irq) | |||
| 136 | 136 | ||
| 137 | static struct irq_chip m32700ut_pld_irq_type = | 137 | static struct irq_chip m32700ut_pld_irq_type = |
| 138 | { | 138 | { |
| 139 | .typename = "USRV-PLD-IRQ", | 139 | .name = "USRV-PLD-IRQ", |
| 140 | .startup = startup_m32700ut_pld_irq, | 140 | .startup = startup_m32700ut_pld_irq, |
| 141 | .shutdown = shutdown_m32700ut_pld_irq, | 141 | .shutdown = shutdown_m32700ut_pld_irq, |
| 142 | .enable = enable_m32700ut_pld_irq, | 142 | .enable = enable_m32700ut_pld_irq, |
diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index 2340f11dc29c..9a526ba6f257 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c | |||
| @@ -103,7 +103,7 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len, | |||
| 103 | if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) | 103 | if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) |
| 104 | goto out_unlock; | 104 | goto out_unlock; |
| 105 | 105 | ||
| 106 | retval = security_task_setscheduler(p, 0, NULL); | 106 | retval = security_task_setscheduler(p); |
| 107 | if (retval) | 107 | if (retval) |
| 108 | goto out_unlock; | 108 | goto out_unlock; |
| 109 | 109 | ||
diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index 6c294acac848..9c3d160670b4 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h | |||
| @@ -542,10 +542,6 @@ extern void reloc_got2(unsigned long); | |||
| 542 | 542 | ||
| 543 | #define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x))) | 543 | #define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x))) |
| 544 | 544 | ||
| 545 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | ||
| 546 | extern void account_system_vtime(struct task_struct *); | ||
| 547 | #endif | ||
| 548 | |||
| 549 | extern struct dentry *powerpc_debugfs_root; | 545 | extern struct dentry *powerpc_debugfs_root; |
| 550 | 546 | ||
| 551 | #endif /* __KERNEL__ */ | 547 | #endif /* __KERNEL__ */ |
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 97085530aa63..e3e379c6caa7 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c | |||
| @@ -310,9 +310,9 @@ static void axon_msi_teardown_msi_irqs(struct pci_dev *dev) | |||
| 310 | } | 310 | } |
| 311 | 311 | ||
| 312 | static struct irq_chip msic_irq_chip = { | 312 | static struct irq_chip msic_irq_chip = { |
| 313 | .mask = mask_msi_irq, | 313 | .irq_mask = mask_msi_irq, |
| 314 | .unmask = unmask_msi_irq, | 314 | .irq_unmask = unmask_msi_irq, |
| 315 | .shutdown = unmask_msi_irq, | 315 | .irq_shutdown = mask_msi_irq, |
| 316 | .name = "AXON-MSI", | 316 | .name = "AXON-MSI", |
| 317 | }; | 317 | }; |
| 318 | 318 | ||
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 93834b0d8272..67e2c4bdac8f 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c | |||
| @@ -243,7 +243,7 @@ static unsigned int xics_startup(unsigned int virq) | |||
| 243 | * at that level, so we do it here by hand. | 243 | * at that level, so we do it here by hand. |
| 244 | */ | 244 | */ |
| 245 | if (irq_to_desc(virq)->msi_desc) | 245 | if (irq_to_desc(virq)->msi_desc) |
| 246 | unmask_msi_irq(virq); | 246 | unmask_msi_irq(irq_get_irq_data(virq)); |
| 247 | 247 | ||
| 248 | /* unmask it */ | 248 | /* unmask it */ |
| 249 | xics_unmask_irq(virq); | 249 | xics_unmask_irq(virq); |
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 87991d3abbab..bdbd896c89d8 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c | |||
| @@ -51,8 +51,8 @@ static void fsl_msi_end_irq(unsigned int virq) | |||
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | static struct irq_chip fsl_msi_chip = { | 53 | static struct irq_chip fsl_msi_chip = { |
| 54 | .mask = mask_msi_irq, | 54 | .irq_mask = mask_msi_irq, |
| 55 | .unmask = unmask_msi_irq, | 55 | .irq_unmask = unmask_msi_irq, |
| 56 | .ack = fsl_msi_end_irq, | 56 | .ack = fsl_msi_end_irq, |
| 57 | .name = "FSL-MSI", | 57 | .name = "FSL-MSI", |
| 58 | }; | 58 | }; |
diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c index 3b6a9a43718f..320ad5a9a25d 100644 --- a/arch/powerpc/sysdev/mpic_pasemi_msi.c +++ b/arch/powerpc/sysdev/mpic_pasemi_msi.c | |||
| @@ -39,24 +39,24 @@ | |||
| 39 | static struct mpic *msi_mpic; | 39 | static struct mpic *msi_mpic; |
| 40 | 40 | ||
| 41 | 41 | ||
| 42 | static void mpic_pasemi_msi_mask_irq(unsigned int irq) | 42 | static void mpic_pasemi_msi_mask_irq(struct irq_data *data) |
| 43 | { | 43 | { |
| 44 | pr_debug("mpic_pasemi_msi_mask_irq %d\n", irq); | 44 | pr_debug("mpic_pasemi_msi_mask_irq %d\n", data->irq); |
| 45 | mask_msi_irq(irq); | 45 | mask_msi_irq(data); |
| 46 | mpic_mask_irq(irq); | 46 | mpic_mask_irq(data->irq); |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | static void mpic_pasemi_msi_unmask_irq(unsigned int irq) | 49 | static void mpic_pasemi_msi_unmask_irq(struct irq_data *data) |
| 50 | { | 50 | { |
| 51 | pr_debug("mpic_pasemi_msi_unmask_irq %d\n", irq); | 51 | pr_debug("mpic_pasemi_msi_unmask_irq %d\n", data->irq); |
| 52 | mpic_unmask_irq(irq); | 52 | mpic_unmask_irq(data->irq); |
| 53 | unmask_msi_irq(irq); | 53 | unmask_msi_irq(data); |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | static struct irq_chip mpic_pasemi_msi_chip = { | 56 | static struct irq_chip mpic_pasemi_msi_chip = { |
| 57 | .shutdown = mpic_pasemi_msi_mask_irq, | 57 | .irq_shutdown = mpic_pasemi_msi_mask_irq, |
| 58 | .mask = mpic_pasemi_msi_mask_irq, | 58 | .irq_mask = mpic_pasemi_msi_mask_irq, |
| 59 | .unmask = mpic_pasemi_msi_unmask_irq, | 59 | .irq_unmask = mpic_pasemi_msi_unmask_irq, |
| 60 | .eoi = mpic_end_irq, | 60 | .eoi = mpic_end_irq, |
| 61 | .set_type = mpic_set_irq_type, | 61 | .set_type = mpic_set_irq_type, |
| 62 | .set_affinity = mpic_set_affinity, | 62 | .set_affinity = mpic_set_affinity, |
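The PASemi MSI chip stacks the MSI layer on top of the MPIC, so for now its wrappers bridge both conventions: mask_msi_irq()/unmask_msi_irq() take the new irq_data while the mpic_* helpers still take a number. Note the symmetric ordering, preserved in the sketch below (parent_* names are illustrative): masking silences the MSI side before the parent, unmasking re-enables the parent before the MSI side. The u3msi chip in the next file follows the identical shape.

	static void stacked_mask(struct irq_data *data)
	{
		mask_msi_irq(data);		/* new-style child, by irq_data */
		parent_mask_irq(data->irq);	/* old-style parent, by number */
	}

	static void stacked_unmask(struct irq_data *data)
	{
		parent_unmask_irq(data->irq);	/* parent first on the way up */
		unmask_msi_irq(data);
	}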
diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index bcbfe79c704b..a2b028b4a202 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c | |||
| @@ -23,22 +23,22 @@ | |||
| 23 | /* A bit ugly, can we get this from the pci_dev somehow? */ | 23 | /* A bit ugly, can we get this from the pci_dev somehow? */ |
| 24 | static struct mpic *msi_mpic; | 24 | static struct mpic *msi_mpic; |
| 25 | 25 | ||
| 26 | static void mpic_u3msi_mask_irq(unsigned int irq) | 26 | static void mpic_u3msi_mask_irq(struct irq_data *data) |
| 27 | { | 27 | { |
| 28 | mask_msi_irq(irq); | 28 | mask_msi_irq(data); |
| 29 | mpic_mask_irq(irq); | 29 | mpic_mask_irq(data->irq); |
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | static void mpic_u3msi_unmask_irq(unsigned int irq) | 32 | static void mpic_u3msi_unmask_irq(struct irq_data *data) |
| 33 | { | 33 | { |
| 34 | mpic_unmask_irq(irq); | 34 | mpic_unmask_irq(data->irq); |
| 35 | unmask_msi_irq(irq); | 35 | unmask_msi_irq(data); |
| 36 | } | 36 | } |
| 37 | 37 | ||
| 38 | static struct irq_chip mpic_u3msi_chip = { | 38 | static struct irq_chip mpic_u3msi_chip = { |
| 39 | .shutdown = mpic_u3msi_mask_irq, | 39 | .irq_shutdown = mpic_u3msi_mask_irq, |
| 40 | .mask = mpic_u3msi_mask_irq, | 40 | .irq_mask = mpic_u3msi_mask_irq, |
| 41 | .unmask = mpic_u3msi_unmask_irq, | 41 | .irq_unmask = mpic_u3msi_unmask_irq, |
| 42 | .eoi = mpic_end_irq, | 42 | .eoi = mpic_end_irq, |
| 43 | .set_type = mpic_set_irq_type, | 43 | .set_type = mpic_set_irq_type, |
| 44 | .set_affinity = mpic_set_affinity, | 44 | .set_affinity = mpic_set_affinity, |
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 958f0dadeadf..75976a141947 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig | |||
| @@ -199,6 +199,13 @@ config HOTPLUG_CPU | |||
| 199 | can be controlled through /sys/devices/system/cpu/cpu#. | 199 | can be controlled through /sys/devices/system/cpu/cpu#. |
| 200 | Say N if you want to disable CPU hotplug. | 200 | Say N if you want to disable CPU hotplug. |
| 201 | 201 | ||
| 202 | config SCHED_BOOK | ||
| 203 | bool "Book scheduler support" | ||
| 204 | depends on SMP | ||
| 205 | help | ||
| 206 | Book scheduler support improves the CPU scheduler's decision making | ||
| 207 | when dealing with machines that have several books. | ||
| 208 | |||
| 202 | config MATHEMU | 209 | config MATHEMU |
| 203 | bool "IEEE FPU emulation" | 210 | bool "IEEE FPU emulation" |
| 204 | depends on MARCH_G5 | 211 | depends on MARCH_G5 |
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index 498bc3892385..881d94590aeb 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h | |||
| @@ -12,10 +12,6 @@ | |||
| 12 | #ifndef __ASM_HARDIRQ_H | 12 | #ifndef __ASM_HARDIRQ_H |
| 13 | #define __ASM_HARDIRQ_H | 13 | #define __ASM_HARDIRQ_H |
| 14 | 14 | ||
| 15 | #include <linux/threads.h> | ||
| 16 | #include <linux/sched.h> | ||
| 17 | #include <linux/cache.h> | ||
| 18 | #include <linux/interrupt.h> | ||
| 19 | #include <asm/lowcore.h> | 15 | #include <asm/lowcore.h> |
| 20 | 16 | ||
| 21 | #define local_softirq_pending() (S390_lowcore.softirq_pending) | 17 | #define local_softirq_pending() (S390_lowcore.softirq_pending) |
diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index cef66210c846..38ddd8a9a9e8 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h | |||
| @@ -97,7 +97,6 @@ static inline void restore_access_regs(unsigned int *acrs) | |||
| 97 | 97 | ||
| 98 | extern void account_vtime(struct task_struct *, struct task_struct *); | 98 | extern void account_vtime(struct task_struct *, struct task_struct *); |
| 99 | extern void account_tick_vtime(struct task_struct *); | 99 | extern void account_tick_vtime(struct task_struct *); |
| 100 | extern void account_system_vtime(struct task_struct *); | ||
| 101 | 100 | ||
| 102 | #ifdef CONFIG_PFAULT | 101 | #ifdef CONFIG_PFAULT |
| 103 | extern void pfault_irq_init(void); | 102 | extern void pfault_irq_init(void); |
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 831bd033ea77..051107a2c5e2 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h | |||
| @@ -3,15 +3,32 @@ | |||
| 3 | 3 | ||
| 4 | #include <linux/cpumask.h> | 4 | #include <linux/cpumask.h> |
| 5 | 5 | ||
| 6 | #define mc_capable() (1) | ||
| 7 | |||
| 8 | const struct cpumask *cpu_coregroup_mask(unsigned int cpu); | ||
| 9 | |||
| 10 | extern unsigned char cpu_core_id[NR_CPUS]; | 6 | extern unsigned char cpu_core_id[NR_CPUS]; |
| 11 | extern cpumask_t cpu_core_map[NR_CPUS]; | 7 | extern cpumask_t cpu_core_map[NR_CPUS]; |
| 12 | 8 | ||
| 9 | static inline const struct cpumask *cpu_coregroup_mask(unsigned int cpu) | ||
| 10 | { | ||
| 11 | return &cpu_core_map[cpu]; | ||
| 12 | } | ||
| 13 | |||
| 13 | #define topology_core_id(cpu) (cpu_core_id[cpu]) | 14 | #define topology_core_id(cpu) (cpu_core_id[cpu]) |
| 14 | #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) | 15 | #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) |
| 16 | #define mc_capable() (1) | ||
| 17 | |||
| 18 | #ifdef CONFIG_SCHED_BOOK | ||
| 19 | |||
| 20 | extern unsigned char cpu_book_id[NR_CPUS]; | ||
| 21 | extern cpumask_t cpu_book_map[NR_CPUS]; | ||
| 22 | |||
| 23 | static inline const struct cpumask *cpu_book_mask(unsigned int cpu) | ||
| 24 | { | ||
| 25 | return &cpu_book_map[cpu]; | ||
| 26 | } | ||
| 27 | |||
| 28 | #define topology_book_id(cpu) (cpu_book_id[cpu]) | ||
| 29 | #define topology_book_cpumask(cpu) (&cpu_book_map[cpu]) | ||
| 30 | |||
| 31 | #endif /* CONFIG_SCHED_BOOK */ | ||
| 15 | 32 | ||
| 16 | int topology_set_cpu_management(int fc); | 33 | int topology_set_cpu_management(int fc); |
| 17 | void topology_schedule_update(void); | 34 | void topology_schedule_update(void); |
| @@ -30,6 +47,8 @@ static inline void s390_init_cpu_topology(void) | |||
| 30 | }; | 47 | }; |
| 31 | #endif | 48 | #endif |
| 32 | 49 | ||
| 50 | #define SD_BOOK_INIT SD_CPU_INIT | ||
| 51 | |||
| 33 | #include <asm-generic/topology.h> | 52 | #include <asm-generic/topology.h> |
| 34 | 53 | ||
| 35 | #endif /* _ASM_S390_TOPOLOGY_H */ | 54 | #endif /* _ASM_S390_TOPOLOGY_H */ |
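With CONFIG_SCHED_BOOK the header now describes a second topology level above cores, and the accessors mirror the core-level ones so consumers can be written symmetrically. A sketch, guarded because the book macros only exist under the config option:

	static void print_book_info(unsigned int cpu)
	{
	#ifdef CONFIG_SCHED_BOOK
		const struct cpumask *peers = cpu_book_mask(cpu); /* CPUs sharing the book */

		pr_info("cpu %u is in book %d (%u peers)\n", cpu,
			topology_book_id(cpu), cpumask_weight(peers));
	#endif
	}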
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index bcef00766a64..13559c993847 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c | |||
| @@ -57,8 +57,8 @@ struct tl_info { | |||
| 57 | union tl_entry tle[0]; | 57 | union tl_entry tle[0]; |
| 58 | }; | 58 | }; |
| 59 | 59 | ||
| 60 | struct core_info { | 60 | struct mask_info { |
| 61 | struct core_info *next; | 61 | struct mask_info *next; |
| 62 | unsigned char id; | 62 | unsigned char id; |
| 63 | cpumask_t mask; | 63 | cpumask_t mask; |
| 64 | }; | 64 | }; |
| @@ -66,7 +66,6 @@ struct core_info { | |||
| 66 | static int topology_enabled; | 66 | static int topology_enabled; |
| 67 | static void topology_work_fn(struct work_struct *work); | 67 | static void topology_work_fn(struct work_struct *work); |
| 68 | static struct tl_info *tl_info; | 68 | static struct tl_info *tl_info; |
| 69 | static struct core_info core_info; | ||
| 70 | static int machine_has_topology; | 69 | static int machine_has_topology; |
| 71 | static struct timer_list topology_timer; | 70 | static struct timer_list topology_timer; |
| 72 | static void set_topology_timer(void); | 71 | static void set_topology_timer(void); |
| @@ -74,38 +73,37 @@ static DECLARE_WORK(topology_work, topology_work_fn); | |||
| 74 | /* topology_lock protects the core linked list */ | 73 | /* topology_lock protects the core linked list */ |
| 75 | static DEFINE_SPINLOCK(topology_lock); | 74 | static DEFINE_SPINLOCK(topology_lock); |
| 76 | 75 | ||
| 76 | static struct mask_info core_info; | ||
| 77 | cpumask_t cpu_core_map[NR_CPUS]; | 77 | cpumask_t cpu_core_map[NR_CPUS]; |
| 78 | unsigned char cpu_core_id[NR_CPUS]; | 78 | unsigned char cpu_core_id[NR_CPUS]; |
| 79 | 79 | ||
| 80 | static cpumask_t cpu_coregroup_map(unsigned int cpu) | 80 | #ifdef CONFIG_SCHED_BOOK |
| 81 | static struct mask_info book_info; | ||
| 82 | cpumask_t cpu_book_map[NR_CPUS]; | ||
| 83 | unsigned char cpu_book_id[NR_CPUS]; | ||
| 84 | #endif | ||
| 85 | |||
| 86 | static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) | ||
| 81 | { | 87 | { |
| 82 | struct core_info *core = &core_info; | ||
| 83 | unsigned long flags; | ||
| 84 | cpumask_t mask; | 88 | cpumask_t mask; |
| 85 | 89 | ||
| 86 | cpus_clear(mask); | 90 | cpus_clear(mask); |
| 87 | if (!topology_enabled || !machine_has_topology) | 91 | if (!topology_enabled || !machine_has_topology) |
| 88 | return cpu_possible_map; | 92 | return cpu_possible_map; |
| 89 | spin_lock_irqsave(&topology_lock, flags); | 93 | while (info) { |
| 90 | while (core) { | 94 | if (cpu_isset(cpu, info->mask)) { |
| 91 | if (cpu_isset(cpu, core->mask)) { | 95 | mask = info->mask; |
| 92 | mask = core->mask; | ||
| 93 | break; | 96 | break; |
| 94 | } | 97 | } |
| 95 | core = core->next; | 98 | info = info->next; |
| 96 | } | 99 | } |
| 97 | spin_unlock_irqrestore(&topology_lock, flags); | ||
| 98 | if (cpus_empty(mask)) | 100 | if (cpus_empty(mask)) |
| 99 | mask = cpumask_of_cpu(cpu); | 101 | mask = cpumask_of_cpu(cpu); |
| 100 | return mask; | 102 | return mask; |
| 101 | } | 103 | } |
| 102 | 104 | ||
| 103 | const struct cpumask *cpu_coregroup_mask(unsigned int cpu) | 105 | static void add_cpus_to_mask(struct tl_cpu *tl_cpu, struct mask_info *book, |
| 104 | { | 106 | struct mask_info *core) |
| 105 | return &cpu_core_map[cpu]; | ||
| 106 | } | ||
| 107 | |||
| 108 | static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core) | ||
| 109 | { | 107 | { |
| 110 | unsigned int cpu; | 108 | unsigned int cpu; |
| 111 | 109 | ||
| @@ -117,23 +115,35 @@ static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core) | |||
| 117 | 115 | ||
| 118 | rcpu = CPU_BITS - 1 - cpu + tl_cpu->origin; | 116 | rcpu = CPU_BITS - 1 - cpu + tl_cpu->origin; |
| 119 | for_each_present_cpu(lcpu) { | 117 | for_each_present_cpu(lcpu) { |
| 120 | if (cpu_logical_map(lcpu) == rcpu) { | 118 | if (cpu_logical_map(lcpu) != rcpu) |
| 121 | cpu_set(lcpu, core->mask); | 119 | continue; |
| 122 | cpu_core_id[lcpu] = core->id; | 120 | #ifdef CONFIG_SCHED_BOOK |
| 123 | smp_cpu_polarization[lcpu] = tl_cpu->pp; | 121 | cpu_set(lcpu, book->mask); |
| 124 | } | 122 | cpu_book_id[lcpu] = book->id; |
| 123 | #endif | ||
| 124 | cpu_set(lcpu, core->mask); | ||
| 125 | cpu_core_id[lcpu] = core->id; | ||
| 126 | smp_cpu_polarization[lcpu] = tl_cpu->pp; | ||
| 125 | } | 127 | } |
| 126 | } | 128 | } |
| 127 | } | 129 | } |
| 128 | 130 | ||
| 129 | static void clear_cores(void) | 131 | static void clear_masks(void) |
| 130 | { | 132 | { |
| 131 | struct core_info *core = &core_info; | 133 | struct mask_info *info; |
| 132 | 134 | ||
| 133 | while (core) { | 135 | info = &core_info; |
| 134 | cpus_clear(core->mask); | 136 | while (info) { |
| 135 | core = core->next; | 137 | cpus_clear(info->mask); |
| 138 | info = info->next; | ||
| 139 | } | ||
| 140 | #ifdef CONFIG_SCHED_BOOK | ||
| 141 | info = &book_info; | ||
| 142 | while (info) { | ||
| 143 | cpus_clear(info->mask); | ||
| 144 | info = info->next; | ||
| 136 | } | 145 | } |
| 146 | #endif | ||
| 137 | } | 147 | } |
| 138 | 148 | ||
| 139 | static union tl_entry *next_tle(union tl_entry *tle) | 149 | static union tl_entry *next_tle(union tl_entry *tle) |
| @@ -146,29 +156,36 @@ static union tl_entry *next_tle(union tl_entry *tle) | |||
| 146 | 156 | ||
| 147 | static void tl_to_cores(struct tl_info *info) | 157 | static void tl_to_cores(struct tl_info *info) |
| 148 | { | 158 | { |
| 159 | #ifdef CONFIG_SCHED_BOOK | ||
| 160 | struct mask_info *book = &book_info; | ||
| 161 | #else | ||
| 162 | struct mask_info *book = NULL; | ||
| 163 | #endif | ||
| 164 | struct mask_info *core = &core_info; | ||
| 149 | union tl_entry *tle, *end; | 165 | union tl_entry *tle, *end; |
| 150 | struct core_info *core = &core_info; | 166 | |
| 151 | 167 | ||
| 152 | spin_lock_irq(&topology_lock); | 168 | spin_lock_irq(&topology_lock); |
| 153 | clear_cores(); | 169 | clear_masks(); |
| 154 | tle = info->tle; | 170 | tle = info->tle; |
| 155 | end = (union tl_entry *)((unsigned long)info + info->length); | 171 | end = (union tl_entry *)((unsigned long)info + info->length); |
| 156 | while (tle < end) { | 172 | while (tle < end) { |
| 157 | switch (tle->nl) { | 173 | switch (tle->nl) { |
| 158 | case 5: | 174 | #ifdef CONFIG_SCHED_BOOK |
| 159 | case 4: | ||
| 160 | case 3: | ||
| 161 | case 2: | 175 | case 2: |
| 176 | book = book->next; | ||
| 177 | book->id = tle->container.id; | ||
| 162 | break; | 178 | break; |
| 179 | #endif | ||
| 163 | case 1: | 180 | case 1: |
| 164 | core = core->next; | 181 | core = core->next; |
| 165 | core->id = tle->container.id; | 182 | core->id = tle->container.id; |
| 166 | break; | 183 | break; |
| 167 | case 0: | 184 | case 0: |
| 168 | add_cpus_to_core(&tle->cpu, core); | 185 | add_cpus_to_mask(&tle->cpu, book, core); |
| 169 | break; | 186 | break; |
| 170 | default: | 187 | default: |
| 171 | clear_cores(); | 188 | clear_masks(); |
| 172 | machine_has_topology = 0; | 189 | machine_has_topology = 0; |
| 173 | goto out; | 190 | goto out; |
| 174 | } | 191 | } |
| @@ -221,10 +238,29 @@ int topology_set_cpu_management(int fc) | |||
| 221 | 238 | ||
| 222 | static void update_cpu_core_map(void) | 239 | static void update_cpu_core_map(void) |
| 223 | { | 240 | { |
| 241 | unsigned long flags; | ||
| 224 | int cpu; | 242 | int cpu; |
| 225 | 243 | ||
| 226 | for_each_possible_cpu(cpu) | 244 | spin_lock_irqsave(&topology_lock, flags); |
| 227 | cpu_core_map[cpu] = cpu_coregroup_map(cpu); | 245 | for_each_possible_cpu(cpu) { |
| 246 | cpu_core_map[cpu] = cpu_group_map(&core_info, cpu); | ||
| 247 | #ifdef CONFIG_SCHED_BOOK | ||
| 248 | cpu_book_map[cpu] = cpu_group_map(&book_info, cpu); | ||
| 249 | #endif | ||
| 250 | } | ||
| 251 | spin_unlock_irqrestore(&topology_lock, flags); | ||
| 252 | } | ||
| 253 | |||
| 254 | static void store_topology(struct tl_info *info) | ||
| 255 | { | ||
| 256 | #ifdef CONFIG_SCHED_BOOK | ||
| 257 | int rc; | ||
| 258 | |||
| 259 | rc = stsi(info, 15, 1, 3); | ||
| 260 | if (rc != -ENOSYS) | ||
| 261 | return; | ||
| 262 | #endif | ||
| 263 | stsi(info, 15, 1, 2); | ||
| 228 | } | 264 | } |
| 229 | 265 | ||
| 230 | int arch_update_cpu_topology(void) | 266 | int arch_update_cpu_topology(void) |
| @@ -238,7 +274,7 @@ int arch_update_cpu_topology(void) | |||
| 238 | topology_update_polarization_simple(); | 274 | topology_update_polarization_simple(); |
| 239 | return 0; | 275 | return 0; |
| 240 | } | 276 | } |
| 241 | stsi(info, 15, 1, 2); | 277 | store_topology(info); |
| 242 | tl_to_cores(info); | 278 | tl_to_cores(info); |
| 243 | update_cpu_core_map(); | 279 | update_cpu_core_map(); |
| 244 | for_each_online_cpu(cpu) { | 280 | for_each_online_cpu(cpu) { |
| @@ -299,12 +335,24 @@ out: | |||
| 299 | } | 335 | } |
| 300 | __initcall(init_topology_update); | 336 | __initcall(init_topology_update); |
| 301 | 337 | ||
| 338 | static void alloc_masks(struct tl_info *info, struct mask_info *mask, int offset) | ||
| 339 | { | ||
| 340 | int i, nr_masks; | ||
| 341 | |||
| 342 | nr_masks = info->mag[NR_MAG - offset]; | ||
| 343 | for (i = 0; i < info->mnest - offset; i++) | ||
| 344 | nr_masks *= info->mag[NR_MAG - offset - 1 - i]; | ||
| 345 | nr_masks = max(nr_masks, 1); | ||
| 346 | for (i = 0; i < nr_masks; i++) { | ||
| 347 | mask->next = alloc_bootmem(sizeof(struct mask_info)); | ||
| 348 | mask = mask->next; | ||
| 349 | } | ||
| 350 | } | ||
| 351 | |||
| 302 | void __init s390_init_cpu_topology(void) | 352 | void __init s390_init_cpu_topology(void) |
| 303 | { | 353 | { |
| 304 | unsigned long long facility_bits; | 354 | unsigned long long facility_bits; |
| 305 | struct tl_info *info; | 355 | struct tl_info *info; |
| 306 | struct core_info *core; | ||
| 307 | int nr_cores; | ||
| 308 | int i; | 356 | int i; |
| 309 | 357 | ||
| 310 | if (stfle(&facility_bits, 1) <= 0) | 358 | if (stfle(&facility_bits, 1) <= 0) |
| @@ -315,25 +363,13 @@ void __init s390_init_cpu_topology(void) | |||
| 315 | 363 | ||
| 316 | tl_info = alloc_bootmem_pages(PAGE_SIZE); | 364 | tl_info = alloc_bootmem_pages(PAGE_SIZE); |
| 317 | info = tl_info; | 365 | info = tl_info; |
| 318 | stsi(info, 15, 1, 2); | 366 | store_topology(info); |
| 319 | |||
| 320 | nr_cores = info->mag[NR_MAG - 2]; | ||
| 321 | for (i = 0; i < info->mnest - 2; i++) | ||
| 322 | nr_cores *= info->mag[NR_MAG - 3 - i]; | ||
| 323 | |||
| 324 | pr_info("The CPU configuration topology of the machine is:"); | 367 | pr_info("The CPU configuration topology of the machine is:"); |
| 325 | for (i = 0; i < NR_MAG; i++) | 368 | for (i = 0; i < NR_MAG; i++) |
| 326 | printk(" %d", info->mag[i]); | 369 | printk(" %d", info->mag[i]); |
| 327 | printk(" / %d\n", info->mnest); | 370 | printk(" / %d\n", info->mnest); |
| 328 | 371 | alloc_masks(info, &core_info, 2); | |
| 329 | core = &core_info; | 372 | #ifdef CONFIG_SCHED_BOOK |
| 330 | for (i = 0; i < nr_cores; i++) { | 373 | alloc_masks(info, &book_info, 3); |
| 331 | core->next = alloc_bootmem(sizeof(struct core_info)); | 374 | #endif |
| 332 | core = core->next; | ||
| 333 | if (!core) | ||
| 334 | goto error; | ||
| 335 | } | ||
| 336 | return; | ||
| 337 | error: | ||
| 338 | machine_has_topology = 0; | ||
| 339 | } | 375 | } |
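The heart of the rework: the core-only linked list becomes a generic struct mask_info list, one instance per topology level (core_info, plus book_info when configured), all walked by the same cpu_group_map(). Locking moves out of the per-cpu lookup into update_cpu_core_map(), which snapshots every level under topology_lock in one pass. A condensed sketch of the level-agnostic lookup, mirroring cpu_group_map() above:

	static cpumask_t level_mask(struct mask_info *info, unsigned int cpu)
	{
		while (info) {
			if (cpu_isset(cpu, info->mask))
				return info->mask;	/* entry at this level containing cpu */
			info = info->next;
		}
		return cpumask_of_cpu(cpu);		/* fall back to the cpu alone */
	}

alloc_masks() sizes each list from the STSI magnitude fields: offset 2 counts containers at the core level, offset 3 at the book level.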
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 257de1f0692b..ae5bac39b896 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c | |||
| @@ -290,7 +290,7 @@ void __init init_IRQ(void) | |||
| 290 | int __init arch_probe_nr_irqs(void) | 290 | int __init arch_probe_nr_irqs(void) |
| 291 | { | 291 | { |
| 292 | nr_irqs = sh_mv.mv_nr_irqs; | 292 | nr_irqs = sh_mv.mv_nr_irqs; |
| 293 | return 0; | 293 | return NR_IRQS_LEGACY; |
| 294 | } | 294 | } |
| 295 | #endif | 295 | #endif |
| 296 | 296 | ||
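Returning NR_IRQS_LEGACY instead of 0 matches the new contract of arch_probe_nr_irqs() under sparse irqs: the return value tells the core how many legacy descriptors to preallocate eagerly, while nr_irqs itself carries the machine's upper bound. A sketch of the shape, with an illustrative ceiling variable:

	int __init arch_probe_nr_irqs(void)
	{
		nr_irqs = machine_max_irqs;	/* platform-specific ceiling */
		return NR_IRQS_LEGACY;		/* descriptors to set up now */
	}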
diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c index 548b8ca9c210..b210416ace7b 100644 --- a/arch/sparc/kernel/pci_msi.c +++ b/arch/sparc/kernel/pci_msi.c | |||
| @@ -114,10 +114,10 @@ static void free_msi(struct pci_pbm_info *pbm, int msi_num) | |||
| 114 | 114 | ||
| 115 | static struct irq_chip msi_irq = { | 115 | static struct irq_chip msi_irq = { |
| 116 | .name = "PCI-MSI", | 116 | .name = "PCI-MSI", |
| 117 | .mask = mask_msi_irq, | 117 | .irq_mask = mask_msi_irq, |
| 118 | .unmask = unmask_msi_irq, | 118 | .irq_unmask = unmask_msi_irq, |
| 119 | .enable = unmask_msi_irq, | 119 | .irq_enable = unmask_msi_irq, |
| 120 | .disable = mask_msi_irq, | 120 | .irq_disable = mask_msi_irq, |
| 121 | /* XXX affinity XXX */ | 121 | /* XXX affinity XXX */ |
| 122 | }; | 122 | }; |
| 123 | 123 | ||
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index 596c60086930..9a27d563fc30 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c | |||
| @@ -208,7 +208,7 @@ static void tile_irq_chip_eoi(unsigned int irq) | |||
| 208 | } | 208 | } |
| 209 | 209 | ||
| 210 | static struct irq_chip tile_irq_chip = { | 210 | static struct irq_chip tile_irq_chip = { |
| 211 | .typename = "tile_irq_chip", | 211 | .name = "tile_irq_chip", |
| 212 | .ack = tile_irq_chip_ack, | 212 | .ack = tile_irq_chip_ack, |
| 213 | .eoi = tile_irq_chip_eoi, | 213 | .eoi = tile_irq_chip_eoi, |
| 214 | .mask = tile_irq_chip_mask, | 214 | .mask = tile_irq_chip_mask, |
| @@ -288,7 +288,7 @@ int show_interrupts(struct seq_file *p, void *v) | |||
| 288 | for_each_online_cpu(j) | 288 | for_each_online_cpu(j) |
| 289 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); | 289 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); |
| 290 | #endif | 290 | #endif |
| 291 | seq_printf(p, " %14s", irq_desc[i].chip->typename); | 291 | seq_printf(p, " %14s", irq_desc[i].chip->name); |
| 292 | seq_printf(p, " %s", action->name); | 292 | seq_printf(p, " %s", action->name); |
| 293 | 293 | ||
| 294 | for (action = action->next; action; action = action->next) | 294 | for (action = action->next; action; action = action->next) |
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index a3f0b04d7101..a746e3037a5b 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c | |||
| @@ -46,7 +46,7 @@ int show_interrupts(struct seq_file *p, void *v) | |||
| 46 | for_each_online_cpu(j) | 46 | for_each_online_cpu(j) |
| 47 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); | 47 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); |
| 48 | #endif | 48 | #endif |
| 49 | seq_printf(p, " %14s", irq_desc[i].chip->typename); | 49 | seq_printf(p, " %14s", irq_desc[i].chip->name); |
| 50 | seq_printf(p, " %s", action->name); | 50 | seq_printf(p, " %s", action->name); |
| 51 | 51 | ||
| 52 | for (action=action->next; action; action = action->next) | 52 | for (action=action->next; action; action = action->next) |
| @@ -369,7 +369,7 @@ static void dummy(unsigned int irq) | |||
| 369 | 369 | ||
| 370 | /* This is used for everything other than the timer. */ | 370 | /* This is used for everything other than the timer. */ |
| 371 | static struct irq_chip normal_irq_type = { | 371 | static struct irq_chip normal_irq_type = { |
| 372 | .typename = "SIGIO", | 372 | .name = "SIGIO", |
| 373 | .release = free_irq_by_irq_and_dev, | 373 | .release = free_irq_by_irq_and_dev, |
| 374 | .disable = dummy, | 374 | .disable = dummy, |
| 375 | .enable = dummy, | 375 | .enable = dummy, |
| @@ -378,7 +378,7 @@ static struct irq_chip normal_irq_type = { | |||
| 378 | }; | 378 | }; |
| 379 | 379 | ||
| 380 | static struct irq_chip SIGVTALRM_irq_type = { | 380 | static struct irq_chip SIGVTALRM_irq_type = { |
| 381 | .typename = "SIGVTALRM", | 381 | .name = "SIGVTALRM", |
| 382 | .release = free_irq_by_irq_and_dev, | 382 | .release = free_irq_by_irq_and_dev, |
| 383 | .shutdown = dummy, /* never called */ | 383 | .shutdown = dummy, /* never called */ |
| 384 | .disable = dummy, | 384 | .disable = dummy, |
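The tile and um hunks above finish the long-deprecated rename of irq_chip.typename to .name, including the /proc/interrupts printout. Nothing behavioral changes; a before/after sketch:

	static struct irq_chip demo_chip = {
		.name = "demo",			/* was: .typename = "demo" */
	};

	seq_printf(p, " %14s", irq_desc[i].chip->name);	/* was chip->typename */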
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fd227d6b8d9c..7ab9db88ab6a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
| @@ -63,6 +63,10 @@ config X86 | |||
| 63 | select HAVE_USER_RETURN_NOTIFIER | 63 | select HAVE_USER_RETURN_NOTIFIER |
| 64 | select HAVE_ARCH_JUMP_LABEL | 64 | select HAVE_ARCH_JUMP_LABEL |
| 65 | select HAVE_TEXT_POKE_SMP | 65 | select HAVE_TEXT_POKE_SMP |
| 66 | select HAVE_GENERIC_HARDIRQS | ||
| 67 | select HAVE_SPARSE_IRQ | ||
| 68 | select GENERIC_IRQ_PROBE | ||
| 69 | select GENERIC_PENDING_IRQ if SMP | ||
| 66 | 70 | ||
| 67 | config INSTRUCTION_DECODER | 71 | config INSTRUCTION_DECODER |
| 68 | def_bool (KPROBES || PERF_EVENTS) | 72 | def_bool (KPROBES || PERF_EVENTS) |
| @@ -204,20 +208,6 @@ config HAVE_INTEL_TXT | |||
| 204 | def_bool y | 208 | def_bool y |
| 205 | depends on EXPERIMENTAL && DMAR && ACPI | 209 | depends on EXPERIMENTAL && DMAR && ACPI |
| 206 | 210 | ||
| 207 | # Use the generic interrupt handling code in kernel/irq/: | ||
| 208 | config GENERIC_HARDIRQS | ||
| 209 | def_bool y | ||
| 210 | |||
| 211 | config GENERIC_HARDIRQS_NO__DO_IRQ | ||
| 212 | def_bool y | ||
| 213 | |||
| 214 | config GENERIC_IRQ_PROBE | ||
| 215 | def_bool y | ||
| 216 | |||
| 217 | config GENERIC_PENDING_IRQ | ||
| 218 | def_bool y | ||
| 219 | depends on GENERIC_HARDIRQS && SMP | ||
| 220 | |||
| 221 | config USE_GENERIC_SMP_HELPERS | 211 | config USE_GENERIC_SMP_HELPERS |
| 222 | def_bool y | 212 | def_bool y |
| 223 | depends on SMP | 213 | depends on SMP |
| @@ -300,23 +290,6 @@ config X86_X2APIC | |||
| 300 | 290 | ||
| 301 | If you don't know what to do here, say N. | 291 | If you don't know what to do here, say N. |
| 302 | 292 | ||
| 303 | config SPARSE_IRQ | ||
| 304 | bool "Support sparse irq numbering" | ||
| 305 | depends on PCI_MSI || HT_IRQ | ||
| 306 | ---help--- | ||
| 307 | This enables support for sparse irqs. This is useful for distro | ||
| 308 | kernels that want to define a high CONFIG_NR_CPUS value but still | ||
| 309 | want to have low kernel memory footprint on smaller machines. | ||
| 310 | |||
| 311 | ( Sparse IRQs can also be beneficial on NUMA boxes, as they spread | ||
| 312 | out the irq_desc[] array in a more NUMA-friendly way. ) | ||
| 313 | |||
| 314 | If you don't know what to do here, say N. | ||
| 315 | |||
| 316 | config NUMA_IRQ_DESC | ||
| 317 | def_bool y | ||
| 318 | depends on SPARSE_IRQ && NUMA | ||
| 319 | |||
| 320 | config X86_MPPARSE | 293 | config X86_MPPARSE |
| 321 | bool "Enable MPS table" if ACPI | 294 | bool "Enable MPS table" if ACPI |
| 322 | default y | 295 | default y |
| @@ -521,25 +494,6 @@ if PARAVIRT_GUEST | |||
| 521 | 494 | ||
| 522 | source "arch/x86/xen/Kconfig" | 495 | source "arch/x86/xen/Kconfig" |
| 523 | 496 | ||
| 524 | config VMI | ||
| 525 | bool "VMI Guest support (DEPRECATED)" | ||
| 526 | select PARAVIRT | ||
| 527 | depends on X86_32 | ||
| 528 | ---help--- | ||
| 529 | VMI provides a paravirtualized interface to the VMware ESX server | ||
| 530 | (it could be used by other hypervisors in theory too, but is not | ||
| 531 | at the moment), by linking the kernel to a GPL-ed ROM module | ||
| 532 | provided by the hypervisor. | ||
| 533 | |||
| 534 | As of September 2009, VMware has started a phased retirement | ||
| 535 | of this feature from VMware's products. Please see | ||
| 536 | feature-removal-schedule.txt for details. If you are | ||
| 537 | planning to enable this option, please note that you cannot | ||
| 538 | live migrate a VMI enabled VM to a future VMware product, | ||
| 539 | which doesn't support VMI. So if you expect your kernel to | ||
| 540 | seamlessly migrate to newer VMware products, keep this | ||
| 541 | disabled. | ||
| 542 | |||
| 543 | config KVM_CLOCK | 497 | config KVM_CLOCK |
| 544 | bool "KVM paravirtualized clock" | 498 | bool "KVM paravirtualized clock" |
| 545 | select PARAVIRT | 499 | select PARAVIRT |
| @@ -674,7 +628,7 @@ config GART_IOMMU | |||
| 674 | bool "GART IOMMU support" if EMBEDDED | 628 | bool "GART IOMMU support" if EMBEDDED |
| 675 | default y | 629 | default y |
| 676 | select SWIOTLB | 630 | select SWIOTLB |
| 677 | depends on X86_64 && PCI && K8_NB | 631 | depends on X86_64 && PCI && AMD_NB |
| 678 | ---help--- | 632 | ---help--- |
| 679 | Support for full DMA access of devices with 32bit memory access only | 633 | Support for full DMA access of devices with 32bit memory access only |
| 680 | on systems with more than 3GB. This is usually needed for USB, | 634 | on systems with more than 3GB. This is usually needed for USB, |
| @@ -799,6 +753,17 @@ config SCHED_MC | |||
| 799 | making when dealing with multi-core CPU chips at a cost of slightly | 753 | making when dealing with multi-core CPU chips at a cost of slightly |
| 800 | increased overhead in some places. If unsure say N here. | 754 | increased overhead in some places. If unsure say N here. |
| 801 | 755 | ||
| 756 | config IRQ_TIME_ACCOUNTING | ||
| 757 | bool "Fine granularity task level IRQ time accounting" | ||
| 758 | default n | ||
| 759 | ---help--- | ||
| 760 | Select this option to enable fine granularity task irq time | ||
| 761 | accounting. This is done by reading a timestamp on each | ||
| 762 | transitions between softirq and hardirq state, so there can be a | ||
| 763 | small performance impact. | ||
| 764 | |||
| 765 | If in doubt, say N here. | ||
| 766 | |||
| 802 | source "kernel/Kconfig.preempt" | 767 | source "kernel/Kconfig.preempt" |
| 803 | 768 | ||
| 804 | config X86_UP_APIC | 769 | config X86_UP_APIC |
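IRQ_TIME_ACCOUNTING works by reading a fine-grained clock at every transition into and out of hardirq/softirq context and charging the delta to irq time rather than to the interrupted task. A toy sketch of the idea only, not the kernel's actual implementation (which hooks account_system_vtime() and keeps per-cpu state); charge_irq_time() is a hypothetical sink:

	static u64 irq_enter_ts;		/* per-cpu in real code */

	static void on_irq_enter(void)
	{
		irq_enter_ts = sched_clock();	/* timestamp the transition in */
	}

	static void on_irq_exit(void)
	{
		charge_irq_time(sched_clock() - irq_enter_ts);
	}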
| @@ -1152,6 +1117,9 @@ config X86_PAE | |||
| 1152 | config ARCH_PHYS_ADDR_T_64BIT | 1117 | config ARCH_PHYS_ADDR_T_64BIT |
| 1153 | def_bool X86_64 || X86_PAE | 1118 | def_bool X86_64 || X86_PAE |
| 1154 | 1119 | ||
| 1120 | config ARCH_DMA_ADDR_T_64BIT | ||
| 1121 | def_bool X86_64 || HIGHMEM64G | ||
| 1122 | |||
| 1155 | config DIRECT_GBPAGES | 1123 | config DIRECT_GBPAGES |
| 1156 | bool "Enable 1GB pages for kernel pagetables" if EMBEDDED | 1124 | bool "Enable 1GB pages for kernel pagetables" if EMBEDDED |
| 1157 | default y | 1125 | default y |
| @@ -1330,25 +1298,34 @@ config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK | |||
| 1330 | Set whether the default state of memory_corruption_check is | 1298 | Set whether the default state of memory_corruption_check is |
| 1331 | on or off. | 1299 | on or off. |
| 1332 | 1300 | ||
| 1333 | config X86_RESERVE_LOW_64K | 1301 | config X86_RESERVE_LOW |
| 1334 | bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen" | 1302 | int "Amount of low memory, in kilobytes, to reserve for the BIOS" |
| 1335 | default y | 1303 | default 64 |
| 1304 | range 4 640 | ||
| 1336 | ---help--- | 1305 | ---help--- |
| 1337 | Reserve the first 64K of physical RAM on BIOSes that are known | 1306 | Specify the amount of low memory to reserve for the BIOS. |
| 1338 | to potentially corrupt that memory range. A number of BIOSes are | 1307 | |
| 1339 | known to utilize this area during suspend/resume, so it must not | 1308 | The first page contains BIOS data structures that the kernel |
| 1340 | be used by the kernel. | 1309 | must not use, so that page must always be reserved. |
| 1341 | 1310 | ||
| 1342 | Set this to N if you are absolutely sure that you trust the BIOS | 1311 | By default we reserve the first 64K of physical RAM, as a |
| 1343 | to get all its memory reservations and usages right. | 1312 | number of BIOSes are known to corrupt that memory range |
| 1313 | during events such as suspend/resume or monitor cable | ||
| 1314 | insertion, so it must not be used by the kernel. | ||
| 1344 | 1315 | ||
| 1345 | If you have doubts about the BIOS (e.g. suspend/resume does not | 1316 | You can set this to 4 if you are absolutely sure that you |
| 1346 | work or there are kernel crashes after certain hardware hotplug | 1317 | trust the BIOS to get all its memory reservations and usages |
| 1347 | events) and it's not AMI or Phoenix, then you might want to enable | 1318 | right. If you know your BIOS has problems beyond the |
| 1348 | X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical | 1319 | default 64K area, you can set this to 640 to avoid using the |
| 1349 | corruption patterns. | 1320 | entire low memory range. |
| 1350 | 1321 | ||
| 1351 | Say Y if unsure. | 1322 | If you have doubts about the BIOS (e.g. suspend/resume does |
| 1323 | not work or there are kernel crashes after certain hardware | ||
| 1324 | hotplug events) then you might want to enable | ||
| 1325 | X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check | ||
| 1326 | typical corruption patterns. | ||
| 1327 | |||
| 1328 | Leave this to the default value of 64 if you are unsure. | ||
| 1352 | 1329 | ||
| 1353 | config MATH_EMULATION | 1330 | config MATH_EMULATION |
| 1354 | bool | 1331 | bool |
| @@ -1904,7 +1881,7 @@ config PCI_GODIRECT | |||
| 1904 | bool "Direct" | 1881 | bool "Direct" |
| 1905 | 1882 | ||
| 1906 | config PCI_GOOLPC | 1883 | config PCI_GOOLPC |
| 1907 | bool "OLPC" | 1884 | bool "OLPC XO-1" |
| 1908 | depends on OLPC | 1885 | depends on OLPC |
| 1909 | 1886 | ||
| 1910 | config PCI_GOANY | 1887 | config PCI_GOANY |
| @@ -2065,14 +2042,21 @@ config SCx200HR_TIMER | |||
| 2065 | config OLPC | 2042 | config OLPC |
| 2066 | bool "One Laptop Per Child support" | 2043 | bool "One Laptop Per Child support" |
| 2067 | select GPIOLIB | 2044 | select GPIOLIB |
| 2045 | select OLPC_OPENFIRMWARE | ||
| 2068 | ---help--- | 2046 | ---help--- |
| 2069 | Add support for detecting the unique features of the OLPC | 2047 | Add support for detecting the unique features of the OLPC |
| 2070 | XO hardware. | 2048 | XO hardware. |
| 2071 | 2049 | ||
| 2050 | config OLPC_XO1 | ||
| 2051 | tristate "OLPC XO-1 support" | ||
| 2052 | depends on OLPC && PCI | ||
| 2053 | ---help--- | ||
| 2054 | Add support for non-essential features of the OLPC XO-1 laptop. | ||
| 2055 | |||
| 2072 | config OLPC_OPENFIRMWARE | 2056 | config OLPC_OPENFIRMWARE |
| 2073 | bool "Support for OLPC's Open Firmware" | 2057 | bool "Support for OLPC's Open Firmware" |
| 2074 | depends on !X86_64 && !X86_PAE | 2058 | depends on !X86_64 && !X86_PAE |
| 2075 | default y if OLPC | 2059 | default n |
| 2076 | help | 2060 | help |
| 2077 | This option adds support for the implementation of Open Firmware | 2061 | This option adds support for the implementation of Open Firmware |
| 2078 | that is used on the OLPC XO-1 Children's Machine. | 2062 | that is used on the OLPC XO-1 Children's Machine. |
| @@ -2080,7 +2064,7 @@ config OLPC_OPENFIRMWARE | |||
| 2080 | 2064 | ||
| 2081 | endif # X86_32 | 2065 | endif # X86_32 |
| 2082 | 2066 | ||
| 2083 | config K8_NB | 2067 | config AMD_NB |
| 2084 | def_bool y | 2068 | def_bool y |
| 2085 | depends on CPU_SUP_AMD && PCI | 2069 | depends on CPU_SUP_AMD && PCI |
| 2086 | 2070 | ||
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 75085080b63e..e5bb96b10f1a 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug | |||
| @@ -43,6 +43,10 @@ config EARLY_PRINTK | |||
| 43 | with klogd/syslogd or the X server. You should normally N here, | 43 | with klogd/syslogd or the X server. You should normally N here, |
| 44 | unless you want to debug such a crash. | 44 | unless you want to debug such a crash. |
| 45 | 45 | ||
| 46 | config EARLY_PRINTK_MRST | ||
| 47 | bool "Early printk for MRST platform support" | ||
| 48 | depends on EARLY_PRINTK && X86_MRST | ||
| 49 | |||
| 46 | config EARLY_PRINTK_DBGP | 50 | config EARLY_PRINTK_DBGP |
| 47 | bool "Early printk via EHCI debug port" | 51 | bool "Early printk via EHCI debug port" |
| 48 | depends on EARLY_PRINTK && PCI | 52 | depends on EARLY_PRINTK && PCI |
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index e8c8881351b3..b02e509072a7 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile | |||
| @@ -96,8 +96,12 @@ cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_en | |||
| 96 | # is .cfi_signal_frame supported too? | 96 | # is .cfi_signal_frame supported too? |
| 97 | cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1) | 97 | cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1) |
| 98 | cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1) | 98 | cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1) |
| 99 | KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) | 99 | |
| 100 | KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) | 100 | # does binutils support specific instructions? |
| 101 | asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1) | ||
| 102 | |||
| 103 | KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) | ||
| 104 | KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) | ||
| 101 | 105 | ||
| 102 | LDFLAGS := -m elf_$(UTS_MACHINE) | 106 | LDFLAGS := -m elf_$(UTS_MACHINE) |
| 103 | 107 | ||
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 5af2982133b5..f16a2caca1e0 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. |
| 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
| 4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
| 5 | * | 5 | * |
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index cb030374b90a..916bc8111a01 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2009 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2009-2010 Advanced Micro Devices, Inc. |
| 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
| 4 | * | 4 | * |
| 5 | * This program is free software; you can redistribute it and/or modify it | 5 | * This program is free software; you can redistribute it and/or modify it |
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 08616180deaf..e3509fc303bf 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. |
| 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
| 4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
| 5 | * | 5 | * |
| @@ -416,13 +416,22 @@ struct amd_iommu { | |||
| 416 | struct dma_ops_domain *default_dom; | 416 | struct dma_ops_domain *default_dom; |
| 417 | 417 | ||
| 418 | /* | 418 | /* |
| 419 | * This array is required to work around a potential BIOS bug. | 419 | * We can't rely on the BIOS to restore all values on reinit, so we |
| 420 | * The BIOS may miss to restore parts of the PCI configuration | 420 | * need to stash them |
| 421 | * space when the system resumes from S3. The result is that the | ||
| 422 | * IOMMU does not execute commands anymore which leads to system | ||
| 423 | * failure. | ||
| 424 | */ | 421 | */ |
| 425 | u32 cache_cfg[4]; | 422 | |
| 423 | /* The iommu BAR */ | ||
| 424 | u32 stored_addr_lo; | ||
| 425 | u32 stored_addr_hi; | ||
| 426 | |||
| 427 | /* | ||
| 428 | * Each iommu has 6 l1s, each of which is documented as having 0x12 | ||
| 429 | * registers | ||
| 430 | */ | ||
| 431 | u32 stored_l1[6][0x12]; | ||
| 432 | |||
| 433 | /* The l2 indirect registers */ | ||
| 434 | u32 stored_l2[0x83]; | ||
| 426 | }; | 435 | }; |
| 427 | 436 | ||
| 428 | /* | 437 | /* |
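The single cache_cfg[4] workaround grows into a full stash of everything the BIOS may fail to restore around S3: the IOMMU BAR plus the L1 and L2 indirect register banks, with array sizes taken straight from the hardware documentation (6 L1s of 0x12 registers, 0x83 L2 registers). A sketch of how a suspend path could fill them; read_l1_reg()/read_l2_reg() are placeholders for the driver's indirect-access routine:

	static void stash_iommu_regs(struct amd_iommu *iommu)
	{
		int bank, reg;

		for (bank = 0; bank < 6; bank++)
			for (reg = 0; reg < 0x12; reg++)
				iommu->stored_l1[bank][reg] =
					read_l1_reg(iommu, bank, reg);	/* hypothetical */

		for (reg = 0; reg < 0x83; reg++)
			iommu->stored_l2[reg] = read_l2_reg(iommu, reg);	/* hypothetical */
	}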
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/amd_nb.h index af00bd1d2089..c8517f81b21e 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/amd_nb.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | #ifndef _ASM_X86_K8_H | 1 | #ifndef _ASM_X86_AMD_NB_H |
| 2 | #define _ASM_X86_K8_H | 2 | #define _ASM_X86_AMD_NB_H |
| 3 | 3 | ||
| 4 | #include <linux/pci.h> | 4 | #include <linux/pci.h> |
| 5 | 5 | ||
| @@ -7,24 +7,27 @@ extern struct pci_device_id k8_nb_ids[]; | |||
| 7 | struct bootnode; | 7 | struct bootnode; |
| 8 | 8 | ||
| 9 | extern int early_is_k8_nb(u32 value); | 9 | extern int early_is_k8_nb(u32 value); |
| 10 | extern struct pci_dev **k8_northbridges; | ||
| 11 | extern int num_k8_northbridges; | ||
| 12 | extern int cache_k8_northbridges(void); | 10 | extern int cache_k8_northbridges(void); |
| 13 | extern void k8_flush_garts(void); | 11 | extern void k8_flush_garts(void); |
| 14 | extern int k8_get_nodes(struct bootnode *nodes); | 12 | extern int k8_get_nodes(struct bootnode *nodes); |
| 15 | extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); | 13 | extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); |
| 16 | extern int k8_scan_nodes(void); | 14 | extern int k8_scan_nodes(void); |
| 17 | 15 | ||
| 18 | #ifdef CONFIG_K8_NB | 16 | struct k8_northbridge_info { |
| 19 | extern int num_k8_northbridges; | 17 | u16 num; |
| 18 | u8 gart_supported; | ||
| 19 | struct pci_dev **nb_misc; | ||
| 20 | }; | ||
| 21 | extern struct k8_northbridge_info k8_northbridges; | ||
| 22 | |||
| 23 | #ifdef CONFIG_AMD_NB | ||
| 20 | 24 | ||
| 21 | static inline struct pci_dev *node_to_k8_nb_misc(int node) | 25 | static inline struct pci_dev *node_to_k8_nb_misc(int node) |
| 22 | { | 26 | { |
| 23 | return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; | 27 | return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL; |
| 24 | } | 28 | } |
| 25 | 29 | ||
| 26 | #else | 30 | #else |
| 27 | #define num_k8_northbridges 0 | ||
| 28 | 31 | ||
| 29 | static inline struct pci_dev *node_to_k8_nb_misc(int node) | 32 | static inline struct pci_dev *node_to_k8_nb_misc(int node) |
| 30 | { | 33 | { |
| @@ -33,4 +36,4 @@ static inline struct pci_dev *node_to_k8_nb_misc(int node) | |||
| 33 | #endif | 36 | #endif |
| 34 | 37 | ||
| 35 | 38 | ||
| 36 | #endif /* _ASM_X86_K8_H */ | 39 | #endif /* _ASM_X86_AMD_NB_H */ |
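The rename from k8.h to amd_nb.h comes with a consolidation: the loose num_k8_northbridges counter and k8_northbridges pointer array fold into a single k8_northbridge_info struct that also records GART capability. Call sites change mechanically; a sketch:

	/* before: two globals */
	for (i = 0; i < num_k8_northbridges; i++)
		dev = k8_northbridges[i];

	/* after: one struct, plus an explicit capability flag */
	if (k8_northbridges.gart_supported)
		for (i = 0; i < k8_northbridges.num; i++)
			dev = k8_northbridges.nb_misc[i];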
diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h index a69b1ac9eaf8..2fefa501d3ba 100644 --- a/arch/x86/include/asm/apb_timer.h +++ b/arch/x86/include/asm/apb_timer.h | |||
| @@ -54,7 +54,6 @@ extern struct clock_event_device *global_clock_event; | |||
| 54 | extern unsigned long apbt_quick_calibrate(void); | 54 | extern unsigned long apbt_quick_calibrate(void); |
| 55 | extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu); | 55 | extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu); |
| 56 | extern void apbt_setup_secondary_clock(void); | 56 | extern void apbt_setup_secondary_clock(void); |
| 57 | extern unsigned int boot_cpu_id; | ||
| 58 | 57 | ||
| 59 | extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint); | 58 | extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint); |
| 60 | extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr); | 59 | extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr); |
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1fa03e04ae44..286de34b0ed6 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
| @@ -252,9 +252,7 @@ static inline int apic_is_clustered_box(void) | |||
| 252 | } | 252 | } |
| 253 | #endif | 253 | #endif |
| 254 | 254 | ||
| 255 | extern u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask); | 255 | extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); |
| 256 | extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask); | ||
| 257 | |||
| 258 | 256 | ||
| 259 | #else /* !CONFIG_X86_LOCAL_APIC */ | 257 | #else /* !CONFIG_X86_LOCAL_APIC */ |
| 260 | static inline void lapic_shutdown(void) { } | 258 | static inline void lapic_shutdown(void) { } |
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 7fe3b3060f08..a859ca461fb0 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h | |||
| @@ -131,6 +131,7 @@ | |||
| 131 | #define APIC_EILVTn(n) (0x500 + 0x10 * n) | 131 | #define APIC_EILVTn(n) (0x500 + 0x10 * n) |
| 132 | #define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ | 132 | #define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ |
| 133 | #define APIC_EILVT_NR_AMD_10H 4 | 133 | #define APIC_EILVT_NR_AMD_10H 4 |
| 134 | #define APIC_EILVT_NR_MAX APIC_EILVT_NR_AMD_10H | ||
| 134 | #define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF) | 135 | #define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF) |
| 135 | #define APIC_EILVT_MSG_FIX 0x0 | 136 | #define APIC_EILVT_MSG_FIX 0x0 |
| 136 | #define APIC_EILVT_MSG_SMI 0x2 | 137 | #define APIC_EILVT_MSG_SMI 0x2 |
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index b185091bf19c..4fab24de26b1 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h | |||
| @@ -32,6 +32,5 @@ extern void arch_unregister_cpu(int); | |||
| 32 | 32 | ||
| 33 | DECLARE_PER_CPU(int, cpu_state); | 33 | DECLARE_PER_CPU(int, cpu_state); |
| 34 | 34 | ||
| 35 | extern unsigned int boot_cpu_id; | ||
| 36 | 35 | ||
| 37 | #endif /* _ASM_X86_CPU_H */ | 36 | #endif /* _ASM_X86_CPU_H */ |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 3f76523589af..220e2ea08e80 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
| @@ -152,10 +152,14 @@ | |||
| 152 | #define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */ | 152 | #define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */ |
| 153 | #define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */ | 153 | #define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */ |
| 154 | #define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */ | 154 | #define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */ |
| 155 | #define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */ | 155 | #define X86_FEATURE_XOP (6*32+11) /* extended AVX instructions */ |
| 156 | #define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ | 156 | #define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ |
| 157 | #define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ | 157 | #define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ |
| 158 | #define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */ | ||
| 159 | #define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */ | ||
| 158 | #define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ | 160 | #define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ |
| 161 | #define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ | ||
| 162 | #define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ | ||
| 159 | 163 | ||
| 160 | /* | 164 | /* |
| 161 | * Auxiliary flags: Linux defined - For features scattered in various | 165 | * Auxiliary flags: Linux defined - For features scattered in various |
| @@ -180,6 +184,13 @@ | |||
| 180 | #define X86_FEATURE_LBRV (8*32+ 6) /* AMD LBR Virtualization support */ | 184 | #define X86_FEATURE_LBRV (8*32+ 6) /* AMD LBR Virtualization support */ |
| 181 | #define X86_FEATURE_SVML (8*32+ 7) /* "svm_lock" AMD SVM locking MSR */ | 185 | #define X86_FEATURE_SVML (8*32+ 7) /* "svm_lock" AMD SVM locking MSR */ |
| 182 | #define X86_FEATURE_NRIPS (8*32+ 8) /* "nrip_save" AMD SVM next_rip save */ | 186 | #define X86_FEATURE_NRIPS (8*32+ 8) /* "nrip_save" AMD SVM next_rip save */ |
| 187 | #define X86_FEATURE_TSCRATEMSR (8*32+ 9) /* "tsc_scale" AMD TSC scaling support */ | ||
| 188 | #define X86_FEATURE_VMCBCLEAN (8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */ | ||
| 189 | #define X86_FEATURE_FLUSHBYASID (8*32+11) /* AMD flush-by-ASID support */ | ||
| 190 | #define X86_FEATURE_DECODEASSISTS (8*32+12) /* AMD Decode Assists support */ | ||
| 191 | #define X86_FEATURE_PAUSEFILTER (8*32+13) /* AMD filtered pause intercept */ | ||
| 192 | #define X86_FEATURE_PFTHRESHOLD (8*32+14) /* AMD pause filter threshold */ | ||
| 193 | |||
| 183 | 194 | ||
| 184 | /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ | 195 | /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ |
| 185 | #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ | 196 | #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ |
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 733f7e91e7a9..326099199318 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h | |||
| @@ -89,6 +89,16 @@ | |||
| 89 | CFI_ADJUST_CFA_OFFSET -8 | 89 | CFI_ADJUST_CFA_OFFSET -8 |
| 90 | .endm | 90 | .endm |
| 91 | 91 | ||
| 92 | .macro pushfq_cfi | ||
| 93 | pushfq | ||
| 94 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 95 | .endm | ||
| 96 | |||
| 97 | .macro popfq_cfi | ||
| 98 | popfq | ||
| 99 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 100 | .endm | ||
| 101 | |||
| 92 | .macro movq_cfi reg offset=0 | 102 | .macro movq_cfi reg offset=0 |
| 93 | movq %\reg, \offset(%rsp) | 103 | movq %\reg, \offset(%rsp) |
| 94 | CFI_REL_OFFSET \reg, \offset | 104 | CFI_REL_OFFSET \reg, \offset |
| @@ -109,6 +119,16 @@ | |||
| 109 | CFI_ADJUST_CFA_OFFSET -4 | 119 | CFI_ADJUST_CFA_OFFSET -4 |
| 110 | .endm | 120 | .endm |
| 111 | 121 | ||
| 122 | .macro pushfl_cfi | ||
| 123 | pushfl | ||
| 124 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 125 | .endm | ||
| 126 | |||
| 127 | .macro popfl_cfi | ||
| 128 | popfl | ||
| 129 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 130 | .endm | ||
| 131 | |||
| 112 | .macro movl_cfi reg offset=0 | 132 | .macro movl_cfi reg offset=0 |
| 113 | movl %\reg, \offset(%esp) | 133 | movl %\reg, \offset(%esp) |
| 114 | CFI_REL_OFFSET \reg, \offset | 134 | CFI_REL_OFFSET \reg, \offset |
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index d07b44f7d1dc..4d293dced62f 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h | |||
| @@ -214,5 +214,20 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) | |||
| 214 | BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); | 214 | BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); |
| 215 | return __virt_to_fix(vaddr); | 215 | return __virt_to_fix(vaddr); |
| 216 | } | 216 | } |
| 217 | |||
| 218 | /* Return a pointer with the offset calculated */ | ||
| 219 | static inline unsigned long __set_fixmap_offset(enum fixed_addresses idx, | ||
| 220 | phys_addr_t phys, pgprot_t flags) | ||
| 221 | { | ||
| 222 | __set_fixmap(idx, phys, flags); | ||
| 223 | return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1)); | ||
| 224 | } | ||
| 225 | |||
| 226 | #define set_fixmap_offset(idx, phys) \ | ||
| 227 | __set_fixmap_offset(idx, phys, PAGE_KERNEL) | ||
| 228 | |||
| 229 | #define set_fixmap_offset_nocache(idx, phys) \ | ||
| 230 | __set_fixmap_offset(idx, phys, PAGE_KERNEL_NOCACHE) | ||
| 231 | |||
| 217 | #endif /* !__ASSEMBLY__ */ | 232 | #endif /* !__ASSEMBLY__ */ |
| 218 | #endif /* _ASM_X86_FIXMAP_H */ | 233 | #endif /* _ASM_X86_FIXMAP_H */ |
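set_fixmap_offset() maps the page containing phys and hands back a virtual address that keeps the sub-page offset, sparing callers the usual fix_to_virt(idx) + (phys & ~PAGE_MASK) arithmetic. A hedged usage sketch; FIX_EXAMPLE and the physical address are illustrative only:

	unsigned long va;

	/* register block 0x40 into an un-page-aligned physical region */
	va = set_fixmap_offset_nocache(FIX_EXAMPLE, 0xfed00040);
	/* va == fix_to_virt(FIX_EXAMPLE) + 0x40 */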
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 4ac5b0f33fc1..bf357f9b25f0 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h | |||
| @@ -17,6 +17,7 @@ extern int fix_aperture; | |||
| 17 | #define GARTEN (1<<0) | 17 | #define GARTEN (1<<0) |
| 18 | #define DISGARTCPU (1<<4) | 18 | #define DISGARTCPU (1<<4) |
| 19 | #define DISGARTIO (1<<5) | 19 | #define DISGARTIO (1<<5) |
| 20 | #define DISTLBWALKPRB (1<<6) | ||
| 20 | 21 | ||
| 21 | /* GART cache control register bits. */ | 22 | /* GART cache control register bits. */ |
| 22 | #define INVGART (1<<0) | 23 | #define INVGART (1<<0) |
| @@ -27,7 +28,6 @@ extern int fix_aperture; | |||
| 27 | #define AMD64_GARTAPERTUREBASE 0x94 | 28 | #define AMD64_GARTAPERTUREBASE 0x94 |
| 28 | #define AMD64_GARTTABLEBASE 0x98 | 29 | #define AMD64_GARTTABLEBASE 0x98 |
| 29 | #define AMD64_GARTCACHECTL 0x9c | 30 | #define AMD64_GARTCACHECTL 0x9c |
| 30 | #define AMD64_GARTEN (1<<0) | ||
| 31 | 31 | ||
| 32 | #ifdef CONFIG_GART_IOMMU | 32 | #ifdef CONFIG_GART_IOMMU |
| 33 | extern int gart_iommu_aperture; | 33 | extern int gart_iommu_aperture; |
| @@ -57,6 +57,19 @@ static inline void gart_iommu_hole_init(void) | |||
| 57 | 57 | ||
| 58 | extern int agp_amd64_init(void); | 58 | extern int agp_amd64_init(void); |
| 59 | 59 | ||
| 60 | static inline void gart_set_size_and_enable(struct pci_dev *dev, u32 order) | ||
| 61 | { | ||
| 62 | u32 ctl; | ||
| 63 | |||
| 64 | /* | ||
| 65 | * Don't enable translation but enable GART IO and CPU accesses. | ||
| 66 | * Also, set DISTLBWALKPRB since GART tables memory is UC. | ||
| 67 | */ | ||
| 68 | ctl = DISTLBWALKPRB | order << 1; | ||
| 69 | |||
| 70 | pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); | ||
| 71 | } | ||
| 72 | |||
| 60 | static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) | 73 | static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) |
| 61 | { | 74 | { |
| 62 | u32 tmp, ctl; | 75 | u32 tmp, ctl; |
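gart_set_size_and_enable() programs the aperture control register without turning translation on: the aperture order lands above bit 0 via order << 1, and DISTLBWALKPRB (bit 6) is set because the GART tables sit in uncached memory. A worked value, assuming the usual 32MB << order aperture sizing:

	u32 ctl = DISTLBWALKPRB | (5 << 1);	/* order 5 = 1GB aperture */
	/* ctl == (1 << 6) | 0x0a == 0x4a; GARTEN (bit 0) stays clear */
	pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);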
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 1d5c08a1bdfd..2c392d663dce 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h | |||
| @@ -74,10 +74,12 @@ extern void hpet_disable(void); | |||
| 74 | extern unsigned int hpet_readl(unsigned int a); | 74 | extern unsigned int hpet_readl(unsigned int a); |
| 75 | extern void force_hpet_resume(void); | 75 | extern void force_hpet_resume(void); |
| 76 | 76 | ||
| 77 | extern void hpet_msi_unmask(unsigned int irq); | 77 | struct irq_data; |
| 78 | extern void hpet_msi_mask(unsigned int irq); | 78 | extern void hpet_msi_unmask(struct irq_data *data); |
| 79 | extern void hpet_msi_write(unsigned int irq, struct msi_msg *msg); | 79 | extern void hpet_msi_mask(struct irq_data *data); |
| 80 | extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg); | 80 | struct hpet_dev; |
| 81 | extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg); | ||
| 82 | extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg); | ||
| 81 | 83 | ||
| 82 | #ifdef CONFIG_PCI_MSI | 84 | #ifdef CONFIG_PCI_MSI |
| 83 | extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id); | 85 | extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id); |
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 3a54a1ca1a02..0274ec5a7e62 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
| @@ -78,6 +78,13 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, | |||
| 78 | irq_attr->polarity = polarity; | 78 | irq_attr->polarity = polarity; |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | struct irq_2_iommu { | ||
| 82 | struct intel_iommu *iommu; | ||
| 83 | u16 irte_index; | ||
| 84 | u16 sub_handle; | ||
| 85 | u8 irte_mask; | ||
| 86 | }; | ||
| 87 | |||
| 81 | /* | 88 | /* |
| 82 | * This is performance-critical, we want to do it O(1) | 89 | * This is performance-critical, we want to do it O(1) |
| 83 | * | 90 | * |
| @@ -89,15 +96,17 @@ struct irq_cfg { | |||
| 89 | cpumask_var_t old_domain; | 96 | cpumask_var_t old_domain; |
| 90 | u8 vector; | 97 | u8 vector; |
| 91 | u8 move_in_progress : 1; | 98 | u8 move_in_progress : 1; |
| 99 | #ifdef CONFIG_INTR_REMAP | ||
| 100 | struct irq_2_iommu irq_2_iommu; | ||
| 101 | #endif | ||
| 92 | }; | 102 | }; |
| 93 | 103 | ||
| 94 | extern struct irq_cfg *irq_cfg(unsigned int); | ||
| 95 | extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); | 104 | extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); |
| 96 | extern void send_cleanup_vector(struct irq_cfg *); | 105 | extern void send_cleanup_vector(struct irq_cfg *); |
| 97 | 106 | ||
| 98 | struct irq_desc; | 107 | struct irq_data; |
| 99 | extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *, | 108 | int __ioapic_set_affinity(struct irq_data *, const struct cpumask *, |
| 100 | unsigned int *dest_id); | 109 | unsigned int *dest_id); |
| 101 | extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); | 110 | extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); |
| 102 | extern void setup_ioapic_dest(void); | 111 | extern void setup_ioapic_dest(void); |
| 103 | 112 | ||
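Embedding struct irq_2_iommu directly in irq_cfg (under CONFIG_INTR_REMAP) removes the separate per-irq lookup table the interrupt-remapping code used to maintain. Once you hold the cfg, the remapping state is a plain member access; a sketch, assuming cfg is obtained the usual way from the chip data:

	struct irq_cfg *cfg = get_irq_chip_data(irq);
	#ifdef CONFIG_INTR_REMAP
	struct irq_2_iommu *irte_info = &cfg->irq_2_iommu;	/* no table walk */
	#endif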
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index a73a8d5a5e69..4aa2bb3b242a 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
| @@ -55,6 +55,12 @@ extern int save_i387_xstate_ia32(void __user *buf); | |||
| 55 | extern int restore_i387_xstate_ia32(void __user *buf); | 55 | extern int restore_i387_xstate_ia32(void __user *buf); |
| 56 | #endif | 56 | #endif |
| 57 | 57 | ||
| 58 | #ifdef CONFIG_MATH_EMULATION | ||
| 59 | extern void finit_soft_fpu(struct i387_soft_struct *soft); | ||
| 60 | #else | ||
| 61 | static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} | ||
| 62 | #endif | ||
| 63 | |||
| 58 | #define X87_FSW_ES (1 << 7) /* Exception Summary */ | 64 | #define X87_FSW_ES (1 << 7) /* Exception Summary */ |
| 59 | 65 | ||
| 60 | static __always_inline __pure bool use_xsaveopt(void) | 66 | static __always_inline __pure bool use_xsaveopt(void) |
| @@ -67,6 +73,11 @@ static __always_inline __pure bool use_xsave(void) | |||
| 67 | return static_cpu_has(X86_FEATURE_XSAVE); | 73 | return static_cpu_has(X86_FEATURE_XSAVE); |
| 68 | } | 74 | } |
| 69 | 75 | ||
| 76 | static __always_inline __pure bool use_fxsr(void) | ||
| 77 | { | ||
| 78 | return static_cpu_has(X86_FEATURE_FXSR); | ||
| 79 | } | ||
| 80 | |||
| 70 | extern void __sanitize_i387_state(struct task_struct *); | 81 | extern void __sanitize_i387_state(struct task_struct *); |
| 71 | 82 | ||
| 72 | static inline void sanitize_i387_state(struct task_struct *tsk) | 83 | static inline void sanitize_i387_state(struct task_struct *tsk) |
| @@ -77,19 +88,11 @@ static inline void sanitize_i387_state(struct task_struct *tsk) | |||
| 77 | } | 88 | } |
| 78 | 89 | ||
| 79 | #ifdef CONFIG_X86_64 | 90 | #ifdef CONFIG_X86_64 |
| 80 | |||
| 81 | /* Ignore delayed exceptions from user space */ | ||
| 82 | static inline void tolerant_fwait(void) | ||
| 83 | { | ||
| 84 | asm volatile("1: fwait\n" | ||
| 85 | "2:\n" | ||
| 86 | _ASM_EXTABLE(1b, 2b)); | ||
| 87 | } | ||
| 88 | |||
| 89 | static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | 91 | static inline int fxrstor_checking(struct i387_fxsave_struct *fx) |
| 90 | { | 92 | { |
| 91 | int err; | 93 | int err; |
| 92 | 94 | ||
| 95 | /* See comment in fxsave() below. */ | ||
| 93 | asm volatile("1: rex64/fxrstor (%[fx])\n\t" | 96 | asm volatile("1: rex64/fxrstor (%[fx])\n\t" |
| 94 | "2:\n" | 97 | "2:\n" |
| 95 | ".section .fixup,\"ax\"\n" | 98 | ".section .fixup,\"ax\"\n" |
| @@ -98,44 +101,10 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | |||
| 98 | ".previous\n" | 101 | ".previous\n" |
| 99 | _ASM_EXTABLE(1b, 3b) | 102 | _ASM_EXTABLE(1b, 3b) |
| 100 | : [err] "=r" (err) | 103 | : [err] "=r" (err) |
| 101 | #if 0 /* See comment in fxsave() below. */ | 104 | : [fx] "R" (fx), "m" (*fx), "0" (0)); |
| 102 | : [fx] "r" (fx), "m" (*fx), "0" (0)); | ||
| 103 | #else | ||
| 104 | : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0)); | ||
| 105 | #endif | ||
| 106 | return err; | 105 | return err; |
| 107 | } | 106 | } |
| 108 | 107 | ||
| 109 | /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception | ||
| 110 | is pending. Clear the x87 state here by setting it to fixed | ||
| 111 | values. The kernel data segment can sometimes be 0 and sometimes the | ||
| 112 | new user value. Both should be ok. | ||
| 113 | Use the PDA as a safe address because it should already be in L1. */ | ||
| 114 | static inline void fpu_clear(struct fpu *fpu) | ||
| 115 | { | ||
| 116 | struct xsave_struct *xstate = &fpu->state->xsave; | ||
| 117 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; | ||
| 118 | |||
| 119 | /* | ||
| 120 | * xsave header may indicate the init state of the FP. | ||
| 121 | */ | ||
| 122 | if (use_xsave() && | ||
| 123 | !(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) | ||
| 124 | return; | ||
| 125 | |||
| 126 | if (unlikely(fx->swd & X87_FSW_ES)) | ||
| 127 | asm volatile("fnclex"); | ||
| 128 | alternative_input(ASM_NOP8 ASM_NOP2, | ||
| 129 | " emms\n" /* clear stack tags */ | ||
| 130 | " fildl %%gs:0", /* load to clear state */ | ||
| 131 | X86_FEATURE_FXSAVE_LEAK); | ||
| 132 | } | ||
| 133 | |||
| 134 | static inline void clear_fpu_state(struct task_struct *tsk) | ||
| 135 | { | ||
| 136 | fpu_clear(&tsk->thread.fpu); | ||
| 137 | } | ||
| 138 | |||
| 139 | static inline int fxsave_user(struct i387_fxsave_struct __user *fx) | 108 | static inline int fxsave_user(struct i387_fxsave_struct __user *fx) |
| 140 | { | 109 | { |
| 141 | int err; | 110 | int err; |
| @@ -149,6 +118,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) | |||
| 149 | if (unlikely(err)) | 118 | if (unlikely(err)) |
| 150 | return -EFAULT; | 119 | return -EFAULT; |
| 151 | 120 | ||
| 121 | /* See comment in fxsave() below. */ | ||
| 152 | asm volatile("1: rex64/fxsave (%[fx])\n\t" | 122 | asm volatile("1: rex64/fxsave (%[fx])\n\t" |
| 153 | "2:\n" | 123 | "2:\n" |
| 154 | ".section .fixup,\"ax\"\n" | 124 | ".section .fixup,\"ax\"\n" |
| @@ -157,11 +127,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) | |||
| 157 | ".previous\n" | 127 | ".previous\n" |
| 158 | _ASM_EXTABLE(1b, 3b) | 128 | _ASM_EXTABLE(1b, 3b) |
| 159 | : [err] "=r" (err), "=m" (*fx) | 129 | : [err] "=r" (err), "=m" (*fx) |
| 160 | #if 0 /* See comment in fxsave() below. */ | 130 | : [fx] "R" (fx), "0" (0)); |
| 161 | : [fx] "r" (fx), "0" (0)); | ||
| 162 | #else | ||
| 163 | : [fx] "cdaSDb" (fx), "0" (0)); | ||
| 164 | #endif | ||
| 165 | if (unlikely(err) && | 131 | if (unlikely(err) && |
| 166 | __clear_user(fx, sizeof(struct i387_fxsave_struct))) | 132 | __clear_user(fx, sizeof(struct i387_fxsave_struct))) |
| 167 | err = -EFAULT; | 133 | err = -EFAULT; |
| @@ -175,56 +141,29 @@ static inline void fpu_fxsave(struct fpu *fpu) | |||
| 175 | uses any extended registers for addressing, a second REX prefix | 141 | uses any extended registers for addressing, a second REX prefix |
| 176 | will be generated (to the assembler, rex64 followed by semicolon | 142 | will be generated (to the assembler, rex64 followed by semicolon |
| 177 | is a separate instruction), and hence the 64-bitness is lost. */ | 143 | is a separate instruction), and hence the 64-bitness is lost. */ |
| 178 | #if 0 | 144 | |
| 145 | #ifdef CONFIG_AS_FXSAVEQ | ||
| 179 | /* Using "fxsaveq %0" would be the ideal choice, but is only supported | 146 | /* Using "fxsaveq %0" would be the ideal choice, but is only supported |
| 180 | starting with gas 2.16. */ | 147 | starting with gas 2.16. */ |
| 181 | __asm__ __volatile__("fxsaveq %0" | 148 | __asm__ __volatile__("fxsaveq %0" |
| 182 | : "=m" (fpu->state->fxsave)); | 149 | : "=m" (fpu->state->fxsave)); |
| 183 | #elif 0 | 150 | #else |
| 184 | /* Using, as a workaround, the properly prefixed form below isn't | 151 | /* Using, as a workaround, the properly prefixed form below isn't |
| 185 | accepted by any binutils version so far released, complaining that | 152 | accepted by any binutils version so far released, complaining that |
| 186 | the same type of prefix is used twice if an extended register is | 153 | the same type of prefix is used twice if an extended register is |
| 187 | needed for addressing (fix submitted to mainline 2005-11-21). */ | 154 | needed for addressing (fix submitted to mainline 2005-11-21). |
| 188 | __asm__ __volatile__("rex64/fxsave %0" | 155 | asm volatile("rex64/fxsave %0" |
| 189 | : "=m" (fpu->state->fxsave)); | 156 | : "=m" (fpu->state->fxsave)); |
| 190 | #else | 157 | This, however, we can work around by forcing the compiler to select |
| 191 | /* This, however, we can work around by forcing the compiler to select | ||
| 192 | an addressing mode that doesn't require extended registers. */ | 158 | an addressing mode that doesn't require extended registers. */ |
| 193 | __asm__ __volatile__("rex64/fxsave (%1)" | 159 | asm volatile("rex64/fxsave (%[fx])" |
| 194 | : "=m" (fpu->state->fxsave) | 160 | : "=m" (fpu->state->fxsave) |
| 195 | : "cdaSDb" (&fpu->state->fxsave)); | 161 | : [fx] "R" (&fpu->state->fxsave)); |
| 196 | #endif | 162 | #endif |
| 197 | } | 163 | } |
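The constraint change running through fxrstor_checking(), fxsave_user() and fpu_fxsave() replaces the hand-rolled "cdaSDb" register list with gcc's "R" constraint, which permits any legacy (non-REX-extended) register; that guarantees the manually emitted rex64 prefix remains the only REX byte on the instruction. A standalone sketch of the same trick, using a hypothetical buffer type for illustration:

    /* demo_fxsave_area is a hypothetical 512-byte FXSAVE buffer. */
    struct demo_fxsave_area {
            unsigned char data[512];
    } __attribute__((aligned(16)));

    static inline void demo_fxsave(struct demo_fxsave_area *buf)
    {
            /* "R" = any legacy register, so the address register needs no
             * REX extension and the explicit rex64 stays the only prefix. */
            asm volatile("rex64/fxsave (%[fx])"
                         : "=m" (*buf)
                         : [fx] "R" (buf));
    }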
| 198 | 164 | ||
| 199 | static inline void fpu_save_init(struct fpu *fpu) | ||
| 200 | { | ||
| 201 | if (use_xsave()) | ||
| 202 | fpu_xsave(fpu); | ||
| 203 | else | ||
| 204 | fpu_fxsave(fpu); | ||
| 205 | |||
| 206 | fpu_clear(fpu); | ||
| 207 | } | ||
| 208 | |||
| 209 | static inline void __save_init_fpu(struct task_struct *tsk) | ||
| 210 | { | ||
| 211 | fpu_save_init(&tsk->thread.fpu); | ||
| 212 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | ||
| 213 | } | ||
| 214 | |||
| 215 | #else /* CONFIG_X86_32 */ | 165 | #else /* CONFIG_X86_32 */ |
| 216 | 166 | ||
| 217 | #ifdef CONFIG_MATH_EMULATION | ||
| 218 | extern void finit_soft_fpu(struct i387_soft_struct *soft); | ||
| 219 | #else | ||
| 220 | static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} | ||
| 221 | #endif | ||
| 222 | |||
| 223 | static inline void tolerant_fwait(void) | ||
| 224 | { | ||
| 225 | asm volatile("fnclex ; fwait"); | ||
| 226 | } | ||
| 227 | |||
| 228 | /* perform fxrstor iff the processor has extended states, otherwise frstor */ | 167 | /* perform fxrstor iff the processor has extended states, otherwise frstor */ |
| 229 | static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | 168 | static inline int fxrstor_checking(struct i387_fxsave_struct *fx) |
| 230 | { | 169 | { |
| @@ -241,6 +180,14 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | |||
| 241 | return 0; | 180 | return 0; |
| 242 | } | 181 | } |
| 243 | 182 | ||
| 183 | static inline void fpu_fxsave(struct fpu *fpu) | ||
| 184 | { | ||
| 185 | asm volatile("fxsave %[fx]" | ||
| 186 | : [fx] "=m" (fpu->state->fxsave)); | ||
| 187 | } | ||
| 188 | |||
| 189 | #endif /* CONFIG_X86_64 */ | ||
| 190 | |||
| 244 | /* We need a safe address that is cheap to find and that is already | 191 | /* We need a safe address that is cheap to find and that is already |
| 245 | in L1 during context switch. The best choices are unfortunately | 192 | in L1 during context switch. The best choices are unfortunately |
| 246 | different for UP and SMP */ | 193 | different for UP and SMP */ |
| @@ -256,47 +203,33 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | |||
| 256 | static inline void fpu_save_init(struct fpu *fpu) | 203 | static inline void fpu_save_init(struct fpu *fpu) |
| 257 | { | 204 | { |
| 258 | if (use_xsave()) { | 205 | if (use_xsave()) { |
| 259 | struct xsave_struct *xstate = &fpu->state->xsave; | ||
| 260 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; | ||
| 261 | |||
| 262 | fpu_xsave(fpu); | 206 | fpu_xsave(fpu); |
| 263 | 207 | ||
| 264 | /* | 208 | /* |
| 265 | * xsave header may indicate the init state of the FP. | 209 | * xsave header may indicate the init state of the FP. |
| 266 | */ | 210 | */ |
| 267 | if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) | 211 | if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) |
| 268 | goto end; | 212 | return; |
| 269 | 213 | } else if (use_fxsr()) { | |
| 270 | if (unlikely(fx->swd & X87_FSW_ES)) | 214 | fpu_fxsave(fpu); |
| 271 | asm volatile("fnclex"); | 215 | } else { |
| 272 | 216 | asm volatile("fsave %[fx]; fwait" | |
| 273 | /* | 217 | : [fx] "=m" (fpu->state->fsave)); |
| 274 | * we can do a simple return here or be paranoid :) | 218 | return; |
| 275 | */ | ||
| 276 | goto clear_state; | ||
| 277 | } | 219 | } |
| 278 | 220 | ||
| 279 | /* Use more nops than strictly needed in case the compiler | 221 | if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) |
| 280 | varies code */ | 222 | asm volatile("fnclex"); |
| 281 | alternative_input( | 223 | |
| 282 | "fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4, | ||
| 283 | "fxsave %[fx]\n" | ||
| 284 | "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", | ||
| 285 | X86_FEATURE_FXSR, | ||
| 286 | [fx] "m" (fpu->state->fxsave), | ||
| 287 | [fsw] "m" (fpu->state->fxsave.swd) : "memory"); | ||
| 288 | clear_state: | ||
| 289 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception | 224 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception |
| 290 | is pending. Clear the x87 state here by setting it to fixed | 225 | is pending. Clear the x87 state here by setting it to fixed |
| 291 | values. safe_address is a random variable that should be in L1 */ | 226 | values. safe_address is a random variable that should be in L1 */ |
| 292 | alternative_input( | 227 | alternative_input( |
| 293 | GENERIC_NOP8 GENERIC_NOP2, | 228 | ASM_NOP8 ASM_NOP2, |
| 294 | "emms\n\t" /* clear stack tags */ | 229 | "emms\n\t" /* clear stack tags */ |
| 295 | "fildl %[addr]", /* set F?P to defined value */ | 230 | "fildl %P[addr]", /* set F?P to defined value */ |
| 296 | X86_FEATURE_FXSAVE_LEAK, | 231 | X86_FEATURE_FXSAVE_LEAK, |
| 297 | [addr] "m" (safe_address)); | 232 | [addr] "m" (safe_address)); |
| 298 | end: | ||
| 299 | ; | ||
| 300 | } | 233 | } |
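Because the hunk above interleaves the removed 32-bit body with the new unified one, the merged control flow is easier to follow restated in one piece. This is a readable sketch only; the real function ends with the alternative_input()-based AMD leak workaround shown above:

    static inline void fpu_save_init_sketch(struct fpu *fpu)
    {
            if (use_xsave()) {
                    fpu_xsave(fpu);
                    /* the xsave header may flag the FP state as init state */
                    if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
                            return;
            } else if (use_fxsr()) {
                    fpu_fxsave(fpu);
            } else {
                    asm volatile("fsave %[fx]; fwait"
                                 : [fx] "=m" (fpu->state->fsave));
                    return;
            }

            if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
                    asm volatile("fnclex");
            /* ...AMD FDP/FIP/FOP leak workaround follows (see above)... */
    }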
| 301 | 234 | ||
| 302 | static inline void __save_init_fpu(struct task_struct *tsk) | 235 | static inline void __save_init_fpu(struct task_struct *tsk) |
| @@ -305,9 +238,6 @@ static inline void __save_init_fpu(struct task_struct *tsk) | |||
| 305 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 238 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
| 306 | } | 239 | } |
| 307 | 240 | ||
| 308 | |||
| 309 | #endif /* CONFIG_X86_64 */ | ||
| 310 | |||
| 311 | static inline int fpu_fxrstor_checking(struct fpu *fpu) | 241 | static inline int fpu_fxrstor_checking(struct fpu *fpu) |
| 312 | { | 242 | { |
| 313 | return fxrstor_checking(&fpu->state->fxsave); | 243 | return fxrstor_checking(&fpu->state->fxsave); |
| @@ -344,7 +274,10 @@ static inline void __unlazy_fpu(struct task_struct *tsk) | |||
| 344 | static inline void __clear_fpu(struct task_struct *tsk) | 274 | static inline void __clear_fpu(struct task_struct *tsk) |
| 345 | { | 275 | { |
| 346 | if (task_thread_info(tsk)->status & TS_USEDFPU) { | 276 | if (task_thread_info(tsk)->status & TS_USEDFPU) { |
| 347 | tolerant_fwait(); | 277 | /* Ignore delayed exceptions from user space */ |
| 278 | asm volatile("1: fwait\n" | ||
| 279 | "2:\n" | ||
| 280 | _ASM_EXTABLE(1b, 2b)); | ||
| 348 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 281 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
| 349 | stts(); | 282 | stts(); |
| 350 | } | 283 | } |
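The open-coded fwait here replaces the deleted tolerant_fwait() helpers, and the _ASM_EXTABLE(1b, 2b) entry is what makes it tolerant. The general shape of that fixup pattern, as a sketch (DEMO_IGNORE_FAULT is an illustrative name):

    /* If the instruction at label 1 traps (here: a delayed x87 exception
     * raised by fwait), the exception table resumes execution at label 2
     * instead of treating it as a kernel fault. */
    #define DEMO_IGNORE_FAULT(insn)                 \
            asm volatile("1: " insn "\n"            \
                         "2:\n"                     \
                         _ASM_EXTABLE(1b, 2b))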
| @@ -405,19 +338,6 @@ static inline void irq_ts_restore(int TS_state) | |||
| 405 | stts(); | 338 | stts(); |
| 406 | } | 339 | } |
| 407 | 340 | ||
| 408 | #ifdef CONFIG_X86_64 | ||
| 409 | |||
| 410 | static inline void save_init_fpu(struct task_struct *tsk) | ||
| 411 | { | ||
| 412 | __save_init_fpu(tsk); | ||
| 413 | stts(); | ||
| 414 | } | ||
| 415 | |||
| 416 | #define unlazy_fpu __unlazy_fpu | ||
| 417 | #define clear_fpu __clear_fpu | ||
| 418 | |||
| 419 | #else /* CONFIG_X86_32 */ | ||
| 420 | |||
| 421 | /* | 341 | /* |
| 422 | * These disable preemption on their own and are safe | 342 | * These disable preemption on their own and are safe |
| 423 | */ | 343 | */ |
| @@ -443,8 +363,6 @@ static inline void clear_fpu(struct task_struct *tsk) | |||
| 443 | preempt_enable(); | 363 | preempt_enable(); |
| 444 | } | 364 | } |
| 445 | 365 | ||
| 446 | #endif /* CONFIG_X86_64 */ | ||
| 447 | |||
| 448 | /* | 366 | /* |
| 449 | * i387 state interaction | 367 | * i387 state interaction |
| 450 | */ | 368 | */ |
| @@ -508,7 +426,4 @@ extern void fpu_finit(struct fpu *fpu); | |||
| 508 | 426 | ||
| 509 | #endif /* __ASSEMBLY__ */ | 427 | #endif /* __ASSEMBLY__ */ |
| 510 | 428 | ||
| 511 | #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 | ||
| 512 | #define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5 | ||
| 513 | |||
| 514 | #endif /* _ASM_X86_I387_H */ | 429 | #endif /* _ASM_X86_I387_H */ |
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index 1655147646aa..a20365953bf8 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h | |||
| @@ -55,6 +55,8 @@ extern struct irq_chip i8259A_chip; | |||
| 55 | struct legacy_pic { | 55 | struct legacy_pic { |
| 56 | int nr_legacy_irqs; | 56 | int nr_legacy_irqs; |
| 57 | struct irq_chip *chip; | 57 | struct irq_chip *chip; |
| 58 | void (*mask)(unsigned int irq); | ||
| 59 | void (*unmask)(unsigned int irq); | ||
| 58 | void (*mask_all)(void); | 60 | void (*mask_all)(void); |
| 59 | void (*restore_mask)(void); | 61 | void (*restore_mask)(void); |
| 60 | void (*init)(int auto_eoi); | 62 | void (*init)(int auto_eoi); |
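struct legacy_pic gains per-IRQ mask/unmask callbacks alongside the existing whole-controller operations, letting common code mask a single legacy interrupt without knowing about 8259 internals. A hedged usage sketch (the demo_* name is illustrative; legacy_pic is the existing global pointer):

    static void demo_mask_legacy_irq(unsigned int irq)
    {
            if (irq < legacy_pic->nr_legacy_irqs)
                    legacy_pic->mask(irq);
    }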
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 30a3e9776123..6a45ec41ec26 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h | |||
| @@ -206,6 +206,7 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) | |||
| 206 | 206 | ||
| 207 | extern void iounmap(volatile void __iomem *addr); | 207 | extern void iounmap(volatile void __iomem *addr); |
| 208 | 208 | ||
| 209 | extern void set_iounmap_nonlazy(void); | ||
| 209 | 210 | ||
| 210 | #ifdef __KERNEL__ | 211 | #ifdef __KERNEL__ |
| 211 | 212 | ||
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 9cb2edb87c2f..c8be4566c3d2 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h | |||
| @@ -170,12 +170,6 @@ extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); | |||
| 170 | 170 | ||
| 171 | extern void probe_nr_irqs_gsi(void); | 171 | extern void probe_nr_irqs_gsi(void); |
| 172 | 172 | ||
| 173 | extern int setup_ioapic_entry(int apic, int irq, | ||
| 174 | struct IO_APIC_route_entry *entry, | ||
| 175 | unsigned int destination, int trigger, | ||
| 176 | int polarity, int vector, int pin); | ||
| 177 | extern void ioapic_write_entry(int apic, int pin, | ||
| 178 | struct IO_APIC_route_entry e); | ||
| 179 | extern void setup_ioapic_ids_from_mpc(void); | 173 | extern void setup_ioapic_ids_from_mpc(void); |
| 180 | 174 | ||
| 181 | struct mp_ioapic_gsi{ | 175 | struct mp_ioapic_gsi{ |
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index f275e2244505..1c23360fb2d8 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h | |||
| @@ -3,4 +3,39 @@ | |||
| 3 | 3 | ||
| 4 | #define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8) | 4 | #define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8) |
| 5 | 5 | ||
| 6 | #ifdef CONFIG_INTR_REMAP | ||
| 7 | static inline void prepare_irte(struct irte *irte, int vector, | ||
| 8 | unsigned int dest) | ||
| 9 | { | ||
| 10 | memset(irte, 0, sizeof(*irte)); | ||
| 11 | |||
| 12 | irte->present = 1; | ||
| 13 | irte->dst_mode = apic->irq_dest_mode; | ||
| 14 | /* | ||
| 15 | * Trigger mode in the IRTE will always be edge, and for IO-APIC, the | ||
| 16 | * actual level or edge trigger will be set up in the IO-APIC | ||
| 17 | * RTE. This will help simplify level-triggered irq migration. | ||
| 18 | * For more details, see the comments (in io_apic.c) explaining IO-APIC | ||
| 19 | * irq migration in the presence of interrupt-remapping. | ||
| 20 | */ | ||
| 21 | irte->trigger_mode = 0; | ||
| 22 | irte->dlvry_mode = apic->irq_delivery_mode; | ||
| 23 | irte->vector = vector; | ||
| 24 | irte->dest_id = IRTE_DEST(dest); | ||
| 25 | irte->redir_hint = 1; | ||
| 26 | } | ||
| 27 | static inline bool irq_remapped(struct irq_cfg *cfg) | ||
| 28 | { | ||
| 29 | return cfg->irq_2_iommu.iommu != NULL; | ||
| 30 | } | ||
| 31 | #else | ||
| 32 | static inline void prepare_irte(struct irte *irte, int vector, unsigned int dest) | ||
| 33 | { | ||
| 34 | } | ||
| 35 | static inline bool irq_remapped(struct irq_cfg *cfg) | ||
| 36 | { | ||
| 37 | return false; | ||
| 38 | } | ||
| 39 | #endif | ||
| 40 | |||
| 6 | #endif /* _ASM_X86_IRQ_REMAPPING_H */ | 41 | #endif /* _ASM_X86_IRQ_REMAPPING_H */ |
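With prepare_irte() and irq_remapped() defined here, and stubbed out when CONFIG_INTR_REMAP is off, callers need no #ifdefs of their own. A sketch of the intended calling pattern, assuming the interrupt-remapping headers (struct irte) are in scope; the surrounding function and the table-programming step are illustrative:

    static int demo_setup_remapped_irq(struct irq_cfg *cfg, int vector,
                                       unsigned int dest)
    {
            struct irte irte;

            if (!irq_remapped(cfg))         /* no IOMMU behind this IRQ */
                    return -ENODEV;

            prepare_irte(&irte, vector, dest);
            /* ...write irte into the interrupt-remapping table here... */
            return 0;
    }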
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h index 16350740edf6..4a711a684b17 100644 --- a/arch/x86/include/asm/mrst.h +++ b/arch/x86/include/asm/mrst.h | |||
| @@ -10,6 +10,9 @@ | |||
| 10 | */ | 10 | */ |
| 11 | #ifndef _ASM_X86_MRST_H | 11 | #ifndef _ASM_X86_MRST_H |
| 12 | #define _ASM_X86_MRST_H | 12 | #define _ASM_X86_MRST_H |
| 13 | |||
| 14 | #include <linux/sfi.h> | ||
| 15 | |||
| 13 | extern int pci_mrst_init(void); | 16 | extern int pci_mrst_init(void); |
| 14 | int __init sfi_parse_mrtc(struct sfi_table_header *table); | 17 | int __init sfi_parse_mrtc(struct sfi_table_header *table); |
| 15 | 18 | ||
| @@ -26,7 +29,7 @@ enum mrst_cpu_type { | |||
| 26 | }; | 29 | }; |
| 27 | 30 | ||
| 28 | extern enum mrst_cpu_type __mrst_cpu_chip; | 31 | extern enum mrst_cpu_type __mrst_cpu_chip; |
| 29 | static enum mrst_cpu_type mrst_identify_cpu(void) | 32 | static inline enum mrst_cpu_type mrst_identify_cpu(void) |
| 30 | { | 33 | { |
| 31 | return __mrst_cpu_chip; | 34 | return __mrst_cpu_chip; |
| 32 | } | 35 | } |
| @@ -42,4 +45,9 @@ extern enum mrst_timer_options mrst_timer_options; | |||
| 42 | #define SFI_MTMR_MAX_NUM 8 | 45 | #define SFI_MTMR_MAX_NUM 8 |
| 43 | #define SFI_MRTC_MAX 8 | 46 | #define SFI_MRTC_MAX 8 |
| 44 | 47 | ||
| 48 | extern struct console early_mrst_console; | ||
| 49 | extern void mrst_early_console_init(void); | ||
| 50 | |||
| 51 | extern struct console early_hsu_console; | ||
| 52 | extern void hsu_early_console_init(void); | ||
| 45 | #endif /* _ASM_X86_MRST_H */ | 53 | #endif /* _ASM_X86_MRST_H */ |
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h new file mode 100644 index 000000000000..bcdff997668c --- /dev/null +++ b/arch/x86/include/asm/mwait.h | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | #ifndef _ASM_X86_MWAIT_H | ||
| 2 | #define _ASM_X86_MWAIT_H | ||
| 3 | |||
| 4 | #define MWAIT_SUBSTATE_MASK 0xf | ||
| 5 | #define MWAIT_CSTATE_MASK 0xf | ||
| 6 | #define MWAIT_SUBSTATE_SIZE 4 | ||
| 7 | #define MWAIT_MAX_NUM_CSTATES 8 | ||
| 8 | |||
| 9 | #define CPUID_MWAIT_LEAF 5 | ||
| 10 | #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1 | ||
| 11 | #define CPUID5_ECX_INTERRUPT_BREAK 0x2 | ||
| 12 | |||
| 13 | #define MWAIT_ECX_INTERRUPT_BREAK 0x1 | ||
| 14 | |||
| 15 | #endif /* _ASM_X86_MWAIT_H */ | ||
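The MWAIT constants gathered into this new header (previously private to acpi/cstate.c, see the hunk further down) are consumed when probing CPUID leaf 5. A hedged sketch of the typical check, using cpuid() as declared in <asm/processor.h>:

    static int demo_mwait_break_on_interrupt(void)
    {
            unsigned int eax, ebx, ecx, edx;

            cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);

            /* ECX must advertise the extensions and the ability to wake
             * MWAIT with an interrupt even while interrupts are masked. */
            return (ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) &&
                   (ecx & CPUID5_ECX_INTERRUPT_BREAK);
    }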
diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h index 08fde475cb3b..2a8478140bb3 100644 --- a/arch/x86/include/asm/olpc_ofw.h +++ b/arch/x86/include/asm/olpc_ofw.h | |||
| @@ -21,10 +21,14 @@ extern void olpc_ofw_detect(void); | |||
| 21 | /* install OFW's pde permanently into the kernel's pgtable */ | 21 | /* install OFW's pde permanently into the kernel's pgtable */ |
| 22 | extern void setup_olpc_ofw_pgd(void); | 22 | extern void setup_olpc_ofw_pgd(void); |
| 23 | 23 | ||
| 24 | /* check if OFW was detected during boot */ | ||
| 25 | extern bool olpc_ofw_present(void); | ||
| 26 | |||
| 24 | #else /* !CONFIG_OLPC_OPENFIRMWARE */ | 27 | #else /* !CONFIG_OLPC_OPENFIRMWARE */ |
| 25 | 28 | ||
| 26 | static inline void olpc_ofw_detect(void) { } | 29 | static inline void olpc_ofw_detect(void) { } |
| 27 | static inline void setup_olpc_ofw_pgd(void) { } | 30 | static inline void setup_olpc_ofw_pgd(void) { } |
| 31 | static inline bool olpc_ofw_present(void) { return false; } | ||
| 28 | 32 | ||
| 29 | #endif /* !CONFIG_OLPC_OPENFIRMWARE */ | 33 | #endif /* !CONFIG_OLPC_OPENFIRMWARE */ |
| 30 | 34 | ||
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index a667f24c7254..1df66211fd1b 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) | 8 | #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) |
| 9 | #define PAGE_MASK (~(PAGE_SIZE-1)) | 9 | #define PAGE_MASK (~(PAGE_SIZE-1)) |
| 10 | 10 | ||
| 11 | #define __PHYSICAL_MASK ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1) | 11 | #define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1)) |
| 12 | #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) | 12 | #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) |
| 13 | 13 | ||
| 14 | /* Cast PAGE_MASK to a signed type so that it is sign-extended if | 14 | /* Cast PAGE_MASK to a signed type so that it is sign-extended if |
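The page_types.h change is a parenthesization fix: the subtraction now happens in the wide 64-bit type before the cast rather than after a possibly truncating one. A worked example, assuming for illustration a 32-bit phys_addr_t and __PHYSICAL_MASK_SHIFT == 44:

    /* old: ((phys_addr_t)(1ULL << 44)) - 1
     *      -> (u32)0x100000000000 - 1 -> 0u - 1 -> 0xffffffff  (wraps)
     * new: (phys_addr_t)((1ULL << 44) - 1)
     *      -> (u32)0xfffffffffff      -> 0xffffffff  (plain truncation)
     *
     * The results happen to agree, but only the new form computes the
     * mask in the wide type first and never leans on unsigned wraparound
     * of a truncated power of two. */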
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 5653f43d90e5..edecb4ed2210 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
| @@ -416,11 +416,6 @@ static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) | |||
| 416 | PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn); | 416 | PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn); |
| 417 | } | 417 | } |
| 418 | 418 | ||
| 419 | static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn, | ||
| 420 | unsigned long start, unsigned long count) | ||
| 421 | { | ||
| 422 | PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count); | ||
| 423 | } | ||
| 424 | static inline void paravirt_release_pmd(unsigned long pfn) | 419 | static inline void paravirt_release_pmd(unsigned long pfn) |
| 425 | { | 420 | { |
| 426 | PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn); | 421 | PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn); |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index db9ef5532341..b82bac975250 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
| @@ -255,7 +255,6 @@ struct pv_mmu_ops { | |||
| 255 | */ | 255 | */ |
| 256 | void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); | 256 | void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); |
| 257 | void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); | 257 | void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); |
| 258 | void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count); | ||
| 259 | void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); | 258 | void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); |
| 260 | void (*release_pte)(unsigned long pfn); | 259 | void (*release_pte)(unsigned long pfn); |
| 261 | void (*release_pmd)(unsigned long pfn); | 260 | void (*release_pmd)(unsigned long pfn); |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index a34c785c5a63..ada823a13c7c 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
| @@ -28,6 +28,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; | |||
| 28 | extern spinlock_t pgd_lock; | 28 | extern spinlock_t pgd_lock; |
| 29 | extern struct list_head pgd_list; | 29 | extern struct list_head pgd_list; |
| 30 | 30 | ||
| 31 | extern struct mm_struct *pgd_page_get_mm(struct page *page); | ||
| 32 | |||
| 31 | #ifdef CONFIG_PARAVIRT | 33 | #ifdef CONFIG_PARAVIRT |
| 32 | #include <asm/paravirt.h> | 34 | #include <asm/paravirt.h> |
| 33 | #else /* !CONFIG_PARAVIRT */ | 35 | #else /* !CONFIG_PARAVIRT */ |
| @@ -603,6 +605,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, | |||
| 603 | pte_update(mm, addr, ptep); | 605 | pte_update(mm, addr, ptep); |
| 604 | } | 606 | } |
| 605 | 607 | ||
| 608 | #define flush_tlb_fix_spurious_fault(vma, address) | ||
| 609 | |||
| 606 | /* | 610 | /* |
| 607 | * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); | 611 | * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); |
| 608 | * | 612 | * |
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 076052cd62be..f96ac9bedf75 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h | |||
| @@ -102,6 +102,8 @@ static inline void native_pgd_clear(pgd_t *pgd) | |||
| 102 | native_set_pgd(pgd, native_make_pgd(0)); | 102 | native_set_pgd(pgd, native_make_pgd(0)); |
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | extern void sync_global_pgds(unsigned long start, unsigned long end); | ||
| 106 | |||
| 105 | /* | 107 | /* |
| 106 | * Conversion functions: convert a page and protection to a page entry, | 108 | * Conversion functions: convert a page and protection to a page entry, |
| 107 | * and a page entry and page directory to the page they refer to. | 109 | * and a page entry and page directory to the page they refer to. |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 325b7bdbebaa..cae9c3cb95cf 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
| @@ -110,6 +110,8 @@ struct cpuinfo_x86 { | |||
| 110 | u16 phys_proc_id; | 110 | u16 phys_proc_id; |
| 111 | /* Core id: */ | 111 | /* Core id: */ |
| 112 | u16 cpu_core_id; | 112 | u16 cpu_core_id; |
| 113 | /* Compute unit id */ | ||
| 114 | u8 compute_unit_id; | ||
| 113 | /* Index into per_cpu list: */ | 115 | /* Index into per_cpu list: */ |
| 114 | u16 cpu_index; | 116 | u16 cpu_index; |
| 115 | #endif | 117 | #endif |
| @@ -602,7 +604,7 @@ extern unsigned long mmu_cr4_features; | |||
| 602 | 604 | ||
| 603 | static inline void set_in_cr4(unsigned long mask) | 605 | static inline void set_in_cr4(unsigned long mask) |
| 604 | { | 606 | { |
| 605 | unsigned cr4; | 607 | unsigned long cr4; |
| 606 | 608 | ||
| 607 | mmu_cr4_features |= mask; | 609 | mmu_cr4_features |= mask; |
| 608 | cr4 = read_cr4(); | 610 | cr4 = read_cr4(); |
| @@ -612,7 +614,7 @@ static inline void set_in_cr4(unsigned long mask) | |||
| 612 | 614 | ||
| 613 | static inline void clear_in_cr4(unsigned long mask) | 615 | static inline void clear_in_cr4(unsigned long mask) |
| 614 | { | 616 | { |
| 615 | unsigned cr4; | 617 | unsigned long cr4; |
| 616 | 618 | ||
| 617 | mmu_cr4_features &= ~mask; | 619 | mmu_cr4_features &= ~mask; |
| 618 | cr4 = read_cr4(); | 620 | cr4 = read_cr4(); |
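Widening the temporary from unsigned to unsigned long matters on x86-64, where CR4 and read_cr4()'s return value are 64 bits wide: a 32-bit temporary would silently drop the high bits when the value is written back. A standalone demonstration of the truncation being avoided (plain C, LP64 host, made-up value):

    #include <stdio.h>

    int main(void)
    {
            unsigned long cr4 = (1UL << 32) | 0x10; /* pretend CR4 value */
            unsigned int narrow = cr4;              /* the old temporary  */

            printf("full: %#lx  truncated: %#lx\n",
                   cr4, (unsigned long)narrow);
            /* prints: full: 0x100000010  truncated: 0x10 */
            return 0;
    }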
| @@ -764,29 +766,6 @@ extern unsigned long idle_halt; | |||
| 764 | extern unsigned long idle_nomwait; | 766 | extern unsigned long idle_nomwait; |
| 765 | extern bool c1e_detected; | 767 | extern bool c1e_detected; |
| 766 | 768 | ||
| 767 | /* | ||
| 768 | * on systems with caches, caches must be flushed as the absolute | ||
| 769 | * last instruction before going into a suspended halt. Otherwise, | ||
| 770 | * dirty data can linger in the cache and become stale on resume, | ||
| 771 | * leading to strange errors. | ||
| 772 | * | ||
| 773 | * perform a variety of operations to guarantee that the compiler | ||
| 774 | * will not reorder instructions. wbinvd itself is serializing | ||
| 775 | * so the processor will not reorder. | ||
| 776 | * | ||
| 777 | * Systems without cache can just go into halt. | ||
| 778 | */ | ||
| 779 | static inline void wbinvd_halt(void) | ||
| 780 | { | ||
| 781 | mb(); | ||
| 782 | /* check for clflush to determine if wbinvd is legal */ | ||
| 783 | if (cpu_has_clflush) | ||
| 784 | asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory"); | ||
| 785 | else | ||
| 786 | while (1) | ||
| 787 | halt(); | ||
| 788 | } | ||
| 789 | |||
| 790 | extern void enable_sep_cpu(void); | 769 | extern void enable_sep_cpu(void); |
| 791 | extern int sysenter_setup(void); | 770 | extern int sysenter_setup(void); |
| 792 | 771 | ||
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ef292c792d74..d6763b139a84 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h | |||
| @@ -93,6 +93,11 @@ void *extend_brk(size_t size, size_t align); | |||
| 93 | : : "i" (sz)); \ | 93 | : : "i" (sz)); \ |
| 94 | } | 94 | } |
| 95 | 95 | ||
| 96 | /* Helper for reserving space for arrays of things */ | ||
| 97 | #define RESERVE_BRK_ARRAY(type, name, entries) \ | ||
| 98 | type *name; \ | ||
| 99 | RESERVE_BRK(name, sizeof(type) * entries) | ||
| 100 | |||
| 96 | #ifdef __i386__ | 101 | #ifdef __i386__ |
| 97 | 102 | ||
| 98 | void __init i386_start_kernel(void); | 103 | void __init i386_start_kernel(void); |
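RESERVE_BRK_ARRAY combines a pointer declaration with a matching RESERVE_BRK reservation. A hedged usage sketch with a hypothetical table; the pointer is still expected to be aimed at the reserved space via extend_brk() during early boot:

    struct demo_entry {
            unsigned long val;
    };

    /* Declares 'struct demo_entry *demo_table' and reserves brk space
     * for 16 entries. */
    RESERVE_BRK_ARRAY(struct demo_entry, demo_table, 16);

    static void __init demo_table_init(void)
    {
            demo_table = extend_brk(16 * sizeof(*demo_table),
                                    sizeof(*demo_table));
    }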
diff --git a/arch/x86/include/asm/vmi.h b/arch/x86/include/asm/vmi.h deleted file mode 100644 index 61e08c0a2907..000000000000 --- a/arch/x86/include/asm/vmi.h +++ /dev/null | |||
| @@ -1,269 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * VMI interface definition | ||
| 3 | * | ||
| 4 | * Copyright (C) 2005, VMware, Inc. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License as published by | ||
| 8 | * the Free Software Foundation; either version 2 of the License, or | ||
| 9 | * (at your option) any later version. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope that it will be useful, but | ||
| 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
| 15 | * details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU General Public License | ||
| 18 | * along with this program; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 20 | * | ||
| 21 | * Maintained by: Zachary Amsden zach@vmware.com | ||
| 22 | * | ||
| 23 | */ | ||
| 24 | #include <linux/types.h> | ||
| 25 | |||
| 26 | /* | ||
| 27 | *--------------------------------------------------------------------- | ||
| 28 | * | ||
| 29 | * VMI Option ROM API | ||
| 30 | * | ||
| 31 | *--------------------------------------------------------------------- | ||
| 32 | */ | ||
| 33 | #define VMI_SIGNATURE 0x696d5663 /* "cVmi" */ | ||
| 34 | |||
| 35 | #define PCI_VENDOR_ID_VMWARE 0x15AD | ||
| 36 | #define PCI_DEVICE_ID_VMWARE_VMI 0x0801 | ||
| 37 | |||
| 38 | /* | ||
| 39 | * We use two version numbers for compatibility, with the major | ||
| 40 | * number signifying interface breakages, and the minor number | ||
| 41 | * interface extensions. | ||
| 42 | */ | ||
| 43 | #define VMI_API_REV_MAJOR 3 | ||
| 44 | #define VMI_API_REV_MINOR 0 | ||
| 45 | |||
| 46 | #define VMI_CALL_CPUID 0 | ||
| 47 | #define VMI_CALL_WRMSR 1 | ||
| 48 | #define VMI_CALL_RDMSR 2 | ||
| 49 | #define VMI_CALL_SetGDT 3 | ||
| 50 | #define VMI_CALL_SetLDT 4 | ||
| 51 | #define VMI_CALL_SetIDT 5 | ||
| 52 | #define VMI_CALL_SetTR 6 | ||
| 53 | #define VMI_CALL_GetGDT 7 | ||
| 54 | #define VMI_CALL_GetLDT 8 | ||
| 55 | #define VMI_CALL_GetIDT 9 | ||
| 56 | #define VMI_CALL_GetTR 10 | ||
| 57 | #define VMI_CALL_WriteGDTEntry 11 | ||
| 58 | #define VMI_CALL_WriteLDTEntry 12 | ||
| 59 | #define VMI_CALL_WriteIDTEntry 13 | ||
| 60 | #define VMI_CALL_UpdateKernelStack 14 | ||
| 61 | #define VMI_CALL_SetCR0 15 | ||
| 62 | #define VMI_CALL_SetCR2 16 | ||
| 63 | #define VMI_CALL_SetCR3 17 | ||
| 64 | #define VMI_CALL_SetCR4 18 | ||
| 65 | #define VMI_CALL_GetCR0 19 | ||
| 66 | #define VMI_CALL_GetCR2 20 | ||
| 67 | #define VMI_CALL_GetCR3 21 | ||
| 68 | #define VMI_CALL_GetCR4 22 | ||
| 69 | #define VMI_CALL_WBINVD 23 | ||
| 70 | #define VMI_CALL_SetDR 24 | ||
| 71 | #define VMI_CALL_GetDR 25 | ||
| 72 | #define VMI_CALL_RDPMC 26 | ||
| 73 | #define VMI_CALL_RDTSC 27 | ||
| 74 | #define VMI_CALL_CLTS 28 | ||
| 75 | #define VMI_CALL_EnableInterrupts 29 | ||
| 76 | #define VMI_CALL_DisableInterrupts 30 | ||
| 77 | #define VMI_CALL_GetInterruptMask 31 | ||
| 78 | #define VMI_CALL_SetInterruptMask 32 | ||
| 79 | #define VMI_CALL_IRET 33 | ||
| 80 | #define VMI_CALL_SYSEXIT 34 | ||
| 81 | #define VMI_CALL_Halt 35 | ||
| 82 | #define VMI_CALL_Reboot 36 | ||
| 83 | #define VMI_CALL_Shutdown 37 | ||
| 84 | #define VMI_CALL_SetPxE 38 | ||
| 85 | #define VMI_CALL_SetPxELong 39 | ||
| 86 | #define VMI_CALL_UpdatePxE 40 | ||
| 87 | #define VMI_CALL_UpdatePxELong 41 | ||
| 88 | #define VMI_CALL_MachineToPhysical 42 | ||
| 89 | #define VMI_CALL_PhysicalToMachine 43 | ||
| 90 | #define VMI_CALL_AllocatePage 44 | ||
| 91 | #define VMI_CALL_ReleasePage 45 | ||
| 92 | #define VMI_CALL_InvalPage 46 | ||
| 93 | #define VMI_CALL_FlushTLB 47 | ||
| 94 | #define VMI_CALL_SetLinearMapping 48 | ||
| 95 | |||
| 96 | #define VMI_CALL_SetIOPLMask 61 | ||
| 97 | #define VMI_CALL_SetInitialAPState 62 | ||
| 98 | #define VMI_CALL_APICWrite 63 | ||
| 99 | #define VMI_CALL_APICRead 64 | ||
| 100 | #define VMI_CALL_IODelay 65 | ||
| 101 | #define VMI_CALL_SetLazyMode 73 | ||
| 102 | |||
| 103 | /* | ||
| 104 | *--------------------------------------------------------------------- | ||
| 105 | * | ||
| 106 | * MMU operation flags | ||
| 107 | * | ||
| 108 | *--------------------------------------------------------------------- | ||
| 109 | */ | ||
| 110 | |||
| 111 | /* Flags used by VMI_{Allocate|Release}Page call */ | ||
| 112 | #define VMI_PAGE_PAE 0x10 /* Allocate PAE shadow */ | ||
| 113 | #define VMI_PAGE_CLONE 0x20 /* Clone from another shadow */ | ||
| 114 | #define VMI_PAGE_ZEROED 0x40 /* Page is pre-zeroed */ | ||
| 115 | |||
| 116 | |||
| 117 | /* Flags shared by Allocate|Release Page and PTE updates */ | ||
| 118 | #define VMI_PAGE_PT 0x01 | ||
| 119 | #define VMI_PAGE_PD 0x02 | ||
| 120 | #define VMI_PAGE_PDP 0x04 | ||
| 121 | #define VMI_PAGE_PML4 0x08 | ||
| 122 | |||
| 123 | #define VMI_PAGE_NORMAL 0x00 /* for debugging */ | ||
| 124 | |||
| 125 | /* Flags used by PTE updates */ | ||
| 126 | #define VMI_PAGE_CURRENT_AS 0x10 /* implies VMI_PAGE_VA_MASK is valid */ | ||
| 127 | #define VMI_PAGE_DEFER 0x20 /* may queue update until TLB inval */ | ||
| 128 | #define VMI_PAGE_VA_MASK 0xfffff000 | ||
| 129 | |||
| 130 | #ifdef CONFIG_X86_PAE | ||
| 131 | #define VMI_PAGE_L1 (VMI_PAGE_PT | VMI_PAGE_PAE | VMI_PAGE_ZEROED) | ||
| 132 | #define VMI_PAGE_L2 (VMI_PAGE_PD | VMI_PAGE_PAE | VMI_PAGE_ZEROED) | ||
| 133 | #else | ||
| 134 | #define VMI_PAGE_L1 (VMI_PAGE_PT | VMI_PAGE_ZEROED) | ||
| 135 | #define VMI_PAGE_L2 (VMI_PAGE_PD | VMI_PAGE_ZEROED) | ||
| 136 | #endif | ||
| 137 | |||
| 138 | /* Flags used by VMI_FlushTLB call */ | ||
| 139 | #define VMI_FLUSH_TLB 0x01 | ||
| 140 | #define VMI_FLUSH_GLOBAL 0x02 | ||
| 141 | |||
| 142 | /* | ||
| 143 | *--------------------------------------------------------------------- | ||
| 144 | * | ||
| 145 | * VMI relocation definitions for ROM call get_reloc | ||
| 146 | * | ||
| 147 | *--------------------------------------------------------------------- | ||
| 148 | */ | ||
| 149 | |||
| 150 | /* VMI Relocation types */ | ||
| 151 | #define VMI_RELOCATION_NONE 0 | ||
| 152 | #define VMI_RELOCATION_CALL_REL 1 | ||
| 153 | #define VMI_RELOCATION_JUMP_REL 2 | ||
| 154 | #define VMI_RELOCATION_NOP 3 | ||
| 155 | |||
| 156 | #ifndef __ASSEMBLY__ | ||
| 157 | struct vmi_relocation_info { | ||
| 158 | unsigned char *eip; | ||
| 159 | unsigned char type; | ||
| 160 | unsigned char reserved[3]; | ||
| 161 | }; | ||
| 162 | #endif | ||
| 163 | |||
| 164 | |||
| 165 | /* | ||
| 166 | *--------------------------------------------------------------------- | ||
| 167 | * | ||
| 168 | * Generic ROM structures and definitions | ||
| 169 | * | ||
| 170 | *--------------------------------------------------------------------- | ||
| 171 | */ | ||
| 172 | |||
| 173 | #ifndef __ASSEMBLY__ | ||
| 174 | |||
| 175 | struct vrom_header { | ||
| 176 | u16 rom_signature; /* option ROM signature */ | ||
| 177 | u8 rom_length; /* ROM length in 512 byte chunks */ | ||
| 178 | u8 rom_entry[4]; /* 16-bit code entry point */ | ||
| 179 | u8 rom_pad0; /* 4-byte align pad */ | ||
| 180 | u32 vrom_signature; /* VROM identification signature */ | ||
| 181 | u8 api_version_min;/* Minor version of API */ | ||
| 182 | u8 api_version_maj;/* Major version of API */ | ||
| 183 | u8 jump_slots; /* Number of jump slots */ | ||
| 184 | u8 reserved1; /* Reserved for expansion */ | ||
| 185 | u32 virtual_top; /* Hypervisor virtual address start */ | ||
| 186 | u16 reserved2; /* Reserved for expansion */ | ||
| 187 | u16 license_offs; /* Offset to License string */ | ||
| 188 | u16 pci_header_offs;/* Offset to PCI OPROM header */ | ||
| 189 | u16 pnp_header_offs;/* Offset to PnP OPROM header */ | ||
| 190 | u32 rom_pad3; /* PnP reserved / VMI reserved */ | ||
| 191 | u8 reserved[96]; /* Reserved for headers */ | ||
| 192 | char vmi_init[8]; /* VMI_Init jump point */ | ||
| 193 | char get_reloc[8]; /* VMI_GetRelocationInfo jump point */ | ||
| 194 | } __attribute__((packed)); | ||
| 195 | |||
| 196 | struct pnp_header { | ||
| 197 | char sig[4]; | ||
| 198 | char rev; | ||
| 199 | char size; | ||
| 200 | short next; | ||
| 201 | short res; | ||
| 202 | long devID; | ||
| 203 | unsigned short manufacturer_offset; | ||
| 204 | unsigned short product_offset; | ||
| 205 | } __attribute__((packed)); | ||
| 206 | |||
| 207 | struct pci_header { | ||
| 208 | char sig[4]; | ||
| 209 | short vendorID; | ||
| 210 | short deviceID; | ||
| 211 | short vpdData; | ||
| 212 | short size; | ||
| 213 | char rev; | ||
| 214 | char class; | ||
| 215 | char subclass; | ||
| 216 | char interface; | ||
| 217 | short chunks; | ||
| 218 | char rom_version_min; | ||
| 219 | char rom_version_maj; | ||
| 220 | char codetype; | ||
| 221 | char lastRom; | ||
| 222 | short reserved; | ||
| 223 | } __attribute__((packed)); | ||
| 224 | |||
| 225 | /* Function prototypes for bootstrapping */ | ||
| 226 | #ifdef CONFIG_VMI | ||
| 227 | extern void vmi_init(void); | ||
| 228 | extern void vmi_activate(void); | ||
| 229 | extern void vmi_bringup(void); | ||
| 230 | #else | ||
| 231 | static inline void vmi_init(void) {} | ||
| 232 | static inline void vmi_activate(void) {} | ||
| 233 | static inline void vmi_bringup(void) {} | ||
| 234 | #endif | ||
| 235 | |||
| 236 | /* State needed to start an application processor in an SMP system. */ | ||
| 237 | struct vmi_ap_state { | ||
| 238 | u32 cr0; | ||
| 239 | u32 cr2; | ||
| 240 | u32 cr3; | ||
| 241 | u32 cr4; | ||
| 242 | |||
| 243 | u64 efer; | ||
| 244 | |||
| 245 | u32 eip; | ||
| 246 | u32 eflags; | ||
| 247 | u32 eax; | ||
| 248 | u32 ebx; | ||
| 249 | u32 ecx; | ||
| 250 | u32 edx; | ||
| 251 | u32 esp; | ||
| 252 | u32 ebp; | ||
| 253 | u32 esi; | ||
| 254 | u32 edi; | ||
| 255 | u16 cs; | ||
| 256 | u16 ss; | ||
| 257 | u16 ds; | ||
| 258 | u16 es; | ||
| 259 | u16 fs; | ||
| 260 | u16 gs; | ||
| 261 | u16 ldtr; | ||
| 262 | |||
| 263 | u16 gdtr_limit; | ||
| 264 | u32 gdtr_base; | ||
| 265 | u32 idtr_base; | ||
| 266 | u16 idtr_limit; | ||
| 267 | }; | ||
| 268 | |||
| 269 | #endif | ||
diff --git a/arch/x86/include/asm/vmi_time.h b/arch/x86/include/asm/vmi_time.h deleted file mode 100644 index c6e0bee93e3c..000000000000 --- a/arch/x86/include/asm/vmi_time.h +++ /dev/null | |||
| @@ -1,98 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * VMI Time wrappers | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006, VMware, Inc. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License as published by | ||
| 8 | * the Free Software Foundation; either version 2 of the License, or | ||
| 9 | * (at your option) any later version. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope that it will be useful, but | ||
| 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
| 15 | * details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU General Public License | ||
| 18 | * along with this program; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 20 | * | ||
| 21 | * Send feedback to dhecht@vmware.com | ||
| 22 | * | ||
| 23 | */ | ||
| 24 | |||
| 25 | #ifndef _ASM_X86_VMI_TIME_H | ||
| 26 | #define _ASM_X86_VMI_TIME_H | ||
| 27 | |||
| 28 | /* | ||
| 29 | * Raw VMI call indices for timer functions | ||
| 30 | */ | ||
| 31 | #define VMI_CALL_GetCycleFrequency 66 | ||
| 32 | #define VMI_CALL_GetCycleCounter 67 | ||
| 33 | #define VMI_CALL_SetAlarm 68 | ||
| 34 | #define VMI_CALL_CancelAlarm 69 | ||
| 35 | #define VMI_CALL_GetWallclockTime 70 | ||
| 36 | #define VMI_CALL_WallclockUpdated 71 | ||
| 37 | |||
| 38 | /* Cached VMI timer operations */ | ||
| 39 | extern struct vmi_timer_ops { | ||
| 40 | u64 (*get_cycle_frequency)(void); | ||
| 41 | u64 (*get_cycle_counter)(int); | ||
| 42 | u64 (*get_wallclock)(void); | ||
| 43 | int (*wallclock_updated)(void); | ||
| 44 | void (*set_alarm)(u32 flags, u64 expiry, u64 period); | ||
| 45 | void (*cancel_alarm)(u32 flags); | ||
| 46 | } vmi_timer_ops; | ||
| 47 | |||
| 48 | /* Prototypes */ | ||
| 49 | extern void __init vmi_time_init(void); | ||
| 50 | extern unsigned long vmi_get_wallclock(void); | ||
| 51 | extern int vmi_set_wallclock(unsigned long now); | ||
| 52 | extern unsigned long long vmi_sched_clock(void); | ||
| 53 | extern unsigned long vmi_tsc_khz(void); | ||
| 54 | |||
| 55 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 56 | extern void __devinit vmi_time_bsp_init(void); | ||
| 57 | extern void __devinit vmi_time_ap_init(void); | ||
| 58 | #endif | ||
| 59 | |||
| 60 | /* | ||
| 61 | * When run under a hypervisor, a vcpu is always in one of three states: | ||
| 62 | * running, halted, or ready. The vcpu is in the 'running' state if it | ||
| 63 | * is executing. When the vcpu executes the halt interface, the vcpu | ||
| 64 | * enters the 'halted' state and remains halted until there is some work | ||
| 65 | * pending for the vcpu (e.g. an alarm expires, host I/O completes on | ||
| 66 | * behalf of virtual I/O). At this point, the vcpu enters the 'ready' | ||
| 67 | * state (waiting for the hypervisor to reschedule it). Finally, at any | ||
| 68 | * time when the vcpu is in neither the 'running' state nor the 'halted' | ||
| 69 | * state, it is in the 'ready' state. | ||
| 70 | * | ||
| 71 | * Real time advances while the vcpu is 'running', 'ready', or | ||
| 72 | * 'halted'. Stolen time is the time in which the vcpu is in the | ||
| 73 | * 'ready' state. Available time is the remaining time -- the vcpu is | ||
| 74 | * either 'running' or 'halted'. | ||
| 75 | * | ||
| 76 | * All three views of time are accessible through the VMI cycle | ||
| 77 | * counters. | ||
| 78 | */ | ||
| 79 | |||
| 80 | /* The cycle counters. */ | ||
| 81 | #define VMI_CYCLES_REAL 0 | ||
| 82 | #define VMI_CYCLES_AVAILABLE 1 | ||
| 83 | #define VMI_CYCLES_STOLEN 2 | ||
| 84 | |||
| 85 | /* The alarm interface 'flags' bits */ | ||
| 86 | #define VMI_ALARM_COUNTERS 2 | ||
| 87 | |||
| 88 | #define VMI_ALARM_COUNTER_MASK 0x000000ff | ||
| 89 | |||
| 90 | #define VMI_ALARM_WIRED_IRQ0 0x00000000 | ||
| 91 | #define VMI_ALARM_WIRED_LVTT 0x00010000 | ||
| 92 | |||
| 93 | #define VMI_ALARM_IS_ONESHOT 0x00000000 | ||
| 94 | #define VMI_ALARM_IS_PERIODIC 0x00000100 | ||
| 95 | |||
| 96 | #define CONFIG_VMI_ALARM_HZ 100 | ||
| 97 | |||
| 98 | #endif /* _ASM_X86_VMI_TIME_H */ | ||
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 7490bf8d1459..80a93dc99076 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
| @@ -86,15 +86,15 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o | |||
| 86 | obj-$(CONFIG_KGDB) += kgdb.o | 86 | obj-$(CONFIG_KGDB) += kgdb.o |
| 87 | obj-$(CONFIG_VM86) += vm86_32.o | 87 | obj-$(CONFIG_VM86) += vm86_32.o |
| 88 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | 88 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o |
| 89 | obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o | ||
| 89 | 90 | ||
| 90 | obj-$(CONFIG_HPET_TIMER) += hpet.o | 91 | obj-$(CONFIG_HPET_TIMER) += hpet.o |
| 91 | obj-$(CONFIG_APB_TIMER) += apb_timer.o | 92 | obj-$(CONFIG_APB_TIMER) += apb_timer.o |
| 92 | 93 | ||
| 93 | obj-$(CONFIG_K8_NB) += k8.o | 94 | obj-$(CONFIG_AMD_NB) += amd_nb.o |
| 94 | obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o | 95 | obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o |
| 95 | obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o | 96 | obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o |
| 96 | 97 | ||
| 97 | obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o | ||
| 98 | obj-$(CONFIG_KVM_GUEST) += kvm.o | 98 | obj-$(CONFIG_KVM_GUEST) += kvm.o |
| 99 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o | 99 | obj-$(CONFIG_KVM_CLOCK) += kvmclock.o |
| 100 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o | 100 | obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o |
| @@ -107,6 +107,7 @@ obj-$(CONFIG_SCx200) += scx200.o | |||
| 107 | scx200-y += scx200_32.o | 107 | scx200-y += scx200_32.o |
| 108 | 108 | ||
| 109 | obj-$(CONFIG_OLPC) += olpc.o | 109 | obj-$(CONFIG_OLPC) += olpc.o |
| 110 | obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o | ||
| 110 | obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o | 111 | obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o |
| 111 | obj-$(CONFIG_X86_MRST) += mrst.o | 112 | obj-$(CONFIG_X86_MRST) += mrst.o |
| 112 | 113 | ||
| @@ -123,7 +124,6 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o | |||
| 123 | # 64 bit specific files | 124 | # 64 bit specific files |
| 124 | ifeq ($(CONFIG_X86_64),y) | 125 | ifeq ($(CONFIG_X86_64),y) |
| 125 | obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o | 126 | obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o |
| 126 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o | ||
| 127 | obj-$(CONFIG_AUDIT) += audit_64.o | 127 | obj-$(CONFIG_AUDIT) += audit_64.o |
| 128 | 128 | ||
| 129 | obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o | 129 | obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o |
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index fb16f17e59be..5812404a0d4c 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | 13 | ||
| 14 | #include <acpi/processor.h> | 14 | #include <acpi/processor.h> |
| 15 | #include <asm/acpi.h> | 15 | #include <asm/acpi.h> |
| 16 | #include <asm/mwait.h> | ||
| 16 | 17 | ||
| 17 | /* | 18 | /* |
| 18 | * Initialize bm_flags based on the CPU cache properties | 19 | * Initialize bm_flags based on the CPU cache properties |
| @@ -65,16 +66,6 @@ static struct cstate_entry __percpu *cpu_cstate_entry; /* per CPU ptr */ | |||
| 65 | 66 | ||
| 66 | static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; | 67 | static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; |
| 67 | 68 | ||
| 68 | #define MWAIT_SUBSTATE_MASK (0xf) | ||
| 69 | #define MWAIT_CSTATE_MASK (0xf) | ||
| 70 | #define MWAIT_SUBSTATE_SIZE (4) | ||
| 71 | |||
| 72 | #define CPUID_MWAIT_LEAF (5) | ||
| 73 | #define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1) | ||
| 74 | #define CPUID5_ECX_INTERRUPT_BREAK (0x2) | ||
| 75 | |||
| 76 | #define MWAIT_ECX_INTERRUPT_BREAK (0x1) | ||
| 77 | |||
| 78 | #define NATIVE_CSTATE_BEYOND_HALT (2) | 69 | #define NATIVE_CSTATE_BEYOND_HALT (2) |
| 79 | 70 | ||
| 80 | static long acpi_processor_ffh_cstate_probe_cpu(void *_cx) | 71 | static long acpi_processor_ffh_cstate_probe_cpu(void *_cx) |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 679b6450382b..d2fdb0826df2 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. |
| 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
| 4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
| 5 | * | 5 | * |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 5a170cbbbed8..3cb482e123de 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. |
| 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
| 4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
| 5 | * | 5 | * |
| @@ -194,6 +194,39 @@ static inline unsigned long tbl_size(int entry_size) | |||
| 194 | return 1UL << shift; | 194 | return 1UL << shift; |
| 195 | } | 195 | } |
| 196 | 196 | ||
| 197 | /* Access to l1 and l2 indexed register spaces */ | ||
| 198 | |||
| 199 | static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) | ||
| 200 | { | ||
| 201 | u32 val; | ||
| 202 | |||
| 203 | pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); | ||
| 204 | pci_read_config_dword(iommu->dev, 0xfc, &val); | ||
| 205 | return val; | ||
| 206 | } | ||
| 207 | |||
| 208 | static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val) | ||
| 209 | { | ||
| 210 | pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31)); | ||
| 211 | pci_write_config_dword(iommu->dev, 0xfc, val); | ||
| 212 | pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); | ||
| 213 | } | ||
| 214 | |||
| 215 | static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address) | ||
| 216 | { | ||
| 217 | u32 val; | ||
| 218 | |||
| 219 | pci_write_config_dword(iommu->dev, 0xf0, address); | ||
| 220 | pci_read_config_dword(iommu->dev, 0xf4, &val); | ||
| 221 | return val; | ||
| 222 | } | ||
| 223 | |||
| 224 | static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val) | ||
| 225 | { | ||
| 226 | pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8)); | ||
| 227 | pci_write_config_dword(iommu->dev, 0xf4, val); | ||
| 228 | } | ||
| 229 | |||
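These accessors implement an index/data window in PCI config space: writing the register index to 0xf8 (with bit 31 arming a write) selects an L1 register whose data appears at 0xfc, and 0xf0/0xf4 (with bit 8 as the write enable) do the same for the L2 space. A hedged read-modify-write sketch through the L1 window (register numbers are illustrative):

    static void demo_l1_rmw(struct amd_iommu *iommu)
    {
            /* L1 bank 0, indirect register 0x0c: set bit 0 */
            u32 val = iommu_read_l1(iommu, 0, 0x0c);

            iommu_write_l1(iommu, 0, 0x0c, val | 0x1);
    }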
| 197 | /**************************************************************************** | 230 | /**************************************************************************** |
| 198 | * | 231 | * |
| 199 | * AMD IOMMU MMIO register space handling functions | 232 | * AMD IOMMU MMIO register space handling functions |
| @@ -619,6 +652,7 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) | |||
| 619 | { | 652 | { |
| 620 | int cap_ptr = iommu->cap_ptr; | 653 | int cap_ptr = iommu->cap_ptr; |
| 621 | u32 range, misc; | 654 | u32 range, misc; |
| 655 | int i, j; | ||
| 622 | 656 | ||
| 623 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, | 657 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, |
| 624 | &iommu->cap); | 658 | &iommu->cap); |
| @@ -633,12 +667,29 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) | |||
| 633 | MMIO_GET_LD(range)); | 667 | MMIO_GET_LD(range)); |
| 634 | iommu->evt_msi_num = MMIO_MSI_NUM(misc); | 668 | iommu->evt_msi_num = MMIO_MSI_NUM(misc); |
| 635 | 669 | ||
| 636 | if (is_rd890_iommu(iommu->dev)) { | 670 | if (!is_rd890_iommu(iommu->dev)) |
| 637 | pci_read_config_dword(iommu->dev, 0xf0, &iommu->cache_cfg[0]); | 671 | return; |
| 638 | pci_read_config_dword(iommu->dev, 0xf4, &iommu->cache_cfg[1]); | 672 | |
| 639 | pci_read_config_dword(iommu->dev, 0xf8, &iommu->cache_cfg[2]); | 673 | /* |
| 640 | pci_read_config_dword(iommu->dev, 0xfc, &iommu->cache_cfg[3]); | 674 | * Some rd890 systems may not be fully reconfigured by the BIOS, so |
| 641 | } | 675 | * it's necessary for us to store this information so it can be |
| 676 | * reprogrammed on resume | ||
| 677 | */ | ||
| 678 | |||
| 679 | pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4, | ||
| 680 | &iommu->stored_addr_lo); | ||
| 681 | pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8, | ||
| 682 | &iommu->stored_addr_hi); | ||
| 683 | |||
| 684 | /* Low bit locks writes to configuration space */ | ||
| 685 | iommu->stored_addr_lo &= ~1; | ||
| 686 | |||
| 687 | for (i = 0; i < 6; i++) | ||
| 688 | for (j = 0; j < 0x12; j++) | ||
| 689 | iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j); | ||
| 690 | |||
| 691 | for (i = 0; i < 0x83; i++) | ||
| 692 | iommu->stored_l2[i] = iommu_read_l2(iommu, i); | ||
| 642 | } | 693 | } |
| 643 | 694 | ||
| 644 | /* | 695 | /* |
| @@ -1127,14 +1178,53 @@ static void iommu_init_flags(struct amd_iommu *iommu) | |||
| 1127 | iommu_feature_enable(iommu, CONTROL_COHERENT_EN); | 1178 | iommu_feature_enable(iommu, CONTROL_COHERENT_EN); |
| 1128 | } | 1179 | } |
| 1129 | 1180 | ||
| 1130 | static void iommu_apply_quirks(struct amd_iommu *iommu) | 1181 | static void iommu_apply_resume_quirks(struct amd_iommu *iommu) |
| 1131 | { | 1182 | { |
| 1132 | if (is_rd890_iommu(iommu->dev)) { | 1183 | int i, j; |
| 1133 | pci_write_config_dword(iommu->dev, 0xf0, iommu->cache_cfg[0]); | 1184 | u32 ioc_feature_control; |
| 1134 | pci_write_config_dword(iommu->dev, 0xf4, iommu->cache_cfg[1]); | 1185 | struct pci_dev *pdev = NULL; |
| 1135 | pci_write_config_dword(iommu->dev, 0xf8, iommu->cache_cfg[2]); | 1186 | |
| 1136 | pci_write_config_dword(iommu->dev, 0xfc, iommu->cache_cfg[3]); | 1187 | /* RD890 BIOSes may not have completely reconfigured the iommu */ |
| 1137 | } | 1188 | if (!is_rd890_iommu(iommu->dev)) |
| 1189 | return; | ||
| 1190 | |||
| 1191 | /* | ||
| 1192 | * First, we need to ensure that the iommu is enabled. This is | ||
| 1193 | * controlled by a register in the northbridge | ||
| 1194 | */ | ||
| 1195 | pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0)); | ||
| 1196 | |||
| 1197 | if (!pdev) | ||
| 1198 | return; | ||
| 1199 | |||
| 1200 | /* Select Northbridge indirect register 0x75 and enable writing */ | ||
| 1201 | pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7)); | ||
| 1202 | pci_read_config_dword(pdev, 0x64, &ioc_feature_control); | ||
| 1203 | |||
| 1204 | /* Enable the iommu */ | ||
| 1205 | if (!(ioc_feature_control & 0x1)) | ||
| 1206 | pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1); | ||
| 1207 | |||
| 1208 | pci_dev_put(pdev); | ||
| 1209 | |||
| 1210 | /* Restore the iommu BAR */ | ||
| 1211 | pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, | ||
| 1212 | iommu->stored_addr_lo); | ||
| 1213 | pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8, | ||
| 1214 | iommu->stored_addr_hi); | ||
| 1215 | |||
| 1216 | /* Restore the l1 indirect regs for each of the 6 l1s */ | ||
| 1217 | for (i = 0; i < 6; i++) | ||
| 1218 | for (j = 0; j < 0x12; j++) | ||
| 1219 | iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]); | ||
| 1220 | |||
| 1221 | /* Restore the l2 indirect regs */ | ||
| 1222 | for (i = 0; i < 0x83; i++) | ||
| 1223 | iommu_write_l2(iommu, i, iommu->stored_l2[i]); | ||
| 1224 | |||
| 1225 | /* Lock PCI setup registers */ | ||
| 1226 | pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, | ||
| 1227 | iommu->stored_addr_lo | 1); | ||
| 1138 | } | 1228 | } |
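The northbridge access above is a textbook index/data pair: config offset 0x60 selects one of the indirect registers, with bit 7 doubling as a write enable, and 0x64 is the data window. Factored into helpers under the same assumptions the hunk itself makes:

    static u32 nb_indirect_read(struct pci_dev *nb, u8 reg)
    {
            u32 val;

            pci_write_config_dword(nb, 0x60, reg);     /* select register */
            pci_read_config_dword(nb, 0x64, &val);     /* read data window */
            return val;
    }

    static void nb_indirect_write(struct pci_dev *nb, u8 reg, u32 val)
    {
            pci_write_config_dword(nb, 0x60, reg | (1 << 7)); /* unlock writes */
            pci_write_config_dword(nb, 0x64, val);
    }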
| 1139 | 1229 | ||
| 1140 | /* | 1230 | /* |
| @@ -1147,7 +1237,6 @@ static void enable_iommus(void) | |||
| 1147 | 1237 | ||
| 1148 | for_each_iommu(iommu) { | 1238 | for_each_iommu(iommu) { |
| 1149 | iommu_disable(iommu); | 1239 | iommu_disable(iommu); |
| 1150 | iommu_apply_quirks(iommu); | ||
| 1151 | iommu_init_flags(iommu); | 1240 | iommu_init_flags(iommu); |
| 1152 | iommu_set_device_table(iommu); | 1241 | iommu_set_device_table(iommu); |
| 1153 | iommu_enable_command_buffer(iommu); | 1242 | iommu_enable_command_buffer(iommu); |
| @@ -1173,6 +1262,11 @@ static void disable_iommus(void) | |||
| 1173 | 1262 | ||
| 1174 | static int amd_iommu_resume(struct sys_device *dev) | 1263 | static int amd_iommu_resume(struct sys_device *dev) |
| 1175 | { | 1264 | { |
| 1265 | struct amd_iommu *iommu; | ||
| 1266 | |||
| 1267 | for_each_iommu(iommu) | ||
| 1268 | iommu_apply_resume_quirks(iommu); | ||
| 1269 | |||
| 1176 | /* re-load the hardware */ | 1270 | /* re-load the hardware */ |
| 1177 | enable_iommus(); | 1271 | enable_iommus(); |
| 1178 | 1272 | ||
diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/amd_nb.c index 0f7bc20cfcde..8f6463d8ed0d 100644 --- a/arch/x86/kernel/k8.c +++ b/arch/x86/kernel/amd_nb.c | |||
| @@ -8,21 +8,19 @@ | |||
| 8 | #include <linux/errno.h> | 8 | #include <linux/errno.h> |
| 9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
| 10 | #include <linux/spinlock.h> | 10 | #include <linux/spinlock.h> |
| 11 | #include <asm/k8.h> | 11 | #include <asm/amd_nb.h> |
| 12 | |||
| 13 | int num_k8_northbridges; | ||
| 14 | EXPORT_SYMBOL(num_k8_northbridges); | ||
| 15 | 12 | ||
| 16 | static u32 *flush_words; | 13 | static u32 *flush_words; |
| 17 | 14 | ||
| 18 | struct pci_device_id k8_nb_ids[] = { | 15 | struct pci_device_id k8_nb_ids[] = { |
| 19 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, | 16 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, |
| 20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, | 17 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, |
| 18 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) }, | ||
| 21 | {} | 19 | {} |
| 22 | }; | 20 | }; |
| 23 | EXPORT_SYMBOL(k8_nb_ids); | 21 | EXPORT_SYMBOL(k8_nb_ids); |
| 24 | 22 | ||
| 25 | struct pci_dev **k8_northbridges; | 23 | struct k8_northbridge_info k8_northbridges; |
| 26 | EXPORT_SYMBOL(k8_northbridges); | 24 | EXPORT_SYMBOL(k8_northbridges); |
| 27 | 25 | ||
| 28 | static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) | 26 | static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) |
| @@ -40,36 +38,45 @@ int cache_k8_northbridges(void) | |||
| 40 | int i; | 38 | int i; |
| 41 | struct pci_dev *dev; | 39 | struct pci_dev *dev; |
| 42 | 40 | ||
| 43 | if (num_k8_northbridges) | 41 | if (k8_northbridges.num) |
| 44 | return 0; | 42 | return 0; |
| 45 | 43 | ||
| 46 | dev = NULL; | 44 | dev = NULL; |
| 47 | while ((dev = next_k8_northbridge(dev)) != NULL) | 45 | while ((dev = next_k8_northbridge(dev)) != NULL) |
| 48 | num_k8_northbridges++; | 46 | k8_northbridges.num++; |
| 47 | |||
| 48 | /* some CPU families (e.g. family 0x11) do not support GART */ | ||
| 49 | if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || | ||
| 50 | boot_cpu_data.x86 == 0x15) | ||
| 51 | k8_northbridges.gart_supported = 1; | ||
| 49 | 52 | ||
| 50 | k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *), | 53 | k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) * |
| 51 | GFP_KERNEL); | 54 | sizeof(void *), GFP_KERNEL); |
| 52 | if (!k8_northbridges) | 55 | if (!k8_northbridges.nb_misc) |
| 53 | return -ENOMEM; | 56 | return -ENOMEM; |
| 54 | 57 | ||
| 55 | if (!num_k8_northbridges) { | 58 | if (!k8_northbridges.num) { |
| 56 | k8_northbridges[0] = NULL; | 59 | k8_northbridges.nb_misc[0] = NULL; |
| 57 | return 0; | 60 | return 0; |
| 58 | } | 61 | } |
| 59 | 62 | ||
| 60 | flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL); | 63 | if (k8_northbridges.gart_supported) { |
| 61 | if (!flush_words) { | 64 | flush_words = kmalloc(k8_northbridges.num * sizeof(u32), |
| 62 | kfree(k8_northbridges); | 65 | GFP_KERNEL); |
| 63 | return -ENOMEM; | 66 | if (!flush_words) { |
| 67 | kfree(k8_northbridges.nb_misc); | ||
| 68 | return -ENOMEM; | ||
| 69 | } | ||
| 64 | } | 70 | } |
| 65 | 71 | ||
| 66 | dev = NULL; | 72 | dev = NULL; |
| 67 | i = 0; | 73 | i = 0; |
| 68 | while ((dev = next_k8_northbridge(dev)) != NULL) { | 74 | while ((dev = next_k8_northbridge(dev)) != NULL) { |
| 69 | k8_northbridges[i] = dev; | 75 | k8_northbridges.nb_misc[i] = dev; |
| 70 | pci_read_config_dword(dev, 0x9c, &flush_words[i++]); | 76 | if (k8_northbridges.gart_supported) |
| 77 | pci_read_config_dword(dev, 0x9c, &flush_words[i++]); | ||
| 71 | } | 78 | } |
| 72 | k8_northbridges[i] = NULL; | 79 | k8_northbridges.nb_misc[i] = NULL; |
| 73 | return 0; | 80 | return 0; |
| 74 | } | 81 | } |
| 75 | EXPORT_SYMBOL_GPL(cache_k8_northbridges); | 82 | EXPORT_SYMBOL_GPL(cache_k8_northbridges); |
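The old pair of globals (num_k8_northbridges plus a bare pci_dev array) is folded into a single exported structure. Its definition is not part of this hunk, but the users here imply a shape roughly like the following; the field types are an inference, and the real declaration lives in <asm/amd_nb.h>:

    struct k8_northbridge_info {
            u16 num;                  /* number of northbridges found */
            u8  gart_supported;       /* set for CPU families with a GART */
            struct pci_dev **nb_misc; /* NULL-terminated array of misc devices */
    };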
| @@ -93,22 +100,25 @@ void k8_flush_garts(void) | |||
| 93 | unsigned long flags; | 100 | unsigned long flags; |
| 94 | static DEFINE_SPINLOCK(gart_lock); | 101 | static DEFINE_SPINLOCK(gart_lock); |
| 95 | 102 | ||
| 103 | if (!k8_northbridges.gart_supported) | ||
| 104 | return; | ||
| 105 | |||
| 96 | /* Avoid races between AGP and IOMMU. In theory it's not needed | 106 | /* Avoid races between AGP and IOMMU. In theory it's not needed |
| 97 | but I'm not sure if the hardware won't lose flush requests | 107 | but I'm not sure if the hardware won't lose flush requests |
| 98 | when another is pending. This whole thing is so expensive anyways | 108 | when another is pending. This whole thing is so expensive anyways |
| 99 | that it doesn't matter to serialize more. -AK */ | 109 | that it doesn't matter to serialize more. -AK */ |
| 100 | spin_lock_irqsave(&gart_lock, flags); | 110 | spin_lock_irqsave(&gart_lock, flags); |
| 101 | flushed = 0; | 111 | flushed = 0; |
| 102 | for (i = 0; i < num_k8_northbridges; i++) { | 112 | for (i = 0; i < k8_northbridges.num; i++) { |
| 103 | pci_write_config_dword(k8_northbridges[i], 0x9c, | 113 | pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c, |
| 104 | flush_words[i]|1); | 114 | flush_words[i]|1); |
| 105 | flushed++; | 115 | flushed++; |
| 106 | } | 116 | } |
| 107 | for (i = 0; i < num_k8_northbridges; i++) { | 117 | for (i = 0; i < k8_northbridges.num; i++) { |
| 108 | u32 w; | 118 | u32 w; |
| 109 | /* Make sure the hardware actually executed the flush*/ | 119 | /* Make sure the hardware actually executed the flush*/ |
| 110 | for (;;) { | 120 | for (;;) { |
| 111 | pci_read_config_dword(k8_northbridges[i], | 121 | pci_read_config_dword(k8_northbridges.nb_misc[i], |
| 112 | 0x9c, &w); | 122 | 0x9c, &w); |
| 113 | if (!(w & 1)) | 123 | if (!(w & 1)) |
| 114 | break; | 124 | break; |
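The flush handshake hangs off bit 0 of config register 0x9c: writing the cached flush word with bit 0 set starts a GART TLB flush, and the hardware clears the bit once the flush has retired. Condensed into a single helper with the same register semantics as the loops above (a sketch, not part of the patch):

    static void nb_flush_gart(struct pci_dev *nb, u32 flush_word)
    {
            u32 w;

            pci_write_config_dword(nb, 0x9c, flush_word | 1); /* start flush */
            do {
                    pci_read_config_dword(nb, 0x9c, &w);      /* poll done bit */
            } while (w & 1);
    }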
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 8dd77800ff5d..92543c73cf8e 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c | |||
| @@ -231,34 +231,6 @@ static void apbt_restart_clocksource(struct clocksource *cs) | |||
| 231 | apbt_start_counter(phy_cs_timer_id); | 231 | apbt_start_counter(phy_cs_timer_id); |
| 232 | } | 232 | } |
| 233 | 233 | ||
| 234 | /* Setup IRQ routing via IOAPIC */ | ||
| 235 | #ifdef CONFIG_SMP | ||
| 236 | static void apbt_setup_irq(struct apbt_dev *adev) | ||
| 237 | { | ||
| 238 | struct irq_chip *chip; | ||
| 239 | struct irq_desc *desc; | ||
| 240 | |||
| 241 | /* timer0 irq has been setup early */ | ||
| 242 | if (adev->irq == 0) | ||
| 243 | return; | ||
| 244 | desc = irq_to_desc(adev->irq); | ||
| 245 | chip = get_irq_chip(adev->irq); | ||
| 246 | disable_irq(adev->irq); | ||
| 247 | desc->status |= IRQ_MOVE_PCNTXT; | ||
| 248 | irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); | ||
| 249 | /* APB timer irqs are set up as mp_irqs, timer is edge triggered */ | ||
| 250 | set_irq_chip_and_handler_name(adev->irq, chip, handle_edge_irq, "edge"); | ||
| 251 | enable_irq(adev->irq); | ||
| 252 | if (system_state == SYSTEM_BOOTING) | ||
| 253 | if (request_irq(adev->irq, apbt_interrupt_handler, | ||
| 254 | IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, | ||
| 255 | adev->name, adev)) { | ||
| 256 | printk(KERN_ERR "Failed to request IRQ for APBT%d\n", | ||
| 257 | adev->num); | ||
| 258 | } | ||
| 259 | } | ||
| 260 | #endif | ||
| 261 | |||
| 262 | static void apbt_enable_int(int n) | 234 | static void apbt_enable_int(int n) |
| 263 | { | 235 | { |
| 264 | unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); | 236 | unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); |
| @@ -334,6 +306,27 @@ static int __init apbt_clockevent_register(void) | |||
| 334 | } | 306 | } |
| 335 | 307 | ||
| 336 | #ifdef CONFIG_SMP | 308 | #ifdef CONFIG_SMP |
| 309 | |||
| 310 | static void apbt_setup_irq(struct apbt_dev *adev) | ||
| 311 | { | ||
| 312 | /* timer0 irq has been setup early */ | ||
| 313 | if (adev->irq == 0) | ||
| 314 | return; | ||
| 315 | |||
| 316 | if (system_state == SYSTEM_BOOTING) { | ||
| 317 | irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); | ||
| 318 | /* APB timer irqs are set up as mp_irqs, timer is edge type */ | ||
| 319 | __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge"); | ||
| 320 | if (request_irq(adev->irq, apbt_interrupt_handler, | ||
| 321 | IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, | ||
| 322 | adev->name, adev)) { | ||
| 323 | printk(KERN_ERR "Failed to request IRQ for APBT%d\n", | ||
| 324 | adev->num); | ||
| 325 | } | ||
| 326 | } else | ||
| 327 | enable_irq(adev->irq); | ||
| 328 | } | ||
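Note that the rewritten helper no longer pokes desc->status by hand; status flags now go through an accessor that updates the descriptor under its lock. The equivalence, assuming the genirq API this tree provides:

    /*  old open-coded form:
     *          desc = irq_to_desc(irq);
     *          desc->status |= IRQ_MOVE_PCNTXT;
     *
     *  new accessor form (first mask clears flags, second sets them):
     *          irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
     */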
| 329 | |||
| 337 | /* Should be called with per cpu */ | 330 | /* Should be called with per cpu */ |
| 338 | void apbt_setup_secondary_clock(void) | 331 | void apbt_setup_secondary_clock(void) |
| 339 | { | 332 | { |
| @@ -343,7 +336,7 @@ void apbt_setup_secondary_clock(void) | |||
| 343 | 336 | ||
| 344 | /* Don't register boot CPU clockevent */ | 337 | /* Don't register boot CPU clockevent */ |
| 345 | cpu = smp_processor_id(); | 338 | cpu = smp_processor_id(); |
| 346 | if (cpu == boot_cpu_id) | 339 | if (!cpu) |
| 347 | return; | 340 | return; |
| 348 | /* | 341 | /* |
| 349 | * We need to calculate the scaled math multiplication factor for | 342 | * We need to calculate the scaled math multiplication factor for |
| @@ -389,16 +382,17 @@ static int apbt_cpuhp_notify(struct notifier_block *n, | |||
| 389 | 382 | ||
| 390 | switch (action & 0xf) { | 383 | switch (action & 0xf) { |
| 391 | case CPU_DEAD: | 384 | case CPU_DEAD: |
| 385 | disable_irq(adev->irq); | ||
| 392 | apbt_disable_int(cpu); | 386 | apbt_disable_int(cpu); |
| 393 | if (system_state == SYSTEM_RUNNING) | 387 | if (system_state == SYSTEM_RUNNING) { |
| 394 | pr_debug("skipping APBT CPU %lu offline\n", cpu); | 388 | pr_debug("skipping APBT CPU %lu offline\n", cpu); |
| 395 | else if (adev) { | 389 | } else if (adev) { |
| 396 | pr_debug("APBT clockevent for cpu %lu offline\n", cpu); | 390 | pr_debug("APBT clockevent for cpu %lu offline\n", cpu); |
| 397 | free_irq(adev->irq, adev); | 391 | free_irq(adev->irq, adev); |
| 398 | } | 392 | } |
| 399 | break; | 393 | break; |
| 400 | default: | 394 | default: |
| 401 | pr_debug(KERN_INFO "APBT notified %lu, no action\n", action); | 395 | pr_debug("APBT notified %lu, no action\n", action); |
| 402 | } | 396 | } |
| 403 | return NOTIFY_OK; | 397 | return NOTIFY_OK; |
| 404 | } | 398 | } |
| @@ -552,7 +546,7 @@ bad_count: | |||
| 552 | pr_debug("APB CS going back %lx:%lx:%lx ", | 546 | pr_debug("APB CS going back %lx:%lx:%lx ", |
| 553 | t2, last_read, t2 - last_read); | 547 | t2, last_read, t2 - last_read); |
| 554 | bad_count_x3: | 548 | bad_count_x3: |
| 555 | pr_debug(KERN_INFO "tripple check enforced\n"); | 549 | pr_debug("triple check enforced\n"); |
| 556 | t0 = apbt_readl(phy_cs_timer_id, | 550 | t0 = apbt_readl(phy_cs_timer_id, |
| 557 | APBTMR_N_CURRENT_VALUE); | 551 | APBTMR_N_CURRENT_VALUE); |
| 558 | udelay(1); | 552 | udelay(1); |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index a2e0caf26e17..377f5db3b8b4 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
| @@ -27,7 +27,7 @@ | |||
| 27 | #include <asm/gart.h> | 27 | #include <asm/gart.h> |
| 28 | #include <asm/pci-direct.h> | 28 | #include <asm/pci-direct.h> |
| 29 | #include <asm/dma.h> | 29 | #include <asm/dma.h> |
| 30 | #include <asm/k8.h> | 30 | #include <asm/amd_nb.h> |
| 31 | #include <asm/x86_init.h> | 31 | #include <asm/x86_init.h> |
| 32 | 32 | ||
| 33 | int gart_iommu_aperture; | 33 | int gart_iommu_aperture; |
| @@ -307,7 +307,7 @@ void __init early_gart_iommu_check(void) | |||
| 307 | continue; | 307 | continue; |
| 308 | 308 | ||
| 309 | ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); | 309 | ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); |
| 310 | aper_enabled = ctl & AMD64_GARTEN; | 310 | aper_enabled = ctl & GARTEN; |
| 311 | aper_order = (ctl >> 1) & 7; | 311 | aper_order = (ctl >> 1) & 7; |
| 312 | aper_size = (32 * 1024 * 1024) << aper_order; | 312 | aper_size = (32 * 1024 * 1024) << aper_order; |
| 313 | aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; | 313 | aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; |
| @@ -362,7 +362,7 @@ void __init early_gart_iommu_check(void) | |||
| 362 | continue; | 362 | continue; |
| 363 | 363 | ||
| 364 | ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); | 364 | ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); |
| 365 | ctl &= ~AMD64_GARTEN; | 365 | ctl &= ~GARTEN; |
| 366 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); | 366 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); |
| 367 | } | 367 | } |
| 368 | } | 368 | } |
| @@ -505,8 +505,13 @@ out: | |||
| 505 | 505 | ||
| 506 | /* Fix up the north bridges */ | 506 | /* Fix up the north bridges */ |
| 507 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { | 507 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { |
| 508 | int bus; | 508 | int bus, dev_base, dev_limit; |
| 509 | int dev_base, dev_limit; | 509 | |
| 510 | /* | ||
| 511 | * Don't enable translation yet but enable GART IO and CPU | ||
| 512 | * accesses and set DISTLBWALKPRB since GART table memory is UC. | ||
| 513 | */ | ||
| 514 | u32 ctl = DISTLBWALKPRB | aper_order << 1; | ||
| 510 | 515 | ||
| 511 | bus = bus_dev_ranges[i].bus; | 516 | bus = bus_dev_ranges[i].bus; |
| 512 | dev_base = bus_dev_ranges[i].dev_base; | 517 | dev_base = bus_dev_ranges[i].dev_base; |
| @@ -515,10 +520,7 @@ out: | |||
| 515 | if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) | 520 | if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) |
| 516 | continue; | 521 | continue; |
| 517 | 522 | ||
| 518 | /* Don't enable translation yet. That is done later. | 523 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); |
| 519 | Assume this BIOS didn't initialise the GART so | ||
| 520 | just overwrite all previous bits */ | ||
| 521 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1); | ||
| 522 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25); | 524 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25); |
| 523 | } | 525 | } |
| 524 | } | 526 | } |
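A worked example of the new control word, assuming the masks from <asm/gart.h> (DISTLBWALKPRB is bit 6, GARTEN bit 0) and a 64 MB aperture, i.e. aper_order = 1:

    /*  ctl = DISTLBWALKPRB | (aper_order << 1)
     *      = 0x40          | 0x02
     *      = 0x42
     *
     *  GARTEN (bit 0) stays clear, so translation remains off until the
     *  GART table itself has been set up later in boot.
     */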
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index e3b534cda49a..850657d1b0ed 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
| @@ -52,6 +52,7 @@ | |||
| 52 | #include <asm/mce.h> | 52 | #include <asm/mce.h> |
| 53 | #include <asm/kvm_para.h> | 53 | #include <asm/kvm_para.h> |
| 54 | #include <asm/tsc.h> | 54 | #include <asm/tsc.h> |
| 55 | #include <asm/atomic.h> | ||
| 55 | 56 | ||
| 56 | unsigned int num_processors; | 57 | unsigned int num_processors; |
| 57 | 58 | ||
| @@ -370,38 +371,87 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | |||
| 370 | } | 371 | } |
| 371 | 372 | ||
| 372 | /* | 373 | /* |
| 373 | * Setup extended LVT, AMD specific (K8, family 10h) | 374 | * Setup extended LVT, AMD specific |
| 374 | * | 375 | * |
| 375 | * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and | 376 | * Software should use the LVT offsets the BIOS provides. The offsets |
| 376 | * MCE interrupts are supported. Thus MCE offset must be set to 0. | 377 | * are determined by the subsystems using it like those for MCE |
| 378 | * threshold or IBS. On K8 only offset 0 (APIC500) and MCE interrupts | ||
| 379 | * are supported. Beginning with family 10h at least 4 offsets are | ||
| 380 | * available. | ||
| 377 | * | 381 | * |
| 378 | * If mask=1, the LVT entry does not generate interrupts while mask=0 | 382 | * Since the offsets must be consistent for all cores, we keep track |
| 379 | * enables the vector. See also the BKDGs. | 383 | * of the LVT offsets in software and reserve the offset for the same |
| 384 | * vector also to be used on other cores. An offset is freed by | ||
| 385 | * setting the entry to APIC_EILVT_MASKED. | ||
| 386 | * | ||
| 387 | * If the BIOS is right, there should be no conflicts. Otherwise a | ||
| 388 | * "[Firmware Bug]: ..." error message is generated. However, if | ||
| 389 | * software does not properly determine the offsets, it is not | ||
| 390 | * necessarily a BIOS bug. | ||
| 380 | */ | 391 | */ |
| 381 | 392 | ||
| 382 | #define APIC_EILVT_LVTOFF_MCE 0 | 393 | static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX]; |
| 383 | #define APIC_EILVT_LVTOFF_IBS 1 | ||
| 384 | 394 | ||
| 385 | static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) | 395 | static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new) |
| 386 | { | 396 | { |
| 387 | unsigned long reg = (lvt_off << 4) + APIC_EILVTn(0); | 397 | return (old & APIC_EILVT_MASKED) |
| 388 | unsigned int v = (mask << 16) | (msg_type << 8) | vector; | 398 | || (new == APIC_EILVT_MASKED) |
| 389 | 399 | || ((new & ~APIC_EILVT_MASKED) == old); | |
| 390 | apic_write(reg, v); | ||
| 391 | } | 400 | } |
| 392 | 401 | ||
| 393 | u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) | 402 | static unsigned int reserve_eilvt_offset(int offset, unsigned int new) |
| 394 | { | 403 | { |
| 395 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); | 404 | unsigned int rsvd; /* 0: uninitialized */ |
| 396 | return APIC_EILVT_LVTOFF_MCE; | 405 | |
| 406 | if (offset >= APIC_EILVT_NR_MAX) | ||
| 407 | return ~0; | ||
| 408 | |||
| 409 | rsvd = atomic_read(&eilvt_offsets[offset]) & ~APIC_EILVT_MASKED; | ||
| 410 | do { | ||
| 411 | if (rsvd && | ||
| 412 | !eilvt_entry_is_changeable(rsvd, new)) | ||
| 413 | /* may not change if vectors are different */ | ||
| 414 | return rsvd; | ||
| 415 | rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new); | ||
| 416 | } while (rsvd != new); | ||
| 417 | |||
| 418 | return new; | ||
| 397 | } | 419 | } |
| 398 | 420 | ||
| 399 | u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) | 421 | /* |
| 422 | * If mask=1, the LVT entry does not generate interrupts while mask=0 | ||
| 423 | * enables the vector. See also the BKDGs. | ||
| 424 | */ | ||
| 425 | |||
| 426 | int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask) | ||
| 400 | { | 427 | { |
| 401 | setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); | 428 | unsigned long reg = APIC_EILVTn(offset); |
| 402 | return APIC_EILVT_LVTOFF_IBS; | 429 | unsigned int new, old, reserved; |
| 430 | |||
| 431 | new = (mask << 16) | (msg_type << 8) | vector; | ||
| 432 | old = apic_read(reg); | ||
| 433 | reserved = reserve_eilvt_offset(offset, new); | ||
| 434 | |||
| 435 | if (reserved != new) { | ||
| 436 | pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but " | ||
| 437 | "vector 0x%x was already reserved by another core, " | ||
| 438 | "APIC%lX=0x%x\n", | ||
| 439 | smp_processor_id(), new, reserved, reg, old); | ||
| 440 | return -EINVAL; | ||
| 441 | } | ||
| 442 | |||
| 443 | if (!eilvt_entry_is_changeable(old, new)) { | ||
| 444 | pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but " | ||
| 445 | "register already in use, APIC%lX=0x%x\n", | ||
| 446 | smp_processor_id(), new, reg, old); | ||
| 447 | return -EBUSY; | ||
| 448 | } | ||
| 449 | |||
| 450 | apic_write(reg, new); | ||
| 451 | |||
| 452 | return 0; | ||
| 403 | } | 453 | } |
| 404 | EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); | 454 | EXPORT_SYMBOL_GPL(setup_APIC_eilvt); |
| 405 | 455 | ||
| 406 | /* | 456 | /* |
| 407 | * Program the next event, relative to now | 457 | * Program the next event, relative to now |
| @@ -1665,10 +1715,7 @@ int __init APIC_init_uniprocessor(void) | |||
| 1665 | } | 1715 | } |
| 1666 | #endif | 1716 | #endif |
| 1667 | 1717 | ||
| 1668 | #ifndef CONFIG_SMP | ||
| 1669 | enable_IR_x2apic(); | ||
| 1670 | default_setup_apic_routing(); | 1718 | default_setup_apic_routing(); |
| 1671 | #endif | ||
| 1672 | 1719 | ||
| 1673 | verify_local_APIC(); | 1720 | verify_local_APIC(); |
| 1674 | connect_bsp_APIC(); | 1721 | connect_bsp_APIC(); |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5c5b8f3dddb5..8ae808d110f4 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
| @@ -131,13 +131,9 @@ struct irq_pin_list { | |||
| 131 | struct irq_pin_list *next; | 131 | struct irq_pin_list *next; |
| 132 | }; | 132 | }; |
| 133 | 133 | ||
| 134 | static struct irq_pin_list *get_one_free_irq_2_pin(int node) | 134 | static struct irq_pin_list *alloc_irq_pin_list(int node) |
| 135 | { | 135 | { |
| 136 | struct irq_pin_list *pin; | 136 | return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node); |
| 137 | |||
| 138 | pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); | ||
| 139 | |||
| 140 | return pin; | ||
| 141 | } | 137 | } |
| 142 | 138 | ||
| 143 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ | 139 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ |
| @@ -150,10 +146,7 @@ static struct irq_cfg irq_cfgx[NR_IRQS]; | |||
| 150 | int __init arch_early_irq_init(void) | 146 | int __init arch_early_irq_init(void) |
| 151 | { | 147 | { |
| 152 | struct irq_cfg *cfg; | 148 | struct irq_cfg *cfg; |
| 153 | struct irq_desc *desc; | 149 | int count, node, i; |
| 154 | int count; | ||
| 155 | int node; | ||
| 156 | int i; | ||
| 157 | 150 | ||
| 158 | if (!legacy_pic->nr_legacy_irqs) { | 151 | if (!legacy_pic->nr_legacy_irqs) { |
| 159 | nr_irqs_gsi = 0; | 152 | nr_irqs_gsi = 0; |
| @@ -162,13 +155,15 @@ int __init arch_early_irq_init(void) | |||
| 162 | 155 | ||
| 163 | cfg = irq_cfgx; | 156 | cfg = irq_cfgx; |
| 164 | count = ARRAY_SIZE(irq_cfgx); | 157 | count = ARRAY_SIZE(irq_cfgx); |
| 165 | node= cpu_to_node(boot_cpu_id); | 158 | node = cpu_to_node(0); |
| 159 | |||
| 160 | /* Make sure the legacy interrupts are marked in the bitmap */ | ||
| 161 | irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs); | ||
| 166 | 162 | ||
| 167 | for (i = 0; i < count; i++) { | 163 | for (i = 0; i < count; i++) { |
| 168 | desc = irq_to_desc(i); | 164 | set_irq_chip_data(i, &cfg[i]); |
| 169 | desc->chip_data = &cfg[i]; | 165 | zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node); |
| 170 | zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); | 166 | zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node); |
| 171 | zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); | ||
| 172 | /* | 167 | /* |
| 173 | * For legacy IRQ's, start with assigning irq0 to irq15 to | 168 | * For legacy IRQ's, start with assigning irq0 to irq15 to |
| 174 | * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0. | 169 | * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0. |
| @@ -183,170 +178,88 @@ int __init arch_early_irq_init(void) | |||
| 183 | } | 178 | } |
| 184 | 179 | ||
| 185 | #ifdef CONFIG_SPARSE_IRQ | 180 | #ifdef CONFIG_SPARSE_IRQ |
| 186 | struct irq_cfg *irq_cfg(unsigned int irq) | 181 | static struct irq_cfg *irq_cfg(unsigned int irq) |
| 187 | { | 182 | { |
| 188 | struct irq_cfg *cfg = NULL; | 183 | return get_irq_chip_data(irq); |
| 189 | struct irq_desc *desc; | ||
| 190 | |||
| 191 | desc = irq_to_desc(irq); | ||
| 192 | if (desc) | ||
| 193 | cfg = desc->chip_data; | ||
| 194 | |||
| 195 | return cfg; | ||
| 196 | } | 184 | } |
| 197 | 185 | ||
| 198 | static struct irq_cfg *get_one_free_irq_cfg(int node) | 186 | static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) |
| 199 | { | 187 | { |
| 200 | struct irq_cfg *cfg; | 188 | struct irq_cfg *cfg; |
| 201 | 189 | ||
| 202 | cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); | 190 | cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node); |
| 203 | if (cfg) { | 191 | if (!cfg) |
| 204 | if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { | 192 | return NULL; |
| 205 | kfree(cfg); | 193 | if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node)) |
| 206 | cfg = NULL; | 194 | goto out_cfg; |
| 207 | } else if (!zalloc_cpumask_var_node(&cfg->old_domain, | 195 | if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node)) |
| 208 | GFP_ATOMIC, node)) { | 196 | goto out_domain; |
| 209 | free_cpumask_var(cfg->domain); | ||
| 210 | kfree(cfg); | ||
| 211 | cfg = NULL; | ||
| 212 | } | ||
| 213 | } | ||
| 214 | |||
| 215 | return cfg; | 197 | return cfg; |
| 198 | out_domain: | ||
| 199 | free_cpumask_var(cfg->domain); | ||
| 200 | out_cfg: | ||
| 201 | kfree(cfg); | ||
| 202 | return NULL; | ||
| 216 | } | 203 | } |
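The rewritten allocator also adopts the kernel's standard goto unwinding: each failing step jumps to a label that releases only what was acquired before it, in reverse order.

    /*  kzalloc_node(cfg) fails      -> return NULL       (nothing to undo)
     *  zalloc(domain) fails         -> out_cfg:    kfree(cfg)
     *  zalloc(old_domain) fails     -> out_domain: free domain, then cfg
     */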
| 217 | 204 | ||
| 218 | int arch_init_chip_data(struct irq_desc *desc, int node) | 205 | static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) |
| 219 | { | ||
| 220 | struct irq_cfg *cfg; | ||
| 221 | |||
| 222 | cfg = desc->chip_data; | ||
| 223 | if (!cfg) { | ||
| 224 | desc->chip_data = get_one_free_irq_cfg(node); | ||
| 225 | if (!desc->chip_data) { | ||
| 226 | printk(KERN_ERR "can not alloc irq_cfg\n"); | ||
| 227 | BUG_ON(1); | ||
| 228 | } | ||
| 229 | } | ||
| 230 | |||
| 231 | return 0; | ||
| 232 | } | ||
| 233 | |||
| 234 | /* for move_irq_desc */ | ||
| 235 | static void | ||
| 236 | init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node) | ||
| 237 | { | 206 | { |
| 238 | struct irq_pin_list *old_entry, *head, *tail, *entry; | 207 | if (!cfg) |
| 239 | |||
| 240 | cfg->irq_2_pin = NULL; | ||
| 241 | old_entry = old_cfg->irq_2_pin; | ||
| 242 | if (!old_entry) | ||
| 243 | return; | ||
| 244 | |||
| 245 | entry = get_one_free_irq_2_pin(node); | ||
| 246 | if (!entry) | ||
| 247 | return; | 208 | return; |
| 209 | set_irq_chip_data(at, NULL); | ||
| 210 | free_cpumask_var(cfg->domain); | ||
| 211 | free_cpumask_var(cfg->old_domain); | ||
| 212 | kfree(cfg); | ||
| 213 | } | ||
| 248 | 214 | ||
| 249 | entry->apic = old_entry->apic; | 215 | #else |
| 250 | entry->pin = old_entry->pin; | ||
| 251 | head = entry; | ||
| 252 | tail = entry; | ||
| 253 | old_entry = old_entry->next; | ||
| 254 | while (old_entry) { | ||
| 255 | entry = get_one_free_irq_2_pin(node); | ||
| 256 | if (!entry) { | ||
| 257 | entry = head; | ||
| 258 | while (entry) { | ||
| 259 | head = entry->next; | ||
| 260 | kfree(entry); | ||
| 261 | entry = head; | ||
| 262 | } | ||
| 263 | /* still use the old one */ | ||
| 264 | return; | ||
| 265 | } | ||
| 266 | entry->apic = old_entry->apic; | ||
| 267 | entry->pin = old_entry->pin; | ||
| 268 | tail->next = entry; | ||
| 269 | tail = entry; | ||
| 270 | old_entry = old_entry->next; | ||
| 271 | } | ||
| 272 | 216 | ||
| 273 | tail->next = NULL; | 217 | struct irq_cfg *irq_cfg(unsigned int irq) |
| 274 | cfg->irq_2_pin = head; | 218 | { |
| 219 | return irq < nr_irqs ? irq_cfgx + irq : NULL; | ||
| 275 | } | 220 | } |
| 276 | 221 | ||
| 277 | static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) | 222 | static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) |
| 278 | { | 223 | { |
| 279 | struct irq_pin_list *entry, *next; | 224 | return irq_cfgx + irq; |
| 280 | 225 | } | |
| 281 | if (old_cfg->irq_2_pin == cfg->irq_2_pin) | ||
| 282 | return; | ||
| 283 | 226 | ||
| 284 | entry = old_cfg->irq_2_pin; | 227 | static inline void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { } |
| 285 | 228 | ||
| 286 | while (entry) { | 229 | #endif |
| 287 | next = entry->next; | ||
| 288 | kfree(entry); | ||
| 289 | entry = next; | ||
| 290 | } | ||
| 291 | old_cfg->irq_2_pin = NULL; | ||
| 292 | } | ||
| 293 | 230 | ||
| 294 | void arch_init_copy_chip_data(struct irq_desc *old_desc, | 231 | static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) |
| 295 | struct irq_desc *desc, int node) | ||
| 296 | { | 232 | { |
| 233 | int res = irq_alloc_desc_at(at, node); | ||
| 297 | struct irq_cfg *cfg; | 234 | struct irq_cfg *cfg; |
| 298 | struct irq_cfg *old_cfg; | ||
| 299 | |||
| 300 | cfg = get_one_free_irq_cfg(node); | ||
| 301 | 235 | ||
| 302 | if (!cfg) | 236 | if (res < 0) { |
| 303 | return; | 237 | if (res != -EEXIST) |
| 304 | 238 | return NULL; | |
| 305 | desc->chip_data = cfg; | 239 | cfg = get_irq_chip_data(at); |
| 306 | 240 | if (cfg) | |
| 307 | old_cfg = old_desc->chip_data; | 241 | return cfg; |
| 308 | 242 | } | |
| 309 | cfg->vector = old_cfg->vector; | ||
| 310 | cfg->move_in_progress = old_cfg->move_in_progress; | ||
| 311 | cpumask_copy(cfg->domain, old_cfg->domain); | ||
| 312 | cpumask_copy(cfg->old_domain, old_cfg->old_domain); | ||
| 313 | |||
| 314 | init_copy_irq_2_pin(old_cfg, cfg, node); | ||
| 315 | } | ||
| 316 | 243 | ||
| 317 | static void free_irq_cfg(struct irq_cfg *cfg) | 244 | cfg = alloc_irq_cfg(at, node); |
| 318 | { | 245 | if (cfg) |
| 319 | free_cpumask_var(cfg->domain); | 246 | set_irq_chip_data(at, cfg); |
| 320 | free_cpumask_var(cfg->old_domain); | 247 | else |
| 321 | kfree(cfg); | 248 | irq_free_desc(at); |
| 249 | return cfg; | ||
| 322 | } | 250 | } |
| 323 | 251 | ||
| 324 | void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) | 252 | static int alloc_irq_from(unsigned int from, int node) |
| 325 | { | 253 | { |
| 326 | struct irq_cfg *old_cfg, *cfg; | 254 | return irq_alloc_desc_from(from, node); |
| 327 | |||
| 328 | old_cfg = old_desc->chip_data; | ||
| 329 | cfg = desc->chip_data; | ||
| 330 | |||
| 331 | if (old_cfg == cfg) | ||
| 332 | return; | ||
| 333 | |||
| 334 | if (old_cfg) { | ||
| 335 | free_irq_2_pin(old_cfg, cfg); | ||
| 336 | free_irq_cfg(old_cfg); | ||
| 337 | old_desc->chip_data = NULL; | ||
| 338 | } | ||
| 339 | } | 255 | } |
| 340 | /* end for move_irq_desc */ | ||
| 341 | 256 | ||
| 342 | #else | 257 | static void free_irq_at(unsigned int at, struct irq_cfg *cfg) |
| 343 | struct irq_cfg *irq_cfg(unsigned int irq) | ||
| 344 | { | 258 | { |
| 345 | return irq < nr_irqs ? irq_cfgx + irq : NULL; | 259 | free_irq_cfg(at, cfg); |
| 260 | irq_free_desc(at); | ||
| 346 | } | 261 | } |
| 347 | 262 | ||
| 348 | #endif | ||
| 349 | |||
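alloc_irq_and_cfg_at() and free_irq_at() now tie the descriptor and its chip data to a single lifetime. A sketch of the intended pairing, where example_program() is a hypothetical stand-in for whatever setup can still fail after allocation:

    static int example_setup_at(unsigned int irq, int node)
    {
            struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);

            if (!cfg)
                    return -ENOMEM;
            if (example_program(cfg) < 0) {
                    free_irq_at(irq, cfg);  /* frees cfg and the descriptor */
                    return -EIO;
            }
            return 0;
    }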
| 350 | struct io_apic { | 263 | struct io_apic { |
| 351 | unsigned int index; | 264 | unsigned int index; |
| 352 | unsigned int unused[3]; | 265 | unsigned int unused[3]; |
| @@ -451,7 +364,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | |||
| 451 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); | 364 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); |
| 452 | } | 365 | } |
| 453 | 366 | ||
| 454 | void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | 367 | static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) |
| 455 | { | 368 | { |
| 456 | unsigned long flags; | 369 | unsigned long flags; |
| 457 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 370 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
| @@ -481,7 +394,7 @@ static void ioapic_mask_entry(int apic, int pin) | |||
| 481 | * fast in the common case, and fast for shared ISA-space IRQs. | 394 | * fast in the common case, and fast for shared ISA-space IRQs. |
| 482 | */ | 395 | */ |
| 483 | static int | 396 | static int |
| 484 | add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin) | 397 | __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) |
| 485 | { | 398 | { |
| 486 | struct irq_pin_list **last, *entry; | 399 | struct irq_pin_list **last, *entry; |
| 487 | 400 | ||
| @@ -493,7 +406,7 @@ add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin) | |||
| 493 | last = &entry->next; | 406 | last = &entry->next; |
| 494 | } | 407 | } |
| 495 | 408 | ||
| 496 | entry = get_one_free_irq_2_pin(node); | 409 | entry = alloc_irq_pin_list(node); |
| 497 | if (!entry) { | 410 | if (!entry) { |
| 498 | printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", | 411 | printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", |
| 499 | node, apic, pin); | 412 | node, apic, pin); |
| @@ -508,7 +421,7 @@ add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin) | |||
| 508 | 421 | ||
| 509 | static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) | 422 | static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) |
| 510 | { | 423 | { |
| 511 | if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin)) | 424 | if (__add_pin_to_irq_node(cfg, node, apic, pin)) |
| 512 | panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); | 425 | panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); |
| 513 | } | 426 | } |
| 514 | 427 | ||
| @@ -571,11 +484,6 @@ static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry) | |||
| 571 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); | 484 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); |
| 572 | } | 485 | } |
| 573 | 486 | ||
| 574 | static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) | ||
| 575 | { | ||
| 576 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); | ||
| 577 | } | ||
| 578 | |||
| 579 | static void io_apic_sync(struct irq_pin_list *entry) | 487 | static void io_apic_sync(struct irq_pin_list *entry) |
| 580 | { | 488 | { |
| 581 | /* | 489 | /* |
| @@ -587,44 +495,37 @@ static void io_apic_sync(struct irq_pin_list *entry) | |||
| 587 | readl(&io_apic->data); | 495 | readl(&io_apic->data); |
| 588 | } | 496 | } |
| 589 | 497 | ||
| 590 | static void __mask_IO_APIC_irq(struct irq_cfg *cfg) | 498 | static void mask_ioapic(struct irq_cfg *cfg) |
| 591 | { | 499 | { |
| 500 | unsigned long flags; | ||
| 501 | |||
| 502 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
| 592 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); | 503 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); |
| 504 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 593 | } | 505 | } |
| 594 | 506 | ||
| 595 | static void mask_IO_APIC_irq_desc(struct irq_desc *desc) | 507 | static void mask_ioapic_irq(struct irq_data *data) |
| 596 | { | 508 | { |
| 597 | struct irq_cfg *cfg = desc->chip_data; | 509 | mask_ioapic(data->chip_data); |
| 598 | unsigned long flags; | 510 | } |
| 599 | |||
| 600 | BUG_ON(!cfg); | ||
| 601 | 511 | ||
| 602 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 512 | static void __unmask_ioapic(struct irq_cfg *cfg) |
| 603 | __mask_IO_APIC_irq(cfg); | 513 | { |
| 604 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 514 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); |
| 605 | } | 515 | } |
| 606 | 516 | ||
| 607 | static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) | 517 | static void unmask_ioapic(struct irq_cfg *cfg) |
| 608 | { | 518 | { |
| 609 | struct irq_cfg *cfg = desc->chip_data; | ||
| 610 | unsigned long flags; | 519 | unsigned long flags; |
| 611 | 520 | ||
| 612 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 521 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
| 613 | __unmask_IO_APIC_irq(cfg); | 522 | __unmask_ioapic(cfg); |
| 614 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 523 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
| 615 | } | 524 | } |
| 616 | 525 | ||
| 617 | static void mask_IO_APIC_irq(unsigned int irq) | 526 | static void unmask_ioapic_irq(struct irq_data *data) |
| 618 | { | 527 | { |
| 619 | struct irq_desc *desc = irq_to_desc(irq); | 528 | unmask_ioapic(data->chip_data); |
| 620 | |||
| 621 | mask_IO_APIC_irq_desc(desc); | ||
| 622 | } | ||
| 623 | static void unmask_IO_APIC_irq(unsigned int irq) | ||
| 624 | { | ||
| 625 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 626 | |||
| 627 | unmask_IO_APIC_irq_desc(desc); | ||
| 628 | } | 529 | } |
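The renaming settles on a consistent locking convention for the mask/unmask family:

    /*  __unmask_ioapic(cfg);      caller already holds ioapic_lock
     *  unmask_ioapic(cfg);        takes and releases ioapic_lock itself
     *  unmask_ioapic_irq(data);   irq_chip callback, unwraps data->chip_data
     */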
| 629 | 530 | ||
| 630 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | 531 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) |
| @@ -694,14 +595,14 @@ struct IO_APIC_route_entry **alloc_ioapic_entries(void) | |||
| 694 | struct IO_APIC_route_entry **ioapic_entries; | 595 | struct IO_APIC_route_entry **ioapic_entries; |
| 695 | 596 | ||
| 696 | ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics, | 597 | ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics, |
| 697 | GFP_ATOMIC); | 598 | GFP_KERNEL); |
| 698 | if (!ioapic_entries) | 599 | if (!ioapic_entries) |
| 699 | return 0; | 600 | return 0; |
| 700 | 601 | ||
| 701 | for (apic = 0; apic < nr_ioapics; apic++) { | 602 | for (apic = 0; apic < nr_ioapics; apic++) { |
| 702 | ioapic_entries[apic] = | 603 | ioapic_entries[apic] = |
| 703 | kzalloc(sizeof(struct IO_APIC_route_entry) * | 604 | kzalloc(sizeof(struct IO_APIC_route_entry) * |
| 704 | nr_ioapic_registers[apic], GFP_ATOMIC); | 605 | nr_ioapic_registers[apic], GFP_KERNEL); |
| 705 | if (!ioapic_entries[apic]) | 606 | if (!ioapic_entries[apic]) |
| 706 | goto nomem; | 607 | goto nomem; |
| 707 | } | 608 | } |
| @@ -1259,7 +1160,6 @@ void __setup_vector_irq(int cpu) | |||
| 1259 | /* Initialize vector_irq on a new cpu */ | 1160 | /* Initialize vector_irq on a new cpu */ |
| 1260 | int irq, vector; | 1161 | int irq, vector; |
| 1261 | struct irq_cfg *cfg; | 1162 | struct irq_cfg *cfg; |
| 1262 | struct irq_desc *desc; | ||
| 1263 | 1163 | ||
| 1264 | /* | 1164 | /* |
| 1265 | * vector_lock will make sure that we don't run into irq vector | 1165 | * vector_lock will make sure that we don't run into irq vector |
| @@ -1268,9 +1168,10 @@ void __setup_vector_irq(int cpu) | |||
| 1268 | */ | 1168 | */ |
| 1269 | raw_spin_lock(&vector_lock); | 1169 | raw_spin_lock(&vector_lock); |
| 1270 | /* Mark the inuse vectors */ | 1170 | /* Mark the inuse vectors */ |
| 1271 | for_each_irq_desc(irq, desc) { | 1171 | for_each_active_irq(irq) { |
| 1272 | cfg = desc->chip_data; | 1172 | cfg = get_irq_chip_data(irq); |
| 1273 | 1173 | if (!cfg) | |
| 1174 | continue; | ||
| 1274 | /* | 1175 | /* |
| 1275 | * If it is a legacy IRQ handled by the legacy PIC, this cpu | 1176 | * If it is a legacy IRQ handled by the legacy PIC, this cpu |
| 1276 | * will be part of the irq_cfg's domain. | 1177 | * will be part of the irq_cfg's domain. |
| @@ -1327,17 +1228,17 @@ static inline int IO_APIC_irq_trigger(int irq) | |||
| 1327 | } | 1228 | } |
| 1328 | #endif | 1229 | #endif |
| 1329 | 1230 | ||
| 1330 | static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) | 1231 | static void ioapic_register_intr(unsigned int irq, unsigned long trigger) |
| 1331 | { | 1232 | { |
| 1332 | 1233 | ||
| 1333 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | 1234 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || |
| 1334 | trigger == IOAPIC_LEVEL) | 1235 | trigger == IOAPIC_LEVEL) |
| 1335 | desc->status |= IRQ_LEVEL; | 1236 | irq_set_status_flags(irq, IRQ_LEVEL); |
| 1336 | else | 1237 | else |
| 1337 | desc->status &= ~IRQ_LEVEL; | 1238 | irq_clear_status_flags(irq, IRQ_LEVEL); |
| 1338 | 1239 | ||
| 1339 | if (irq_remapped(irq)) { | 1240 | if (irq_remapped(get_irq_chip_data(irq))) { |
| 1340 | desc->status |= IRQ_MOVE_PCNTXT; | 1241 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); |
| 1341 | if (trigger) | 1242 | if (trigger) |
| 1342 | set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, | 1243 | set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, |
| 1343 | handle_fasteoi_irq, | 1244 | handle_fasteoi_irq, |
| @@ -1358,10 +1259,10 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t | |||
| 1358 | handle_edge_irq, "edge"); | 1259 | handle_edge_irq, "edge"); |
| 1359 | } | 1260 | } |
| 1360 | 1261 | ||
| 1361 | int setup_ioapic_entry(int apic_id, int irq, | 1262 | static int setup_ioapic_entry(int apic_id, int irq, |
| 1362 | struct IO_APIC_route_entry *entry, | 1263 | struct IO_APIC_route_entry *entry, |
| 1363 | unsigned int destination, int trigger, | 1264 | unsigned int destination, int trigger, |
| 1364 | int polarity, int vector, int pin) | 1265 | int polarity, int vector, int pin) |
| 1365 | { | 1266 | { |
| 1366 | /* | 1267 | /* |
| 1367 | * add it to the IO-APIC irq-routing table: | 1268 | * add it to the IO-APIC irq-routing table: |
| @@ -1382,21 +1283,7 @@ int setup_ioapic_entry(int apic_id, int irq, | |||
| 1382 | if (index < 0) | 1283 | if (index < 0) |
| 1383 | panic("Failed to allocate IRTE for ioapic %d\n", apic_id); | 1284 | panic("Failed to allocate IRTE for ioapic %d\n", apic_id); |
| 1384 | 1285 | ||
| 1385 | memset(&irte, 0, sizeof(irte)); | 1286 | prepare_irte(&irte, vector, destination); |
| 1386 | |||
| 1387 | irte.present = 1; | ||
| 1388 | irte.dst_mode = apic->irq_dest_mode; | ||
| 1389 | /* | ||
| 1390 | * Trigger mode in the IRTE will always be edge, and the | ||
| 1391 | * actual level or edge trigger will be setup in the IO-APIC | ||
| 1392 | * RTE. This will help simplify level triggered irq migration. | ||
| 1393 | * For more details, see the comments above explainig IO-APIC | ||
| 1394 | * irq migration in the presence of interrupt-remapping. | ||
| 1395 | */ | ||
| 1396 | irte.trigger_mode = 0; | ||
| 1397 | irte.dlvry_mode = apic->irq_delivery_mode; | ||
| 1398 | irte.vector = vector; | ||
| 1399 | irte.dest_id = IRTE_DEST(destination); | ||
| 1400 | 1287 | ||
| 1401 | /* Set source-id of interrupt request */ | 1288 | /* Set source-id of interrupt request */ |
| 1402 | set_ioapic_sid(&irte, apic_id); | 1289 | set_ioapic_sid(&irte, apic_id); |
| @@ -1431,18 +1318,14 @@ int setup_ioapic_entry(int apic_id, int irq, | |||
| 1431 | return 0; | 1318 | return 0; |
| 1432 | } | 1319 | } |
| 1433 | 1320 | ||
| 1434 | static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc, | 1321 | static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq, |
| 1435 | int trigger, int polarity) | 1322 | struct irq_cfg *cfg, int trigger, int polarity) |
| 1436 | { | 1323 | { |
| 1437 | struct irq_cfg *cfg; | ||
| 1438 | struct IO_APIC_route_entry entry; | 1324 | struct IO_APIC_route_entry entry; |
| 1439 | unsigned int dest; | 1325 | unsigned int dest; |
| 1440 | 1326 | ||
| 1441 | if (!IO_APIC_IRQ(irq)) | 1327 | if (!IO_APIC_IRQ(irq)) |
| 1442 | return; | 1328 | return; |
| 1443 | |||
| 1444 | cfg = desc->chip_data; | ||
| 1445 | |||
| 1446 | /* | 1329 | /* |
| 1447 | * For legacy irqs, cfg->domain starts with cpu 0 for legacy | 1330 | * For legacy irqs, cfg->domain starts with cpu 0 for legacy |
| 1448 | * controllers like 8259. Now that IO-APIC can handle this irq, update | 1331 | * controllers like 8259. Now that IO-APIC can handle this irq, update |
| @@ -1471,9 +1354,9 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq | |||
| 1471 | return; | 1354 | return; |
| 1472 | } | 1355 | } |
| 1473 | 1356 | ||
| 1474 | ioapic_register_intr(irq, desc, trigger); | 1357 | ioapic_register_intr(irq, trigger); |
| 1475 | if (irq < legacy_pic->nr_legacy_irqs) | 1358 | if (irq < legacy_pic->nr_legacy_irqs) |
| 1476 | legacy_pic->chip->mask(irq); | 1359 | legacy_pic->mask(irq); |
| 1477 | 1360 | ||
| 1478 | ioapic_write_entry(apic_id, pin, entry); | 1361 | ioapic_write_entry(apic_id, pin, entry); |
| 1479 | } | 1362 | } |
| @@ -1484,11 +1367,9 @@ static struct { | |||
| 1484 | 1367 | ||
| 1485 | static void __init setup_IO_APIC_irqs(void) | 1368 | static void __init setup_IO_APIC_irqs(void) |
| 1486 | { | 1369 | { |
| 1487 | int apic_id, pin, idx, irq; | 1370 | int apic_id, pin, idx, irq, notcon = 0; |
| 1488 | int notcon = 0; | 1371 | int node = cpu_to_node(0); |
| 1489 | struct irq_desc *desc; | ||
| 1490 | struct irq_cfg *cfg; | 1372 | struct irq_cfg *cfg; |
| 1491 | int node = cpu_to_node(boot_cpu_id); | ||
| 1492 | 1373 | ||
| 1493 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | 1374 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); |
| 1494 | 1375 | ||
| @@ -1525,19 +1406,17 @@ static void __init setup_IO_APIC_irqs(void) | |||
| 1525 | apic->multi_timer_check(apic_id, irq)) | 1406 | apic->multi_timer_check(apic_id, irq)) |
| 1526 | continue; | 1407 | continue; |
| 1527 | 1408 | ||
| 1528 | desc = irq_to_desc_alloc_node(irq, node); | 1409 | cfg = alloc_irq_and_cfg_at(irq, node); |
| 1529 | if (!desc) { | 1410 | if (!cfg) |
| 1530 | printk(KERN_INFO "can not get irq_desc for %d\n", irq); | ||
| 1531 | continue; | 1411 | continue; |
| 1532 | } | 1412 | |
| 1533 | cfg = desc->chip_data; | ||
| 1534 | add_pin_to_irq_node(cfg, node, apic_id, pin); | 1413 | add_pin_to_irq_node(cfg, node, apic_id, pin); |
| 1535 | /* | 1414 | /* |
| 1536 | * don't mark it in pin_programmed, so later acpi could | 1415 | * don't mark it in pin_programmed, so later acpi could |
| 1537 | * set it correctly when irq < 16 | 1416 | * set it correctly when irq < 16 |
| 1538 | */ | 1417 | */ |
| 1539 | setup_IO_APIC_irq(apic_id, pin, irq, desc, | 1418 | setup_ioapic_irq(apic_id, pin, irq, cfg, irq_trigger(idx), |
| 1540 | irq_trigger(idx), irq_polarity(idx)); | 1419 | irq_polarity(idx)); |
| 1541 | } | 1420 | } |
| 1542 | 1421 | ||
| 1543 | if (notcon) | 1422 | if (notcon) |
| @@ -1552,9 +1431,7 @@ static void __init setup_IO_APIC_irqs(void) | |||
| 1552 | */ | 1431 | */ |
| 1553 | void setup_IO_APIC_irq_extra(u32 gsi) | 1432 | void setup_IO_APIC_irq_extra(u32 gsi) |
| 1554 | { | 1433 | { |
| 1555 | int apic_id = 0, pin, idx, irq; | 1434 | int apic_id = 0, pin, idx, irq, node = cpu_to_node(0); |
| 1556 | int node = cpu_to_node(boot_cpu_id); | ||
| 1557 | struct irq_desc *desc; | ||
| 1558 | struct irq_cfg *cfg; | 1435 | struct irq_cfg *cfg; |
| 1559 | 1436 | ||
| 1560 | /* | 1437 | /* |
| @@ -1570,18 +1447,15 @@ void setup_IO_APIC_irq_extra(u32 gsi) | |||
| 1570 | return; | 1447 | return; |
| 1571 | 1448 | ||
| 1572 | irq = pin_2_irq(idx, apic_id, pin); | 1449 | irq = pin_2_irq(idx, apic_id, pin); |
| 1573 | #ifdef CONFIG_SPARSE_IRQ | 1450 | |
| 1574 | desc = irq_to_desc(irq); | 1451 | /* Only handle the non legacy irqs on secondary ioapics */ |
| 1575 | if (desc) | 1452 | if (apic_id == 0 || irq < NR_IRQS_LEGACY) |
| 1576 | return; | 1453 | return; |
| 1577 | #endif | 1454 | |
| 1578 | desc = irq_to_desc_alloc_node(irq, node); | 1455 | cfg = alloc_irq_and_cfg_at(irq, node); |
| 1579 | if (!desc) { | 1456 | if (!cfg) |
| 1580 | printk(KERN_INFO "can not get irq_desc for %d\n", irq); | ||
| 1581 | return; | 1457 | return; |
| 1582 | } | ||
| 1583 | 1458 | ||
| 1584 | cfg = desc->chip_data; | ||
| 1585 | add_pin_to_irq_node(cfg, node, apic_id, pin); | 1459 | add_pin_to_irq_node(cfg, node, apic_id, pin); |
| 1586 | 1460 | ||
| 1587 | if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { | 1461 | if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { |
| @@ -1591,7 +1465,7 @@ void setup_IO_APIC_irq_extra(u32 gsi) | |||
| 1591 | } | 1465 | } |
| 1592 | set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); | 1466 | set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); |
| 1593 | 1467 | ||
| 1594 | setup_IO_APIC_irq(apic_id, pin, irq, desc, | 1468 | setup_ioapic_irq(apic_id, pin, irq, cfg, |
| 1595 | irq_trigger(idx), irq_polarity(idx)); | 1469 | irq_trigger(idx), irq_polarity(idx)); |
| 1596 | } | 1470 | } |
| 1597 | 1471 | ||
| @@ -1642,7 +1516,6 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
| 1642 | union IO_APIC_reg_03 reg_03; | 1516 | union IO_APIC_reg_03 reg_03; |
| 1643 | unsigned long flags; | 1517 | unsigned long flags; |
| 1644 | struct irq_cfg *cfg; | 1518 | struct irq_cfg *cfg; |
| 1645 | struct irq_desc *desc; | ||
| 1646 | unsigned int irq; | 1519 | unsigned int irq; |
| 1647 | 1520 | ||
| 1648 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | 1521 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); |
| @@ -1729,10 +1602,10 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
| 1729 | } | 1602 | } |
| 1730 | } | 1603 | } |
| 1731 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); | 1604 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); |
| 1732 | for_each_irq_desc(irq, desc) { | 1605 | for_each_active_irq(irq) { |
| 1733 | struct irq_pin_list *entry; | 1606 | struct irq_pin_list *entry; |
| 1734 | 1607 | ||
| 1735 | cfg = desc->chip_data; | 1608 | cfg = get_irq_chip_data(irq); |
| 1736 | if (!cfg) | 1609 | if (!cfg) |
| 1737 | continue; | 1610 | continue; |
| 1738 | entry = cfg->irq_2_pin; | 1611 | entry = cfg->irq_2_pin; |
| @@ -2239,29 +2112,26 @@ static int __init timer_irq_works(void) | |||
| 2239 | * an edge even if it isn't on the 8259A... | 2112 | * an edge even if it isn't on the 8259A... |
| 2240 | */ | 2113 | */ |
| 2241 | 2114 | ||
| 2242 | static unsigned int startup_ioapic_irq(unsigned int irq) | 2115 | static unsigned int startup_ioapic_irq(struct irq_data *data) |
| 2243 | { | 2116 | { |
| 2244 | int was_pending = 0; | 2117 | int was_pending = 0, irq = data->irq; |
| 2245 | unsigned long flags; | 2118 | unsigned long flags; |
| 2246 | struct irq_cfg *cfg; | ||
| 2247 | 2119 | ||
| 2248 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 2120 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
| 2249 | if (irq < legacy_pic->nr_legacy_irqs) { | 2121 | if (irq < legacy_pic->nr_legacy_irqs) { |
| 2250 | legacy_pic->chip->mask(irq); | 2122 | legacy_pic->mask(irq); |
| 2251 | if (legacy_pic->irq_pending(irq)) | 2123 | if (legacy_pic->irq_pending(irq)) |
| 2252 | was_pending = 1; | 2124 | was_pending = 1; |
| 2253 | } | 2125 | } |
| 2254 | cfg = irq_cfg(irq); | 2126 | __unmask_ioapic(data->chip_data); |
| 2255 | __unmask_IO_APIC_irq(cfg); | ||
| 2256 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 2127 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
| 2257 | 2128 | ||
| 2258 | return was_pending; | 2129 | return was_pending; |
| 2259 | } | 2130 | } |
| 2260 | 2131 | ||
| 2261 | static int ioapic_retrigger_irq(unsigned int irq) | 2132 | static int ioapic_retrigger_irq(struct irq_data *data) |
| 2262 | { | 2133 | { |
| 2263 | 2134 | struct irq_cfg *cfg = data->chip_data; | |
| 2264 | struct irq_cfg *cfg = irq_cfg(irq); | ||
| 2265 | unsigned long flags; | 2135 | unsigned long flags; |
| 2266 | 2136 | ||
| 2267 | raw_spin_lock_irqsave(&vector_lock, flags); | 2137 | raw_spin_lock_irqsave(&vector_lock, flags); |
| @@ -2312,7 +2182,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq | |||
| 2312 | * With interrupt-remapping, destination information comes | 2182 | * With interrupt-remapping, destination information comes |
| 2313 | * from interrupt-remapping table entry. | 2183 | * from interrupt-remapping table entry. |
| 2314 | */ | 2184 | */ |
| 2315 | if (!irq_remapped(irq)) | 2185 | if (!irq_remapped(cfg)) |
| 2316 | io_apic_write(apic, 0x11 + pin*2, dest); | 2186 | io_apic_write(apic, 0x11 + pin*2, dest); |
| 2317 | reg = io_apic_read(apic, 0x10 + pin*2); | 2187 | reg = io_apic_read(apic, 0x10 + pin*2); |
| 2318 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; | 2188 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; |
| @@ -2322,65 +2192,46 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq | |||
| 2322 | } | 2192 | } |
| 2323 | 2193 | ||
| 2324 | /* | 2194 | /* |
| 2325 | * Either sets desc->affinity to a valid value, and returns | 2195 | * Either sets data->affinity to a valid value, and returns |
| 2326 | * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and | 2196 | * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and |
| 2327 | * leaves desc->affinity untouched. | 2197 | * leaves data->affinity untouched. |
| 2328 | */ | 2198 | */ |
| 2329 | unsigned int | 2199 | int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, |
| 2330 | set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask, | 2200 | unsigned int *dest_id) |
| 2331 | unsigned int *dest_id) | ||
| 2332 | { | 2201 | { |
| 2333 | struct irq_cfg *cfg; | 2202 | struct irq_cfg *cfg = data->chip_data; |
| 2334 | unsigned int irq; | ||
| 2335 | 2203 | ||
| 2336 | if (!cpumask_intersects(mask, cpu_online_mask)) | 2204 | if (!cpumask_intersects(mask, cpu_online_mask)) |
| 2337 | return -1; | 2205 | return -1; |
| 2338 | 2206 | ||
| 2339 | irq = desc->irq; | 2207 | if (assign_irq_vector(data->irq, data->chip_data, mask)) |
| 2340 | cfg = desc->chip_data; | ||
| 2341 | if (assign_irq_vector(irq, cfg, mask)) | ||
| 2342 | return -1; | 2208 | return -1; |
| 2343 | 2209 | ||
| 2344 | cpumask_copy(desc->affinity, mask); | 2210 | cpumask_copy(data->affinity, mask); |
| 2345 | 2211 | ||
| 2346 | *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); | 2212 | *dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain); |
| 2347 | return 0; | 2213 | return 0; |
| 2348 | } | 2214 | } |
| 2349 | 2215 | ||
| 2350 | static int | 2216 | static int |
| 2351 | set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | 2217 | ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, |
| 2218 | bool force) | ||
| 2352 | { | 2219 | { |
| 2353 | struct irq_cfg *cfg; | 2220 | unsigned int dest, irq = data->irq; |
| 2354 | unsigned long flags; | 2221 | unsigned long flags; |
| 2355 | unsigned int dest; | 2222 | int ret; |
| 2356 | unsigned int irq; | ||
| 2357 | int ret = -1; | ||
| 2358 | |||
| 2359 | irq = desc->irq; | ||
| 2360 | cfg = desc->chip_data; | ||
| 2361 | 2223 | ||
| 2362 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 2224 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
| 2363 | ret = set_desc_affinity(desc, mask, &dest); | 2225 | ret = __ioapic_set_affinity(data, mask, &dest); |
| 2364 | if (!ret) { | 2226 | if (!ret) { |
| 2365 | /* Only the high 8 bits are valid. */ | 2227 | /* Only the high 8 bits are valid. */ |
| 2366 | dest = SET_APIC_LOGICAL_ID(dest); | 2228 | dest = SET_APIC_LOGICAL_ID(dest); |
| 2367 | __target_IO_APIC_irq(irq, dest, cfg); | 2229 | __target_IO_APIC_irq(irq, dest, data->chip_data); |
| 2368 | } | 2230 | } |
| 2369 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 2231 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
| 2370 | |||
| 2371 | return ret; | 2232 | return ret; |
| 2372 | } | 2233 | } |
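Both affinity setters now follow the new irq_chip calling convention: callbacks receive a struct irq_data (carrying irq, chip_data and affinity) plus a force flag instead of a bare irq number. Wired into a chip, this would look roughly like the following, assuming the irq_* method names of this tree's struct irq_chip:

    static struct irq_chip example_ioapic_chip = {
            .name             = "IO-APIC",
            .irq_startup      = startup_ioapic_irq,
            .irq_mask         = mask_ioapic_irq,
            .irq_unmask       = unmask_ioapic_irq,
            .irq_set_affinity = ioapic_set_affinity,
            .irq_retrigger    = ioapic_retrigger_irq,
    };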
| 2373 | 2234 | ||
| 2374 | static int | ||
| 2375 | set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) | ||
| 2376 | { | ||
| 2377 | struct irq_desc *desc; | ||
| 2378 | |||
| 2379 | desc = irq_to_desc(irq); | ||
| 2380 | |||
| 2381 | return set_ioapic_affinity_irq_desc(desc, mask); | ||
| 2382 | } | ||
| 2383 | |||
| 2384 | #ifdef CONFIG_INTR_REMAP | 2235 | #ifdef CONFIG_INTR_REMAP |
| 2385 | 2236 | ||
| 2386 | /* | 2237 | /* |
| @@ -2395,24 +2246,21 @@ set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) | |||
| 2395 | * the interrupt-remapping table entry. | 2246 | * the interrupt-remapping table entry. |
| 2396 | */ | 2247 | */ |
| 2397 | static int | 2248 | static int |
| 2398 | migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | 2249 | ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, |
| 2250 | bool force) | ||
| 2399 | { | 2251 | { |
| 2400 | struct irq_cfg *cfg; | 2252 | struct irq_cfg *cfg = data->chip_data; |
| 2253 | unsigned int dest, irq = data->irq; | ||
| 2401 | struct irte irte; | 2254 | struct irte irte; |
| 2402 | unsigned int dest; | ||
| 2403 | unsigned int irq; | ||
| 2404 | int ret = -1; | ||
| 2405 | 2255 | ||
| 2406 | if (!cpumask_intersects(mask, cpu_online_mask)) | 2256 | if (!cpumask_intersects(mask, cpu_online_mask)) |
| 2407 | return ret; | 2257 | return -EINVAL; |
| 2408 | 2258 | ||
| 2409 | irq = desc->irq; | ||
| 2410 | if (get_irte(irq, &irte)) | 2259 | if (get_irte(irq, &irte)) |
| 2411 | return ret; | 2260 | return -EBUSY; |
| 2412 | 2261 | ||
| 2413 | cfg = desc->chip_data; | ||
| 2414 | if (assign_irq_vector(irq, cfg, mask)) | 2262 | if (assign_irq_vector(irq, cfg, mask)) |
| 2415 | return ret; | 2263 | return -EBUSY; |
| 2416 | 2264 | ||
| 2417 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); | 2265 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); |
| 2418 | 2266 | ||
| @@ -2427,29 +2275,14 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | |||
| 2427 | if (cfg->move_in_progress) | 2275 | if (cfg->move_in_progress) |
| 2428 | send_cleanup_vector(cfg); | 2276 | send_cleanup_vector(cfg); |
| 2429 | 2277 | ||
| 2430 | cpumask_copy(desc->affinity, mask); | 2278 | cpumask_copy(data->affinity, mask); |
| 2431 | |||
| 2432 | return 0; | 2279 | return 0; |
| 2433 | } | 2280 | } |
| 2434 | 2281 | ||
| 2435 | /* | ||
| 2436 | * Migrates the IRQ destination in the process context. | ||
| 2437 | */ | ||
| 2438 | static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, | ||
| 2439 | const struct cpumask *mask) | ||
| 2440 | { | ||
| 2441 | return migrate_ioapic_irq_desc(desc, mask); | ||
| 2442 | } | ||
| 2443 | static int set_ir_ioapic_affinity_irq(unsigned int irq, | ||
| 2444 | const struct cpumask *mask) | ||
| 2445 | { | ||
| 2446 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 2447 | |||
| 2448 | return set_ir_ioapic_affinity_irq_desc(desc, mask); | ||
| 2449 | } | ||
| 2450 | #else | 2282 | #else |
| 2451 | static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, | 2283 | static inline int |
| 2452 | const struct cpumask *mask) | 2284 | ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, |
| 2285 | bool force) | ||
| 2453 | { | 2286 | { |
| 2454 | return 0; | 2287 | return 0; |
| 2455 | } | 2288 | } |
| @@ -2511,10 +2344,8 @@ unlock: | |||
| 2511 | irq_exit(); | 2344 | irq_exit(); |
| 2512 | } | 2345 | } |
| 2513 | 2346 | ||
| 2514 | static void __irq_complete_move(struct irq_desc **descp, unsigned vector) | 2347 | static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) |
| 2515 | { | 2348 | { |
| 2516 | struct irq_desc *desc = *descp; | ||
| 2517 | struct irq_cfg *cfg = desc->chip_data; | ||
| 2518 | unsigned me; | 2349 | unsigned me; |
| 2519 | 2350 | ||
| 2520 | if (likely(!cfg->move_in_progress)) | 2351 | if (likely(!cfg->move_in_progress)) |
| @@ -2526,31 +2357,28 @@ static void __irq_complete_move(struct irq_desc **descp, unsigned vector) | |||
| 2526 | send_cleanup_vector(cfg); | 2357 | send_cleanup_vector(cfg); |
| 2527 | } | 2358 | } |
| 2528 | 2359 | ||
| 2529 | static void irq_complete_move(struct irq_desc **descp) | 2360 | static void irq_complete_move(struct irq_cfg *cfg) |
| 2530 | { | 2361 | { |
| 2531 | __irq_complete_move(descp, ~get_irq_regs()->orig_ax); | 2362 | __irq_complete_move(cfg, ~get_irq_regs()->orig_ax); |
| 2532 | } | 2363 | } |
| 2533 | 2364 | ||
| 2534 | void irq_force_complete_move(int irq) | 2365 | void irq_force_complete_move(int irq) |
| 2535 | { | 2366 | { |
| 2536 | struct irq_desc *desc = irq_to_desc(irq); | 2367 | struct irq_cfg *cfg = get_irq_chip_data(irq); |
| 2537 | struct irq_cfg *cfg = desc->chip_data; | ||
| 2538 | 2368 | ||
| 2539 | if (!cfg) | 2369 | if (!cfg) |
| 2540 | return; | 2370 | return; |
| 2541 | 2371 | ||
| 2542 | __irq_complete_move(&desc, cfg->vector); | 2372 | __irq_complete_move(cfg, cfg->vector); |
| 2543 | } | 2373 | } |
| 2544 | #else | 2374 | #else |
| 2545 | static inline void irq_complete_move(struct irq_desc **descp) {} | 2375 | static inline void irq_complete_move(struct irq_cfg *cfg) { } |
| 2546 | #endif | 2376 | #endif |
| 2547 | 2377 | ||
| 2548 | static void ack_apic_edge(unsigned int irq) | 2378 | static void ack_apic_edge(struct irq_data *data) |
| 2549 | { | 2379 | { |
| 2550 | struct irq_desc *desc = irq_to_desc(irq); | 2380 | irq_complete_move(data->chip_data); |
| 2551 | 2381 | move_native_irq(data->irq); | |
| 2552 | irq_complete_move(&desc); | ||
| 2553 | move_native_irq(irq); | ||
| 2554 | ack_APIC_irq(); | 2382 | ack_APIC_irq(); |
| 2555 | } | 2383 | } |
| 2556 | 2384 | ||
| @@ -2572,10 +2400,12 @@ atomic_t irq_mis_count; | |||
| 2572 | * Otherwise, we simulate the EOI message manually by changing the trigger | 2400 | * Otherwise, we simulate the EOI message manually by changing the trigger |
| 2573 | * mode to edge and then back to level, with RTE being masked during this. | 2401 | * mode to edge and then back to level, with RTE being masked during this. |
| 2574 | */ | 2402 | */ |
| 2575 | static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | 2403 | static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) |
| 2576 | { | 2404 | { |
| 2577 | struct irq_pin_list *entry; | 2405 | struct irq_pin_list *entry; |
| 2406 | unsigned long flags; | ||
| 2578 | 2407 | ||
| 2408 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
| 2579 | for_each_irq_pin(entry, cfg->irq_2_pin) { | 2409 | for_each_irq_pin(entry, cfg->irq_2_pin) { |
| 2580 | if (mp_ioapics[entry->apic].apicver >= 0x20) { | 2410 | if (mp_ioapics[entry->apic].apicver >= 0x20) { |
| 2581 | /* | 2411 | /* |
| @@ -2584,7 +2414,7 @@ static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | |||
| 2584 | * intr-remapping table entry. Hence for the io-apic | 2414 | * intr-remapping table entry. Hence for the io-apic |
| 2585 | * EOI we use the pin number. | 2415 | * EOI we use the pin number. |
| 2586 | */ | 2416 | */ |
| 2587 | if (irq_remapped(irq)) | 2417 | if (irq_remapped(cfg)) |
| 2588 | io_apic_eoi(entry->apic, entry->pin); | 2418 | io_apic_eoi(entry->apic, entry->pin); |
| 2589 | else | 2419 | else |
| 2590 | io_apic_eoi(entry->apic, cfg->vector); | 2420 | io_apic_eoi(entry->apic, cfg->vector); |
| @@ -2593,36 +2423,22 @@ static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | |||
| 2593 | __unmask_and_level_IO_APIC_irq(entry); | 2423 | __unmask_and_level_IO_APIC_irq(entry); |
| 2594 | } | 2424 | } |
| 2595 | } | 2425 | } |
| 2596 | } | ||
| 2597 | |||
| 2598 | static void eoi_ioapic_irq(struct irq_desc *desc) | ||
| 2599 | { | ||
| 2600 | struct irq_cfg *cfg; | ||
| 2601 | unsigned long flags; | ||
| 2602 | unsigned int irq; | ||
| 2603 | |||
| 2604 | irq = desc->irq; | ||
| 2605 | cfg = desc->chip_data; | ||
| 2606 | |||
| 2607 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
| 2608 | __eoi_ioapic_irq(irq, cfg); | ||
| 2609 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 2426 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
| 2610 | } | 2427 | } |
| 2611 | 2428 | ||
| 2612 | static void ack_apic_level(unsigned int irq) | 2429 | static void ack_apic_level(struct irq_data *data) |
| 2613 | { | 2430 | { |
| 2431 | struct irq_cfg *cfg = data->chip_data; | ||
| 2432 | int i, do_unmask_irq = 0, irq = data->irq; | ||
| 2614 | struct irq_desc *desc = irq_to_desc(irq); | 2433 | struct irq_desc *desc = irq_to_desc(irq); |
| 2615 | unsigned long v; | 2434 | unsigned long v; |
| 2616 | int i; | ||
| 2617 | struct irq_cfg *cfg; | ||
| 2618 | int do_unmask_irq = 0; | ||
| 2619 | 2435 | ||
| 2620 | irq_complete_move(&desc); | 2436 | irq_complete_move(cfg); |
| 2621 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 2437 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
| 2622 | /* If we are moving the irq we need to mask it */ | 2438 | /* If we are moving the irq we need to mask it */ |
| 2623 | if (unlikely(desc->status & IRQ_MOVE_PENDING)) { | 2439 | if (unlikely(desc->status & IRQ_MOVE_PENDING)) { |
| 2624 | do_unmask_irq = 1; | 2440 | do_unmask_irq = 1; |
| 2625 | mask_IO_APIC_irq_desc(desc); | 2441 | mask_ioapic(cfg); |
| 2626 | } | 2442 | } |
| 2627 | #endif | 2443 | #endif |
| 2628 | 2444 | ||
| @@ -2658,7 +2474,6 @@ static void ack_apic_level(unsigned int irq) | |||
| 2658 | * we use the above logic (mask+edge followed by unmask+level) from | 2474 | * we use the above logic (mask+edge followed by unmask+level) from |
| 2659 | * Manfred Spraul to clear the remote IRR. | 2475 | * Manfred Spraul to clear the remote IRR. |
| 2660 | */ | 2476 | */ |
| 2661 | cfg = desc->chip_data; | ||
| 2662 | i = cfg->vector; | 2477 | i = cfg->vector; |
| 2663 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); | 2478 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); |
| 2664 | 2479 | ||
| @@ -2678,7 +2493,7 @@ static void ack_apic_level(unsigned int irq) | |||
| 2678 | if (!(v & (1 << (i & 0x1f)))) { | 2493 | if (!(v & (1 << (i & 0x1f)))) { |
| 2679 | atomic_inc(&irq_mis_count); | 2494 | atomic_inc(&irq_mis_count); |
| 2680 | 2495 | ||
| 2681 | eoi_ioapic_irq(desc); | 2496 | eoi_ioapic_irq(irq, cfg); |
| 2682 | } | 2497 | } |
| 2683 | 2498 | ||
| 2684 | /* Now we can move and re-enable the irq */ | 2499 |
| @@ -2709,61 +2524,57 @@ static void ack_apic_level(unsigned int irq) | |||
| 2709 | * accurate and is causing problems then it is a hardware bug | 2524 | * accurate and is causing problems then it is a hardware bug |
| 2710 | * and you can go talk to the chipset vendor about it. | 2525 | * and you can go talk to the chipset vendor about it. |
| 2711 | */ | 2526 | */ |
| 2712 | cfg = desc->chip_data; | ||
| 2713 | if (!io_apic_level_ack_pending(cfg)) | 2527 | if (!io_apic_level_ack_pending(cfg)) |
| 2714 | move_masked_irq(irq); | 2528 | move_masked_irq(irq); |
| 2715 | unmask_IO_APIC_irq_desc(desc); | 2529 | unmask_ioapic(cfg); |
| 2716 | } | 2530 | } |
| 2717 | } | 2531 | } |
| 2718 | 2532 | ||
| 2719 | #ifdef CONFIG_INTR_REMAP | 2533 | #ifdef CONFIG_INTR_REMAP |
| 2720 | static void ir_ack_apic_edge(unsigned int irq) | 2534 | static void ir_ack_apic_edge(struct irq_data *data) |
| 2721 | { | 2535 | { |
| 2722 | ack_APIC_irq(); | 2536 | ack_APIC_irq(); |
| 2723 | } | 2537 | } |
| 2724 | 2538 | ||
| 2725 | static void ir_ack_apic_level(unsigned int irq) | 2539 | static void ir_ack_apic_level(struct irq_data *data) |
| 2726 | { | 2540 | { |
| 2727 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 2728 | |||
| 2729 | ack_APIC_irq(); | 2541 | ack_APIC_irq(); |
| 2730 | eoi_ioapic_irq(desc); | 2542 | eoi_ioapic_irq(data->irq, data->chip_data); |
| 2731 | } | 2543 | } |
| 2732 | #endif /* CONFIG_INTR_REMAP */ | 2544 | #endif /* CONFIG_INTR_REMAP */ |
| 2733 | 2545 | ||
| 2734 | static struct irq_chip ioapic_chip __read_mostly = { | 2546 | static struct irq_chip ioapic_chip __read_mostly = { |
| 2735 | .name = "IO-APIC", | 2547 | .name = "IO-APIC", |
| 2736 | .startup = startup_ioapic_irq, | 2548 | .irq_startup = startup_ioapic_irq, |
| 2737 | .mask = mask_IO_APIC_irq, | 2549 | .irq_mask = mask_ioapic_irq, |
| 2738 | .unmask = unmask_IO_APIC_irq, | 2550 | .irq_unmask = unmask_ioapic_irq, |
| 2739 | .ack = ack_apic_edge, | 2551 | .irq_ack = ack_apic_edge, |
| 2740 | .eoi = ack_apic_level, | 2552 | .irq_eoi = ack_apic_level, |
| 2741 | #ifdef CONFIG_SMP | 2553 | #ifdef CONFIG_SMP |
| 2742 | .set_affinity = set_ioapic_affinity_irq, | 2554 | .irq_set_affinity = ioapic_set_affinity, |
| 2743 | #endif | 2555 | #endif |
| 2744 | .retrigger = ioapic_retrigger_irq, | 2556 | .irq_retrigger = ioapic_retrigger_irq, |
| 2745 | }; | 2557 | }; |
| 2746 | 2558 | ||
| 2747 | static struct irq_chip ir_ioapic_chip __read_mostly = { | 2559 | static struct irq_chip ir_ioapic_chip __read_mostly = { |
| 2748 | .name = "IR-IO-APIC", | 2560 | .name = "IR-IO-APIC", |
| 2749 | .startup = startup_ioapic_irq, | 2561 | .irq_startup = startup_ioapic_irq, |
| 2750 | .mask = mask_IO_APIC_irq, | 2562 | .irq_mask = mask_ioapic_irq, |
| 2751 | .unmask = unmask_IO_APIC_irq, | 2563 | .irq_unmask = unmask_ioapic_irq, |
| 2752 | #ifdef CONFIG_INTR_REMAP | 2564 | #ifdef CONFIG_INTR_REMAP |
| 2753 | .ack = ir_ack_apic_edge, | 2565 | .irq_ack = ir_ack_apic_edge, |
| 2754 | .eoi = ir_ack_apic_level, | 2566 | .irq_eoi = ir_ack_apic_level, |
| 2755 | #ifdef CONFIG_SMP | 2567 | #ifdef CONFIG_SMP |
| 2756 | .set_affinity = set_ir_ioapic_affinity_irq, | 2568 | .irq_set_affinity = ir_ioapic_set_affinity, |
| 2757 | #endif | 2569 | #endif |
| 2758 | #endif | 2570 | #endif |
| 2759 | .retrigger = ioapic_retrigger_irq, | 2571 | .irq_retrigger = ioapic_retrigger_irq, |
| 2760 | }; | 2572 | }; |
| 2761 | 2573 | ||
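Both chip definitions above switch from the legacy irq_chip members to their irq_-prefixed, irq_data-based counterparts. A hedged sketch of the rename; the mydrv_* handlers are placeholders with the new signatures, not functions from this patch:

```c
#include <linux/irq.h>

static struct irq_chip mydrv_chip __read_mostly = {
	.name			= "mydrv",
	.irq_startup		= mydrv_startup,	/* was .startup      */
	.irq_mask		= mydrv_mask,		/* was .mask         */
	.irq_unmask		= mydrv_unmask,		/* was .unmask       */
	.irq_ack		= mydrv_ack,		/* was .ack          */
	.irq_eoi		= mydrv_eoi,		/* was .eoi          */
	.irq_set_affinity	= mydrv_set_affinity,	/* was .set_affinity */
	.irq_retrigger		= mydrv_retrigger,	/* was .retrigger    */
};
```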
| 2762 | static inline void init_IO_APIC_traps(void) | 2574 | static inline void init_IO_APIC_traps(void) |
| 2763 | { | 2575 | { |
| 2764 | int irq; | ||
| 2765 | struct irq_desc *desc; | ||
| 2766 | struct irq_cfg *cfg; | 2576 | struct irq_cfg *cfg; |
| 2577 | unsigned int irq; | ||
| 2767 | 2578 | ||
| 2768 | /* | 2579 | /* |
| 2769 | * NOTE! The local APIC isn't very good at handling | 2580 | * NOTE! The local APIC isn't very good at handling |
| @@ -2776,8 +2587,8 @@ static inline void init_IO_APIC_traps(void) | |||
| 2776 | * Also, we've got to be careful not to trash gate | 2587 | * Also, we've got to be careful not to trash gate |
| 2777 | * 0x80, because int 0x80 is hm, kind of importantish. ;) | 2588 | * 0x80, because int 0x80 is hm, kind of importantish. ;) |
| 2778 | */ | 2589 | */ |
| 2779 | for_each_irq_desc(irq, desc) { | 2590 | for_each_active_irq(irq) { |
| 2780 | cfg = desc->chip_data; | 2591 | cfg = get_irq_chip_data(irq); |
| 2781 | if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { | 2592 | if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { |
| 2782 | /* | 2593 | /* |
| 2783 | * Hmm.. We don't have an entry for this, | 2594 | * Hmm.. We don't have an entry for this, |
| @@ -2788,7 +2599,7 @@ static inline void init_IO_APIC_traps(void) | |||
| 2788 | legacy_pic->make_irq(irq); | 2599 | legacy_pic->make_irq(irq); |
| 2789 | else | 2600 | else |
| 2790 | /* Strange. Oh, well.. */ | 2601 | /* Strange. Oh, well.. */ |
| 2791 | desc->chip = &no_irq_chip; | 2602 | set_irq_chip(irq, &no_irq_chip); |
| 2792 | } | 2603 | } |
| 2793 | } | 2604 | } |
| 2794 | } | 2605 | } |
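init_IO_APIC_traps() also shows the new iteration idiom: for_each_irq_desc() exposed the descriptor directly, while for_each_active_irq() hands out only irq numbers and leaves everything else to accessors. Schematically (irq_cfg as used in this file; mydrv_fixup_vectorless_irqs() is an illustrative name):

```c
/* Sketch of the opaque-descriptor iteration used in the hunk above. */
static void mydrv_fixup_vectorless_irqs(void)
{
	unsigned int irq;

	for_each_active_irq(irq) {	/* was: for_each_irq_desc(irq, desc) */
		struct irq_cfg *cfg = get_irq_chip_data(irq);	/* was: desc->chip_data */

		if (cfg && !cfg->vector)
			set_irq_chip(irq, &no_irq_chip);	/* was: desc->chip = ... */
	}
}
```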
| @@ -2797,7 +2608,7 @@ static inline void init_IO_APIC_traps(void) | |||
| 2797 | * The local APIC irq-chip implementation: | 2608 | * The local APIC irq-chip implementation: |
| 2798 | */ | 2609 | */ |
| 2799 | 2610 | ||
| 2800 | static void mask_lapic_irq(unsigned int irq) | 2611 | static void mask_lapic_irq(struct irq_data *data) |
| 2801 | { | 2612 | { |
| 2802 | unsigned long v; | 2613 | unsigned long v; |
| 2803 | 2614 | ||
| @@ -2805,7 +2616,7 @@ static void mask_lapic_irq(unsigned int irq) | |||
| 2805 | apic_write(APIC_LVT0, v | APIC_LVT_MASKED); | 2616 | apic_write(APIC_LVT0, v | APIC_LVT_MASKED); |
| 2806 | } | 2617 | } |
| 2807 | 2618 | ||
| 2808 | static void unmask_lapic_irq(unsigned int irq) | 2619 | static void unmask_lapic_irq(struct irq_data *data) |
| 2809 | { | 2620 | { |
| 2810 | unsigned long v; | 2621 | unsigned long v; |
| 2811 | 2622 | ||
| @@ -2813,21 +2624,21 @@ static void unmask_lapic_irq(unsigned int irq) | |||
| 2813 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); | 2624 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); |
| 2814 | } | 2625 | } |
| 2815 | 2626 | ||
| 2816 | static void ack_lapic_irq(unsigned int irq) | 2627 | static void ack_lapic_irq(struct irq_data *data) |
| 2817 | { | 2628 | { |
| 2818 | ack_APIC_irq(); | 2629 | ack_APIC_irq(); |
| 2819 | } | 2630 | } |
| 2820 | 2631 | ||
| 2821 | static struct irq_chip lapic_chip __read_mostly = { | 2632 | static struct irq_chip lapic_chip __read_mostly = { |
| 2822 | .name = "local-APIC", | 2633 | .name = "local-APIC", |
| 2823 | .mask = mask_lapic_irq, | 2634 | .irq_mask = mask_lapic_irq, |
| 2824 | .unmask = unmask_lapic_irq, | 2635 | .irq_unmask = unmask_lapic_irq, |
| 2825 | .ack = ack_lapic_irq, | 2636 | .irq_ack = ack_lapic_irq, |
| 2826 | }; | 2637 | }; |
| 2827 | 2638 | ||
| 2828 | static void lapic_register_intr(int irq, struct irq_desc *desc) | 2639 | static void lapic_register_intr(int irq) |
| 2829 | { | 2640 | { |
| 2830 | desc->status &= ~IRQ_LEVEL; | 2641 | irq_clear_status_flags(irq, IRQ_LEVEL); |
| 2831 | set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, | 2642 | set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, |
| 2832 | "edge"); | 2643 | "edge"); |
| 2833 | } | 2644 | } |
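The lapic_register_intr() hunk shows the companion change for status bits: drivers stop poking desc->status and use irq_{set,clear}_status_flags() instead. A hedged sketch mirroring the new body (mydrv_chip as in the earlier sketch):

```c
#include <linux/irq.h>

/* Register an edge-triggered interrupt without touching irq_desc. */
static void mydrv_register_edge_intr(unsigned int irq)
{
	irq_clear_status_flags(irq, IRQ_LEVEL);	/* was: desc->status &= ~IRQ_LEVEL; */
	set_irq_chip_and_handler_name(irq, &mydrv_chip, handle_edge_irq, "edge");
}
```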
| @@ -2930,9 +2741,8 @@ int timer_through_8259 __initdata; | |||
| 2930 | */ | 2741 | */ |
| 2931 | static inline void __init check_timer(void) | 2742 | static inline void __init check_timer(void) |
| 2932 | { | 2743 | { |
| 2933 | struct irq_desc *desc = irq_to_desc(0); | 2744 | struct irq_cfg *cfg = get_irq_chip_data(0); |
| 2934 | struct irq_cfg *cfg = desc->chip_data; | 2745 | int node = cpu_to_node(0); |
| 2935 | int node = cpu_to_node(boot_cpu_id); | ||
| 2936 | int apic1, pin1, apic2, pin2; | 2746 | int apic1, pin1, apic2, pin2; |
| 2937 | unsigned long flags; | 2747 | unsigned long flags; |
| 2938 | int no_pin1 = 0; | 2748 | int no_pin1 = 0; |
| @@ -2942,7 +2752,7 @@ static inline void __init check_timer(void) | |||
| 2942 | /* | 2752 | /* |
| 2943 | * get/set the timer IRQ vector: | 2753 | * get/set the timer IRQ vector: |
| 2944 | */ | 2754 | */ |
| 2945 | legacy_pic->chip->mask(0); | 2755 | legacy_pic->mask(0); |
| 2946 | assign_irq_vector(0, cfg, apic->target_cpus()); | 2756 | assign_irq_vector(0, cfg, apic->target_cpus()); |
| 2947 | 2757 | ||
| 2948 | /* | 2758 | /* |
| @@ -3001,7 +2811,7 @@ static inline void __init check_timer(void) | |||
| 3001 | add_pin_to_irq_node(cfg, node, apic1, pin1); | 2811 | add_pin_to_irq_node(cfg, node, apic1, pin1); |
| 3002 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); | 2812 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); |
| 3003 | } else { | 2813 | } else { |
| 3004 | /* for edge trigger, setup_IO_APIC_irq already | 2814 | /* for edge trigger, setup_ioapic_irq already |
| 3005 | * leaves it unmasked, | 2815 |
| 3006 | * so we only need to unmask it if it is level-triggered. | 2816 |
| 3007 | * Do we really have a level-triggered timer? | 2817 |
| @@ -3009,12 +2819,12 @@ static inline void __init check_timer(void) | |||
| 3009 | int idx; | 2819 | int idx; |
| 3010 | idx = find_irq_entry(apic1, pin1, mp_INT); | 2820 | idx = find_irq_entry(apic1, pin1, mp_INT); |
| 3011 | if (idx != -1 && irq_trigger(idx)) | 2821 | if (idx != -1 && irq_trigger(idx)) |
| 3012 | unmask_IO_APIC_irq_desc(desc); | 2822 | unmask_ioapic(cfg); |
| 3013 | } | 2823 | } |
| 3014 | if (timer_irq_works()) { | 2824 | if (timer_irq_works()) { |
| 3015 | if (nmi_watchdog == NMI_IO_APIC) { | 2825 | if (nmi_watchdog == NMI_IO_APIC) { |
| 3016 | setup_nmi(); | 2826 | setup_nmi(); |
| 3017 | legacy_pic->chip->unmask(0); | 2827 | legacy_pic->unmask(0); |
| 3018 | } | 2828 | } |
| 3019 | if (disable_timer_pin_1 > 0) | 2829 | if (disable_timer_pin_1 > 0) |
| 3020 | clear_IO_APIC_pin(0, pin1); | 2830 | clear_IO_APIC_pin(0, pin1); |
| @@ -3037,14 +2847,14 @@ static inline void __init check_timer(void) | |||
| 3037 | */ | 2847 | */ |
| 3038 | replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); | 2848 | replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); |
| 3039 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); | 2849 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); |
| 3040 | legacy_pic->chip->unmask(0); | 2850 | legacy_pic->unmask(0); |
| 3041 | if (timer_irq_works()) { | 2851 | if (timer_irq_works()) { |
| 3042 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); | 2852 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); |
| 3043 | timer_through_8259 = 1; | 2853 | timer_through_8259 = 1; |
| 3044 | if (nmi_watchdog == NMI_IO_APIC) { | 2854 | if (nmi_watchdog == NMI_IO_APIC) { |
| 3045 | legacy_pic->chip->mask(0); | 2855 | legacy_pic->mask(0); |
| 3046 | setup_nmi(); | 2856 | setup_nmi(); |
| 3047 | legacy_pic->chip->unmask(0); | 2857 | legacy_pic->unmask(0); |
| 3048 | } | 2858 | } |
| 3049 | goto out; | 2859 | goto out; |
| 3050 | } | 2860 | } |
| @@ -3052,7 +2862,7 @@ static inline void __init check_timer(void) | |||
| 3052 | * Cleanup, just in case ... | 2862 | * Cleanup, just in case ... |
| 3053 | */ | 2863 | */ |
| 3054 | local_irq_disable(); | 2864 | local_irq_disable(); |
| 3055 | legacy_pic->chip->mask(0); | 2865 | legacy_pic->mask(0); |
| 3056 | clear_IO_APIC_pin(apic2, pin2); | 2866 | clear_IO_APIC_pin(apic2, pin2); |
| 3057 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); | 2867 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); |
| 3058 | } | 2868 | } |
| @@ -3069,16 +2879,16 @@ static inline void __init check_timer(void) | |||
| 3069 | apic_printk(APIC_QUIET, KERN_INFO | 2879 | apic_printk(APIC_QUIET, KERN_INFO |
| 3070 | "...trying to set up timer as Virtual Wire IRQ...\n"); | 2880 | "...trying to set up timer as Virtual Wire IRQ...\n"); |
| 3071 | 2881 | ||
| 3072 | lapic_register_intr(0, desc); | 2882 | lapic_register_intr(0); |
| 3073 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ | 2883 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ |
| 3074 | legacy_pic->chip->unmask(0); | 2884 | legacy_pic->unmask(0); |
| 3075 | 2885 | ||
| 3076 | if (timer_irq_works()) { | 2886 | if (timer_irq_works()) { |
| 3077 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); | 2887 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
| 3078 | goto out; | 2888 | goto out; |
| 3079 | } | 2889 | } |
| 3080 | local_irq_disable(); | 2890 | local_irq_disable(); |
| 3081 | legacy_pic->chip->mask(0); | 2891 | legacy_pic->mask(0); |
| 3082 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); | 2892 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); |
| 3083 | apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); | 2893 | apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); |
| 3084 | 2894 | ||
| @@ -3244,49 +3054,42 @@ device_initcall(ioapic_init_sysfs); | |||
| 3244 | /* | 3054 | /* |
| 3245 | * Dynamic irq allocation and deallocation | 3055 |
| 3246 | */ | 3056 | */ |
| 3247 | unsigned int create_irq_nr(unsigned int irq_want, int node) | 3057 | unsigned int create_irq_nr(unsigned int from, int node) |
| 3248 | { | 3058 | { |
| 3249 | /* Allocate an unused irq */ | 3059 | struct irq_cfg *cfg; |
| 3250 | unsigned int irq; | ||
| 3251 | unsigned int new; | ||
| 3252 | unsigned long flags; | 3060 | unsigned long flags; |
| 3253 | struct irq_cfg *cfg_new = NULL; | 3061 | unsigned int ret = 0; |
| 3254 | struct irq_desc *desc_new = NULL; | 3062 | int irq; |
| 3255 | |||
| 3256 | irq = 0; | ||
| 3257 | if (irq_want < nr_irqs_gsi) | ||
| 3258 | irq_want = nr_irqs_gsi; | ||
| 3259 | |||
| 3260 | raw_spin_lock_irqsave(&vector_lock, flags); | ||
| 3261 | for (new = irq_want; new < nr_irqs; new++) { | ||
| 3262 | desc_new = irq_to_desc_alloc_node(new, node); | ||
| 3263 | if (!desc_new) { | ||
| 3264 | printk(KERN_INFO "can not get irq_desc for %d\n", new); | ||
| 3265 | continue; | ||
| 3266 | } | ||
| 3267 | cfg_new = desc_new->chip_data; | ||
| 3268 | |||
| 3269 | if (cfg_new->vector != 0) | ||
| 3270 | continue; | ||
| 3271 | 3063 | ||
| 3272 | desc_new = move_irq_desc(desc_new, node); | 3064 | if (from < nr_irqs_gsi) |
| 3273 | cfg_new = desc_new->chip_data; | 3065 | from = nr_irqs_gsi; |
| 3274 | 3066 | ||
| 3275 | if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) | 3067 | irq = alloc_irq_from(from, node); |
| 3276 | irq = new; | 3068 | if (irq < 0) |
| 3277 | break; | 3069 | return 0; |
| 3070 | cfg = alloc_irq_cfg(irq, node); | ||
| 3071 | if (!cfg) { | ||
| 3072 | free_irq_at(irq, NULL); | ||
| 3073 | return 0; | ||
| 3278 | } | 3074 | } |
| 3279 | raw_spin_unlock_irqrestore(&vector_lock, flags); | ||
| 3280 | 3075 | ||
| 3281 | if (irq > 0) | 3076 | raw_spin_lock_irqsave(&vector_lock, flags); |
| 3282 | dynamic_irq_init_keep_chip_data(irq); | 3077 | if (!__assign_irq_vector(irq, cfg, apic->target_cpus())) |
| 3078 | ret = irq; | ||
| 3079 | raw_spin_unlock_irqrestore(&vector_lock, flags); | ||
| 3283 | 3080 | ||
| 3284 | return irq; | 3081 | if (ret) { |
| 3082 | set_irq_chip_data(irq, cfg); | ||
| 3083 | irq_clear_status_flags(irq, IRQ_NOREQUEST); | ||
| 3084 | } else { | ||
| 3085 | free_irq_at(irq, cfg); | ||
| 3086 | } | ||
| 3087 | return ret; | ||
| 3285 | } | 3088 | } |
| 3286 | 3089 | ||
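create_irq_nr() drops its open-coded scan over descriptors in favour of alloc_irq_from()/alloc_irq_cfg()/free_irq_at(), helpers introduced earlier in this series. The consumer-facing contract is unchanged; a hedged usage sketch (mydrv_init() is illustrative, not from the patch):

```c
static int mydrv_init(void)
{
	int irq = create_irq();		/* picks a free irq >= nr_irqs_gsi */

	if (irq < 0)			/* create_irq() returns -1 on failure */
		return -ENOSPC;

	/* ... set_irq_chip_and_handler(), request_irq(), use the irq ... */

	destroy_irq(irq);		/* clears the vector, frees cfg and descriptor */
	return 0;
}
```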
| 3287 | int create_irq(void) | 3090 | int create_irq(void) |
| 3288 | { | 3091 | { |
| 3289 | int node = cpu_to_node(boot_cpu_id); | 3092 | int node = cpu_to_node(0); |
| 3290 | unsigned int irq_want; | 3093 | unsigned int irq_want; |
| 3291 | int irq; | 3094 | int irq; |
| 3292 | 3095 | ||
| @@ -3301,14 +3104,17 @@ int create_irq(void) | |||
| 3301 | 3104 | ||
| 3302 | void destroy_irq(unsigned int irq) | 3105 | void destroy_irq(unsigned int irq) |
| 3303 | { | 3106 | { |
| 3107 | struct irq_cfg *cfg = get_irq_chip_data(irq); | ||
| 3304 | unsigned long flags; | 3108 | unsigned long flags; |
| 3305 | 3109 | ||
| 3306 | dynamic_irq_cleanup_keep_chip_data(irq); | 3110 | irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); |
| 3307 | 3111 | ||
| 3308 | free_irte(irq); | 3112 | if (intr_remapping_enabled) |
| 3113 | free_irte(irq); | ||
| 3309 | raw_spin_lock_irqsave(&vector_lock, flags); | 3114 | raw_spin_lock_irqsave(&vector_lock, flags); |
| 3310 | __clear_irq_vector(irq, get_irq_chip_data(irq)); | 3115 | __clear_irq_vector(irq, cfg); |
| 3311 | raw_spin_unlock_irqrestore(&vector_lock, flags); | 3116 | raw_spin_unlock_irqrestore(&vector_lock, flags); |
| 3117 | free_irq_at(irq, cfg); | ||
| 3312 | } | 3118 | } |
| 3313 | 3119 | ||
| 3314 | /* | 3120 | /* |
| @@ -3332,7 +3138,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, | |||
| 3332 | 3138 | ||
| 3333 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); | 3139 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); |
| 3334 | 3140 | ||
| 3335 | if (irq_remapped(irq)) { | 3141 | if (irq_remapped(get_irq_chip_data(irq))) { |
| 3336 | struct irte irte; | 3142 | struct irte irte; |
| 3337 | int ir_index; | 3143 | int ir_index; |
| 3338 | u16 sub_handle; | 3144 | u16 sub_handle; |
| @@ -3340,14 +3146,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, | |||
| 3340 | ir_index = map_irq_to_irte_handle(irq, &sub_handle); | 3146 | ir_index = map_irq_to_irte_handle(irq, &sub_handle); |
| 3341 | BUG_ON(ir_index == -1); | 3147 | BUG_ON(ir_index == -1); |
| 3342 | 3148 | ||
| 3343 | memset (&irte, 0, sizeof(irte)); | 3149 | prepare_irte(&irte, cfg->vector, dest); |
| 3344 | |||
| 3345 | irte.present = 1; | ||
| 3346 | irte.dst_mode = apic->irq_dest_mode; | ||
| 3347 | irte.trigger_mode = 0; /* edge */ | ||
| 3348 | irte.dlvry_mode = apic->irq_delivery_mode; | ||
| 3349 | irte.vector = cfg->vector; | ||
| 3350 | irte.dest_id = IRTE_DEST(dest); | ||
| 3351 | 3150 | ||
| 3352 | /* Set source-id of interrupt request */ | 3151 | /* Set source-id of interrupt request */ |
| 3353 | if (pdev) | 3152 | if (pdev) |
| @@ -3392,26 +3191,24 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, | |||
| 3392 | } | 3191 | } |
| 3393 | 3192 | ||
| 3394 | #ifdef CONFIG_SMP | 3193 | #ifdef CONFIG_SMP |
| 3395 | static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | 3194 | static int |
| 3195 | msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) | ||
| 3396 | { | 3196 | { |
| 3397 | struct irq_desc *desc = irq_to_desc(irq); | 3197 | struct irq_cfg *cfg = data->chip_data; |
| 3398 | struct irq_cfg *cfg; | ||
| 3399 | struct msi_msg msg; | 3198 | struct msi_msg msg; |
| 3400 | unsigned int dest; | 3199 | unsigned int dest; |
| 3401 | 3200 | ||
| 3402 | if (set_desc_affinity(desc, mask, &dest)) | 3201 | if (__ioapic_set_affinity(data, mask, &dest)) |
| 3403 | return -1; | 3202 | return -1; |
| 3404 | 3203 | ||
| 3405 | cfg = desc->chip_data; | 3204 | __get_cached_msi_msg(data->msi_desc, &msg); |
| 3406 | |||
| 3407 | get_cached_msi_msg_desc(desc, &msg); | ||
| 3408 | 3205 | ||
| 3409 | msg.data &= ~MSI_DATA_VECTOR_MASK; | 3206 | msg.data &= ~MSI_DATA_VECTOR_MASK; |
| 3410 | msg.data |= MSI_DATA_VECTOR(cfg->vector); | 3207 | msg.data |= MSI_DATA_VECTOR(cfg->vector); |
| 3411 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; | 3208 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; |
| 3412 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3209 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
| 3413 | 3210 | ||
| 3414 | write_msi_msg_desc(desc, &msg); | 3211 | __write_msi_msg(data->msi_desc, &msg); |
| 3415 | 3212 | ||
| 3416 | return 0; | 3213 | return 0; |
| 3417 | } | 3214 | } |
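msi_set_affinity() is the template for the DMAR and HPET variants below: the same read-modify-write sequence on the MSI message, now reached through the msi_desc hanging off irq_data instead of the irq_desc. The recurring core, as a sketch with 'data', 'cfg' and 'dest' as in the function above:

```c
struct msi_msg msg;

__get_cached_msi_msg(data->msi_desc, &msg);	/* was: get_cached_msi_msg_desc(desc, &msg) */

msg.data       &= ~MSI_DATA_VECTOR_MASK;
msg.data       |=  MSI_DATA_VECTOR(cfg->vector);	/* splice in new vector      */
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |=  MSI_ADDR_DEST_ID(dest);		/* splice in new target APIC */

__write_msi_msg(data->msi_desc, &msg);		/* was: write_msi_msg_desc(desc, &msg) */
```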
| @@ -3421,17 +3218,17 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
| 3421 | * done in the process context using interrupt-remapping hardware. | 3218 | * done in the process context using interrupt-remapping hardware. |
| 3422 | */ | 3219 | */ |
| 3423 | static int | 3220 | static int |
| 3424 | ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | 3221 | ir_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, |
| 3222 | bool force) | ||
| 3425 | { | 3223 | { |
| 3426 | struct irq_desc *desc = irq_to_desc(irq); | 3224 | struct irq_cfg *cfg = data->chip_data; |
| 3427 | struct irq_cfg *cfg = desc->chip_data; | 3225 | unsigned int dest, irq = data->irq; |
| 3428 | unsigned int dest; | ||
| 3429 | struct irte irte; | 3226 | struct irte irte; |
| 3430 | 3227 | ||
| 3431 | if (get_irte(irq, &irte)) | 3228 | if (get_irte(irq, &irte)) |
| 3432 | return -1; | 3229 | return -1; |
| 3433 | 3230 | ||
| 3434 | if (set_desc_affinity(desc, mask, &dest)) | 3231 | if (__ioapic_set_affinity(data, mask, &dest)) |
| 3435 | return -1; | 3232 | return -1; |
| 3436 | 3233 | ||
| 3437 | irte.vector = cfg->vector; | 3234 | irte.vector = cfg->vector; |
| @@ -3461,27 +3258,27 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
| 3461 | * which implement the MSI or MSI-X Capability Structure. | 3258 | * which implement the MSI or MSI-X Capability Structure. |
| 3462 | */ | 3259 | */ |
| 3463 | static struct irq_chip msi_chip = { | 3260 | static struct irq_chip msi_chip = { |
| 3464 | .name = "PCI-MSI", | 3261 | .name = "PCI-MSI", |
| 3465 | .unmask = unmask_msi_irq, | 3262 | .irq_unmask = unmask_msi_irq, |
| 3466 | .mask = mask_msi_irq, | 3263 | .irq_mask = mask_msi_irq, |
| 3467 | .ack = ack_apic_edge, | 3264 | .irq_ack = ack_apic_edge, |
| 3468 | #ifdef CONFIG_SMP | 3265 | #ifdef CONFIG_SMP |
| 3469 | .set_affinity = set_msi_irq_affinity, | 3266 | .irq_set_affinity = msi_set_affinity, |
| 3470 | #endif | 3267 | #endif |
| 3471 | .retrigger = ioapic_retrigger_irq, | 3268 | .irq_retrigger = ioapic_retrigger_irq, |
| 3472 | }; | 3269 | }; |
| 3473 | 3270 | ||
| 3474 | static struct irq_chip msi_ir_chip = { | 3271 | static struct irq_chip msi_ir_chip = { |
| 3475 | .name = "IR-PCI-MSI", | 3272 | .name = "IR-PCI-MSI", |
| 3476 | .unmask = unmask_msi_irq, | 3273 | .irq_unmask = unmask_msi_irq, |
| 3477 | .mask = mask_msi_irq, | 3274 | .irq_mask = mask_msi_irq, |
| 3478 | #ifdef CONFIG_INTR_REMAP | 3275 | #ifdef CONFIG_INTR_REMAP |
| 3479 | .ack = ir_ack_apic_edge, | 3276 | .irq_ack = ir_ack_apic_edge, |
| 3480 | #ifdef CONFIG_SMP | 3277 | #ifdef CONFIG_SMP |
| 3481 | .set_affinity = ir_set_msi_irq_affinity, | 3278 | .irq_set_affinity = ir_msi_set_affinity, |
| 3482 | #endif | 3279 | #endif |
| 3483 | #endif | 3280 | #endif |
| 3484 | .retrigger = ioapic_retrigger_irq, | 3281 | .irq_retrigger = ioapic_retrigger_irq, |
| 3485 | }; | 3282 | }; |
| 3486 | 3283 | ||
| 3487 | /* | 3284 | /* |
| @@ -3513,8 +3310,8 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) | |||
| 3513 | 3310 | ||
| 3514 | static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) | 3311 | static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) |
| 3515 | { | 3312 | { |
| 3516 | int ret; | ||
| 3517 | struct msi_msg msg; | 3313 | struct msi_msg msg; |
| 3314 | int ret; | ||
| 3518 | 3315 | ||
| 3519 | ret = msi_compose_msg(dev, irq, &msg, -1); | 3316 | ret = msi_compose_msg(dev, irq, &msg, -1); |
| 3520 | if (ret < 0) | 3317 | if (ret < 0) |
| @@ -3523,12 +3320,8 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) | |||
| 3523 | set_irq_msi(irq, msidesc); | 3320 | set_irq_msi(irq, msidesc); |
| 3524 | write_msi_msg(irq, &msg); | 3321 | write_msi_msg(irq, &msg); |
| 3525 | 3322 | ||
| 3526 | if (irq_remapped(irq)) { | 3323 | if (irq_remapped(get_irq_chip_data(irq))) { |
| 3527 | struct irq_desc *desc = irq_to_desc(irq); | 3324 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); |
| 3528 | /* | ||
| 3529 | * irq migration in process context | ||
| 3530 | */ | ||
| 3531 | desc->status |= IRQ_MOVE_PCNTXT; | ||
| 3532 | set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); | 3325 | set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); |
| 3533 | } else | 3326 | } else |
| 3534 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); | 3327 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); |
| @@ -3540,13 +3333,10 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) | |||
| 3540 | 3333 | ||
| 3541 | int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | 3334 | int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) |
| 3542 | { | 3335 | { |
| 3543 | unsigned int irq; | 3336 | int node, ret, sub_handle, index = 0; |
| 3544 | int ret, sub_handle; | 3337 | unsigned int irq, irq_want; |
| 3545 | struct msi_desc *msidesc; | 3338 | struct msi_desc *msidesc; |
| 3546 | unsigned int irq_want; | ||
| 3547 | struct intel_iommu *iommu = NULL; | 3339 | struct intel_iommu *iommu = NULL; |
| 3548 | int index = 0; | ||
| 3549 | int node; | ||
| 3550 | 3340 | ||
| 3551 | /* x86 doesn't support multiple MSI yet */ | 3341 | /* x86 doesn't support multiple MSI yet */ |
| 3552 | if (type == PCI_CAP_ID_MSI && nvec > 1) | 3342 | if (type == PCI_CAP_ID_MSI && nvec > 1) |
| @@ -3606,18 +3396,17 @@ void arch_teardown_msi_irq(unsigned int irq) | |||
| 3606 | 3396 | ||
| 3607 | #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) | 3397 | #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) |
| 3608 | #ifdef CONFIG_SMP | 3398 | #ifdef CONFIG_SMP |
| 3609 | static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | 3399 | static int |
| 3400 | dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, | ||
| 3401 | bool force) | ||
| 3610 | { | 3402 | { |
| 3611 | struct irq_desc *desc = irq_to_desc(irq); | 3403 | struct irq_cfg *cfg = data->chip_data; |
| 3612 | struct irq_cfg *cfg; | 3404 | unsigned int dest, irq = data->irq; |
| 3613 | struct msi_msg msg; | 3405 | struct msi_msg msg; |
| 3614 | unsigned int dest; | ||
| 3615 | 3406 | ||
| 3616 | if (set_desc_affinity(desc, mask, &dest)) | 3407 | if (__ioapic_set_affinity(data, mask, &dest)) |
| 3617 | return -1; | 3408 | return -1; |
| 3618 | 3409 | ||
| 3619 | cfg = desc->chip_data; | ||
| 3620 | |||
| 3621 | dmar_msi_read(irq, &msg); | 3410 | dmar_msi_read(irq, &msg); |
| 3622 | 3411 | ||
| 3623 | msg.data &= ~MSI_DATA_VECTOR_MASK; | 3412 | msg.data &= ~MSI_DATA_VECTOR_MASK; |
| @@ -3633,14 +3422,14 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
| 3633 | #endif /* CONFIG_SMP */ | 3422 | #endif /* CONFIG_SMP */ |
| 3634 | 3423 | ||
| 3635 | static struct irq_chip dmar_msi_type = { | 3424 | static struct irq_chip dmar_msi_type = { |
| 3636 | .name = "DMAR_MSI", | 3425 | .name = "DMAR_MSI", |
| 3637 | .unmask = dmar_msi_unmask, | 3426 | .irq_unmask = dmar_msi_unmask, |
| 3638 | .mask = dmar_msi_mask, | 3427 | .irq_mask = dmar_msi_mask, |
| 3639 | .ack = ack_apic_edge, | 3428 | .irq_ack = ack_apic_edge, |
| 3640 | #ifdef CONFIG_SMP | 3429 | #ifdef CONFIG_SMP |
| 3641 | .set_affinity = dmar_msi_set_affinity, | 3430 | .irq_set_affinity = dmar_msi_set_affinity, |
| 3642 | #endif | 3431 | #endif |
| 3643 | .retrigger = ioapic_retrigger_irq, | 3432 | .irq_retrigger = ioapic_retrigger_irq, |
| 3644 | }; | 3433 | }; |
| 3645 | 3434 | ||
| 3646 | int arch_setup_dmar_msi(unsigned int irq) | 3435 | int arch_setup_dmar_msi(unsigned int irq) |
| @@ -3661,26 +3450,24 @@ int arch_setup_dmar_msi(unsigned int irq) | |||
| 3661 | #ifdef CONFIG_HPET_TIMER | 3450 | #ifdef CONFIG_HPET_TIMER |
| 3662 | 3451 | ||
| 3663 | #ifdef CONFIG_SMP | 3452 | #ifdef CONFIG_SMP |
| 3664 | static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | 3453 | static int hpet_msi_set_affinity(struct irq_data *data, |
| 3454 | const struct cpumask *mask, bool force) | ||
| 3665 | { | 3455 | { |
| 3666 | struct irq_desc *desc = irq_to_desc(irq); | 3456 | struct irq_cfg *cfg = data->chip_data; |
| 3667 | struct irq_cfg *cfg; | ||
| 3668 | struct msi_msg msg; | 3457 | struct msi_msg msg; |
| 3669 | unsigned int dest; | 3458 | unsigned int dest; |
| 3670 | 3459 | ||
| 3671 | if (set_desc_affinity(desc, mask, &dest)) | 3460 | if (__ioapic_set_affinity(data, mask, &dest)) |
| 3672 | return -1; | 3461 | return -1; |
| 3673 | 3462 | ||
| 3674 | cfg = desc->chip_data; | 3463 | hpet_msi_read(data->handler_data, &msg); |
| 3675 | |||
| 3676 | hpet_msi_read(irq, &msg); | ||
| 3677 | 3464 | ||
| 3678 | msg.data &= ~MSI_DATA_VECTOR_MASK; | 3465 | msg.data &= ~MSI_DATA_VECTOR_MASK; |
| 3679 | msg.data |= MSI_DATA_VECTOR(cfg->vector); | 3466 | msg.data |= MSI_DATA_VECTOR(cfg->vector); |
| 3680 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; | 3467 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; |
| 3681 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3468 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
| 3682 | 3469 | ||
| 3683 | hpet_msi_write(irq, &msg); | 3470 | hpet_msi_write(data->handler_data, &msg); |
| 3684 | 3471 | ||
| 3685 | return 0; | 3472 | return 0; |
| 3686 | } | 3473 | } |
| @@ -3688,34 +3475,33 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
| 3688 | #endif /* CONFIG_SMP */ | 3475 | #endif /* CONFIG_SMP */ |
| 3689 | 3476 | ||
| 3690 | static struct irq_chip ir_hpet_msi_type = { | 3477 | static struct irq_chip ir_hpet_msi_type = { |
| 3691 | .name = "IR-HPET_MSI", | 3478 | .name = "IR-HPET_MSI", |
| 3692 | .unmask = hpet_msi_unmask, | 3479 | .irq_unmask = hpet_msi_unmask, |
| 3693 | .mask = hpet_msi_mask, | 3480 | .irq_mask = hpet_msi_mask, |
| 3694 | #ifdef CONFIG_INTR_REMAP | 3481 | #ifdef CONFIG_INTR_REMAP |
| 3695 | .ack = ir_ack_apic_edge, | 3482 | .irq_ack = ir_ack_apic_edge, |
| 3696 | #ifdef CONFIG_SMP | 3483 | #ifdef CONFIG_SMP |
| 3697 | .set_affinity = ir_set_msi_irq_affinity, | 3484 | .irq_set_affinity = ir_msi_set_affinity, |
| 3698 | #endif | 3485 | #endif |
| 3699 | #endif | 3486 | #endif |
| 3700 | .retrigger = ioapic_retrigger_irq, | 3487 | .irq_retrigger = ioapic_retrigger_irq, |
| 3701 | }; | 3488 | }; |
| 3702 | 3489 | ||
| 3703 | static struct irq_chip hpet_msi_type = { | 3490 | static struct irq_chip hpet_msi_type = { |
| 3704 | .name = "HPET_MSI", | 3491 | .name = "HPET_MSI", |
| 3705 | .unmask = hpet_msi_unmask, | 3492 | .irq_unmask = hpet_msi_unmask, |
| 3706 | .mask = hpet_msi_mask, | 3493 | .irq_mask = hpet_msi_mask, |
| 3707 | .ack = ack_apic_edge, | 3494 | .irq_ack = ack_apic_edge, |
| 3708 | #ifdef CONFIG_SMP | 3495 | #ifdef CONFIG_SMP |
| 3709 | .set_affinity = hpet_msi_set_affinity, | 3496 | .irq_set_affinity = hpet_msi_set_affinity, |
| 3710 | #endif | 3497 | #endif |
| 3711 | .retrigger = ioapic_retrigger_irq, | 3498 | .irq_retrigger = ioapic_retrigger_irq, |
| 3712 | }; | 3499 | }; |
| 3713 | 3500 | ||
| 3714 | int arch_setup_hpet_msi(unsigned int irq, unsigned int id) | 3501 | int arch_setup_hpet_msi(unsigned int irq, unsigned int id) |
| 3715 | { | 3502 | { |
| 3716 | int ret; | ||
| 3717 | struct msi_msg msg; | 3503 | struct msi_msg msg; |
| 3718 | struct irq_desc *desc = irq_to_desc(irq); | 3504 | int ret; |
| 3719 | 3505 | ||
| 3720 | if (intr_remapping_enabled) { | 3506 | if (intr_remapping_enabled) { |
| 3721 | struct intel_iommu *iommu = map_hpet_to_ir(id); | 3507 | struct intel_iommu *iommu = map_hpet_to_ir(id); |
| @@ -3733,9 +3519,9 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id) | |||
| 3733 | if (ret < 0) | 3519 | if (ret < 0) |
| 3734 | return ret; | 3520 | return ret; |
| 3735 | 3521 | ||
| 3736 | hpet_msi_write(irq, &msg); | 3522 | hpet_msi_write(get_irq_data(irq), &msg); |
| 3737 | desc->status |= IRQ_MOVE_PCNTXT; | 3523 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); |
| 3738 | if (irq_remapped(irq)) | 3524 | if (irq_remapped(get_irq_chip_data(irq))) |
| 3739 | set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type, | 3525 | set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type, |
| 3740 | handle_edge_irq, "edge"); | 3526 | handle_edge_irq, "edge"); |
| 3741 | else | 3527 | else |
| @@ -3768,33 +3554,30 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) | |||
| 3768 | write_ht_irq_msg(irq, &msg); | 3554 | write_ht_irq_msg(irq, &msg); |
| 3769 | } | 3555 | } |
| 3770 | 3556 | ||
| 3771 | static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) | 3557 | static int |
| 3558 | ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) | ||
| 3772 | { | 3559 | { |
| 3773 | struct irq_desc *desc = irq_to_desc(irq); | 3560 | struct irq_cfg *cfg = data->chip_data; |
| 3774 | struct irq_cfg *cfg; | ||
| 3775 | unsigned int dest; | 3561 | unsigned int dest; |
| 3776 | 3562 | ||
| 3777 | if (set_desc_affinity(desc, mask, &dest)) | 3563 | if (__ioapic_set_affinity(data, mask, &dest)) |
| 3778 | return -1; | 3564 | return -1; |
| 3779 | 3565 | ||
| 3780 | cfg = desc->chip_data; | 3566 | target_ht_irq(data->irq, dest, cfg->vector); |
| 3781 | |||
| 3782 | target_ht_irq(irq, dest, cfg->vector); | ||
| 3783 | |||
| 3784 | return 0; | 3567 | return 0; |
| 3785 | } | 3568 | } |
| 3786 | 3569 | ||
| 3787 | #endif | 3570 | #endif |
| 3788 | 3571 | ||
| 3789 | static struct irq_chip ht_irq_chip = { | 3572 | static struct irq_chip ht_irq_chip = { |
| 3790 | .name = "PCI-HT", | 3573 | .name = "PCI-HT", |
| 3791 | .mask = mask_ht_irq, | 3574 | .irq_mask = mask_ht_irq, |
| 3792 | .unmask = unmask_ht_irq, | 3575 | .irq_unmask = unmask_ht_irq, |
| 3793 | .ack = ack_apic_edge, | 3576 | .irq_ack = ack_apic_edge, |
| 3794 | #ifdef CONFIG_SMP | 3577 | #ifdef CONFIG_SMP |
| 3795 | .set_affinity = set_ht_irq_affinity, | 3578 | .irq_set_affinity = ht_set_affinity, |
| 3796 | #endif | 3579 | #endif |
| 3797 | .retrigger = ioapic_retrigger_irq, | 3580 | .irq_retrigger = ioapic_retrigger_irq, |
| 3798 | }; | 3581 | }; |
| 3799 | 3582 | ||
| 3800 | int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | 3583 | int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) |
| @@ -3885,14 +3668,13 @@ int __init arch_probe_nr_irqs(void) | |||
| 3885 | if (nr < nr_irqs) | 3668 | if (nr < nr_irqs) |
| 3886 | nr_irqs = nr; | 3669 | nr_irqs = nr; |
| 3887 | 3670 | ||
| 3888 | return 0; | 3671 | return NR_IRQS_LEGACY; |
| 3889 | } | 3672 | } |
| 3890 | #endif | 3673 | #endif |
| 3891 | 3674 | ||
| 3892 | static int __io_apic_set_pci_routing(struct device *dev, int irq, | 3675 | static int __io_apic_set_pci_routing(struct device *dev, int irq, |
| 3893 | struct io_apic_irq_attr *irq_attr) | 3676 | struct io_apic_irq_attr *irq_attr) |
| 3894 | { | 3677 | { |
| 3895 | struct irq_desc *desc; | ||
| 3896 | struct irq_cfg *cfg; | 3678 | struct irq_cfg *cfg; |
| 3897 | int node; | 3679 | int node; |
| 3898 | int ioapic, pin; | 3680 | int ioapic, pin; |
| @@ -3908,13 +3690,11 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq, | |||
| 3908 | if (dev) | 3690 | if (dev) |
| 3909 | node = dev_to_node(dev); | 3691 | node = dev_to_node(dev); |
| 3910 | else | 3692 | else |
| 3911 | node = cpu_to_node(boot_cpu_id); | 3693 | node = cpu_to_node(0); |
| 3912 | 3694 | ||
| 3913 | desc = irq_to_desc_alloc_node(irq, node); | 3695 | cfg = alloc_irq_and_cfg_at(irq, node); |
| 3914 | if (!desc) { | 3696 | if (!cfg) |
| 3915 | printk(KERN_INFO "can not get irq_desc %d\n", irq); | ||
| 3916 | return 0; | 3697 | return 0; |
| 3917 | } | ||
| 3918 | 3698 | ||
| 3919 | pin = irq_attr->ioapic_pin; | 3699 | pin = irq_attr->ioapic_pin; |
| 3920 | trigger = irq_attr->trigger; | 3700 | trigger = irq_attr->trigger; |
| @@ -3924,15 +3704,14 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq, | |||
| 3924 | * IRQs < 16 are already in the irq_2_pin[] map | 3704 | * IRQs < 16 are already in the irq_2_pin[] map |
| 3925 | */ | 3705 | */ |
| 3926 | if (irq >= legacy_pic->nr_legacy_irqs) { | 3706 | if (irq >= legacy_pic->nr_legacy_irqs) { |
| 3927 | cfg = desc->chip_data; | 3707 | if (__add_pin_to_irq_node(cfg, node, ioapic, pin)) { |
| 3928 | if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { | ||
| 3929 | printk(KERN_INFO "can not add pin %d for irq %d\n", | 3708 | printk(KERN_INFO "can not add pin %d for irq %d\n", |
| 3930 | pin, irq); | 3709 | pin, irq); |
| 3931 | return 0; | 3710 | return 0; |
| 3932 | } | 3711 | } |
| 3933 | } | 3712 | } |
| 3934 | 3713 | ||
| 3935 | setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); | 3714 | setup_ioapic_irq(ioapic, pin, irq, cfg, trigger, polarity); |
| 3936 | 3715 | ||
| 3937 | return 0; | 3716 | return 0; |
| 3938 | } | 3717 | } |
| @@ -4125,14 +3904,14 @@ void __init setup_ioapic_dest(void) | |||
| 4125 | */ | 3904 | */ |
| 4126 | if (desc->status & | 3905 | if (desc->status & |
| 4127 | (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) | 3906 | (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) |
| 4128 | mask = desc->affinity; | 3907 | mask = desc->irq_data.affinity; |
| 4129 | else | 3908 | else |
| 4130 | mask = apic->target_cpus(); | 3909 | mask = apic->target_cpus(); |
| 4131 | 3910 | ||
| 4132 | if (intr_remapping_enabled) | 3911 | if (intr_remapping_enabled) |
| 4133 | set_ir_ioapic_affinity_irq_desc(desc, mask); | 3912 | ir_ioapic_set_affinity(&desc->irq_data, mask, false); |
| 4134 | else | 3913 | else |
| 4135 | set_ioapic_affinity_irq_desc(desc, mask); | 3914 | ioapic_set_affinity(&desc->irq_data, mask, false); |
| 4136 | } | 3915 | } |
| 4137 | 3916 | ||
| 4138 | } | 3917 | } |
| @@ -4316,19 +4095,18 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | |||
| 4316 | void __init pre_init_apic_IRQ0(void) | 4095 | void __init pre_init_apic_IRQ0(void) |
| 4317 | { | 4096 | { |
| 4318 | struct irq_cfg *cfg; | 4097 | struct irq_cfg *cfg; |
| 4319 | struct irq_desc *desc; | ||
| 4320 | 4098 | ||
| 4321 | printk(KERN_INFO "Early APIC setup for system timer0\n"); | 4099 | printk(KERN_INFO "Early APIC setup for system timer0\n"); |
| 4322 | #ifndef CONFIG_SMP | 4100 | #ifndef CONFIG_SMP |
| 4323 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); | 4101 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); |
| 4324 | #endif | 4102 | #endif |
| 4325 | desc = irq_to_desc_alloc_node(0, 0); | 4103 | /* Make sure the irq descriptor is set up */ |
| 4104 | cfg = alloc_irq_and_cfg_at(0, 0); | ||
| 4326 | 4105 | ||
| 4327 | setup_local_APIC(); | 4106 | setup_local_APIC(); |
| 4328 | 4107 | ||
| 4329 | cfg = irq_cfg(0); | ||
| 4330 | add_pin_to_irq_node(cfg, 0, 0, 0); | 4108 | add_pin_to_irq_node(cfg, 0, 0, 0); |
| 4331 | set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); | 4109 | set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); |
| 4332 | 4110 | ||
| 4333 | setup_IO_APIC_irq(0, 0, 0, desc, 0, 0); | 4111 | setup_ioapic_irq(0, 0, 0, cfg, 0, 0); |
| 4334 | } | 4112 | } |
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index a43f71cb30f8..c90041ccb742 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
| @@ -178,7 +178,7 @@ int __init check_nmi_watchdog(void) | |||
| 178 | error: | 178 | error: |
| 179 | if (nmi_watchdog == NMI_IO_APIC) { | 179 | if (nmi_watchdog == NMI_IO_APIC) { |
| 180 | if (!timer_through_8259) | 180 | if (!timer_through_8259) |
| 181 | legacy_pic->chip->mask(0); | 181 | legacy_pic->mask(0); |
| 182 | on_each_cpu(__acpi_nmi_disable, NULL, 1); | 182 | on_each_cpu(__acpi_nmi_disable, NULL, 1); |
| 183 | } | 183 | } |
| 184 | 184 | ||
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 83e9be4778e2..f9e4e6a54073 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c | |||
| @@ -54,6 +54,9 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) | |||
| 54 | */ | 54 | */ |
| 55 | void __init default_setup_apic_routing(void) | 55 | void __init default_setup_apic_routing(void) |
| 56 | { | 56 | { |
| 57 | |||
| 58 | enable_IR_x2apic(); | ||
| 59 | |||
| 57 | #ifdef CONFIG_X86_X2APIC | 60 | #ifdef CONFIG_X86_X2APIC |
| 58 | if (x2apic_mode | 61 | if (x2apic_mode |
| 59 | #ifdef CONFIG_X86_UV | 62 | #ifdef CONFIG_X86_UV |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ba5f62f45f01..9e093f8fe78c 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
| @@ -148,7 +148,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) | |||
| 148 | { | 148 | { |
| 149 | #ifdef CONFIG_SMP | 149 | #ifdef CONFIG_SMP |
| 150 | /* calling is from identify_secondary_cpu() ? */ | 150 | /* calling is from identify_secondary_cpu() ? */ |
| 151 | if (c->cpu_index == boot_cpu_id) | 151 | if (!c->cpu_index) |
| 152 | return; | 152 | return; |
| 153 | 153 | ||
| 154 | /* | 154 | /* |
| @@ -253,37 +253,51 @@ static int __cpuinit nearby_node(int apicid) | |||
| 253 | #endif | 253 | #endif |
| 254 | 254 | ||
| 255 | /* | 255 | /* |
| 256 | * Fixup core topology information for AMD multi-node processors. | 256 | * Fixup core topology information for |
| 257 | * Assumption: Number of cores in each internal node is the same. | 257 | * (1) AMD multi-node processors |
| 258 | * Assumption: Number of cores in each internal node is the same. | ||
| 259 | * (2) AMD processors supporting compute units | ||
| 258 | */ | 260 | */ |
| 259 | #ifdef CONFIG_X86_HT | 261 | #ifdef CONFIG_X86_HT |
| 260 | static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) | 262 | static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c) |
| 261 | { | 263 | { |
| 262 | unsigned long long value; | 264 | u32 nodes; |
| 263 | u32 nodes, cores_per_node; | 265 | u8 node_id; |
| 264 | int cpu = smp_processor_id(); | 266 | int cpu = smp_processor_id(); |
| 265 | 267 | ||
| 266 | if (!cpu_has(c, X86_FEATURE_NODEID_MSR)) | 268 | /* get information required for multi-node processors */ |
| 267 | return; | 269 | if (cpu_has(c, X86_FEATURE_TOPOEXT)) { |
| 270 | u32 eax, ebx, ecx, edx; | ||
| 268 | 271 | ||
| 269 | /* fixup topology information only once for a core */ | 272 | cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); |
| 270 | if (cpu_has(c, X86_FEATURE_AMD_DCM)) | 273 | nodes = ((ecx >> 8) & 7) + 1; |
| 271 | return; | 274 | node_id = ecx & 7; |
| 272 | 275 | ||
| 273 | rdmsrl(MSR_FAM10H_NODE_ID, value); | 276 | /* get compute unit information */ |
| 277 | smp_num_siblings = ((ebx >> 8) & 3) + 1; | ||
| 278 | c->compute_unit_id = ebx & 0xff; | ||
| 279 | } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { | ||
| 280 | u64 value; | ||
| 274 | 281 | ||
| 275 | nodes = ((value >> 3) & 7) + 1; | 282 | rdmsrl(MSR_FAM10H_NODE_ID, value); |
| 276 | if (nodes == 1) | 283 | nodes = ((value >> 3) & 7) + 1; |
| 284 | node_id = value & 7; | ||
| 285 | } else | ||
| 277 | return; | 286 | return; |
| 278 | 287 | ||
| 279 | set_cpu_cap(c, X86_FEATURE_AMD_DCM); | 288 | /* fixup multi-node processor information */ |
| 280 | cores_per_node = c->x86_max_cores / nodes; | 289 | if (nodes > 1) { |
| 290 | u32 cores_per_node; | ||
| 291 | |||
| 292 | set_cpu_cap(c, X86_FEATURE_AMD_DCM); | ||
| 293 | cores_per_node = c->x86_max_cores / nodes; | ||
| 281 | 294 | ||
| 282 | /* store NodeID, use llc_shared_map to store sibling info */ | 295 | /* store NodeID, use llc_shared_map to store sibling info */ |
| 283 | per_cpu(cpu_llc_id, cpu) = value & 7; | 296 | per_cpu(cpu_llc_id, cpu) = node_id; |
| 284 | 297 | ||
| 285 | /* fixup core id to be in range from 0 to (cores_per_node - 1) */ | 298 | /* core id to be in range from 0 to (cores_per_node - 1) */ |
| 286 | c->cpu_core_id = c->cpu_core_id % cores_per_node; | 299 | c->cpu_core_id = c->cpu_core_id % cores_per_node; |
| 300 | } | ||
| 287 | } | 301 | } |
| 288 | #endif | 302 | #endif |
| 289 | 303 | ||
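amd_get_topology() now prefers CPUID leaf 0x8000001e (TOPOEXT) over the NodeId MSR. The field extraction above can be exercised from user space; a hedged sketch using GCC's cpuid.h, with field widths exactly as in the patch (on CPUs without TOPOEXT the call simply fails):

```c
/* Build: gcc -o topo topo.c ; meaningful only on TOPOEXT-capable AMD CPUs. */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x8000001e, &eax, &ebx, &ecx, &edx))
		return 1;	/* leaf not available */

	printf("nodes per processor: %u\n", ((ecx >> 8) & 7) + 1);
	printf("node id:             %u\n", ecx & 7);
	printf("siblings per unit:   %u\n", ((ebx >> 8) & 3) + 1);
	printf("compute unit id:     %u\n", ebx & 0xff);
	return 0;
}
```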
| @@ -304,9 +318,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | |||
| 304 | c->phys_proc_id = c->initial_apicid >> bits; | 318 | c->phys_proc_id = c->initial_apicid >> bits; |
| 305 | /* use socket ID also for last level cache */ | 319 | /* use socket ID also for last level cache */ |
| 306 | per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; | 320 | per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; |
| 307 | /* fixup topology information on multi-node processors */ | 321 | amd_get_topology(c); |
| 308 | if ((c->x86 == 0x10) && (c->x86_model == 9)) | ||
| 309 | amd_fixup_dcm(c); | ||
| 310 | #endif | 322 | #endif |
| 311 | } | 323 | } |
| 312 | 324 | ||
| @@ -412,6 +424,23 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
| 412 | set_cpu_cap(c, X86_FEATURE_EXTD_APICID); | 424 | set_cpu_cap(c, X86_FEATURE_EXTD_APICID); |
| 413 | } | 425 | } |
| 414 | #endif | 426 | #endif |
| 427 | |||
| 428 | /* We need to do the following only once */ | ||
| 429 | if (c != &boot_cpu_data) | ||
| 430 | return; | ||
| 431 | |||
| 432 | if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { | ||
| 433 | |||
| 434 | if (c->x86 > 0x10 || | ||
| 435 | (c->x86 == 0x10 && c->x86_model >= 0x2)) { | ||
| 436 | u64 val; | ||
| 437 | |||
| 438 | rdmsrl(MSR_K7_HWCR, val); | ||
| 439 | if (!(val & BIT(24))) | ||
| 440 | printk(KERN_WARNING FW_BUG "TSC doesn't count " | ||
| 441 | "with P0 frequency!\n"); | ||
| 442 | } | ||
| 443 | } | ||
| 415 | } | 444 | } |
| 416 | 445 | ||
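The new early_init_amd() check warns when MSR_K7_HWCR (0xc0010015) bit 24 is clear, i.e. when firmware has not locked the TSC to the P0 frequency. The same bit can be inspected from user space through the msr driver; a hedged sketch (needs root and a loaded msr module, bit meaning taken from the hunk above):

```c
/* Read HWCR on cpu0 and test bit 24, mirroring the kernel-side check. */
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0 || pread(fd, &val, sizeof(val), 0xc0010015) != sizeof(val)) {
		perror("msr");
		return 1;
	}
	printf("HWCR bit 24 (TSC counts at P0): %s\n",
	       (val & (1ULL << 24)) ? "set" : "clear (firmware bug?)");
	close(fd);
	return 0;
}
```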
| 417 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | 446 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
| @@ -523,7 +552,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
| 523 | #endif | 552 | #endif |
| 524 | 553 | ||
| 525 | if (c->extended_cpuid_level >= 0x80000006) { | 554 | if (c->extended_cpuid_level >= 0x80000006) { |
| 526 | if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) | 555 | if (cpuid_edx(0x80000006) & 0xf000) |
| 527 | num_cache_leaves = 4; | 556 | num_cache_leaves = 4; |
| 528 | else | 557 | else |
| 529 | num_cache_leaves = 3; | 558 | num_cache_leaves = 3; |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f2f9ac7da25c..4b68bda30938 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -665,7 +665,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) | |||
| 665 | this_cpu->c_early_init(c); | 665 | this_cpu->c_early_init(c); |
| 666 | 666 | ||
| 667 | #ifdef CONFIG_SMP | 667 | #ifdef CONFIG_SMP |
| 668 | c->cpu_index = boot_cpu_id; | 668 | c->cpu_index = 0; |
| 669 | #endif | 669 | #endif |
| 670 | filter_cpuid_features(c, false); | 670 | filter_cpuid_features(c, false); |
| 671 | } | 671 | } |
| @@ -704,16 +704,21 @@ void __init early_cpu_init(void) | |||
| 704 | } | 704 | } |
| 705 | 705 | ||
| 706 | /* | 706 | /* |
| 707 | * The NOPL instruction is supposed to exist on all CPUs with | 707 | * The NOPL instruction is supposed to exist on all CPUs of family >= 6; |
| 708 | * family >= 6; unfortunately, that's not true in practice because | 708 | * unfortunately, that's not true in practice because of early VIA |
| 709 | * of early VIA chips and (more importantly) broken virtualizers that | 709 | * chips and (more importantly) broken virtualizers that are not easy |
| 710 | * are not easy to detect. In the latter case it doesn't even *fail* | 710 | * to detect. In the latter case it doesn't even *fail* reliably, so |
| 711 | * reliably, so probing for it doesn't even work. Disable it completely | 711 | * probing for it doesn't even work. Disable it completely on 32-bit |
| 712 | * unless we can find a reliable way to detect all the broken cases. | 712 | * unless we can find a reliable way to detect all the broken cases. |
| 713 | * Enable it explicitly on 64-bit for non-constant inputs of cpu_has(). | ||
| 713 | */ | 714 | */ |
| 714 | static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) | 715 | static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) |
| 715 | { | 716 | { |
| 717 | #ifdef CONFIG_X86_32 | ||
| 716 | clear_cpu_cap(c, X86_FEATURE_NOPL); | 718 | clear_cpu_cap(c, X86_FEATURE_NOPL); |
| 719 | #else | ||
| 720 | set_cpu_cap(c, X86_FEATURE_NOPL); | ||
| 721 | #endif | ||
| 717 | } | 722 | } |
| 718 | 723 | ||
| 719 | static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | 724 | static void __cpuinit generic_identify(struct cpuinfo_x86 *c) |
| @@ -1264,13 +1269,6 @@ void __cpuinit cpu_init(void) | |||
| 1264 | clear_all_debug_regs(); | 1269 | clear_all_debug_regs(); |
| 1265 | dbg_restore_debug_regs(); | 1270 | dbg_restore_debug_regs(); |
| 1266 | 1271 | ||
| 1267 | /* | ||
| 1268 | * Force FPU initialization: | ||
| 1269 | */ | ||
| 1270 | current_thread_info()->status = 0; | ||
| 1271 | clear_used_math(); | ||
| 1272 | mxcsr_feature_mask_init(); | ||
| 1273 | |||
| 1274 | fpu_init(); | 1272 | fpu_init(); |
| 1275 | xsave_init(); | 1273 | xsave_init(); |
| 1276 | } | 1274 | } |
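
Three separate cleanups land in common.c here. The boot CPU's cpu_index is now hard-coded to 0 instead of boot_cpu_id (the intel.c hunk below relies on the same invariant), the FPU setup that used to be open-coded in cpu_init() moves into fpu_init(), and detect_nopl() stops probing. The last one is worth spelling out: NOPL cannot be probed reliably under broken virtualizers, so the decision becomes purely compile-time, which also lets cpu_has(c, X86_FEATURE_NOPL) fold to a constant on 64-bit. The net shape of the function after the hunk, paraphrased:

    static void detect_nopl(struct cpuinfo_x86 *c)
    {
    #ifdef CONFIG_X86_32
            /* early VIA chips and broken virtualizers: never trust it */
            clear_cpu_cap(c, X86_FEATURE_NOPL);
    #else
            /* every 64-bit CPU has NOPL; set it so cpu_has() can fold */
            set_cpu_cap(c, X86_FEATURE_NOPL);
    #endif
    }
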
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index f668bb1f7d43..e765633f210e 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h | |||
| @@ -32,6 +32,7 @@ struct cpu_dev { | |||
| 32 | extern const struct cpu_dev *const __x86_cpu_dev_start[], | 32 | extern const struct cpu_dev *const __x86_cpu_dev_start[], |
| 33 | *const __x86_cpu_dev_end[]; | 33 | *const __x86_cpu_dev_end[]; |
| 34 | 34 | ||
| 35 | extern void get_cpu_cap(struct cpuinfo_x86 *c); | ||
| 35 | extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); | 36 | extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); |
| 36 | extern void get_cpu_cap(struct cpuinfo_x86 *c); | 37 | extern void get_cpu_cap(struct cpuinfo_x86 *c); |
| 37 | 38 | ||
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index b4389441efbb..695f17731e23 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
| @@ -170,7 +170,7 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) | |||
| 170 | { | 170 | { |
| 171 | #ifdef CONFIG_SMP | 171 | #ifdef CONFIG_SMP |
| 172 | /* calling is from identify_secondary_cpu() ? */ | 172 | /* calling is from identify_secondary_cpu() ? */ |
| 173 | if (c->cpu_index == boot_cpu_id) | 173 | if (!c->cpu_index) |
| 174 | return; | 174 | return; |
| 175 | 175 | ||
| 176 | /* | 176 | /* |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 898c2f4eab88..12cd823c8d03 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
| @@ -17,7 +17,7 @@ | |||
| 17 | 17 | ||
| 18 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
| 19 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
| 20 | #include <asm/k8.h> | 20 | #include <asm/amd_nb.h> |
| 21 | #include <asm/smp.h> | 21 | #include <asm/smp.h> |
| 22 | 22 | ||
| 23 | #define LVL_1_INST 1 | 23 | #define LVL_1_INST 1 |
| @@ -306,7 +306,7 @@ struct _cache_attr { | |||
| 306 | ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); | 306 | ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); |
| 307 | }; | 307 | }; |
| 308 | 308 | ||
| 309 | #ifdef CONFIG_CPU_SUP_AMD | 309 | #ifdef CONFIG_AMD_NB |
| 310 | 310 | ||
| 311 | /* | 311 | /* |
| 312 | * L3 cache descriptors | 312 | * L3 cache descriptors |
| @@ -369,7 +369,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, | |||
| 369 | return; | 369 | return; |
| 370 | 370 | ||
| 371 | /* not in virtualized environments */ | 371 | /* not in virtualized environments */ |
| 372 | if (num_k8_northbridges == 0) | 372 | if (k8_northbridges.num == 0) |
| 373 | return; | 373 | return; |
| 374 | 374 | ||
| 375 | /* | 375 | /* |
| @@ -377,7 +377,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, | |||
| 377 | * never freed but this is done only on shutdown so it doesn't matter. | 377 | * never freed but this is done only on shutdown so it doesn't matter. |
| 378 | */ | 378 | */ |
| 379 | if (!l3_caches) { | 379 | if (!l3_caches) { |
| 380 | int size = num_k8_northbridges * sizeof(struct amd_l3_cache *); | 380 | int size = k8_northbridges.num * sizeof(struct amd_l3_cache *); |
| 381 | 381 | ||
| 382 | l3_caches = kzalloc(size, GFP_ATOMIC); | 382 | l3_caches = kzalloc(size, GFP_ATOMIC); |
| 383 | if (!l3_caches) | 383 | if (!l3_caches) |
| @@ -556,12 +556,12 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, | |||
| 556 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | 556 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, |
| 557 | show_cache_disable_1, store_cache_disable_1); | 557 | show_cache_disable_1, store_cache_disable_1); |
| 558 | 558 | ||
| 559 | #else /* CONFIG_CPU_SUP_AMD */ | 559 | #else /* CONFIG_AMD_NB */ |
| 560 | static void __cpuinit | 560 | static void __cpuinit |
| 561 | amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index) | 561 | amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index) |
| 562 | { | 562 | { |
| 563 | }; | 563 | }; |
| 564 | #endif /* CONFIG_CPU_SUP_AMD */ | 564 | #endif /* CONFIG_AMD_NB */ |
| 565 | 565 | ||
| 566 | static int | 566 | static int |
| 567 | __cpuinit cpuid4_cache_lookup_regs(int index, | 567 | __cpuinit cpuid4_cache_lookup_regs(int index, |
| @@ -1000,7 +1000,7 @@ static struct attribute *default_attrs[] = { | |||
| 1000 | 1000 | ||
| 1001 | static struct attribute *default_l3_attrs[] = { | 1001 | static struct attribute *default_l3_attrs[] = { |
| 1002 | DEFAULT_SYSFS_CACHE_ATTRS, | 1002 | DEFAULT_SYSFS_CACHE_ATTRS, |
| 1003 | #ifdef CONFIG_CPU_SUP_AMD | 1003 | #ifdef CONFIG_AMD_NB |
| 1004 | &cache_disable_0.attr, | 1004 | &cache_disable_0.attr, |
| 1005 | &cache_disable_1.attr, | 1005 | &cache_disable_1.attr, |
| 1006 | #endif | 1006 | #endif |
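
The intel_cacheinfo.c changes follow the K8 northbridge code being renamed to "AMD northbridge": asm/k8.h becomes asm/amd_nb.h, and the L3 cache-disable attributes now depend on CONFIG_AMD_NB rather than CONFIG_CPU_SUP_AMD. The bare num_k8_northbridges counter is folded into a structure; a rough sketch of the shape the new accessor implies, with the caveat that the authoritative definition lives in <asm/amd_nb.h> in this series:

    /* Approximate shape implied by k8_northbridges.num above */
    struct k8_northbridge_info {
            u16             num;       /* northbridges found; 0 when virtualized */
            struct pci_dev  **nb_misc; /* per-node misc (function 3) devices */
    };
    extern struct k8_northbridge_info k8_northbridges;
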
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 39aaee5c1ab2..80c482382d5c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
| @@ -131,7 +131,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
| 131 | u32 low = 0, high = 0, address = 0; | 131 | u32 low = 0, high = 0, address = 0; |
| 132 | unsigned int bank, block; | 132 | unsigned int bank, block; |
| 133 | struct thresh_restart tr; | 133 | struct thresh_restart tr; |
| 134 | u8 lvt_off; | 134 | int lvt_off = -1; |
| 135 | u8 offset; | ||
| 135 | 136 | ||
| 136 | for (bank = 0; bank < NR_BANKS; ++bank) { | 137 | for (bank = 0; bank < NR_BANKS; ++bank) { |
| 137 | for (block = 0; block < NR_BLOCKS; ++block) { | 138 | for (block = 0; block < NR_BLOCKS; ++block) { |
| @@ -162,8 +163,28 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
| 162 | if (shared_bank[bank] && c->cpu_core_id) | 163 | if (shared_bank[bank] && c->cpu_core_id) |
| 163 | break; | 164 | break; |
| 164 | #endif | 165 | #endif |
| 165 | lvt_off = setup_APIC_eilvt_mce(THRESHOLD_APIC_VECTOR, | 166 | offset = (high & MASK_LVTOFF_HI) >> 20; |
| 166 | APIC_EILVT_MSG_FIX, 0); | 167 | if (lvt_off < 0) { |
| 168 | if (setup_APIC_eilvt(offset, | ||
| 169 | THRESHOLD_APIC_VECTOR, | ||
| 170 | APIC_EILVT_MSG_FIX, 0)) { | ||
| 171 | pr_err(FW_BUG "cpu %d, failed to " | ||
| 172 | "setup threshold interrupt " | ||
| 173 | "for bank %d, block %d " | ||
| 174 | "(MSR%08X=0x%x%08x)", | ||
| 175 | smp_processor_id(), bank, block, | ||
| 176 | address, high, low); | ||
| 177 | continue; | ||
| 178 | } | ||
| 179 | lvt_off = offset; | ||
| 180 | } else if (lvt_off != offset) { | ||
| 181 | pr_err(FW_BUG "cpu %d, invalid threshold " | ||
| 182 | "interrupt offset %d for bank %d," | ||
| 183 | "block %d (MSR%08X=0x%x%08x)", | ||
| 184 | smp_processor_id(), lvt_off, bank, | ||
| 185 | block, address, high, low); | ||
| 186 | continue; | ||
| 187 | } | ||
| 167 | 188 | ||
| 168 | high &= ~MASK_LVTOFF_HI; | 189 | high &= ~MASK_LVTOFF_HI; |
| 169 | high |= lvt_off << 20; | 190 | high |= lvt_off << 20; |
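
The mce_amd.c rework stops allocating an EILVT entry itself and instead honors the LVT offset the BIOS programmed into each threshold bank: the first valid offset is registered via setup_APIC_eilvt(), and any later bank or block reporting a different offset is skipped with an FW_BUG complaint, since all threshold interrupts must share one LVT entry. The offset itself comes out of the high half of the bank MSR; a condensed sketch, assuming MASK_LVTOFF_HI is 0x00f00000 as in this file:

    /* high = upper 32 bits of the threshold bank MSR, as in the hunk */
    u8 offset = (high & MASK_LVTOFF_HI) >> 20;   /* 4-bit LVT offset field */
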
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 169d8804a9f8..4b683267eca5 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
| @@ -350,7 +350,7 @@ static void intel_thermal_interrupt(void) | |||
| 350 | 350 | ||
| 351 | static void unexpected_thermal_interrupt(void) | 351 | static void unexpected_thermal_interrupt(void) |
| 352 | { | 352 | { |
| 353 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | 353 | printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n", |
| 354 | smp_processor_id()); | 354 | smp_processor_id()); |
| 355 | add_taint(TAINT_MACHINE_CHECK); | 355 | add_taint(TAINT_MACHINE_CHECK); |
| 356 | } | 356 | } |
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index c5f59d071425..ac140c7be396 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
| @@ -827,7 +827,7 @@ int __init amd_special_default_mtrr(void) | |||
| 827 | 827 | ||
| 828 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) | 828 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) |
| 829 | return 0; | 829 | return 0; |
| 830 | if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) | 830 | if (boot_cpu_data.x86 < 0xf) |
| 831 | return 0; | 831 | return 0; |
| 832 | /* In case some hypervisor doesn't pass SYSCFG through: */ | 832 | /* In case some hypervisor doesn't pass SYSCFG through: */ |
| 833 | if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) | 833 | if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 7d28d7d03885..9f27228ceffd 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
| @@ -64,18 +64,59 @@ static inline void k8_check_syscfg_dram_mod_en(void) | |||
| 64 | } | 64 | } |
| 65 | } | 65 | } |
| 66 | 66 | ||
| 67 | /* Get the size of contiguous MTRR range */ | ||
| 68 | static u64 get_mtrr_size(u64 mask) | ||
| 69 | { | ||
| 70 | u64 size; | ||
| 71 | |||
| 72 | mask >>= PAGE_SHIFT; | ||
| 73 | mask |= size_or_mask; | ||
| 74 | size = -mask; | ||
| 75 | size <<= PAGE_SHIFT; | ||
| 76 | return size; | ||
| 77 | } | ||
| 78 | |||
| 67 | /* | 79 | /* |
| 68 | * Returns the effective MTRR type for the region | 80 | * Check and return the effective type for MTRR-MTRR type overlap. |
| 69 | * Error returns: | 81 | * Returns 1 if the effective type is UNCACHEABLE, else returns 0 |
| 70 | * - 0xFE - when the range is "not entirely covered" by _any_ var range MTRR | ||
| 71 | * - 0xFF - when MTRR is not enabled | ||
| 72 | */ | 82 | */ |
| 73 | u8 mtrr_type_lookup(u64 start, u64 end) | 83 | static int check_type_overlap(u8 *prev, u8 *curr) |
| 84 | { | ||
| 85 | if (*prev == MTRR_TYPE_UNCACHABLE || *curr == MTRR_TYPE_UNCACHABLE) { | ||
| 86 | *prev = MTRR_TYPE_UNCACHABLE; | ||
| 87 | *curr = MTRR_TYPE_UNCACHABLE; | ||
| 88 | return 1; | ||
| 89 | } | ||
| 90 | |||
| 91 | if ((*prev == MTRR_TYPE_WRBACK && *curr == MTRR_TYPE_WRTHROUGH) || | ||
| 92 | (*prev == MTRR_TYPE_WRTHROUGH && *curr == MTRR_TYPE_WRBACK)) { | ||
| 93 | *prev = MTRR_TYPE_WRTHROUGH; | ||
| 94 | *curr = MTRR_TYPE_WRTHROUGH; | ||
| 95 | } | ||
| 96 | |||
| 97 | if (*prev != *curr) { | ||
| 98 | *prev = MTRR_TYPE_UNCACHABLE; | ||
| 99 | *curr = MTRR_TYPE_UNCACHABLE; | ||
| 100 | return 1; | ||
| 101 | } | ||
| 102 | |||
| 103 | return 0; | ||
| 104 | } | ||
| 105 | |||
| 106 | /* | ||
| 107 | * Error/Semi-error returns: | ||
| 108 | * 0xFF - when MTRR is not enabled | ||
| 109 | * *repeat == 1 implies [start:end] spanned across MTRR range and type returned | ||
| 110 | * corresponds only to [start:*partial_end]. | ||
| 111 | * Caller has to lookup again for [*partial_end:end]. | ||
| 112 | */ | ||
| 113 | static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat) | ||
| 74 | { | 114 | { |
| 75 | int i; | 115 | int i; |
| 76 | u64 base, mask; | 116 | u64 base, mask; |
| 77 | u8 prev_match, curr_match; | 117 | u8 prev_match, curr_match; |
| 78 | 118 | ||
| 119 | *repeat = 0; | ||
| 79 | if (!mtrr_state_set) | 120 | if (!mtrr_state_set) |
| 80 | return 0xFF; | 121 | return 0xFF; |
| 81 | 122 | ||
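
get_mtrr_size() recovers the span of a variable range from its PhysMask: every address bit that may vary inside the range is clear in the mask, so after padding the bits above the physical address width with size_or_mask, two's-complement negation yields the size directly. A worked example, assuming 4 KiB pages and a 36-bit physical address width (size_or_mask == 0xffffffffff000000):

    /*
     * A 512 MiB variable range has PhysMask = 0xfe0000000:
     *
     *   mask >>= PAGE_SHIFT;   ->  0x0000000000fe0000
     *   mask |= size_or_mask;  ->  0xfffffffffffe0000
     *   size = -mask;          ->  0x0000000000020000   (pages)
     *   size <<= PAGE_SHIFT;   ->  0x20000000           (512 MiB)
     */
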
| @@ -126,8 +167,34 @@ u8 mtrr_type_lookup(u64 start, u64 end) | |||
| 126 | 167 | ||
| 127 | start_state = ((start & mask) == (base & mask)); | 168 | start_state = ((start & mask) == (base & mask)); |
| 128 | end_state = ((end & mask) == (base & mask)); | 169 | end_state = ((end & mask) == (base & mask)); |
| 129 | if (start_state != end_state) | 170 | |
| 130 | return 0xFE; | 171 | if (start_state != end_state) { |
| 172 | /* | ||
| 173 | * We have start:end spanning across an MTRR. | ||
| 174 | * We split the region into | ||
| 175 | * either | ||
| 176 | * (start:mtrr_end) (mtrr_end:end) | ||
| 177 | * or | ||
| 178 | * (start:mtrr_start) (mtrr_start:end) | ||
| 179 | * depending on kind of overlap. | ||
| 180 | * Return the type for first region and a pointer to | ||
| 181 | * the start of second region so that caller will | ||
| 182 | * lookup again on the second region. | ||
| 183 | * Note: This way we handle multiple overlaps as well. | ||
| 184 | */ | ||
| 185 | if (start_state) | ||
| 186 | *partial_end = base + get_mtrr_size(mask); | ||
| 187 | else | ||
| 188 | *partial_end = base; | ||
| 189 | |||
| 190 | if (unlikely(*partial_end <= start)) { | ||
| 191 | WARN_ON(1); | ||
| 192 | *partial_end = start + PAGE_SIZE; | ||
| 193 | } | ||
| 194 | |||
| 195 | end = *partial_end - 1; /* end is inclusive */ | ||
| 196 | *repeat = 1; | ||
| 197 | } | ||
| 131 | 198 | ||
| 132 | if ((start & mask) != (base & mask)) | 199 | if ((start & mask) != (base & mask)) |
| 133 | continue; | 200 | continue; |
| @@ -138,21 +205,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) | |||
| 138 | continue; | 205 | continue; |
| 139 | } | 206 | } |
| 140 | 207 | ||
| 141 | if (prev_match == MTRR_TYPE_UNCACHABLE || | 208 | if (check_type_overlap(&prev_match, &curr_match)) |
| 142 | curr_match == MTRR_TYPE_UNCACHABLE) { | 209 | return curr_match; |
| 143 | return MTRR_TYPE_UNCACHABLE; | ||
| 144 | } | ||
| 145 | |||
| 146 | if ((prev_match == MTRR_TYPE_WRBACK && | ||
| 147 | curr_match == MTRR_TYPE_WRTHROUGH) || | ||
| 148 | (prev_match == MTRR_TYPE_WRTHROUGH && | ||
| 149 | curr_match == MTRR_TYPE_WRBACK)) { | ||
| 150 | prev_match = MTRR_TYPE_WRTHROUGH; | ||
| 151 | curr_match = MTRR_TYPE_WRTHROUGH; | ||
| 152 | } | ||
| 153 | |||
| 154 | if (prev_match != curr_match) | ||
| 155 | return MTRR_TYPE_UNCACHABLE; | ||
| 156 | } | 210 | } |
| 157 | 211 | ||
| 158 | if (mtrr_tom2) { | 212 | if (mtrr_tom2) { |
| @@ -166,6 +220,36 @@ u8 mtrr_type_lookup(u64 start, u64 end) | |||
| 166 | return mtrr_state.def_type; | 220 | return mtrr_state.def_type; |
| 167 | } | 221 | } |
| 168 | 222 | ||
| 223 | /* | ||
| 224 | * Returns the effective MTRR type for the region | ||
| 225 | * Error return: | ||
| 226 | * 0xFF - when MTRR is not enabled | ||
| 227 | */ | ||
| 228 | u8 mtrr_type_lookup(u64 start, u64 end) | ||
| 229 | { | ||
| 230 | u8 type, prev_type; | ||
| 231 | int repeat; | ||
| 232 | u64 partial_end; | ||
| 233 | |||
| 234 | type = __mtrr_type_lookup(start, end, &partial_end, &repeat); | ||
| 235 | |||
| 236 | /* | ||
| 237 | * Common path is with repeat = 0. | ||
| 238 | * However, we can have cases where [start:end] spans across some | ||
| 239 | * MTRR range. Do repeated lookups for that case here. | ||
| 240 | */ | ||
| 241 | while (repeat) { | ||
| 242 | prev_type = type; | ||
| 243 | start = partial_end; | ||
| 244 | type = __mtrr_type_lookup(start, end, &partial_end, &repeat); | ||
| 245 | |||
| 246 | if (check_type_overlap(&prev_type, &type)) | ||
| 247 | return type; | ||
| 248 | } | ||
| 249 | |||
| 250 | return type; | ||
| 251 | } | ||
| 252 | |||
| 169 | /* Get the MSR pair relating to a var range */ | 253 | /* Get the MSR pair relating to a var range */ |
| 170 | static void | 254 | static void |
| 171 | get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) | 255 | get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) |
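
With the refactoring above, mtrr_type_lookup() finally handles a region that straddles an MTRR boundary instead of punting with 0xFE: __mtrr_type_lookup() returns the type of the covered prefix plus a resume point in *partial_end, and the caller loops, merging each pair of partial types through check_type_overlap() (any UC wins outright, WB meeting WT degrades to WT, any other mismatch collapses to UC). A usage sketch with hypothetical addresses:

    /*
     * Suppose a WB variable range ends at 0xc0000000 and memory above it
     * is UC. The first pass returns WB with *repeat = 1 and
     * *partial_end = 0xc0000000; the second pass returns UC; and
     * check_type_overlap() collapses WB vs UC to MTRR_TYPE_UNCACHABLE.
     */
    u8 type = mtrr_type_lookup(0xbf000000ULL, 0xc1000000ULL);
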
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index fb329e9f8494..d9f4ff8fcd69 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
| @@ -700,11 +700,10 @@ static void probe_nmi_watchdog(void) | |||
| 700 | { | 700 | { |
| 701 | switch (boot_cpu_data.x86_vendor) { | 701 | switch (boot_cpu_data.x86_vendor) { |
| 702 | case X86_VENDOR_AMD: | 702 | case X86_VENDOR_AMD: |
| 703 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && | 703 | if (boot_cpu_data.x86 == 6 || |
| 704 | boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17) | 704 | (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15)) |
| 705 | return; | 705 | wd_ops = &k7_wd_ops; |
| 706 | wd_ops = &k7_wd_ops; | 706 | return; |
| 707 | break; | ||
| 708 | case X86_VENDOR_INTEL: | 707 | case X86_VENDOR_INTEL: |
| 709 | /* Work around where perfctr1 doesn't have a working enable | 708 | /* Work around where perfctr1 doesn't have a working enable |
| 710 | * bit as described in the following errata: | 709 | * bit as described in the following errata: |
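
Note the units in the perfctr-watchdog hunk: the old list named families 6, 15, 16 and 17 in decimal, while the replacement range is hexadecimal, so 0xf through 0x15 covers the same K8/F10h/F11h parts and extends the k7-style watchdog ops through family 0x15. The resulting check reads:

    if (boot_cpu_data.x86 == 6 ||                       /* K7 */
        (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
            wd_ops = &k7_wd_ops;                        /* K8 .. Fam15h */
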
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index d49079515122..c7f64e6f537a 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c | |||
| @@ -44,6 +44,12 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | |||
| 44 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, | 44 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, |
| 45 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, | 45 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, |
| 46 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 }, | 46 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 }, |
| 47 | { X86_FEATURE_TSCRATEMSR, CR_EDX, 4, 0x8000000a, 0 }, | ||
| 48 | { X86_FEATURE_VMCBCLEAN, CR_EDX, 5, 0x8000000a, 0 }, | ||
| 49 | { X86_FEATURE_FLUSHBYASID, CR_EDX, 6, 0x8000000a, 0 }, | ||
| 50 | { X86_FEATURE_DECODEASSISTS, CR_EDX, 7, 0x8000000a, 0 }, | ||
| 51 | { X86_FEATURE_PAUSEFILTER, CR_EDX,10, 0x8000000a, 0 }, | ||
| 52 | { X86_FEATURE_PFTHRESHOLD, CR_EDX,12, 0x8000000a, 0 }, | ||
| 47 | { 0, 0, 0, 0, 0 } | 53 | { 0, 0, 0, 0, 0 } |
| 48 | }; | 54 | }; |
| 49 | 55 | ||
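
Each row of the scattered-feature table reads as { feature bit, output register, bit index, CPUID leaf, sub-leaf }, so the six new entries publish the SVM capability bits from leaf 0x8000000a EDX as synthetic CPU features. Roughly what init_scattered_cpuid_features() does for one of them, as a sketch (c being the cpuinfo_x86 under initialization):

    u32 eax, ebx, ecx, edx;

    cpuid(0x8000000a, &eax, &ebx, &ecx, &edx);          /* SVM feature leaf */
    if (edx & (1u << 10))                               /* bit 10: pause filter */
            set_cpu_cap(c, X86_FEATURE_PAUSEFILTER);
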
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 045b36cada65..994828899e09 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c | |||
| @@ -34,7 +34,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, | |||
| 34 | if (!csize) | 34 | if (!csize) |
| 35 | return 0; | 35 | return 0; |
| 36 | 36 | ||
| 37 | vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); | 37 | vaddr = ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE); |
| 38 | if (!vaddr) | 38 | if (!vaddr) |
| 39 | return -ENOMEM; | 39 | return -ENOMEM; |
| 40 | 40 | ||
| @@ -46,6 +46,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, | |||
| 46 | } else | 46 | } else |
| 47 | memcpy(buf, vaddr + offset, csize); | 47 | memcpy(buf, vaddr + offset, csize); |
| 48 | 48 | ||
| 49 | set_iounmap_nonlazy(); | ||
| 49 | iounmap(vaddr); | 50 | iounmap(vaddr); |
| 50 | return csize; | 51 | return csize; |
| 51 | } | 52 | } |
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index ebdb85cf2686..76b8cd953dee 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
| @@ -97,7 +97,6 @@ static void __init nvidia_bugs(int num, int slot, int func) | |||
| 97 | } | 97 | } |
| 98 | 98 | ||
| 99 | #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) | 99 | #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) |
| 100 | #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) | ||
| 101 | static u32 __init ati_ixp4x0_rev(int num, int slot, int func) | 100 | static u32 __init ati_ixp4x0_rev(int num, int slot, int func) |
| 102 | { | 101 | { |
| 103 | u32 d; | 102 | u32 d; |
| @@ -115,7 +114,6 @@ static u32 __init ati_ixp4x0_rev(int num, int slot, int func) | |||
| 115 | d &= 0xff; | 114 | d &= 0xff; |
| 116 | return d; | 115 | return d; |
| 117 | } | 116 | } |
| 118 | #endif | ||
| 119 | 117 | ||
| 120 | static void __init ati_bugs(int num, int slot, int func) | 118 | static void __init ati_bugs(int num, int slot, int func) |
| 121 | { | 119 | { |
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index fa99bae75ace..4572f25f9325 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <xen/hvc-console.h> | 14 | #include <xen/hvc-console.h> |
| 15 | #include <asm/pci-direct.h> | 15 | #include <asm/pci-direct.h> |
| 16 | #include <asm/fixmap.h> | 16 | #include <asm/fixmap.h> |
| 17 | #include <asm/mrst.h> | ||
| 17 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
| 18 | #include <linux/usb/ehci_def.h> | 19 | #include <linux/usb/ehci_def.h> |
| 19 | 20 | ||
| @@ -239,6 +240,18 @@ static int __init setup_early_printk(char *buf) | |||
| 239 | if (!strncmp(buf, "xen", 3)) | 240 | if (!strncmp(buf, "xen", 3)) |
| 240 | early_console_register(&xenboot_console, keep); | 241 | early_console_register(&xenboot_console, keep); |
| 241 | #endif | 242 | #endif |
| 243 | #ifdef CONFIG_X86_MRST_EARLY_PRINTK | ||
| 244 | if (!strncmp(buf, "mrst", 4)) { | ||
| 245 | mrst_early_console_init(); | ||
| 246 | early_console_register(&early_mrst_console, keep); | ||
| 247 | } | ||
| 248 | |||
| 249 | if (!strncmp(buf, "hsu", 3)) { | ||
| 250 | hsu_early_console_init(); | ||
| 251 | early_console_register(&early_hsu_console, keep); | ||
| 252 | } | ||
| 253 | |||
| 254 | #endif | ||
| 242 | buf++; | 255 | buf++; |
| 243 | } | 256 | } |
| 244 | return 0; | 257 | return 0; |
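
The two new branches hook the Moorestown consoles into the existing earlyprintk= parser, which walks comma-separated tokens, so (assuming the usual syntax for this option) the new consoles would be selected on the kernel command line with something like:

    earlyprintk=mrst,keep
    earlyprintk=hsu

where ",keep" retains the early console after the real one comes up.
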
diff --git a/arch/x86/kernel/early_printk_mrst.c b/arch/x86/kernel/early_printk_mrst.c new file mode 100644 index 000000000000..65df603622b2 --- /dev/null +++ b/arch/x86/kernel/early_printk_mrst.c | |||
| @@ -0,0 +1,319 @@ | |||
| 1 | /* | ||
| 2 | * early_printk_mrst.c - early consoles for Intel MID platforms | ||
| 3 | * | ||
| 4 | * Copyright (c) 2008-2010, Intel Corporation | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public License | ||
| 8 | * as published by the Free Software Foundation; version 2 | ||
| 9 | * of the License. | ||
| 10 | */ | ||
| 11 | |||
| 12 | /* | ||
| 13 | * This file implements two early consoles named mrst and hsu. | ||
| 14 | * mrst is based on the Maxim3110 spi-uart device, which exists on | ||
| 15 | * both Moorestown and Medfield platforms, while hsu is based on a | ||
| 16 | * High Speed UART device that exists only on the Medfield platform. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/serial_reg.h> | ||
| 20 | #include <linux/serial_mfd.h> | ||
| 21 | #include <linux/kmsg_dump.h> | ||
| 22 | #include <linux/console.h> | ||
| 23 | #include <linux/kernel.h> | ||
| 24 | #include <linux/delay.h> | ||
| 25 | #include <linux/init.h> | ||
| 26 | #include <linux/io.h> | ||
| 27 | |||
| 28 | #include <asm/fixmap.h> | ||
| 29 | #include <asm/pgtable.h> | ||
| 30 | #include <asm/mrst.h> | ||
| 31 | |||
| 32 | #define MRST_SPI_TIMEOUT 0x200000 | ||
| 33 | #define MRST_REGBASE_SPI0 0xff128000 | ||
| 34 | #define MRST_REGBASE_SPI1 0xff128400 | ||
| 35 | #define MRST_CLK_SPI0_REG 0xff11d86c | ||
| 36 | |||
| 37 | /* Bit fields in CTRLR0 */ | ||
| 38 | #define SPI_DFS_OFFSET 0 | ||
| 39 | |||
| 40 | #define SPI_FRF_OFFSET 4 | ||
| 41 | #define SPI_FRF_SPI 0x0 | ||
| 42 | #define SPI_FRF_SSP 0x1 | ||
| 43 | #define SPI_FRF_MICROWIRE 0x2 | ||
| 44 | #define SPI_FRF_RESV 0x3 | ||
| 45 | |||
| 46 | #define SPI_MODE_OFFSET 6 | ||
| 47 | #define SPI_SCPH_OFFSET 6 | ||
| 48 | #define SPI_SCOL_OFFSET 7 | ||
| 49 | #define SPI_TMOD_OFFSET 8 | ||
| 50 | #define SPI_TMOD_TR 0x0 /* xmit & recv */ | ||
| 51 | #define SPI_TMOD_TO 0x1 /* xmit only */ | ||
| 52 | #define SPI_TMOD_RO 0x2 /* recv only */ | ||
| 53 | #define SPI_TMOD_EPROMREAD 0x3 /* eeprom read mode */ | ||
| 54 | |||
| 55 | #define SPI_SLVOE_OFFSET 10 | ||
| 56 | #define SPI_SRL_OFFSET 11 | ||
| 57 | #define SPI_CFS_OFFSET 12 | ||
| 58 | |||
| 59 | /* Bit fields in SR, 7 bits */ | ||
| 60 | #define SR_MASK 0x7f /* cover 7 bits */ | ||
| 61 | #define SR_BUSY (1 << 0) | ||
| 62 | #define SR_TF_NOT_FULL (1 << 1) | ||
| 63 | #define SR_TF_EMPT (1 << 2) | ||
| 64 | #define SR_RF_NOT_EMPT (1 << 3) | ||
| 65 | #define SR_RF_FULL (1 << 4) | ||
| 66 | #define SR_TX_ERR (1 << 5) | ||
| 67 | #define SR_DCOL (1 << 6) | ||
| 68 | |||
| 69 | struct dw_spi_reg { | ||
| 70 | u32 ctrl0; | ||
| 71 | u32 ctrl1; | ||
| 72 | u32 ssienr; | ||
| 73 | u32 mwcr; | ||
| 74 | u32 ser; | ||
| 75 | u32 baudr; | ||
| 76 | u32 txfltr; | ||
| 77 | u32 rxfltr; | ||
| 78 | u32 txflr; | ||
| 79 | u32 rxflr; | ||
| 80 | u32 sr; | ||
| 81 | u32 imr; | ||
| 82 | u32 isr; | ||
| 83 | u32 risr; | ||
| 84 | u32 txoicr; | ||
| 85 | u32 rxoicr; | ||
| 86 | u32 rxuicr; | ||
| 87 | u32 msticr; | ||
| 88 | u32 icr; | ||
| 89 | u32 dmacr; | ||
| 90 | u32 dmatdlr; | ||
| 91 | u32 dmardlr; | ||
| 92 | u32 idr; | ||
| 93 | u32 version; | ||
| 94 | |||
| 95 | /* Currently operates as 32 bits, though only the low 16 bits matter */ | ||
| 96 | u32 dr; | ||
| 97 | } __packed; | ||
| 98 | |||
| 99 | #define dw_readl(dw, name) __raw_readl(&(dw)->name) | ||
| 100 | #define dw_writel(dw, name, val) __raw_writel((val), &(dw)->name) | ||
| 101 | |||
| 102 | /* By default use the SPI0 registers for mrst; we detect Penwell and use SPI1 */ | ||
| 103 | static unsigned long mrst_spi_paddr = MRST_REGBASE_SPI0; | ||
| 104 | |||
| 105 | static u32 *pclk_spi0; | ||
| 106 | /* Always contains an accessible address, starting as 0 */ | ||
| 107 | static struct dw_spi_reg *pspi; | ||
| 108 | |||
| 109 | static struct kmsg_dumper dw_dumper; | ||
| 110 | static int dumper_registered; | ||
| 111 | |||
| 112 | static void dw_kmsg_dump(struct kmsg_dumper *dumper, | ||
| 113 | enum kmsg_dump_reason reason, | ||
| 114 | const char *s1, unsigned long l1, | ||
| 115 | const char *s2, unsigned long l2) | ||
| 116 | { | ||
| 117 | int i; | ||
| 118 | |||
| 119 | /* If we get here, play it safe and re-init the HW */ | ||
| 120 | mrst_early_console_init(); | ||
| 121 | |||
| 122 | for (i = 0; i < l1; i++) | ||
| 123 | early_mrst_console.write(&early_mrst_console, s1 + i, 1); | ||
| 124 | for (i = 0; i < l2; i++) | ||
| 125 | early_mrst_console.write(&early_mrst_console, s2 + i, 1); | ||
| 126 | } | ||
| 127 | |||
| 128 | /* Set the baud rate to 115200, 8n1, IRQ disabled */ | ||
| 129 | static void max3110_write_config(void) | ||
| 130 | { | ||
| 131 | u16 config; | ||
| 132 | |||
| 133 | config = 0xc001; | ||
| 134 | dw_writel(pspi, dr, config); | ||
| 135 | } | ||
| 136 | |||
| 137 | /* Translate char to an eligible word and send it to the max3110 */ | ||
| 138 | static void max3110_write_data(char c) | ||
| 139 | { | ||
| 140 | u16 data; | ||
| 141 | |||
| 142 | data = 0x8000 | c; | ||
| 143 | dw_writel(pspi, dr, data); | ||
| 144 | } | ||
| 145 | |||
| 146 | void mrst_early_console_init(void) | ||
| 147 | { | ||
| 148 | u32 ctrlr0 = 0; | ||
| 149 | u32 spi0_cdiv; | ||
| 150 | u32 freq; /* Frequency info only needs to be looked up once */ | ||
| 151 | |||
| 152 | /* Base clk is 100 MHz, the actual clk = 100M / (clk_divider + 1) */ | ||
| 153 | pclk_spi0 = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, | ||
| 154 | MRST_CLK_SPI0_REG); | ||
| 155 | spi0_cdiv = ((*pclk_spi0) & 0xe00) >> 9; | ||
| 156 | freq = 100000000 / (spi0_cdiv + 1); | ||
| 157 | |||
| 158 | if (mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL) | ||
| 159 | mrst_spi_paddr = MRST_REGBASE_SPI1; | ||
| 160 | |||
| 161 | pspi = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, | ||
| 162 | mrst_spi_paddr); | ||
| 163 | |||
| 164 | /* Disable SPI controller */ | ||
| 165 | dw_writel(pspi, ssienr, 0); | ||
| 166 | |||
| 167 | /* Set control param, 8 bits, transmit only mode */ | ||
| 168 | ctrlr0 = dw_readl(pspi, ctrl0); | ||
| 169 | |||
| 170 | ctrlr0 &= 0xfcc0; | ||
| 171 | ctrlr0 |= 0xf | (SPI_FRF_SPI << SPI_FRF_OFFSET) | ||
| 172 | | (SPI_TMOD_TO << SPI_TMOD_OFFSET); | ||
| 173 | dw_writel(pspi, ctrl0, ctrlr0); | ||
| 174 | |||
| 175 | /* | ||
| 176 | * Change the spi0 clk to comply with 115200 bps; use 100000 to | ||
| 177 | * calculate the clk divisor so the clock runs a little slower | ||
| 178 | * than the real baud rate. | ||
| 179 | */ | ||
| 180 | dw_writel(pspi, baudr, freq/100000); | ||
| 181 | |||
| 182 | /* Disable all INT for early phase */ | ||
| 183 | dw_writel(pspi, imr, 0x0); | ||
| 184 | |||
| 185 | /* Set the cs to spi-uart */ | ||
| 186 | dw_writel(pspi, ser, 0x2); | ||
| 187 | |||
| 188 | /* Enable the HW, the last step for HW init */ | ||
| 189 | dw_writel(pspi, ssienr, 0x1); | ||
| 190 | |||
| 191 | /* Set the default configuration */ | ||
| 192 | max3110_write_config(); | ||
| 193 | |||
| 194 | /* Register the kmsg dumper */ | ||
| 195 | if (!dumper_registered) { | ||
| 196 | dw_dumper.dump = dw_kmsg_dump; | ||
| 197 | kmsg_dump_register(&dw_dumper); | ||
| 198 | dumper_registered = 1; | ||
| 199 | } | ||
| 200 | } | ||
| 201 | |||
| 202 | /* Slave select is handled in the read/write functions */ | ||
| 203 | static void early_mrst_spi_putc(char c) | ||
| 204 | { | ||
| 205 | unsigned int timeout; | ||
| 206 | u32 sr; | ||
| 207 | |||
| 208 | timeout = MRST_SPI_TIMEOUT; | ||
| 209 | /* Early putc needs to make sure the TX FIFO is not full */ | ||
| 210 | while (--timeout) { | ||
| 211 | sr = dw_readl(pspi, sr); | ||
| 212 | if (!(sr & SR_TF_NOT_FULL)) | ||
| 213 | cpu_relax(); | ||
| 214 | else | ||
| 215 | break; | ||
| 216 | } | ||
| 217 | |||
| 218 | if (!timeout) | ||
| 219 | pr_warning("MRST earlycon: timed out\n"); | ||
| 220 | else | ||
| 221 | max3110_write_data(c); | ||
| 222 | } | ||
| 223 | |||
| 224 | /* Early SPI only uses polling mode */ | ||
| 225 | static void early_mrst_spi_write(struct console *con, const char *str, unsigned n) | ||
| 226 | { | ||
| 227 | int i; | ||
| 228 | |||
| 229 | for (i = 0; i < n && *str; i++) { | ||
| 230 | if (*str == '\n') | ||
| 231 | early_mrst_spi_putc('\r'); | ||
| 232 | early_mrst_spi_putc(*str); | ||
| 233 | str++; | ||
| 234 | } | ||
| 235 | } | ||
| 236 | |||
| 237 | struct console early_mrst_console = { | ||
| 238 | .name = "earlymrst", | ||
| 239 | .write = early_mrst_spi_write, | ||
| 240 | .flags = CON_PRINTBUFFER, | ||
| 241 | .index = -1, | ||
| 242 | }; | ||
| 243 | |||
| 244 | /* | ||
| 245 | * What follows is the early console based on the Medfield HSU | ||
| 246 | * (High Speed UART) device. | ||
| 247 | */ | ||
| 248 | #define HSU_PORT2_PADDR 0xffa28180 | ||
| 249 | |||
| 250 | static void __iomem *phsu; | ||
| 251 | |||
| 252 | void hsu_early_console_init(void) | ||
| 253 | { | ||
| 254 | u8 lcr; | ||
| 255 | |||
| 256 | phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, | ||
| 257 | HSU_PORT2_PADDR); | ||
| 258 | |||
| 259 | /* Disable FIFO */ | ||
| 260 | writeb(0x0, phsu + UART_FCR); | ||
| 261 | |||
| 262 | /* Set to default 115200 bps, 8n1 */ | ||
| 263 | lcr = readb(phsu + UART_LCR); | ||
| 264 | writeb((0x80 | lcr), phsu + UART_LCR); | ||
| 265 | writeb(0x18, phsu + UART_DLL); | ||
| 266 | writeb(lcr, phsu + UART_LCR); | ||
| 267 | writel(0x3600, phsu + UART_MUL*4); | ||
| 268 | |||
| 269 | writeb(0x8, phsu + UART_MCR); | ||
| 270 | writeb(0x7, phsu + UART_FCR); | ||
| 271 | writeb(0x3, phsu + UART_LCR); | ||
| 272 | |||
| 273 | /* Clear IRQ status */ | ||
| 274 | readb(phsu + UART_LSR); | ||
| 275 | readb(phsu + UART_RX); | ||
| 276 | readb(phsu + UART_IIR); | ||
| 277 | readb(phsu + UART_MSR); | ||
| 278 | |||
| 279 | /* Enable FIFO */ | ||
| 280 | writeb(0x7, phsu + UART_FCR); | ||
| 281 | } | ||
| 282 | |||
| 283 | #define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) | ||
| 284 | |||
| 285 | static void early_hsu_putc(char ch) | ||
| 286 | { | ||
| 287 | unsigned int timeout = 10000; /* 10ms */ | ||
| 288 | u8 status; | ||
| 289 | |||
| 290 | while (--timeout) { | ||
| 291 | status = readb(phsu + UART_LSR); | ||
| 292 | if (status & BOTH_EMPTY) | ||
| 293 | break; | ||
| 294 | udelay(1); | ||
| 295 | } | ||
| 296 | |||
| 297 | /* Only write the char when there was no timeout */ | ||
| 298 | if (timeout) | ||
| 299 | writeb(ch, phsu + UART_TX); | ||
| 300 | } | ||
| 301 | |||
| 302 | static void early_hsu_write(struct console *con, const char *str, unsigned n) | ||
| 303 | { | ||
| 304 | int i; | ||
| 305 | |||
| 306 | for (i = 0; i < n && *str; i++) { | ||
| 307 | if (*str == '\n') | ||
| 308 | early_hsu_putc('\r'); | ||
| 309 | early_hsu_putc(*str); | ||
| 310 | str++; | ||
| 311 | } | ||
| 312 | } | ||
| 313 | |||
| 314 | struct console early_hsu_console = { | ||
| 315 | .name = "earlyhsu", | ||
| 316 | .write = early_hsu_write, | ||
| 317 | .flags = CON_PRINTBUFFER, | ||
| 318 | .index = -1, | ||
| 319 | }; | ||
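
The MAX3110 side of the new file speaks 16-bit SPI words: 0xc001 is a configuration word (115200 baud, 8n1, interrupts off) and data bytes go out as 0x8000 | c, the top bit marking a data write. The SPI bit clock is derived in mrst_early_console_init(); a worked example, assuming the SPI0 clock-divider field reads back as 0:

    /*
     *   freq  = 100000000 / (spi0_cdiv + 1)  = 100 MHz   (SPI0 input clock)
     *   baudr = freq / 100000                = 1000
     *
     * giving a 100 MHz / 1000 = 100 kHz bit clock, deliberately just under
     * the MAX3110's 115200 baud line rate as the in-code comment explains.
     */
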
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 227d00920d2f..9fb188d7bc76 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
| @@ -115,8 +115,7 @@ | |||
| 115 | 115 | ||
| 116 | /* unfortunately push/pop can't be no-op */ | 116 | /* unfortunately push/pop can't be no-op */ |
| 117 | .macro PUSH_GS | 117 | .macro PUSH_GS |
| 118 | pushl $0 | 118 | pushl_cfi $0 |
| 119 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 120 | .endm | 119 | .endm |
| 121 | .macro POP_GS pop=0 | 120 | .macro POP_GS pop=0 |
| 122 | addl $(4 + \pop), %esp | 121 | addl $(4 + \pop), %esp |
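
Everything below in entry_32.S is one mechanical transformation: each pushl/popl/pushfl paired with a manual CFI_ADJUST_CFA_OFFSET collapses into the corresponding *_cfi macro, and a few PTREGSCALL stubs gain the CFI_STARTPROC/CFI_ENDPROC brackets they were missing. Presumably (the macros come from dwarf2.h in this series) the expansion is simply:

    .macro pushl_cfi reg
            pushl \reg
            CFI_ADJUST_CFA_OFFSET 4
    .endm
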
| @@ -140,14 +139,12 @@ | |||
| 140 | #else /* CONFIG_X86_32_LAZY_GS */ | 139 | #else /* CONFIG_X86_32_LAZY_GS */ |
| 141 | 140 | ||
| 142 | .macro PUSH_GS | 141 | .macro PUSH_GS |
| 143 | pushl %gs | 142 | pushl_cfi %gs |
| 144 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 145 | /*CFI_REL_OFFSET gs, 0*/ | 143 | /*CFI_REL_OFFSET gs, 0*/ |
| 146 | .endm | 144 | .endm |
| 147 | 145 | ||
| 148 | .macro POP_GS pop=0 | 146 | .macro POP_GS pop=0 |
| 149 | 98: popl %gs | 147 | 98: popl_cfi %gs |
| 150 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 151 | /*CFI_RESTORE gs*/ | 148 | /*CFI_RESTORE gs*/ |
| 152 | .if \pop <> 0 | 149 | .if \pop <> 0 |
| 153 | add $\pop, %esp | 150 | add $\pop, %esp |
| @@ -195,35 +192,25 @@ | |||
| 195 | .macro SAVE_ALL | 192 | .macro SAVE_ALL |
| 196 | cld | 193 | cld |
| 197 | PUSH_GS | 194 | PUSH_GS |
| 198 | pushl %fs | 195 | pushl_cfi %fs |
| 199 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 200 | /*CFI_REL_OFFSET fs, 0;*/ | 196 | /*CFI_REL_OFFSET fs, 0;*/ |
| 201 | pushl %es | 197 | pushl_cfi %es |
| 202 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 203 | /*CFI_REL_OFFSET es, 0;*/ | 198 | /*CFI_REL_OFFSET es, 0;*/ |
| 204 | pushl %ds | 199 | pushl_cfi %ds |
| 205 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 206 | /*CFI_REL_OFFSET ds, 0;*/ | 200 | /*CFI_REL_OFFSET ds, 0;*/ |
| 207 | pushl %eax | 201 | pushl_cfi %eax |
| 208 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 209 | CFI_REL_OFFSET eax, 0 | 202 | CFI_REL_OFFSET eax, 0 |
| 210 | pushl %ebp | 203 | pushl_cfi %ebp |
| 211 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 212 | CFI_REL_OFFSET ebp, 0 | 204 | CFI_REL_OFFSET ebp, 0 |
| 213 | pushl %edi | 205 | pushl_cfi %edi |
| 214 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 215 | CFI_REL_OFFSET edi, 0 | 206 | CFI_REL_OFFSET edi, 0 |
| 216 | pushl %esi | 207 | pushl_cfi %esi |
| 217 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 218 | CFI_REL_OFFSET esi, 0 | 208 | CFI_REL_OFFSET esi, 0 |
| 219 | pushl %edx | 209 | pushl_cfi %edx |
| 220 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 221 | CFI_REL_OFFSET edx, 0 | 210 | CFI_REL_OFFSET edx, 0 |
| 222 | pushl %ecx | 211 | pushl_cfi %ecx |
| 223 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 224 | CFI_REL_OFFSET ecx, 0 | 212 | CFI_REL_OFFSET ecx, 0 |
| 225 | pushl %ebx | 213 | pushl_cfi %ebx |
| 226 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 227 | CFI_REL_OFFSET ebx, 0 | 214 | CFI_REL_OFFSET ebx, 0 |
| 228 | movl $(__USER_DS), %edx | 215 | movl $(__USER_DS), %edx |
| 229 | movl %edx, %ds | 216 | movl %edx, %ds |
| @@ -234,39 +221,29 @@ | |||
| 234 | .endm | 221 | .endm |
| 235 | 222 | ||
| 236 | .macro RESTORE_INT_REGS | 223 | .macro RESTORE_INT_REGS |
| 237 | popl %ebx | 224 | popl_cfi %ebx |
| 238 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 239 | CFI_RESTORE ebx | 225 | CFI_RESTORE ebx |
| 240 | popl %ecx | 226 | popl_cfi %ecx |
| 241 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 242 | CFI_RESTORE ecx | 227 | CFI_RESTORE ecx |
| 243 | popl %edx | 228 | popl_cfi %edx |
| 244 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 245 | CFI_RESTORE edx | 229 | CFI_RESTORE edx |
| 246 | popl %esi | 230 | popl_cfi %esi |
| 247 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 248 | CFI_RESTORE esi | 231 | CFI_RESTORE esi |
| 249 | popl %edi | 232 | popl_cfi %edi |
| 250 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 251 | CFI_RESTORE edi | 233 | CFI_RESTORE edi |
| 252 | popl %ebp | 234 | popl_cfi %ebp |
| 253 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 254 | CFI_RESTORE ebp | 235 | CFI_RESTORE ebp |
| 255 | popl %eax | 236 | popl_cfi %eax |
| 256 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 257 | CFI_RESTORE eax | 237 | CFI_RESTORE eax |
| 258 | .endm | 238 | .endm |
| 259 | 239 | ||
| 260 | .macro RESTORE_REGS pop=0 | 240 | .macro RESTORE_REGS pop=0 |
| 261 | RESTORE_INT_REGS | 241 | RESTORE_INT_REGS |
| 262 | 1: popl %ds | 242 | 1: popl_cfi %ds |
| 263 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 264 | /*CFI_RESTORE ds;*/ | 243 | /*CFI_RESTORE ds;*/ |
| 265 | 2: popl %es | 244 | 2: popl_cfi %es |
| 266 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 267 | /*CFI_RESTORE es;*/ | 245 | /*CFI_RESTORE es;*/ |
| 268 | 3: popl %fs | 246 | 3: popl_cfi %fs |
| 269 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 270 | /*CFI_RESTORE fs;*/ | 247 | /*CFI_RESTORE fs;*/ |
| 271 | POP_GS \pop | 248 | POP_GS \pop |
| 272 | .pushsection .fixup, "ax" | 249 | .pushsection .fixup, "ax" |
| @@ -320,16 +297,12 @@ | |||
| 320 | 297 | ||
| 321 | ENTRY(ret_from_fork) | 298 | ENTRY(ret_from_fork) |
| 322 | CFI_STARTPROC | 299 | CFI_STARTPROC |
| 323 | pushl %eax | 300 | pushl_cfi %eax |
| 324 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 325 | call schedule_tail | 301 | call schedule_tail |
| 326 | GET_THREAD_INFO(%ebp) | 302 | GET_THREAD_INFO(%ebp) |
| 327 | popl %eax | 303 | popl_cfi %eax |
| 328 | CFI_ADJUST_CFA_OFFSET -4 | 304 | pushl_cfi $0x0202 # Reset kernel eflags |
| 329 | pushl $0x0202 # Reset kernel eflags | 305 | popfl_cfi |
| 330 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 331 | popfl | ||
| 332 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 333 | jmp syscall_exit | 306 | jmp syscall_exit |
| 334 | CFI_ENDPROC | 307 | CFI_ENDPROC |
| 335 | END(ret_from_fork) | 308 | END(ret_from_fork) |
| @@ -409,29 +382,23 @@ sysenter_past_esp: | |||
| 409 | * enough kernel state to call TRACE_IRQS_OFF can be called - but | 382 | * enough kernel state to call TRACE_IRQS_OFF can be called - but |
| 410 | * we immediately enable interrupts at that point anyway. | 383 | * we immediately enable interrupts at that point anyway. |
| 411 | */ | 384 | */ |
| 412 | pushl $(__USER_DS) | 385 | pushl_cfi $(__USER_DS) |
| 413 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 414 | /*CFI_REL_OFFSET ss, 0*/ | 386 | /*CFI_REL_OFFSET ss, 0*/ |
| 415 | pushl %ebp | 387 | pushl_cfi %ebp |
| 416 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 417 | CFI_REL_OFFSET esp, 0 | 388 | CFI_REL_OFFSET esp, 0 |
| 418 | pushfl | 389 | pushfl_cfi |
| 419 | orl $X86_EFLAGS_IF, (%esp) | 390 | orl $X86_EFLAGS_IF, (%esp) |
| 420 | CFI_ADJUST_CFA_OFFSET 4 | 391 | pushl_cfi $(__USER_CS) |
| 421 | pushl $(__USER_CS) | ||
| 422 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 423 | /*CFI_REL_OFFSET cs, 0*/ | 392 | /*CFI_REL_OFFSET cs, 0*/ |
| 424 | /* | 393 | /* |
| 425 | * Push current_thread_info()->sysenter_return to the stack. | 394 | * Push current_thread_info()->sysenter_return to the stack. |
| 426 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words | 395 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words |
| 427 | * pushed above; +8 corresponds to copy_thread's esp0 setting. | 396 | * pushed above; +8 corresponds to copy_thread's esp0 setting. |
| 428 | */ | 397 | */ |
| 429 | pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) | 398 | pushl_cfi (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) |
| 430 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 431 | CFI_REL_OFFSET eip, 0 | 399 | CFI_REL_OFFSET eip, 0 |
| 432 | 400 | ||
| 433 | pushl %eax | 401 | pushl_cfi %eax |
| 434 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 435 | SAVE_ALL | 402 | SAVE_ALL |
| 436 | ENABLE_INTERRUPTS(CLBR_NONE) | 403 | ENABLE_INTERRUPTS(CLBR_NONE) |
| 437 | 404 | ||
| @@ -486,8 +453,7 @@ sysenter_audit: | |||
| 486 | movl %eax,%edx /* 2nd arg: syscall number */ | 453 | movl %eax,%edx /* 2nd arg: syscall number */ |
| 487 | movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ | 454 | movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ |
| 488 | call audit_syscall_entry | 455 | call audit_syscall_entry |
| 489 | pushl %ebx | 456 | pushl_cfi %ebx |
| 490 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 491 | movl PT_EAX(%esp),%eax /* reload syscall number */ | 457 | movl PT_EAX(%esp),%eax /* reload syscall number */ |
| 492 | jmp sysenter_do_call | 458 | jmp sysenter_do_call |
| 493 | 459 | ||
| @@ -529,8 +495,7 @@ ENDPROC(ia32_sysenter_target) | |||
| 529 | # system call handler stub | 495 | # system call handler stub |
| 530 | ENTRY(system_call) | 496 | ENTRY(system_call) |
| 531 | RING0_INT_FRAME # can't unwind into user space anyway | 497 | RING0_INT_FRAME # can't unwind into user space anyway |
| 532 | pushl %eax # save orig_eax | 498 | pushl_cfi %eax # save orig_eax |
| 533 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 534 | SAVE_ALL | 499 | SAVE_ALL |
| 535 | GET_THREAD_INFO(%ebp) | 500 | GET_THREAD_INFO(%ebp) |
| 536 | # system call tracing in operation / emulation | 501 | # system call tracing in operation / emulation |
| @@ -566,7 +531,6 @@ restore_all_notrace: | |||
| 566 | je ldt_ss # returning to user-space with LDT SS | 531 | je ldt_ss # returning to user-space with LDT SS |
| 567 | restore_nocheck: | 532 | restore_nocheck: |
| 568 | RESTORE_REGS 4 # skip orig_eax/error_code | 533 | RESTORE_REGS 4 # skip orig_eax/error_code |
| 569 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 570 | irq_return: | 534 | irq_return: |
| 571 | INTERRUPT_RETURN | 535 | INTERRUPT_RETURN |
| 572 | .section .fixup,"ax" | 536 | .section .fixup,"ax" |
| @@ -619,10 +583,8 @@ ldt_ss: | |||
| 619 | shr $16, %edx | 583 | shr $16, %edx |
| 620 | mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ | 584 | mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ |
| 621 | mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ | 585 | mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ |
| 622 | pushl $__ESPFIX_SS | 586 | pushl_cfi $__ESPFIX_SS |
| 623 | CFI_ADJUST_CFA_OFFSET 4 | 587 | pushl_cfi %eax /* new kernel esp */ |
| 624 | push %eax /* new kernel esp */ | ||
| 625 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 626 | /* Disable interrupts, but do not irqtrace this section: we | 588 | /* Disable interrupts, but do not irqtrace this section: we |
| 627 | * will soon execute iret and the tracer was already set to | 589 | * will soon execute iret and the tracer was already set to |
| 628 | * the irqstate after the iret */ | 590 | * the irqstate after the iret */ |
| @@ -666,11 +628,9 @@ work_notifysig: # deal with pending signals and | |||
| 666 | 628 | ||
| 667 | ALIGN | 629 | ALIGN |
| 668 | work_notifysig_v86: | 630 | work_notifysig_v86: |
| 669 | pushl %ecx # save ti_flags for do_notify_resume | 631 | pushl_cfi %ecx # save ti_flags for do_notify_resume |
| 670 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 671 | call save_v86_state # %eax contains pt_regs pointer | 632 | call save_v86_state # %eax contains pt_regs pointer |
| 672 | popl %ecx | 633 | popl_cfi %ecx |
| 673 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 674 | movl %eax, %esp | 634 | movl %eax, %esp |
| 675 | #else | 635 | #else |
| 676 | movl %esp, %eax | 636 | movl %esp, %eax |
| @@ -750,14 +710,18 @@ ptregs_##name: \ | |||
| 750 | #define PTREGSCALL3(name) \ | 710 | #define PTREGSCALL3(name) \ |
| 751 | ALIGN; \ | 711 | ALIGN; \ |
| 752 | ptregs_##name: \ | 712 | ptregs_##name: \ |
| 713 | CFI_STARTPROC; \ | ||
| 753 | leal 4(%esp),%eax; \ | 714 | leal 4(%esp),%eax; \ |
| 754 | pushl %eax; \ | 715 | pushl_cfi %eax; \ |
| 755 | movl PT_EDX(%eax),%ecx; \ | 716 | movl PT_EDX(%eax),%ecx; \ |
| 756 | movl PT_ECX(%eax),%edx; \ | 717 | movl PT_ECX(%eax),%edx; \ |
| 757 | movl PT_EBX(%eax),%eax; \ | 718 | movl PT_EBX(%eax),%eax; \ |
| 758 | call sys_##name; \ | 719 | call sys_##name; \ |
| 759 | addl $4,%esp; \ | 720 | addl $4,%esp; \ |
| 760 | ret | 721 | CFI_ADJUST_CFA_OFFSET -4; \ |
| 722 | ret; \ | ||
| 723 | CFI_ENDPROC; \ | ||
| 724 | ENDPROC(ptregs_##name) | ||
| 761 | 725 | ||
| 762 | PTREGSCALL1(iopl) | 726 | PTREGSCALL1(iopl) |
| 763 | PTREGSCALL0(fork) | 727 | PTREGSCALL0(fork) |
| @@ -772,15 +736,19 @@ PTREGSCALL1(vm86old) | |||
| 772 | /* Clone is an oddball. The 4th arg is in %edi */ | 736 | /* Clone is an oddball. The 4th arg is in %edi */ |
| 773 | ALIGN; | 737 | ALIGN; |
| 774 | ptregs_clone: | 738 | ptregs_clone: |
| 739 | CFI_STARTPROC | ||
| 775 | leal 4(%esp),%eax | 740 | leal 4(%esp),%eax |
| 776 | pushl %eax | 741 | pushl_cfi %eax |
| 777 | pushl PT_EDI(%eax) | 742 | pushl_cfi PT_EDI(%eax) |
| 778 | movl PT_EDX(%eax),%ecx | 743 | movl PT_EDX(%eax),%ecx |
| 779 | movl PT_ECX(%eax),%edx | 744 | movl PT_ECX(%eax),%edx |
| 780 | movl PT_EBX(%eax),%eax | 745 | movl PT_EBX(%eax),%eax |
| 781 | call sys_clone | 746 | call sys_clone |
| 782 | addl $8,%esp | 747 | addl $8,%esp |
| 748 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 783 | ret | 749 | ret |
| 750 | CFI_ENDPROC | ||
| 751 | ENDPROC(ptregs_clone) | ||
| 784 | 752 | ||
| 785 | .macro FIXUP_ESPFIX_STACK | 753 | .macro FIXUP_ESPFIX_STACK |
| 786 | /* | 754 | /* |
| @@ -795,10 +763,8 @@ ptregs_clone: | |||
| 795 | mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ | 763 | mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ |
| 796 | shl $16, %eax | 764 | shl $16, %eax |
| 797 | addl %esp, %eax /* the adjusted stack pointer */ | 765 | addl %esp, %eax /* the adjusted stack pointer */ |
| 798 | pushl $__KERNEL_DS | 766 | pushl_cfi $__KERNEL_DS |
| 799 | CFI_ADJUST_CFA_OFFSET 4 | 767 | pushl_cfi %eax |
| 800 | pushl %eax | ||
| 801 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 802 | lss (%esp), %esp /* switch to the normal stack segment */ | 768 | lss (%esp), %esp /* switch to the normal stack segment */ |
| 803 | CFI_ADJUST_CFA_OFFSET -8 | 769 | CFI_ADJUST_CFA_OFFSET -8 |
| 804 | .endm | 770 | .endm |
| @@ -835,8 +801,7 @@ vector=FIRST_EXTERNAL_VECTOR | |||
| 835 | .if vector <> FIRST_EXTERNAL_VECTOR | 801 | .if vector <> FIRST_EXTERNAL_VECTOR |
| 836 | CFI_ADJUST_CFA_OFFSET -4 | 802 | CFI_ADJUST_CFA_OFFSET -4 |
| 837 | .endif | 803 | .endif |
| 838 | 1: pushl $(~vector+0x80) /* Note: always in signed byte range */ | 804 | 1: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */ |
| 839 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 840 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 | 805 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 |
| 841 | jmp 2f | 806 | jmp 2f |
| 842 | .endif | 807 | .endif |
| @@ -876,8 +841,7 @@ ENDPROC(common_interrupt) | |||
| 876 | #define BUILD_INTERRUPT3(name, nr, fn) \ | 841 | #define BUILD_INTERRUPT3(name, nr, fn) \ |
| 877 | ENTRY(name) \ | 842 | ENTRY(name) \ |
| 878 | RING0_INT_FRAME; \ | 843 | RING0_INT_FRAME; \ |
| 879 | pushl $~(nr); \ | 844 | pushl_cfi $~(nr); \ |
| 880 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
| 881 | SAVE_ALL; \ | 845 | SAVE_ALL; \ |
| 882 | TRACE_IRQS_OFF \ | 846 | TRACE_IRQS_OFF \ |
| 883 | movl %esp,%eax; \ | 847 | movl %esp,%eax; \ |
| @@ -893,21 +857,18 @@ ENDPROC(name) | |||
| 893 | 857 | ||
| 894 | ENTRY(coprocessor_error) | 858 | ENTRY(coprocessor_error) |
| 895 | RING0_INT_FRAME | 859 | RING0_INT_FRAME |
| 896 | pushl $0 | 860 | pushl_cfi $0 |
| 897 | CFI_ADJUST_CFA_OFFSET 4 | 861 | pushl_cfi $do_coprocessor_error |
| 898 | pushl $do_coprocessor_error | ||
| 899 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 900 | jmp error_code | 862 | jmp error_code |
| 901 | CFI_ENDPROC | 863 | CFI_ENDPROC |
| 902 | END(coprocessor_error) | 864 | END(coprocessor_error) |
| 903 | 865 | ||
| 904 | ENTRY(simd_coprocessor_error) | 866 | ENTRY(simd_coprocessor_error) |
| 905 | RING0_INT_FRAME | 867 | RING0_INT_FRAME |
| 906 | pushl $0 | 868 | pushl_cfi $0 |
| 907 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 908 | #ifdef CONFIG_X86_INVD_BUG | 869 | #ifdef CONFIG_X86_INVD_BUG |
| 909 | /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ | 870 | /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ |
| 910 | 661: pushl $do_general_protection | 871 | 661: pushl_cfi $do_general_protection |
| 911 | 662: | 872 | 662: |
| 912 | .section .altinstructions,"a" | 873 | .section .altinstructions,"a" |
| 913 | .balign 4 | 874 | .balign 4 |
| @@ -922,19 +883,16 @@ ENTRY(simd_coprocessor_error) | |||
| 922 | 664: | 883 | 664: |
| 923 | .previous | 884 | .previous |
| 924 | #else | 885 | #else |
| 925 | pushl $do_simd_coprocessor_error | 886 | pushl_cfi $do_simd_coprocessor_error |
| 926 | #endif | 887 | #endif |
| 927 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 928 | jmp error_code | 888 | jmp error_code |
| 929 | CFI_ENDPROC | 889 | CFI_ENDPROC |
| 930 | END(simd_coprocessor_error) | 890 | END(simd_coprocessor_error) |
| 931 | 891 | ||
| 932 | ENTRY(device_not_available) | 892 | ENTRY(device_not_available) |
| 933 | RING0_INT_FRAME | 893 | RING0_INT_FRAME |
| 934 | pushl $-1 # mark this as an int | 894 | pushl_cfi $-1 # mark this as an int |
| 935 | CFI_ADJUST_CFA_OFFSET 4 | 895 | pushl_cfi $do_device_not_available |
| 936 | pushl $do_device_not_available | ||
| 937 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 938 | jmp error_code | 896 | jmp error_code |
| 939 | CFI_ENDPROC | 897 | CFI_ENDPROC |
| 940 | END(device_not_available) | 898 | END(device_not_available) |
| @@ -956,82 +914,68 @@ END(native_irq_enable_sysexit) | |||
| 956 | 914 | ||
| 957 | ENTRY(overflow) | 915 | ENTRY(overflow) |
| 958 | RING0_INT_FRAME | 916 | RING0_INT_FRAME |
| 959 | pushl $0 | 917 | pushl_cfi $0 |
| 960 | CFI_ADJUST_CFA_OFFSET 4 | 918 | pushl_cfi $do_overflow |
| 961 | pushl $do_overflow | ||
| 962 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 963 | jmp error_code | 919 | jmp error_code |
| 964 | CFI_ENDPROC | 920 | CFI_ENDPROC |
| 965 | END(overflow) | 921 | END(overflow) |
| 966 | 922 | ||
| 967 | ENTRY(bounds) | 923 | ENTRY(bounds) |
| 968 | RING0_INT_FRAME | 924 | RING0_INT_FRAME |
| 969 | pushl $0 | 925 | pushl_cfi $0 |
| 970 | CFI_ADJUST_CFA_OFFSET 4 | 926 | pushl_cfi $do_bounds |
| 971 | pushl $do_bounds | ||
| 972 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 973 | jmp error_code | 927 | jmp error_code |
| 974 | CFI_ENDPROC | 928 | CFI_ENDPROC |
| 975 | END(bounds) | 929 | END(bounds) |
| 976 | 930 | ||
| 977 | ENTRY(invalid_op) | 931 | ENTRY(invalid_op) |
| 978 | RING0_INT_FRAME | 932 | RING0_INT_FRAME |
| 979 | pushl $0 | 933 | pushl_cfi $0 |
| 980 | CFI_ADJUST_CFA_OFFSET 4 | 934 | pushl_cfi $do_invalid_op |
| 981 | pushl $do_invalid_op | ||
| 982 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 983 | jmp error_code | 935 | jmp error_code |
| 984 | CFI_ENDPROC | 936 | CFI_ENDPROC |
| 985 | END(invalid_op) | 937 | END(invalid_op) |
| 986 | 938 | ||
| 987 | ENTRY(coprocessor_segment_overrun) | 939 | ENTRY(coprocessor_segment_overrun) |
| 988 | RING0_INT_FRAME | 940 | RING0_INT_FRAME |
| 989 | pushl $0 | 941 | pushl_cfi $0 |
| 990 | CFI_ADJUST_CFA_OFFSET 4 | 942 | pushl_cfi $do_coprocessor_segment_overrun |
| 991 | pushl $do_coprocessor_segment_overrun | ||
| 992 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 993 | jmp error_code | 943 | jmp error_code |
| 994 | CFI_ENDPROC | 944 | CFI_ENDPROC |
| 995 | END(coprocessor_segment_overrun) | 945 | END(coprocessor_segment_overrun) |
| 996 | 946 | ||
| 997 | ENTRY(invalid_TSS) | 947 | ENTRY(invalid_TSS) |
| 998 | RING0_EC_FRAME | 948 | RING0_EC_FRAME |
| 999 | pushl $do_invalid_TSS | 949 | pushl_cfi $do_invalid_TSS |
| 1000 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1001 | jmp error_code | 950 | jmp error_code |
| 1002 | CFI_ENDPROC | 951 | CFI_ENDPROC |
| 1003 | END(invalid_TSS) | 952 | END(invalid_TSS) |
| 1004 | 953 | ||
| 1005 | ENTRY(segment_not_present) | 954 | ENTRY(segment_not_present) |
| 1006 | RING0_EC_FRAME | 955 | RING0_EC_FRAME |
| 1007 | pushl $do_segment_not_present | 956 | pushl_cfi $do_segment_not_present |
| 1008 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1009 | jmp error_code | 957 | jmp error_code |
| 1010 | CFI_ENDPROC | 958 | CFI_ENDPROC |
| 1011 | END(segment_not_present) | 959 | END(segment_not_present) |
| 1012 | 960 | ||
| 1013 | ENTRY(stack_segment) | 961 | ENTRY(stack_segment) |
| 1014 | RING0_EC_FRAME | 962 | RING0_EC_FRAME |
| 1015 | pushl $do_stack_segment | 963 | pushl_cfi $do_stack_segment |
| 1016 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1017 | jmp error_code | 964 | jmp error_code |
| 1018 | CFI_ENDPROC | 965 | CFI_ENDPROC |
| 1019 | END(stack_segment) | 966 | END(stack_segment) |
| 1020 | 967 | ||
| 1021 | ENTRY(alignment_check) | 968 | ENTRY(alignment_check) |
| 1022 | RING0_EC_FRAME | 969 | RING0_EC_FRAME |
| 1023 | pushl $do_alignment_check | 970 | pushl_cfi $do_alignment_check |
| 1024 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1025 | jmp error_code | 971 | jmp error_code |
| 1026 | CFI_ENDPROC | 972 | CFI_ENDPROC |
| 1027 | END(alignment_check) | 973 | END(alignment_check) |
| 1028 | 974 | ||
| 1029 | ENTRY(divide_error) | 975 | ENTRY(divide_error) |
| 1030 | RING0_INT_FRAME | 976 | RING0_INT_FRAME |
| 1031 | pushl $0 # no error code | 977 | pushl_cfi $0 # no error code |
| 1032 | CFI_ADJUST_CFA_OFFSET 4 | 978 | pushl_cfi $do_divide_error |
| 1033 | pushl $do_divide_error | ||
| 1034 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1035 | jmp error_code | 979 | jmp error_code |
| 1036 | CFI_ENDPROC | 980 | CFI_ENDPROC |
| 1037 | END(divide_error) | 981 | END(divide_error) |
| @@ -1039,10 +983,8 @@ END(divide_error) | |||
| 1039 | #ifdef CONFIG_X86_MCE | 983 | #ifdef CONFIG_X86_MCE |
| 1040 | ENTRY(machine_check) | 984 | ENTRY(machine_check) |
| 1041 | RING0_INT_FRAME | 985 | RING0_INT_FRAME |
| 1042 | pushl $0 | 986 | pushl_cfi $0 |
| 1043 | CFI_ADJUST_CFA_OFFSET 4 | 987 | pushl_cfi machine_check_vector |
| 1044 | pushl machine_check_vector | ||
| 1045 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1046 | jmp error_code | 988 | jmp error_code |
| 1047 | CFI_ENDPROC | 989 | CFI_ENDPROC |
| 1048 | END(machine_check) | 990 | END(machine_check) |
| @@ -1050,10 +992,8 @@ END(machine_check) | |||
| 1050 | 992 | ||
| 1051 | ENTRY(spurious_interrupt_bug) | 993 | ENTRY(spurious_interrupt_bug) |
| 1052 | RING0_INT_FRAME | 994 | RING0_INT_FRAME |
| 1053 | pushl $0 | 995 | pushl_cfi $0 |
| 1054 | CFI_ADJUST_CFA_OFFSET 4 | 996 | pushl_cfi $do_spurious_interrupt_bug |
| 1055 | pushl $do_spurious_interrupt_bug | ||
| 1056 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1057 | jmp error_code | 997 | jmp error_code |
| 1058 | CFI_ENDPROC | 998 | CFI_ENDPROC |
| 1059 | END(spurious_interrupt_bug) | 999 | END(spurious_interrupt_bug) |
| @@ -1084,8 +1024,7 @@ ENTRY(xen_sysenter_target) | |||
| 1084 | 1024 | ||
| 1085 | ENTRY(xen_hypervisor_callback) | 1025 | ENTRY(xen_hypervisor_callback) |
| 1086 | CFI_STARTPROC | 1026 | CFI_STARTPROC |
| 1087 | pushl $0 | 1027 | pushl_cfi $0 |
| 1088 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1089 | SAVE_ALL | 1028 | SAVE_ALL |
| 1090 | TRACE_IRQS_OFF | 1029 | TRACE_IRQS_OFF |
| 1091 | 1030 | ||
| @@ -1121,23 +1060,20 @@ ENDPROC(xen_hypervisor_callback) | |||
| 1121 | # We distinguish between categories by maintaining a status value in EAX. | 1060 | # We distinguish between categories by maintaining a status value in EAX. |
| 1122 | ENTRY(xen_failsafe_callback) | 1061 | ENTRY(xen_failsafe_callback) |
| 1123 | CFI_STARTPROC | 1062 | CFI_STARTPROC |
| 1124 | pushl %eax | 1063 | pushl_cfi %eax |
| 1125 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1126 | movl $1,%eax | 1064 | movl $1,%eax |
| 1127 | 1: mov 4(%esp),%ds | 1065 | 1: mov 4(%esp),%ds |
| 1128 | 2: mov 8(%esp),%es | 1066 | 2: mov 8(%esp),%es |
| 1129 | 3: mov 12(%esp),%fs | 1067 | 3: mov 12(%esp),%fs |
| 1130 | 4: mov 16(%esp),%gs | 1068 | 4: mov 16(%esp),%gs |
| 1131 | testl %eax,%eax | 1069 | testl %eax,%eax |
| 1132 | popl %eax | 1070 | popl_cfi %eax |
| 1133 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 1134 | lea 16(%esp),%esp | 1071 | lea 16(%esp),%esp |
| 1135 | CFI_ADJUST_CFA_OFFSET -16 | 1072 | CFI_ADJUST_CFA_OFFSET -16 |
| 1136 | jz 5f | 1073 | jz 5f |
| 1137 | addl $16,%esp | 1074 | addl $16,%esp |
| 1138 | jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) | 1075 | jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) |
| 1139 | 5: pushl $0 # EAX == 0 => Category 1 (Bad segment) | 1076 | 5: pushl_cfi $0 # EAX == 0 => Category 1 (Bad segment) |
| 1140 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1141 | SAVE_ALL | 1077 | SAVE_ALL |
| 1142 | jmp ret_from_exception | 1078 | jmp ret_from_exception |
| 1143 | CFI_ENDPROC | 1079 | CFI_ENDPROC |
| @@ -1287,40 +1223,29 @@ syscall_table_size=(.-sys_call_table) | |||
| 1287 | 1223 | ||
| 1288 | ENTRY(page_fault) | 1224 | ENTRY(page_fault) |
| 1289 | RING0_EC_FRAME | 1225 | RING0_EC_FRAME |
| 1290 | pushl $do_page_fault | 1226 | pushl_cfi $do_page_fault |
| 1291 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1292 | ALIGN | 1227 | ALIGN |
| 1293 | error_code: | 1228 | error_code: |
| 1294 | /* the function address is in %gs's slot on the stack */ | 1229 | /* the function address is in %gs's slot on the stack */ |
| 1295 | pushl %fs | 1230 | pushl_cfi %fs |
| 1296 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1297 | /*CFI_REL_OFFSET fs, 0*/ | 1231 | /*CFI_REL_OFFSET fs, 0*/ |
| 1298 | pushl %es | 1232 | pushl_cfi %es |
| 1299 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1300 | /*CFI_REL_OFFSET es, 0*/ | 1233 | /*CFI_REL_OFFSET es, 0*/ |
| 1301 | pushl %ds | 1234 | pushl_cfi %ds |
| 1302 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1303 | /*CFI_REL_OFFSET ds, 0*/ | 1235 | /*CFI_REL_OFFSET ds, 0*/ |
| 1304 | pushl %eax | 1236 | pushl_cfi %eax |
| 1305 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1306 | CFI_REL_OFFSET eax, 0 | 1237 | CFI_REL_OFFSET eax, 0 |
| 1307 | pushl %ebp | 1238 | pushl_cfi %ebp |
| 1308 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1309 | CFI_REL_OFFSET ebp, 0 | 1239 | CFI_REL_OFFSET ebp, 0 |
| 1310 | pushl %edi | 1240 | pushl_cfi %edi |
| 1311 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1312 | CFI_REL_OFFSET edi, 0 | 1241 | CFI_REL_OFFSET edi, 0 |
| 1313 | pushl %esi | 1242 | pushl_cfi %esi |
| 1314 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1315 | CFI_REL_OFFSET esi, 0 | 1243 | CFI_REL_OFFSET esi, 0 |
| 1316 | pushl %edx | 1244 | pushl_cfi %edx |
| 1317 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1318 | CFI_REL_OFFSET edx, 0 | 1245 | CFI_REL_OFFSET edx, 0 |
| 1319 | pushl %ecx | 1246 | pushl_cfi %ecx |
| 1320 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1321 | CFI_REL_OFFSET ecx, 0 | 1247 | CFI_REL_OFFSET ecx, 0 |
| 1322 | pushl %ebx | 1248 | pushl_cfi %ebx |
| 1323 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1324 | CFI_REL_OFFSET ebx, 0 | 1249 | CFI_REL_OFFSET ebx, 0 |
| 1325 | cld | 1250 | cld |
| 1326 | movl $(__KERNEL_PERCPU), %ecx | 1251 | movl $(__KERNEL_PERCPU), %ecx |
| @@ -1362,12 +1287,9 @@ END(page_fault) | |||
| 1362 | movl TSS_sysenter_sp0 + \offset(%esp), %esp | 1287 | movl TSS_sysenter_sp0 + \offset(%esp), %esp |
| 1363 | CFI_DEF_CFA esp, 0 | 1288 | CFI_DEF_CFA esp, 0 |
| 1364 | CFI_UNDEFINED eip | 1289 | CFI_UNDEFINED eip |
| 1365 | pushfl | 1290 | pushfl_cfi |
| 1366 | CFI_ADJUST_CFA_OFFSET 4 | 1291 | pushl_cfi $__KERNEL_CS |
| 1367 | pushl $__KERNEL_CS | 1292 | pushl_cfi $sysenter_past_esp |
| 1368 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1369 | pushl $sysenter_past_esp | ||
| 1370 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1371 | CFI_REL_OFFSET eip, 0 | 1293 | CFI_REL_OFFSET eip, 0 |
| 1372 | .endm | 1294 | .endm |
| 1373 | 1295 | ||
| @@ -1377,8 +1299,7 @@ ENTRY(debug) | |||
| 1377 | jne debug_stack_correct | 1299 | jne debug_stack_correct |
| 1378 | FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn | 1300 | FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn |
| 1379 | debug_stack_correct: | 1301 | debug_stack_correct: |
| 1380 | pushl $-1 # mark this as an int | 1302 | pushl_cfi $-1 # mark this as an int |
| 1381 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1382 | SAVE_ALL | 1303 | SAVE_ALL |
| 1383 | TRACE_IRQS_OFF | 1304 | TRACE_IRQS_OFF |
| 1384 | xorl %edx,%edx # error code 0 | 1305 | xorl %edx,%edx # error code 0 |
| @@ -1398,32 +1319,27 @@ END(debug) | |||
| 1398 | */ | 1319 | */ |
| 1399 | ENTRY(nmi) | 1320 | ENTRY(nmi) |
| 1400 | RING0_INT_FRAME | 1321 | RING0_INT_FRAME |
| 1401 | pushl %eax | 1322 | pushl_cfi %eax |
| 1402 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1403 | movl %ss, %eax | 1323 | movl %ss, %eax |
| 1404 | cmpw $__ESPFIX_SS, %ax | 1324 | cmpw $__ESPFIX_SS, %ax |
| 1405 | popl %eax | 1325 | popl_cfi %eax |
| 1406 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 1407 | je nmi_espfix_stack | 1326 | je nmi_espfix_stack |
| 1408 | cmpl $ia32_sysenter_target,(%esp) | 1327 | cmpl $ia32_sysenter_target,(%esp) |
| 1409 | je nmi_stack_fixup | 1328 | je nmi_stack_fixup |
| 1410 | pushl %eax | 1329 | pushl_cfi %eax |
| 1411 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1412 | movl %esp,%eax | 1330 | movl %esp,%eax |
| 1413 | /* Do not access memory above the end of our stack page, | 1331 | /* Do not access memory above the end of our stack page, |
| 1414 | * it might not exist. | 1332 | * it might not exist. |
| 1415 | */ | 1333 | */ |
| 1416 | andl $(THREAD_SIZE-1),%eax | 1334 | andl $(THREAD_SIZE-1),%eax |
| 1417 | cmpl $(THREAD_SIZE-20),%eax | 1335 | cmpl $(THREAD_SIZE-20),%eax |
| 1418 | popl %eax | 1336 | popl_cfi %eax |
| 1419 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 1420 | jae nmi_stack_correct | 1337 | jae nmi_stack_correct |
| 1421 | cmpl $ia32_sysenter_target,12(%esp) | 1338 | cmpl $ia32_sysenter_target,12(%esp) |
| 1422 | je nmi_debug_stack_check | 1339 | je nmi_debug_stack_check |
| 1423 | nmi_stack_correct: | 1340 | nmi_stack_correct: |
| 1424 | /* We have a RING0_INT_FRAME here */ | 1341 | /* We have a RING0_INT_FRAME here */ |
| 1425 | pushl %eax | 1342 | pushl_cfi %eax |
| 1426 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1427 | SAVE_ALL | 1343 | SAVE_ALL |
| 1428 | xorl %edx,%edx # zero error code | 1344 | xorl %edx,%edx # zero error code |
| 1429 | movl %esp,%eax # pt_regs pointer | 1345 | movl %esp,%eax # pt_regs pointer |
| @@ -1452,18 +1368,14 @@ nmi_espfix_stack: | |||
| 1452 | * | 1368 | * |
| 1453 | * create the pointer to lss back | 1369 | * create the pointer to lss back |
| 1454 | */ | 1370 | */ |
| 1455 | pushl %ss | 1371 | pushl_cfi %ss |
| 1456 | CFI_ADJUST_CFA_OFFSET 4 | 1372 | pushl_cfi %esp |
| 1457 | pushl %esp | ||
| 1458 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1459 | addl $4, (%esp) | 1373 | addl $4, (%esp) |
| 1460 | /* copy the iret frame of 12 bytes */ | 1374 | /* copy the iret frame of 12 bytes */ |
| 1461 | .rept 3 | 1375 | .rept 3 |
| 1462 | pushl 16(%esp) | 1376 | pushl_cfi 16(%esp) |
| 1463 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1464 | .endr | 1377 | .endr |
| 1465 | pushl %eax | 1378 | pushl_cfi %eax |
| 1466 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1467 | SAVE_ALL | 1379 | SAVE_ALL |
| 1468 | FIXUP_ESPFIX_STACK # %eax == %esp | 1380 | FIXUP_ESPFIX_STACK # %eax == %esp |
| 1469 | xorl %edx,%edx # zero error code | 1381 | xorl %edx,%edx # zero error code |
| @@ -1477,8 +1389,7 @@ END(nmi) | |||
| 1477 | 1389 | ||
| 1478 | ENTRY(int3) | 1390 | ENTRY(int3) |
| 1479 | RING0_INT_FRAME | 1391 | RING0_INT_FRAME |
| 1480 | pushl $-1 # mark this as an int | 1392 | pushl_cfi $-1 # mark this as an int |
| 1481 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1482 | SAVE_ALL | 1393 | SAVE_ALL |
| 1483 | TRACE_IRQS_OFF | 1394 | TRACE_IRQS_OFF |
| 1484 | xorl %edx,%edx # zero error code | 1395 | xorl %edx,%edx # zero error code |
| @@ -1490,8 +1401,7 @@ END(int3) | |||
| 1490 | 1401 | ||
| 1491 | ENTRY(general_protection) | 1402 | ENTRY(general_protection) |
| 1492 | RING0_EC_FRAME | 1403 | RING0_EC_FRAME |
| 1493 | pushl $do_general_protection | 1404 | pushl_cfi $do_general_protection |
| 1494 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 1495 | jmp error_code | 1405 | jmp error_code |
| 1496 | CFI_ENDPROC | 1406 | CFI_ENDPROC |
| 1497 | END(general_protection) | 1407 | END(general_protection) |
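The entry_32.S hunks above are one mechanical transformation: every pushl/popl paired with a hand-written CFI_ADJUST_CFA_OFFSET collapses into a single pushl_cfi/popl_cfi, so the DWARF unwind annotation can no longer drift out of sync with the actual stack operation. A minimal sketch of such helpers, assuming the form the kernel keeps in arch/x86/include/asm/dwarf2.h (a reconstruction, not a verbatim copy):

    .macro pushl_cfi reg
    pushl \reg
    CFI_ADJUST_CFA_OFFSET 4     /* CFA moved by one 32-bit slot */
    .endm

    .macro popl_cfi reg
    popl \reg
    CFI_ADJUST_CFA_OFFSET -4    /* slot released */
    .endm

The 64-bit file below uses the analogous pushq_cfi/popq_cfi/pushfq_cfi forms, adjusting the CFA by 8 per slot instead of 4.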
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c375c79065f8..a7ae7fd1010f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
| @@ -213,23 +213,17 @@ ENDPROC(native_usergs_sysret64) | |||
| 213 | .macro FAKE_STACK_FRAME child_rip | 213 | .macro FAKE_STACK_FRAME child_rip |
| 214 | /* push in order ss, rsp, eflags, cs, rip */ | 214 | /* push in order ss, rsp, eflags, cs, rip */ |
| 215 | xorl %eax, %eax | 215 | xorl %eax, %eax |
| 216 | pushq $__KERNEL_DS /* ss */ | 216 | pushq_cfi $__KERNEL_DS /* ss */ |
| 217 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 218 | /*CFI_REL_OFFSET ss,0*/ | 217 | /*CFI_REL_OFFSET ss,0*/ |
| 219 | pushq %rax /* rsp */ | 218 | pushq_cfi %rax /* rsp */ |
| 220 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 221 | CFI_REL_OFFSET rsp,0 | 219 | CFI_REL_OFFSET rsp,0 |
| 222 | pushq $X86_EFLAGS_IF /* eflags - interrupts on */ | 220 | pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */ |
| 223 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 224 | /*CFI_REL_OFFSET rflags,0*/ | 221 | /*CFI_REL_OFFSET rflags,0*/ |
| 225 | pushq $__KERNEL_CS /* cs */ | 222 | pushq_cfi $__KERNEL_CS /* cs */ |
| 226 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 227 | /*CFI_REL_OFFSET cs,0*/ | 223 | /*CFI_REL_OFFSET cs,0*/ |
| 228 | pushq \child_rip /* rip */ | 224 | pushq_cfi \child_rip /* rip */ |
| 229 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 230 | CFI_REL_OFFSET rip,0 | 225 | CFI_REL_OFFSET rip,0 |
| 231 | pushq %rax /* orig rax */ | 226 | pushq_cfi %rax /* orig rax */ |
| 232 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 233 | .endm | 227 | .endm |
| 234 | 228 | ||
| 235 | .macro UNFAKE_STACK_FRAME | 229 | .macro UNFAKE_STACK_FRAME |
| @@ -398,10 +392,8 @@ ENTRY(ret_from_fork) | |||
| 398 | 392 | ||
| 399 | LOCK ; btr $TIF_FORK,TI_flags(%r8) | 393 | LOCK ; btr $TIF_FORK,TI_flags(%r8) |
| 400 | 394 | ||
| 401 | push kernel_eflags(%rip) | 395 | pushq_cfi kernel_eflags(%rip) |
| 402 | CFI_ADJUST_CFA_OFFSET 8 | 396 | popfq_cfi # reset kernel eflags |
| 403 | popf # reset kernel eflags | ||
| 404 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 405 | 397 | ||
| 406 | call schedule_tail # rdi: 'prev' task parameter | 398 | call schedule_tail # rdi: 'prev' task parameter |
| 407 | 399 | ||
| @@ -521,11 +513,9 @@ sysret_careful: | |||
| 521 | jnc sysret_signal | 513 | jnc sysret_signal |
| 522 | TRACE_IRQS_ON | 514 | TRACE_IRQS_ON |
| 523 | ENABLE_INTERRUPTS(CLBR_NONE) | 515 | ENABLE_INTERRUPTS(CLBR_NONE) |
| 524 | pushq %rdi | 516 | pushq_cfi %rdi |
| 525 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 526 | call schedule | 517 | call schedule |
| 527 | popq %rdi | 518 | popq_cfi %rdi |
| 528 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 529 | jmp sysret_check | 519 | jmp sysret_check |
| 530 | 520 | ||
| 531 | /* Handle a signal */ | 521 | /* Handle a signal */ |
| @@ -634,11 +624,9 @@ int_careful: | |||
| 634 | jnc int_very_careful | 624 | jnc int_very_careful |
| 635 | TRACE_IRQS_ON | 625 | TRACE_IRQS_ON |
| 636 | ENABLE_INTERRUPTS(CLBR_NONE) | 626 | ENABLE_INTERRUPTS(CLBR_NONE) |
| 637 | pushq %rdi | 627 | pushq_cfi %rdi |
| 638 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 639 | call schedule | 628 | call schedule |
| 640 | popq %rdi | 629 | popq_cfi %rdi |
| 641 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 642 | DISABLE_INTERRUPTS(CLBR_NONE) | 630 | DISABLE_INTERRUPTS(CLBR_NONE) |
| 643 | TRACE_IRQS_OFF | 631 | TRACE_IRQS_OFF |
| 644 | jmp int_with_check | 632 | jmp int_with_check |
| @@ -652,12 +640,10 @@ int_check_syscall_exit_work: | |||
| 652 | /* Check for syscall exit trace */ | 640 | /* Check for syscall exit trace */ |
| 653 | testl $_TIF_WORK_SYSCALL_EXIT,%edx | 641 | testl $_TIF_WORK_SYSCALL_EXIT,%edx |
| 654 | jz int_signal | 642 | jz int_signal |
| 655 | pushq %rdi | 643 | pushq_cfi %rdi |
| 656 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 657 | leaq 8(%rsp),%rdi # &ptregs -> arg1 | 644 | leaq 8(%rsp),%rdi # &ptregs -> arg1 |
| 658 | call syscall_trace_leave | 645 | call syscall_trace_leave |
| 659 | popq %rdi | 646 | popq_cfi %rdi |
| 660 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 661 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi | 647 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi |
| 662 | jmp int_restore_rest | 648 | jmp int_restore_rest |
| 663 | 649 | ||
| @@ -714,9 +700,8 @@ END(ptregscall_common) | |||
| 714 | 700 | ||
| 715 | ENTRY(stub_execve) | 701 | ENTRY(stub_execve) |
| 716 | CFI_STARTPROC | 702 | CFI_STARTPROC |
| 717 | popq %r11 | 703 | addq $8, %rsp |
| 718 | CFI_ADJUST_CFA_OFFSET -8 | 704 | PARTIAL_FRAME 0 |
| 719 | CFI_REGISTER rip, r11 | ||
| 720 | SAVE_REST | 705 | SAVE_REST |
| 721 | FIXUP_TOP_OF_STACK %r11 | 706 | FIXUP_TOP_OF_STACK %r11 |
| 722 | movq %rsp, %rcx | 707 | movq %rsp, %rcx |
| @@ -735,7 +720,7 @@ END(stub_execve) | |||
| 735 | ENTRY(stub_rt_sigreturn) | 720 | ENTRY(stub_rt_sigreturn) |
| 736 | CFI_STARTPROC | 721 | CFI_STARTPROC |
| 737 | addq $8, %rsp | 722 | addq $8, %rsp |
| 738 | CFI_ADJUST_CFA_OFFSET -8 | 723 | PARTIAL_FRAME 0 |
| 739 | SAVE_REST | 724 | SAVE_REST |
| 740 | movq %rsp,%rdi | 725 | movq %rsp,%rdi |
| 741 | FIXUP_TOP_OF_STACK %r11 | 726 | FIXUP_TOP_OF_STACK %r11 |
| @@ -766,8 +751,7 @@ vector=FIRST_EXTERNAL_VECTOR | |||
| 766 | .if vector <> FIRST_EXTERNAL_VECTOR | 751 | .if vector <> FIRST_EXTERNAL_VECTOR |
| 767 | CFI_ADJUST_CFA_OFFSET -8 | 752 | CFI_ADJUST_CFA_OFFSET -8 |
| 768 | .endif | 753 | .endif |
| 769 | 1: pushq $(~vector+0x80) /* Note: always in signed byte range */ | 754 | 1: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ |
| 770 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 771 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 | 755 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 |
| 772 | jmp 2f | 756 | jmp 2f |
| 773 | .endif | 757 | .endif |
| @@ -796,8 +780,8 @@ END(interrupt) | |||
| 796 | 780 | ||
| 797 | /* 0(%rsp): ~(interrupt number) */ | 781 | /* 0(%rsp): ~(interrupt number) */ |
| 798 | .macro interrupt func | 782 | .macro interrupt func |
| 799 | subq $10*8, %rsp | 783 | subq $ORIG_RAX-ARGOFFSET+8, %rsp |
| 800 | CFI_ADJUST_CFA_OFFSET 10*8 | 784 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8 |
| 801 | call save_args | 785 | call save_args |
| 802 | PARTIAL_FRAME 0 | 786 | PARTIAL_FRAME 0 |
| 803 | call \func | 787 | call \func |
| @@ -822,6 +806,7 @@ ret_from_intr: | |||
| 822 | TRACE_IRQS_OFF | 806 | TRACE_IRQS_OFF |
| 823 | decl PER_CPU_VAR(irq_count) | 807 | decl PER_CPU_VAR(irq_count) |
| 824 | leaveq | 808 | leaveq |
| 809 | CFI_RESTORE rbp | ||
| 825 | CFI_DEF_CFA_REGISTER rsp | 810 | CFI_DEF_CFA_REGISTER rsp |
| 826 | CFI_ADJUST_CFA_OFFSET -8 | 811 | CFI_ADJUST_CFA_OFFSET -8 |
| 827 | exit_intr: | 812 | exit_intr: |
| @@ -903,11 +888,9 @@ retint_careful: | |||
| 903 | jnc retint_signal | 888 | jnc retint_signal |
| 904 | TRACE_IRQS_ON | 889 | TRACE_IRQS_ON |
| 905 | ENABLE_INTERRUPTS(CLBR_NONE) | 890 | ENABLE_INTERRUPTS(CLBR_NONE) |
| 906 | pushq %rdi | 891 | pushq_cfi %rdi |
| 907 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 908 | call schedule | 892 | call schedule |
| 909 | popq %rdi | 893 | popq_cfi %rdi |
| 910 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 911 | GET_THREAD_INFO(%rcx) | 894 | GET_THREAD_INFO(%rcx) |
| 912 | DISABLE_INTERRUPTS(CLBR_NONE) | 895 | DISABLE_INTERRUPTS(CLBR_NONE) |
| 913 | TRACE_IRQS_OFF | 896 | TRACE_IRQS_OFF |
| @@ -956,8 +939,7 @@ END(common_interrupt) | |||
| 956 | .macro apicinterrupt num sym do_sym | 939 | .macro apicinterrupt num sym do_sym |
| 957 | ENTRY(\sym) | 940 | ENTRY(\sym) |
| 958 | INTR_FRAME | 941 | INTR_FRAME |
| 959 | pushq $~(\num) | 942 | pushq_cfi $~(\num) |
| 960 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 961 | interrupt \do_sym | 943 | interrupt \do_sym |
| 962 | jmp ret_from_intr | 944 | jmp ret_from_intr |
| 963 | CFI_ENDPROC | 945 | CFI_ENDPROC |
| @@ -1036,8 +1018,8 @@ ENTRY(\sym) | |||
| 1036 | INTR_FRAME | 1018 | INTR_FRAME |
| 1037 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1019 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
| 1038 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1020 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
| 1039 | subq $15*8,%rsp | 1021 | subq $ORIG_RAX-R15, %rsp |
| 1040 | CFI_ADJUST_CFA_OFFSET 15*8 | 1022 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
| 1041 | call error_entry | 1023 | call error_entry |
| 1042 | DEFAULT_FRAME 0 | 1024 | DEFAULT_FRAME 0 |
| 1043 | movq %rsp,%rdi /* pt_regs pointer */ | 1025 | movq %rsp,%rdi /* pt_regs pointer */ |
| @@ -1052,9 +1034,9 @@ END(\sym) | |||
| 1052 | ENTRY(\sym) | 1034 | ENTRY(\sym) |
| 1053 | INTR_FRAME | 1035 | INTR_FRAME |
| 1054 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1036 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
| 1055 | pushq $-1 /* ORIG_RAX: no syscall to restart */ | 1037 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
| 1056 | CFI_ADJUST_CFA_OFFSET 8 | 1038 | subq $ORIG_RAX-R15, %rsp |
| 1057 | subq $15*8, %rsp | 1039 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
| 1058 | call save_paranoid | 1040 | call save_paranoid |
| 1059 | TRACE_IRQS_OFF | 1041 | TRACE_IRQS_OFF |
| 1060 | movq %rsp,%rdi /* pt_regs pointer */ | 1042 | movq %rsp,%rdi /* pt_regs pointer */ |
| @@ -1070,9 +1052,9 @@ END(\sym) | |||
| 1070 | ENTRY(\sym) | 1052 | ENTRY(\sym) |
| 1071 | INTR_FRAME | 1053 | INTR_FRAME |
| 1072 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1054 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
| 1073 | pushq $-1 /* ORIG_RAX: no syscall to restart */ | 1055 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
| 1074 | CFI_ADJUST_CFA_OFFSET 8 | 1056 | subq $ORIG_RAX-R15, %rsp |
| 1075 | subq $15*8, %rsp | 1057 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
| 1076 | call save_paranoid | 1058 | call save_paranoid |
| 1077 | TRACE_IRQS_OFF | 1059 | TRACE_IRQS_OFF |
| 1078 | movq %rsp,%rdi /* pt_regs pointer */ | 1060 | movq %rsp,%rdi /* pt_regs pointer */ |
| @@ -1089,8 +1071,8 @@ END(\sym) | |||
| 1089 | ENTRY(\sym) | 1071 | ENTRY(\sym) |
| 1090 | XCPT_FRAME | 1072 | XCPT_FRAME |
| 1091 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1073 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
| 1092 | subq $15*8,%rsp | 1074 | subq $ORIG_RAX-R15, %rsp |
| 1093 | CFI_ADJUST_CFA_OFFSET 15*8 | 1075 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
| 1094 | call error_entry | 1076 | call error_entry |
| 1095 | DEFAULT_FRAME 0 | 1077 | DEFAULT_FRAME 0 |
| 1096 | movq %rsp,%rdi /* pt_regs pointer */ | 1078 | movq %rsp,%rdi /* pt_regs pointer */ |
| @@ -1107,8 +1089,8 @@ END(\sym) | |||
| 1107 | ENTRY(\sym) | 1089 | ENTRY(\sym) |
| 1108 | XCPT_FRAME | 1090 | XCPT_FRAME |
| 1109 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1091 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
| 1110 | subq $15*8,%rsp | 1092 | subq $ORIG_RAX-R15, %rsp |
| 1111 | CFI_ADJUST_CFA_OFFSET 15*8 | 1093 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
| 1112 | call save_paranoid | 1094 | call save_paranoid |
| 1113 | DEFAULT_FRAME 0 | 1095 | DEFAULT_FRAME 0 |
| 1114 | TRACE_IRQS_OFF | 1096 | TRACE_IRQS_OFF |
| @@ -1139,16 +1121,14 @@ zeroentry simd_coprocessor_error do_simd_coprocessor_error | |||
| 1139 | /* edi: new selector */ | 1121 | /* edi: new selector */ |
| 1140 | ENTRY(native_load_gs_index) | 1122 | ENTRY(native_load_gs_index) |
| 1141 | CFI_STARTPROC | 1123 | CFI_STARTPROC |
| 1142 | pushf | 1124 | pushfq_cfi |
| 1143 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 1144 | DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) | 1125 | DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) |
| 1145 | SWAPGS | 1126 | SWAPGS |
| 1146 | gs_change: | 1127 | gs_change: |
| 1147 | movl %edi,%gs | 1128 | movl %edi,%gs |
| 1148 | 2: mfence /* workaround */ | 1129 | 2: mfence /* workaround */ |
| 1149 | SWAPGS | 1130 | SWAPGS |
| 1150 | popf | 1131 | popfq_cfi |
| 1151 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 1152 | ret | 1132 | ret |
| 1153 | CFI_ENDPROC | 1133 | CFI_ENDPROC |
| 1154 | END(native_load_gs_index) | 1134 | END(native_load_gs_index) |
| @@ -1215,8 +1195,7 @@ END(kernel_execve) | |||
| 1215 | /* Call softirq on interrupt stack. Interrupts are off. */ | 1195 | /* Call softirq on interrupt stack. Interrupts are off. */ |
| 1216 | ENTRY(call_softirq) | 1196 | ENTRY(call_softirq) |
| 1217 | CFI_STARTPROC | 1197 | CFI_STARTPROC |
| 1218 | push %rbp | 1198 | pushq_cfi %rbp |
| 1219 | CFI_ADJUST_CFA_OFFSET 8 | ||
| 1220 | CFI_REL_OFFSET rbp,0 | 1199 | CFI_REL_OFFSET rbp,0 |
| 1221 | mov %rsp,%rbp | 1200 | mov %rsp,%rbp |
| 1222 | CFI_DEF_CFA_REGISTER rbp | 1201 | CFI_DEF_CFA_REGISTER rbp |
| @@ -1225,6 +1204,7 @@ ENTRY(call_softirq) | |||
| 1225 | push %rbp # backlink for old unwinder | 1204 | push %rbp # backlink for old unwinder |
| 1226 | call __do_softirq | 1205 | call __do_softirq |
| 1227 | leaveq | 1206 | leaveq |
| 1207 | CFI_RESTORE rbp | ||
| 1228 | CFI_DEF_CFA_REGISTER rsp | 1208 | CFI_DEF_CFA_REGISTER rsp |
| 1229 | CFI_ADJUST_CFA_OFFSET -8 | 1209 | CFI_ADJUST_CFA_OFFSET -8 |
| 1230 | decl PER_CPU_VAR(irq_count) | 1210 | decl PER_CPU_VAR(irq_count) |
| @@ -1368,7 +1348,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip) | |||
| 1368 | 1348 | ||
| 1369 | /* ebx: no swapgs flag */ | 1349 | /* ebx: no swapgs flag */ |
| 1370 | ENTRY(paranoid_exit) | 1350 | ENTRY(paranoid_exit) |
| 1371 | INTR_FRAME | 1351 | DEFAULT_FRAME |
| 1372 | DISABLE_INTERRUPTS(CLBR_NONE) | 1352 | DISABLE_INTERRUPTS(CLBR_NONE) |
| 1373 | TRACE_IRQS_OFF | 1353 | TRACE_IRQS_OFF |
| 1374 | testl %ebx,%ebx /* swapgs needed? */ | 1354 | testl %ebx,%ebx /* swapgs needed? */ |
| @@ -1445,7 +1425,6 @@ error_swapgs: | |||
| 1445 | error_sti: | 1425 | error_sti: |
| 1446 | TRACE_IRQS_OFF | 1426 | TRACE_IRQS_OFF |
| 1447 | ret | 1427 | ret |
| 1448 | CFI_ENDPROC | ||
| 1449 | 1428 | ||
| 1450 | /* | 1429 | /* |
| 1451 | * There are two places in the kernel that can potentially fault with | 1430 | * There are two places in the kernel that can potentially fault with |
| @@ -1470,6 +1449,7 @@ bstep_iret: | |||
| 1470 | /* Fix truncated RIP */ | 1449 | /* Fix truncated RIP */ |
| 1471 | movq %rcx,RIP+8(%rsp) | 1450 | movq %rcx,RIP+8(%rsp) |
| 1472 | jmp error_swapgs | 1451 | jmp error_swapgs |
| 1452 | CFI_ENDPROC | ||
| 1473 | END(error_entry) | 1453 | END(error_entry) |
| 1474 | 1454 | ||
| 1475 | 1455 | ||
| @@ -1498,8 +1478,8 @@ ENTRY(nmi) | |||
| 1498 | INTR_FRAME | 1478 | INTR_FRAME |
| 1499 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1479 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
| 1500 | pushq_cfi $-1 | 1480 | pushq_cfi $-1 |
| 1501 | subq $15*8, %rsp | 1481 | subq $ORIG_RAX-R15, %rsp |
| 1502 | CFI_ADJUST_CFA_OFFSET 15*8 | 1482 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
| 1503 | call save_paranoid | 1483 | call save_paranoid |
| 1504 | DEFAULT_FRAME 0 | 1484 | DEFAULT_FRAME 0 |
| 1505 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ | 1485 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ |
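A second recurring change in entry_64.S replaces the magic subq $15*8, %rsp with subq $ORIG_RAX-R15, %rsp: the operand is now derived from named pt_regs offsets emitted at build time (asm-offsets), so it stays correct if the frame layout ever changes. A hedged stand-alone illustration of why the two constants are equal; pt_regs_sketch is a simplified model of the x86-64 register frame, not the kernel struct:

    #include <stdio.h>
    #include <stddef.h>

    /* 15 general-purpose register slots precede orig_rax in the frame,
     * so ORIG_RAX - R15 == 15 * 8 on x86-64. */
    struct pt_regs_sketch {
            unsigned long r15, r14, r13, r12, rbp, rbx;
            unsigned long r11, r10, r9, r8;
            unsigned long rax, rcx, rdx, rsi, rdi;
            unsigned long orig_rax;     /* error code / ~vector / -1 */
    };

    int main(void)
    {
            printf("ORIG_RAX - R15 = %zu (= 15*8)\n",
                   offsetof(struct pt_regs_sketch, orig_rax) -
                   offsetof(struct pt_regs_sketch, r15));
            return 0;
    }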
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 7494999141b3..efaf906daf93 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
| @@ -440,9 +440,9 @@ static int hpet_legacy_next_event(unsigned long delta, | |||
| 440 | static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); | 440 | static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); |
| 441 | static struct hpet_dev *hpet_devs; | 441 | static struct hpet_dev *hpet_devs; |
| 442 | 442 | ||
| 443 | void hpet_msi_unmask(unsigned int irq) | 443 | void hpet_msi_unmask(struct irq_data *data) |
| 444 | { | 444 | { |
| 445 | struct hpet_dev *hdev = get_irq_data(irq); | 445 | struct hpet_dev *hdev = data->handler_data; |
| 446 | unsigned int cfg; | 446 | unsigned int cfg; |
| 447 | 447 | ||
| 448 | /* unmask it */ | 448 | /* unmask it */ |
| @@ -451,10 +451,10 @@ void hpet_msi_unmask(unsigned int irq) | |||
| 451 | hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); | 451 | hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); |
| 452 | } | 452 | } |
| 453 | 453 | ||
| 454 | void hpet_msi_mask(unsigned int irq) | 454 | void hpet_msi_mask(struct irq_data *data) |
| 455 | { | 455 | { |
| 456 | struct hpet_dev *hdev = data->handler_data; | ||
| 456 | unsigned int cfg; | 457 | unsigned int cfg; |
| 457 | struct hpet_dev *hdev = get_irq_data(irq); | ||
| 458 | 458 | ||
| 459 | /* mask it */ | 459 | /* mask it */ |
| 460 | cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); | 460 | cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); |
| @@ -462,18 +462,14 @@ void hpet_msi_mask(unsigned int irq) | |||
| 462 | hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); | 462 | hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); |
| 463 | } | 463 | } |
| 464 | 464 | ||
| 465 | void hpet_msi_write(unsigned int irq, struct msi_msg *msg) | 465 | void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg) |
| 466 | { | 466 | { |
| 467 | struct hpet_dev *hdev = get_irq_data(irq); | ||
| 468 | |||
| 469 | hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num)); | 467 | hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num)); |
| 470 | hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4); | 468 | hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4); |
| 471 | } | 469 | } |
| 472 | 470 | ||
| 473 | void hpet_msi_read(unsigned int irq, struct msi_msg *msg) | 471 | void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg) |
| 474 | { | 472 | { |
| 475 | struct hpet_dev *hdev = get_irq_data(irq); | ||
| 476 | |||
| 477 | msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num)); | 473 | msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num)); |
| 478 | msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4); | 474 | msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4); |
| 479 | msg->address_hi = 0; | 475 | msg->address_hi = 0; |
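The hpet.c hunks show the genirq conversion from IRQ-number-keyed callbacks to struct irq_data ones: the per-IRQ cookie that used to be fetched with get_irq_data(irq) now arrives in data->handler_data. A compilable toy model of that pattern (all *_sketch names are stand-ins, not kernel types):

    #include <stdio.h>

    struct hpet_dev_sketch { int num; };

    struct irq_data_sketch {
            unsigned int irq;
            void *handler_data;     /* per-IRQ cookie, attached once at setup */
    };

    /* New-style callback: no lookup, the cookie travels with irq_data. */
    static void hpet_msi_unmask_sketch(struct irq_data_sketch *data)
    {
            struct hpet_dev_sketch *hdev = data->handler_data;
            printf("unmask HPET timer %d\n", hdev->num);
    }

    int main(void)
    {
            struct hpet_dev_sketch hdev = { .num = 2 };
            struct irq_data_sketch data = { .irq = 40, .handler_data = &hdev };
            hpet_msi_unmask_sketch(&data);
            return 0;
    }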
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index a46cb3522c0c..58bb239a2fd7 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
| @@ -68,19 +68,22 @@ static void __cpuinit init_thread_xstate(void) | |||
| 68 | */ | 68 | */ |
| 69 | 69 | ||
| 70 | if (!HAVE_HWFP) { | 70 | if (!HAVE_HWFP) { |
| 71 | /* | ||
| 72 | * Disable xsave as we do not support it if i387 | ||
| 73 | * emulation is enabled. | ||
| 74 | */ | ||
| 75 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); | ||
| 76 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
| 71 | xstate_size = sizeof(struct i387_soft_struct); | 77 | xstate_size = sizeof(struct i387_soft_struct); |
| 72 | return; | 78 | return; |
| 73 | } | 79 | } |
| 74 | 80 | ||
| 75 | if (cpu_has_fxsr) | 81 | if (cpu_has_fxsr) |
| 76 | xstate_size = sizeof(struct i387_fxsave_struct); | 82 | xstate_size = sizeof(struct i387_fxsave_struct); |
| 77 | #ifdef CONFIG_X86_32 | ||
| 78 | else | 83 | else |
| 79 | xstate_size = sizeof(struct i387_fsave_struct); | 84 | xstate_size = sizeof(struct i387_fsave_struct); |
| 80 | #endif | ||
| 81 | } | 85 | } |
| 82 | 86 | ||
| 83 | #ifdef CONFIG_X86_64 | ||
| 84 | /* | 87 | /* |
| 85 | * Called at bootup to set up the initial FPU state that is later cloned | 88 | * Called at bootup to set up the initial FPU state that is later cloned |
| 86 | * into all processes. | 89 | * into all processes. |
| @@ -88,12 +91,21 @@ static void __cpuinit init_thread_xstate(void) | |||
| 88 | 91 | ||
| 89 | void __cpuinit fpu_init(void) | 92 | void __cpuinit fpu_init(void) |
| 90 | { | 93 | { |
| 91 | unsigned long oldcr0 = read_cr0(); | 94 | unsigned long cr0; |
| 92 | 95 | unsigned long cr4_mask = 0; | |
| 93 | set_in_cr4(X86_CR4_OSFXSR); | ||
| 94 | set_in_cr4(X86_CR4_OSXMMEXCPT); | ||
| 95 | 96 | ||
| 96 | write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ | 97 | if (cpu_has_fxsr) |
| 98 | cr4_mask |= X86_CR4_OSFXSR; | ||
| 99 | if (cpu_has_xmm) | ||
| 100 | cr4_mask |= X86_CR4_OSXMMEXCPT; | ||
| 101 | if (cr4_mask) | ||
| 102 | set_in_cr4(cr4_mask); | ||
| 103 | |||
| 104 | cr0 = read_cr0(); | ||
| 105 | cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ | ||
| 106 | if (!HAVE_HWFP) | ||
| 107 | cr0 |= X86_CR0_EM; | ||
| 108 | write_cr0(cr0); | ||
| 97 | 109 | ||
| 98 | if (!smp_processor_id()) | 110 | if (!smp_processor_id()) |
| 99 | init_thread_xstate(); | 111 | init_thread_xstate(); |
| @@ -104,24 +116,12 @@ void __cpuinit fpu_init(void) | |||
| 104 | clear_used_math(); | 116 | clear_used_math(); |
| 105 | } | 117 | } |
| 106 | 118 | ||
| 107 | #else /* CONFIG_X86_64 */ | ||
| 108 | |||
| 109 | void __cpuinit fpu_init(void) | ||
| 110 | { | ||
| 111 | if (!smp_processor_id()) | ||
| 112 | init_thread_xstate(); | ||
| 113 | } | ||
| 114 | |||
| 115 | #endif /* CONFIG_X86_32 */ | ||
| 116 | |||
| 117 | void fpu_finit(struct fpu *fpu) | 119 | void fpu_finit(struct fpu *fpu) |
| 118 | { | 120 | { |
| 119 | #ifdef CONFIG_X86_32 | ||
| 120 | if (!HAVE_HWFP) { | 121 | if (!HAVE_HWFP) { |
| 121 | finit_soft_fpu(&fpu->state->soft); | 122 | finit_soft_fpu(&fpu->state->soft); |
| 122 | return; | 123 | return; |
| 123 | } | 124 | } |
| 124 | #endif | ||
| 125 | 125 | ||
| 126 | if (cpu_has_fxsr) { | 126 | if (cpu_has_fxsr) { |
| 127 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; | 127 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; |
| @@ -386,19 +386,17 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) | |||
| 386 | #ifdef CONFIG_X86_64 | 386 | #ifdef CONFIG_X86_64 |
| 387 | env->fip = fxsave->rip; | 387 | env->fip = fxsave->rip; |
| 388 | env->foo = fxsave->rdp; | 388 | env->foo = fxsave->rdp; |
| 389 | /* | ||
| 390 | * should be actually ds/cs at fpu exception time, but | ||
| 391 | * that information is not available in 64bit mode. | ||
| 392 | */ | ||
| 393 | env->fcs = task_pt_regs(tsk)->cs; | ||
| 389 | if (tsk == current) { | 394 | if (tsk == current) { |
| 390 | /* | 395 | savesegment(ds, env->fos); |
| 391 | * should be actually ds/cs at fpu exception time, but | ||
| 392 | * that information is not available in 64bit mode. | ||
| 393 | */ | ||
| 394 | asm("mov %%ds, %[fos]" : [fos] "=r" (env->fos)); | ||
| 395 | asm("mov %%cs, %[fcs]" : [fcs] "=r" (env->fcs)); | ||
| 396 | } else { | 396 | } else { |
| 397 | struct pt_regs *regs = task_pt_regs(tsk); | 397 | env->fos = tsk->thread.ds; |
| 398 | |||
| 399 | env->fos = 0xffff0000 | tsk->thread.ds; | ||
| 400 | env->fcs = regs->cs; | ||
| 401 | } | 398 | } |
| 399 | env->fos |= 0xffff0000; | ||
| 402 | #else | 400 | #else |
| 403 | env->fip = fxsave->fip; | 401 | env->fip = fxsave->fip; |
| 404 | env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); | 402 | env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); |
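The convert_from_fxsr() hunk swaps an open-coded asm("mov %%ds, ...") for the savesegment() helper. A hedged user-space demo of that helper follows; the macro body matches the shape used in the kernel's asm headers but may differ in detail, and it only builds on x86:

    #include <stdio.h>

    #define savesegment(seg, value) \
            asm("mov %%" #seg ",%0" : "=r" (value) : : "memory")

    int main(void)
    {
            unsigned long ds_val;

            savesegment(ds, ds_val);        /* read the DS selector */
            printf("ds = 0x%lx\n", ds_val);
            return 0;
    }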
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index cafa7c80ac95..20757cb2efa3 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c | |||
| @@ -29,24 +29,10 @@ | |||
| 29 | * plus some generic x86 specific things if generic specifics makes | 29 | * plus some generic x86 specific things if generic specifics makes |
| 30 | * any sense at all. | 30 | * any sense at all. |
| 31 | */ | 31 | */ |
| 32 | static void init_8259A(int auto_eoi); | ||
| 32 | 33 | ||
| 33 | static int i8259A_auto_eoi; | 34 | static int i8259A_auto_eoi; |
| 34 | DEFINE_RAW_SPINLOCK(i8259A_lock); | 35 | DEFINE_RAW_SPINLOCK(i8259A_lock); |
| 35 | static void mask_and_ack_8259A(unsigned int); | ||
| 36 | static void mask_8259A(void); | ||
| 37 | static void unmask_8259A(void); | ||
| 38 | static void disable_8259A_irq(unsigned int irq); | ||
| 39 | static void enable_8259A_irq(unsigned int irq); | ||
| 40 | static void init_8259A(int auto_eoi); | ||
| 41 | static int i8259A_irq_pending(unsigned int irq); | ||
| 42 | |||
| 43 | struct irq_chip i8259A_chip = { | ||
| 44 | .name = "XT-PIC", | ||
| 45 | .mask = disable_8259A_irq, | ||
| 46 | .disable = disable_8259A_irq, | ||
| 47 | .unmask = enable_8259A_irq, | ||
| 48 | .mask_ack = mask_and_ack_8259A, | ||
| 49 | }; | ||
| 50 | 36 | ||
| 51 | /* | 37 | /* |
| 52 | * 8259A PIC functions to handle ISA devices: | 38 | * 8259A PIC functions to handle ISA devices: |
| @@ -68,7 +54,7 @@ unsigned int cached_irq_mask = 0xffff; | |||
| 68 | */ | 54 | */ |
| 69 | unsigned long io_apic_irqs; | 55 | unsigned long io_apic_irqs; |
| 70 | 56 | ||
| 71 | static void disable_8259A_irq(unsigned int irq) | 57 | static void mask_8259A_irq(unsigned int irq) |
| 72 | { | 58 | { |
| 73 | unsigned int mask = 1 << irq; | 59 | unsigned int mask = 1 << irq; |
| 74 | unsigned long flags; | 60 | unsigned long flags; |
| @@ -82,7 +68,12 @@ static void disable_8259A_irq(unsigned int irq) | |||
| 82 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); | 68 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
| 83 | } | 69 | } |
| 84 | 70 | ||
| 85 | static void enable_8259A_irq(unsigned int irq) | 71 | static void disable_8259A_irq(struct irq_data *data) |
| 72 | { | ||
| 73 | mask_8259A_irq(data->irq); | ||
| 74 | } | ||
| 75 | |||
| 76 | static void unmask_8259A_irq(unsigned int irq) | ||
| 86 | { | 77 | { |
| 87 | unsigned int mask = ~(1 << irq); | 78 | unsigned int mask = ~(1 << irq); |
| 88 | unsigned long flags; | 79 | unsigned long flags; |
| @@ -96,6 +87,11 @@ static void enable_8259A_irq(unsigned int irq) | |||
| 96 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); | 87 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
| 97 | } | 88 | } |
| 98 | 89 | ||
| 90 | static void enable_8259A_irq(struct irq_data *data) | ||
| 91 | { | ||
| 92 | unmask_8259A_irq(data->irq); | ||
| 93 | } | ||
| 94 | |||
| 99 | static int i8259A_irq_pending(unsigned int irq) | 95 | static int i8259A_irq_pending(unsigned int irq) |
| 100 | { | 96 | { |
| 101 | unsigned int mask = 1<<irq; | 97 | unsigned int mask = 1<<irq; |
| @@ -117,7 +113,7 @@ static void make_8259A_irq(unsigned int irq) | |||
| 117 | disable_irq_nosync(irq); | 113 | disable_irq_nosync(irq); |
| 118 | io_apic_irqs &= ~(1<<irq); | 114 | io_apic_irqs &= ~(1<<irq); |
| 119 | set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, | 115 | set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, |
| 120 | "XT"); | 116 | i8259A_chip.name); |
| 121 | enable_irq(irq); | 117 | enable_irq(irq); |
| 122 | } | 118 | } |
| 123 | 119 | ||
| @@ -150,8 +146,9 @@ static inline int i8259A_irq_real(unsigned int irq) | |||
| 150 | * first, _then_ send the EOI, and the order of EOI | 146 | * first, _then_ send the EOI, and the order of EOI |
| 151 | * to the two 8259s is important! | 147 | * to the two 8259s is important! |
| 152 | */ | 148 | */ |
| 153 | static void mask_and_ack_8259A(unsigned int irq) | 149 | static void mask_and_ack_8259A(struct irq_data *data) |
| 154 | { | 150 | { |
| 151 | unsigned int irq = data->irq; | ||
| 155 | unsigned int irqmask = 1 << irq; | 152 | unsigned int irqmask = 1 << irq; |
| 156 | unsigned long flags; | 153 | unsigned long flags; |
| 157 | 154 | ||
| @@ -223,6 +220,14 @@ spurious_8259A_irq: | |||
| 223 | } | 220 | } |
| 224 | } | 221 | } |
| 225 | 222 | ||
| 223 | struct irq_chip i8259A_chip = { | ||
| 224 | .name = "XT-PIC", | ||
| 225 | .irq_mask = disable_8259A_irq, | ||
| 226 | .irq_disable = disable_8259A_irq, | ||
| 227 | .irq_unmask = enable_8259A_irq, | ||
| 228 | .irq_mask_ack = mask_and_ack_8259A, | ||
| 229 | }; | ||
| 230 | |||
| 226 | static char irq_trigger[2]; | 231 | static char irq_trigger[2]; |
| 227 | /** | 232 | /** |
| 228 | * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ | 233 | * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ |
| @@ -342,9 +347,9 @@ static void init_8259A(int auto_eoi) | |||
| 342 | * In AEOI mode we just have to mask the interrupt | 347 | * In AEOI mode we just have to mask the interrupt |
| 343 | * when acking. | 348 | * when acking. |
| 344 | */ | 349 | */ |
| 345 | i8259A_chip.mask_ack = disable_8259A_irq; | 350 | i8259A_chip.irq_mask_ack = disable_8259A_irq; |
| 346 | else | 351 | else |
| 347 | i8259A_chip.mask_ack = mask_and_ack_8259A; | 352 | i8259A_chip.irq_mask_ack = mask_and_ack_8259A; |
| 348 | 353 | ||
| 349 | udelay(100); /* wait for 8259A to initialize */ | 354 | udelay(100); /* wait for 8259A to initialize */ |
| 350 | 355 | ||
| @@ -363,14 +368,6 @@ static void init_8259A(int auto_eoi) | |||
| 363 | static void legacy_pic_noop(void) { }; | 368 | static void legacy_pic_noop(void) { }; |
| 364 | static void legacy_pic_uint_noop(unsigned int unused) { }; | 369 | static void legacy_pic_uint_noop(unsigned int unused) { }; |
| 365 | static void legacy_pic_int_noop(int unused) { }; | 370 | static void legacy_pic_int_noop(int unused) { }; |
| 366 | |||
| 367 | static struct irq_chip dummy_pic_chip = { | ||
| 368 | .name = "dummy pic", | ||
| 369 | .mask = legacy_pic_uint_noop, | ||
| 370 | .unmask = legacy_pic_uint_noop, | ||
| 371 | .disable = legacy_pic_uint_noop, | ||
| 372 | .mask_ack = legacy_pic_uint_noop, | ||
| 373 | }; | ||
| 374 | static int legacy_pic_irq_pending_noop(unsigned int irq) | 371 | static int legacy_pic_irq_pending_noop(unsigned int irq) |
| 375 | { | 372 | { |
| 376 | return 0; | 373 | return 0; |
| @@ -378,7 +375,9 @@ static int legacy_pic_irq_pending_noop(unsigned int irq) | |||
| 378 | 375 | ||
| 379 | struct legacy_pic null_legacy_pic = { | 376 | struct legacy_pic null_legacy_pic = { |
| 380 | .nr_legacy_irqs = 0, | 377 | .nr_legacy_irqs = 0, |
| 381 | .chip = &dummy_pic_chip, | 378 | .chip = &dummy_irq_chip, |
| 379 | .mask = legacy_pic_uint_noop, | ||
| 380 | .unmask = legacy_pic_uint_noop, | ||
| 382 | .mask_all = legacy_pic_noop, | 381 | .mask_all = legacy_pic_noop, |
| 383 | .restore_mask = legacy_pic_noop, | 382 | .restore_mask = legacy_pic_noop, |
| 384 | .init = legacy_pic_int_noop, | 383 | .init = legacy_pic_int_noop, |
| @@ -389,7 +388,9 @@ struct legacy_pic null_legacy_pic = { | |||
| 389 | struct legacy_pic default_legacy_pic = { | 388 | struct legacy_pic default_legacy_pic = { |
| 390 | .nr_legacy_irqs = NR_IRQS_LEGACY, | 389 | .nr_legacy_irqs = NR_IRQS_LEGACY, |
| 391 | .chip = &i8259A_chip, | 390 | .chip = &i8259A_chip, |
| 392 | .mask_all = mask_8259A, | 391 | .mask = mask_8259A_irq, |
| 392 | .unmask = unmask_8259A_irq, | ||
| 393 | .mask_all = mask_8259A, | ||
| 393 | .restore_mask = unmask_8259A, | 394 | .restore_mask = unmask_8259A, |
| 394 | .init = init_8259A, | 395 | .init = init_8259A, |
| 395 | .irq_pending = i8259A_irq_pending, | 396 | .irq_pending = i8259A_irq_pending, |
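i8259.c illustrates the standard recipe for that genirq migration: keep the old unsigned-int helper for internal callers (mask_8259A_irq, unmask_8259A_irq) and expose a thin irq_data wrapper that extracts data->irq for the chip callbacks. A self-contained sketch of the shape, with simplified stand-in types rather than the kernel's:

    #include <stdio.h>

    struct irq_data_sketch { unsigned int irq; };

    /* Old-style helper, still keyed by IRQ number... */
    static void mask_irq_line(unsigned int irq)
    {
            printf("mask line %u\n", irq);
    }

    /* ...wrapped for the new irq_data-based irq_chip signature, exactly
     * the shape of disable_8259A_irq() above. */
    static void irq_mask_wrapper(struct irq_data_sketch *data)
    {
            mask_irq_line(data->irq);
    }

    struct irq_chip_sketch {
            const char *name;
            void (*irq_mask)(struct irq_data_sketch *data);
    };

    int main(void)
    {
            struct irq_chip_sketch pic = { "XT-PIC", irq_mask_wrapper };
            struct irq_data_sketch d = { .irq = 3 };

            pic.irq_mask(&d);
            return 0;
    }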
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 44edb03fc9ec..83ec0175f986 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
| @@ -159,7 +159,7 @@ int show_interrupts(struct seq_file *p, void *v) | |||
| 159 | seq_printf(p, "%*d: ", prec, i); | 159 | seq_printf(p, "%*d: ", prec, i); |
| 160 | for_each_online_cpu(j) | 160 | for_each_online_cpu(j) |
| 161 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); | 161 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); |
| 162 | seq_printf(p, " %8s", desc->chip->name); | 162 | seq_printf(p, " %8s", desc->irq_data.chip->name); |
| 163 | seq_printf(p, "-%-8s", desc->name); | 163 | seq_printf(p, "-%-8s", desc->name); |
| 164 | 164 | ||
| 165 | if (action) { | 165 | if (action) { |
| @@ -282,6 +282,7 @@ void fixup_irqs(void) | |||
| 282 | unsigned int irq, vector; | 282 | unsigned int irq, vector; |
| 283 | static int warned; | 283 | static int warned; |
| 284 | struct irq_desc *desc; | 284 | struct irq_desc *desc; |
| 285 | struct irq_data *data; | ||
| 285 | 286 | ||
| 286 | for_each_irq_desc(irq, desc) { | 287 | for_each_irq_desc(irq, desc) { |
| 287 | int break_affinity = 0; | 288 | int break_affinity = 0; |
| @@ -296,7 +297,8 @@ void fixup_irqs(void) | |||
| 296 | /* interrupts are disabled at this point */ | 297 | /* interrupts are disabled at this point */ |
| 297 | raw_spin_lock(&desc->lock); | 298 | raw_spin_lock(&desc->lock); |
| 298 | 299 | ||
| 299 | affinity = desc->affinity; | 300 | data = &desc->irq_data; |
| 301 | affinity = data->affinity; | ||
| 300 | if (!irq_has_action(irq) || | 302 | if (!irq_has_action(irq) || |
| 301 | cpumask_equal(affinity, cpu_online_mask)) { | 303 | cpumask_equal(affinity, cpu_online_mask)) { |
| 302 | raw_spin_unlock(&desc->lock); | 304 | raw_spin_unlock(&desc->lock); |
| @@ -315,16 +317,16 @@ void fixup_irqs(void) | |||
| 315 | affinity = cpu_all_mask; | 317 | affinity = cpu_all_mask; |
| 316 | } | 318 | } |
| 317 | 319 | ||
| 318 | if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask) | 320 | if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_mask) |
| 319 | desc->chip->mask(irq); | 321 | data->chip->irq_mask(data); |
| 320 | 322 | ||
| 321 | if (desc->chip->set_affinity) | 323 | if (data->chip->irq_set_affinity) |
| 322 | desc->chip->set_affinity(irq, affinity); | 324 | data->chip->irq_set_affinity(data, affinity, true); |
| 323 | else if (!(warned++)) | 325 | else if (!(warned++)) |
| 324 | set_affinity = 0; | 326 | set_affinity = 0; |
| 325 | 327 | ||
| 326 | if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask) | 328 | if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_unmask) |
| 327 | desc->chip->unmask(irq); | 329 | data->chip->irq_unmask(data); |
| 328 | 330 | ||
| 329 | raw_spin_unlock(&desc->lock); | 331 | raw_spin_unlock(&desc->lock); |
| 330 | 332 | ||
| @@ -355,10 +357,10 @@ void fixup_irqs(void) | |||
| 355 | if (irr & (1 << (vector % 32))) { | 357 | if (irr & (1 << (vector % 32))) { |
| 356 | irq = __get_cpu_var(vector_irq)[vector]; | 358 | irq = __get_cpu_var(vector_irq)[vector]; |
| 357 | 359 | ||
| 358 | desc = irq_to_desc(irq); | 360 | data = irq_get_irq_data(irq); |
| 359 | raw_spin_lock(&desc->lock); | 361 | raw_spin_lock(&desc->lock); |
| 360 | if (desc->chip->retrigger) | 362 | if (data->chip->irq_retrigger) |
| 361 | desc->chip->retrigger(irq); | 363 | data->chip->irq_retrigger(data); |
| 362 | raw_spin_unlock(&desc->lock); | 364 | raw_spin_unlock(&desc->lock); |
| 363 | } | 365 | } |
| 364 | } | 366 | } |
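fixup_irqs() also shows the new affinity hook receiving the target mask and a force flag directly, where the old hook was keyed by IRQ number. A hedged sketch of the two shapes side by side (declarations only; types simplified from that era's genirq headers):

    #include <stdbool.h>

    struct cpumask;                 /* opaque here */
    struct irq_data_sketch;         /* opaque here */

    /* old: int (*set_affinity)(unsigned int irq, const struct cpumask *dest); */
    /* new, as invoked above with force == true: */
    struct irq_chip_sketch {
            int (*irq_set_affinity)(struct irq_data_sketch *data,
                                    const struct cpumask *dest, bool force);
    };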
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 713969b9266b..c752e973958d 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
| @@ -100,6 +100,8 @@ int vector_used_by_percpu_irq(unsigned int vector) | |||
| 100 | 100 | ||
| 101 | void __init init_ISA_irqs(void) | 101 | void __init init_ISA_irqs(void) |
| 102 | { | 102 | { |
| 103 | struct irq_chip *chip = legacy_pic->chip; | ||
| 104 | const char *name = chip->name; | ||
| 103 | int i; | 105 | int i; |
| 104 | 106 | ||
| 105 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) | 107 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) |
| @@ -107,19 +109,8 @@ void __init init_ISA_irqs(void) | |||
| 107 | #endif | 109 | #endif |
| 108 | legacy_pic->init(0); | 110 | legacy_pic->init(0); |
| 109 | 111 | ||
| 110 | /* | 112 | for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) |
| 111 | * 16 old-style INTA-cycle interrupts: | 113 | set_irq_chip_and_handler_name(i, chip, handle_level_irq, name); |
| 112 | */ | ||
| 113 | for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) { | ||
| 114 | struct irq_desc *desc = irq_to_desc(i); | ||
| 115 | |||
| 116 | desc->status = IRQ_DISABLED; | ||
| 117 | desc->action = NULL; | ||
| 118 | desc->depth = 1; | ||
| 119 | |||
| 120 | set_irq_chip_and_handler_name(i, &i8259A_chip, | ||
| 121 | handle_level_irq, "XT"); | ||
| 122 | } | ||
| 123 | } | 114 | } |
| 124 | 115 | ||
| 125 | void __init init_IRQ(void) | 116 | void __init init_IRQ(void) |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 035c8c529181..b3ea9db39db6 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
| @@ -36,7 +36,7 @@ static int init_one_level2_page(struct kimage *image, pgd_t *pgd, | |||
| 36 | if (!page) | 36 | if (!page) |
| 37 | goto out; | 37 | goto out; |
| 38 | pud = (pud_t *)page_address(page); | 38 | pud = (pud_t *)page_address(page); |
| 39 | memset(pud, 0, PAGE_SIZE); | 39 | clear_page(pud); |
| 40 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); | 40 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); |
| 41 | } | 41 | } |
| 42 | pud = pud_offset(pgd, addr); | 42 | pud = pud_offset(pgd, addr); |
| @@ -45,7 +45,7 @@ static int init_one_level2_page(struct kimage *image, pgd_t *pgd, | |||
| 45 | if (!page) | 45 | if (!page) |
| 46 | goto out; | 46 | goto out; |
| 47 | pmd = (pmd_t *)page_address(page); | 47 | pmd = (pmd_t *)page_address(page); |
| 48 | memset(pmd, 0, PAGE_SIZE); | 48 | clear_page(pmd); |
| 49 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | 49 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); |
| 50 | } | 50 | } |
| 51 | pmd = pmd_offset(pud, addr); | 51 | pmd = pmd_offset(pud, addr); |
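machine_kexec_64.c substitutes clear_page() for a page-sized memset. The contract is simply "zero exactly one page", which the kernel routes to CPU-optimized assembly; the sketch below shows only the semantic equivalent, not the implementation:

    #include <string.h>

    #define PAGE_SIZE 4096          /* x86 base page size */

    static inline void clear_page_sketch(void *page)
    {
            memset(page, 0, PAGE_SIZE);     /* what clear_page() guarantees */
    }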
diff --git a/arch/x86/kernel/olpc-xo1.c b/arch/x86/kernel/olpc-xo1.c new file mode 100644 index 000000000000..f5442c03abc3 --- /dev/null +++ b/arch/x86/kernel/olpc-xo1.c | |||
| @@ -0,0 +1,140 @@ | |||
| 1 | /* | ||
| 2 | * Support for features of the OLPC XO-1 laptop | ||
| 3 | * | ||
| 4 | * Copyright (C) 2010 One Laptop per Child | ||
| 5 | * Copyright (C) 2006 Red Hat, Inc. | ||
| 6 | * Copyright (C) 2006 Advanced Micro Devices, Inc. | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify | ||
| 9 | * it under the terms of the GNU General Public License as published by | ||
| 10 | * the Free Software Foundation; either version 2 of the License, or | ||
| 11 | * (at your option) any later version. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <linux/module.h> | ||
| 15 | #include <linux/pci.h> | ||
| 16 | #include <linux/pci_ids.h> | ||
| 17 | #include <linux/platform_device.h> | ||
| 18 | #include <linux/pm.h> | ||
| 19 | |||
| 20 | #include <asm/io.h> | ||
| 21 | #include <asm/olpc.h> | ||
| 22 | |||
| 23 | #define DRV_NAME "olpc-xo1" | ||
| 24 | |||
| 25 | #define PMS_BAR 4 | ||
| 26 | #define ACPI_BAR 5 | ||
| 27 | |||
| 28 | /* PMC registers (PMS block) */ | ||
| 29 | #define PM_SCLK 0x10 | ||
| 30 | #define PM_IN_SLPCTL 0x20 | ||
| 31 | #define PM_WKXD 0x34 | ||
| 32 | #define PM_WKD 0x30 | ||
| 33 | #define PM_SSC 0x54 | ||
| 34 | |||
| 35 | /* PM registers (ACPI block) */ | ||
| 36 | #define PM1_CNT 0x08 | ||
| 37 | #define PM_GPE0_STS 0x18 | ||
| 38 | |||
| 39 | static unsigned long acpi_base; | ||
| 40 | static unsigned long pms_base; | ||
| 41 | |||
| 42 | static void xo1_power_off(void) | ||
| 43 | { | ||
| 44 | printk(KERN_INFO "OLPC XO-1 power off sequence...\n"); | ||
| 45 | |||
| 46 | /* Enable all of these controls with 0 delay */ | ||
| 47 | outl(0x40000000, pms_base + PM_SCLK); | ||
| 48 | outl(0x40000000, pms_base + PM_IN_SLPCTL); | ||
| 49 | outl(0x40000000, pms_base + PM_WKXD); | ||
| 50 | outl(0x40000000, pms_base + PM_WKD); | ||
| 51 | |||
| 52 | /* Clear status bits (possibly unnecessary) */ | ||
| 53 | outl(0x0002ffff, pms_base + PM_SSC); | ||
| 54 | outl(0xffffffff, acpi_base + PM_GPE0_STS); | ||
| 55 | |||
| 56 | /* Write SLP_EN bit to start the machinery */ | ||
| 57 | outl(0x00002000, acpi_base + PM1_CNT); | ||
| 58 | } | ||
| 59 | |||
| 60 | /* Read the base addresses from the PCI BAR info */ | ||
| 61 | static int __devinit setup_bases(struct pci_dev *pdev) | ||
| 62 | { | ||
| 63 | int r; | ||
| 64 | |||
| 65 | r = pci_enable_device_io(pdev); | ||
| 66 | if (r) { | ||
| 67 | dev_err(&pdev->dev, "can't enable device IO\n"); | ||
| 68 | return r; | ||
| 69 | } | ||
| 70 | |||
| 71 | r = pci_request_region(pdev, ACPI_BAR, DRV_NAME); | ||
| 72 | if (r) { | ||
| 73 | dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", ACPI_BAR); | ||
| 74 | return r; | ||
| 75 | } | ||
| 76 | |||
| 77 | r = pci_request_region(pdev, PMS_BAR, DRV_NAME); | ||
| 78 | if (r) { | ||
| 79 | dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", PMS_BAR); | ||
| 80 | pci_release_region(pdev, ACPI_BAR); | ||
| 81 | return r; | ||
| 82 | } | ||
| 83 | |||
| 84 | acpi_base = pci_resource_start(pdev, ACPI_BAR); | ||
| 85 | pms_base = pci_resource_start(pdev, PMS_BAR); | ||
| 86 | |||
| 87 | return 0; | ||
| 88 | } | ||
| 89 | |||
| 90 | static int __devinit olpc_xo1_probe(struct platform_device *pdev) | ||
| 91 | { | ||
| 92 | struct pci_dev *pcidev; | ||
| 93 | int r; | ||
| 94 | |||
| 95 | pcidev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, | ||
| 96 | NULL); | ||
| 97 | if (!pcidev) | ||
| 98 | return -ENODEV; | ||
| 99 | |||
| 100 | r = setup_bases(pcidev); | ||
| 101 | if (r) | ||
| 102 | return r; | ||
| 103 | |||
| 104 | pm_power_off = xo1_power_off; | ||
| 105 | |||
| 106 | printk(KERN_INFO "OLPC XO-1 support registered\n"); | ||
| 107 | return 0; | ||
| 108 | } | ||
| 109 | |||
| 110 | static int __devexit olpc_xo1_remove(struct platform_device *pdev) | ||
| 111 | { | ||
| 112 | pm_power_off = NULL; | ||
| 113 | return 0; | ||
| 114 | } | ||
| 115 | |||
| 116 | static struct platform_driver olpc_xo1_driver = { | ||
| 117 | .driver = { | ||
| 118 | .name = DRV_NAME, | ||
| 119 | .owner = THIS_MODULE, | ||
| 120 | }, | ||
| 121 | .probe = olpc_xo1_probe, | ||
| 122 | .remove = __devexit_p(olpc_xo1_remove), | ||
| 123 | }; | ||
| 124 | |||
| 125 | static int __init olpc_xo1_init(void) | ||
| 126 | { | ||
| 127 | return platform_driver_register(&olpc_xo1_driver); | ||
| 128 | } | ||
| 129 | |||
| 130 | static void __exit olpc_xo1_exit(void) | ||
| 131 | { | ||
| 132 | platform_driver_unregister(&olpc_xo1_driver); | ||
| 133 | } | ||
| 134 | |||
| 135 | MODULE_AUTHOR("Daniel Drake <dsd@laptop.org>"); | ||
| 136 | MODULE_LICENSE("GPL"); | ||
| 137 | MODULE_ALIAS("platform:olpc-xo1"); | ||
| 138 | |||
| 139 | module_init(olpc_xo1_init); | ||
| 140 | module_exit(olpc_xo1_exit); | ||
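In xo1_power_off() the value 0x00002000 written to PM1_CNT decodes as the ACPI SLP_EN bit. A short decoding sketch; the macro names are local inventions, while the bit positions come from the ACPI PM1 control register layout:

    /* PM1_CNT: SLP_TYP occupies bits 10-12, SLP_EN is bit 13.  Setting
     * SLP_EN starts the transition into the sleep type selected by
     * SLP_TYP (0 here). */
    #define SLP_TYP(x)      ((unsigned int)(x) << 10)
    #define SLP_EN          (1u << 13)

    /* 0x00002000 == SLP_TYP(0) | SLP_EN */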
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 0e0cdde519be..edaf3fe8dc5e 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
| 18 | #include <linux/io.h> | 18 | #include <linux/io.h> |
| 19 | #include <linux/string.h> | 19 | #include <linux/string.h> |
| 20 | #include <linux/platform_device.h> | ||
| 20 | 21 | ||
| 21 | #include <asm/geode.h> | 22 | #include <asm/geode.h> |
| 22 | #include <asm/setup.h> | 23 | #include <asm/setup.h> |
| @@ -114,6 +115,7 @@ int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen, | |||
| 114 | unsigned long flags; | 115 | unsigned long flags; |
| 115 | int ret = -EIO; | 116 | int ret = -EIO; |
| 116 | int i; | 117 | int i; |
| 118 | int restarts = 0; | ||
| 117 | 119 | ||
| 118 | spin_lock_irqsave(&ec_lock, flags); | 120 | spin_lock_irqsave(&ec_lock, flags); |
| 119 | 121 | ||
| @@ -169,7 +171,9 @@ restart: | |||
| 169 | if (wait_on_obf(0x6c, 1)) { | 171 | if (wait_on_obf(0x6c, 1)) { |
| 170 | printk(KERN_ERR "olpc-ec: timeout waiting for" | 172 | printk(KERN_ERR "olpc-ec: timeout waiting for" |
| 171 | " EC to provide data!\n"); | 173 | " EC to provide data!\n"); |
| 172 | goto restart; | 174 | if (restarts++ < 10) |
| 175 | goto restart; | ||
| 176 | goto err; | ||
| 173 | } | 177 | } |
| 174 | outbuf[i] = inb(0x68); | 178 | outbuf[i] = inb(0x68); |
| 175 | pr_devel("olpc-ec: received 0x%x\n", outbuf[i]); | 179 | pr_devel("olpc-ec: received 0x%x\n", outbuf[i]); |
| @@ -183,8 +187,21 @@ err: | |||
| 183 | } | 187 | } |
| 184 | EXPORT_SYMBOL_GPL(olpc_ec_cmd); | 188 | EXPORT_SYMBOL_GPL(olpc_ec_cmd); |
| 185 | 189 | ||
| 186 | #ifdef CONFIG_OLPC_OPENFIRMWARE | 190 | static bool __init check_ofw_architecture(void) |
| 187 | static void __init platform_detect(void) | 191 | { |
| 192 | size_t propsize; | ||
| 193 | char olpc_arch[5]; | ||
| 194 | const void *args[] = { NULL, "architecture", olpc_arch, (void *)5 }; | ||
| 195 | void *res[] = { &propsize }; | ||
| 196 | |||
| 197 | if (olpc_ofw("getprop", args, res)) { | ||
| 198 | printk(KERN_ERR "ofw: getprop call failed!\n"); | ||
| 199 | return false; | ||
| 200 | } | ||
| 201 | return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0; | ||
| 202 | } | ||
| 203 | |||
| 204 | static u32 __init get_board_revision(void) | ||
| 188 | { | 205 | { |
| 189 | size_t propsize; | 206 | size_t propsize; |
| 190 | __be32 rev; | 207 | __be32 rev; |
| @@ -193,45 +210,43 @@ static void __init platform_detect(void) | |||
| 193 | 210 | ||
| 194 | if (olpc_ofw("getprop", args, res) || propsize != 4) { | 211 | if (olpc_ofw("getprop", args, res) || propsize != 4) { |
| 195 | printk(KERN_ERR "ofw: getprop call failed!\n"); | 212 | printk(KERN_ERR "ofw: getprop call failed!\n"); |
| 196 | rev = cpu_to_be32(0); | 213 | return cpu_to_be32(0); |
| 197 | } | 214 | } |
| 198 | olpc_platform_info.boardrev = be32_to_cpu(rev); | 215 | return be32_to_cpu(rev); |
| 199 | } | 216 | } |
| 200 | #else | 217 | |
| 201 | static void __init platform_detect(void) | 218 | static bool __init platform_detect(void) |
| 202 | { | 219 | { |
| 203 | /* stopgap until OFW support is added to the kernel */ | 220 | if (!check_ofw_architecture()) |
| 204 | olpc_platform_info.boardrev = olpc_board(0xc2); | 221 | return false; |
| 222 | olpc_platform_info.flags |= OLPC_F_PRESENT; | ||
| 223 | olpc_platform_info.boardrev = get_board_revision(); | ||
| 224 | return true; | ||
| 205 | } | 225 | } |
| 206 | #endif | ||
| 207 | 226 | ||
| 208 | static int __init olpc_init(void) | 227 | static int __init add_xo1_platform_devices(void) |
| 209 | { | 228 | { |
| 210 | unsigned char *romsig; | 229 | struct platform_device *pdev; |
| 211 | 230 | ||
| 212 | /* The ioremap check is dangerous; limit what we run it on */ | 231 | pdev = platform_device_register_simple("xo1-rfkill", -1, NULL, 0); |
| 213 | if (!is_geode() || cs5535_has_vsa2()) | 232 | if (IS_ERR(pdev)) |
| 214 | return 0; | 233 | return PTR_ERR(pdev); |
| 215 | 234 | ||
| 216 | spin_lock_init(&ec_lock); | 235 | pdev = platform_device_register_simple("olpc-xo1", -1, NULL, 0); |
| 236 | if (IS_ERR(pdev)) | ||
| 237 | return PTR_ERR(pdev); | ||
| 217 | 238 | ||
| 218 | romsig = ioremap(0xffffffc0, 16); | 239 | return 0; |
| 219 | if (!romsig) | 240 | } |
| 220 | return 0; | ||
| 221 | 241 | ||
| 222 | if (strncmp(romsig, "CL1 Q", 7)) | 242 | static int __init olpc_init(void) |
| 223 | goto unmap; | 243 | { |
| 224 | if (strncmp(romsig+6, romsig+13, 3)) { | 244 | int r = 0; |
| 225 | printk(KERN_INFO "OLPC BIOS signature looks invalid. " | ||
| 226 | "Assuming not OLPC\n"); | ||
| 227 | goto unmap; | ||
| 228 | } | ||
| 229 | 245 | ||
| 230 | printk(KERN_INFO "OLPC board with OpenFirmware %.16s\n", romsig); | 246 | if (!olpc_ofw_present() || !platform_detect()) |
| 231 | olpc_platform_info.flags |= OLPC_F_PRESENT; | 247 | return 0; |
| 232 | 248 | ||
| 233 | /* get the platform revision */ | 249 | spin_lock_init(&ec_lock); |
| 234 | platform_detect(); | ||
| 235 | 250 | ||
| 236 | /* assume B1 and above models always have a DCON */ | 251 | /* assume B1 and above models always have a DCON */ |
| 237 | if (olpc_board_at_least(olpc_board(0xb1))) | 252 | if (olpc_board_at_least(olpc_board(0xb1))) |
| @@ -242,8 +257,10 @@ static int __init olpc_init(void) | |||
| 242 | (unsigned char *) &olpc_platform_info.ecver, 1); | 257 | (unsigned char *) &olpc_platform_info.ecver, 1); |
| 243 | 258 | ||
| 244 | #ifdef CONFIG_PCI_OLPC | 259 | #ifdef CONFIG_PCI_OLPC |
| 245 | /* If the VSA exists let it emulate PCI, if not emulate in kernel */ | 260 | /* If the VSA exists let it emulate PCI, if not emulate in kernel. |
| 246 | if (!cs5535_has_vsa2()) | 261 | * XO-1 only. */ |
| 262 | if (olpc_platform_info.boardrev < olpc_board_pre(0xd0) && | ||
| 263 | !cs5535_has_vsa2()) | ||
| 247 | x86_init.pci.arch_init = pci_olpc_init; | 264 | x86_init.pci.arch_init = pci_olpc_init; |
| 248 | #endif | 265 | #endif |
| 249 | 266 | ||
| @@ -252,8 +269,12 @@ static int __init olpc_init(void) | |||
| 252 | olpc_platform_info.boardrev >> 4, | 269 | olpc_platform_info.boardrev >> 4, |
| 253 | olpc_platform_info.ecver); | 270 | olpc_platform_info.ecver); |
| 254 | 271 | ||
| 255 | unmap: | 272 | if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) { /* XO-1 */ |
| 256 | iounmap(romsig); | 273 | r = add_xo1_platform_devices(); |
| 274 | if (r) | ||
| 275 | return r; | ||
| 276 | } | ||
| 277 | |||
| 257 | return 0; | 278 | return 0; |
| 258 | } | 279 | } |
| 259 | 280 | ||
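The olpc_ec_cmd() hunk bounds what used to be an unconditional goto restart on EC timeouts: after ten restarts the command now fails instead of looping forever. A compilable model of that retry bound (try_once() is a hypothetical stand-in for the EC wait):

    #include <stdio.h>

    static int try_once(void)
    {
            return -1;      /* always times out, for demonstration */
    }

    int main(void)
    {
            int restarts = 0;

            while (try_once() != 0) {
                    if (restarts++ >= 10) {         /* mirrors restarts++ < 10 */
                            fprintf(stderr, "giving up after 10 restarts\n");
                            return 1;
                    }
            }
            return 0;
    }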
diff --git a/arch/x86/kernel/olpc_ofw.c b/arch/x86/kernel/olpc_ofw.c index 3218aa71ab5e..787320464379 100644 --- a/arch/x86/kernel/olpc_ofw.c +++ b/arch/x86/kernel/olpc_ofw.c | |||
| @@ -74,6 +74,12 @@ int __olpc_ofw(const char *name, int nr_args, const void **args, int nr_res, | |||
| 74 | } | 74 | } |
| 75 | EXPORT_SYMBOL_GPL(__olpc_ofw); | 75 | EXPORT_SYMBOL_GPL(__olpc_ofw); |
| 76 | 76 | ||
| 77 | bool olpc_ofw_present(void) | ||
| 78 | { | ||
| 79 | return olpc_ofw_cif != NULL; | ||
| 80 | } | ||
| 81 | EXPORT_SYMBOL_GPL(olpc_ofw_present); | ||
| 82 | |||
| 77 | /* OFW cif _should_ be above this address */ | 83 | /* OFW cif _should_ be above this address */ |
| 78 | #define OFW_MIN 0xff000000 | 84 | #define OFW_MIN 0xff000000 |
| 79 | 85 | ||
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1db183ed7c01..c5b250011fd4 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
| @@ -413,7 +413,6 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
| 413 | 413 | ||
| 414 | .alloc_pte = paravirt_nop, | 414 | .alloc_pte = paravirt_nop, |
| 415 | .alloc_pmd = paravirt_nop, | 415 | .alloc_pmd = paravirt_nop, |
| 416 | .alloc_pmd_clone = paravirt_nop, | ||
| 417 | .alloc_pud = paravirt_nop, | 416 | .alloc_pud = paravirt_nop, |
| 418 | .release_pte = paravirt_nop, | 417 | .release_pte = paravirt_nop, |
| 419 | .release_pmd = paravirt_nop, | 418 | .release_pmd = paravirt_nop, |
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 0f7f130caa67..c562207b1b3d 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
| @@ -39,7 +39,7 @@ | |||
| 39 | #include <asm/cacheflush.h> | 39 | #include <asm/cacheflush.h> |
| 40 | #include <asm/swiotlb.h> | 40 | #include <asm/swiotlb.h> |
| 41 | #include <asm/dma.h> | 41 | #include <asm/dma.h> |
| 42 | #include <asm/k8.h> | 42 | #include <asm/amd_nb.h> |
| 43 | #include <asm/x86_init.h> | 43 | #include <asm/x86_init.h> |
| 44 | 44 | ||
| 45 | static unsigned long iommu_bus_base; /* GART remapping area (physical) */ | 45 | static unsigned long iommu_bus_base; /* GART remapping area (physical) */ |
| @@ -560,8 +560,11 @@ static void enable_gart_translations(void) | |||
| 560 | { | 560 | { |
| 561 | int i; | 561 | int i; |
| 562 | 562 | ||
| 563 | for (i = 0; i < num_k8_northbridges; i++) { | 563 | if (!k8_northbridges.gart_supported) |
| 564 | struct pci_dev *dev = k8_northbridges[i]; | 564 | return; |
| 565 | |||
| 566 | for (i = 0; i < k8_northbridges.num; i++) { | ||
| 567 | struct pci_dev *dev = k8_northbridges.nb_misc[i]; | ||
| 565 | 568 | ||
| 566 | enable_gart_translation(dev, __pa(agp_gatt_table)); | 569 | enable_gart_translation(dev, __pa(agp_gatt_table)); |
| 567 | } | 570 | } |
| @@ -592,16 +595,19 @@ static void gart_fixup_northbridges(struct sys_device *dev) | |||
| 592 | if (!fix_up_north_bridges) | 595 | if (!fix_up_north_bridges) |
| 593 | return; | 596 | return; |
| 594 | 597 | ||
| 598 | if (!k8_northbridges.gart_supported) | ||
| 599 | return; | ||
| 600 | |||
| 595 | pr_info("PCI-DMA: Restoring GART aperture settings\n"); | 601 | pr_info("PCI-DMA: Restoring GART aperture settings\n"); |
| 596 | 602 | ||
| 597 | for (i = 0; i < num_k8_northbridges; i++) { | 603 | for (i = 0; i < k8_northbridges.num; i++) { |
| 598 | struct pci_dev *dev = k8_northbridges[i]; | 604 | struct pci_dev *dev = k8_northbridges.nb_misc[i]; |
| 599 | 605 | ||
| 600 | /* | 606 | /* |
| 601 | * Don't enable translations just yet. That is the next | 607 | * Don't enable translations just yet. That is the next |
| 602 | * step. Restore the pre-suspend aperture settings. | 608 | * step. Restore the pre-suspend aperture settings. |
| 603 | */ | 609 | */ |
| 604 | pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1); | 610 | gart_set_size_and_enable(dev, aperture_order); |
| 605 | pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); | 611 | pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); |
| 606 | } | 612 | } |
| 607 | } | 613 | } |
| @@ -649,8 +655,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
| 649 | 655 | ||
| 650 | aper_size = aper_base = info->aper_size = 0; | 656 | aper_size = aper_base = info->aper_size = 0; |
| 651 | dev = NULL; | 657 | dev = NULL; |
| 652 | for (i = 0; i < num_k8_northbridges; i++) { | 658 | for (i = 0; i < k8_northbridges.num; i++) { |
| 653 | dev = k8_northbridges[i]; | 659 | dev = k8_northbridges.nb_misc[i]; |
| 654 | new_aper_base = read_aperture(dev, &new_aper_size); | 660 | new_aper_base = read_aperture(dev, &new_aper_size); |
| 655 | if (!new_aper_base) | 661 | if (!new_aper_base) |
| 656 | goto nommu; | 662 | goto nommu; |
| @@ -718,10 +724,13 @@ static void gart_iommu_shutdown(void) | |||
| 718 | if (!no_agp) | 724 | if (!no_agp) |
| 719 | return; | 725 | return; |
| 720 | 726 | ||
| 721 | for (i = 0; i < num_k8_northbridges; i++) { | 727 | if (!k8_northbridges.gart_supported) |
| 728 | return; | ||
| 729 | |||
| 730 | for (i = 0; i < k8_northbridges.num; i++) { | ||
| 722 | u32 ctl; | 731 | u32 ctl; |
| 723 | 732 | ||
| 724 | dev = k8_northbridges[i]; | 733 | dev = k8_northbridges.nb_misc[i]; |
| 725 | pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); | 734 | pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); |
| 726 | 735 | ||
| 727 | ctl &= ~GARTEN; | 736 | ctl &= ~GARTEN; |
| @@ -739,7 +748,7 @@ int __init gart_iommu_init(void) | |||
| 739 | unsigned long scratch; | 748 | unsigned long scratch; |
| 740 | long i; | 749 | long i; |
| 741 | 750 | ||
| 742 | if (num_k8_northbridges == 0) | 751 | if (!k8_northbridges.gart_supported) |
| 743 | return 0; | 752 | return 0; |
| 744 | 753 | ||
| 745 | #ifndef CONFIG_AGP_AMD64 | 754 | #ifndef CONFIG_AGP_AMD64 |
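The GART hunks above all follow one conversion: the flat `k8_northbridges[]` array and `num_k8_northbridges` counter become a single structure with an explicit `gart_supported` flag, so every path bails out early on northbridges without a GART. A sketch of the assumed shape (field names are taken from the diff, the exact types are a guess; the real definition lives in `<asm/amd_nb.h>`):

```c
struct pci_dev;				/* opaque here; from <linux/pci.h> */

struct k8_northbridge_info {		/* assumed layout */
	int		num;		/* northbridges discovered */
	int		gart_supported;	/* any GART aperture present? */
	struct pci_dev	**nb_misc;	/* misc-function PCI devices */
};

extern struct k8_northbridge_info k8_northbridges;

/* The iteration idiom every converted GART path now follows: */
void gart_foreach_nb_sketch(void)
{
	int i;

	if (!k8_northbridges.gart_supported)	/* new early-out */
		return;

	for (i = 0; i < k8_northbridges.num; i++) {
		struct pci_dev *dev = k8_northbridges.nb_misc[i];

		/* ... program AMD64_GARTAPERTURE* registers on dev ... */
		(void)dev;
	}
}
```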
diff --git a/arch/x86/kernel/pmtimer_64.c b/arch/x86/kernel/pmtimer_64.c deleted file mode 100644 index b112406f1996..000000000000 --- a/arch/x86/kernel/pmtimer_64.c +++ /dev/null | |||
| @@ -1,69 +0,0 @@ | |||
| 1 | /* Ported over from i386 by AK, original copyright was: | ||
| 2 | * | ||
| 3 | * (C) Dominik Brodowski <linux@brodo.de> 2003 | ||
| 4 | * | ||
| 5 | * Driver to use the Power Management Timer (PMTMR) available in some | ||
| 6 | * southbridges as primary timing source for the Linux kernel. | ||
| 7 | * | ||
| 8 | * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, | ||
| 9 | * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. | ||
| 10 | * | ||
| 11 | * This file is licensed under the GPL v2. | ||
| 12 | * | ||
| 13 | * Dropped all the hardware bug workarounds for now. Hopefully they | ||
| 14 | * are not needed on 64bit chipsets. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/jiffies.h> | ||
| 18 | #include <linux/kernel.h> | ||
| 19 | #include <linux/time.h> | ||
| 20 | #include <linux/init.h> | ||
| 21 | #include <linux/cpumask.h> | ||
| 22 | #include <linux/acpi_pmtmr.h> | ||
| 23 | |||
| 24 | #include <asm/io.h> | ||
| 25 | #include <asm/proto.h> | ||
| 26 | #include <asm/msr.h> | ||
| 27 | #include <asm/vsyscall.h> | ||
| 28 | |||
| 29 | static inline u32 cyc2us(u32 cycles) | ||
| 30 | { | ||
| 31 | /* The Power Management Timer ticks at 3.579545 ticks per microsecond. | ||
| 32 | * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] | ||
| 33 | * | ||
| 34 | * Even with HZ = 100, delta is at maximum 35796 ticks, so it can | ||
| 35 | * easily be multiplied with 286 (=0x11E) without having to fear | ||
| 36 | * u32 overflows. | ||
| 37 | */ | ||
| 38 | cycles *= 286; | ||
| 39 | return (cycles >> 10); | ||
| 40 | } | ||
| 41 | |||
| 42 | static unsigned pmtimer_wait_tick(void) | ||
| 43 | { | ||
| 44 | u32 a, b; | ||
| 45 | for (a = b = inl(pmtmr_ioport) & ACPI_PM_MASK; | ||
| 46 | a == b; | ||
| 47 | b = inl(pmtmr_ioport) & ACPI_PM_MASK) | ||
| 48 | cpu_relax(); | ||
| 49 | return b; | ||
| 50 | } | ||
| 51 | |||
| 52 | /* note: wait time is rounded up to one tick */ | ||
| 53 | void pmtimer_wait(unsigned us) | ||
| 54 | { | ||
| 55 | u32 a, b; | ||
| 56 | a = pmtimer_wait_tick(); | ||
| 57 | do { | ||
| 58 | b = inl(pmtmr_ioport); | ||
| 59 | cpu_relax(); | ||
| 60 | } while (cyc2us(b - a) < us); | ||
| 61 | } | ||
| 62 | |||
| 63 | static int __init nopmtimer_setup(char *s) | ||
| 64 | { | ||
| 65 | pmtmr_ioport = 0; | ||
| 66 | return 1; | ||
| 67 | } | ||
| 68 | |||
| 69 | __setup("nopmtimer", nopmtimer_setup); | ||
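The deleted `cyc2us()` relied on approximating the PM timer period 1/3.579545 µs with the shift-friendly fraction 286/1024. A quick user-space check confirms the 0.024% error quoted in its comment:

```c
#include <stdio.h>

int main(void)
{
	double exact  = 1.0 / 3.579545;	/* us per PM-timer tick */
	double approx = 286.0 / 1024.0;	/* the shift-friendly stand-in */

	printf("exact=%.6f approx=%.6f error=%.4f%%\n",
	       exact, approx, 100.0 * (exact - approx) / exact);
	/* prints: exact=0.279365 approx=0.279297 error=0.0244% */
	return 0;
}
```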
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3d9ea531ddd1..b3d7a3a04f38 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -424,7 +424,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 424 | load_TLS(next, cpu); | 424 | load_TLS(next, cpu); |
| 425 | 425 | ||
| 426 | /* Must be after DS reload */ | 426 | /* Must be after DS reload */ |
| 427 | unlazy_fpu(prev_p); | 427 | __unlazy_fpu(prev_p); |
| 428 | 428 | ||
| 429 | /* Make sure cpu is ready for new context */ | 429 | /* Make sure cpu is ready for new context */ |
| 430 | if (preload_fpu) | 430 | if (preload_fpu) |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e3af342fe83a..7a4cf14223ba 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
| @@ -84,7 +84,7 @@ static int __init reboot_setup(char *str) | |||
| 84 | } | 84 | } |
| 85 | /* we will leave sorting out the final value | 85 | /* we will leave sorting out the final value |
| 86 | when we are ready to reboot, since we might not | 86 | when we are ready to reboot, since we might not |
| 87 | have set up boot_cpu_id or smp_num_cpu */ | 87 | have detected BSP APIC ID or smp_num_cpu */ |
| 88 | break; | 88 | break; |
| 89 | #endif /* CONFIG_SMP */ | 89 | #endif /* CONFIG_SMP */ |
| 90 | 90 | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 00e167870f71..a59f6a6df5e2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
| @@ -83,7 +83,6 @@ | |||
| 83 | #include <asm/dmi.h> | 83 | #include <asm/dmi.h> |
| 84 | #include <asm/io_apic.h> | 84 | #include <asm/io_apic.h> |
| 85 | #include <asm/ist.h> | 85 | #include <asm/ist.h> |
| 86 | #include <asm/vmi.h> | ||
| 87 | #include <asm/setup_arch.h> | 86 | #include <asm/setup_arch.h> |
| 88 | #include <asm/bios_ebda.h> | 87 | #include <asm/bios_ebda.h> |
| 89 | #include <asm/cacheflush.h> | 88 | #include <asm/cacheflush.h> |
| @@ -107,7 +106,7 @@ | |||
| 107 | #include <asm/percpu.h> | 106 | #include <asm/percpu.h> |
| 108 | #include <asm/topology.h> | 107 | #include <asm/topology.h> |
| 109 | #include <asm/apicdef.h> | 108 | #include <asm/apicdef.h> |
| 110 | #include <asm/k8.h> | 109 | #include <asm/amd_nb.h> |
| 111 | #ifdef CONFIG_X86_64 | 110 | #ifdef CONFIG_X86_64 |
| 112 | #include <asm/numa_64.h> | 111 | #include <asm/numa_64.h> |
| 113 | #endif | 112 | #endif |
| @@ -126,7 +125,6 @@ unsigned long max_pfn_mapped; | |||
| 126 | RESERVE_BRK(dmi_alloc, 65536); | 125 | RESERVE_BRK(dmi_alloc, 65536); |
| 127 | #endif | 126 | #endif |
| 128 | 127 | ||
| 129 | unsigned int boot_cpu_id __read_mostly; | ||
| 130 | 128 | ||
| 131 | static __initdata unsigned long _brk_start = (unsigned long)__brk_base; | 129 | static __initdata unsigned long _brk_start = (unsigned long)__brk_base; |
| 132 | unsigned long _brk_end = (unsigned long)__brk_base; | 130 | unsigned long _brk_end = (unsigned long)__brk_base; |
| @@ -619,79 +617,7 @@ static __init void reserve_ibft_region(void) | |||
| 619 | reserve_early_overlap_ok(addr, addr + size, "ibft"); | 617 | reserve_early_overlap_ok(addr, addr + size, "ibft"); |
| 620 | } | 618 | } |
| 621 | 619 | ||
| 622 | #ifdef CONFIG_X86_RESERVE_LOW_64K | 620 | static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; |
| 623 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) | ||
| 624 | { | ||
| 625 | printk(KERN_NOTICE | ||
| 626 | "%s detected: BIOS may corrupt low RAM, working around it.\n", | ||
| 627 | d->ident); | ||
| 628 | |||
| 629 | e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED); | ||
| 630 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | ||
| 631 | |||
| 632 | return 0; | ||
| 633 | } | ||
| 634 | #endif | ||
| 635 | |||
| 636 | /* List of systems that have known low memory corruption BIOS problems */ | ||
| 637 | static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | ||
| 638 | #ifdef CONFIG_X86_RESERVE_LOW_64K | ||
| 639 | { | ||
| 640 | .callback = dmi_low_memory_corruption, | ||
| 641 | .ident = "AMI BIOS", | ||
| 642 | .matches = { | ||
| 643 | DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), | ||
| 644 | }, | ||
| 645 | }, | ||
| 646 | { | ||
| 647 | .callback = dmi_low_memory_corruption, | ||
| 648 | .ident = "Phoenix BIOS", | ||
| 649 | .matches = { | ||
| 650 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"), | ||
| 651 | }, | ||
| 652 | }, | ||
| 653 | { | ||
| 654 | .callback = dmi_low_memory_corruption, | ||
| 655 | .ident = "Phoenix/MSC BIOS", | ||
| 656 | .matches = { | ||
| 657 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"), | ||
| 658 | }, | ||
| 659 | }, | ||
| 660 | /* | ||
| 661 | * AMI BIOS with low memory corruption was found on Intel DG45ID and | ||
| 662 | * DG45FC boards. | ||
| 663 | * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will | ||
| 664 | * match only DMI_BOARD_NAME and see if there is more bad products | ||
| 665 | * with this vendor. | ||
| 666 | */ | ||
| 667 | { | ||
| 668 | .callback = dmi_low_memory_corruption, | ||
| 669 | .ident = "AMI BIOS", | ||
| 670 | .matches = { | ||
| 671 | DMI_MATCH(DMI_BOARD_NAME, "DG45ID"), | ||
| 672 | }, | ||
| 673 | }, | ||
| 674 | { | ||
| 675 | .callback = dmi_low_memory_corruption, | ||
| 676 | .ident = "AMI BIOS", | ||
| 677 | .matches = { | ||
| 678 | DMI_MATCH(DMI_BOARD_NAME, "DG45FC"), | ||
| 679 | }, | ||
| 680 | }, | ||
| 681 | /* | ||
| 682 | * The Dell Inspiron Mini 1012 has DMI_BIOS_VENDOR = "Dell Inc.", so | ||
| 683 | * match on the product name. | ||
| 684 | */ | ||
| 685 | { | ||
| 686 | .callback = dmi_low_memory_corruption, | ||
| 687 | .ident = "Phoenix BIOS", | ||
| 688 | .matches = { | ||
| 689 | DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"), | ||
| 690 | }, | ||
| 691 | }, | ||
| 692 | #endif | ||
| 693 | {} | ||
| 694 | }; | ||
| 695 | 621 | ||
| 696 | static void __init trim_bios_range(void) | 622 | static void __init trim_bios_range(void) |
| 697 | { | 623 | { |
| @@ -699,8 +625,14 @@ static void __init trim_bios_range(void) | |||
| 699 | * A special case is the first 4Kb of memory; | 625 | * A special case is the first 4Kb of memory; |
| 700 | * This is a BIOS owned area, not kernel ram, but generally | 626 | * This is a BIOS owned area, not kernel ram, but generally |
| 701 | * not listed as such in the E820 table. | 627 | * not listed as such in the E820 table. |
| 628 | * | ||
| 629 | * This typically reserves additional memory (64KiB by default) | ||
| 630 | * since some BIOSes are known to corrupt low memory. See the | ||
| 631 | * Kconfig help text for X86_RESERVE_LOW. | ||
| 702 | */ | 632 | */ |
| 703 | e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED); | 633 | e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE), |
| 634 | E820_RAM, E820_RESERVED); | ||
| 635 | |||
| 704 | /* | 636 | /* |
| 705 | * special case: Some BIOSen report the PC BIOS | 637 | * special case: Some BIOSen report the PC BIOS |
| 706 | * area (640->1Mb) as ram even though it is not. | 638 | * area (640->1Mb) as ram even though it is not. |
| @@ -710,6 +642,28 @@ static void __init trim_bios_range(void) | |||
| 710 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 642 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
| 711 | } | 643 | } |
| 712 | 644 | ||
| 645 | static int __init parse_reservelow(char *p) | ||
| 646 | { | ||
| 647 | unsigned long long size; | ||
| 648 | |||
| 649 | if (!p) | ||
| 650 | return -EINVAL; | ||
| 651 | |||
| 652 | size = memparse(p, &p); | ||
| 653 | |||
| 654 | if (size < 4096) | ||
| 655 | size = 4096; | ||
| 656 | |||
| 657 | if (size > 640*1024) | ||
| 658 | size = 640*1024; | ||
| 659 | |||
| 660 | reserve_low = size; | ||
| 661 | |||
| 662 | return 0; | ||
| 663 | } | ||
| 664 | |||
| 665 | early_param("reservelow", parse_reservelow); | ||
| 666 | |||
| 713 | /* | 667 | /* |
| 714 | * Determine if we were loaded by an EFI loader. If so, then we have also been | 668 | * Determine if we were loaded by an EFI loader. If so, then we have also been |
| 715 | * passed the efi memmap, systab, etc., so we should use these data structures | 669 | * passed the efi memmap, systab, etc., so we should use these data structures |
| @@ -736,10 +690,10 @@ void __init setup_arch(char **cmdline_p) | |||
| 736 | printk(KERN_INFO "Command line: %s\n", boot_command_line); | 690 | printk(KERN_INFO "Command line: %s\n", boot_command_line); |
| 737 | #endif | 691 | #endif |
| 738 | 692 | ||
| 739 | /* VMI may relocate the fixmap; do this before touching ioremap area */ | 693 | /* |
| 740 | vmi_init(); | 694 | * If we have OLPC OFW, we might end up relocating the fixmap due to |
| 741 | 695 | * reserve_top(), so do this before touching the ioremap area. | |
| 742 | /* OFW also may relocate the fixmap */ | 696 | */ |
| 743 | olpc_ofw_detect(); | 697 | olpc_ofw_detect(); |
| 744 | 698 | ||
| 745 | early_trap_init(); | 699 | early_trap_init(); |
| @@ -840,9 +794,6 @@ void __init setup_arch(char **cmdline_p) | |||
| 840 | 794 | ||
| 841 | x86_report_nx(); | 795 | x86_report_nx(); |
| 842 | 796 | ||
| 843 | /* Must be before kernel pagetables are setup */ | ||
| 844 | vmi_activate(); | ||
| 845 | |||
| 846 | /* after early param, so could get panic from serial */ | 797 | /* after early param, so could get panic from serial */ |
| 847 | reserve_early_setup_data(); | 798 | reserve_early_setup_data(); |
| 848 | 799 | ||
| @@ -865,8 +816,6 @@ void __init setup_arch(char **cmdline_p) | |||
| 865 | 816 | ||
| 866 | dmi_scan_machine(); | 817 | dmi_scan_machine(); |
| 867 | 818 | ||
| 868 | dmi_check_system(bad_bios_dmi_table); | ||
| 869 | |||
| 870 | /* | 819 | /* |
| 871 | * VMware detection requires dmi to be available, so this | 820 | * VMware detection requires dmi to be available, so this |
| 872 | * needs to be done after dmi_scan_machine, for the BP. | 821 | * needs to be done after dmi_scan_machine, for the BP. |
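The setup.c changes replace the DMI blacklist of low-memory-corrupting BIOSes with an unconditional low-RAM reservation (default taken from `CONFIG_X86_RESERVE_LOW`), tunable via the new `reservelow=` early parameter. A standalone restatement of the clamping semantics; `parse_size()` is a stand-in for the kernel's `memparse()`, which accepts k/M/G suffixes:

```c
#include <stdio.h>
#include <stdlib.h>

static unsigned long long parse_size(const char *s)	/* memparse stand-in */
{
	char *end;
	unsigned long long v = strtoull(s, &end, 0);

	switch (*end) {
	case 'G': case 'g': v <<= 10;	/* fall through */
	case 'M': case 'm': v <<= 10;	/* fall through */
	case 'K': case 'k': v <<= 10; break;
	}
	return v;
}

static unsigned long clamp_reservelow(const char *arg)
{
	unsigned long long size = parse_size(arg);

	if (size < 4096)		/* at least one page */
		size = 4096;
	if (size > 640 * 1024)		/* at most the whole low-memory area */
		size = 640 * 1024;
	return (unsigned long)size;
}

int main(void)
{
	printf("%lu\n", clamp_reservelow("64k"));	/* 65536  */
	printf("%lu\n", clamp_reservelow("1"));		/* 4096   */
	printf("%lu\n", clamp_reservelow("2M"));	/* 655360 */
	return 0;
}
```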
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index a60df9ae6454..2335c15c93a4 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
| @@ -253,7 +253,7 @@ void __init setup_per_cpu_areas(void) | |||
| 253 | * Up to this point, the boot CPU has been using .init.data | 253 | * Up to this point, the boot CPU has been using .init.data |
| 254 | * area. Reload any changed state for the boot CPU. | 254 | * area. Reload any changed state for the boot CPU. |
| 255 | */ | 255 | */ |
| 256 | if (cpu == boot_cpu_id) | 256 | if (!cpu) |
| 257 | switch_to_new_gdt(cpu); | 257 | switch_to_new_gdt(cpu); |
| 258 | } | 258 | } |
| 259 | 259 | ||
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c index cb22acf3ed09..dd4c281ffe57 100644 --- a/arch/x86/kernel/sfi.c +++ b/arch/x86/kernel/sfi.c | |||
| @@ -34,7 +34,7 @@ | |||
| 34 | #ifdef CONFIG_X86_LOCAL_APIC | 34 | #ifdef CONFIG_X86_LOCAL_APIC |
| 35 | static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; | 35 | static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; |
| 36 | 36 | ||
| 37 | void __init mp_sfi_register_lapic_address(unsigned long address) | 37 | static void __init mp_sfi_register_lapic_address(unsigned long address) |
| 38 | { | 38 | { |
| 39 | mp_lapic_addr = address; | 39 | mp_lapic_addr = address; |
| 40 | 40 | ||
| @@ -46,7 +46,7 @@ void __init mp_sfi_register_lapic_address(unsigned long address) | |||
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | /* All CPUs enumerated by SFI must be present and enabled */ | 48 | /* All CPUs enumerated by SFI must be present and enabled */ |
| 49 | void __cpuinit mp_sfi_register_lapic(u8 id) | 49 | static void __cpuinit mp_sfi_register_lapic(u8 id) |
| 50 | { | 50 | { |
| 51 | if (MAX_APICS - id <= 0) { | 51 | if (MAX_APICS - id <= 0) { |
| 52 | pr_warning("Processor #%d invalid (max %d)\n", | 52 | pr_warning("Processor #%d invalid (max %d)\n", |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8b3bfc4dd708..dfb50890b5b7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
| @@ -62,7 +62,7 @@ | |||
| 62 | #include <asm/pgtable.h> | 62 | #include <asm/pgtable.h> |
| 63 | #include <asm/tlbflush.h> | 63 | #include <asm/tlbflush.h> |
| 64 | #include <asm/mtrr.h> | 64 | #include <asm/mtrr.h> |
| 65 | #include <asm/vmi.h> | 65 | #include <asm/mwait.h> |
| 66 | #include <asm/apic.h> | 66 | #include <asm/apic.h> |
| 67 | #include <asm/setup.h> | 67 | #include <asm/setup.h> |
| 68 | #include <asm/uv/uv.h> | 68 | #include <asm/uv/uv.h> |
| @@ -311,7 +311,6 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
| 311 | __flush_tlb_all(); | 311 | __flush_tlb_all(); |
| 312 | #endif | 312 | #endif |
| 313 | 313 | ||
| 314 | vmi_bringup(); | ||
| 315 | cpu_init(); | 314 | cpu_init(); |
| 316 | preempt_disable(); | 315 | preempt_disable(); |
| 317 | smp_callin(); | 316 | smp_callin(); |
| @@ -324,9 +323,9 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
| 324 | check_tsc_sync_target(); | 323 | check_tsc_sync_target(); |
| 325 | 324 | ||
| 326 | if (nmi_watchdog == NMI_IO_APIC) { | 325 | if (nmi_watchdog == NMI_IO_APIC) { |
| 327 | legacy_pic->chip->mask(0); | 326 | legacy_pic->mask(0); |
| 328 | enable_NMI_through_LVT0(); | 327 | enable_NMI_through_LVT0(); |
| 329 | legacy_pic->chip->unmask(0); | 328 | legacy_pic->unmask(0); |
| 330 | } | 329 | } |
| 331 | 330 | ||
| 332 | /* This must be done before setting cpu_online_mask */ | 331 | /* This must be done before setting cpu_online_mask */ |
| @@ -397,6 +396,19 @@ void __cpuinit smp_store_cpu_info(int id) | |||
| 397 | identify_secondary_cpu(c); | 396 | identify_secondary_cpu(c); |
| 398 | } | 397 | } |
| 399 | 398 | ||
| 399 | static void __cpuinit link_thread_siblings(int cpu1, int cpu2) | ||
| 400 | { | ||
| 401 | struct cpuinfo_x86 *c1 = &cpu_data(cpu1); | ||
| 402 | struct cpuinfo_x86 *c2 = &cpu_data(cpu2); | ||
| 403 | |||
| 404 | cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2)); | ||
| 405 | cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1)); | ||
| 406 | cpumask_set_cpu(cpu1, cpu_core_mask(cpu2)); | ||
| 407 | cpumask_set_cpu(cpu2, cpu_core_mask(cpu1)); | ||
| 408 | cpumask_set_cpu(cpu1, c2->llc_shared_map); | ||
| 409 | cpumask_set_cpu(cpu2, c1->llc_shared_map); | ||
| 410 | } | ||
| 411 | |||
| 400 | 412 | ||
| 401 | void __cpuinit set_cpu_sibling_map(int cpu) | 413 | void __cpuinit set_cpu_sibling_map(int cpu) |
| 402 | { | 414 | { |
| @@ -409,14 +421,13 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
| 409 | for_each_cpu(i, cpu_sibling_setup_mask) { | 421 | for_each_cpu(i, cpu_sibling_setup_mask) { |
| 410 | struct cpuinfo_x86 *o = &cpu_data(i); | 422 | struct cpuinfo_x86 *o = &cpu_data(i); |
| 411 | 423 | ||
| 412 | if (c->phys_proc_id == o->phys_proc_id && | 424 | if (cpu_has(c, X86_FEATURE_TOPOEXT)) { |
| 413 | c->cpu_core_id == o->cpu_core_id) { | 425 | if (c->phys_proc_id == o->phys_proc_id && |
| 414 | cpumask_set_cpu(i, cpu_sibling_mask(cpu)); | 426 | c->compute_unit_id == o->compute_unit_id) |
| 415 | cpumask_set_cpu(cpu, cpu_sibling_mask(i)); | 427 | link_thread_siblings(cpu, i); |
| 416 | cpumask_set_cpu(i, cpu_core_mask(cpu)); | 428 | } else if (c->phys_proc_id == o->phys_proc_id && |
| 417 | cpumask_set_cpu(cpu, cpu_core_mask(i)); | 429 | c->cpu_core_id == o->cpu_core_id) { |
| 418 | cpumask_set_cpu(i, c->llc_shared_map); | 430 | link_thread_siblings(cpu, i); |
| 419 | cpumask_set_cpu(cpu, o->llc_shared_map); | ||
| 420 | } | 431 | } |
| 421 | } | 432 | } |
| 422 | } else { | 433 | } else { |
| @@ -1109,8 +1120,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
| 1109 | } | 1120 | } |
| 1110 | set_cpu_sibling_map(0); | 1121 | set_cpu_sibling_map(0); |
| 1111 | 1122 | ||
| 1112 | enable_IR_x2apic(); | ||
| 1113 | default_setup_apic_routing(); | ||
| 1114 | 1123 | ||
| 1115 | if (smp_sanity_check(max_cpus) < 0) { | 1124 | if (smp_sanity_check(max_cpus) < 0) { |
| 1116 | printk(KERN_INFO "SMP disabled\n"); | 1125 | printk(KERN_INFO "SMP disabled\n"); |
| @@ -1118,6 +1127,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
| 1118 | goto out; | 1127 | goto out; |
| 1119 | } | 1128 | } |
| 1120 | 1129 | ||
| 1130 | default_setup_apic_routing(); | ||
| 1131 | |||
| 1121 | preempt_disable(); | 1132 | preempt_disable(); |
| 1122 | if (read_apic_id() != boot_cpu_physical_apicid) { | 1133 | if (read_apic_id() != boot_cpu_physical_apicid) { |
| 1123 | panic("Boot APIC ID in local APIC unexpected (%d vs %d)", | 1134 | panic("Boot APIC ID in local APIC unexpected (%d vs %d)", |
| @@ -1383,11 +1394,88 @@ void play_dead_common(void) | |||
| 1383 | local_irq_disable(); | 1394 | local_irq_disable(); |
| 1384 | } | 1395 | } |
| 1385 | 1396 | ||
| 1397 | /* | ||
| 1398 | * We need to flush the caches before going to sleep, lest we have | ||
| 1399 | * dirty data in our caches when we come back up. | ||
| 1400 | */ | ||
| 1401 | static inline void mwait_play_dead(void) | ||
| 1402 | { | ||
| 1403 | unsigned int eax, ebx, ecx, edx; | ||
| 1404 | unsigned int highest_cstate = 0; | ||
| 1405 | unsigned int highest_subcstate = 0; | ||
| 1406 | int i; | ||
| 1407 | void *mwait_ptr; | ||
| 1408 | |||
| 1409 | if (!cpu_has(¤t_cpu_data, X86_FEATURE_MWAIT)) | ||
| 1410 | return; | ||
| 1411 | if (!cpu_has(¤t_cpu_data, X86_FEATURE_CLFLSH)) | ||
| 1412 | return; | ||
| 1413 | if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) | ||
| 1414 | return; | ||
| 1415 | |||
| 1416 | eax = CPUID_MWAIT_LEAF; | ||
| 1417 | ecx = 0; | ||
| 1418 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
| 1419 | |||
| 1420 | /* | ||
| 1421 | * eax will be 0 if EDX enumeration is not valid. | ||
| 1422 | * Initialized below to cstate, sub_cstate value when EDX is valid. | ||
| 1423 | */ | ||
| 1424 | if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { | ||
| 1425 | eax = 0; | ||
| 1426 | } else { | ||
| 1427 | edx >>= MWAIT_SUBSTATE_SIZE; | ||
| 1428 | for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { | ||
| 1429 | if (edx & MWAIT_SUBSTATE_MASK) { | ||
| 1430 | highest_cstate = i; | ||
| 1431 | highest_subcstate = edx & MWAIT_SUBSTATE_MASK; | ||
| 1432 | } | ||
| 1433 | } | ||
| 1434 | eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | | ||
| 1435 | (highest_subcstate - 1); | ||
| 1436 | } | ||
| 1437 | |||
| 1438 | /* | ||
| 1439 | * This should be a memory location in a cache line which is | ||
| 1440 | * unlikely to be touched by other processors. The actual | ||
| 1441 | * content is immaterial as it is not actually modified in any way. | ||
| 1442 | */ | ||
| 1443 | mwait_ptr = ¤t_thread_info()->flags; | ||
| 1444 | |||
| 1445 | wbinvd(); | ||
| 1446 | |||
| 1447 | while (1) { | ||
| 1448 | /* | ||
| 1449 | * The CLFLUSH is a workaround for erratum AAI65 for | ||
| 1450 | * the Xeon 7400 series. It's not clear it is actually | ||
| 1451 | * needed, but it should be harmless in either case. | ||
| 1452 | * The WBINVD is insufficient due to the spurious-wakeup | ||
| 1453 | * case where we return around the loop. | ||
| 1454 | */ | ||
| 1455 | clflush(mwait_ptr); | ||
| 1456 | __monitor(mwait_ptr, 0, 0); | ||
| 1457 | mb(); | ||
| 1458 | __mwait(eax, 0); | ||
| 1459 | } | ||
| 1460 | } | ||
| 1461 | |||
| 1462 | static inline void hlt_play_dead(void) | ||
| 1463 | { | ||
| 1464 | if (current_cpu_data.x86 >= 4) | ||
| 1465 | wbinvd(); | ||
| 1466 | |||
| 1467 | while (1) { | ||
| 1468 | native_halt(); | ||
| 1469 | } | ||
| 1470 | } | ||
| 1471 | |||
| 1386 | void native_play_dead(void) | 1472 | void native_play_dead(void) |
| 1387 | { | 1473 | { |
| 1388 | play_dead_common(); | 1474 | play_dead_common(); |
| 1389 | tboot_shutdown(TB_SHUTDOWN_WFS); | 1475 | tboot_shutdown(TB_SHUTDOWN_WFS); |
| 1390 | wbinvd_halt(); | 1476 | |
| 1477 | mwait_play_dead(); /* Only returns on failure */ | ||
| 1478 | hlt_play_dead(); | ||
| 1391 | } | 1479 | } |
| 1392 | 1480 | ||
| 1393 | #else /* ... !CONFIG_HOTPLUG_CPU */ | 1481 | #else /* ... !CONFIG_HOTPLUG_CPU */ |
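The new `mwait_play_dead()` derives its MWAIT hint from CPUID leaf 5, whose EDX reports the number of sub-C-states per C-state in successive 4-bit fields. A standalone restatement of that decode, with constants mirroring `<asm/mwait.h>`; the kernel only takes this branch when ECX advertises the enumeration extension, so at least one populated sub-state is expected:

```c
/* Constants as in <asm/mwait.h>. */
#define MWAIT_SUBSTATE_SIZE	4
#define MWAIT_SUBSTATE_MASK	0xf

unsigned int deepest_mwait_hint(unsigned int edx)
{
	unsigned int highest_cstate = 0;
	unsigned int highest_subcstate = 0;
	int i;

	edx >>= MWAIT_SUBSTATE_SIZE;		/* skip the C0 field */
	for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
		if (edx & MWAIT_SUBSTATE_MASK) {
			highest_cstate = i;	/* deepest populated C-state */
			highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
		}
	}
	/* Encode (C-state, sub-state - 1) into the EAX hint for MWAIT. */
	return (highest_cstate << MWAIT_SUBSTATE_SIZE) |
	       (highest_subcstate - 1);
}
```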
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index d5e06624e34a..0b0cb5fede19 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c | |||
| @@ -33,8 +33,8 @@ int kernel_execve(const char *filename, | |||
| 33 | const char *const envp[]) | 33 | const char *const envp[]) |
| 34 | { | 34 | { |
| 35 | long __res; | 35 | long __res; |
| 36 | asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" | 36 | asm volatile ("int $0x80" |
| 37 | : "=a" (__res) | 37 | : "=a" (__res) |
| 38 | : "0" (__NR_execve), "ri" (filename), "c" (argv), "d" (envp) : "memory"); | 38 | : "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory"); |
| 39 | return __res; | 39 | return __res; |
| 40 | } | 40 | } |
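The `kernel_execve()` change drops the manual save/restore of `%ebx` in favor of letting the compiler load it through the `"b"` constraint; the push/mov/pop dance is only needed where `%ebx` is reserved, e.g. as the PIC register in `-fPIC` user code, which does not apply to the kernel. A user-space analogue as a sketch (build with `-m32`, non-PIC; 11 is `__NR_execve` on i386):

```c
long execve_int80(const char *filename, char *const argv[],
		  char *const envp[])
{
	long res;

	/* "b" tells the compiler to place filename in %ebx itself. */
	asm volatile ("int $0x80"
		      : "=a" (res)
		      : "0" (11),		/* __NR_execve on i386 */
			"b" (filename), "c" (argv), "d" (envp)
		      : "memory");
	return res;
}
```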
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 60788dee0f8a..d43968503dd2 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
| @@ -776,21 +776,10 @@ asmlinkage void math_state_restore(void) | |||
| 776 | } | 776 | } |
| 777 | EXPORT_SYMBOL_GPL(math_state_restore); | 777 | EXPORT_SYMBOL_GPL(math_state_restore); |
| 778 | 778 | ||
| 779 | #ifndef CONFIG_MATH_EMULATION | ||
| 780 | void math_emulate(struct math_emu_info *info) | ||
| 781 | { | ||
| 782 | printk(KERN_EMERG | ||
| 783 | "math-emulation not enabled and no coprocessor found.\n"); | ||
| 784 | printk(KERN_EMERG "killing %s.\n", current->comm); | ||
| 785 | force_sig(SIGFPE, current); | ||
| 786 | schedule(); | ||
| 787 | } | ||
| 788 | #endif /* CONFIG_MATH_EMULATION */ | ||
| 789 | |||
| 790 | dotraplinkage void __kprobes | 779 | dotraplinkage void __kprobes |
| 791 | do_device_not_available(struct pt_regs *regs, long error_code) | 780 | do_device_not_available(struct pt_regs *regs, long error_code) |
| 792 | { | 781 | { |
| 793 | #ifdef CONFIG_X86_32 | 782 | #ifdef CONFIG_MATH_EMULATION |
| 794 | if (read_cr0() & X86_CR0_EM) { | 783 | if (read_cr0() & X86_CR0_EM) { |
| 795 | struct math_emu_info info = { }; | 784 | struct math_emu_info info = { }; |
| 796 | 785 | ||
| @@ -798,12 +787,12 @@ do_device_not_available(struct pt_regs *regs, long error_code) | |||
| 798 | 787 | ||
| 799 | info.regs = regs; | 788 | info.regs = regs; |
| 800 | math_emulate(&info); | 789 | math_emulate(&info); |
| 801 | } else { | 790 | return; |
| 802 | math_state_restore(); /* interrupts still off */ | ||
| 803 | conditional_sti(regs); | ||
| 804 | } | 791 | } |
| 805 | #else | 792 | #endif |
| 806 | math_state_restore(); | 793 | math_state_restore(); /* interrupts still off */ |
| 794 | #ifdef CONFIG_X86_32 | ||
| 795 | conditional_sti(regs); | ||
| 807 | #endif | 796 | #endif |
| 808 | } | 797 | } |
| 809 | 798 | ||
| @@ -881,18 +870,6 @@ void __init trap_init(void) | |||
| 881 | #endif | 870 | #endif |
| 882 | 871 | ||
| 883 | #ifdef CONFIG_X86_32 | 872 | #ifdef CONFIG_X86_32 |
| 884 | if (cpu_has_fxsr) { | ||
| 885 | printk(KERN_INFO "Enabling fast FPU save and restore... "); | ||
| 886 | set_in_cr4(X86_CR4_OSFXSR); | ||
| 887 | printk("done.\n"); | ||
| 888 | } | ||
| 889 | if (cpu_has_xmm) { | ||
| 890 | printk(KERN_INFO | ||
| 891 | "Enabling unmasked SIMD FPU exception support... "); | ||
| 892 | set_in_cr4(X86_CR4_OSXMMEXCPT); | ||
| 893 | printk("done.\n"); | ||
| 894 | } | ||
| 895 | |||
| 896 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); | 873 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); |
| 897 | set_bit(SYSCALL_VECTOR, used_vectors); | 874 | set_bit(SYSCALL_VECTOR, used_vectors); |
| 898 | #endif | 875 | #endif |
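After the traps.c hunks, the emulation branch of `do_device_not_available()` keys off `CONFIG_MATH_EMULATION` rather than `CONFIG_X86_32`, so kernels built without the emulator no longer need the stub `math_emulate()` deleted above. A consolidated restatement of the resulting control flow (a kernel-context sketch using the same symbols as traps.c, not the literal kernel text):

```c
void do_device_not_available_sketch(struct pt_regs *regs)
{
#ifdef CONFIG_MATH_EMULATION
	if (read_cr0() & X86_CR0_EM) {		/* no FPU: emulate */
		struct math_emu_info info = { .regs = regs };

		conditional_sti(regs);
		math_emulate(&info);
		return;				/* emulated, done */
	}
#endif
	math_state_restore();			/* interrupts still off */
#ifdef CONFIG_X86_32
	conditional_sti(regs);
#endif
}
```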
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 26a863a9c2a8..0c40d8b72416 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
| @@ -104,10 +104,14 @@ int __init notsc_setup(char *str) | |||
| 104 | 104 | ||
| 105 | __setup("notsc", notsc_setup); | 105 | __setup("notsc", notsc_setup); |
| 106 | 106 | ||
| 107 | static int no_sched_irq_time; | ||
| 108 | |||
| 107 | static int __init tsc_setup(char *str) | 109 | static int __init tsc_setup(char *str) |
| 108 | { | 110 | { |
| 109 | if (!strcmp(str, "reliable")) | 111 | if (!strcmp(str, "reliable")) |
| 110 | tsc_clocksource_reliable = 1; | 112 | tsc_clocksource_reliable = 1; |
| 113 | if (!strncmp(str, "noirqtime", 9)) | ||
| 114 | no_sched_irq_time = 1; | ||
| 111 | return 1; | 115 | return 1; |
| 112 | } | 116 | } |
| 113 | 117 | ||
| @@ -801,6 +805,7 @@ void mark_tsc_unstable(char *reason) | |||
| 801 | if (!tsc_unstable) { | 805 | if (!tsc_unstable) { |
| 802 | tsc_unstable = 1; | 806 | tsc_unstable = 1; |
| 803 | sched_clock_stable = 0; | 807 | sched_clock_stable = 0; |
| 808 | disable_sched_clock_irqtime(); | ||
| 804 | printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); | 809 | printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); |
| 805 | /* Change only the rating, when not registered */ | 810 | /* Change only the rating, when not registered */ |
| 806 | if (clocksource_tsc.mult) | 811 | if (clocksource_tsc.mult) |
| @@ -892,60 +897,6 @@ static void __init init_tsc_clocksource(void) | |||
| 892 | clocksource_register_khz(&clocksource_tsc, tsc_khz); | 897 | clocksource_register_khz(&clocksource_tsc, tsc_khz); |
| 893 | } | 898 | } |
| 894 | 899 | ||
| 895 | #ifdef CONFIG_X86_64 | ||
| 896 | /* | ||
| 897 | * calibrate_cpu is used on systems with fixed rate TSCs to determine | ||
| 898 | * processor frequency | ||
| 899 | */ | ||
| 900 | #define TICK_COUNT 100000000 | ||
| 901 | static unsigned long __init calibrate_cpu(void) | ||
| 902 | { | ||
| 903 | int tsc_start, tsc_now; | ||
| 904 | int i, no_ctr_free; | ||
| 905 | unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0; | ||
| 906 | unsigned long flags; | ||
| 907 | |||
| 908 | for (i = 0; i < 4; i++) | ||
| 909 | if (avail_to_resrv_perfctr_nmi_bit(i)) | ||
| 910 | break; | ||
| 911 | no_ctr_free = (i == 4); | ||
| 912 | if (no_ctr_free) { | ||
| 913 | WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... " | ||
| 914 | "cpu_khz value may be incorrect.\n"); | ||
| 915 | i = 3; | ||
| 916 | rdmsrl(MSR_K7_EVNTSEL3, evntsel3); | ||
| 917 | wrmsrl(MSR_K7_EVNTSEL3, 0); | ||
| 918 | rdmsrl(MSR_K7_PERFCTR3, pmc3); | ||
| 919 | } else { | ||
| 920 | reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
| 921 | reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
| 922 | } | ||
| 923 | local_irq_save(flags); | ||
| 924 | /* start measuring cycles, incrementing from 0 */ | ||
| 925 | wrmsrl(MSR_K7_PERFCTR0 + i, 0); | ||
| 926 | wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76); | ||
| 927 | rdtscl(tsc_start); | ||
| 928 | do { | ||
| 929 | rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now); | ||
| 930 | tsc_now = get_cycles(); | ||
| 931 | } while ((tsc_now - tsc_start) < TICK_COUNT); | ||
| 932 | |||
| 933 | local_irq_restore(flags); | ||
| 934 | if (no_ctr_free) { | ||
| 935 | wrmsrl(MSR_K7_EVNTSEL3, 0); | ||
| 936 | wrmsrl(MSR_K7_PERFCTR3, pmc3); | ||
| 937 | wrmsrl(MSR_K7_EVNTSEL3, evntsel3); | ||
| 938 | } else { | ||
| 939 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
| 940 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
| 941 | } | ||
| 942 | |||
| 943 | return pmc_now * tsc_khz / (tsc_now - tsc_start); | ||
| 944 | } | ||
| 945 | #else | ||
| 946 | static inline unsigned long calibrate_cpu(void) { return cpu_khz; } | ||
| 947 | #endif | ||
| 948 | |||
| 949 | void __init tsc_init(void) | 900 | void __init tsc_init(void) |
| 950 | { | 901 | { |
| 951 | u64 lpj; | 902 | u64 lpj; |
| @@ -964,10 +915,6 @@ void __init tsc_init(void) | |||
| 964 | return; | 915 | return; |
| 965 | } | 916 | } |
| 966 | 917 | ||
| 967 | if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && | ||
| 968 | (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) | ||
| 969 | cpu_khz = calibrate_cpu(); | ||
| 970 | |||
| 971 | printk("Detected %lu.%03lu MHz processor.\n", | 918 | printk("Detected %lu.%03lu MHz processor.\n", |
| 972 | (unsigned long)cpu_khz / 1000, | 919 | (unsigned long)cpu_khz / 1000, |
| 973 | (unsigned long)cpu_khz % 1000); | 920 | (unsigned long)cpu_khz % 1000); |
| @@ -987,6 +934,9 @@ void __init tsc_init(void) | |||
| 987 | /* now allow native_sched_clock() to use rdtsc */ | 934 | /* now allow native_sched_clock() to use rdtsc */ |
| 988 | tsc_disabled = 0; | 935 | tsc_disabled = 0; |
| 989 | 936 | ||
| 937 | if (!no_sched_irq_time) | ||
| 938 | enable_sched_clock_irqtime(); | ||
| 939 | |||
| 990 | lpj = ((u64)tsc_khz * 1000); | 940 | lpj = ((u64)tsc_khz * 1000); |
| 991 | do_div(lpj, HZ); | 941 | do_div(lpj, HZ); |
| 992 | lpj_fine = lpj; | 942 | lpj_fine = lpj; |
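The tsc.c change ties IRQ-time accounting to TSC health: it is enabled from `tsc_init()` unless the user boots with `tsc=noirqtime`, and `mark_tsc_unstable()` switches it off again at runtime. A standalone restatement of the option parsing; note that the `strncmp(..., 9)` call is a prefix match, so any string beginning with "noirqtime" also triggers the flag:

```c
#include <stdio.h>
#include <string.h>

static int tsc_clocksource_reliable;
static int no_sched_irq_time;

static int tsc_setup_sketch(const char *str)
{
	if (!strcmp(str, "reliable"))
		tsc_clocksource_reliable = 1;
	if (!strncmp(str, "noirqtime", 9))
		no_sched_irq_time = 1;
	return 1;
}

int main(void)
{
	tsc_setup_sketch("noirqtime");
	printf("reliable=%d irqtime-accounting-disabled=%d\n",
	       tsc_clocksource_reliable, no_sched_irq_time);	/* 0 1 */
	return 0;
}
```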
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index 1132129db792..7b24460917d5 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c | |||
| @@ -28,34 +28,21 @@ struct uv_irq_2_mmr_pnode{ | |||
| 28 | static spinlock_t uv_irq_lock; | 28 | static spinlock_t uv_irq_lock; |
| 29 | static struct rb_root uv_irq_root; | 29 | static struct rb_root uv_irq_root; |
| 30 | 30 | ||
| 31 | static int uv_set_irq_affinity(unsigned int, const struct cpumask *); | 31 | static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool); |
| 32 | 32 | ||
| 33 | static void uv_noop(unsigned int irq) | 33 | static void uv_noop(struct irq_data *data) { } |
| 34 | { | ||
| 35 | } | ||
| 36 | |||
| 37 | static unsigned int uv_noop_ret(unsigned int irq) | ||
| 38 | { | ||
| 39 | return 0; | ||
| 40 | } | ||
| 41 | 34 | ||
| 42 | static void uv_ack_apic(unsigned int irq) | 35 | static void uv_ack_apic(struct irq_data *data) |
| 43 | { | 36 | { |
| 44 | ack_APIC_irq(); | 37 | ack_APIC_irq(); |
| 45 | } | 38 | } |
| 46 | 39 | ||
| 47 | static struct irq_chip uv_irq_chip = { | 40 | static struct irq_chip uv_irq_chip = { |
| 48 | .name = "UV-CORE", | 41 | .name = "UV-CORE", |
| 49 | .startup = uv_noop_ret, | 42 | .irq_mask = uv_noop, |
| 50 | .shutdown = uv_noop, | 43 | .irq_unmask = uv_noop, |
| 51 | .enable = uv_noop, | 44 | .irq_eoi = uv_ack_apic, |
| 52 | .disable = uv_noop, | 45 | .irq_set_affinity = uv_set_irq_affinity, |
| 53 | .ack = uv_noop, | ||
| 54 | .mask = uv_noop, | ||
| 55 | .unmask = uv_noop, | ||
| 56 | .eoi = uv_ack_apic, | ||
| 57 | .end = uv_noop, | ||
| 58 | .set_affinity = uv_set_irq_affinity, | ||
| 59 | }; | 46 | }; |
| 60 | 47 | ||
| 61 | /* | 48 | /* |
| @@ -144,26 +131,22 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
| 144 | unsigned long mmr_offset, int limit) | 131 | unsigned long mmr_offset, int limit) |
| 145 | { | 132 | { |
| 146 | const struct cpumask *eligible_cpu = cpumask_of(cpu); | 133 | const struct cpumask *eligible_cpu = cpumask_of(cpu); |
| 147 | struct irq_desc *desc = irq_to_desc(irq); | 134 | struct irq_cfg *cfg = get_irq_chip_data(irq); |
| 148 | struct irq_cfg *cfg; | ||
| 149 | int mmr_pnode; | ||
| 150 | unsigned long mmr_value; | 135 | unsigned long mmr_value; |
| 151 | struct uv_IO_APIC_route_entry *entry; | 136 | struct uv_IO_APIC_route_entry *entry; |
| 152 | int err; | 137 | int mmr_pnode, err; |
| 153 | 138 | ||
| 154 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != | 139 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != |
| 155 | sizeof(unsigned long)); | 140 | sizeof(unsigned long)); |
| 156 | 141 | ||
| 157 | cfg = irq_cfg(irq); | ||
| 158 | |||
| 159 | err = assign_irq_vector(irq, cfg, eligible_cpu); | 142 | err = assign_irq_vector(irq, cfg, eligible_cpu); |
| 160 | if (err != 0) | 143 | if (err != 0) |
| 161 | return err; | 144 | return err; |
| 162 | 145 | ||
| 163 | if (limit == UV_AFFINITY_CPU) | 146 | if (limit == UV_AFFINITY_CPU) |
| 164 | desc->status |= IRQ_NO_BALANCING; | 147 | irq_set_status_flags(irq, IRQ_NO_BALANCING); |
| 165 | else | 148 | else |
| 166 | desc->status |= IRQ_MOVE_PCNTXT; | 149 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); |
| 167 | 150 | ||
| 168 | set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, | 151 | set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, |
| 169 | irq_name); | 152 | irq_name); |
| @@ -206,17 +189,17 @@ static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) | |||
| 206 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | 189 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); |
| 207 | } | 190 | } |
| 208 | 191 | ||
| 209 | static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) | 192 | static int |
| 193 | uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, | ||
| 194 | bool force) | ||
| 210 | { | 195 | { |
| 211 | struct irq_desc *desc = irq_to_desc(irq); | 196 | struct irq_cfg *cfg = data->chip_data; |
| 212 | struct irq_cfg *cfg = desc->chip_data; | ||
| 213 | unsigned int dest; | 197 | unsigned int dest; |
| 214 | unsigned long mmr_value; | 198 | unsigned long mmr_value, mmr_offset; |
| 215 | struct uv_IO_APIC_route_entry *entry; | 199 | struct uv_IO_APIC_route_entry *entry; |
| 216 | unsigned long mmr_offset; | ||
| 217 | int mmr_pnode; | 200 | int mmr_pnode; |
| 218 | 201 | ||
| 219 | if (set_desc_affinity(desc, mask, &dest)) | 202 | if (__ioapic_set_affinity(data, mask, &dest)) |
| 220 | return -1; | 203 | return -1; |
| 221 | 204 | ||
| 222 | mmr_value = 0; | 205 | mmr_value = 0; |
| @@ -231,7 +214,7 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
| 231 | entry->dest = dest; | 214 | entry->dest = dest; |
| 232 | 215 | ||
| 233 | /* Get previously stored MMR and pnode of hub sourcing interrupts */ | 216 | /* Get previously stored MMR and pnode of hub sourcing interrupts */ |
| 234 | if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) | 217 | if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode)) |
| 235 | return -1; | 218 | return -1; |
| 236 | 219 | ||
| 237 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | 220 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); |
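The uv_irq.c conversion shows the new-style irq_chip interface in miniature: callbacks receive a `struct irq_data *` that already carries the irq number and `chip_data`, so the `irq_to_desc()`/`irq_cfg()` lookups disappear and status bits are set via `irq_set_status_flags()`. A before/after signature sketch with stand-in types (the real definitions live in `<linux/irq.h>`):

```c
#include <stdbool.h>

struct cpumask;				/* opaque, as in the kernel */

struct irq_data {			/* stand-in for <linux/irq.h> */
	unsigned int	irq;		/* interrupt number */
	void		*chip_data;	/* e.g. a struct irq_cfg * */
};

/* Old style: every callback re-derives context from the irq number. */
typedef void (old_ack_t)(unsigned int irq);
typedef int  (old_set_affinity_t)(unsigned int irq,
				  const struct cpumask *mask);

/* New style: context arrives in irq_data, plus an explicit "force". */
typedef void (new_ack_t)(struct irq_data *data);
typedef int  (new_set_affinity_t)(struct irq_data *data,
				  const struct cpumask *mask, bool force);
```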
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index e680ea52db9b..3371bd053b89 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
| @@ -66,10 +66,7 @@ static void __init visws_time_init(void) | |||
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | /* Replaces the default init_ISA_irqs in the generic setup */ | 68 | /* Replaces the default init_ISA_irqs in the generic setup */ |
| 69 | static void __init visws_pre_intr_init(void) | 69 | static void __init visws_pre_intr_init(void); |
| 70 | { | ||
| 71 | init_VISWS_APIC_irqs(); | ||
| 72 | } | ||
| 73 | 70 | ||
| 74 | /* Quirk for machine specific memory setup. */ | 71 | /* Quirk for machine specific memory setup. */ |
| 75 | 72 | ||
| @@ -429,67 +426,34 @@ static int is_co_apic(unsigned int irq) | |||
| 429 | /* | 426 | /* |
| 430 | * This is the SGI Cobalt (IO-)APIC: | 427 | * This is the SGI Cobalt (IO-)APIC: |
| 431 | */ | 428 | */ |
| 432 | 429 | static void enable_cobalt_irq(struct irq_data *data) | |
| 433 | static void enable_cobalt_irq(unsigned int irq) | ||
| 434 | { | 430 | { |
| 435 | co_apic_set(is_co_apic(irq), irq); | 431 | co_apic_set(is_co_apic(data->irq), data->irq); |
| 436 | } | 432 | } |
| 437 | 433 | ||
| 438 | static void disable_cobalt_irq(unsigned int irq) | 434 | static void disable_cobalt_irq(struct irq_data *data) |
| 439 | { | 435 | { |
| 440 | int entry = is_co_apic(irq); | 436 | int entry = is_co_apic(data->irq); |
| 441 | 437 | ||
| 442 | co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK); | 438 | co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK); |
| 443 | co_apic_read(CO_APIC_LO(entry)); | 439 | co_apic_read(CO_APIC_LO(entry)); |
| 444 | } | 440 | } |
| 445 | 441 | ||
| 446 | /* | 442 | static void ack_cobalt_irq(struct irq_data *data) |
| 447 | * "irq" really just serves to identify the device. Here is where we | ||
| 448 | * map this to the Cobalt APIC entry where it's physically wired. | ||
| 449 | * This is called via request_irq -> setup_irq -> irq_desc->startup() | ||
| 450 | */ | ||
| 451 | static unsigned int startup_cobalt_irq(unsigned int irq) | ||
| 452 | { | 443 | { |
| 453 | unsigned long flags; | 444 | unsigned long flags; |
| 454 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 455 | 445 | ||
| 456 | spin_lock_irqsave(&cobalt_lock, flags); | 446 | spin_lock_irqsave(&cobalt_lock, flags); |
| 457 | if ((desc->status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) | 447 | disable_cobalt_irq(data); |
| 458 | desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING); | ||
| 459 | enable_cobalt_irq(irq); | ||
| 460 | spin_unlock_irqrestore(&cobalt_lock, flags); | ||
| 461 | return 0; | ||
| 462 | } | ||
| 463 | |||
| 464 | static void ack_cobalt_irq(unsigned int irq) | ||
| 465 | { | ||
| 466 | unsigned long flags; | ||
| 467 | |||
| 468 | spin_lock_irqsave(&cobalt_lock, flags); | ||
| 469 | disable_cobalt_irq(irq); | ||
| 470 | apic_write(APIC_EOI, APIC_EIO_ACK); | 448 | apic_write(APIC_EOI, APIC_EIO_ACK); |
| 471 | spin_unlock_irqrestore(&cobalt_lock, flags); | 449 | spin_unlock_irqrestore(&cobalt_lock, flags); |
| 472 | } | 450 | } |
| 473 | 451 | ||
| 474 | static void end_cobalt_irq(unsigned int irq) | ||
| 475 | { | ||
| 476 | unsigned long flags; | ||
| 477 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 478 | |||
| 479 | spin_lock_irqsave(&cobalt_lock, flags); | ||
| 480 | if (!(desc->status & (IRQ_DISABLED | IRQ_INPROGRESS))) | ||
| 481 | enable_cobalt_irq(irq); | ||
| 482 | spin_unlock_irqrestore(&cobalt_lock, flags); | ||
| 483 | } | ||
| 484 | |||
| 485 | static struct irq_chip cobalt_irq_type = { | 452 | static struct irq_chip cobalt_irq_type = { |
| 486 | .name = "Cobalt-APIC", | 453 | .name = "Cobalt-APIC", |
| 487 | .startup = startup_cobalt_irq, | 454 | .irq_enable = enable_cobalt_irq, |
| 488 | .shutdown = disable_cobalt_irq, | 455 | .irq_disable = disable_cobalt_irq, |
| 489 | .enable = enable_cobalt_irq, | 456 | .irq_ack = ack_cobalt_irq, |
| 490 | .disable = disable_cobalt_irq, | ||
| 491 | .ack = ack_cobalt_irq, | ||
| 492 | .end = end_cobalt_irq, | ||
| 493 | }; | 457 | }; |
| 494 | 458 | ||
| 495 | 459 | ||
| @@ -503,35 +467,34 @@ static struct irq_chip cobalt_irq_type = { | |||
| 503 | * interrupt controller type, and through a special virtual interrupt- | 467 | * interrupt controller type, and through a special virtual interrupt- |
| 504 | * controller. Device drivers only see the virtual interrupt sources. | 468 | * controller. Device drivers only see the virtual interrupt sources. |
| 505 | */ | 469 | */ |
| 506 | static unsigned int startup_piix4_master_irq(unsigned int irq) | 470 | static unsigned int startup_piix4_master_irq(struct irq_data *data) |
| 507 | { | 471 | { |
| 508 | legacy_pic->init(0); | 472 | legacy_pic->init(0); |
| 509 | 473 | enable_cobalt_irq(data); | |
| 510 | return startup_cobalt_irq(irq); | ||
| 511 | } | 474 | } |
| 512 | 475 | ||
| 513 | static void end_piix4_master_irq(unsigned int irq) | 476 | static void end_piix4_master_irq(struct irq_data *data) |
| 514 | { | 477 | { |
| 515 | unsigned long flags; | 478 | unsigned long flags; |
| 516 | 479 | ||
| 517 | spin_lock_irqsave(&cobalt_lock, flags); | 480 | spin_lock_irqsave(&cobalt_lock, flags); |
| 518 | enable_cobalt_irq(irq); | 481 | enable_cobalt_irq(data); |
| 519 | spin_unlock_irqrestore(&cobalt_lock, flags); | 482 | spin_unlock_irqrestore(&cobalt_lock, flags); |
| 520 | } | 483 | } |
| 521 | 484 | ||
| 522 | static struct irq_chip piix4_master_irq_type = { | 485 | static struct irq_chip piix4_master_irq_type = { |
| 523 | .name = "PIIX4-master", | 486 | .name = "PIIX4-master", |
| 524 | .startup = startup_piix4_master_irq, | 487 | .irq_startup = startup_piix4_master_irq, |
| 525 | .ack = ack_cobalt_irq, | 488 | .irq_ack = ack_cobalt_irq, |
| 526 | .end = end_piix4_master_irq, | ||
| 527 | }; | 489 | }; |
| 528 | 490 | ||
| 491 | static void pii4_mask(struct irq_data *data) { } | ||
| 529 | 492 | ||
| 530 | static struct irq_chip piix4_virtual_irq_type = { | 493 | static struct irq_chip piix4_virtual_irq_type = { |
| 531 | .name = "PIIX4-virtual", | 494 | .name = "PIIX4-virtual", |
| 495 | .mask = pii4_mask, | ||
| 532 | }; | 496 | }; |
| 533 | 497 | ||
| 534 | |||
| 535 | /* | 498 | /* |
| 536 | * PIIX4-8259 master/virtual functions to handle interrupt requests | 499 | * PIIX4-8259 master/virtual functions to handle interrupt requests |
| 537 | * from legacy devices: floppy, parallel, serial, rtc. | 500 | * from legacy devices: floppy, parallel, serial, rtc. |
| @@ -549,9 +512,8 @@ static struct irq_chip piix4_virtual_irq_type = { | |||
| 549 | */ | 512 | */ |
| 550 | static irqreturn_t piix4_master_intr(int irq, void *dev_id) | 513 | static irqreturn_t piix4_master_intr(int irq, void *dev_id) |
| 551 | { | 514 | { |
| 552 | int realirq; | ||
| 553 | struct irq_desc *desc; | ||
| 554 | unsigned long flags; | 515 | unsigned long flags; |
| 516 | int realirq; | ||
| 555 | 517 | ||
| 556 | raw_spin_lock_irqsave(&i8259A_lock, flags); | 518 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
| 557 | 519 | ||
| @@ -592,18 +554,10 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) | |||
| 592 | 554 | ||
| 593 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); | 555 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
| 594 | 556 | ||
| 595 | desc = irq_to_desc(realirq); | ||
| 596 | |||
| 597 | /* | 557 | /* |
| 598 | * handle this 'virtual interrupt' as a Cobalt one now. | 558 | * handle this 'virtual interrupt' as a Cobalt one now. |
| 599 | */ | 559 | */ |
| 600 | kstat_incr_irqs_this_cpu(realirq, desc); | 560 | generic_handle_irq(realirq); |
| 601 | |||
| 602 | if (likely(desc->action != NULL)) | ||
| 603 | handle_IRQ_event(realirq, desc->action); | ||
| 604 | |||
| 605 | if (!(desc->status & IRQ_DISABLED)) | ||
| 606 | legacy_pic->chip->unmask(realirq); | ||
| 607 | 561 | ||
| 608 | return IRQ_HANDLED; | 562 | return IRQ_HANDLED; |
| 609 | 563 | ||
| @@ -624,41 +578,35 @@ static struct irqaction cascade_action = { | |||
| 624 | 578 | ||
| 625 | static inline void set_piix4_virtual_irq_type(void) | 579 | static inline void set_piix4_virtual_irq_type(void) |
| 626 | { | 580 | { |
| 627 | piix4_virtual_irq_type.shutdown = i8259A_chip.mask; | ||
| 628 | piix4_virtual_irq_type.enable = i8259A_chip.unmask; | 581 | piix4_virtual_irq_type.enable = i8259A_chip.unmask; |
| 629 | piix4_virtual_irq_type.disable = i8259A_chip.mask; | 582 | piix4_virtual_irq_type.disable = i8259A_chip.mask; |
| 583 | piix4_virtual_irq_type.unmask = i8259A_chip.unmask; | ||
| 630 | } | 584 | } |
| 631 | 585 | ||
| 632 | void init_VISWS_APIC_irqs(void) | 586 | static void __init visws_pre_intr_init(void) |
| 633 | { | 587 | { |
| 634 | int i; | 588 | int i; |
| 635 | 589 | ||
| 636 | for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { | 590 | set_piix4_virtual_irq_type(); |
| 637 | struct irq_desc *desc = irq_to_desc(i); | ||
| 638 | |||
| 639 | desc->status = IRQ_DISABLED; | ||
| 640 | desc->action = 0; | ||
| 641 | desc->depth = 1; | ||
| 642 | 591 | ||
| 643 | if (i == 0) { | 592 | for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { |
| 644 | desc->chip = &cobalt_irq_type; | 593 | struct irq_chip *chip = NULL; |
| 645 | } | 594 | |
| 646 | else if (i == CO_IRQ_IDE0) { | 595 | if (i == 0) |
| 647 | desc->chip = &cobalt_irq_type; | 596 | chip = &cobalt_irq_type; |
| 648 | } | 597 | else if (i == CO_IRQ_IDE0) |
| 649 | else if (i == CO_IRQ_IDE1) { | 598 | chip = &cobalt_irq_type; |
| 650 | desc->chip = &cobalt_irq_type; | 599 | else if (i == CO_IRQ_IDE1) |
| 651 | } | 600 | chip = &cobalt_irq_type; |
| 652 | else if (i == CO_IRQ_8259) { | 601 | else if (i == CO_IRQ_8259) |
| 653 | desc->chip = &piix4_master_irq_type; | 602 | chip = &piix4_master_irq_type; |
| 654 | } | 603 | else if (i < CO_IRQ_APIC0) |
| 655 | else if (i < CO_IRQ_APIC0) { | 604 | chip = &piix4_virtual_irq_type; |
| 656 | set_piix4_virtual_irq_type(); | 605 | else if (IS_CO_APIC(i)) |
| 657 | desc->chip = &piix4_virtual_irq_type; | 606 | chip = &cobalt_irq_type; |
| 658 | } | 607 | |
| 659 | else if (IS_CO_APIC(i)) { | 608 | if (chip) |
| 660 | desc->chip = &cobalt_irq_type; | 609 | set_irq_chip(i, chip); |
| 661 | } | ||
| 662 | } | 610 | } |
| 663 | 611 | ||
| 664 | setup_irq(CO_IRQ_8259, &master_action); | 612 | setup_irq(CO_IRQ_8259, &master_action); |
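visws_quirks.c gets the same irq_chip treatment, and its PIIX4 cascade handler shrinks to a single call into the genirq core, which performs the statistics accounting, action dispatch and unmasking that used to be open-coded. A sketch of the resulting handler shape; `read_cascade_irq()` is a hypothetical stand-in for the polling sequence done under `i8259A_lock`:

```c
enum irqreturn { IRQ_NONE, IRQ_HANDLED };	/* stand-in for irqreturn_t */

void generic_handle_irq(unsigned int irq);	/* genirq core entry point */
int read_cascade_irq(void);			/* hypothetical poll helper */

enum irqreturn cascade_intr_sketch(int irq, void *dev_id)
{
	int realirq = read_cascade_irq();	/* which 8259 line fired? */

	if (realirq < 0 || realirq >= 16)
		return IRQ_NONE;

	/* One call replaces the kstat bump, action walk and unmask. */
	generic_handle_irq(realirq);
	return IRQ_HANDLED;
}
```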
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c deleted file mode 100644 index ce9fbacb7526..000000000000 --- a/arch/x86/kernel/vmi_32.c +++ /dev/null | |||
| @@ -1,893 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * VMI specific paravirt-ops implementation | ||
| 3 | * | ||
| 4 | * Copyright (C) 2005, VMware, Inc. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License as published by | ||
| 8 | * the Free Software Foundation; either version 2 of the License, or | ||
| 9 | * (at your option) any later version. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope that it will be useful, but | ||
| 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
| 15 | * details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU General Public License | ||
| 18 | * along with this program; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 20 | * | ||
| 21 | * Send feedback to zach@vmware.com | ||
| 22 | * | ||
| 23 | */ | ||
| 24 | |||
| 25 | #include <linux/module.h> | ||
| 26 | #include <linux/cpu.h> | ||
| 27 | #include <linux/bootmem.h> | ||
| 28 | #include <linux/mm.h> | ||
| 29 | #include <linux/highmem.h> | ||
| 30 | #include <linux/sched.h> | ||
| 31 | #include <linux/gfp.h> | ||
| 32 | #include <asm/vmi.h> | ||
| 33 | #include <asm/io.h> | ||
| 34 | #include <asm/fixmap.h> | ||
| 35 | #include <asm/apicdef.h> | ||
| 36 | #include <asm/apic.h> | ||
| 37 | #include <asm/pgalloc.h> | ||
| 38 | #include <asm/processor.h> | ||
| 39 | #include <asm/timer.h> | ||
| 40 | #include <asm/vmi_time.h> | ||
| 41 | #include <asm/kmap_types.h> | ||
| 42 | #include <asm/setup.h> | ||
| 43 | |||
| 44 | /* Convenient for calling VMI functions indirectly in the ROM */ | ||
| 45 | typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); | ||
| 46 | typedef u64 __attribute__((regparm(2))) (VROMLONGFUNC)(int); | ||
| 47 | |||
| 48 | #define call_vrom_func(rom,func) \ | ||
| 49 | (((VROMFUNC *)(rom->func))()) | ||
| 50 | |||
| 51 | #define call_vrom_long_func(rom,func,arg) \ | ||
| 52 | (((VROMLONGFUNC *)(rom->func)) (arg)) | ||
| 53 | |||
| 54 | static struct vrom_header *vmi_rom; | ||
| 55 | static int disable_pge; | ||
| 56 | static int disable_pse; | ||
| 57 | static int disable_sep; | ||
| 58 | static int disable_tsc; | ||
| 59 | static int disable_mtrr; | ||
| 60 | static int disable_noidle; | ||
| 61 | static int disable_vmi_timer; | ||
| 62 | |||
| 63 | /* Cached VMI operations */ | ||
| 64 | static struct { | ||
| 65 | void (*cpuid)(void /* non-c */); | ||
| 66 | void (*_set_ldt)(u32 selector); | ||
| 67 | void (*set_tr)(u32 selector); | ||
| 68 | void (*write_idt_entry)(struct desc_struct *, int, u32, u32); | ||
| 69 | void (*write_gdt_entry)(struct desc_struct *, int, u32, u32); | ||
| 70 | void (*write_ldt_entry)(struct desc_struct *, int, u32, u32); | ||
| 71 | void (*set_kernel_stack)(u32 selector, u32 sp0); | ||
| 72 | void (*allocate_page)(u32, u32, u32, u32, u32); | ||
| 73 | void (*release_page)(u32, u32); | ||
| 74 | void (*set_pte)(pte_t, pte_t *, unsigned); | ||
| 75 | void (*update_pte)(pte_t *, unsigned); | ||
| 76 | void (*set_linear_mapping)(int, void *, u32, u32); | ||
| 77 | void (*_flush_tlb)(int); | ||
| 78 | void (*set_initial_ap_state)(int, int); | ||
| 79 | void (*halt)(void); | ||
| 80 | void (*set_lazy_mode)(int mode); | ||
| 81 | } vmi_ops; | ||
| 82 | |||
| 83 | /* Cached VMI operations */ | ||
| 84 | struct vmi_timer_ops vmi_timer_ops; | ||
| 85 | |||
| 86 | /* | ||
| 87 | * VMI patching routines. | ||
| 88 | */ | ||
| 89 | #define MNEM_CALL 0xe8 | ||
| 90 | #define MNEM_JMP 0xe9 | ||
| 91 | #define MNEM_RET 0xc3 | ||
| 92 | |||
| 93 | #define IRQ_PATCH_INT_MASK 0 | ||
| 94 | #define IRQ_PATCH_DISABLE 5 | ||
| 95 | |||
| 96 | static inline void patch_offset(void *insnbuf, | ||
| 97 | unsigned long ip, unsigned long dest) | ||
| 98 | { | ||
| 99 | *(unsigned long *)(insnbuf+1) = dest-ip-5; | ||
| 100 | } | ||
| 101 | |||
| 102 | static unsigned patch_internal(int call, unsigned len, void *insnbuf, | ||
| 103 | unsigned long ip) | ||
| 104 | { | ||
| 105 | u64 reloc; | ||
| 106 | struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; | ||
| 107 | reloc = call_vrom_long_func(vmi_rom, get_reloc, call); | ||
| 108 | switch(rel->type) { | ||
| 109 | case VMI_RELOCATION_CALL_REL: | ||
| 110 | BUG_ON(len < 5); | ||
| 111 | *(char *)insnbuf = MNEM_CALL; | ||
| 112 | patch_offset(insnbuf, ip, (unsigned long)rel->eip); | ||
| 113 | return 5; | ||
| 114 | |||
| 115 | case VMI_RELOCATION_JUMP_REL: | ||
| 116 | BUG_ON(len < 5); | ||
| 117 | *(char *)insnbuf = MNEM_JMP; | ||
| 118 | patch_offset(insnbuf, ip, (unsigned long)rel->eip); | ||
| 119 | return 5; | ||
| 120 | |||
| 121 | case VMI_RELOCATION_NOP: | ||
| 122 | /* obliterate the whole thing */ | ||
| 123 | return 0; | ||
| 124 | |||
| 125 | case VMI_RELOCATION_NONE: | ||
| 126 | /* leave native code in place */ | ||
| 127 | break; | ||
| 128 | |||
| 129 | default: | ||
| 130 | BUG(); | ||
| 131 | } | ||
| 132 | return len; | ||
| 133 | } | ||
| 134 | |||
| 135 | /* | ||
| 136 | * Apply patch if appropriate, return length of new instruction | ||
| 137 | * sequence. The callee does nop padding for us. | ||
| 138 | */ | ||
| 139 | static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, | ||
| 140 | unsigned long ip, unsigned len) | ||
| 141 | { | ||
| 142 | switch (type) { | ||
| 143 | case PARAVIRT_PATCH(pv_irq_ops.irq_disable): | ||
| 144 | return patch_internal(VMI_CALL_DisableInterrupts, len, | ||
| 145 | insns, ip); | ||
| 146 | case PARAVIRT_PATCH(pv_irq_ops.irq_enable): | ||
| 147 | return patch_internal(VMI_CALL_EnableInterrupts, len, | ||
| 148 | insns, ip); | ||
| 149 | case PARAVIRT_PATCH(pv_irq_ops.restore_fl): | ||
| 150 | return patch_internal(VMI_CALL_SetInterruptMask, len, | ||
| 151 | insns, ip); | ||
| 152 | case PARAVIRT_PATCH(pv_irq_ops.save_fl): | ||
| 153 | return patch_internal(VMI_CALL_GetInterruptMask, len, | ||
| 154 | insns, ip); | ||
| 155 | case PARAVIRT_PATCH(pv_cpu_ops.iret): | ||
| 156 | return patch_internal(VMI_CALL_IRET, len, insns, ip); | ||
| 157 | case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit): | ||
| 158 | return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip); | ||
| 159 | default: | ||
| 160 | break; | ||
| 161 | } | ||
| 162 | return len; | ||
| 163 | } | ||
| 164 | |||
| 165 | /* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */ | ||
| 166 | static void vmi_cpuid(unsigned int *ax, unsigned int *bx, | ||
| 167 | unsigned int *cx, unsigned int *dx) | ||
| 168 | { | ||
| 169 | int override = 0; | ||
| 170 | if (*ax == 1) | ||
| 171 | override = 1; | ||
| 172 | asm volatile ("call *%6" | ||
| 173 | : "=a" (*ax), | ||
| 174 | "=b" (*bx), | ||
| 175 | "=c" (*cx), | ||
| 176 | "=d" (*dx) | ||
| 177 | : "0" (*ax), "2" (*cx), "r" (vmi_ops.cpuid)); | ||
| 178 | if (override) { | ||
| 179 | if (disable_pse) | ||
| 180 | *dx &= ~X86_FEATURE_PSE; | ||
| 181 | if (disable_pge) | ||
| 182 | *dx &= ~X86_FEATURE_PGE; | ||
| 183 | if (disable_sep) | ||
| 184 | *dx &= ~X86_FEATURE_SEP; | ||
| 185 | if (disable_tsc) | ||
| 186 | *dx &= ~X86_FEATURE_TSC; | ||
| 187 | if (disable_mtrr) | ||
| 188 | *dx &= ~X86_FEATURE_MTRR; | ||
| 189 | } | ||
| 190 | } | ||
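For contrast with the wrapper above, the native instruction fills all four registers directly; a sketch of the same pointer-based shape over the raw instruction (GCC inline asm, x86 only, standalone illustration rather than kernel code):

    #include <stdio.h>

    /* Pointer-based cpuid wrapper: leaf goes in through *ax, results
     * come back through all four pointers, mirroring the paravirt-op. */
    static void native_cpuid(unsigned *ax, unsigned *bx,
                             unsigned *cx, unsigned *dx)
    {
            asm volatile("cpuid"
                         : "=a" (*ax), "=b" (*bx), "=c" (*cx), "=d" (*dx)
                         : "0" (*ax), "2" (*cx));
    }

    int main(void)
    {
            unsigned ax = 1, bx, cx, dx;        /* leaf 1: feature flags */

            native_cpuid(&ax, &bx, &cx, &dx);
            printf("leaf 1 edx=%#x\n", dx);     /* e.g. bit 4 = TSC */
            return 0;
    }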
| 191 | |||
| 192 | static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new) | ||
| 193 | { | ||
| 194 | if (gdt[nr].a != new->a || gdt[nr].b != new->b) | ||
| 195 | write_gdt_entry(gdt, nr, new, 0); | ||
| 196 | } | ||
| 197 | |||
| 198 | static void vmi_load_tls(struct thread_struct *t, unsigned int cpu) | ||
| 199 | { | ||
| 200 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); | ||
| 201 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 0, &t->tls_array[0]); | ||
| 202 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 1, &t->tls_array[1]); | ||
| 203 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 2, &t->tls_array[2]); | ||
| 204 | } | ||
| 205 | |||
| 206 | static void vmi_set_ldt(const void *addr, unsigned entries) | ||
| 207 | { | ||
| 208 | unsigned cpu = smp_processor_id(); | ||
| 209 | struct desc_struct desc; | ||
| 210 | |||
| 211 | pack_descriptor(&desc, (unsigned long)addr, | ||
| 212 | entries * sizeof(struct desc_struct) - 1, | ||
| 213 | DESC_LDT, 0); | ||
| 214 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, &desc, DESC_LDT); | ||
| 215 | vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0); | ||
| 216 | } | ||
| 217 | |||
| 218 | static void vmi_set_tr(void) | ||
| 219 | { | ||
| 220 | vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct)); | ||
| 221 | } | ||
| 222 | |||
| 223 | static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) | ||
| 224 | { | ||
| 225 | u32 *idt_entry = (u32 *)g; | ||
| 226 | vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[1]); | ||
| 227 | } | ||
| 228 | |||
| 229 | static void vmi_write_gdt_entry(struct desc_struct *dt, int entry, | ||
| 230 | const void *desc, int type) | ||
| 231 | { | ||
| 232 | u32 *gdt_entry = (u32 *)desc; | ||
| 233 | vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[1]); | ||
| 234 | } | ||
| 235 | |||
| 236 | static void vmi_write_ldt_entry(struct desc_struct *dt, int entry, | ||
| 237 | const void *desc) | ||
| 238 | { | ||
| 239 | u32 *ldt_entry = (u32 *)desc; | ||
| 240 | vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); | ||
| 241 | } | ||
| 242 | |||
| 243 | static void vmi_load_sp0(struct tss_struct *tss, | ||
| 244 | struct thread_struct *thread) | ||
| 245 | { | ||
| 246 | tss->x86_tss.sp0 = thread->sp0; | ||
| 247 | |||
| 248 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | ||
| 249 | if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { | ||
| 250 | tss->x86_tss.ss1 = thread->sysenter_cs; | ||
| 251 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | ||
| 252 | } | ||
| 253 | vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.sp0); | ||
| 254 | } | ||
| 255 | |||
| 256 | static void vmi_flush_tlb_user(void) | ||
| 257 | { | ||
| 258 | vmi_ops._flush_tlb(VMI_FLUSH_TLB); | ||
| 259 | } | ||
| 260 | |||
| 261 | static void vmi_flush_tlb_kernel(void) | ||
| 262 | { | ||
| 263 | vmi_ops._flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL); | ||
| 264 | } | ||
| 265 | |||
| 266 | /* Stub to do nothing at all; used for delays and unimplemented calls */ | ||
| 267 | static void vmi_nop(void) | ||
| 268 | { | ||
| 269 | } | ||
| 270 | |||
| 271 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) | ||
| 272 | { | ||
| 273 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | ||
| 274 | } | ||
| 275 | |||
| 276 | static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn) | ||
| 277 | { | ||
| 278 | /* | ||
| 279 | * This call comes in very early, before mem_map is set up. | ||
| 280 | * It is called only for swapper_pg_dir, which already has | ||
| 281 | * data on it. | ||
| 282 | */ | ||
| 283 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); | ||
| 284 | } | ||
| 285 | |||
| 286 | static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) | ||
| 287 | { | ||
| 288 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); | ||
| 289 | } | ||
| 290 | |||
| 291 | static void vmi_release_pte(unsigned long pfn) | ||
| 292 | { | ||
| 293 | vmi_ops.release_page(pfn, VMI_PAGE_L1); | ||
| 294 | } | ||
| 295 | |||
| 296 | static void vmi_release_pmd(unsigned long pfn) | ||
| 297 | { | ||
| 298 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | ||
| 299 | } | ||
| 300 | |||
| 301 | /* | ||
| 302 | * We use the pgd_free hook for releasing the pgd page: | ||
| 303 | */ | ||
| 304 | static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd) | ||
| 305 | { | ||
| 306 | unsigned long pfn = __pa(pgd) >> PAGE_SHIFT; | ||
| 307 | |||
| 308 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | ||
| 309 | } | ||
| 310 | |||
| 311 | /* | ||
| 312 | * Helper macros for MMU update flags. We can defer updates until a flush | ||
| 313 | * or page invalidation only if the update is to the current address space | ||
| 314 | * (otherwise, there is no flush). We must check against init_mm, since | ||
| 315 | * this could be a kernel update, which usually passes init_mm, although | ||
| 316 | * sometimes this check can be skipped if we know the particular function | ||
| 317 | * is only called on user mode PTEs. We could change the kernel to pass | ||
| 318 | * current->active_mm here, but in particular, I was unsure if changing | ||
| 319 | * mm/highmem.c to do this would still be correct on other architectures. | ||
| 320 | */ | ||
| 321 | #define is_current_as(mm, mustbeuser) ((mm) == current->active_mm || \ | ||
| 322 | (!mustbeuser && (mm) == &init_mm)) | ||
| 323 | #define vmi_flags_addr(mm, addr, level, user) \ | ||
| 324 | ((level) | (is_current_as(mm, user) ? \ | ||
| 325 | (VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0)) | ||
| 326 | #define vmi_flags_addr_defer(mm, addr, level, user) \ | ||
| 327 | ((level) | (is_current_as(mm, user) ? \ | ||
| 328 | (VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0)) | ||
| 329 | |||
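A worked expansion may help here: the flag values below are hypothetical stand-ins, not the real VMI ABI encoding, but they show how the page level, the current-address-space hint, and the page-aligned VA are packed into one word by vmi_flags_addr():

    /* Illustrative only -- these flag values are made up, not the VMI ABI. */
    #include <stdio.h>

    #define VMI_PAGE_PT             0x01            /* hypothetical */
    #define VMI_PAGE_CURRENT_AS     0x10            /* hypothetical */
    #define VMI_PAGE_VA_MASK        0xfffff000ul    /* hypothetical */

    int main(void)
    {
            unsigned long addr = 0xb7501234ul;      /* faulting user VA */
            int current_as = 1;                     /* is_current_as(mm, user) */
            unsigned long flags = VMI_PAGE_PT |
                    (current_as ? (VMI_PAGE_CURRENT_AS |
                                   (addr & VMI_PAGE_VA_MASK)) : 0);

            /* level + AS hint + page VA; a foreign mm yields just the level */
            printf("flags = %#lx\n", flags);
            return 0;
    }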
| 330 | static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
| 331 | { | ||
| 332 | vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | ||
| 333 | } | ||
| 334 | |||
| 335 | static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
| 336 | { | ||
| 337 | vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); | ||
| 338 | } | ||
| 339 | |||
| 340 | static void vmi_set_pte(pte_t *ptep, pte_t pte) | ||
| 341 | { | ||
| 342 | /* XXX because of set_pmd_pte, this can be called on PT or PD layers */ | ||
| 343 | vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); | ||
| 344 | } | ||
| 345 | |||
| 346 | static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | ||
| 347 | { | ||
| 348 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | ||
| 349 | } | ||
| 350 | |||
| 351 | static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval) | ||
| 352 | { | ||
| 353 | #ifdef CONFIG_X86_PAE | ||
| 354 | const pte_t pte = { .pte = pmdval.pmd }; | ||
| 355 | #else | ||
| 356 | const pte_t pte = { pmdval.pud.pgd.pgd }; | ||
| 357 | #endif | ||
| 358 | vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD); | ||
| 359 | } | ||
| 360 | |||
| 361 | #ifdef CONFIG_X86_PAE | ||
| 362 | |||
| 363 | static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval) | ||
| 364 | { | ||
| 365 | /* | ||
| 366 | * XXX This is called from set_pmd_pte, but at both PT | ||
| 367 | * and PD layers so the VMI_PAGE_PT flag is wrong. But | ||
| 368 | * it is only called for large page mapping changes, | ||
| 369 | * the Xen backend doesn't support large pages, and the | ||
| 370 | * ESX backend doesn't depend on the flag. | ||
| 371 | */ | ||
| 372 | set_64bit((unsigned long long *)ptep, pte_val(pteval)); | ||
| 373 | vmi_ops.update_pte(ptep, VMI_PAGE_PT); | ||
| 374 | } | ||
| 375 | |||
| 376 | static void vmi_set_pud(pud_t *pudp, pud_t pudval) | ||
| 377 | { | ||
| 378 | /* Um, eww */ | ||
| 379 | const pte_t pte = { .pte = pudval.pgd.pgd }; | ||
| 380 | vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); | ||
| 381 | } | ||
| 382 | |||
| 383 | static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
| 384 | { | ||
| 385 | const pte_t pte = { .pte = 0 }; | ||
| 386 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | ||
| 387 | } | ||
| 388 | |||
| 389 | static void vmi_pmd_clear(pmd_t *pmd) | ||
| 390 | { | ||
| 391 | const pte_t pte = { .pte = 0 }; | ||
| 392 | vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); | ||
| 393 | } | ||
| 394 | #endif | ||
| 395 | |||
| 396 | #ifdef CONFIG_SMP | ||
| 397 | static void __devinit | ||
| 398 | vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, | ||
| 399 | unsigned long start_esp) | ||
| 400 | { | ||
| 401 | struct vmi_ap_state ap; | ||
| 402 | |||
| 403 | /* Default everything to zero. This is fine for most GPRs. */ | ||
| 404 | memset(&ap, 0, sizeof(struct vmi_ap_state)); | ||
| 405 | |||
| 406 | ap.gdtr_limit = GDT_SIZE - 1; | ||
| 407 | ap.gdtr_base = (unsigned long) get_cpu_gdt_table(phys_apicid); | ||
| 408 | |||
| 409 | ap.idtr_limit = IDT_ENTRIES * 8 - 1; | ||
| 410 | ap.idtr_base = (unsigned long) idt_table; | ||
| 411 | |||
| 412 | ap.ldtr = 0; | ||
| 413 | |||
| 414 | ap.cs = __KERNEL_CS; | ||
| 415 | ap.eip = (unsigned long) start_eip; | ||
| 416 | ap.ss = __KERNEL_DS; | ||
| 417 | ap.esp = (unsigned long) start_esp; | ||
| 418 | |||
| 419 | ap.ds = __USER_DS; | ||
| 420 | ap.es = __USER_DS; | ||
| 421 | ap.fs = __KERNEL_PERCPU; | ||
| 422 | ap.gs = __KERNEL_STACK_CANARY; | ||
| 423 | |||
| 424 | ap.eflags = 0; | ||
| 425 | |||
| 426 | #ifdef CONFIG_X86_PAE | ||
| 427 | /* efer should match BSP efer. */ | ||
| 428 | if (cpu_has_nx) { | ||
| 429 | unsigned l, h; | ||
| 430 | rdmsr(MSR_EFER, l, h); | ||
| 431 | ap.efer = (unsigned long long) h << 32 | l; | ||
| 432 | } | ||
| 433 | #endif | ||
| 434 | |||
| 435 | ap.cr3 = __pa(swapper_pg_dir); | ||
| 436 | /* Protected mode, paging, AM, WP, NE, MP. */ | ||
| 437 | ap.cr0 = 0x80050023; | ||
| 438 | ap.cr4 = mmu_cr4_features; | ||
| 439 | vmi_ops.set_initial_ap_state((u32)&ap, phys_apicid); | ||
| 440 | } | ||
| 441 | #endif | ||
| 442 | |||
| 443 | static void vmi_start_context_switch(struct task_struct *prev) | ||
| 444 | { | ||
| 445 | paravirt_start_context_switch(prev); | ||
| 446 | vmi_ops.set_lazy_mode(2); | ||
| 447 | } | ||
| 448 | |||
| 449 | static void vmi_end_context_switch(struct task_struct *next) | ||
| 450 | { | ||
| 451 | vmi_ops.set_lazy_mode(0); | ||
| 452 | paravirt_end_context_switch(next); | ||
| 453 | } | ||
| 454 | |||
| 455 | static void vmi_enter_lazy_mmu(void) | ||
| 456 | { | ||
| 457 | paravirt_enter_lazy_mmu(); | ||
| 458 | vmi_ops.set_lazy_mode(1); | ||
| 459 | } | ||
| 460 | |||
| 461 | static void vmi_leave_lazy_mmu(void) | ||
| 462 | { | ||
| 463 | vmi_ops.set_lazy_mode(0); | ||
| 464 | paravirt_leave_lazy_mmu(); | ||
| 465 | } | ||
| 466 | |||
| 467 | static inline int __init check_vmi_rom(struct vrom_header *rom) | ||
| 468 | { | ||
| 469 | struct pci_header *pci; | ||
| 470 | struct pnp_header *pnp; | ||
| 471 | const char *manufacturer = "UNKNOWN"; | ||
| 472 | const char *product = "UNKNOWN"; | ||
| 473 | const char *license = "unspecified"; | ||
| 474 | |||
| 475 | if (rom->rom_signature != 0xaa55) | ||
| 476 | return 0; | ||
| 477 | if (rom->vrom_signature != VMI_SIGNATURE) | ||
| 478 | return 0; | ||
| 479 | if (rom->api_version_maj != VMI_API_REV_MAJOR || | ||
| 480 | rom->api_version_min+1 < VMI_API_REV_MINOR+1) { | ||
| 481 | printk(KERN_WARNING "VMI: Found mismatched rom version %d.%d\n", | ||
| 482 | rom->api_version_maj, | ||
| 483 | rom->api_version_min); | ||
| 484 | return 0; | ||
| 485 | } | ||
| 486 | |||
| 487 | /* | ||
| 488 | * Relying on the VMI_SIGNATURE field is not 100% safe, so check | ||
| 489 | * the PCI header and device type to make sure this is really a | ||
| 490 | * VMI device. | ||
| 491 | */ | ||
| 492 | if (!rom->pci_header_offs) { | ||
| 493 | printk(KERN_WARNING "VMI: ROM does not contain PCI header.\n"); | ||
| 494 | return 0; | ||
| 495 | } | ||
| 496 | |||
| 497 | pci = (struct pci_header *)((char *)rom+rom->pci_header_offs); | ||
| 498 | if (pci->vendorID != PCI_VENDOR_ID_VMWARE || | ||
| 499 | pci->deviceID != PCI_DEVICE_ID_VMWARE_VMI) { | ||
| 500 | /* Allow it to run... anyway, but warn */ | ||
| 501 | printk(KERN_WARNING "VMI: ROM from unknown manufacturer\n"); | ||
| 502 | } | ||
| 503 | |||
| 504 | if (rom->pnp_header_offs) { | ||
| 505 | pnp = (struct pnp_header *)((char *)rom+rom->pnp_header_offs); | ||
| 506 | if (pnp->manufacturer_offset) | ||
| 507 | manufacturer = (const char *)rom+pnp->manufacturer_offset; | ||
| 508 | if (pnp->product_offset) | ||
| 509 | product = (const char *)rom+pnp->product_offset; | ||
| 510 | } | ||
| 511 | |||
| 512 | if (rom->license_offs) | ||
| 513 | license = (char *)rom+rom->license_offs; | ||
| 514 | |||
| 515 | printk(KERN_INFO "VMI: Found %s %s, API version %d.%d, ROM version %d.%d\n", | ||
| 516 | manufacturer, product, | ||
| 517 | rom->api_version_maj, rom->api_version_min, | ||
| 518 | pci->rom_version_maj, pci->rom_version_min); | ||
| 519 | |||
| 520 | /* Don't allow BSD/MIT here for now because we don't want to end up | ||
| 521 | with any binary-only shim layers */ | ||
| 522 | if (strcmp(license, "GPL") && strcmp(license, "GPL v2")) { | ||
| 523 | printk(KERN_WARNING "VMI: Non GPL license `%s' found for ROM. Not used.\n", | ||
| 524 | license); | ||
| 525 | return 0; | ||
| 526 | } | ||
| 527 | |||
| 528 | return 1; | ||
| 529 | } | ||
| 530 | |||
| 531 | /* | ||
| 532 | * Probe for the VMI option ROM | ||
| 533 | */ | ||
| 534 | static inline int __init probe_vmi_rom(void) | ||
| 535 | { | ||
| 536 | unsigned long base; | ||
| 537 | |||
| 538 | /* VMI ROM is in option ROM area, check signature */ | ||
| 539 | for (base = 0xC0000; base < 0xE0000; base += 2048) { | ||
| 540 | struct vrom_header *romstart; | ||
| 541 | romstart = (struct vrom_header *)isa_bus_to_virt(base); | ||
| 542 | if (check_vmi_rom(romstart)) { | ||
| 543 | vmi_rom = romstart; | ||
| 544 | return 1; | ||
| 545 | } | ||
| 546 | } | ||
| 547 | return 0; | ||
| 548 | } | ||
| 549 | |||
| 550 | /* | ||
| 551 | * VMI setup common to all processors | ||
| 552 | */ | ||
| 553 | void vmi_bringup(void) | ||
| 554 | { | ||
| 555 | /* We must establish the lowmem mapping for MMU ops to work */ | ||
| 556 | if (vmi_ops.set_linear_mapping) | ||
| 557 | vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0); | ||
| 558 | } | ||
| 559 | |||
| 560 | /* | ||
| 561 | * Return a pointer to a VMI function or NULL if unimplemented | ||
| 562 | */ | ||
| 563 | static void *vmi_get_function(int vmicall) | ||
| 564 | { | ||
| 565 | u64 reloc; | ||
| 566 | const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; | ||
| 567 | reloc = call_vrom_long_func(vmi_rom, get_reloc, vmicall); | ||
| 568 | BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); | ||
| 569 | if (rel->type == VMI_RELOCATION_CALL_REL) | ||
| 570 | return (void *)rel->eip; | ||
| 571 | else | ||
| 572 | return NULL; | ||
| 573 | } | ||
| 574 | |||
| 575 | /* | ||
| 576 | * Helper macro for making the VMI paravirt-ops fill code readable. | ||
| 577 | * For unimplemented operations, fall back to default, unless nop | ||
| 578 | * is returned by the ROM. | ||
| 579 | */ | ||
| 580 | #define para_fill(opname, vmicall) \ | ||
| 581 | do { \ | ||
| 582 | reloc = call_vrom_long_func(vmi_rom, get_reloc, \ | ||
| 583 | VMI_CALL_##vmicall); \ | ||
| 584 | if (rel->type == VMI_RELOCATION_CALL_REL) \ | ||
| 585 | opname = (void *)rel->eip; \ | ||
| 586 | else if (rel->type == VMI_RELOCATION_NOP) \ | ||
| 587 | opname = (void *)vmi_nop; \ | ||
| 588 | else if (rel->type != VMI_RELOCATION_NONE) \ | ||
| 589 | printk(KERN_WARNING "VMI: Unknown relocation " \ | ||
| 590 | "type %d for " #vmicall"\n",\ | ||
| 591 | rel->type); \ | ||
| 592 | } while (0) | ||
| 593 | |||
| 594 | /* | ||
| 595 | * Helper macro for making the VMI paravirt-ops fill code readable. | ||
| 596 | * For cached operations which do not match the VMI ROM ABI and must | ||
| 597 | * go through a translation stub. Ignore NOPs, since it is not clear | ||
| 598 | * a NOP VMI function corresponds to a NOP paravirt-op when the | ||
| 599 | * functions are not in 1-1 correspondence. | ||
| 600 | */ | ||
| 601 | #define para_wrap(opname, wrapper, cache, vmicall) \ | ||
| 602 | do { \ | ||
| 603 | reloc = call_vrom_long_func(vmi_rom, get_reloc, \ | ||
| 604 | VMI_CALL_##vmicall); \ | ||
| 605 | BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \ | ||
| 606 | if (rel->type == VMI_RELOCATION_CALL_REL) { \ | ||
| 607 | opname = wrapper; \ | ||
| 608 | vmi_ops.cache = (void *)rel->eip; \ | ||
| 609 | } \ | ||
| 610 | } while (0) | ||
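The shape of both macros reduces to a small decision tree over the relocation type; a freestanding C sketch with simplified names (not the kernel macros themselves):

    /* Freestanding sketch of the para_fill decision (simplified names). */
    enum reloc_type { RELOC_NONE, RELOC_CALL_REL, RELOC_JUMP_REL, RELOC_NOP };

    static void *fill_op(void *native_op, enum reloc_type type,
                         void *rom_eip, void (*nop)(void))
    {
            switch (type) {
            case RELOC_CALL_REL:
                    return rom_eip;         /* call straight into the ROM */
            case RELOC_NOP:
                    return (void *)nop;     /* ROM says the op is a no-op */
            default:
                    return native_op;       /* RELOC_NONE: keep native code */
            }
    }

para_wrap differs only in that the ROM pointer is stashed in vmi_ops and the paravirt-op is pointed at a Linux-side wrapper instead.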
| 611 | |||
| 612 | /* | ||
| 613 | * Activate the VMI interface and switch into paravirtualized mode | ||
| 614 | */ | ||
| 615 | static inline int __init activate_vmi(void) | ||
| 616 | { | ||
| 617 | short kernel_cs; | ||
| 618 | u64 reloc; | ||
| 619 | const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; | ||
| 620 | |||
| 621 | /* | ||
| 622 | * Prevent page tables from being allocated in highmem, even if | ||
| 623 | * CONFIG_HIGHPTE is enabled. | ||
| 624 | */ | ||
| 625 | __userpte_alloc_gfp &= ~__GFP_HIGHMEM; | ||
| 626 | |||
| 627 | if (call_vrom_func(vmi_rom, vmi_init) != 0) { | ||
| 628 | printk(KERN_ERR "VMI ROM failed to initialize!"); | ||
| 629 | return 0; | ||
| 630 | } | ||
| 631 | savesegment(cs, kernel_cs); | ||
| 632 | |||
| 633 | pv_info.paravirt_enabled = 1; | ||
| 634 | pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; | ||
| 635 | pv_info.name = "vmi [deprecated]"; | ||
| 636 | |||
| 637 | pv_init_ops.patch = vmi_patch; | ||
| 638 | |||
| 639 | /* | ||
| 640 | * Many of these operations are ABI compatible with VMI. | ||
| 641 | * This means we can fill in the paravirt-ops with direct | ||
| 642 | * pointers into the VMI ROM. If the calling convention for | ||
| 643 | * these operations changes, this code needs to be updated. | ||
| 644 | * | ||
| 645 | * Exceptions | ||
| 646 | * CPUID paravirt-op uses pointers, not the native ISA | ||
| 647 | * halt has no VMI equivalent; all VMI halts are "safe" | ||
| 648 | * no MSR support yet - just trap and emulate. VMI uses the | ||
| 649 | * same ABI as the native ISA, but Linux wants exceptions | ||
| 650 | * from bogus MSR read / write handled | ||
| 651 | * rdpmc is not yet used in Linux | ||
| 652 | */ | ||
| 653 | |||
| 654 | /* CPUID is special, so very special it gets wrapped like a present */ | ||
| 655 | para_wrap(pv_cpu_ops.cpuid, vmi_cpuid, cpuid, CPUID); | ||
| 656 | |||
| 657 | para_fill(pv_cpu_ops.clts, CLTS); | ||
| 658 | para_fill(pv_cpu_ops.get_debugreg, GetDR); | ||
| 659 | para_fill(pv_cpu_ops.set_debugreg, SetDR); | ||
| 660 | para_fill(pv_cpu_ops.read_cr0, GetCR0); | ||
| 661 | para_fill(pv_mmu_ops.read_cr2, GetCR2); | ||
| 662 | para_fill(pv_mmu_ops.read_cr3, GetCR3); | ||
| 663 | para_fill(pv_cpu_ops.read_cr4, GetCR4); | ||
| 664 | para_fill(pv_cpu_ops.write_cr0, SetCR0); | ||
| 665 | para_fill(pv_mmu_ops.write_cr2, SetCR2); | ||
| 666 | para_fill(pv_mmu_ops.write_cr3, SetCR3); | ||
| 667 | para_fill(pv_cpu_ops.write_cr4, SetCR4); | ||
| 668 | |||
| 669 | para_fill(pv_irq_ops.save_fl.func, GetInterruptMask); | ||
| 670 | para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask); | ||
| 671 | para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts); | ||
| 672 | para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts); | ||
| 673 | |||
| 674 | para_fill(pv_cpu_ops.wbinvd, WBINVD); | ||
| 675 | para_fill(pv_cpu_ops.read_tsc, RDTSC); | ||
| 676 | |||
| 677 | /* The following we emulate with trap and emulate for now */ | ||
| 678 | /* paravirt_ops.read_msr = vmi_rdmsr */ | ||
| 679 | /* paravirt_ops.write_msr = vmi_wrmsr */ | ||
| 680 | /* paravirt_ops.rdpmc = vmi_rdpmc */ | ||
| 681 | |||
| 682 | /* TR interface doesn't pass TR value, wrap */ | ||
| 683 | para_wrap(pv_cpu_ops.load_tr_desc, vmi_set_tr, set_tr, SetTR); | ||
| 684 | |||
| 685 | /* LDT is special, too */ | ||
| 686 | para_wrap(pv_cpu_ops.set_ldt, vmi_set_ldt, _set_ldt, SetLDT); | ||
| 687 | |||
| 688 | para_fill(pv_cpu_ops.load_gdt, SetGDT); | ||
| 689 | para_fill(pv_cpu_ops.load_idt, SetIDT); | ||
| 690 | para_fill(pv_cpu_ops.store_gdt, GetGDT); | ||
| 691 | para_fill(pv_cpu_ops.store_idt, GetIDT); | ||
| 692 | para_fill(pv_cpu_ops.store_tr, GetTR); | ||
| 693 | pv_cpu_ops.load_tls = vmi_load_tls; | ||
| 694 | para_wrap(pv_cpu_ops.write_ldt_entry, vmi_write_ldt_entry, | ||
| 695 | write_ldt_entry, WriteLDTEntry); | ||
| 696 | para_wrap(pv_cpu_ops.write_gdt_entry, vmi_write_gdt_entry, | ||
| 697 | write_gdt_entry, WriteGDTEntry); | ||
| 698 | para_wrap(pv_cpu_ops.write_idt_entry, vmi_write_idt_entry, | ||
| 699 | write_idt_entry, WriteIDTEntry); | ||
| 700 | para_wrap(pv_cpu_ops.load_sp0, vmi_load_sp0, set_kernel_stack, UpdateKernelStack); | ||
| 701 | para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); | ||
| 702 | para_fill(pv_cpu_ops.io_delay, IODelay); | ||
| 703 | |||
| 704 | para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch, | ||
| 705 | set_lazy_mode, SetLazyMode); | ||
| 706 | para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch, | ||
| 707 | set_lazy_mode, SetLazyMode); | ||
| 708 | |||
| 709 | para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, | ||
| 710 | set_lazy_mode, SetLazyMode); | ||
| 711 | para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu, | ||
| 712 | set_lazy_mode, SetLazyMode); | ||
| 713 | |||
| 714 | /* user and kernel flush are just handled with different flags to FlushTLB */ | ||
| 715 | para_wrap(pv_mmu_ops.flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB); | ||
| 716 | para_wrap(pv_mmu_ops.flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB); | ||
| 717 | para_fill(pv_mmu_ops.flush_tlb_single, InvalPage); | ||
| 718 | |||
| 719 | /* | ||
| 720 | * Until a standard flag format can be agreed on, we need to | ||
| 721 | * implement these as wrappers in Linux. Get the VMI ROM | ||
| 722 | * function pointers for the two backend calls. | ||
| 723 | */ | ||
| 724 | #ifdef CONFIG_X86_PAE | ||
| 725 | vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxELong); | ||
| 726 | vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxELong); | ||
| 727 | #else | ||
| 728 | vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE); | ||
| 729 | vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE); | ||
| 730 | #endif | ||
| 731 | |||
| 732 | if (vmi_ops.set_pte) { | ||
| 733 | pv_mmu_ops.set_pte = vmi_set_pte; | ||
| 734 | pv_mmu_ops.set_pte_at = vmi_set_pte_at; | ||
| 735 | pv_mmu_ops.set_pmd = vmi_set_pmd; | ||
| 736 | #ifdef CONFIG_X86_PAE | ||
| 737 | pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic; | ||
| 738 | pv_mmu_ops.set_pud = vmi_set_pud; | ||
| 739 | pv_mmu_ops.pte_clear = vmi_pte_clear; | ||
| 740 | pv_mmu_ops.pmd_clear = vmi_pmd_clear; | ||
| 741 | #endif | ||
| 742 | } | ||
| 743 | |||
| 744 | if (vmi_ops.update_pte) { | ||
| 745 | pv_mmu_ops.pte_update = vmi_update_pte; | ||
| 746 | pv_mmu_ops.pte_update_defer = vmi_update_pte_defer; | ||
| 747 | } | ||
| 748 | |||
| 749 | vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); | ||
| 750 | if (vmi_ops.allocate_page) { | ||
| 751 | pv_mmu_ops.alloc_pte = vmi_allocate_pte; | ||
| 752 | pv_mmu_ops.alloc_pmd = vmi_allocate_pmd; | ||
| 753 | pv_mmu_ops.alloc_pmd_clone = vmi_allocate_pmd_clone; | ||
| 754 | } | ||
| 755 | |||
| 756 | vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); | ||
| 757 | if (vmi_ops.release_page) { | ||
| 758 | pv_mmu_ops.release_pte = vmi_release_pte; | ||
| 759 | pv_mmu_ops.release_pmd = vmi_release_pmd; | ||
| 760 | pv_mmu_ops.pgd_free = vmi_pgd_free; | ||
| 761 | } | ||
| 762 | |||
| 763 | /* Set linear is needed in all cases */ | ||
| 764 | vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); | ||
| 765 | |||
| 766 | /* | ||
| 767 | * These MUST always be patched. Don't support indirect jumps | ||
| 768 | * through these operations, as the VMI interface may use either | ||
| 769 | * a jump or a call to get to these operations, depending on | ||
| 770 | * the backend. They are performance critical anyway, so requiring | ||
| 771 | * a patch is not a big problem. | ||
| 772 | */ | ||
| 773 | pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0; | ||
| 774 | pv_cpu_ops.iret = (void *)0xbadbab0; | ||
| 775 | |||
| 776 | #ifdef CONFIG_SMP | ||
| 777 | para_wrap(pv_apic_ops.startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState); | ||
| 778 | #endif | ||
| 779 | |||
| 780 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 781 | para_fill(apic->read, APICRead); | ||
| 782 | para_fill(apic->write, APICWrite); | ||
| 783 | #endif | ||
| 784 | |||
| 785 | /* | ||
| 786 | * Check for VMI timer functionality by probing for a cycle frequency method | ||
| 787 | */ | ||
| 788 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency); | ||
| 789 | if (!disable_vmi_timer && rel->type != VMI_RELOCATION_NONE) { | ||
| 790 | vmi_timer_ops.get_cycle_frequency = (void *)rel->eip; | ||
| 791 | vmi_timer_ops.get_cycle_counter = | ||
| 792 | vmi_get_function(VMI_CALL_GetCycleCounter); | ||
| 793 | vmi_timer_ops.get_wallclock = | ||
| 794 | vmi_get_function(VMI_CALL_GetWallclockTime); | ||
| 795 | vmi_timer_ops.wallclock_updated = | ||
| 796 | vmi_get_function(VMI_CALL_WallclockUpdated); | ||
| 797 | vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm); | ||
| 798 | vmi_timer_ops.cancel_alarm = | ||
| 799 | vmi_get_function(VMI_CALL_CancelAlarm); | ||
| 800 | x86_init.timers.timer_init = vmi_time_init; | ||
| 801 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 802 | x86_init.timers.setup_percpu_clockev = vmi_time_bsp_init; | ||
| 803 | x86_cpuinit.setup_percpu_clockev = vmi_time_ap_init; | ||
| 804 | #endif | ||
| 805 | pv_time_ops.sched_clock = vmi_sched_clock; | ||
| 806 | x86_platform.calibrate_tsc = vmi_tsc_khz; | ||
| 807 | x86_platform.get_wallclock = vmi_get_wallclock; | ||
| 808 | x86_platform.set_wallclock = vmi_set_wallclock; | ||
| 809 | |||
| 810 | /* We have true wallclock functions; disable CMOS clock sync */ | ||
| 811 | no_sync_cmos_clock = 1; | ||
| 812 | } else { | ||
| 813 | disable_noidle = 1; | ||
| 814 | disable_vmi_timer = 1; | ||
| 815 | } | ||
| 816 | |||
| 817 | para_fill(pv_irq_ops.safe_halt, Halt); | ||
| 818 | |||
| 819 | /* | ||
| 820 | * Alternative instruction rewriting doesn't happen soon enough | ||
| 821 | * to convert VMI_IRET to a call instead of a jump; so we have | ||
| 822 | * to do this before IRQs get reenabled. Fortunately, it is | ||
| 823 | * idempotent. | ||
| 824 | */ | ||
| 825 | apply_paravirt(__parainstructions, __parainstructions_end); | ||
| 826 | |||
| 827 | vmi_bringup(); | ||
| 828 | |||
| 829 | return 1; | ||
| 830 | } | ||
| 831 | |||
| 832 | #undef para_fill | ||
| 833 | |||
| 834 | void __init vmi_init(void) | ||
| 835 | { | ||
| 836 | if (!vmi_rom) | ||
| 837 | probe_vmi_rom(); | ||
| 838 | else | ||
| 839 | check_vmi_rom(vmi_rom); | ||
| 840 | |||
| 841 | /* In case probing for or validating the ROM failed, bail */ | ||
| 842 | if (!vmi_rom) | ||
| 843 | return; | ||
| 844 | |||
| 845 | reserve_top_address(-vmi_rom->virtual_top); | ||
| 846 | |||
| 847 | #ifdef CONFIG_X86_IO_APIC | ||
| 848 | /* This is virtual hardware; timer routing is wired correctly */ | ||
| 849 | no_timer_check = 1; | ||
| 850 | #endif | ||
| 851 | } | ||
| 852 | |||
| 853 | void __init vmi_activate(void) | ||
| 854 | { | ||
| 855 | unsigned long flags; | ||
| 856 | |||
| 857 | if (!vmi_rom) | ||
| 858 | return; | ||
| 859 | |||
| 860 | local_irq_save(flags); | ||
| 861 | activate_vmi(); | ||
| 862 | local_irq_restore(flags & X86_EFLAGS_IF); | ||
| 863 | } | ||
| 864 | |||
| 865 | static int __init parse_vmi(char *arg) | ||
| 866 | { | ||
| 867 | if (!arg) | ||
| 868 | return -EINVAL; | ||
| 869 | |||
| 870 | if (!strcmp(arg, "disable_pge")) { | ||
| 871 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE); | ||
| 872 | disable_pge = 1; | ||
| 873 | } else if (!strcmp(arg, "disable_pse")) { | ||
| 874 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PSE); | ||
| 875 | disable_pse = 1; | ||
| 876 | } else if (!strcmp(arg, "disable_sep")) { | ||
| 877 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP); | ||
| 878 | disable_sep = 1; | ||
| 879 | } else if (!strcmp(arg, "disable_tsc")) { | ||
| 880 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC); | ||
| 881 | disable_tsc = 1; | ||
| 882 | } else if (!strcmp(arg, "disable_mtrr")) { | ||
| 883 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_MTRR); | ||
| 884 | disable_mtrr = 1; | ||
| 885 | } else if (!strcmp(arg, "disable_timer")) { | ||
| 886 | disable_vmi_timer = 1; | ||
| 887 | disable_noidle = 1; | ||
| 888 | } else if (!strcmp(arg, "disable_noidle")) | ||
| 889 | disable_noidle = 1; | ||
| 890 | return 0; | ||
| 891 | } | ||
| 892 | |||
| 893 | early_param("vmi", parse_vmi); | ||
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c deleted file mode 100644 index 5e1ff66ecd73..000000000000 --- a/arch/x86/kernel/vmiclock_32.c +++ /dev/null | |||
| @@ -1,317 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * VMI paravirtual timer support routines. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2007, VMware, Inc. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License as published by | ||
| 8 | * the Free Software Foundation; either version 2 of the License, or | ||
| 9 | * (at your option) any later version. | ||
| 10 | * | ||
| 11 | * This program is distributed in the hope that it will be useful, but | ||
| 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
| 15 | * details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU General Public License | ||
| 18 | * along with this program; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 20 | * | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include <linux/smp.h> | ||
| 24 | #include <linux/interrupt.h> | ||
| 25 | #include <linux/cpumask.h> | ||
| 26 | #include <linux/clocksource.h> | ||
| 27 | #include <linux/clockchips.h> | ||
| 28 | |||
| 29 | #include <asm/vmi.h> | ||
| 30 | #include <asm/vmi_time.h> | ||
| 31 | #include <asm/apicdef.h> | ||
| 32 | #include <asm/apic.h> | ||
| 33 | #include <asm/timer.h> | ||
| 34 | #include <asm/i8253.h> | ||
| 35 | #include <asm/irq_vectors.h> | ||
| 36 | |||
| 37 | #define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) | ||
| 38 | #define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) | ||
| 39 | |||
| 40 | static DEFINE_PER_CPU(struct clock_event_device, local_events); | ||
| 41 | |||
| 42 | static inline u32 vmi_counter(u32 flags) | ||
| 43 | { | ||
| 44 | /* Given VMI_ONESHOT or VMI_PERIODIC, return the corresponding | ||
| 45 | * cycle counter. */ | ||
| 46 | return flags & VMI_ALARM_COUNTER_MASK; | ||
| 47 | } | ||
| 48 | |||
| 49 | /* paravirt_ops.get_wallclock = vmi_get_wallclock */ | ||
| 50 | unsigned long vmi_get_wallclock(void) | ||
| 51 | { | ||
| 52 | unsigned long long wallclock; | ||
| 53 | wallclock = vmi_timer_ops.get_wallclock(); // nsec | ||
| 54 | (void)do_div(wallclock, 1000000000); // sec | ||
| 55 | |||
| 56 | return wallclock; | ||
| 57 | } | ||
| 58 | |||
| 59 | /* paravirt_ops.set_wallclock = vmi_set_wallclock */ | ||
| 60 | int vmi_set_wallclock(unsigned long now) | ||
| 61 | { | ||
| 62 | return 0; | ||
| 63 | } | ||
| 64 | |||
| 65 | /* paravirt_ops.sched_clock = vmi_sched_clock */ | ||
| 66 | unsigned long long vmi_sched_clock(void) | ||
| 67 | { | ||
| 68 | return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE)); | ||
| 69 | } | ||
| 70 | |||
| 71 | /* x86_platform.calibrate_tsc = vmi_tsc_khz */ | ||
| 72 | unsigned long vmi_tsc_khz(void) | ||
| 73 | { | ||
| 74 | unsigned long long khz; | ||
| 75 | khz = vmi_timer_ops.get_cycle_frequency(); | ||
| 76 | (void)do_div(khz, 1000); | ||
| 77 | return khz; | ||
| 78 | } | ||
| 79 | |||
| 80 | static inline unsigned int vmi_get_timer_vector(void) | ||
| 81 | { | ||
| 82 | return IRQ0_VECTOR; | ||
| 83 | } | ||
| 84 | |||
| 85 | /** vmi clockchip */ | ||
| 86 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 87 | static unsigned int startup_timer_irq(unsigned int irq) | ||
| 88 | { | ||
| 89 | unsigned long val = apic_read(APIC_LVTT); | ||
| 90 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
| 91 | |||
| 92 | return (val & APIC_SEND_PENDING); | ||
| 93 | } | ||
| 94 | |||
| 95 | static void mask_timer_irq(unsigned int irq) | ||
| 96 | { | ||
| 97 | unsigned long val = apic_read(APIC_LVTT); | ||
| 98 | apic_write(APIC_LVTT, val | APIC_LVT_MASKED); | ||
| 99 | } | ||
| 100 | |||
| 101 | static void unmask_timer_irq(unsigned int irq) | ||
| 102 | { | ||
| 103 | unsigned long val = apic_read(APIC_LVTT); | ||
| 104 | apic_write(APIC_LVTT, val & ~APIC_LVT_MASKED); | ||
| 105 | } | ||
| 106 | |||
| 107 | static void ack_timer_irq(unsigned int irq) | ||
| 108 | { | ||
| 109 | ack_APIC_irq(); | ||
| 110 | } | ||
| 111 | |||
| 112 | static struct irq_chip vmi_chip __read_mostly = { | ||
| 113 | .name = "VMI-LOCAL", | ||
| 114 | .startup = startup_timer_irq, | ||
| 115 | .mask = mask_timer_irq, | ||
| 116 | .unmask = unmask_timer_irq, | ||
| 117 | .ack = ack_timer_irq | ||
| 118 | }; | ||
| 119 | #endif | ||
| 120 | |||
| 121 | /** vmi clockevent */ | ||
| 122 | #define VMI_ALARM_WIRED_IRQ0 0x00000000 | ||
| 123 | #define VMI_ALARM_WIRED_LVTT 0x00010000 | ||
| 124 | static int vmi_wiring = VMI_ALARM_WIRED_IRQ0; | ||
| 125 | |||
| 126 | static inline int vmi_get_alarm_wiring(void) | ||
| 127 | { | ||
| 128 | return vmi_wiring; | ||
| 129 | } | ||
| 130 | |||
| 131 | static void vmi_timer_set_mode(enum clock_event_mode mode, | ||
| 132 | struct clock_event_device *evt) | ||
| 133 | { | ||
| 134 | cycle_t now, cycles_per_hz; | ||
| 135 | BUG_ON(!irqs_disabled()); | ||
| 136 | |||
| 137 | switch (mode) { | ||
| 138 | case CLOCK_EVT_MODE_ONESHOT: | ||
| 139 | case CLOCK_EVT_MODE_RESUME: | ||
| 140 | break; | ||
| 141 | case CLOCK_EVT_MODE_PERIODIC: | ||
| 142 | cycles_per_hz = vmi_timer_ops.get_cycle_frequency(); | ||
| 143 | (void)do_div(cycles_per_hz, HZ); | ||
| 144 | now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_PERIODIC)); | ||
| 145 | vmi_timer_ops.set_alarm(VMI_PERIODIC, now, cycles_per_hz); | ||
| 146 | break; | ||
| 147 | case CLOCK_EVT_MODE_UNUSED: | ||
| 148 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
| 149 | switch (evt->mode) { | ||
| 150 | case CLOCK_EVT_MODE_ONESHOT: | ||
| 151 | vmi_timer_ops.cancel_alarm(VMI_ONESHOT); | ||
| 152 | break; | ||
| 153 | case CLOCK_EVT_MODE_PERIODIC: | ||
| 154 | vmi_timer_ops.cancel_alarm(VMI_PERIODIC); | ||
| 155 | break; | ||
| 156 | default: | ||
| 157 | break; | ||
| 158 | } | ||
| 159 | break; | ||
| 160 | default: | ||
| 161 | break; | ||
| 162 | } | ||
| 163 | } | ||
| 164 | |||
| 165 | static int vmi_timer_next_event(unsigned long delta, | ||
| 166 | struct clock_event_device *evt) | ||
| 167 | { | ||
| 168 | /* Unfortunately, set_next_event interface only passes relative | ||
| 169 | * expiry, but we want absolute expiry. It'd be better if we | ||
| 170 | * were passed an absolute expiry, since a bunch of time may | ||
| 171 | * have been stolen between the time the delta is computed and | ||
| 172 | * when we set the alarm below. */ | ||
| 173 | cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT)); | ||
| 174 | |||
| 175 | BUG_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT); | ||
| 176 | vmi_timer_ops.set_alarm(VMI_ONESHOT, now + delta, 0); | ||
| 177 | return 0; | ||
| 178 | } | ||
| 179 | |||
| 180 | static struct clock_event_device vmi_clockevent = { | ||
| 181 | .name = "vmi-timer", | ||
| 182 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | ||
| 183 | .shift = 22, | ||
| 184 | .set_mode = vmi_timer_set_mode, | ||
| 185 | .set_next_event = vmi_timer_next_event, | ||
| 186 | .rating = 1000, | ||
| 187 | .irq = 0, | ||
| 188 | }; | ||
| 189 | |||
| 190 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id) | ||
| 191 | { | ||
| 192 | struct clock_event_device *evt = &__get_cpu_var(local_events); | ||
| 193 | evt->event_handler(evt); | ||
| 194 | return IRQ_HANDLED; | ||
| 195 | } | ||
| 196 | |||
| 197 | static struct irqaction vmi_clock_action = { | ||
| 198 | .name = "vmi-timer", | ||
| 199 | .handler = vmi_timer_interrupt, | ||
| 200 | .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER, | ||
| 201 | }; | ||
| 202 | |||
| 203 | static void __devinit vmi_time_init_clockevent(void) | ||
| 204 | { | ||
| 205 | cycle_t cycles_per_msec; | ||
| 206 | struct clock_event_device *evt; | ||
| 207 | |||
| 208 | int cpu = smp_processor_id(); | ||
| 209 | evt = &__get_cpu_var(local_events); | ||
| 210 | |||
| 211 | /* Use cycles_per_msec since div_sc params are 32-bits. */ | ||
| 212 | cycles_per_msec = vmi_timer_ops.get_cycle_frequency(); | ||
| 213 | (void)do_div(cycles_per_msec, 1000); | ||
| 214 | |||
| 215 | memcpy(evt, &vmi_clockevent, sizeof(*evt)); | ||
| 216 | /* Must pick .shift such that .mult fits in 32-bits. Choosing | ||
| 217 | * .shift to be 22 allows up to 2^(32-22) cycles per nanosecond | ||
| 218 | * before .mult overflows. */ | ||
| 219 | evt->mult = div_sc(cycles_per_msec, NSEC_PER_MSEC, evt->shift); | ||
| 220 | /* Upper bound is clockevent's use of ulong for cycle deltas. */ | ||
| 221 | evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); | ||
| 222 | evt->min_delta_ns = clockevent_delta2ns(1, evt); | ||
| 223 | evt->cpumask = cpumask_of(cpu); | ||
| 224 | |||
| 225 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%u shift=%u\n", | ||
| 226 | evt->name, evt->mult, evt->shift); | ||
| 227 | clockevents_register_device(evt); | ||
| 228 | } | ||
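The mult/shift pair is plain fixed-point scaling. A worked example, assuming a hypothetical 1 GHz cycle counter (10^6 cycles per millisecond):

    /* Worked example (assumes a 1 GHz counter, i.e. 10^6 cycles/ms). */
    #include <stdio.h>

    int main(void)
    {
            unsigned long long cycles_per_msec = 1000000;
            unsigned long long nsec_per_msec = 1000000;
            unsigned shift = 22;

            /* div_sc(): mult = (cycles_per_msec << shift) / nsec_per_msec */
            unsigned long long mult =
                    (cycles_per_msec << shift) / nsec_per_msec;

            /* clockevents turn a delta in ns into cycles: (ns * mult) >> shift */
            unsigned long long delta_ns = 500;
            unsigned long long cycles = (delta_ns * mult) >> shift;

            printf("mult=%llu cycles(500ns)=%llu\n", mult, cycles);
            /* prints: mult=4194304 cycles(500ns)=500 */
            return 0;
    }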
| 229 | |||
| 230 | void __init vmi_time_init(void) | ||
| 231 | { | ||
| 232 | unsigned int cpu; | ||
| 233 | /* Disable PIT: BIOSes start PIT CH0 in 18.2 Hz periodic mode. */ | ||
| 234 | outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ | ||
| 235 | |||
| 236 | vmi_time_init_clockevent(); | ||
| 237 | setup_irq(0, &vmi_clock_action); | ||
| 238 | for_each_possible_cpu(cpu) | ||
| 239 | per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0; | ||
| 240 | } | ||
| 241 | |||
| 242 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 243 | void __devinit vmi_time_bsp_init(void) | ||
| 244 | { | ||
| 245 | /* | ||
| 246 | * On APIC systems, we want local timers to fire on each cpu. We do | ||
| 247 | * this by programming LVTT to deliver timer events to the IRQ handler | ||
| 248 | * for IRQ-0, since we can't re-use the APIC local timer handler | ||
| 249 | * without interfering with that code. | ||
| 250 | */ | ||
| 251 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); | ||
| 252 | local_irq_disable(); | ||
| 253 | #ifdef CONFIG_SMP | ||
| 254 | /* | ||
| 255 | * XXX handle_percpu_irq only defined for SMP; we need to switch over | ||
| 256 | * to using it, since this is a local interrupt, which each CPU must | ||
| 257 | * handle individually without locking out or dropping simultaneous | ||
| 258 | * local timers on other CPUs. We also don't want to trigger the | ||
| 259 | * quirk workaround code for interrupts which gets invoked from | ||
| 260 | * handle_percpu_irq via eoi, so we use our own IRQ chip. | ||
| 261 | */ | ||
| 262 | set_irq_chip_and_handler_name(0, &vmi_chip, handle_percpu_irq, "lvtt"); | ||
| 263 | #else | ||
| 264 | set_irq_chip_and_handler_name(0, &vmi_chip, handle_edge_irq, "lvtt"); | ||
| 265 | #endif | ||
| 266 | vmi_wiring = VMI_ALARM_WIRED_LVTT; | ||
| 267 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
| 268 | local_irq_enable(); | ||
| 269 | clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); | ||
| 270 | } | ||
| 271 | |||
| 272 | void __devinit vmi_time_ap_init(void) | ||
| 273 | { | ||
| 274 | vmi_time_init_clockevent(); | ||
| 275 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
| 276 | } | ||
| 277 | #endif | ||
| 278 | |||
| 279 | /** vmi clocksource */ | ||
| 280 | static struct clocksource clocksource_vmi; | ||
| 281 | |||
| 282 | static cycle_t read_real_cycles(struct clocksource *cs) | ||
| 283 | { | ||
| 284 | cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); | ||
| 285 | return max(ret, clocksource_vmi.cycle_last); | ||
| 286 | } | ||
| 287 | |||
| 288 | static struct clocksource clocksource_vmi = { | ||
| 289 | .name = "vmi-timer", | ||
| 290 | .rating = 450, | ||
| 291 | .read = read_real_cycles, | ||
| 292 | .mask = CLOCKSOURCE_MASK(64), | ||
| 293 | .mult = 0, /* to be set */ | ||
| 294 | .shift = 22, | ||
| 295 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
| 296 | }; | ||
| 297 | |||
| 298 | static int __init init_vmi_clocksource(void) | ||
| 299 | { | ||
| 300 | cycle_t cycles_per_msec; | ||
| 301 | |||
| 302 | if (!vmi_timer_ops.get_cycle_frequency) | ||
| 303 | return 0; | ||
| 304 | /* Use khz2mult rather than hz2mult since hz arg is only 32-bits. */ | ||
| 305 | cycles_per_msec = vmi_timer_ops.get_cycle_frequency(); | ||
| 306 | (void)do_div(cycles_per_msec, 1000); | ||
| 307 | |||
| 308 | /* Note that clocksource.{mult, shift} converts in the opposite direction | ||
| 309 | * from clockevents. */ | ||
| 310 | clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec, | ||
| 311 | clocksource_vmi.shift); | ||
| 312 | |||
| 313 | printk(KERN_WARNING "vmi: registering clock source khz=%lld\n", cycles_per_msec); | ||
| 314 | return clocksource_register(&clocksource_vmi); | ||
| 315 | |||
| 316 | } | ||
| 317 | module_init(init_vmi_clocksource); | ||
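To make the direction comment concrete: a clocksource mult/shift converts cycles to nanoseconds, the reverse of a clockevent. A sketch with the same assumed 1 GHz counter:

    /* Sketch: clocksource direction is cycles -> ns (1 GHz assumed). */
    #include <stdio.h>

    int main(void)
    {
            unsigned long long khz = 1000000;       /* 10^6 kHz = 1 GHz */
            unsigned shift = 22;

            /* clocksource_khz2mult(): mult = ((10^6 ns/ms) << shift) / khz */
            unsigned long long mult = (1000000ULL << shift) / khz;

            unsigned long long cycles = 12345;
            printf("ns = %llu\n", (cycles * mult) >> shift);  /* prints 12345 */
            return 0;
    }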
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 77d8c0f4817d..22b06f7660f4 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
| @@ -1056,14 +1056,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) | |||
| 1056 | 1056 | ||
| 1057 | vcpu->arch.apic = apic; | 1057 | vcpu->arch.apic = apic; |
| 1058 | 1058 | ||
| 1059 | apic->regs_page = alloc_page(GFP_KERNEL); | 1059 | apic->regs_page = alloc_page(GFP_KERNEL|__GFP_ZERO); |
| 1060 | if (apic->regs_page == NULL) { | 1060 | if (apic->regs_page == NULL) { |
| 1061 | printk(KERN_ERR "malloc apic regs error for vcpu %x\n", | 1061 | printk(KERN_ERR "malloc apic regs error for vcpu %x\n", |
| 1062 | vcpu->vcpu_id); | 1062 | vcpu->vcpu_id); |
| 1063 | goto nomem_free_apic; | 1063 | goto nomem_free_apic; |
| 1064 | } | 1064 | } |
| 1065 | apic->regs = page_address(apic->regs_page); | 1065 | apic->regs = page_address(apic->regs_page); |
| 1066 | memset(apic->regs, 0, PAGE_SIZE); | ||
| 1067 | apic->vcpu = vcpu; | 1066 | apic->vcpu = vcpu; |
| 1068 | 1067 | ||
| 1069 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, | 1068 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3a09c625d526..6c2ecf0a806d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -1991,13 +1991,14 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 1991 | 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | | 1991 | 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | |
| 1992 | 0 /* Reserved, DCA */ | F(XMM4_1) | | 1992 | 0 /* Reserved, DCA */ | F(XMM4_1) | |
| 1993 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | | 1993 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | |
| 1994 | 0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX); | 1994 | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | |
| 1995 | F(F16C); | ||
| 1995 | /* cpuid 0x80000001.ecx */ | 1996 | /* cpuid 0x80000001.ecx */ |
| 1996 | const u32 kvm_supported_word6_x86_features = | 1997 | const u32 kvm_supported_word6_x86_features = |
| 1997 | F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | | 1998 | F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | |
| 1998 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | | 1999 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | |
| 1999 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) | | 2000 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | |
| 2000 | 0 /* SKINIT */ | 0 /* WDT */; | 2001 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); |
| 2001 | 2002 | ||
| 2002 | /* all calls to cpuid_count() should be made on the same cpu */ | 2003 | /* all calls to cpuid_count() should be made on the same cpu */ |
| 2003 | get_cpu(); | 2004 | get_cpu(); |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 9d5f55848455..73b1e1a1f489 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
| @@ -791,22 +791,22 @@ static void lguest_flush_tlb_kernel(void) | |||
| 791 | * simple as setting a bit. We don't actually "ack" interrupts as such, we | 791 | * simple as setting a bit. We don't actually "ack" interrupts as such, we |
| 792 | * just mask and unmask them. I wonder if we should be cleverer? | 792 | * just mask and unmask them. I wonder if we should be cleverer? |
| 793 | */ | 793 | */ |
| 794 | static void disable_lguest_irq(unsigned int irq) | 794 | static void disable_lguest_irq(struct irq_data *data) |
| 795 | { | 795 | { |
| 796 | set_bit(irq, lguest_data.blocked_interrupts); | 796 | set_bit(data->irq, lguest_data.blocked_interrupts); |
| 797 | } | 797 | } |
| 798 | 798 | ||
| 799 | static void enable_lguest_irq(unsigned int irq) | 799 | static void enable_lguest_irq(struct irq_data *data) |
| 800 | { | 800 | { |
| 801 | clear_bit(irq, lguest_data.blocked_interrupts); | 801 | clear_bit(data->irq, lguest_data.blocked_interrupts); |
| 802 | } | 802 | } |
| 803 | 803 | ||
| 804 | /* This structure describes the lguest IRQ controller. */ | 804 | /* This structure describes the lguest IRQ controller. */ |
| 805 | static struct irq_chip lguest_irq_controller = { | 805 | static struct irq_chip lguest_irq_controller = { |
| 806 | .name = "lguest", | 806 | .name = "lguest", |
| 807 | .mask = disable_lguest_irq, | 807 | .irq_mask = disable_lguest_irq, |
| 808 | .mask_ack = disable_lguest_irq, | 808 | .irq_mask_ack = disable_lguest_irq, |
| 809 | .unmask = enable_lguest_irq, | 809 | .irq_unmask = enable_lguest_irq, |
| 810 | }; | 810 | }; |
| 811 | 811 | ||
| 812 | /* | 812 | /* |
| @@ -838,12 +838,12 @@ static void __init lguest_init_IRQ(void) | |||
| 838 | * rather than set them in lguest_init_IRQ we are called here every time an | 838 | * rather than set them in lguest_init_IRQ we are called here every time an |
| 839 | * lguest device needs an interrupt. | 839 | * lguest device needs an interrupt. |
| 840 | * | 840 | * |
| 841 | * FIXME: irq_to_desc_alloc_node() can fail due to lack of memory, we should | 841 | * FIXME: irq_alloc_desc_at() can fail due to lack of memory, we should |
| 842 | * pass that up! | 842 | * pass that up! |
| 843 | */ | 843 | */ |
| 844 | void lguest_setup_irq(unsigned int irq) | 844 | void lguest_setup_irq(unsigned int irq) |
| 845 | { | 845 | { |
| 846 | irq_to_desc_alloc_node(irq, 0); | 846 | irq_alloc_desc_at(irq, 0); |
| 847 | set_irq_chip_and_handler_name(irq, &lguest_irq_controller, | 847 | set_irq_chip_and_handler_name(irq, &lguest_irq_controller, |
| 848 | handle_level_irq, "level"); | 848 | handle_level_irq, "level"); |
| 849 | } | 849 | } |
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c index 5415a9d06f53..b908a59eccf5 100644 --- a/arch/x86/lib/memcpy_32.c +++ b/arch/x86/lib/memcpy_32.c | |||
| @@ -22,22 +22,187 @@ EXPORT_SYMBOL(memset); | |||
| 22 | 22 | ||
| 23 | void *memmove(void *dest, const void *src, size_t n) | 23 | void *memmove(void *dest, const void *src, size_t n) |
| 24 | { | 24 | { |
| 25 | int d0, d1, d2; | 25 | int d0,d1,d2,d3,d4,d5; |
| 26 | 26 | char *ret = dest; | |
| 27 | if (dest < src) { | 27 | |
| 28 | memcpy(dest, src, n); | 28 | __asm__ __volatile__( |
| 29 | } else { | 29 | /* Handle sizes of 16 bytes or more in the loop */ |
| 30 | __asm__ __volatile__( | 30 | "cmp $0x10, %0\n\t" |
| 31 | "std\n\t" | 31 | "jb 1f\n\t" |
| 32 | "rep\n\t" | 32 | |
| 33 | "movsb\n\t" | 33 | /* Decide forward/backward copy mode */ |
| 34 | "cld" | 34 | "cmp %2, %1\n\t" |
| 35 | : "=&c" (d0), "=&S" (d1), "=&D" (d2) | 35 | "jb 2f\n\t" |
| 36 | :"0" (n), | 36 | |
| 37 | "1" (n-1+src), | 37 | /* |
| 38 | "2" (n-1+dest) | 38 | * movs instruction have many startup latency |
| 39 | :"memory"); | 39 | * so we handle small size by general register. |
| 40 | } | 40 | */ |
| 41 | return dest; | 41 | "cmp $680, %0\n\t" |
| 42 | "jb 3f\n\t" | ||
| 43 | /* | ||
| 44 | * the movs instruction is only good for the aligned case. | ||
| 45 | */ | ||
| 46 | "mov %1, %3\n\t" | ||
| 47 | "xor %2, %3\n\t" | ||
| 48 | "and $0xff, %3\n\t" | ||
| 49 | "jz 4f\n\t" | ||
| 50 | "3:\n\t" | ||
| 51 | "sub $0x10, %0\n\t" | ||
| 52 | |||
| 53 | /* | ||
| 54 | * We gobble 16 bytes forward in each loop. | ||
| 55 | */ | ||
| 56 | "3:\n\t" | ||
| 57 | "sub $0x10, %0\n\t" | ||
| 58 | "mov 0*4(%1), %3\n\t" | ||
| 59 | "mov 1*4(%1), %4\n\t" | ||
| 60 | "mov %3, 0*4(%2)\n\t" | ||
| 61 | "mov %4, 1*4(%2)\n\t" | ||
| 62 | "mov 2*4(%1), %3\n\t" | ||
| 63 | "mov 3*4(%1), %4\n\t" | ||
| 64 | "mov %3, 2*4(%2)\n\t" | ||
| 65 | "mov %4, 3*4(%2)\n\t" | ||
| 66 | "lea 0x10(%1), %1\n\t" | ||
| 67 | "lea 0x10(%2), %2\n\t" | ||
| 68 | "jae 3b\n\t" | ||
| 69 | "add $0x10, %0\n\t" | ||
| 70 | "jmp 1f\n\t" | ||
| 71 | |||
| 72 | /* | ||
| 73 | * Handle data forward by movs. | ||
| 74 | */ | ||
| 75 | ".p2align 4\n\t" | ||
| 76 | "4:\n\t" | ||
| 77 | "mov -4(%1, %0), %3\n\t" | ||
| 78 | "lea -4(%2, %0), %4\n\t" | ||
| 79 | "shr $2, %0\n\t" | ||
| 80 | "rep movsl\n\t" | ||
| 81 | "mov %3, (%4)\n\t" | ||
| 82 | "jmp 11f\n\t" | ||
| 83 | /* | ||
| 84 | * Handle data backward by movs. | ||
| 85 | */ | ||
| 86 | ".p2align 4\n\t" | ||
| 87 | "6:\n\t" | ||
| 88 | "mov (%1), %3\n\t" | ||
| 89 | "mov %2, %4\n\t" | ||
| 90 | "lea -4(%1, %0), %1\n\t" | ||
| 91 | "lea -4(%2, %0), %2\n\t" | ||
| 92 | "shr $2, %0\n\t" | ||
| 93 | "std\n\t" | ||
| 94 | "rep movsl\n\t" | ||
| 95 | "mov %3,(%4)\n\t" | ||
| 96 | "cld\n\t" | ||
| 97 | "jmp 11f\n\t" | ||
| 98 | |||
| 99 | /* | ||
| 100 | * Start to prepare for backward copy. | ||
| 101 | */ | ||
| 102 | ".p2align 4\n\t" | ||
| 103 | "2:\n\t" | ||
| 104 | "cmp $680, %0\n\t" | ||
| 105 | "jb 5f\n\t" | ||
| 106 | "mov %1, %3\n\t" | ||
| 107 | "xor %2, %3\n\t" | ||
| 108 | "and $0xff, %3\n\t" | ||
| 109 | "jz 6b\n\t" | ||
| 110 | |||
| 111 | /* | ||
| 112 | * Calculate copy position to tail. | ||
| 113 | */ | ||
| 114 | "5:\n\t" | ||
| 115 | "add %0, %1\n\t" | ||
| 116 | "add %0, %2\n\t" | ||
| 117 | "sub $0x10, %0\n\t" | ||
| 118 | |||
| 119 | /* | ||
| 120 | * We gobble 16 bytes backward in each loop. | ||
| 121 | */ | ||
| 122 | "7:\n\t" | ||
| 123 | "sub $0x10, %0\n\t" | ||
| 124 | |||
| 125 | "mov -1*4(%1), %3\n\t" | ||
| 126 | "mov -2*4(%1), %4\n\t" | ||
| 127 | "mov %3, -1*4(%2)\n\t" | ||
| 128 | "mov %4, -2*4(%2)\n\t" | ||
| 129 | "mov -3*4(%1), %3\n\t" | ||
| 130 | "mov -4*4(%1), %4\n\t" | ||
| 131 | "mov %3, -3*4(%2)\n\t" | ||
| 132 | "mov %4, -4*4(%2)\n\t" | ||
| 133 | "lea -0x10(%1), %1\n\t" | ||
| 134 | "lea -0x10(%2), %2\n\t" | ||
| 135 | "jae 7b\n\t" | ||
| 136 | /* | ||
| 137 | * Calculate copy position to head. | ||
| 138 | */ | ||
| 139 | "add $0x10, %0\n\t" | ||
| 140 | "sub %0, %1\n\t" | ||
| 141 | "sub %0, %2\n\t" | ||
| 142 | |||
| 143 | /* | ||
| 144 | * Move data from 8 bytes to 15 bytes. | ||
| 145 | */ | ||
| 146 | ".p2align 4\n\t" | ||
| 147 | "1:\n\t" | ||
| 148 | "cmp $8, %0\n\t" | ||
| 149 | "jb 8f\n\t" | ||
| 150 | "mov 0*4(%1), %3\n\t" | ||
| 151 | "mov 1*4(%1), %4\n\t" | ||
| 152 | "mov -2*4(%1, %0), %5\n\t" | ||
| 153 | "mov -1*4(%1, %0), %1\n\t" | ||
| 154 | |||
| 155 | "mov %3, 0*4(%2)\n\t" | ||
| 156 | "mov %4, 1*4(%2)\n\t" | ||
| 157 | "mov %5, -2*4(%2, %0)\n\t" | ||
| 158 | "mov %1, -1*4(%2, %0)\n\t" | ||
| 159 | "jmp 11f\n\t" | ||
| 160 | |||
| 161 | /* | ||
| 162 | * Move data from 4 bytes to 7 bytes. | ||
| 163 | */ | ||
| 164 | ".p2align 4\n\t" | ||
| 165 | "8:\n\t" | ||
| 166 | "cmp $4, %0\n\t" | ||
| 167 | "jb 9f\n\t" | ||
| 168 | "mov 0*4(%1), %3\n\t" | ||
| 169 | "mov -1*4(%1, %0), %4\n\t" | ||
| 170 | "mov %3, 0*4(%2)\n\t" | ||
| 171 | "mov %4, -1*4(%2, %0)\n\t" | ||
| 172 | "jmp 11f\n\t" | ||
| 173 | |||
| 174 | /* | ||
| 175 | * Move data from 2 bytes to 3 bytes. | ||
| 176 | */ | ||
| 177 | ".p2align 4\n\t" | ||
| 178 | "9:\n\t" | ||
| 179 | "cmp $2, %0\n\t" | ||
| 180 | "jb 10f\n\t" | ||
| 181 | "movw 0*2(%1), %%dx\n\t" | ||
| 182 | "movw -1*2(%1, %0), %%bx\n\t" | ||
| 183 | "movw %%dx, 0*2(%2)\n\t" | ||
| 184 | "movw %%bx, -1*2(%2, %0)\n\t" | ||
| 185 | "jmp 11f\n\t" | ||
| 186 | |||
| 187 | /* | ||
| 188 | * Move data for 1 byte. | ||
| 189 | */ | ||
| 190 | ".p2align 4\n\t" | ||
| 191 | "10:\n\t" | ||
| 192 | "cmp $1, %0\n\t" | ||
| 193 | "jb 11f\n\t" | ||
| 194 | "movb (%1), %%cl\n\t" | ||
| 195 | "movb %%cl, (%2)\n\t" | ||
| 196 | ".p2align 4\n\t" | ||
| 197 | "11:" | ||
| 198 | : "=&c" (d0), "=&S" (d1), "=&D" (d2), | ||
| 199 | "=r" (d3),"=r" (d4), "=r"(d5) | ||
| 200 | :"0" (n), | ||
| 201 | "1" (src), | ||
| 202 | "2" (dest) | ||
| 203 | :"memory"); | ||
| 204 | |||
| 205 | return ret; | ||
| 206 | |||
| 42 | } | 207 | } |
| 43 | EXPORT_SYMBOL(memmove); | 208 | EXPORT_SYMBOL(memmove); |
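In outline, the assembly above implements the classic overlap rule: copy forward when the destination is below the source, backward otherwise, plus fast paths (16-byte register blocks, and rep movsl above the 680-byte threshold when both pointers share the same low address byte). A plain-C sketch of just the overlap rule:

    /* Plain-C sketch of the overlap handling the inline asm implements. */
    #include <stddef.h>

    void *memmove_sketch(void *dest, const void *src, size_t n)
    {
            unsigned char *d = dest;
            const unsigned char *s = src;

            if (d <= s) {
                    while (n--)
                            *d++ = *s++;    /* forward: reads stay ahead */
            } else {
                    d += n;
                    s += n;
                    while (n--)
                            *--d = *--s;    /* backward: safe for dest > src */
            }
            return dest;
    }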
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index bcbcd1e0f7d5..75ef61e35e38 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S | |||
| @@ -40,84 +40,132 @@ | |||
| 40 | ENTRY(__memcpy) | 40 | ENTRY(__memcpy) |
| 41 | ENTRY(memcpy) | 41 | ENTRY(memcpy) |
| 42 | CFI_STARTPROC | 42 | CFI_STARTPROC |
| 43 | movq %rdi, %rax | ||
| 43 | 44 | ||
| 44 | /* | 45 | /* |
| 45 | * Put the number of full 64-byte blocks into %ecx. | 46 | * Use 32bit CMP here to avoid long NOP padding. |
| 46 | * Tail portion is handled at the end: | ||
| 47 | */ | 47 | */ |
| 48 | movq %rdi, %rax | 48 | cmp $0x20, %edx |
| 49 | movl %edx, %ecx | 49 | jb .Lhandle_tail |
| 50 | shrl $6, %ecx | ||
| 51 | jz .Lhandle_tail | ||
| 52 | 50 | ||
| 53 | .p2align 4 | ||
| 54 | .Lloop_64: | ||
| 55 | /* | 51 | /* |
| 56 | * We decrement the loop index here - and the zero-flag is | 52 | * We check whether a memory false dependence could occur, |
| 57 | * checked at the end of the loop (instructions inbetween do | 53 | * then jump to the corresponding copy mode. |
| 58 | * not change the zero flag): | ||
| 59 | */ | 54 | */ |
| 60 | decl %ecx | 55 | cmp %dil, %sil |
| 56 | jl .Lcopy_backward | ||
| 57 | subl $0x20, %edx | ||
| 58 | .Lcopy_forward_loop: | ||
| 59 | subq $0x20, %rdx | ||
| 61 | 60 | ||
| 62 | /* | 61 | /* |
| 63 | * Move in blocks of 4x16 bytes: | 62 | * Move in blocks of 4x8 bytes: |
| 64 | */ | 63 | */ |
| 65 | movq 0*8(%rsi), %r11 | 64 | movq 0*8(%rsi), %r8 |
| 66 | movq 1*8(%rsi), %r8 | 65 | movq 1*8(%rsi), %r9 |
| 67 | movq %r11, 0*8(%rdi) | 66 | movq 2*8(%rsi), %r10 |
| 68 | movq %r8, 1*8(%rdi) | 67 | movq 3*8(%rsi), %r11 |
| 69 | 68 | leaq 4*8(%rsi), %rsi | |
| 70 | movq 2*8(%rsi), %r9 | 69 | |
| 71 | movq 3*8(%rsi), %r10 | 70 | movq %r8, 0*8(%rdi) |
| 72 | movq %r9, 2*8(%rdi) | 71 | movq %r9, 1*8(%rdi) |
| 73 | movq %r10, 3*8(%rdi) | 72 | movq %r10, 2*8(%rdi) |
| 74 | 73 | movq %r11, 3*8(%rdi) | |
| 75 | movq 4*8(%rsi), %r11 | 74 | leaq 4*8(%rdi), %rdi |
| 76 | movq 5*8(%rsi), %r8 | 75 | jae .Lcopy_forward_loop |
| 77 | movq %r11, 4*8(%rdi) | 76 | addq $0x20, %rdx |
| 78 | movq %r8, 5*8(%rdi) | 77 | jmp .Lhandle_tail |
| 79 | 78 | ||
| 80 | movq 6*8(%rsi), %r9 | 79 | .Lcopy_backward: |
| 81 | movq 7*8(%rsi), %r10 | 80 | /* |
| 82 | movq %r9, 6*8(%rdi) | 81 | * Calculate copy position to tail. |
| 83 | movq %r10, 7*8(%rdi) | 82 | */ |
| 84 | 83 | addq %rdx, %rsi | |
| 85 | leaq 64(%rsi), %rsi | 84 | addq %rdx, %rdi |
| 86 | leaq 64(%rdi), %rdi | 85 | subq $0x20, %rdx |
| 87 | 86 | /* | |
| 88 | jnz .Lloop_64 | 87 | * At most 3 ALU operations in one cycle, |
| 88 | * so append NOPs in the same 16-byte chunk. | ||
| 89 | */ | ||
| 90 | .p2align 4 | ||
| 91 | .Lcopy_backward_loop: | ||
| 92 | subq $0x20, %rdx | ||
| 93 | movq -1*8(%rsi), %r8 | ||
| 94 | movq -2*8(%rsi), %r9 | ||
| 95 | movq -3*8(%rsi), %r10 | ||
| 96 | movq -4*8(%rsi), %r11 | ||
| 97 | leaq -4*8(%rsi), %rsi | ||
| 98 | movq %r8, -1*8(%rdi) | ||
| 99 | movq %r9, -2*8(%rdi) | ||
| 100 | movq %r10, -3*8(%rdi) | ||
| 101 | movq %r11, -4*8(%rdi) | ||
| 102 | leaq -4*8(%rdi), %rdi | ||
| 103 | jae .Lcopy_backward_loop | ||
| 89 | 104 | ||
| 105 | /* | ||
| 106 | * Calculate copy position to head. | ||
| 107 | */ | ||
| 108 | addq $0x20, %rdx | ||
| 109 | subq %rdx, %rsi | ||
| 110 | subq %rdx, %rdi | ||
| 90 | .Lhandle_tail: | 111 | .Lhandle_tail: |
| 91 | movl %edx, %ecx | 112 | cmpq $16, %rdx |
| 92 | andl $63, %ecx | 113 | jb .Lless_16bytes |
| 93 | shrl $3, %ecx | ||
| 94 | jz .Lhandle_7 | ||
| 95 | 114 | ||
| 115 | /* | ||
| 116 | * Move data from 16 bytes to 31 bytes. | ||
| 117 | */ | ||
| 118 | movq 0*8(%rsi), %r8 | ||
| 119 | movq 1*8(%rsi), %r9 | ||
| 120 | movq -2*8(%rsi, %rdx), %r10 | ||
| 121 | movq -1*8(%rsi, %rdx), %r11 | ||
| 122 | movq %r8, 0*8(%rdi) | ||
| 123 | movq %r9, 1*8(%rdi) | ||
| 124 | movq %r10, -2*8(%rdi, %rdx) | ||
| 125 | movq %r11, -1*8(%rdi, %rdx) | ||
| 126 | retq | ||
| 96 | .p2align 4 | 127 | .p2align 4 |
| 97 | .Lloop_8: | 128 | .Lless_16bytes: |
| 98 | decl %ecx | 129 | cmpq $8, %rdx |
| 99 | movq (%rsi), %r8 | 130 | jb .Lless_8bytes |
| 100 | movq %r8, (%rdi) | 131 | /* |
| 101 | leaq 8(%rdi), %rdi | 132 | * Move data from 8 bytes to 15 bytes. |
| 102 | leaq 8(%rsi), %rsi | 133 | */ |
| 103 | jnz .Lloop_8 | 134 | movq 0*8(%rsi), %r8 |
| 104 | 135 | movq -1*8(%rsi, %rdx), %r9 | |
| 105 | .Lhandle_7: | 136 | movq %r8, 0*8(%rdi) |
| 106 | movl %edx, %ecx | 137 | movq %r9, -1*8(%rdi, %rdx) |
| 107 | andl $7, %ecx | 138 | retq |
| 108 | jz .Lend | 139 | .p2align 4 |
| 140 | .Lless_8bytes: | ||
| 141 | cmpq $4, %rdx | ||
| 142 | jb .Lless_3bytes | ||
| 109 | 143 | ||
| 144 | /* | ||
| 145 | * Move data from 4 bytes to 7 bytes. | ||
| 146 | */ | ||
| 147 | movl (%rsi), %ecx | ||
| 148 | movl -4(%rsi, %rdx), %r8d | ||
| 149 | movl %ecx, (%rdi) | ||
| 150 | movl %r8d, -4(%rdi, %rdx) | ||
| 151 | retq | ||
| 110 | .p2align 4 | 152 | .p2align 4 |
| 153 | .Lless_3bytes: | ||
| 154 | cmpl $0, %edx | ||
| 155 | je .Lend | ||
| 156 | /* | ||
| 157 | * Move data from 1 byte to 3 bytes. | ||
| 158 | */ | ||
| 111 | .Lloop_1: | 159 | .Lloop_1: |
| 112 | movb (%rsi), %r8b | 160 | movb (%rsi), %r8b |
| 113 | movb %r8b, (%rdi) | 161 | movb %r8b, (%rdi) |
| 114 | incq %rdi | 162 | incq %rdi |
| 115 | incq %rsi | 163 | incq %rsi |
| 116 | decl %ecx | 164 | decl %edx |
| 117 | jnz .Lloop_1 | 165 | jnz .Lloop_1 |
| 118 | 166 | ||
| 119 | .Lend: | 167 | .Lend: |
| 120 | ret | 168 | retq |
| 121 | CFI_ENDPROC | 169 | CFI_ENDPROC |
| 122 | ENDPROC(memcpy) | 170 | ENDPROC(memcpy) |
| 123 | ENDPROC(__memcpy) | 171 | ENDPROC(__memcpy) |
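Two details of the rewritten memcpy are worth spelling out. First, `cmp %dil, %sil / jl .Lcopy_backward` compares only the low bytes of the destination and source pointers; the idea, per the comment, is that when the source's low byte compares below the destination's, the forward loop's stores can alias loads issued a few iterations later (the "memory false dependence"), so the copy runs backward instead. Second, the 16-to-31-byte tail needs no loop: two 8-byte loads from the head and two from the tail cover the whole range, with the middle bytes simply written twice. A C sketch of that tail (illustrative only, not part of the patch):

        #include <string.h>

        /* Copy n bytes, 16 <= n <= 31, with four overlapping 8-byte moves. */
        static void copy_tail_16_to_31(char *dst, const char *src, size_t n)
        {
                unsigned long q0, q1, q2, q3;

                memcpy(&q0, src, 8);            /* head pair */
                memcpy(&q1, src + 8, 8);
                memcpy(&q2, src + n - 16, 8);   /* tail pair, may overlap head */
                memcpy(&q3, src + n - 8, 8);
                memcpy(dst, &q0, 8);
                memcpy(dst + 8, &q1, 8);
                memcpy(dst + n - 16, &q2, 8);
                memcpy(dst + n - 8, &q3, 8);
        }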
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c index 0a33909bf122..6d0f0ec41b34 100644 --- a/arch/x86/lib/memmove_64.c +++ b/arch/x86/lib/memmove_64.c | |||
| @@ -8,14 +8,185 @@ | |||
| 8 | #undef memmove | 8 | #undef memmove |
| 9 | void *memmove(void *dest, const void *src, size_t count) | 9 | void *memmove(void *dest, const void *src, size_t count) |
| 10 | { | 10 | { |
| 11 | if (dest < src) { | 11 | unsigned long d0,d1,d2,d3,d4,d5,d6,d7; |
| 12 | return memcpy(dest, src, count); | 12 | char *ret; |
| 13 | } else { | 13 | |
| 14 | char *p = dest + count; | 14 | __asm__ __volatile__( |
| 15 | const char *s = src + count; | 15 | /* Handle 32 bytes or more in the loop */ |
| 16 | while (count--) | 16 | "mov %2, %3\n\t" |
| 17 | *--p = *--s; | 17 | "cmp $0x20, %0\n\t" |
| 18 | } | 18 | "jb 1f\n\t" |
| 19 | return dest; | 19 | |
| 20 | /* Decide forward/backward copy mode */ | ||
| 21 | "cmp %2, %1\n\t" | ||
| 22 | "jb 2f\n\t" | ||
| 23 | |||
| 24 | /* | ||
| 25 | * The movsq instruction has a high startup latency, | ||
| 26 | * so we handle small sizes with general registers. | ||
| 27 | */ | ||
| 28 | "cmp $680, %0\n\t" | ||
| 29 | "jb 3f\n\t" | ||
| 30 | /* | ||
| 31 | * The movsq instruction is only good for the aligned case. | ||
| 32 | */ | ||
| 33 | "cmpb %%dil, %%sil\n\t" | ||
| 34 | "je 4f\n\t" | ||
| 35 | "3:\n\t" | ||
| 36 | "sub $0x20, %0\n\t" | ||
| 37 | /* | ||
| 38 | * We gobble 32 bytes forward in each loop. | ||
| 39 | */ | ||
| 40 | "5:\n\t" | ||
| 41 | "sub $0x20, %0\n\t" | ||
| 42 | "movq 0*8(%1), %4\n\t" | ||
| 43 | "movq 1*8(%1), %5\n\t" | ||
| 44 | "movq 2*8(%1), %6\n\t" | ||
| 45 | "movq 3*8(%1), %7\n\t" | ||
| 46 | "leaq 4*8(%1), %1\n\t" | ||
| 47 | |||
| 48 | "movq %4, 0*8(%2)\n\t" | ||
| 49 | "movq %5, 1*8(%2)\n\t" | ||
| 50 | "movq %6, 2*8(%2)\n\t" | ||
| 51 | "movq %7, 3*8(%2)\n\t" | ||
| 52 | "leaq 4*8(%2), %2\n\t" | ||
| 53 | "jae 5b\n\t" | ||
| 54 | "addq $0x20, %0\n\t" | ||
| 55 | "jmp 1f\n\t" | ||
| 56 | /* | ||
| 57 | * Handle data forward by movsq. | ||
| 58 | */ | ||
| 59 | ".p2align 4\n\t" | ||
| 60 | "4:\n\t" | ||
| 61 | "movq %0, %8\n\t" | ||
| 62 | "movq -8(%1, %0), %4\n\t" | ||
| 63 | "lea -8(%2, %0), %5\n\t" | ||
| 64 | "shrq $3, %8\n\t" | ||
| 65 | "rep movsq\n\t" | ||
| 66 | "movq %4, (%5)\n\t" | ||
| 67 | "jmp 13f\n\t" | ||
| 68 | /* | ||
| 69 | * Handle data backward by movsq. | ||
| 70 | */ | ||
| 71 | ".p2align 4\n\t" | ||
| 72 | "7:\n\t" | ||
| 73 | "movq %0, %8\n\t" | ||
| 74 | "movq (%1), %4\n\t" | ||
| 75 | "movq %2, %5\n\t" | ||
| 76 | "leaq -8(%1, %0), %1\n\t" | ||
| 77 | "leaq -8(%2, %0), %2\n\t" | ||
| 78 | "shrq $3, %8\n\t" | ||
| 79 | "std\n\t" | ||
| 80 | "rep movsq\n\t" | ||
| 81 | "cld\n\t" | ||
| 82 | "movq %4, (%5)\n\t" | ||
| 83 | "jmp 13f\n\t" | ||
| 84 | |||
| 85 | /* | ||
| 86 | * Start to prepare for backward copy. | ||
| 87 | */ | ||
| 88 | ".p2align 4\n\t" | ||
| 89 | "2:\n\t" | ||
| 90 | "cmp $680, %0\n\t" | ||
| 91 | "jb 6f \n\t" | ||
| 92 | "cmp %%dil, %%sil\n\t" | ||
| 93 | "je 7b \n\t" | ||
| 94 | "6:\n\t" | ||
| 95 | /* | ||
| 96 | * Calculate copy position to tail. | ||
| 97 | */ | ||
| 98 | "addq %0, %1\n\t" | ||
| 99 | "addq %0, %2\n\t" | ||
| 100 | "subq $0x20, %0\n\t" | ||
| 101 | /* | ||
| 102 | * We gobble 32 bytes backward in each loop. | ||
| 103 | */ | ||
| 104 | "8:\n\t" | ||
| 105 | "subq $0x20, %0\n\t" | ||
| 106 | "movq -1*8(%1), %4\n\t" | ||
| 107 | "movq -2*8(%1), %5\n\t" | ||
| 108 | "movq -3*8(%1), %6\n\t" | ||
| 109 | "movq -4*8(%1), %7\n\t" | ||
| 110 | "leaq -4*8(%1), %1\n\t" | ||
| 111 | |||
| 112 | "movq %4, -1*8(%2)\n\t" | ||
| 113 | "movq %5, -2*8(%2)\n\t" | ||
| 114 | "movq %6, -3*8(%2)\n\t" | ||
| 115 | "movq %7, -4*8(%2)\n\t" | ||
| 116 | "leaq -4*8(%2), %2\n\t" | ||
| 117 | "jae 8b\n\t" | ||
| 118 | /* | ||
| 119 | * Calculate copy position to head. | ||
| 120 | */ | ||
| 121 | "addq $0x20, %0\n\t" | ||
| 122 | "subq %0, %1\n\t" | ||
| 123 | "subq %0, %2\n\t" | ||
| 124 | "1:\n\t" | ||
| 125 | "cmpq $16, %0\n\t" | ||
| 126 | "jb 9f\n\t" | ||
| 127 | /* | ||
| 128 | * Move data from 16 bytes to 31 bytes. | ||
| 129 | */ | ||
| 130 | "movq 0*8(%1), %4\n\t" | ||
| 131 | "movq 1*8(%1), %5\n\t" | ||
| 132 | "movq -2*8(%1, %0), %6\n\t" | ||
| 133 | "movq -1*8(%1, %0), %7\n\t" | ||
| 134 | "movq %4, 0*8(%2)\n\t" | ||
| 135 | "movq %5, 1*8(%2)\n\t" | ||
| 136 | "movq %6, -2*8(%2, %0)\n\t" | ||
| 137 | "movq %7, -1*8(%2, %0)\n\t" | ||
| 138 | "jmp 13f\n\t" | ||
| 139 | ".p2align 4\n\t" | ||
| 140 | "9:\n\t" | ||
| 141 | "cmpq $8, %0\n\t" | ||
| 142 | "jb 10f\n\t" | ||
| 143 | /* | ||
| 144 | * Move data from 8 bytes to 15 bytes. | ||
| 145 | */ | ||
| 146 | "movq 0*8(%1), %4\n\t" | ||
| 147 | "movq -1*8(%1, %0), %5\n\t" | ||
| 148 | "movq %4, 0*8(%2)\n\t" | ||
| 149 | "movq %5, -1*8(%2, %0)\n\t" | ||
| 150 | "jmp 13f\n\t" | ||
| 151 | "10:\n\t" | ||
| 152 | "cmpq $4, %0\n\t" | ||
| 153 | "jb 11f\n\t" | ||
| 154 | /* | ||
| 155 | * Move data from 4 bytes to 7 bytes. | ||
| 156 | */ | ||
| 157 | "movl (%1), %4d\n\t" | ||
| 158 | "movl -4(%1, %0), %5d\n\t" | ||
| 159 | "movl %4d, (%2)\n\t" | ||
| 160 | "movl %5d, -4(%2, %0)\n\t" | ||
| 161 | "jmp 13f\n\t" | ||
| 162 | "11:\n\t" | ||
| 163 | "cmp $2, %0\n\t" | ||
| 164 | "jb 12f\n\t" | ||
| 165 | /* | ||
| 166 | * Move data from 2 bytes to 3 bytes. | ||
| 167 | */ | ||
| 168 | "movw (%1), %4w\n\t" | ||
| 169 | "movw -2(%1, %0), %5w\n\t" | ||
| 170 | "movw %4w, (%2)\n\t" | ||
| 171 | "movw %5w, -2(%2, %0)\n\t" | ||
| 172 | "jmp 13f\n\t" | ||
| 173 | "12:\n\t" | ||
| 174 | "cmp $1, %0\n\t" | ||
| 175 | "jb 13f\n\t" | ||
| 176 | /* | ||
| 177 | * Move data for 1 byte. | ||
| 178 | */ | ||
| 179 | "movb (%1), %4b\n\t" | ||
| 180 | "movb %4b, (%2)\n\t" | ||
| 181 | "13:\n\t" | ||
| 182 | : "=&d" (d0), "=&S" (d1), "=&D" (d2), "=&a" (ret) , | ||
| 183 | "=r"(d3), "=r"(d4), "=r"(d5), "=r"(d6), "=&c" (d7) | ||
| 184 | :"0" (count), | ||
| 185 | "1" (src), | ||
| 186 | "2" (dest) | ||
| 187 | :"memory"); | ||
| 188 | |||
| 189 | return ret; | ||
| 190 | |||
| 20 | } | 191 | } |
| 21 | EXPORT_SYMBOL(memmove); | 192 | EXPORT_SYMBOL(memmove); |
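The inline-assembly memmove above makes three decisions before moving any data: direction (backward only when the source sits below the destination, so the tails overlap), whether the count clears the 680-byte cutoff at which the microcoded rep movsq amortizes its startup latency, and whether the two pointers agree in their low address byte, since the string ops only pay off on matching alignment. An outline in C of the same dispatch (a sketch under those assumptions; the helper names are hypothetical stand-ins for the numbered asm labels):

        void *memmove_outline(void *dest, const void *src, size_t n)
        {
                int movsq_ok = n >= 680 &&
                               ((unsigned long)dest & 0xff) ==
                               ((unsigned long)src  & 0xff);

                if (n < 0x20)                           /* label 1: tail cases   */
                        return tail_copy(dest, src, n);
                if ((unsigned long)src >= (unsigned long)dest)  /* forward safe  */
                        return movsq_ok ? forward_movsq(dest, src, n)  /* label 4 */
                                        : forward_loop(dest, src, n);  /* label 5 */
                /* dest starts inside src: copy backward */
                return movsq_ok ? backward_movsq(dest, src, n)         /* label 7 */
                                : backward_loop(dest, src, n);         /* label 8 */
        }

In the real code the 32-byte loops also fall through to the tail handling for the sub-32-byte remainder.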
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a24c6cfdccc4..79b0b372d2d0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
| @@ -229,7 +229,16 @@ void vmalloc_sync_all(void) | |||
| 229 | 229 | ||
| 230 | spin_lock_irqsave(&pgd_lock, flags); | 230 | spin_lock_irqsave(&pgd_lock, flags); |
| 231 | list_for_each_entry(page, &pgd_list, lru) { | 231 | list_for_each_entry(page, &pgd_list, lru) { |
| 232 | if (!vmalloc_sync_one(page_address(page), address)) | 232 | spinlock_t *pgt_lock; |
| 233 | pmd_t *ret; | ||
| 234 | |||
| 235 | pgt_lock = &pgd_page_get_mm(page)->page_table_lock; | ||
| 236 | |||
| 237 | spin_lock(pgt_lock); | ||
| 238 | ret = vmalloc_sync_one(page_address(page), address); | ||
| 239 | spin_unlock(pgt_lock); | ||
| 240 | |||
| 241 | if (!ret) | ||
| 233 | break; | 242 | break; |
| 234 | } | 243 | } |
| 235 | spin_unlock_irqrestore(&pgd_lock, flags); | 244 | spin_unlock_irqrestore(&pgd_lock, flags); |
| @@ -328,29 +337,7 @@ out: | |||
| 328 | 337 | ||
| 329 | void vmalloc_sync_all(void) | 338 | void vmalloc_sync_all(void) |
| 330 | { | 339 | { |
| 331 | unsigned long address; | 340 | sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); |
| 332 | |||
| 333 | for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; | ||
| 334 | address += PGDIR_SIZE) { | ||
| 335 | |||
| 336 | const pgd_t *pgd_ref = pgd_offset_k(address); | ||
| 337 | unsigned long flags; | ||
| 338 | struct page *page; | ||
| 339 | |||
| 340 | if (pgd_none(*pgd_ref)) | ||
| 341 | continue; | ||
| 342 | |||
| 343 | spin_lock_irqsave(&pgd_lock, flags); | ||
| 344 | list_for_each_entry(page, &pgd_list, lru) { | ||
| 345 | pgd_t *pgd; | ||
| 346 | pgd = (pgd_t *)page_address(page) + pgd_index(address); | ||
| 347 | if (pgd_none(*pgd)) | ||
| 348 | set_pgd(pgd, *pgd_ref); | ||
| 349 | else | ||
| 350 | BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); | ||
| 351 | } | ||
| 352 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
| 353 | } | ||
| 354 | } | 341 | } |
| 355 | 342 | ||
| 356 | /* | 343 | /* |
| @@ -898,8 +885,14 @@ spurious_fault(unsigned long error_code, unsigned long address) | |||
| 898 | if (pmd_large(*pmd)) | 885 | if (pmd_large(*pmd)) |
| 899 | return spurious_fault_check(error_code, (pte_t *) pmd); | 886 | return spurious_fault_check(error_code, (pte_t *) pmd); |
| 900 | 887 | ||
| 888 | /* | ||
| 889 | * Note: don't use pte_present() here, since it returns true | ||
| 890 | * if the _PAGE_PROTNONE bit is set. However, this aliases the | ||
| 891 | * _PAGE_GLOBAL bit, which for kernel pages gives false positives | ||
| 892 | * when CONFIG_DEBUG_PAGEALLOC is used. | ||
| 893 | */ | ||
| 901 | pte = pte_offset_kernel(pmd, address); | 894 | pte = pte_offset_kernel(pmd, address); |
| 902 | if (!pte_present(*pte)) | 895 | if (!(pte_flags(*pte) & _PAGE_PRESENT)) |
| 903 | return 0; | 896 | return 0; |
| 904 | 897 | ||
| 905 | ret = spurious_fault_check(error_code, pte); | 898 | ret = spurious_fault_check(error_code, pte); |
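For context on the spurious-fault hunk: on x86, _PAGE_BIT_PROTNONE is defined as the same bit as _PAGE_BIT_GLOBAL (bit 8), and pte_present() tests _PAGE_PRESENT | _PAGE_PROTNONE. A sketch of the failure mode the new comment describes (constants abbreviated from this era's pgtable_types.h):

        #define _PAGE_PRESENT   (1UL << 0)
        #define _PAGE_GLOBAL    (1UL << 8)
        #define _PAGE_PROTNONE  (1UL << 8)  /* same bit; only meaningful
                                               when _PAGE_PRESENT is clear */

        static inline int pte_present_like(unsigned long flags)
        {
                return flags & (_PAGE_PRESENT | _PAGE_PROTNONE);
        }

        /* With CONFIG_DEBUG_PAGEALLOC, a kernel page can have _PAGE_PRESENT
         * cleared while _PAGE_GLOBAL stays set, so pte_present_like() still
         * reports "present" -- hence the direct _PAGE_PRESENT test in the hunk. */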
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index bca79091b9d6..558f2d332076 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
| @@ -67,7 +67,7 @@ static __init void *alloc_low_page(void) | |||
| 67 | panic("alloc_low_page: ran out of memory"); | 67 | panic("alloc_low_page: ran out of memory"); |
| 68 | 68 | ||
| 69 | adr = __va(pfn * PAGE_SIZE); | 69 | adr = __va(pfn * PAGE_SIZE); |
| 70 | memset(adr, 0, PAGE_SIZE); | 70 | clear_page(adr); |
| 71 | return adr; | 71 | return adr; |
| 72 | } | 72 | } |
| 73 | 73 | ||
| @@ -558,7 +558,7 @@ char swsusp_pg_dir[PAGE_SIZE] | |||
| 558 | 558 | ||
| 559 | static inline void save_pg_dir(void) | 559 | static inline void save_pg_dir(void) |
| 560 | { | 560 | { |
| 561 | memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE); | 561 | copy_page(swsusp_pg_dir, swapper_pg_dir); |
| 562 | } | 562 | } |
| 563 | #else /* !CONFIG_ACPI_SLEEP */ | 563 | #else /* !CONFIG_ACPI_SLEEP */ |
| 564 | static inline void save_pg_dir(void) | 564 | static inline void save_pg_dir(void) |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 9a6674689a20..c55f900fbf89 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
| @@ -98,6 +98,43 @@ static int __init nonx32_setup(char *str) | |||
| 98 | __setup("noexec32=", nonx32_setup); | 98 | __setup("noexec32=", nonx32_setup); |
| 99 | 99 | ||
| 100 | /* | 100 | /* |
| 101 | * When memory is added or removed, make sure all process MMs have | ||
| 102 | * suitable PGD entries in the local PGD-level page. | ||
| 103 | */ | ||
| 104 | void sync_global_pgds(unsigned long start, unsigned long end) | ||
| 105 | { | ||
| 106 | unsigned long address; | ||
| 107 | |||
| 108 | for (address = start; address <= end; address += PGDIR_SIZE) { | ||
| 109 | const pgd_t *pgd_ref = pgd_offset_k(address); | ||
| 110 | unsigned long flags; | ||
| 111 | struct page *page; | ||
| 112 | |||
| 113 | if (pgd_none(*pgd_ref)) | ||
| 114 | continue; | ||
| 115 | |||
| 116 | spin_lock_irqsave(&pgd_lock, flags); | ||
| 117 | list_for_each_entry(page, &pgd_list, lru) { | ||
| 118 | pgd_t *pgd; | ||
| 119 | spinlock_t *pgt_lock; | ||
| 120 | |||
| 121 | pgd = (pgd_t *)page_address(page) + pgd_index(address); | ||
| 122 | pgt_lock = &pgd_page_get_mm(page)->page_table_lock; | ||
| 123 | spin_lock(pgt_lock); | ||
| 124 | |||
| 125 | if (pgd_none(*pgd)) | ||
| 126 | set_pgd(pgd, *pgd_ref); | ||
| 127 | else | ||
| 128 | BUG_ON(pgd_page_vaddr(*pgd) | ||
| 129 | != pgd_page_vaddr(*pgd_ref)); | ||
| 130 | |||
| 131 | spin_unlock(pgt_lock); | ||
| 132 | } | ||
| 133 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
| 134 | } | ||
| 135 | } | ||
| 136 | |||
| 137 | /* | ||
| 101 | * NOTE: This function is marked __ref because it calls __init function | 138 | * NOTE: This function is marked __ref because it calls __init function |
| 102 | * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. | 139 | * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. |
| 103 | */ | 140 | */ |
| @@ -293,7 +330,7 @@ static __ref void *alloc_low_page(unsigned long *phys) | |||
| 293 | panic("alloc_low_page: ran out of memory"); | 330 | panic("alloc_low_page: ran out of memory"); |
| 294 | 331 | ||
| 295 | adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); | 332 | adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); |
| 296 | memset(adr, 0, PAGE_SIZE); | 333 | clear_page(adr); |
| 297 | *phys = pfn * PAGE_SIZE; | 334 | *phys = pfn * PAGE_SIZE; |
| 298 | return adr; | 335 | return adr; |
| 299 | } | 336 | } |
| @@ -534,11 +571,13 @@ kernel_physical_mapping_init(unsigned long start, | |||
| 534 | unsigned long end, | 571 | unsigned long end, |
| 535 | unsigned long page_size_mask) | 572 | unsigned long page_size_mask) |
| 536 | { | 573 | { |
| 537 | 574 | bool pgd_changed = false; | |
| 538 | unsigned long next, last_map_addr = end; | 575 | unsigned long next, last_map_addr = end; |
| 576 | unsigned long addr; | ||
| 539 | 577 | ||
| 540 | start = (unsigned long)__va(start); | 578 | start = (unsigned long)__va(start); |
| 541 | end = (unsigned long)__va(end); | 579 | end = (unsigned long)__va(end); |
| 580 | addr = start; | ||
| 542 | 581 | ||
| 543 | for (; start < end; start = next) { | 582 | for (; start < end; start = next) { |
| 544 | pgd_t *pgd = pgd_offset_k(start); | 583 | pgd_t *pgd = pgd_offset_k(start); |
| @@ -563,7 +602,12 @@ kernel_physical_mapping_init(unsigned long start, | |||
| 563 | spin_lock(&init_mm.page_table_lock); | 602 | spin_lock(&init_mm.page_table_lock); |
| 564 | pgd_populate(&init_mm, pgd, __va(pud_phys)); | 603 | pgd_populate(&init_mm, pgd, __va(pud_phys)); |
| 565 | spin_unlock(&init_mm.page_table_lock); | 604 | spin_unlock(&init_mm.page_table_lock); |
| 605 | pgd_changed = true; | ||
| 566 | } | 606 | } |
| 607 | |||
| 608 | if (pgd_changed) | ||
| 609 | sync_global_pgds(addr, end); | ||
| 610 | |||
| 567 | __flush_tlb_all(); | 611 | __flush_tlb_all(); |
| 568 | 612 | ||
| 569 | return last_map_addr; | 613 | return last_map_addr; |
| @@ -1003,6 +1047,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) | |||
| 1003 | } | 1047 | } |
| 1004 | 1048 | ||
| 1005 | } | 1049 | } |
| 1050 | sync_global_pgds((unsigned long)start_page, end); | ||
| 1006 | return 0; | 1051 | return 0; |
| 1007 | } | 1052 | } |
| 1008 | 1053 | ||
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 970ed579d4e4..52d54bfc1ebb 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | #include <asm/numa.h> | 22 | #include <asm/numa.h> |
| 23 | #include <asm/mpspec.h> | 23 | #include <asm/mpspec.h> |
| 24 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
| 25 | #include <asm/k8.h> | 25 | #include <asm/amd_nb.h> |
| 26 | 26 | ||
| 27 | static struct bootnode __initdata nodes[8]; | 27 | static struct bootnode __initdata nodes[8]; |
| 28 | static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE; | 28 | static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE; |
| @@ -54,8 +54,8 @@ static __init int find_northbridge(void) | |||
| 54 | static __init void early_get_boot_cpu_id(void) | 54 | static __init void early_get_boot_cpu_id(void) |
| 55 | { | 55 | { |
| 56 | /* | 56 | /* |
| 57 | * need to get boot_cpu_id so can use that to create apicid_to_node | 57 | * need to get the APIC ID of the BSP so we can use that to |
| 58 | * in k8_scan_nodes() | 58 | * create apicid_to_node in k8_scan_nodes() |
| 59 | */ | 59 | */ |
| 60 | #ifdef CONFIG_X86_MPPARSE | 60 | #ifdef CONFIG_X86_MPPARSE |
| 61 | /* | 61 | /* |
| @@ -212,7 +212,7 @@ int __init k8_scan_nodes(void) | |||
| 212 | bits = boot_cpu_data.x86_coreid_bits; | 212 | bits = boot_cpu_data.x86_coreid_bits; |
| 213 | cores = (1<<bits); | 213 | cores = (1<<bits); |
| 214 | apicid_base = 0; | 214 | apicid_base = 0; |
| 215 | /* need to get boot_cpu_id early for system with apicid lifting */ | 215 | /* get the APIC ID of the BSP early for systems with apicid lifting */ |
| 216 | early_get_boot_cpu_id(); | 216 | early_get_boot_cpu_id(); |
| 217 | if (boot_cpu_physical_apicid > 0) { | 217 | if (boot_cpu_physical_apicid > 0) { |
| 218 | pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid); | 218 | pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid); |
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c index 63c19e27aa6f..324aa3f07237 100644 --- a/arch/x86/mm/kmemcheck/opcode.c +++ b/arch/x86/mm/kmemcheck/opcode.c | |||
| @@ -9,7 +9,7 @@ static bool opcode_is_prefix(uint8_t b) | |||
| 9 | b == 0xf0 || b == 0xf2 || b == 0xf3 | 9 | b == 0xf0 || b == 0xf2 || b == 0xf3 |
| 10 | /* Group 2 */ | 10 | /* Group 2 */ |
| 11 | || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 | 11 | || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 |
| 12 | || b == 0x64 || b == 0x65 || b == 0x2e || b == 0x3e | 12 | || b == 0x64 || b == 0x65 |
| 13 | /* Group 3 */ | 13 | /* Group 3 */ |
| 14 | || b == 0x66 | 14 | || b == 0x66 |
| 15 | /* Group 4 */ | 15 | /* Group 4 */ |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index a7bcc23ef96c..4962f1aeda6f 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
| @@ -18,7 +18,7 @@ | |||
| 18 | #include <asm/dma.h> | 18 | #include <asm/dma.h> |
| 19 | #include <asm/numa.h> | 19 | #include <asm/numa.h> |
| 20 | #include <asm/acpi.h> | 20 | #include <asm/acpi.h> |
| 21 | #include <asm/k8.h> | 21 | #include <asm/amd_nb.h> |
| 22 | 22 | ||
| 23 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | 23 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; |
| 24 | EXPORT_SYMBOL(node_data); | 24 | EXPORT_SYMBOL(node_data); |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 5c4ee422590e..8be8c7d7bc89 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
| @@ -87,7 +87,19 @@ static inline void pgd_list_del(pgd_t *pgd) | |||
| 87 | #define UNSHARED_PTRS_PER_PGD \ | 87 | #define UNSHARED_PTRS_PER_PGD \ |
| 88 | (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) | 88 | (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) |
| 89 | 89 | ||
| 90 | static void pgd_ctor(pgd_t *pgd) | 90 | |
| 91 | static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) | ||
| 92 | { | ||
| 93 | BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm)); | ||
| 94 | virt_to_page(pgd)->index = (pgoff_t)mm; | ||
| 95 | } | ||
| 96 | |||
| 97 | struct mm_struct *pgd_page_get_mm(struct page *page) | ||
| 98 | { | ||
| 99 | return (struct mm_struct *)page->index; | ||
| 100 | } | ||
| 101 | |||
| 102 | static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) | ||
| 91 | { | 103 | { |
| 92 | /* If the pgd points to a shared pagetable level (either the | 104 | /* If the pgd points to a shared pagetable level (either the |
| 93 | ptes in non-PAE, or shared PMD in PAE), then just copy the | 105 | ptes in non-PAE, or shared PMD in PAE), then just copy the |
| @@ -98,15 +110,13 @@ static void pgd_ctor(pgd_t *pgd) | |||
| 98 | clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, | 110 | clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, |
| 99 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, | 111 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, |
| 100 | KERNEL_PGD_PTRS); | 112 | KERNEL_PGD_PTRS); |
| 101 | paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT, | ||
| 102 | __pa(swapper_pg_dir) >> PAGE_SHIFT, | ||
| 103 | KERNEL_PGD_BOUNDARY, | ||
| 104 | KERNEL_PGD_PTRS); | ||
| 105 | } | 113 | } |
| 106 | 114 | ||
| 107 | /* list required to sync kernel mapping updates */ | 115 | /* list required to sync kernel mapping updates */ |
| 108 | if (!SHARED_KERNEL_PMD) | 116 | if (!SHARED_KERNEL_PMD) { |
| 117 | pgd_set_mm(pgd, mm); | ||
| 109 | pgd_list_add(pgd); | 118 | pgd_list_add(pgd); |
| 119 | } | ||
| 110 | } | 120 | } |
| 111 | 121 | ||
| 112 | static void pgd_dtor(pgd_t *pgd) | 122 | static void pgd_dtor(pgd_t *pgd) |
| @@ -272,7 +282,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
| 272 | */ | 282 | */ |
| 273 | spin_lock_irqsave(&pgd_lock, flags); | 283 | spin_lock_irqsave(&pgd_lock, flags); |
| 274 | 284 | ||
| 275 | pgd_ctor(pgd); | 285 | pgd_ctor(mm, pgd); |
| 276 | pgd_prepopulate_pmd(mm, pgd, pmds); | 286 | pgd_prepopulate_pmd(mm, pgd, pmds); |
| 277 | 287 | ||
| 278 | spin_unlock_irqrestore(&pgd_lock, flags); | 288 | spin_unlock_irqrestore(&pgd_lock, flags); |
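The pgtable.c hunk is the keystone of the vmalloc_sync changes earlier in this diff: a pgd page's otherwise unused page->index field now carries a back-pointer to the owning mm, so a walker holding pgd_lock can take each mm's page_table_lock in turn. The resulting lock nesting, as used by vmalloc_sync_all() and sync_global_pgds() above:

        spin_lock_irqsave(&pgd_lock, flags);            /* outer: guards pgd_list */
        list_for_each_entry(page, &pgd_list, lru) {
                spinlock_t *pgt_lock =
                        &pgd_page_get_mm(page)->page_table_lock;

                spin_lock(pgt_lock);                    /* inner: one mm at a time */
                /* ... fix up this mm's kernel pgd entry ... */
                spin_unlock(pgt_lock);
        }
        spin_unlock_irqrestore(&pgd_lock, flags);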
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index c03f14ab6667..49358481c733 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include <linux/smp.h> | 5 | #include <linux/smp.h> |
| 6 | #include <linux/interrupt.h> | 6 | #include <linux/interrupt.h> |
| 7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
| 8 | #include <linux/cpu.h> | ||
| 8 | 9 | ||
| 9 | #include <asm/tlbflush.h> | 10 | #include <asm/tlbflush.h> |
| 10 | #include <asm/mmu_context.h> | 11 | #include <asm/mmu_context.h> |
| @@ -52,6 +53,8 @@ union smp_flush_state { | |||
| 52 | want false sharing in the per cpu data segment. */ | 53 | want false sharing in the per cpu data segment. */ |
| 53 | static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; | 54 | static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; |
| 54 | 55 | ||
| 56 | static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset); | ||
| 57 | |||
| 55 | /* | 58 | /* |
| 56 | * We cannot call mmdrop() because we are in interrupt context, | 59 | * We cannot call mmdrop() because we are in interrupt context, |
| 57 | * instead update mm->cpu_vm_mask. | 60 | * instead update mm->cpu_vm_mask. |
| @@ -173,7 +176,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, | |||
| 173 | union smp_flush_state *f; | 176 | union smp_flush_state *f; |
| 174 | 177 | ||
| 175 | /* Caller has disabled preemption */ | 178 | /* Caller has disabled preemption */ |
| 176 | sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; | 179 | sender = this_cpu_read(tlb_vector_offset); |
| 177 | f = &flush_state[sender]; | 180 | f = &flush_state[sender]; |
| 178 | 181 | ||
| 179 | /* | 182 | /* |
| @@ -218,6 +221,47 @@ void native_flush_tlb_others(const struct cpumask *cpumask, | |||
| 218 | flush_tlb_others_ipi(cpumask, mm, va); | 221 | flush_tlb_others_ipi(cpumask, mm, va); |
| 219 | } | 222 | } |
| 220 | 223 | ||
| 224 | static void __cpuinit calculate_tlb_offset(void) | ||
| 225 | { | ||
| 226 | int cpu, node, nr_node_vecs; | ||
| 227 | /* | ||
| 228 | * We change tlb_vector_offset for each CPU at runtime, but this will | ||
| 229 | * not cause inconsistency, as the write is atomic on x86. We might see | ||
| 230 | * more lock contention for a short time, but after every CPU's | ||
| 231 | * tlb_vector_offset has been changed, everything should go back to normal. | ||
| 232 | * | ||
| 233 | * Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes != 0, we might | ||
| 234 | * waste some vectors. | ||
| 235 | */ | ||
| 236 | if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS) | ||
| 237 | nr_node_vecs = 1; | ||
| 238 | else | ||
| 239 | nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes; | ||
| 240 | |||
| 241 | for_each_online_node(node) { | ||
| 242 | int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) * | ||
| 243 | nr_node_vecs; | ||
| 244 | int cpu_offset = 0; | ||
| 245 | for_each_cpu(cpu, cpumask_of_node(node)) { | ||
| 246 | per_cpu(tlb_vector_offset, cpu) = node_offset + | ||
| 247 | cpu_offset; | ||
| 248 | cpu_offset++; | ||
| 249 | cpu_offset = cpu_offset % nr_node_vecs; | ||
| 250 | } | ||
| 251 | } | ||
| 252 | } | ||
| 253 | |||
| 254 | static int tlb_cpuhp_notify(struct notifier_block *n, | ||
| 255 | unsigned long action, void *hcpu) | ||
| 256 | { | ||
| 257 | switch (action & 0xf) { | ||
| 258 | case CPU_ONLINE: | ||
| 259 | case CPU_DEAD: | ||
| 260 | calculate_tlb_offset(); | ||
| 261 | } | ||
| 262 | return NOTIFY_OK; | ||
| 263 | } | ||
| 264 | |||
| 221 | static int __cpuinit init_smp_flush(void) | 265 | static int __cpuinit init_smp_flush(void) |
| 222 | { | 266 | { |
| 223 | int i; | 267 | int i; |
| @@ -225,6 +269,8 @@ static int __cpuinit init_smp_flush(void) | |||
| 225 | for (i = 0; i < ARRAY_SIZE(flush_state); i++) | 269 | for (i = 0; i < ARRAY_SIZE(flush_state); i++) |
| 226 | raw_spin_lock_init(&flush_state[i].tlbstate_lock); | 270 | raw_spin_lock_init(&flush_state[i].tlbstate_lock); |
| 227 | 271 | ||
| 272 | calculate_tlb_offset(); | ||
| 273 | hotcpu_notifier(tlb_cpuhp_notify, 0); | ||
| 228 | return 0; | 274 | return 0; |
| 229 | } | 275 | } |
| 230 | core_initcall(init_smp_flush); | 276 | core_initcall(init_smp_flush); |
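A worked example of calculate_tlb_offset(), assuming NUM_INVALIDATE_TLB_VECTORS is 8 (its value in the tree at this point) and two online nodes:

        nr_node_vecs = 8 / 2 = 4
        node 0: node_offset = (0 % 8) * 4 = 0  ->  its CPUs rotate through vectors 0..3
        node 1: node_offset = (1 % 8) * 4 = 4  ->  its CPUs rotate through vectors 4..7

Flush IPIs issued from different nodes therefore never contend on the same flush_state[] tlbstate_lock, which is the point of the patch.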
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index b67a6b5aa8d4..42fb46f83883 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
| @@ -64,15 +64,22 @@ static u64 ibs_op_ctl; | |||
| 64 | * IBS cpuid feature detection | 64 | * IBS cpuid feature detection |
| 65 | */ | 65 | */ |
| 66 | 66 | ||
| 67 | #define IBS_CPUID_FEATURES 0x8000001b | 67 | #define IBS_CPUID_FEATURES 0x8000001b |
| 68 | 68 | ||
| 69 | /* | 69 | /* |
| 70 | * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but | 70 | * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but |
| 71 | * bit 0 is used to indicate the existence of IBS. | 71 | * bit 0 is used to indicate the existence of IBS. |
| 72 | */ | 72 | */ |
| 73 | #define IBS_CAPS_AVAIL (1LL<<0) | 73 | #define IBS_CAPS_AVAIL (1U<<0) |
| 74 | #define IBS_CAPS_RDWROPCNT (1LL<<3) | 74 | #define IBS_CAPS_RDWROPCNT (1U<<3) |
| 75 | #define IBS_CAPS_OPCNT (1LL<<4) | 75 | #define IBS_CAPS_OPCNT (1U<<4) |
| 76 | |||
| 77 | /* | ||
| 78 | * IBS APIC setup | ||
| 79 | */ | ||
| 80 | #define IBSCTL 0x1cc | ||
| 81 | #define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) | ||
| 82 | #define IBSCTL_LVT_OFFSET_MASK 0x0F | ||
| 76 | 83 | ||
| 77 | /* | 84 | /* |
| 78 | * IBS randomization macros | 85 | * IBS randomization macros |
| @@ -266,6 +273,74 @@ static void op_amd_stop_ibs(void) | |||
| 266 | wrmsrl(MSR_AMD64_IBSOPCTL, 0); | 273 | wrmsrl(MSR_AMD64_IBSOPCTL, 0); |
| 267 | } | 274 | } |
| 268 | 275 | ||
| 276 | static inline int eilvt_is_available(int offset) | ||
| 277 | { | ||
| 278 | /* check if we may assign a vector */ | ||
| 279 | return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); | ||
| 280 | } | ||
| 281 | |||
| 282 | static inline int ibs_eilvt_valid(void) | ||
| 283 | { | ||
| 284 | u64 val; | ||
| 285 | int offset; | ||
| 286 | |||
| 287 | rdmsrl(MSR_AMD64_IBSCTL, val); | ||
| 288 | offset = val & IBSCTL_LVT_OFFSET_MASK; | ||
| 289 | |||
| 290 | if (!(val & IBSCTL_LVT_OFFSET_VALID)) { | ||
| 291 | pr_err(FW_BUG "cpu %d, invalid IBS " | ||
| 292 | "interrupt offset %d (MSR%08X=0x%016llx)\n", | ||
| 293 | smp_processor_id(), offset, | ||
| 294 | MSR_AMD64_IBSCTL, val); | ||
| 295 | return 0; | ||
| 296 | } | ||
| 297 | |||
| 298 | if (eilvt_is_available(offset)) | ||
| 299 | return !0; | ||
| 300 | |||
| 301 | pr_err(FW_BUG "cpu %d, IBS interrupt offset %d " | ||
| 302 | "not available (MSR%08X=0x%016llx)", | ||
| 303 | smp_processor_id(), offset, | ||
| 304 | MSR_AMD64_IBSCTL, val); | ||
| 305 | |||
| 306 | return 0; | ||
| 307 | } | ||
| 308 | |||
| 309 | static inline int get_ibs_offset(void) | ||
| 310 | { | ||
| 311 | u64 val; | ||
| 312 | |||
| 313 | rdmsrl(MSR_AMD64_IBSCTL, val); | ||
| 314 | if (!(val & IBSCTL_LVT_OFFSET_VALID)) | ||
| 315 | return -EINVAL; | ||
| 316 | |||
| 317 | return val & IBSCTL_LVT_OFFSET_MASK; | ||
| 318 | } | ||
| 319 | |||
| 320 | static void setup_APIC_ibs(void) | ||
| 321 | { | ||
| 322 | int offset; | ||
| 323 | |||
| 324 | offset = get_ibs_offset(); | ||
| 325 | if (offset < 0) | ||
| 326 | goto failed; | ||
| 327 | |||
| 328 | if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0)) | ||
| 329 | return; | ||
| 330 | failed: | ||
| 331 | pr_warn("oprofile: IBS APIC setup failed on cpu #%d\n", | ||
| 332 | smp_processor_id()); | ||
| 333 | } | ||
| 334 | |||
| 335 | static void clear_APIC_ibs(void) | ||
| 336 | { | ||
| 337 | int offset; | ||
| 338 | |||
| 339 | offset = get_ibs_offset(); | ||
| 340 | if (offset >= 0) | ||
| 341 | setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); | ||
| 342 | } | ||
| 343 | |||
| 269 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | 344 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX |
| 270 | 345 | ||
| 271 | static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | 346 | static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, |
| @@ -376,13 +451,13 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
| 376 | } | 451 | } |
| 377 | 452 | ||
| 378 | if (ibs_caps) | 453 | if (ibs_caps) |
| 379 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0); | 454 | setup_APIC_ibs(); |
| 380 | } | 455 | } |
| 381 | 456 | ||
| 382 | static void op_amd_cpu_shutdown(void) | 457 | static void op_amd_cpu_shutdown(void) |
| 383 | { | 458 | { |
| 384 | if (ibs_caps) | 459 | if (ibs_caps) |
| 385 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); | 460 | clear_APIC_ibs(); |
| 386 | } | 461 | } |
| 387 | 462 | ||
| 388 | static int op_amd_check_ctrs(struct pt_regs * const regs, | 463 | static int op_amd_check_ctrs(struct pt_regs * const regs, |
| @@ -445,16 +520,11 @@ static void op_amd_stop(struct op_msrs const * const msrs) | |||
| 445 | op_amd_stop_ibs(); | 520 | op_amd_stop_ibs(); |
| 446 | } | 521 | } |
| 447 | 522 | ||
| 448 | static int __init_ibs_nmi(void) | 523 | static int setup_ibs_ctl(int ibs_eilvt_off) |
| 449 | { | 524 | { |
| 450 | #define IBSCTL_LVTOFFSETVAL (1 << 8) | ||
| 451 | #define IBSCTL 0x1cc | ||
| 452 | struct pci_dev *cpu_cfg; | 525 | struct pci_dev *cpu_cfg; |
| 453 | int nodes; | 526 | int nodes; |
| 454 | u32 value = 0; | 527 | u32 value = 0; |
| 455 | u8 ibs_eilvt_off; | ||
| 456 | |||
| 457 | ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); | ||
| 458 | 528 | ||
| 459 | nodes = 0; | 529 | nodes = 0; |
| 460 | cpu_cfg = NULL; | 530 | cpu_cfg = NULL; |
| @@ -466,21 +536,60 @@ static int __init_ibs_nmi(void) | |||
| 466 | break; | 536 | break; |
| 467 | ++nodes; | 537 | ++nodes; |
| 468 | pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off | 538 | pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off |
| 469 | | IBSCTL_LVTOFFSETVAL); | 539 | | IBSCTL_LVT_OFFSET_VALID); |
| 470 | pci_read_config_dword(cpu_cfg, IBSCTL, &value); | 540 | pci_read_config_dword(cpu_cfg, IBSCTL, &value); |
| 471 | if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) { | 541 | if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { |
| 472 | pci_dev_put(cpu_cfg); | 542 | pci_dev_put(cpu_cfg); |
| 473 | printk(KERN_DEBUG "Failed to setup IBS LVT offset, " | 543 | printk(KERN_DEBUG "Failed to setup IBS LVT offset, " |
| 474 | "IBSCTL = 0x%08x", value); | 544 | "IBSCTL = 0x%08x\n", value); |
| 475 | return 1; | 545 | return -EINVAL; |
| 476 | } | 546 | } |
| 477 | } while (1); | 547 | } while (1); |
| 478 | 548 | ||
| 479 | if (!nodes) { | 549 | if (!nodes) { |
| 480 | printk(KERN_DEBUG "No CPU node configured for IBS"); | 550 | printk(KERN_DEBUG "No CPU node configured for IBS\n"); |
| 481 | return 1; | 551 | return -ENODEV; |
| 552 | } | ||
| 553 | |||
| 554 | return 0; | ||
| 555 | } | ||
| 556 | |||
| 557 | static int force_ibs_eilvt_setup(void) | ||
| 558 | { | ||
| 559 | int i; | ||
| 560 | int ret; | ||
| 561 | |||
| 562 | /* find the next free available EILVT entry */ | ||
| 563 | for (i = 1; i < 4; i++) { | ||
| 564 | if (!eilvt_is_available(i)) | ||
| 565 | continue; | ||
| 566 | ret = setup_ibs_ctl(i); | ||
| 567 | if (ret) | ||
| 568 | return ret; | ||
| 569 | return 0; | ||
| 482 | } | 570 | } |
| 483 | 571 | ||
| 572 | printk(KERN_DEBUG "No EILVT entry available\n"); | ||
| 573 | |||
| 574 | return -EBUSY; | ||
| 575 | } | ||
| 576 | |||
| 577 | static int __init_ibs_nmi(void) | ||
| 578 | { | ||
| 579 | int ret; | ||
| 580 | |||
| 581 | if (ibs_eilvt_valid()) | ||
| 582 | return 0; | ||
| 583 | |||
| 584 | ret = force_ibs_eilvt_setup(); | ||
| 585 | if (ret) | ||
| 586 | return ret; | ||
| 587 | |||
| 588 | if (!ibs_eilvt_valid()) | ||
| 589 | return -EFAULT; | ||
| 590 | |||
| 591 | pr_err(FW_BUG "workaround enabled for IBS LVT offset\n"); | ||
| 592 | |||
| 484 | return 0; | 593 | return 0; |
| 485 | } | 594 | } |
| 486 | 595 | ||
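The control flow of the reworked IBS initialization, in outline (derived from the hunks above):

        __init_ibs_nmi()
            ibs_eilvt_valid()?          -- the BIOS already programmed a usable
                                           LVT offset into MSR_AMD64_IBSCTL
            force_ibs_eilvt_setup()     -- otherwise probe EILVT entries 1..3
                eilvt_is_available(i)      with eilvt_is_available() and program
                setup_ibs_ctl(i)           the first free one into IBSCTL on
                                           every node's northbridge device
            ibs_eilvt_valid()?          -- re-check; if the workaround was
                                           needed, report it with FW_BUG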
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c index b34815408f58..13700ec8e2e4 100644 --- a/arch/x86/pci/olpc.c +++ b/arch/x86/pci/olpc.c | |||
| @@ -304,7 +304,7 @@ static struct pci_raw_ops pci_olpc_conf = { | |||
| 304 | 304 | ||
| 305 | int __init pci_olpc_init(void) | 305 | int __init pci_olpc_init(void) |
| 306 | { | 306 | { |
| 307 | printk(KERN_INFO "PCI: Using configuration type OLPC\n"); | 307 | printk(KERN_INFO "PCI: Using configuration type OLPC XO-1\n"); |
| 308 | raw_pci_ops = &pci_olpc_conf; | 308 | raw_pci_ops = &pci_olpc_conf; |
| 309 | is_lx = is_geode_lx(); | 309 | is_lx = is_geode_lx(); |
| 310 | return 0; | 310 | return 0; |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 42086ac406af..b2363fcbcd0f 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
| @@ -1969,7 +1969,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
| 1969 | .alloc_pte = xen_alloc_pte_init, | 1969 | .alloc_pte = xen_alloc_pte_init, |
| 1970 | .release_pte = xen_release_pte_init, | 1970 | .release_pte = xen_release_pte_init, |
| 1971 | .alloc_pmd = xen_alloc_pmd_init, | 1971 | .alloc_pmd = xen_alloc_pmd_init, |
| 1972 | .alloc_pmd_clone = paravirt_nop, | ||
| 1973 | .release_pmd = xen_release_pmd_init, | 1972 | .release_pmd = xen_release_pmd_init, |
| 1974 | 1973 | ||
| 1975 | #ifdef CONFIG_X86_64 | 1974 | #ifdef CONFIG_X86_64 |
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index c64a5d387de5..87508886cbbd 100644 --- a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c | |||
| @@ -92,7 +92,7 @@ int show_interrupts(struct seq_file *p, void *v) | |||
| 92 | for_each_online_cpu(j) | 92 | for_each_online_cpu(j) |
| 93 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); | 93 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); |
| 94 | #endif | 94 | #endif |
| 95 | seq_printf(p, " %14s", irq_desc[i].chip->typename); | 95 | seq_printf(p, " %14s", irq_desc[i].chip->name); |
| 96 | seq_printf(p, " %s", action->name); | 96 | seq_printf(p, " %s", action->name); |
| 97 | 97 | ||
| 98 | for (action=action->next; action; action = action->next) | 98 | for (action=action->next; action; action = action->next) |
diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index 6b115f6c4313..6afceb3d4034 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c | |||
| @@ -30,18 +30,13 @@ | |||
| 30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
| 31 | #include <acpi/acpi_bus.h> | 31 | #include <acpi/acpi_bus.h> |
| 32 | #include <acpi/acpi_drivers.h> | 32 | #include <acpi/acpi_drivers.h> |
| 33 | #include <asm/mwait.h> | ||
| 33 | 34 | ||
| 34 | #define ACPI_PROCESSOR_AGGREGATOR_CLASS "acpi_pad" | 35 | #define ACPI_PROCESSOR_AGGREGATOR_CLASS "acpi_pad" |
| 35 | #define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator" | 36 | #define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator" |
| 36 | #define ACPI_PROCESSOR_AGGREGATOR_NOTIFY 0x80 | 37 | #define ACPI_PROCESSOR_AGGREGATOR_NOTIFY 0x80 |
| 37 | static DEFINE_MUTEX(isolated_cpus_lock); | 38 | static DEFINE_MUTEX(isolated_cpus_lock); |
| 38 | 39 | ||
| 39 | #define MWAIT_SUBSTATE_MASK (0xf) | ||
| 40 | #define MWAIT_CSTATE_MASK (0xf) | ||
| 41 | #define MWAIT_SUBSTATE_SIZE (4) | ||
| 42 | #define CPUID_MWAIT_LEAF (5) | ||
| 43 | #define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1) | ||
| 44 | #define CPUID5_ECX_INTERRUPT_BREAK (0x2) | ||
| 45 | static unsigned long power_saving_mwait_eax; | 40 | static unsigned long power_saving_mwait_eax; |
| 46 | 41 | ||
| 47 | static unsigned char tsc_detected_unstable; | 42 | static unsigned char tsc_detected_unstable; |
diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 9fc630ce1ddb..f6f37a05a0c3 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c | |||
| @@ -45,7 +45,8 @@ static ssize_t show_##name(struct sys_device *dev, \ | |||
| 45 | return sprintf(buf, "%d\n", topology_##name(cpu)); \ | 45 | return sprintf(buf, "%d\n", topology_##name(cpu)); \ |
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | #if defined(topology_thread_cpumask) || defined(topology_core_cpumask) | 48 | #if defined(topology_thread_cpumask) || defined(topology_core_cpumask) || \ |
| 49 | defined(topology_book_cpumask) | ||
| 49 | static ssize_t show_cpumap(int type, const struct cpumask *mask, char *buf) | 50 | static ssize_t show_cpumap(int type, const struct cpumask *mask, char *buf) |
| 50 | { | 51 | { |
| 51 | ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; | 52 | ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; |
| @@ -114,6 +115,14 @@ define_siblings_show_func(core_cpumask); | |||
| 114 | define_one_ro_named(core_siblings, show_core_cpumask); | 115 | define_one_ro_named(core_siblings, show_core_cpumask); |
| 115 | define_one_ro_named(core_siblings_list, show_core_cpumask_list); | 116 | define_one_ro_named(core_siblings_list, show_core_cpumask_list); |
| 116 | 117 | ||
| 118 | #ifdef CONFIG_SCHED_BOOK | ||
| 119 | define_id_show_func(book_id); | ||
| 120 | define_one_ro(book_id); | ||
| 121 | define_siblings_show_func(book_cpumask); | ||
| 122 | define_one_ro_named(book_siblings, show_book_cpumask); | ||
| 123 | define_one_ro_named(book_siblings_list, show_book_cpumask_list); | ||
| 124 | #endif | ||
| 125 | |||
| 117 | static struct attribute *default_attrs[] = { | 126 | static struct attribute *default_attrs[] = { |
| 118 | &attr_physical_package_id.attr, | 127 | &attr_physical_package_id.attr, |
| 119 | &attr_core_id.attr, | 128 | &attr_core_id.attr, |
| @@ -121,6 +130,11 @@ static struct attribute *default_attrs[] = { | |||
| 121 | &attr_thread_siblings_list.attr, | 130 | &attr_thread_siblings_list.attr, |
| 122 | &attr_core_siblings.attr, | 131 | &attr_core_siblings.attr, |
| 123 | &attr_core_siblings_list.attr, | 132 | &attr_core_siblings_list.attr, |
| 133 | #ifdef CONFIG_SCHED_BOOK | ||
| 134 | &attr_book_id.attr, | ||
| 135 | &attr_book_siblings.attr, | ||
| 136 | &attr_book_siblings_list.attr, | ||
| 137 | #endif | ||
| 124 | NULL | 138 | NULL |
| 125 | }; | 139 | }; |
| 126 | 140 | ||
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index de277689da61..4b9359a6f6ca 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig | |||
| @@ -488,4 +488,21 @@ config BLK_DEV_HD | |||
| 488 | 488 | ||
| 489 | If unsure, say N. | 489 | If unsure, say N. |
| 490 | 490 | ||
| 491 | config BLK_DEV_RBD | ||
| 492 | tristate "Rados block device (RBD)" | ||
| 493 | depends on INET && EXPERIMENTAL && BLOCK | ||
| 494 | select CEPH_LIB | ||
| 495 | select LIBCRC32C | ||
| 496 | select CRYPTO_AES | ||
| 497 | select CRYPTO | ||
| 498 | default n | ||
| 499 | help | ||
| 500 | Say Y here if you want include the Rados block device, which stripes | ||
| 501 | a block device over objects stored in the Ceph distributed object | ||
| 502 | store. | ||
| 503 | |||
| 504 | More information at http://ceph.newdream.net/. | ||
| 505 | |||
| 506 | If unsure, say N. | ||
| 507 | |||
| 491 | endif # BLK_DEV | 508 | endif # BLK_DEV |
diff --git a/drivers/block/Makefile b/drivers/block/Makefile index aff5ac925c34..d7f463d6312d 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile | |||
| @@ -37,5 +37,6 @@ obj-$(CONFIG_BLK_DEV_HD) += hd.o | |||
| 37 | 37 | ||
| 38 | obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o | 38 | obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o |
| 39 | obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ | 39 | obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ |
| 40 | obj-$(CONFIG_BLK_DEV_RBD) += rbd.o | ||
| 40 | 41 | ||
| 41 | swim_mod-objs := swim.o swim_asm.o | 42 | swim_mod-objs := swim.o swim_asm.o |
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c new file mode 100644 index 000000000000..6ec9d53806c5 --- /dev/null +++ b/drivers/block/rbd.c | |||
| @@ -0,0 +1,1841 @@ | |||
| 1 | /* | ||
| 2 | rbd.c -- Export ceph rados objects as a Linux block device | ||
| 3 | |||
| 4 | |||
| 5 | based on drivers/block/osdblk.c: | ||
| 6 | |||
| 7 | Copyright 2009 Red Hat, Inc. | ||
| 8 | |||
| 9 | This program is free software; you can redistribute it and/or modify | ||
| 10 | it under the terms of the GNU General Public License as published by | ||
| 11 | the Free Software Foundation. | ||
| 12 | |||
| 13 | This program is distributed in the hope that it will be useful, | ||
| 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 16 | GNU General Public License for more details. | ||
| 17 | |||
| 18 | You should have received a copy of the GNU General Public License | ||
| 19 | along with this program; see the file COPYING. If not, write to | ||
| 20 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 21 | |||
| 22 | |||
| 23 | |||
| 24 | Instructions for use | ||
| 25 | -------------------- | ||
| 26 | |||
| 27 | 1) Map a Linux block device to an existing rbd image. | ||
| 28 | |||
| 29 | Usage: <mon ip addr> <options> <pool name> <rbd image name> [snap name] | ||
| 30 | |||
| 31 | $ echo "192.168.0.1 name=admin rbd foo" > /sys/class/rbd/add | ||
| 32 | |||
| 33 | The snapshot name can be "-" or omitted to map the image read/write. | ||
| 34 | |||
| 35 | 2) List all active blkdev<->object mappings. | ||
| 36 | |||
| 37 | In this example, we have performed step #1 twice, creating two blkdevs, | ||
| 38 | mapped to two separate rados objects in the rados rbd pool. | ||
| 39 | |||
| 40 | $ cat /sys/class/rbd/list | ||
| 41 | #id major client_name pool name snap KB | ||
| 42 | 0 254 client4143 rbd foo - 1024000 | ||
| 43 | |||
| 44 | The columns, in order, are: | ||
| 45 | - blkdev unique id | ||
| 46 | - blkdev assigned major | ||
| 47 | - rados client id | ||
| 48 | - rados pool name | ||
| 49 | - rados block device name | ||
| 50 | - mapped snapshot ("-" if none) | ||
| 51 | - device size in KB | ||
| 52 | |||
| 53 | |||
| 54 | 3) Create a snapshot. | ||
| 55 | |||
| 56 | Usage: <blkdev id> <snapname> | ||
| 57 | |||
| 58 | $ echo "0 mysnap" > /sys/class/rbd/snap_create | ||
| 59 | |||
| 60 | |||
| 61 | 4) List snapshots. | ||
| 62 | |||
| 63 | $ cat /sys/class/rbd/snaps_list | ||
| 64 | #id snap KB | ||
| 65 | 0 - 1024000 (*) | ||
| 66 | 0 foo 1024000 | ||
| 67 | |||
| 68 | The columns, in order, are: | ||
| 69 | - blkdev unique id | ||
| 70 | - snapshot name, '-' means none (active read/write version) | ||
| 71 | - size of device at time of snapshot | ||
| 72 | - the (*) indicates this is the active version | ||
| 73 | |||
| 74 | 5) Roll back to a snapshot. | ||
| 75 | |||
| 76 | Usage: <blkdev id> <snapname> | ||
| 77 | |||
| 78 | $ echo "0 mysnap" > /sys/class/rbd/snap_rollback | ||
| 79 | |||
| 80 | |||
| 81 | 6) Map an image using a snapshot. | ||
| 82 | |||
| 83 | A snapshot mapping is read-only. This is done by passing | ||
| 84 | snap=<snapname> to the options when adding a device. | ||
| 85 | |||
| 86 | $ echo "192.168.0.1 name=admin,snap=mysnap rbd foo" > /sys/class/rbd/add | ||
| 87 | |||
| 88 | |||
| 89 | 7) Remove an active blkdev<->rbd image mapping. | ||
| 90 | |||
| 91 | In this example, we remove the mapping with blkdev unique id 1. | ||
| 92 | |||
| 93 | $ echo 1 > /sys/class/rbd/remove | ||
| 94 | |||
| 95 | |||
| 96 | NOTE: The actual creation and deletion of rados objects is outside the scope | ||
| 97 | of this driver. | ||
| 98 | |||
| 99 | */ | ||
| 100 | |||
| 101 | #include <linux/ceph/libceph.h> | ||
| 102 | #include <linux/ceph/osd_client.h> | ||
| 103 | #include <linux/ceph/mon_client.h> | ||
| 104 | #include <linux/ceph/decode.h> | ||
| 105 | |||
| 106 | #include <linux/kernel.h> | ||
| 107 | #include <linux/device.h> | ||
| 108 | #include <linux/module.h> | ||
| 109 | #include <linux/fs.h> | ||
| 110 | #include <linux/blkdev.h> | ||
| 111 | |||
| 112 | #include "rbd_types.h" | ||
| 113 | |||
| 114 | #define DRV_NAME "rbd" | ||
| 115 | #define DRV_NAME_LONG "rbd (rados block device)" | ||
| 116 | |||
| 117 | #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ | ||
| 118 | |||
| 119 | #define RBD_MAX_MD_NAME_LEN (96 + sizeof(RBD_SUFFIX)) | ||
| 120 | #define RBD_MAX_POOL_NAME_LEN 64 | ||
| 121 | #define RBD_MAX_SNAP_NAME_LEN 32 | ||
| 122 | #define RBD_MAX_OPT_LEN 1024 | ||
| 123 | |||
| 124 | #define RBD_SNAP_HEAD_NAME "-" | ||
| 125 | |||
| 126 | #define DEV_NAME_LEN 32 | ||
| 127 | |||
| 128 | /* | ||
| 129 | * block device image metadata (in-memory version) | ||
| 130 | */ | ||
| 131 | struct rbd_image_header { | ||
| 132 | u64 image_size; | ||
| 133 | char block_name[32]; | ||
| 134 | __u8 obj_order; | ||
| 135 | __u8 crypt_type; | ||
| 136 | __u8 comp_type; | ||
| 137 | struct rw_semaphore snap_rwsem; | ||
| 138 | struct ceph_snap_context *snapc; | ||
| 139 | size_t snap_names_len; | ||
| 140 | u64 snap_seq; | ||
| 141 | u32 total_snaps; | ||
| 142 | |||
| 143 | char *snap_names; | ||
| 144 | u64 *snap_sizes; | ||
| 145 | }; | ||
| 146 | |||
| 147 | /* | ||
| 148 | * an instance of the client. multiple devices may share a client. | ||
| 149 | */ | ||
| 150 | struct rbd_client { | ||
| 151 | struct ceph_client *client; | ||
| 152 | struct kref kref; | ||
| 153 | struct list_head node; | ||
| 154 | }; | ||
| 155 | |||
| 156 | /* | ||
| 157 | * a single io request | ||
| 158 | */ | ||
| 159 | struct rbd_request { | ||
| 160 | struct request *rq; /* blk layer request */ | ||
| 161 | struct bio *bio; /* cloned bio */ | ||
| 162 | struct page **pages; /* list of used pages */ | ||
| 163 | u64 len; | ||
| 164 | }; | ||
| 165 | |||
| 166 | /* | ||
| 167 | * a single device | ||
| 168 | */ | ||
| 169 | struct rbd_device { | ||
| 170 | int id; /* blkdev unique id */ | ||
| 171 | |||
| 172 | int major; /* blkdev assigned major */ | ||
| 173 | struct gendisk *disk; /* blkdev's gendisk and rq */ | ||
| 174 | struct request_queue *q; | ||
| 175 | |||
| 176 | struct ceph_client *client; | ||
| 177 | struct rbd_client *rbd_client; | ||
| 178 | |||
| 179 | char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ | ||
| 180 | |||
| 181 | spinlock_t lock; /* queue lock */ | ||
| 182 | |||
| 183 | struct rbd_image_header header; | ||
| 184 | char obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */ | ||
| 185 | int obj_len; | ||
| 186 | char obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */ | ||
| 187 | char pool_name[RBD_MAX_POOL_NAME_LEN]; | ||
| 188 | int poolid; | ||
| 189 | |||
| 190 | char snap_name[RBD_MAX_SNAP_NAME_LEN]; | ||
| 191 | u32 cur_snap; /* index+1 of current snapshot within snap context | ||
| 192 | 0 - for the head */ | ||
| 193 | int read_only; | ||
| 194 | |||
| 195 | struct list_head node; | ||
| 196 | }; | ||
| 197 | |||
| 198 | static DEFINE_SPINLOCK(node_lock); /* protects client get/put */ | ||
| 199 | |||
| 200 | static struct class *class_rbd; /* /sys/class/rbd */ | ||
| 201 | static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ | ||
| 202 | static LIST_HEAD(rbd_dev_list); /* devices */ | ||
| 203 | static LIST_HEAD(rbd_client_list); /* clients */ | ||
| 204 | |||
| 205 | |||
| 206 | static int rbd_open(struct block_device *bdev, fmode_t mode) | ||
| 207 | { | ||
| 208 | struct gendisk *disk = bdev->bd_disk; | ||
| 209 | struct rbd_device *rbd_dev = disk->private_data; | ||
| 210 | |||
| 211 | set_device_ro(bdev, rbd_dev->read_only); | ||
| 212 | |||
| 213 | if ((mode & FMODE_WRITE) && rbd_dev->read_only) | ||
| 214 | return -EROFS; | ||
| 215 | |||
| 216 | return 0; | ||
| 217 | } | ||
| 218 | |||
| 219 | static const struct block_device_operations rbd_bd_ops = { | ||
| 220 | .owner = THIS_MODULE, | ||
| 221 | .open = rbd_open, | ||
| 222 | }; | ||
| 223 | |||
| 224 | /* | ||
| 225 | * Initialize an rbd client instance. | ||
| 226 | * We own *opt. | ||
| 227 | */ | ||
| 228 | static struct rbd_client *rbd_client_create(struct ceph_options *opt) | ||
| 229 | { | ||
| 230 | struct rbd_client *rbdc; | ||
| 231 | int ret = -ENOMEM; | ||
| 232 | |||
| 233 | dout("rbd_client_create\n"); | ||
| 234 | rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); | ||
| 235 | if (!rbdc) | ||
| 236 | goto out_opt; | ||
| 237 | |||
| 238 | kref_init(&rbdc->kref); | ||
| 239 | INIT_LIST_HEAD(&rbdc->node); | ||
| 240 | |||
| 241 | rbdc->client = ceph_create_client(opt, rbdc); | ||
| 242 | if (IS_ERR(rbdc->client)) | ||
| 243 | goto out_rbdc; | ||
| 244 | opt = NULL; /* Now rbdc->client is responsible for opt */ | ||
| 245 | |||
| 246 | ret = ceph_open_session(rbdc->client); | ||
| 247 | if (ret < 0) | ||
| 248 | goto out_err; | ||
| 249 | |||
| 250 | spin_lock(&node_lock); | ||
| 251 | list_add_tail(&rbdc->node, &rbd_client_list); | ||
| 252 | spin_unlock(&node_lock); | ||
| 253 | |||
| 254 | dout("rbd_client_create created %p\n", rbdc); | ||
| 255 | return rbdc; | ||
| 256 | |||
| 257 | out_err: | ||
| 258 | ceph_destroy_client(rbdc->client); | ||
| 259 | out_rbdc: | ||
| 260 | kfree(rbdc); | ||
| 261 | out_opt: | ||
| 262 | if (opt) | ||
| 263 | ceph_destroy_options(opt); | ||
| 264 | return ERR_PTR(ret); | ||
| 265 | } | ||
| 266 | |||
| 267 | /* | ||
| 268 | * Find a ceph client with a specific addr and configuration. | ||
| 269 | */ | ||
| 270 | static struct rbd_client *__rbd_client_find(struct ceph_options *opt) | ||
| 271 | { | ||
| 272 | struct rbd_client *client_node; | ||
| 273 | |||
| 274 | if (opt->flags & CEPH_OPT_NOSHARE) | ||
| 275 | return NULL; | ||
| 276 | |||
| 277 | list_for_each_entry(client_node, &rbd_client_list, node) | ||
| 278 | if (ceph_compare_options(opt, client_node->client) == 0) | ||
| 279 | return client_node; | ||
| 280 | return NULL; | ||
| 281 | } | ||
| 282 | |||
| 283 | /* | ||
| 284 | * Get a ceph client with a specific addr and configuration; if one | ||
| 285 | * does not exist, create it. | ||
| 286 | */ | ||
| 287 | static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, | ||
| 288 | char *options) | ||
| 289 | { | ||
| 290 | struct rbd_client *rbdc; | ||
| 291 | struct ceph_options *opt; | ||
| 292 | int ret; | ||
| 293 | |||
| 294 | ret = ceph_parse_options(&opt, options, mon_addr, | ||
| 295 | mon_addr + strlen(mon_addr), NULL, NULL); | ||
| 296 | if (ret < 0) | ||
| 297 | return ret; | ||
| 298 | |||
| 299 | spin_lock(&node_lock); | ||
| 300 | rbdc = __rbd_client_find(opt); | ||
| 301 | if (rbdc) { | ||
| 302 | ceph_destroy_options(opt); | ||
| 303 | |||
| 304 | /* using an existing client */ | ||
| 305 | kref_get(&rbdc->kref); | ||
| 306 | rbd_dev->rbd_client = rbdc; | ||
| 307 | rbd_dev->client = rbdc->client; | ||
| 308 | spin_unlock(&node_lock); | ||
| 309 | return 0; | ||
| 310 | } | ||
| 311 | spin_unlock(&node_lock); | ||
| 312 | |||
| 313 | rbdc = rbd_client_create(opt); | ||
| 314 | if (IS_ERR(rbdc)) | ||
| 315 | return PTR_ERR(rbdc); | ||
| 316 | |||
| 317 | rbd_dev->rbd_client = rbdc; | ||
| 318 | rbd_dev->client = rbdc->client; | ||
| 319 | return 0; | ||
| 320 | } | ||
| 321 | |||
| 322 | /* | ||
| 323 | * Destroy ceph client | ||
| 324 | */ | ||
| 325 | static void rbd_client_release(struct kref *kref) | ||
| 326 | { | ||
| 327 | struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); | ||
| 328 | |||
| 329 | dout("rbd_release_client %p\n", rbdc); | ||
| 330 | spin_lock(&node_lock); | ||
| 331 | list_del(&rbdc->node); | ||
| 332 | spin_unlock(&node_lock); | ||
| 333 | |||
| 334 | ceph_destroy_client(rbdc->client); | ||
| 335 | kfree(rbdc); | ||
| 336 | } | ||
| 337 | |||
| 338 | /* | ||
| 339 | * Drop reference to ceph client node. If it's not referenced anymore, release | ||
| 340 | * it. | ||
| 341 | */ | ||
| 342 | static void rbd_put_client(struct rbd_device *rbd_dev) | ||
| 343 | { | ||
| 344 | kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); | ||
| 345 | rbd_dev->rbd_client = NULL; | ||
| 346 | rbd_dev->client = NULL; | ||
| 347 | } | ||
| 348 | |||
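The two helpers above bracket the lifetime of a shared client. A typical sequence, as driven by the /sys/class/rbd/add and /sys/class/rbd/remove handlers described in the header comment (sketch only):

        /* add: find an existing ceph_client with matching options (taking a
         * kref) or create a fresh one */
        rc = rbd_get_client(rbd_dev, mon_addr, options);

        /* ... all I/O goes through rbd_dev->client ... */

        /* remove: drop the kref; the last user tears the client down via
         * rbd_client_release() */
        rbd_put_client(rbd_dev);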
| 349 | |||
| 350 | /* | ||
| 351 | * Create a new header structure, translate header format from the on-disk | ||
| 352 | * header. | ||
| 353 | */ | ||
| 354 | static int rbd_header_from_disk(struct rbd_image_header *header, | ||
| 355 | struct rbd_image_header_ondisk *ondisk, | ||
| 356 | int allocated_snaps, | ||
| 357 | gfp_t gfp_flags) | ||
| 358 | { | ||
| 359 | int i; | ||
| 360 | u32 snap_count = le32_to_cpu(ondisk->snap_count); | ||
| 361 | int ret = -ENOMEM; | ||
| 362 | |||
| 363 | init_rwsem(&header->snap_rwsem); | ||
| 364 | |||
| 365 | header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); | ||
| 366 | header->snapc = kmalloc(sizeof(struct ceph_snap_context) + | ||
| 367 | snap_count * | ||
| 368 | sizeof(struct rbd_image_snap_ondisk), | ||
| 369 | gfp_flags); | ||
| 370 | if (!header->snapc) | ||
| 371 | return -ENOMEM; | ||
| 372 | if (snap_count) { | ||
| 373 | header->snap_names = kmalloc(header->snap_names_len, | ||
| 374 | gfp_flags); | ||
| 375 | if (!header->snap_names) | ||
| 376 | goto err_snapc; | ||
| 377 | header->snap_sizes = kmalloc(snap_count * sizeof(u64), | ||
| 378 | gfp_flags); | ||
| 379 | if (!header->snap_sizes) | ||
| 380 | goto err_names; | ||
| 381 | } else { | ||
| 382 | header->snap_names = NULL; | ||
| 383 | header->snap_sizes = NULL; | ||
| 384 | } | ||
| 385 | memcpy(header->block_name, ondisk->block_name, | ||
| 386 | sizeof(ondisk->block_name)); | ||
| 387 | |||
| 388 | header->image_size = le64_to_cpu(ondisk->image_size); | ||
| 389 | header->obj_order = ondisk->options.order; | ||
| 390 | header->crypt_type = ondisk->options.crypt_type; | ||
| 391 | header->comp_type = ondisk->options.comp_type; | ||
| 392 | |||
| 393 | atomic_set(&header->snapc->nref, 1); | ||
| 394 | header->snap_seq = le64_to_cpu(ondisk->snap_seq); | ||
| 395 | header->snapc->num_snaps = snap_count; | ||
| 396 | header->total_snaps = snap_count; | ||
| 397 | |||
| 398 | if (snap_count && | ||
| 399 | allocated_snaps == snap_count) { | ||
| 400 | for (i = 0; i < snap_count; i++) { | ||
| 401 | header->snapc->snaps[i] = | ||
| 402 | le64_to_cpu(ondisk->snaps[i].id); | ||
| 403 | header->snap_sizes[i] = | ||
| 404 | le64_to_cpu(ondisk->snaps[i].image_size); | ||
| 405 | } | ||
| 406 | |||
| 407 | /* copy snapshot names */ | ||
| 408 | memcpy(header->snap_names, &ondisk->snaps[i], | ||
| 409 | header->snap_names_len); | ||
| 410 | } | ||
| 411 | |||
| 412 | return 0; | ||
| 413 | |||
| 414 | err_names: | ||
| 415 | kfree(header->snap_names); | ||
| 416 | err_snapc: | ||
| 417 | kfree(header->snapc); | ||
| 418 | return ret; | ||
| 419 | } | ||
| 420 | |||
| 421 | static int snap_index(struct rbd_image_header *header, int snap_num) | ||
| 422 | { | ||
| 423 | return header->total_snaps - snap_num; | ||
| 424 | } | ||
| 425 | |||
| 426 | static u64 cur_snap_id(struct rbd_device *rbd_dev) | ||
| 427 | { | ||
| 428 | struct rbd_image_header *header = &rbd_dev->header; | ||
| 429 | |||
| 430 | if (!rbd_dev->cur_snap) | ||
| 431 | return 0; | ||
| 432 | |||
| 433 | return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)]; | ||
| 434 | } | ||
| 435 | |||
| 436 | static int snap_by_name(struct rbd_image_header *header, const char *snap_name, | ||
| 437 | u64 *seq, u64 *size) | ||
| 438 | { | ||
| 439 | int i; | ||
| 440 | char *p = header->snap_names; | ||
| 441 | |||
| 442 | for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) { | ||
| 443 | if (strcmp(snap_name, p) == 0) | ||
| 444 | break; | ||
| 445 | } | ||
| 446 | if (i == header->total_snaps) | ||
| 447 | return -ENOENT; | ||
| 448 | if (seq) | ||
| 449 | *seq = header->snapc->snaps[i]; | ||
| 450 | |||
| 451 | if (size) | ||
| 452 | *size = header->snap_sizes[i]; | ||
| 453 | |||
| 454 | return i; | ||
| 455 | } | ||
| 456 | |||
| 457 | static int rbd_header_set_snap(struct rbd_device *dev, | ||
| 458 | const char *snap_name, | ||
| 459 | u64 *size) | ||
| 460 | { | ||
| 461 | struct rbd_image_header *header = &dev->header; | ||
| 462 | struct ceph_snap_context *snapc = header->snapc; | ||
| 463 | int ret = -ENOENT; | ||
| 464 | |||
| 465 | down_write(&header->snap_rwsem); | ||
| 466 | |||
| 467 | if (!snap_name || | ||
| 468 | !*snap_name || | ||
| 469 | strcmp(snap_name, "-") == 0 || | ||
| 470 | strcmp(snap_name, RBD_SNAP_HEAD_NAME) == 0) { | ||
| 471 | if (header->total_snaps) | ||
| 472 | snapc->seq = header->snap_seq; | ||
| 473 | else | ||
| 474 | snapc->seq = 0; | ||
| 475 | dev->cur_snap = 0; | ||
| 476 | dev->read_only = 0; | ||
| 477 | if (size) | ||
| 478 | *size = header->image_size; | ||
| 479 | } else { | ||
| 480 | ret = snap_by_name(header, snap_name, &snapc->seq, size); | ||
| 481 | if (ret < 0) | ||
| 482 | goto done; | ||
| 483 | |||
| 484 | dev->cur_snap = header->total_snaps - ret; | ||
| 485 | dev->read_only = 1; | ||
| 486 | } | ||
| 487 | |||
| 488 | ret = 0; | ||
| 489 | done: | ||
| 490 | up_write(&header->snap_rwsem); | ||
| 491 | return ret; | ||
| 492 | } | ||
| 493 | |||
| 494 | static void rbd_header_free(struct rbd_image_header *header) | ||
| 495 | { | ||
| 496 | kfree(header->snapc); | ||
| 497 | kfree(header->snap_names); | ||
| 498 | kfree(header->snap_sizes); | ||
| 499 | } | ||
| 500 | |||
| 501 | /* | ||
| 502 | * get the actual striped segment name, offset and length | ||
| 503 | */ | ||
| 504 | static u64 rbd_get_segment(struct rbd_image_header *header, | ||
| 505 | const char *block_name, | ||
| 506 | u64 ofs, u64 len, | ||
| 507 | char *seg_name, u64 *segofs) | ||
| 508 | { | ||
| 509 | u64 seg = ofs >> header->obj_order; | ||
| 510 | |||
| 511 | if (seg_name) | ||
| 512 | snprintf(seg_name, RBD_MAX_SEG_NAME_LEN, | ||
| 513 | "%s.%012llx", block_name, seg); | ||
| 514 | |||
| 515 | ofs = ofs & ((1 << header->obj_order) - 1); | ||
| 516 | len = min_t(u64, len, (1 << header->obj_order) - ofs); | ||
| 517 | |||
| 518 | if (segofs) | ||
| 519 | *segofs = ofs; | ||
| 520 | |||
| 521 | return len; | ||
| 522 | } | ||
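A note on the striping math just above: rbd_get_segment() turns an image offset into (object index, intra-object offset) with one shift and one mask, and clamps the length so no single request crosses an object boundary. A minimal userspace sketch of the same arithmetic (the names here are illustrative, not from the driver):

	#include <stdint.h>
	#include <stdio.h>

	/* Restatement of rbd_get_segment()'s arithmetic for one request. */
	static uint64_t segment_of(uint64_t ofs, int obj_order, uint64_t len,
				   uint64_t *seg_ofs, uint64_t *seg_len)
	{
		uint64_t obj_size = 1ULL << obj_order;
		uint64_t seg = ofs >> obj_order;	/* which object */

		*seg_ofs = ofs & (obj_size - 1);	/* offset within it */
		/* clamp so the I/O stays inside this object */
		*seg_len = len < obj_size - *seg_ofs ? len
						     : obj_size - *seg_ofs;
		return seg;
	}

	int main(void)
	{
		uint64_t so, sl;
		/* 6 MB offset, 4 MB objects (order 22): 2 MB into object 1 */
		uint64_t seg = segment_of(6 << 20, 22, 4 << 20, &so, &sl);
		printf("seg=%llu ofs=%llu len=%llu\n", (unsigned long long)seg,
		       (unsigned long long)so, (unsigned long long)sl);
		return 0;	/* prints seg=1 ofs=2097152 len=2097152 */
	}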
| 523 | |||
| 524 | /* | ||
| 525 | * bio helpers | ||
| 526 | */ | ||
| 527 | |||
| 528 | static void bio_chain_put(struct bio *chain) | ||
| 529 | { | ||
| 530 | struct bio *tmp; | ||
| 531 | |||
| 532 | while (chain) { | ||
| 533 | tmp = chain; | ||
| 534 | chain = chain->bi_next; | ||
| 535 | bio_put(tmp); | ||
| 536 | } | ||
| 537 | } | ||
| 538 | |||
| 539 | /* | ||
| 540 | * zeros a bio chain, starting at specific offset | ||
| 541 | */ | ||
| 542 | static void zero_bio_chain(struct bio *chain, int start_ofs) | ||
| 543 | { | ||
| 544 | struct bio_vec *bv; | ||
| 545 | unsigned long flags; | ||
| 546 | void *buf; | ||
| 547 | int i; | ||
| 548 | int pos = 0; | ||
| 549 | |||
| 550 | while (chain) { | ||
| 551 | bio_for_each_segment(bv, chain, i) { | ||
| 552 | if (pos + bv->bv_len > start_ofs) { | ||
| 553 | int remainder = max(start_ofs - pos, 0); | ||
| 554 | buf = bvec_kmap_irq(bv, &flags); | ||
| 555 | memset(buf + remainder, 0, | ||
| 556 | bv->bv_len - remainder); | ||
| 557 | bvec_kunmap_irq(buf, &flags); | ||
| 558 | } | ||
| 559 | pos += bv->bv_len; | ||
| 560 | } | ||
| 561 | |||
| 562 | chain = chain->bi_next; | ||
| 563 | } | ||
| 564 | } | ||
| 565 | |||
| 566 | /* | ||
| 567 | * bio_chain_clone - clone a chain of bios up to a certain length. | ||
| 568 | * might return a bio_pair that will need to be released. | ||
| 569 | */ | ||
| 570 | static struct bio *bio_chain_clone(struct bio **old, struct bio **next, | ||
| 571 | struct bio_pair **bp, | ||
| 572 | int len, gfp_t gfpmask) | ||
| 573 | { | ||
| 574 | struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL; | ||
| 575 | int total = 0; | ||
| 576 | |||
| 577 | if (*bp) { | ||
| 578 | bio_pair_release(*bp); | ||
| 579 | *bp = NULL; | ||
| 580 | } | ||
| 581 | |||
| 582 | while (old_chain && (total < len)) { | ||
| 583 | tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); | ||
| 584 | if (!tmp) | ||
| 585 | goto err_out; | ||
| 586 | |||
| 587 | if (total + old_chain->bi_size > len) { | ||
| 588 | struct bio_pair *bp; | ||
| 589 | |||
| 590 | /* | ||
| 591 | * this split can only happen with a single paged bio, | ||
| 592 | * split_bio will BUG_ON if this is not the case | ||
| 593 | */ | ||
| 594 | dout("bio_chain_clone split! total=%d remaining=%d" | ||
| 595 | "bi_size=%d\n", | ||
| 596 | (int)total, (int)len-total, | ||
| 597 | (int)old_chain->bi_size); | ||
| 598 | |||
| 599 | /* split the bio. We'll release it either in the next | ||
| 600 | call, or it will have to be released outside */ | ||
| 601 | bp = bio_split(old_chain, (len - total) / 512ULL); | ||
| 602 | if (!bp) | ||
| 603 | goto err_out; | ||
| 604 | |||
| 605 | __bio_clone(tmp, &bp->bio1); | ||
| 606 | |||
| 607 | *next = &bp->bio2; | ||
| 608 | } else { | ||
| 609 | __bio_clone(tmp, old_chain); | ||
| 610 | *next = old_chain->bi_next; | ||
| 611 | } | ||
| 612 | |||
| 613 | tmp->bi_bdev = NULL; | ||
| 614 | gfpmask &= ~__GFP_WAIT; | ||
| 615 | tmp->bi_next = NULL; | ||
| 616 | |||
| 617 | if (!new_chain) { | ||
| 618 | new_chain = tail = tmp; | ||
| 619 | } else { | ||
| 620 | tail->bi_next = tmp; | ||
| 621 | tail = tmp; | ||
| 622 | } | ||
| 623 | old_chain = old_chain->bi_next; | ||
| 624 | |||
| 625 | total += tmp->bi_size; | ||
| 626 | } | ||
| 627 | |||
| 628 | BUG_ON(total < len); | ||
| 629 | |||
| 630 | if (tail) | ||
| 631 | tail->bi_next = NULL; | ||
| 632 | |||
| 633 | *old = old_chain; | ||
| 634 | |||
| 635 | return new_chain; | ||
| 636 | |||
| 637 | err_out: | ||
| 638 | dout("bio_chain_clone with err\n"); | ||
| 639 | bio_chain_put(new_chain); | ||
| 640 | return NULL; | ||
| 641 | } | ||
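Worth calling out as a design choice: the request's bio chain is cloned rather than consumed, so each per-object OSD request gets its own chain while the request's own bios stay intact, and a bio that straddles the length budget is cut with bio_split(), which (as the comment above notes) only copes with single-page bios. The rbd_merge_bvec() callback further down exists precisely to keep multi-page bios from ever straddling an object in the first place.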
| 642 | |||
| 643 | /* | ||
| 644 | * helpers for osd request op vectors. | ||
| 645 | */ | ||
| 646 | static int rbd_create_rw_ops(struct ceph_osd_req_op **ops, | ||
| 647 | int num_ops, | ||
| 648 | int opcode, | ||
| 649 | u32 payload_len) | ||
| 650 | { | ||
| 651 | *ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1), | ||
| 652 | GFP_NOIO); | ||
| 653 | if (!*ops) | ||
| 654 | return -ENOMEM; | ||
| 655 | (*ops)[0].op = opcode; | ||
| 656 | /* | ||
| 657 | * op extent offset and length will be set later on | ||
| 658 | * in calc_raw_layout() | ||
| 659 | */ | ||
| 660 | (*ops)[0].payload_len = payload_len; | ||
| 661 | return 0; | ||
| 662 | } | ||
| 663 | |||
| 664 | static void rbd_destroy_ops(struct ceph_osd_req_op *ops) | ||
| 665 | { | ||
| 666 | kfree(ops); | ||
| 667 | } | ||
| 668 | |||
| 669 | /* | ||
| 670 | * Send ceph osd request | ||
| 671 | */ | ||
| 672 | static int rbd_do_request(struct request *rq, | ||
| 673 | struct rbd_device *dev, | ||
| 674 | struct ceph_snap_context *snapc, | ||
| 675 | u64 snapid, | ||
| 676 | const char *obj, u64 ofs, u64 len, | ||
| 677 | struct bio *bio, | ||
| 678 | struct page **pages, | ||
| 679 | int num_pages, | ||
| 680 | int flags, | ||
| 681 | struct ceph_osd_req_op *ops, | ||
| 682 | int num_reply, | ||
| 683 | void (*rbd_cb)(struct ceph_osd_request *req, | ||
| 684 | struct ceph_msg *msg)) | ||
| 685 | { | ||
| 686 | struct ceph_osd_request *req; | ||
| 687 | struct ceph_file_layout *layout; | ||
| 688 | int ret; | ||
| 689 | u64 bno; | ||
| 690 | struct timespec mtime = CURRENT_TIME; | ||
| 691 | struct rbd_request *req_data; | ||
| 692 | struct ceph_osd_request_head *reqhead; | ||
| 693 | struct rbd_image_header *header = &dev->header; | ||
| 694 | |||
| 695 | ret = -ENOMEM; | ||
| 696 | req_data = kzalloc(sizeof(*req_data), GFP_NOIO); | ||
| 697 | if (!req_data) | ||
| 698 | goto done; | ||
| 699 | |||
| 700 | dout("rbd_do_request len=%lld ofs=%lld\n", len, ofs); | ||
| 701 | |||
| 702 | down_read(&header->snap_rwsem); | ||
| 703 | |||
| 704 | req = ceph_osdc_alloc_request(&dev->client->osdc, flags, | ||
| 705 | snapc, | ||
| 706 | ops, | ||
| 707 | false, | ||
| 708 | GFP_NOIO, pages, bio); | ||
| 709 | if (IS_ERR(req)) { | ||
| 710 | up_read(&header->snap_rwsem); | ||
| 711 | ret = PTR_ERR(req); | ||
| 712 | goto done_pages; | ||
| 713 | } | ||
| 714 | |||
| 715 | req->r_callback = rbd_cb; | ||
| 716 | |||
| 717 | req_data->rq = rq; | ||
| 718 | req_data->bio = bio; | ||
| 719 | req_data->pages = pages; | ||
| 720 | req_data->len = len; | ||
| 721 | |||
| 722 | req->r_priv = req_data; | ||
| 723 | |||
| 724 | reqhead = req->r_request->front.iov_base; | ||
| 725 | reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); | ||
| 726 | |||
| 727 | strncpy(req->r_oid, obj, sizeof(req->r_oid)); | ||
| 728 | req->r_oid_len = strlen(req->r_oid); | ||
| 729 | |||
| 730 | layout = &req->r_file_layout; | ||
| 731 | memset(layout, 0, sizeof(*layout)); | ||
| 732 | layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | ||
| 733 | layout->fl_stripe_count = cpu_to_le32(1); | ||
| 734 | layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | ||
| 735 | layout->fl_pg_preferred = cpu_to_le32(-1); | ||
| 736 | layout->fl_pg_pool = cpu_to_le32(dev->poolid); | ||
| 737 | ceph_calc_raw_layout(&dev->client->osdc, layout, snapid, | ||
| 738 | ofs, &len, &bno, req, ops); | ||
| 739 | |||
| 740 | ceph_osdc_build_request(req, ofs, &len, | ||
| 741 | ops, | ||
| 742 | snapc, | ||
| 743 | &mtime, | ||
| 744 | req->r_oid, req->r_oid_len); | ||
| 745 | up_read(&header->snap_rwsem); | ||
| 746 | |||
| 747 | ret = ceph_osdc_start_request(&dev->client->osdc, req, false); | ||
| 748 | if (ret < 0) | ||
| 749 | goto done_err; | ||
| 750 | |||
| 751 | if (!rbd_cb) { | ||
| 752 | ret = ceph_osdc_wait_request(&dev->client->osdc, req); | ||
| 753 | ceph_osdc_put_request(req); | ||
| 754 | } | ||
| 755 | return ret; | ||
| 756 | |||
| 757 | done_err: | ||
| 758 | bio_chain_put(req_data->bio); | ||
| 759 | ceph_osdc_put_request(req); | ||
| 760 | done_pages: | ||
| 761 | kfree(req_data); | ||
| 762 | done: | ||
| 763 | if (rq) | ||
| 764 | blk_end_request(rq, ret, len); | ||
| 765 | return ret; | ||
| 766 | } | ||
| 767 | |||
| 768 | /* | ||
| 769 | * Ceph osd op callback | ||
| 770 | */ | ||
| 771 | static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) | ||
| 772 | { | ||
| 773 | struct rbd_request *req_data = req->r_priv; | ||
| 774 | struct ceph_osd_reply_head *replyhead; | ||
| 775 | struct ceph_osd_op *op; | ||
| 776 | __s32 rc; | ||
| 777 | u64 bytes; | ||
| 778 | int read_op; | ||
| 779 | |||
| 780 | /* parse reply */ | ||
| 781 | replyhead = msg->front.iov_base; | ||
| 782 | WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); | ||
| 783 | op = (void *)(replyhead + 1); | ||
| 784 | rc = le32_to_cpu(replyhead->result); | ||
| 785 | bytes = le64_to_cpu(op->extent.length); | ||
| 786 | read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ); | ||
| 787 | |||
| 788 | dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc); | ||
| 789 | |||
| 790 | if (rc == -ENOENT && read_op) { | ||
| 791 | zero_bio_chain(req_data->bio, 0); | ||
| 792 | rc = 0; | ||
| 793 | } else if (rc == 0 && read_op && bytes < req_data->len) { | ||
| 794 | zero_bio_chain(req_data->bio, bytes); | ||
| 795 | bytes = req_data->len; | ||
| 796 | } | ||
| 797 | |||
| 798 | blk_end_request(req_data->rq, rc, bytes); | ||
| 799 | |||
| 800 | if (req_data->bio) | ||
| 801 | bio_chain_put(req_data->bio); | ||
| 802 | |||
| 803 | ceph_osdc_put_request(req); | ||
| 804 | kfree(req_data); | ||
| 805 | } | ||
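The read-side conventions in this callback matter for correctness: an object that does not exist yet reads back as all zeroes (the image is sparse), and a short read is zero-padded out to the requested length before the block request is completed, both via zero_bio_chain(). A flat-buffer sketch of that rule, with pad_short_read() an invented name:

	#include <errno.h>
	#include <stddef.h>
	#include <string.h>

	/* Flat-buffer analogue of the read handling in rbd_req_cb(). */
	static long pad_short_read(char *buf, size_t len, long rc, size_t bytes)
	{
		if (rc == -ENOENT) {		/* hole in the image */
			memset(buf, 0, len);
			return 0;
		}
		if (rc == 0 && bytes < len)	/* short read: pad the tail */
			memset(buf + bytes, 0, len - bytes);
		return rc;
	}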
| 806 | |||
| 807 | /* | ||
| 808 | * Do a synchronous ceph osd operation | ||
| 809 | */ | ||
| 810 | static int rbd_req_sync_op(struct rbd_device *dev, | ||
| 811 | struct ceph_snap_context *snapc, | ||
| 812 | u64 snapid, | ||
| 813 | int opcode, | ||
| 814 | int flags, | ||
| 815 | struct ceph_osd_req_op *orig_ops, | ||
| 816 | int num_reply, | ||
| 817 | const char *obj, | ||
| 818 | u64 ofs, u64 len, | ||
| 819 | char *buf) | ||
| 820 | { | ||
| 821 | int ret; | ||
| 822 | struct page **pages; | ||
| 823 | int num_pages; | ||
| 824 | struct ceph_osd_req_op *ops = orig_ops; | ||
| 825 | u32 payload_len; | ||
| 826 | |||
| 827 | num_pages = calc_pages_for(ofs, len); | ||
| 828 | pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); | ||
| 829 | if (IS_ERR(pages)) | ||
| 830 | return PTR_ERR(pages); | ||
| 831 | |||
| 832 | if (!orig_ops) { | ||
| 833 | payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0); | ||
| 834 | ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); | ||
| 835 | if (ret < 0) | ||
| 836 | goto done; | ||
| 837 | |||
| 838 | if ((flags & CEPH_OSD_FLAG_WRITE) && buf) { | ||
| 839 | ret = ceph_copy_to_page_vector(pages, buf, ofs, len); | ||
| 840 | if (ret < 0) | ||
| 841 | goto done_ops; | ||
| 842 | } | ||
| 843 | } | ||
| 844 | |||
| 845 | ret = rbd_do_request(NULL, dev, snapc, snapid, | ||
| 846 | obj, ofs, len, NULL, | ||
| 847 | pages, num_pages, | ||
| 848 | flags, | ||
| 849 | ops, | ||
| 850 | 2, | ||
| 851 | NULL); | ||
| 852 | if (ret < 0) | ||
| 853 | goto done_ops; | ||
| 854 | |||
| 855 | if ((flags & CEPH_OSD_FLAG_READ) && buf) | ||
| 856 | ret = ceph_copy_from_page_vector(pages, buf, ofs, ret); | ||
| 857 | |||
| 858 | done_ops: | ||
| 859 | if (!orig_ops) | ||
| 860 | rbd_destroy_ops(ops); | ||
| 861 | done: | ||
| 862 | ceph_release_page_vector(pages, num_pages); | ||
| 863 | return ret; | ||
| 864 | } | ||
| 865 | |||
| 866 | /* | ||
| 867 | * Do an asynchronous ceph osd operation | ||
| 868 | */ | ||
| 869 | static int rbd_do_op(struct request *rq, | ||
| 870 | struct rbd_device *rbd_dev, | ||
| 871 | struct ceph_snap_context *snapc, | ||
| 872 | u64 snapid, | ||
| 873 | int opcode, int flags, int num_reply, | ||
| 874 | u64 ofs, u64 len, | ||
| 875 | struct bio *bio) | ||
| 876 | { | ||
| 877 | char *seg_name; | ||
| 878 | u64 seg_ofs; | ||
| 879 | u64 seg_len; | ||
| 880 | int ret; | ||
| 881 | struct ceph_osd_req_op *ops; | ||
| 882 | u32 payload_len; | ||
| 883 | |||
| 884 | seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); | ||
| 885 | if (!seg_name) | ||
| 886 | return -ENOMEM; | ||
| 887 | |||
| 888 | seg_len = rbd_get_segment(&rbd_dev->header, | ||
| 889 | rbd_dev->header.block_name, | ||
| 890 | ofs, len, | ||
| 891 | seg_name, &seg_ofs); | ||
| 892 | |||
| 893 | payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); | ||
| 894 | |||
| 895 | ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); | ||
| 896 | if (ret < 0) | ||
| 897 | goto done; | ||
| 898 | |||
| 899 | /* we've taken care of segment sizes earlier when we | ||
| 900 | cloned the bios. We should never have a segment | ||
| 901 | truncated at this point */ | ||
| 902 | BUG_ON(seg_len < len); | ||
| 903 | |||
| 904 | ret = rbd_do_request(rq, rbd_dev, snapc, snapid, | ||
| 905 | seg_name, seg_ofs, seg_len, | ||
| 906 | bio, | ||
| 907 | NULL, 0, | ||
| 908 | flags, | ||
| 909 | ops, | ||
| 910 | num_reply, | ||
| 911 | rbd_req_cb); | ||
| 912 | done: | ||
| 913 | kfree(seg_name); | ||
| 914 | return ret; | ||
| 915 | } | ||
| 916 | |||
| 917 | /* | ||
| 918 | * Request async osd write | ||
| 919 | */ | ||
| 920 | static int rbd_req_write(struct request *rq, | ||
| 921 | struct rbd_device *rbd_dev, | ||
| 922 | struct ceph_snap_context *snapc, | ||
| 923 | u64 ofs, u64 len, | ||
| 924 | struct bio *bio) | ||
| 925 | { | ||
| 926 | return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, | ||
| 927 | CEPH_OSD_OP_WRITE, | ||
| 928 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | ||
| 929 | 2, | ||
| 930 | ofs, len, bio); | ||
| 931 | } | ||
| 932 | |||
| 933 | /* | ||
| 934 | * Request async osd read | ||
| 935 | */ | ||
| 936 | static int rbd_req_read(struct request *rq, | ||
| 937 | struct rbd_device *rbd_dev, | ||
| 938 | u64 snapid, | ||
| 939 | u64 ofs, u64 len, | ||
| 940 | struct bio *bio) | ||
| 941 | { | ||
| 942 | return rbd_do_op(rq, rbd_dev, NULL, | ||
| 943 | (snapid ? snapid : CEPH_NOSNAP), | ||
| 944 | CEPH_OSD_OP_READ, | ||
| 945 | CEPH_OSD_FLAG_READ, | ||
| 946 | 2, | ||
| 947 | ofs, len, bio); | ||
| 948 | } | ||
| 949 | |||
| 950 | /* | ||
| 951 | * Request sync osd read | ||
| 952 | */ | ||
| 953 | static int rbd_req_sync_read(struct rbd_device *dev, | ||
| 954 | struct ceph_snap_context *snapc, | ||
| 955 | u64 snapid, | ||
| 956 | const char *obj, | ||
| 957 | u64 ofs, u64 len, | ||
| 958 | char *buf) | ||
| 959 | { | ||
| 960 | return rbd_req_sync_op(dev, NULL, | ||
| 961 | (snapid ? snapid : CEPH_NOSNAP), | ||
| 962 | CEPH_OSD_OP_READ, | ||
| 963 | CEPH_OSD_FLAG_READ, | ||
| 964 | NULL, | ||
| 965 | 1, obj, ofs, len, buf); | ||
| 966 | } | ||
| 967 | |||
| 968 | /* | ||
| 969 | * Request sync osd read | ||
| 970 | */ | ||
| 971 | static int rbd_req_sync_rollback_obj(struct rbd_device *dev, | ||
| 972 | u64 snapid, | ||
| 973 | const char *obj) | ||
| 974 | { | ||
| 975 | struct ceph_osd_req_op *ops; | ||
| 976 | int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_ROLLBACK, 0); | ||
| 977 | if (ret < 0) | ||
| 978 | return ret; | ||
| 979 | |||
| 980 | ops[0].snap.snapid = snapid; | ||
| 981 | |||
| 982 | ret = rbd_req_sync_op(dev, NULL, | ||
| 983 | CEPH_NOSNAP, | ||
| 984 | 0, | ||
| 985 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | ||
| 986 | ops, | ||
| 987 | 1, obj, 0, 0, NULL); | ||
| 988 | |||
| 989 | rbd_destroy_ops(ops); | ||
| 990 | |||
| 991 | return ret; | ||
| 995 | } | ||
| 996 | |||
| 997 | /* | ||
| 999 | * Request sync osd 'exec' class method call | ||
| 999 | */ | ||
| 1000 | static int rbd_req_sync_exec(struct rbd_device *dev, | ||
| 1001 | const char *obj, | ||
| 1002 | const char *cls, | ||
| 1003 | const char *method, | ||
| 1004 | const char *data, | ||
| 1005 | int len) | ||
| 1006 | { | ||
| 1007 | struct ceph_osd_req_op *ops; | ||
| 1008 | int cls_len = strlen(cls); | ||
| 1009 | int method_len = strlen(method); | ||
| 1010 | int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL, | ||
| 1011 | cls_len + method_len + len); | ||
| 1012 | if (ret < 0) | ||
| 1013 | return ret; | ||
| 1014 | |||
| 1015 | ops[0].cls.class_name = cls; | ||
| 1016 | ops[0].cls.class_len = (__u8)cls_len; | ||
| 1017 | ops[0].cls.method_name = method; | ||
| 1018 | ops[0].cls.method_len = (__u8)method_len; | ||
| 1019 | ops[0].cls.argc = 0; | ||
| 1020 | ops[0].cls.indata = data; | ||
| 1021 | ops[0].cls.indata_len = len; | ||
| 1022 | |||
| 1023 | ret = rbd_req_sync_op(dev, NULL, | ||
| 1024 | CEPH_NOSNAP, | ||
| 1025 | 0, | ||
| 1026 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | ||
| 1027 | ops, | ||
| 1028 | 1, obj, 0, 0, NULL); | ||
| 1029 | |||
| 1030 | rbd_destroy_ops(ops); | ||
| 1031 | |||
| 1032 | dout("cls_exec returned %d\n", ret); | ||
| 1033 | return ret; | ||
| 1034 | } | ||
| 1035 | |||
| 1036 | /* | ||
| 1037 | * block device queue callback | ||
| 1038 | */ | ||
| 1039 | static void rbd_rq_fn(struct request_queue *q) | ||
| 1040 | { | ||
| 1041 | struct rbd_device *rbd_dev = q->queuedata; | ||
| 1042 | struct request *rq; | ||
| 1043 | struct bio_pair *bp = NULL; | ||
| 1044 | |||
| 1045 | rq = blk_fetch_request(q); | ||
| 1046 | |||
| 1047 | while (1) { | ||
| 1048 | struct bio *bio; | ||
| 1049 | struct bio *rq_bio, *next_bio = NULL; | ||
| 1050 | bool do_write; | ||
| 1051 | int size, op_size = 0; | ||
| 1052 | u64 ofs; | ||
| 1053 | |||
| 1054 | /* peek at request from block layer */ | ||
| 1055 | if (!rq) | ||
| 1056 | break; | ||
| 1057 | |||
| 1058 | dout("fetched request\n"); | ||
| 1059 | |||
| 1060 | /* filter out block requests we don't understand */ | ||
| 1061 | if (rq->cmd_type != REQ_TYPE_FS) { | ||
| 1062 | __blk_end_request_all(rq, 0); | ||
| 1063 | goto next; | ||
| 1064 | } | ||
| 1065 | |||
| 1066 | /* deduce our operation (read, write) */ | ||
| 1067 | do_write = (rq_data_dir(rq) == WRITE); | ||
| 1068 | |||
| 1069 | size = blk_rq_bytes(rq); | ||
| 1070 | ofs = blk_rq_pos(rq) * 512ULL; | ||
| 1071 | rq_bio = rq->bio; | ||
| 1072 | if (do_write && rbd_dev->read_only) { | ||
| 1073 | __blk_end_request_all(rq, -EROFS); | ||
| 1074 | goto next; | ||
| 1075 | } | ||
| 1076 | |||
| 1077 | spin_unlock_irq(q->queue_lock); | ||
| 1078 | |||
| 1079 | dout("%s 0x%x bytes at 0x%llx\n", | ||
| 1080 | do_write ? "write" : "read", | ||
| 1081 | size, blk_rq_pos(rq) * 512ULL); | ||
| 1082 | |||
| 1083 | do { | ||
| 1084 | /* a bio clone to be passed down to OSD req */ | ||
| 1085 | dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt); | ||
| 1086 | op_size = rbd_get_segment(&rbd_dev->header, | ||
| 1087 | rbd_dev->header.block_name, | ||
| 1088 | ofs, size, | ||
| 1089 | NULL, NULL); | ||
| 1090 | bio = bio_chain_clone(&rq_bio, &next_bio, &bp, | ||
| 1091 | op_size, GFP_ATOMIC); | ||
| 1092 | if (!bio) { | ||
| 1093 | spin_lock_irq(q->queue_lock); | ||
| 1094 | __blk_end_request_all(rq, -ENOMEM); | ||
| 1095 | goto next; | ||
| 1096 | } | ||
| 1097 | |||
| 1098 | /* init OSD command: write or read */ | ||
| 1099 | if (do_write) | ||
| 1100 | rbd_req_write(rq, rbd_dev, | ||
| 1101 | rbd_dev->header.snapc, | ||
| 1102 | ofs, | ||
| 1103 | op_size, bio); | ||
| 1104 | else | ||
| 1105 | rbd_req_read(rq, rbd_dev, | ||
| 1106 | cur_snap_id(rbd_dev), | ||
| 1107 | ofs, | ||
| 1108 | op_size, bio); | ||
| 1109 | |||
| 1110 | size -= op_size; | ||
| 1111 | ofs += op_size; | ||
| 1112 | |||
| 1113 | rq_bio = next_bio; | ||
| 1114 | } while (size > 0); | ||
| 1115 | |||
| 1116 | if (bp) | ||
| 1117 | bio_pair_release(bp); | ||
| 1118 | |||
| 1119 | spin_lock_irq(q->queue_lock); | ||
| 1120 | next: | ||
| 1121 | rq = blk_fetch_request(q); | ||
| 1122 | } | ||
| 1123 | } | ||
| 1124 | |||
| 1125 | /* | ||
| 1126 | * a queue callback. Makes sure that we don't create a bio that spans across | ||
| 1127 | * multiple osd objects. One exception would be single-page bios, | ||
| 1128 | * which we handle later at bio_chain_clone | ||
| 1129 | */ | ||
| 1130 | static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, | ||
| 1131 | struct bio_vec *bvec) | ||
| 1132 | { | ||
| 1133 | struct rbd_device *rbd_dev = q->queuedata; | ||
| 1134 | unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9); | ||
| 1135 | sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); | ||
| 1136 | unsigned int bio_sectors = bmd->bi_size >> 9; | ||
| 1137 | int max; | ||
| 1138 | |||
| 1139 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) | ||
| 1140 | + bio_sectors)) << 9; | ||
| 1141 | if (max < 0) | ||
| 1142 | max = 0; /* bio_add cannot handle a negative return */ | ||
| 1143 | if (max <= bvec->bv_len && bio_sectors == 0) | ||
| 1144 | return bvec->bv_len; | ||
| 1145 | return max; | ||
| 1146 | } | ||
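The bound computed above is easiest to check with numbers: order 22 gives chunk_sectors = 1 << 13 = 8192; a bio starting at sector 8000 with 100 sectors already queued may grow by at most (8192 - (8000 + 100)) << 9 = 92 sectors = 47104 bytes before crossing into the next object. A signed restatement of the same computation (the kernel version leans on the conversion to int to surface the negative case):

	#include <stdint.h>

	/* How many more bytes fit before a bio crosses an object boundary;
	 * mirrors rbd_merge_bvec()'s bound. */
	static long bytes_left_in_chunk(uint64_t sector, long bio_sectors,
					long chunk_sectors)
	{
		long in_chunk = (long)(sector & (chunk_sectors - 1));
		long max = (chunk_sectors - (in_chunk + bio_sectors)) << 9;

		return max < 0 ? 0 : max;	/* bio_add can't take < 0 */
	}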
| 1147 | |||
| 1148 | static void rbd_free_disk(struct rbd_device *rbd_dev) | ||
| 1149 | { | ||
| 1150 | struct gendisk *disk = rbd_dev->disk; | ||
| 1151 | |||
| 1152 | if (!disk) | ||
| 1153 | return; | ||
| 1154 | |||
| 1155 | rbd_header_free(&rbd_dev->header); | ||
| 1156 | |||
| 1157 | if (disk->flags & GENHD_FL_UP) | ||
| 1158 | del_gendisk(disk); | ||
| 1159 | if (disk->queue) | ||
| 1160 | blk_cleanup_queue(disk->queue); | ||
| 1161 | put_disk(disk); | ||
| 1162 | } | ||
| 1163 | |||
| 1164 | /* | ||
| 1165 | * reload the on-disk header | ||
| 1166 | */ | ||
| 1167 | static int rbd_read_header(struct rbd_device *rbd_dev, | ||
| 1168 | struct rbd_image_header *header) | ||
| 1169 | { | ||
| 1170 | ssize_t rc; | ||
| 1171 | struct rbd_image_header_ondisk *dh; | ||
| 1172 | int snap_count = 0; | ||
| 1173 | u64 snap_names_len = 0; | ||
| 1174 | |||
| 1175 | while (1) { | ||
| 1176 | int len = sizeof(*dh) + | ||
| 1177 | snap_count * sizeof(struct rbd_image_snap_ondisk) + | ||
| 1178 | snap_names_len; | ||
| 1179 | |||
| 1180 | rc = -ENOMEM; | ||
| 1181 | dh = kmalloc(len, GFP_KERNEL); | ||
| 1182 | if (!dh) | ||
| 1183 | return -ENOMEM; | ||
| 1184 | |||
| 1185 | rc = rbd_req_sync_read(rbd_dev, | ||
| 1186 | NULL, CEPH_NOSNAP, | ||
| 1187 | rbd_dev->obj_md_name, | ||
| 1188 | 0, len, | ||
| 1189 | (char *)dh); | ||
| 1190 | if (rc < 0) | ||
| 1191 | goto out_dh; | ||
| 1192 | |||
| 1193 | rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL); | ||
| 1194 | if (rc < 0) | ||
| 1195 | goto out_dh; | ||
| 1196 | |||
| 1197 | if (snap_count != header->total_snaps) { | ||
| 1198 | snap_count = header->total_snaps; | ||
| 1199 | snap_names_len = header->snap_names_len; | ||
| 1200 | rbd_header_free(header); | ||
| 1201 | kfree(dh); | ||
| 1202 | continue; | ||
| 1203 | } | ||
| 1204 | break; | ||
| 1205 | } | ||
| 1206 | |||
| 1207 | out_dh: | ||
| 1208 | kfree(dh); | ||
| 1209 | return rc; | ||
| 1210 | } | ||
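The while (1) above is an optimistic-read pattern: size the buffer from the snapshot census seen last time, fetch, and accept the result only if the fresh header reports the same count; otherwise free everything and retry with the new counts. A stripped-down sketch of the pattern (read_meta() and parse_count() are hypothetical stand-ins, and the variable-length name area is folded into 'fixed' for brevity):

	#include <stdlib.h>

	int read_meta(void *buf, size_t len);		/* hypothetical */
	unsigned parse_count(const void *buf);		/* hypothetical */

	/* Retry until the metadata stops growing under us. */
	static void *fetch_consistent(size_t fixed, size_t per_snap,
				      size_t *out_len)
	{
		unsigned guess = 0;

		for (;;) {
			size_t len = fixed + (size_t)guess * per_snap;
			void *buf = malloc(len);

			if (!buf || read_meta(buf, len) < 0) {
				free(buf);
				return NULL;
			}
			if (parse_count(buf) == guess) {
				*out_len = len;
				return buf;	/* census stable: done */
			}
			guess = parse_count(buf);
			free(buf);		/* grew: go around again */
		}
	}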
| 1211 | |||
| 1212 | /* | ||
| 1213 | * create a snapshot | ||
| 1214 | */ | ||
| 1215 | static int rbd_header_add_snap(struct rbd_device *dev, | ||
| 1216 | const char *snap_name, | ||
| 1217 | gfp_t gfp_flags) | ||
| 1218 | { | ||
| 1219 | int name_len = strlen(snap_name); | ||
| 1220 | u64 new_snapid; | ||
| 1221 | int ret; | ||
| 1222 | void *data, *data_start, *data_end; | ||
| 1223 | |||
| 1224 | /* we should create a snapshot only if we're pointing at the head */ | ||
| 1225 | if (dev->cur_snap) | ||
| 1226 | return -EINVAL; | ||
| 1227 | |||
| 1228 | ret = ceph_monc_create_snapid(&dev->client->monc, dev->poolid, | ||
| 1229 | &new_snapid); | ||
| 1230 | dout("created snapid=%lld\n", new_snapid); | ||
| 1231 | if (ret < 0) | ||
| 1232 | return ret; | ||
| 1233 | |||
| 1234 | data = kmalloc(name_len + 16, gfp_flags); | ||
| 1235 | if (!data) | ||
| 1236 | return -ENOMEM; | ||
| 1237 | |||
| 1238 | data_start = data; | ||
| 1239 | data_end = data + name_len + 16; | ||
| 1240 | |||
| 1241 | ceph_encode_string_safe(&data, data_end, snap_name, name_len, bad); | ||
| 1242 | ceph_encode_64_safe(&data, data_end, new_snapid, bad); | ||
| 1243 | |||
| 1244 | ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add", | ||
| 1245 | data_start, data - data_start); | ||
| 1246 | |||
| 1247 | kfree(data_start); | ||
| 1248 | |||
| 1249 | if (ret < 0) | ||
| 1250 | return ret; | ||
| 1251 | |||
| 1252 | dev->header.snapc->seq = new_snapid; | ||
| 1253 | |||
| 1254 | return 0; | ||
| 1255 | bad: | ||
| 1256 | return -ERANGE; | ||
| 1257 | } | ||
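On the buffer sizing above: as far as the ceph encoding helpers go, ceph_encode_string_safe() emits a 32-bit length prefix plus the name bytes and ceph_encode_64_safe() emits the 8-byte snapshot id, so name_len + 12 bytes are consumed out of the name_len + 16 allocated; the _safe variants bail to the bad label rather than run past data_end.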
| 1258 | |||
| 1259 | /* | ||
| 1260 | * only read the first part of the ondisk header, without the snaps info | ||
| 1261 | */ | ||
| 1262 | static int rbd_update_snaps(struct rbd_device *rbd_dev) | ||
| 1263 | { | ||
| 1264 | int ret; | ||
| 1265 | struct rbd_image_header h; | ||
| 1266 | u64 snap_seq; | ||
| 1267 | |||
| 1268 | ret = rbd_read_header(rbd_dev, &h); | ||
| 1269 | if (ret < 0) | ||
| 1270 | return ret; | ||
| 1271 | |||
| 1272 | down_write(&rbd_dev->header.snap_rwsem); | ||
| 1273 | |||
| 1274 | snap_seq = rbd_dev->header.snapc->seq; | ||
| 1275 | |||
| 1276 | kfree(rbd_dev->header.snapc); | ||
| 1277 | kfree(rbd_dev->header.snap_names); | ||
| 1278 | kfree(rbd_dev->header.snap_sizes); | ||
| 1279 | |||
| 1280 | rbd_dev->header.total_snaps = h.total_snaps; | ||
| 1281 | rbd_dev->header.snapc = h.snapc; | ||
| 1282 | rbd_dev->header.snap_names = h.snap_names; | ||
| 1283 | rbd_dev->header.snap_sizes = h.snap_sizes; | ||
| 1284 | rbd_dev->header.snapc->seq = snap_seq; | ||
| 1285 | |||
| 1286 | up_write(&rbd_dev->header.snap_rwsem); | ||
| 1287 | |||
| 1288 | return 0; | ||
| 1289 | } | ||
| 1290 | |||
| 1291 | static int rbd_init_disk(struct rbd_device *rbd_dev) | ||
| 1292 | { | ||
| 1293 | struct gendisk *disk; | ||
| 1294 | struct request_queue *q; | ||
| 1295 | int rc; | ||
| 1296 | u64 total_size = 0; | ||
| 1297 | |||
| 1298 | /* contact OSD, request size info about the object being mapped */ | ||
| 1299 | rc = rbd_read_header(rbd_dev, &rbd_dev->header); | ||
| 1300 | if (rc) | ||
| 1301 | return rc; | ||
| 1302 | |||
| 1303 | rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size); | ||
| 1304 | if (rc) | ||
| 1305 | return rc; | ||
| 1306 | |||
| 1307 | /* create gendisk info */ | ||
| 1308 | rc = -ENOMEM; | ||
| 1309 | disk = alloc_disk(RBD_MINORS_PER_MAJOR); | ||
| 1310 | if (!disk) | ||
| 1311 | goto out; | ||
| 1312 | |||
| 1313 | sprintf(disk->disk_name, DRV_NAME "%d", rbd_dev->id); | ||
| 1314 | disk->major = rbd_dev->major; | ||
| 1315 | disk->first_minor = 0; | ||
| 1316 | disk->fops = &rbd_bd_ops; | ||
| 1317 | disk->private_data = rbd_dev; | ||
| 1318 | |||
| 1319 | /* init rq */ | ||
| 1320 | rc = -ENOMEM; | ||
| 1321 | q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); | ||
| 1322 | if (!q) | ||
| 1323 | goto out_disk; | ||
| 1324 | blk_queue_merge_bvec(q, rbd_merge_bvec); | ||
| 1325 | disk->queue = q; | ||
| 1326 | |||
| 1327 | q->queuedata = rbd_dev; | ||
| 1328 | |||
| 1329 | rbd_dev->disk = disk; | ||
| 1330 | rbd_dev->q = q; | ||
| 1331 | |||
| 1332 | /* finally, announce the disk to the world */ | ||
| 1333 | set_capacity(disk, total_size / 512ULL); | ||
| 1334 | add_disk(disk); | ||
| 1335 | |||
| 1336 | pr_info("%s: added with size 0x%llx\n", | ||
| 1337 | disk->disk_name, (unsigned long long)total_size); | ||
| 1338 | return 0; | ||
| 1339 | |||
| 1340 | out_disk: | ||
| 1341 | put_disk(disk); | ||
| 1342 | out: | ||
| 1343 | return rc; | ||
| 1344 | } | ||
| 1345 | |||
| 1346 | /******************************************************************** | ||
| 1347 | * /sys/class/rbd/ | ||
| 1348 | * add map rados objects to blkdev | ||
| 1349 | * remove unmap rados objects | ||
| 1350 | * list show mappings | ||
| 1351 | *******************************************************************/ | ||
| 1352 | |||
| 1353 | static void class_rbd_release(struct class *cls) | ||
| 1354 | { | ||
| 1355 | kfree(cls); | ||
| 1356 | } | ||
| 1357 | |||
| 1358 | static ssize_t class_rbd_list(struct class *c, | ||
| 1359 | struct class_attribute *attr, | ||
| 1360 | char *data) | ||
| 1361 | { | ||
| 1362 | int n = 0; | ||
| 1363 | struct list_head *tmp; | ||
| 1364 | int max = PAGE_SIZE; | ||
| 1365 | |||
| 1366 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
| 1367 | |||
| 1368 | n += snprintf(data, max, | ||
| 1369 | "#id\tmajor\tclient_name\tpool\tname\tsnap\tKB\n"); | ||
| 1370 | |||
| 1371 | list_for_each(tmp, &rbd_dev_list) { | ||
| 1372 | struct rbd_device *rbd_dev; | ||
| 1373 | |||
| 1374 | rbd_dev = list_entry(tmp, struct rbd_device, node); | ||
| 1375 | n += snprintf(data+n, max-n, | ||
| 1376 | "%d\t%d\tclient%lld\t%s\t%s\t%s\t%lld\n", | ||
| 1377 | rbd_dev->id, | ||
| 1378 | rbd_dev->major, | ||
| 1379 | ceph_client_id(rbd_dev->client), | ||
| 1380 | rbd_dev->pool_name, | ||
| 1381 | rbd_dev->obj, rbd_dev->snap_name, | ||
| 1382 | rbd_dev->header.image_size >> 10); | ||
| 1383 | if (n == max) | ||
| 1384 | break; | ||
| 1385 | } | ||
| 1386 | |||
| 1387 | mutex_unlock(&ctl_mutex); | ||
| 1388 | return n; | ||
| 1389 | } | ||
| 1390 | |||
| 1391 | static ssize_t class_rbd_add(struct class *c, | ||
| 1392 | struct class_attribute *attr, | ||
| 1393 | const char *buf, size_t count) | ||
| 1394 | { | ||
| 1395 | struct ceph_osd_client *osdc; | ||
| 1396 | struct rbd_device *rbd_dev; | ||
| 1397 | ssize_t rc = -ENOMEM; | ||
| 1398 | int irc, new_id = 0; | ||
| 1399 | struct list_head *tmp; | ||
| 1400 | char *mon_dev_name; | ||
| 1401 | char *options; | ||
| 1402 | |||
| 1403 | if (!try_module_get(THIS_MODULE)) | ||
| 1404 | return -ENODEV; | ||
| 1405 | |||
| 1406 | mon_dev_name = kmalloc(RBD_MAX_OPT_LEN + 1, GFP_KERNEL); | ||
| 1407 | if (!mon_dev_name) | ||
| 1408 | goto err_out_mod; | ||
| 1409 | |||
| 1410 | options = kmalloc(RBD_MAX_OPT_LEN + 1, GFP_KERNEL); | ||
| 1411 | if (!options) | ||
| 1412 | goto err_mon_dev; | ||
| 1413 | |||
| 1414 | /* new rbd_device object */ | ||
| 1415 | rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); | ||
| 1416 | if (!rbd_dev) | ||
| 1417 | goto err_out_opt; | ||
| 1418 | |||
| 1419 | /* static rbd_device initialization */ | ||
| 1420 | spin_lock_init(&rbd_dev->lock); | ||
| 1421 | INIT_LIST_HEAD(&rbd_dev->node); | ||
| 1422 | |||
| 1423 | /* generate unique id: find highest unique id, add one */ | ||
| 1424 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
| 1425 | |||
| 1426 | list_for_each(tmp, &rbd_dev_list) { | ||
| 1427 | struct rbd_device *rbd_dev; | ||
| 1428 | |||
| 1429 | rbd_dev = list_entry(tmp, struct rbd_device, node); | ||
| 1430 | if (rbd_dev->id >= new_id) | ||
| 1431 | new_id = rbd_dev->id + 1; | ||
| 1432 | } | ||
| 1433 | |||
| 1434 | rbd_dev->id = new_id; | ||
| 1435 | |||
| 1436 | /* add to global list */ | ||
| 1437 | list_add_tail(&rbd_dev->node, &rbd_dev_list); | ||
| 1438 | |||
| 1439 | /* parse add command */ | ||
| 1440 | if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s " | ||
| 1441 | "%" __stringify(RBD_MAX_OPT_LEN) "s " | ||
| 1442 | "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s " | ||
| 1443 | "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s" | ||
| 1444 | "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", | ||
| 1445 | mon_dev_name, options, rbd_dev->pool_name, | ||
| 1446 | rbd_dev->obj, rbd_dev->snap_name) < 4) { | ||
| 1447 | rc = -EINVAL; | ||
| 1448 | goto err_out_slot; | ||
| 1449 | } | ||
| 1450 | |||
| 1451 | if (rbd_dev->snap_name[0] == 0) | ||
| 1452 | rbd_dev->snap_name[0] = '-'; | ||
| 1453 | |||
| 1454 | rbd_dev->obj_len = strlen(rbd_dev->obj); | ||
| 1455 | snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s", | ||
| 1456 | rbd_dev->obj, RBD_SUFFIX); | ||
| 1457 | |||
| 1458 | /* initialize rest of new object */ | ||
| 1459 | snprintf(rbd_dev->name, DEV_NAME_LEN, DRV_NAME "%d", rbd_dev->id); | ||
| 1460 | rc = rbd_get_client(rbd_dev, mon_dev_name, options); | ||
| 1461 | if (rc < 0) | ||
| 1462 | goto err_out_slot; | ||
| 1463 | |||
| 1464 | mutex_unlock(&ctl_mutex); | ||
| 1465 | |||
| 1466 | /* pick the pool */ | ||
| 1467 | osdc = &rbd_dev->client->osdc; | ||
| 1468 | rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); | ||
| 1469 | if (rc < 0) | ||
| 1470 | goto err_out_client; | ||
| 1471 | rbd_dev->poolid = rc; | ||
| 1472 | |||
| 1473 | /* register our block device */ | ||
| 1474 | irc = register_blkdev(0, rbd_dev->name); | ||
| 1475 | if (irc < 0) { | ||
| 1476 | rc = irc; | ||
| 1477 | goto err_out_client; | ||
| 1478 | } | ||
| 1479 | rbd_dev->major = irc; | ||
| 1480 | |||
| 1481 | /* set up and announce blkdev mapping */ | ||
| 1482 | rc = rbd_init_disk(rbd_dev); | ||
| 1483 | if (rc) | ||
| 1484 | goto err_out_blkdev; | ||
| 1485 | |||
| 1486 | return count; | ||
| 1487 | |||
| 1488 | err_out_blkdev: | ||
| 1489 | unregister_blkdev(rbd_dev->major, rbd_dev->name); | ||
| 1490 | err_out_client: | ||
| 1491 | rbd_put_client(rbd_dev); | ||
| 1492 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
| 1493 | err_out_slot: | ||
| 1494 | list_del_init(&rbd_dev->node); | ||
| 1495 | mutex_unlock(&ctl_mutex); | ||
| 1496 | |||
| 1497 | kfree(rbd_dev); | ||
| 1498 | err_out_opt: | ||
| 1499 | kfree(options); | ||
| 1500 | err_mon_dev: | ||
| 1501 | kfree(mon_dev_name); | ||
| 1502 | err_out_mod: | ||
| 1503 | dout("Error adding device %s\n", buf); | ||
| 1504 | module_put(THIS_MODULE); | ||
| 1505 | return rc; | ||
| 1506 | } | ||
| 1507 | |||
| 1508 | static struct rbd_device *__rbd_get_dev(unsigned long id) | ||
| 1509 | { | ||
| 1510 | struct list_head *tmp; | ||
| 1511 | struct rbd_device *rbd_dev; | ||
| 1512 | |||
| 1513 | list_for_each(tmp, &rbd_dev_list) { | ||
| 1514 | rbd_dev = list_entry(tmp, struct rbd_device, node); | ||
| 1515 | if (rbd_dev->id == id) | ||
| 1516 | return rbd_dev; | ||
| 1517 | } | ||
| 1518 | return NULL; | ||
| 1519 | } | ||
| 1520 | |||
| 1521 | static ssize_t class_rbd_remove(struct class *c, | ||
| 1522 | struct class_attribute *attr, | ||
| 1523 | const char *buf, | ||
| 1524 | size_t count) | ||
| 1525 | { | ||
| 1526 | struct rbd_device *rbd_dev = NULL; | ||
| 1527 | int target_id, rc; | ||
| 1528 | unsigned long ul; | ||
| 1529 | |||
| 1530 | rc = strict_strtoul(buf, 10, &ul); | ||
| 1531 | if (rc) | ||
| 1532 | return rc; | ||
| 1533 | |||
| 1534 | /* convert to int; abort if we lost anything in the conversion */ | ||
| 1535 | target_id = (int) ul; | ||
| 1536 | if (target_id != ul) | ||
| 1537 | return -EINVAL; | ||
| 1538 | |||
| 1539 | /* remove object from list immediately */ | ||
| 1540 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
| 1541 | |||
| 1542 | rbd_dev = __rbd_get_dev(target_id); | ||
| 1543 | if (rbd_dev) | ||
| 1544 | list_del_init(&rbd_dev->node); | ||
| 1545 | |||
| 1546 | mutex_unlock(&ctl_mutex); | ||
| 1547 | |||
| 1548 | if (!rbd_dev) | ||
| 1549 | return -ENOENT; | ||
| 1550 | |||
| 1551 | rbd_put_client(rbd_dev); | ||
| 1552 | |||
| 1553 | /* clean up and free blkdev */ | ||
| 1554 | rbd_free_disk(rbd_dev); | ||
| 1555 | unregister_blkdev(rbd_dev->major, rbd_dev->name); | ||
| 1556 | kfree(rbd_dev); | ||
| 1557 | |||
| 1558 | /* release module ref */ | ||
| 1559 | module_put(THIS_MODULE); | ||
| 1560 | |||
| 1561 | return count; | ||
| 1562 | } | ||
| 1563 | |||
| 1564 | static ssize_t class_rbd_snaps_list(struct class *c, | ||
| 1565 | struct class_attribute *attr, | ||
| 1566 | char *data) | ||
| 1567 | { | ||
| 1568 | struct rbd_device *rbd_dev = NULL; | ||
| 1569 | struct list_head *tmp; | ||
| 1570 | struct rbd_image_header *header; | ||
| 1571 | int i, n = 0, max = PAGE_SIZE; | ||
| 1572 | int ret; | ||
| 1573 | |||
| 1574 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
| 1575 | |||
| 1576 | n += snprintf(data, max, "#id\tsnap\tKB\n"); | ||
| 1577 | |||
| 1578 | list_for_each(tmp, &rbd_dev_list) { | ||
| 1579 | char *names, *p; | ||
| 1580 | struct ceph_snap_context *snapc; | ||
| 1581 | |||
| 1582 | rbd_dev = list_entry(tmp, struct rbd_device, node); | ||
| 1583 | header = &rbd_dev->header; | ||
| 1584 | |||
| 1585 | down_read(&header->snap_rwsem); | ||
| 1586 | |||
| 1587 | names = header->snap_names; | ||
| 1588 | snapc = header->snapc; | ||
| 1589 | |||
| 1590 | n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n", | ||
| 1591 | rbd_dev->id, RBD_SNAP_HEAD_NAME, | ||
| 1592 | header->image_size >> 10, | ||
| 1593 | (!rbd_dev->cur_snap ? " (*)" : "")); | ||
| 1594 | if (n == max) | ||
| 1595 | break; | ||
| 1596 | |||
| 1597 | p = names; | ||
| 1598 | for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) { | ||
| 1599 | n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n", | ||
| 1600 | rbd_dev->id, p, header->snap_sizes[i] >> 10, | ||
| 1601 | (rbd_dev->cur_snap && | ||
| 1602 | (snap_index(header, i) == rbd_dev->cur_snap) ? | ||
| 1603 | " (*)" : "")); | ||
| 1604 | if (n == max) | ||
| 1605 | break; | ||
| 1606 | } | ||
| 1607 | |||
| 1608 | up_read(&header->snap_rwsem); | ||
| 1609 | } | ||
| 1610 | |||
| 1611 | |||
| 1612 | ret = n; | ||
| 1613 | mutex_unlock(&ctl_mutex); | ||
| 1614 | return ret; | ||
| 1615 | } | ||
| 1616 | |||
| 1617 | static ssize_t class_rbd_snaps_refresh(struct class *c, | ||
| 1618 | struct class_attribute *attr, | ||
| 1619 | const char *buf, | ||
| 1620 | size_t count) | ||
| 1621 | { | ||
| 1622 | struct rbd_device *rbd_dev = NULL; | ||
| 1623 | int target_id, rc; | ||
| 1624 | unsigned long ul; | ||
| 1625 | int ret = count; | ||
| 1626 | |||
| 1627 | rc = strict_strtoul(buf, 10, &ul); | ||
| 1628 | if (rc) | ||
| 1629 | return rc; | ||
| 1630 | |||
| 1631 | /* convert to int; abort if we lost anything in the conversion */ | ||
| 1632 | target_id = (int) ul; | ||
| 1633 | if (target_id != ul) | ||
| 1634 | return -EINVAL; | ||
| 1635 | |||
| 1636 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
| 1637 | |||
| 1638 | rbd_dev = __rbd_get_dev(target_id); | ||
| 1639 | if (!rbd_dev) { | ||
| 1640 | ret = -ENOENT; | ||
| 1641 | goto done; | ||
| 1642 | } | ||
| 1643 | |||
| 1644 | rc = rbd_update_snaps(rbd_dev); | ||
| 1645 | if (rc < 0) | ||
| 1646 | ret = rc; | ||
| 1647 | |||
| 1648 | done: | ||
| 1649 | mutex_unlock(&ctl_mutex); | ||
| 1650 | return ret; | ||
| 1651 | } | ||
| 1652 | |||
| 1653 | static ssize_t class_rbd_snap_create(struct class *c, | ||
| 1654 | struct class_attribute *attr, | ||
| 1655 | const char *buf, | ||
| 1656 | size_t count) | ||
| 1657 | { | ||
| 1658 | struct rbd_device *rbd_dev = NULL; | ||
| 1659 | int target_id, ret; | ||
| 1660 | char *name; | ||
| 1661 | |||
| 1662 | name = kmalloc(RBD_MAX_SNAP_NAME_LEN + 1, GFP_KERNEL); | ||
| 1663 | if (!name) | ||
| 1664 | return -ENOMEM; | ||
| 1665 | |||
| 1666 | /* parse snaps add command */ | ||
| 1667 | if (sscanf(buf, "%d " | ||
| 1668 | "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", | ||
| 1669 | &target_id, | ||
| 1670 | name) != 2) { | ||
| 1671 | ret = -EINVAL; | ||
| 1672 | goto done; | ||
| 1673 | } | ||
| 1674 | |||
| 1675 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
| 1676 | |||
| 1677 | rbd_dev = __rbd_get_dev(target_id); | ||
| 1678 | if (!rbd_dev) { | ||
| 1679 | ret = -ENOENT; | ||
| 1680 | goto done_unlock; | ||
| 1681 | } | ||
| 1682 | |||
| 1683 | ret = rbd_header_add_snap(rbd_dev, | ||
| 1684 | name, GFP_KERNEL); | ||
| 1685 | if (ret < 0) | ||
| 1686 | goto done_unlock; | ||
| 1687 | |||
| 1688 | ret = rbd_update_snaps(rbd_dev); | ||
| 1689 | if (ret < 0) | ||
| 1690 | goto done_unlock; | ||
| 1691 | |||
| 1692 | ret = count; | ||
| 1693 | done_unlock: | ||
| 1694 | mutex_unlock(&ctl_mutex); | ||
| 1695 | done: | ||
| 1696 | kfree(name); | ||
| 1697 | return ret; | ||
| 1698 | } | ||
| 1699 | |||
| 1700 | static ssize_t class_rbd_rollback(struct class *c, | ||
| 1701 | struct class_attribute *attr, | ||
| 1702 | const char *buf, | ||
| 1703 | size_t count) | ||
| 1704 | { | ||
| 1705 | struct rbd_device *rbd_dev = NULL; | ||
| 1706 | int target_id, ret; | ||
| 1707 | u64 snapid; | ||
| 1708 | char snap_name[RBD_MAX_SNAP_NAME_LEN + 1]; | ||
| 1709 | u64 cur_ofs; | ||
| 1710 | char *seg_name; | ||
| 1711 | |||
| 1712 | /* parse snaps add command */ | ||
| 1713 | if (sscanf(buf, "%d " | ||
| 1714 | "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s", | ||
| 1715 | &target_id, | ||
| 1716 | snap_name) != 2) { | ||
| 1717 | return -EINVAL; | ||
| 1718 | } | ||
| 1719 | |||
| 1720 | ret = -ENOMEM; | ||
| 1721 | seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); | ||
| 1722 | if (!seg_name) | ||
| 1723 | return ret; | ||
| 1724 | |||
| 1725 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
| 1726 | |||
| 1727 | rbd_dev = __rbd_get_dev(target_id); | ||
| 1728 | if (!rbd_dev) { | ||
| 1729 | ret = -ENOENT; | ||
| 1730 | goto done_unlock; | ||
| 1731 | } | ||
| 1732 | |||
| 1733 | ret = snap_by_name(&rbd_dev->header, snap_name, &snapid, NULL); | ||
| 1734 | if (ret < 0) | ||
| 1735 | goto done_unlock; | ||
| 1736 | |||
| 1737 | dout("snapid=%lld\n", snapid); | ||
| 1738 | |||
| 1739 | cur_ofs = 0; | ||
| 1740 | while (cur_ofs < rbd_dev->header.image_size) { | ||
| 1741 | cur_ofs += rbd_get_segment(&rbd_dev->header, | ||
| 1742 | rbd_dev->obj, | ||
| 1743 | cur_ofs, (u64)-1, | ||
| 1744 | seg_name, NULL); | ||
| 1745 | dout("seg_name=%s\n", seg_name); | ||
| 1746 | |||
| 1747 | ret = rbd_req_sync_rollback_obj(rbd_dev, snapid, seg_name); | ||
| 1748 | if (ret < 0) | ||
| 1749 | pr_warning("could not roll back obj %s err=%d\n", | ||
| 1750 | seg_name, ret); | ||
| 1751 | } | ||
| 1752 | |||
| 1753 | ret = rbd_update_snaps(rbd_dev); | ||
| 1754 | if (ret < 0) | ||
| 1755 | goto done_unlock; | ||
| 1756 | |||
| 1757 | ret = count; | ||
| 1758 | |||
| 1759 | done_unlock: | ||
| 1760 | mutex_unlock(&ctl_mutex); | ||
| 1761 | kfree(seg_name); | ||
| 1762 | |||
| 1763 | return ret; | ||
| 1764 | } | ||
| 1765 | |||
| 1766 | static struct class_attribute class_rbd_attrs[] = { | ||
| 1767 | __ATTR(add, 0200, NULL, class_rbd_add), | ||
| 1768 | __ATTR(remove, 0200, NULL, class_rbd_remove), | ||
| 1769 | __ATTR(list, 0444, class_rbd_list, NULL), | ||
| 1770 | __ATTR(snaps_refresh, 0200, NULL, class_rbd_snaps_refresh), | ||
| 1771 | __ATTR(snap_create, 0200, NULL, class_rbd_snap_create), | ||
| 1772 | __ATTR(snaps_list, 0444, class_rbd_snaps_list, NULL), | ||
| 1773 | __ATTR(snap_rollback, 0200, NULL, class_rbd_rollback), | ||
| 1774 | __ATTR_NULL | ||
| 1775 | }; | ||
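For orientation, the control surface this table wires up lives entirely under /sys/class/rbd: writing "<mon_addr> <options> <pool> <image> [<snap>]" to add maps an image (the snapshot field is optional and defaults to '-', i.e. the head), writing a device id to remove unmaps it, and reading list or snaps_list dumps the current mappings and their snapshots. The placeholders here follow the sscanf in class_rbd_add() rather than any documented syntax.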
| 1776 | |||
| 1777 | /* | ||
| 1778 | * create control files in sysfs | ||
| 1779 | * /sys/class/rbd/... | ||
| 1780 | */ | ||
| 1781 | static int rbd_sysfs_init(void) | ||
| 1782 | { | ||
| 1783 | int ret = -ENOMEM; | ||
| 1784 | |||
| 1785 | class_rbd = kzalloc(sizeof(*class_rbd), GFP_KERNEL); | ||
| 1786 | if (!class_rbd) | ||
| 1787 | goto out; | ||
| 1788 | |||
| 1789 | class_rbd->name = DRV_NAME; | ||
| 1790 | class_rbd->owner = THIS_MODULE; | ||
| 1791 | class_rbd->class_release = class_rbd_release; | ||
| 1792 | class_rbd->class_attrs = class_rbd_attrs; | ||
| 1793 | |||
| 1794 | ret = class_register(class_rbd); | ||
| 1795 | if (ret) | ||
| 1796 | goto out_class; | ||
| 1797 | return 0; | ||
| 1798 | |||
| 1799 | out_class: | ||
| 1800 | kfree(class_rbd); | ||
| 1801 | class_rbd = NULL; | ||
| 1802 | pr_err(DRV_NAME ": failed to create class rbd\n"); | ||
| 1803 | out: | ||
| 1804 | return ret; | ||
| 1805 | } | ||
| 1806 | |||
| 1807 | static void rbd_sysfs_cleanup(void) | ||
| 1808 | { | ||
| 1809 | if (class_rbd) | ||
| 1810 | class_destroy(class_rbd); | ||
| 1811 | class_rbd = NULL; | ||
| 1812 | } | ||
| 1813 | |||
| 1814 | int __init rbd_init(void) | ||
| 1815 | { | ||
| 1816 | int rc; | ||
| 1817 | |||
| 1818 | rc = rbd_sysfs_init(); | ||
| 1819 | if (rc) | ||
| 1820 | return rc; | ||
| 1821 | spin_lock_init(&node_lock); | ||
| 1822 | pr_info("loaded " DRV_NAME_LONG "\n"); | ||
| 1823 | return 0; | ||
| 1824 | } | ||
| 1825 | |||
| 1826 | void __exit rbd_exit(void) | ||
| 1827 | { | ||
| 1828 | rbd_sysfs_cleanup(); | ||
| 1829 | } | ||
| 1830 | |||
| 1831 | module_init(rbd_init); | ||
| 1832 | module_exit(rbd_exit); | ||
| 1833 | |||
| 1834 | MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); | ||
| 1835 | MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); | ||
| 1836 | MODULE_DESCRIPTION("rados block device"); | ||
| 1837 | |||
| 1838 | /* following authorship retained from original osdblk.c */ | ||
| 1839 | MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); | ||
| 1840 | |||
| 1841 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h new file mode 100644 index 000000000000..fc6c678aa2cb --- /dev/null +++ b/drivers/block/rbd_types.h | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | /* | ||
| 2 | * Ceph - scalable distributed file system | ||
| 3 | * | ||
| 4 | * Copyright (C) 2004-2010 Sage Weil <sage@newdream.net> | ||
| 5 | * | ||
| 6 | * This is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU Lesser General Public | ||
| 8 | * License version 2.1, as published by the Free Software | ||
| 9 | * Foundation. See file COPYING. | ||
| 10 | * | ||
| 11 | */ | ||
| 12 | |||
| 13 | #ifndef CEPH_RBD_TYPES_H | ||
| 14 | #define CEPH_RBD_TYPES_H | ||
| 15 | |||
| 16 | #include <linux/types.h> | ||
| 17 | |||
| 18 | /* | ||
| 19 | * rbd image 'foo' consists of objects | ||
| 20 | * foo.rbd - image metadata | ||
| 21 | * foo.00000000 | ||
| 22 | * foo.00000001 | ||
| 23 | * ... - data | ||
| 24 | */ | ||
| 25 | |||
| 26 | #define RBD_SUFFIX ".rbd" | ||
| 27 | #define RBD_DIRECTORY "rbd_directory" | ||
| 28 | #define RBD_INFO "rbd_info" | ||
| 29 | |||
| 30 | #define RBD_DEFAULT_OBJ_ORDER 22 /* 4MB */ | ||
| 31 | #define RBD_MIN_OBJ_ORDER 16 | ||
| 32 | #define RBD_MAX_OBJ_ORDER 30 | ||
| 33 | |||
| 34 | #define RBD_MAX_OBJ_NAME_LEN 96 | ||
| 35 | #define RBD_MAX_SEG_NAME_LEN 128 | ||
| 36 | |||
| 37 | #define RBD_COMP_NONE 0 | ||
| 38 | #define RBD_CRYPT_NONE 0 | ||
| 39 | |||
| 40 | #define RBD_HEADER_TEXT "<<< Rados Block Device Image >>>\n" | ||
| 41 | #define RBD_HEADER_SIGNATURE "RBD" | ||
| 42 | #define RBD_HEADER_VERSION "001.005" | ||
| 43 | |||
| 44 | struct rbd_info { | ||
| 45 | __le64 max_id; | ||
| 46 | } __attribute__ ((packed)); | ||
| 47 | |||
| 48 | struct rbd_image_snap_ondisk { | ||
| 49 | __le64 id; | ||
| 50 | __le64 image_size; | ||
| 51 | } __attribute__((packed)); | ||
| 52 | |||
| 53 | struct rbd_image_header_ondisk { | ||
| 54 | char text[40]; | ||
| 55 | char block_name[24]; | ||
| 56 | char signature[4]; | ||
| 57 | char version[8]; | ||
| 58 | struct { | ||
| 59 | __u8 order; | ||
| 60 | __u8 crypt_type; | ||
| 61 | __u8 comp_type; | ||
| 62 | __u8 unused; | ||
| 63 | } __attribute__((packed)) options; | ||
| 64 | __le64 image_size; | ||
| 65 | __le64 snap_seq; | ||
| 66 | __le32 snap_count; | ||
| 67 | __le32 reserved; | ||
| 68 | __le64 snap_names_len; | ||
| 69 | struct rbd_image_snap_ondisk snaps[0]; | ||
| 70 | } __attribute__((packed)); | ||
| 71 | |||
| 72 | |||
| 73 | #endif | ||
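For scale: the fixed part of struct rbd_image_header_ondisk above is 112 bytes (40 + 24 + 4 + 8 + 4 + 8 + 8 + 4 + 4 + 8, everything packed), each snapshot record adds 16 bytes, and the packed NUL-terminated names follow. An image with two snapshots named "a" and "b" therefore carries 112 + 2*16 + 4 = 148 bytes of metadata, which is exactly the length rbd_read_header() has to guess and then re-verify against the fresh snap_count.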
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1101e251a629..8320490226b7 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c | |||
| @@ -2,7 +2,6 @@ | |||
| 2 | #include <linux/spinlock.h> | 2 | #include <linux/spinlock.h> |
| 3 | #include <linux/slab.h> | 3 | #include <linux/slab.h> |
| 4 | #include <linux/blkdev.h> | 4 | #include <linux/blkdev.h> |
| 5 | #include <linux/smp_lock.h> | ||
| 6 | #include <linux/hdreg.h> | 5 | #include <linux/hdreg.h> |
| 7 | #include <linux/virtio.h> | 6 | #include <linux/virtio.h> |
| 8 | #include <linux/virtio_blk.h> | 7 | #include <linux/virtio_blk.h> |
| @@ -222,8 +221,8 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) | |||
| 222 | return err; | 221 | return err; |
| 223 | } | 222 | } |
| 224 | 223 | ||
| 225 | static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode, | 224 | static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, |
| 226 | unsigned cmd, unsigned long data) | 225 | unsigned int cmd, unsigned long data) |
| 227 | { | 226 | { |
| 228 | struct gendisk *disk = bdev->bd_disk; | 227 | struct gendisk *disk = bdev->bd_disk; |
| 229 | struct virtio_blk *vblk = disk->private_data; | 228 | struct virtio_blk *vblk = disk->private_data; |
| @@ -238,18 +237,6 @@ static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode, | |||
| 238 | (void __user *)data); | 237 | (void __user *)data); |
| 239 | } | 238 | } |
| 240 | 239 | ||
| 241 | static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, | ||
| 242 | unsigned int cmd, unsigned long param) | ||
| 243 | { | ||
| 244 | int ret; | ||
| 245 | |||
| 246 | lock_kernel(); | ||
| 247 | ret = virtblk_locked_ioctl(bdev, mode, cmd, param); | ||
| 248 | unlock_kernel(); | ||
| 249 | |||
| 250 | return ret; | ||
| 251 | } | ||
| 252 | |||
| 253 | /* We provide getgeo only to please some old bootloader/partitioning tools */ | 240 | /* We provide getgeo only to please some old bootloader/partitioning tools */ |
| 254 | static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) | 241 | static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) |
| 255 | { | 242 | { |
diff --git a/drivers/char/agp/Kconfig b/drivers/char/agp/Kconfig index 4b66c69eaf57..5ddf67e76f8b 100644 --- a/drivers/char/agp/Kconfig +++ b/drivers/char/agp/Kconfig | |||
| @@ -57,7 +57,7 @@ config AGP_AMD | |||
| 57 | 57 | ||
| 58 | config AGP_AMD64 | 58 | config AGP_AMD64 |
| 59 | tristate "AMD Opteron/Athlon64 on-CPU GART support" | 59 | tristate "AMD Opteron/Athlon64 on-CPU GART support" |
| 60 | depends on AGP && X86 && K8_NB | 60 | depends on AGP && X86 && AMD_NB |
| 61 | help | 61 | help |
| 62 | This option gives you AGP support for the GLX component of | 62 | This option gives you AGP support for the GLX component of |
| 63 | X using the on-CPU northbridge of the AMD Athlon64/Opteron CPUs. | 63 | X using the on-CPU northbridge of the AMD Athlon64/Opteron CPUs. |
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 70312da4c968..42396df55556 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c | |||
| @@ -15,7 +15,7 @@ | |||
| 15 | #include <linux/mmzone.h> | 15 | #include <linux/mmzone.h> |
| 16 | #include <asm/page.h> /* PAGE_SIZE */ | 16 | #include <asm/page.h> /* PAGE_SIZE */ |
| 17 | #include <asm/e820.h> | 17 | #include <asm/e820.h> |
| 18 | #include <asm/k8.h> | 18 | #include <asm/amd_nb.h> |
| 19 | #include <asm/gart.h> | 19 | #include <asm/gart.h> |
| 20 | #include "agp.h" | 20 | #include "agp.h" |
| 21 | 21 | ||
| @@ -124,7 +124,7 @@ static int amd64_fetch_size(void) | |||
| 124 | u32 temp; | 124 | u32 temp; |
| 125 | struct aper_size_info_32 *values; | 125 | struct aper_size_info_32 *values; |
| 126 | 126 | ||
| 127 | dev = k8_northbridges[0]; | 127 | dev = k8_northbridges.nb_misc[0]; |
| 128 | if (dev==NULL) | 128 | if (dev==NULL) |
| 129 | return 0; | 129 | return 0; |
| 130 | 130 | ||
| @@ -181,10 +181,14 @@ static int amd_8151_configure(void) | |||
| 181 | unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real); | 181 | unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real); |
| 182 | int i; | 182 | int i; |
| 183 | 183 | ||
| 184 | if (!k8_northbridges.gart_supported) | ||
| 185 | return 0; | ||
| 186 | |||
| 184 | /* Configure AGP regs in each x86-64 host bridge. */ | 187 | /* Configure AGP regs in each x86-64 host bridge. */ |
| 185 | for (i = 0; i < num_k8_northbridges; i++) { | 188 | for (i = 0; i < k8_northbridges.num; i++) { |
| 186 | agp_bridge->gart_bus_addr = | 189 | agp_bridge->gart_bus_addr = |
| 187 | amd64_configure(k8_northbridges[i], gatt_bus); | 190 | amd64_configure(k8_northbridges.nb_misc[i], |
| 191 | gatt_bus); | ||
| 188 | } | 192 | } |
| 189 | k8_flush_garts(); | 193 | k8_flush_garts(); |
| 190 | return 0; | 194 | return 0; |
| @@ -195,11 +199,15 @@ static void amd64_cleanup(void) | |||
| 195 | { | 199 | { |
| 196 | u32 tmp; | 200 | u32 tmp; |
| 197 | int i; | 201 | int i; |
| 198 | for (i = 0; i < num_k8_northbridges; i++) { | 202 | |
| 199 | struct pci_dev *dev = k8_northbridges[i]; | 203 | if (!k8_northbridges.gart_supported) |
| 204 | return; | ||
| 205 | |||
| 206 | for (i = 0; i < k8_northbridges.num; i++) { | ||
| 207 | struct pci_dev *dev = k8_northbridges.nb_misc[i]; | ||
| 200 | /* disable gart translation */ | 208 | /* disable gart translation */ |
| 201 | pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp); | 209 | pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp); |
| 202 | tmp &= ~AMD64_GARTEN; | 210 | tmp &= ~GARTEN; |
| 203 | pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, tmp); | 211 | pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, tmp); |
| 204 | } | 212 | } |
| 205 | } | 213 | } |
| @@ -313,22 +321,25 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp, | |||
| 313 | if (order < 0 || !agp_aperture_valid(aper, (32*1024*1024)<<order)) | 321 | if (order < 0 || !agp_aperture_valid(aper, (32*1024*1024)<<order)) |
| 314 | return -1; | 322 | return -1; |
| 315 | 323 | ||
| 316 | pci_write_config_dword(nb, AMD64_GARTAPERTURECTL, order << 1); | 324 | gart_set_size_and_enable(nb, order); |
| 317 | pci_write_config_dword(nb, AMD64_GARTAPERTUREBASE, aper >> 25); | 325 | pci_write_config_dword(nb, AMD64_GARTAPERTUREBASE, aper >> 25); |
| 318 | 326 | ||
| 319 | return 0; | 327 | return 0; |
| 320 | } | 328 | } |
| 321 | 329 | ||
| 322 | static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr) | 330 | static __devinit int cache_nbs(struct pci_dev *pdev, u32 cap_ptr) |
| 323 | { | 331 | { |
| 324 | int i; | 332 | int i; |
| 325 | 333 | ||
| 326 | if (cache_k8_northbridges() < 0) | 334 | if (cache_k8_northbridges() < 0) |
| 327 | return -ENODEV; | 335 | return -ENODEV; |
| 328 | 336 | ||
| 337 | if (!k8_northbridges.gart_supported) | ||
| 338 | return -ENODEV; | ||
| 339 | |||
| 329 | i = 0; | 340 | i = 0; |
| 330 | for (i = 0; i < num_k8_northbridges; i++) { | 341 | for (i = 0; i < k8_northbridges.num; i++) { |
| 331 | struct pci_dev *dev = k8_northbridges[i]; | 342 | struct pci_dev *dev = k8_northbridges.nb_misc[i]; |
| 332 | if (fix_northbridge(dev, pdev, cap_ptr) < 0) { | 343 | if (fix_northbridge(dev, pdev, cap_ptr) < 0) { |
| 333 | dev_err(&dev->dev, "no usable aperture found\n"); | 344 | dev_err(&dev->dev, "no usable aperture found\n"); |
| 334 | #ifdef __x86_64__ | 345 | #ifdef __x86_64__ |
| @@ -405,7 +416,8 @@ static int __devinit uli_agp_init(struct pci_dev *pdev) | |||
| 405 | } | 416 | } |
| 406 | 417 | ||
| 407 | /* shadow x86-64 registers into ULi registers */ | 418 | /* shadow x86-64 registers into ULi registers */ |
| 408 | pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &httfea); | 419 | pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE, |
| 420 | &httfea); | ||
| 409 | 421 | ||
| 410 | /* if x86-64 aperture base is beyond 4G, exit here */ | 422 | /* if x86-64 aperture base is beyond 4G, exit here */ |
| 411 | if ((httfea & 0x7fff) >> (32 - 25)) { | 423 | if ((httfea & 0x7fff) >> (32 - 25)) { |
| @@ -472,7 +484,8 @@ static int nforce3_agp_init(struct pci_dev *pdev) | |||
| 472 | pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp); | 484 | pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp); |
| 473 | 485 | ||
| 474 | /* shadow x86-64 registers into NVIDIA registers */ | 486 | /* shadow x86-64 registers into NVIDIA registers */ |
| 475 | pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &apbase); | 487 | pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE, |
| 488 | &apbase); | ||
| 476 | 489 | ||
| 477 | /* if x86-64 aperture base is beyond 4G, exit here */ | 490 | /* if x86-64 aperture base is beyond 4G, exit here */ |
| 478 | if ( (apbase & 0x7fff) >> (32 - 25) ) { | 491 | if ( (apbase & 0x7fff) >> (32 - 25) ) { |
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index d2abf5143983..64255cef8a7d 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c | |||
| @@ -984,7 +984,9 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge) | |||
| 984 | 984 | ||
| 985 | bridge->driver->cache_flush(); | 985 | bridge->driver->cache_flush(); |
| 986 | #ifdef CONFIG_X86 | 986 | #ifdef CONFIG_X86 |
| 987 | set_memory_uc((unsigned long)table, 1 << page_order); | 987 | if (set_memory_uc((unsigned long)table, 1 << page_order)) |
| 988 | printk(KERN_WARNING "Could not set GATT table memory to UC!"); | ||
| 989 | |||
| 988 | bridge->gatt_table = (void *)table; | 990 | bridge->gatt_table = (void *)table; |
| 989 | #else | 991 | #else |
| 990 | bridge->gatt_table = ioremap_nocache(virt_to_phys(table), | 992 | bridge->gatt_table = ioremap_nocache(virt_to_phys(table), |
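set_memory_uc() can fail (for example when changing attributes needs to split a large kernel mapping and no memory is available), so ignoring its return value hides real errors. A minimal sketch of the checked pattern, with the symmetric write-back restore that teardown paths usually pair with it (the restore side is an assumption, not part of this hunk):

	if (set_memory_uc((unsigned long)table, 1 << page_order))
		printk(KERN_WARNING "Could not set GATT table memory to UC!");

	/* ... and before freeing the pages again: */
	set_memory_wb((unsigned long)table, 1 << page_order);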
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index 05ad4a17a28f..7c4133582dba 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c | |||
| @@ -47,6 +47,16 @@ enum tpm_duration { | |||
| 47 | #define TPM_MAX_PROTECTED_ORDINAL 12 | 47 | #define TPM_MAX_PROTECTED_ORDINAL 12 |
| 48 | #define TPM_PROTECTED_ORDINAL_MASK 0xFF | 48 | #define TPM_PROTECTED_ORDINAL_MASK 0xFF |
| 49 | 49 | ||
| 50 | /* | ||
| 51 | * Bug workaround - some TPM's don't flush the most | ||
| 52 | * recently changed pcr on suspend, so force the flush | ||
| 53 | * with an extend to the selected _unused_ non-volatile pcr. | ||
| 54 | */ | ||
| 55 | static int tpm_suspend_pcr; | ||
| 56 | module_param_named(suspend_pcr, tpm_suspend_pcr, uint, 0644); | ||
| 57 | MODULE_PARM_DESC(suspend_pcr, | ||
| 58 | "PCR to use for dummy writes to faciltate flush on suspend."); | ||
| 59 | |||
| 50 | static LIST_HEAD(tpm_chip_list); | 60 | static LIST_HEAD(tpm_chip_list); |
| 51 | static DEFINE_SPINLOCK(driver_lock); | 61 | static DEFINE_SPINLOCK(driver_lock); |
| 52 | static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES); | 62 | static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES); |
| @@ -1077,18 +1087,6 @@ static struct tpm_input_header savestate_header = { | |||
| 1077 | .ordinal = TPM_ORD_SAVESTATE | 1087 | .ordinal = TPM_ORD_SAVESTATE |
| 1078 | }; | 1088 | }; |
| 1079 | 1089 | ||
| 1080 | /* Bug workaround - some TPM's don't flush the most | ||
| 1081 | * recently changed pcr on suspend, so force the flush | ||
| 1082 | * with an extend to the selected _unused_ non-volatile pcr. | ||
| 1083 | */ | ||
| 1084 | static int tpm_suspend_pcr; | ||
| 1085 | static int __init tpm_suspend_setup(char *str) | ||
| 1086 | { | ||
| 1087 | get_option(&str, &tpm_suspend_pcr); | ||
| 1088 | return 1; | ||
| 1089 | } | ||
| 1090 | __setup("tpm_suspend_pcr=", tpm_suspend_setup); | ||
| 1091 | |||
| 1092 | /* | 1090 | /* |
| 1093 | * We are about to suspend. Save the TPM state | 1091 | * We are about to suspend. Save the TPM state |
| 1094 | * so that it can be restored. | 1092 | * so that it can be restored. |
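The tpm.c hunks replace a hand-rolled `__setup("tpm_suspend_pcr=", ...)` hook with a real module parameter. `module_param_named()` binds an externally visible parameter name to a differently named C variable, so the same knob is now settable at load time (`modprobe tpm suspend_pcr=N`, or `tpm.suspend_pcr=N` on the kernel command line when built in) and, thanks to the 0644 mode, at runtime via /sys/module/tpm/parameters/suspend_pcr (path assuming the module is named tpm). A minimal sketch of the idiom:

	#include <linux/module.h>

	static unsigned int my_pcr;                          /* internal name...          */
	module_param_named(suspend_pcr, my_pcr, uint, 0644); /* ...exposed as suspend_pcr */
	MODULE_PARM_DESC(suspend_pcr, "PCR used for the dummy suspend-time extend");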
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 0f69c5ec0ecd..6c1b676643a9 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c | |||
| @@ -48,6 +48,9 @@ struct ports_driver_data { | |||
| 48 | /* Used for exporting per-port information to debugfs */ | 48 | /* Used for exporting per-port information to debugfs */ |
| 49 | struct dentry *debugfs_dir; | 49 | struct dentry *debugfs_dir; |
| 50 | 50 | ||
| 51 | /* List of all the devices we're handling */ | ||
| 52 | struct list_head portdevs; | ||
| 53 | |||
| 51 | /* Number of devices this driver is handling */ | 54 | /* Number of devices this driver is handling */ |
| 52 | unsigned int index; | 55 | unsigned int index; |
| 53 | 56 | ||
| @@ -108,6 +111,9 @@ struct port_buffer { | |||
| 108 | * ports for that device (vdev->priv). | 111 | * ports for that device (vdev->priv). |
| 109 | */ | 112 | */ |
| 110 | struct ports_device { | 113 | struct ports_device { |
| 114 | /* Next portdev in the list, head is in the pdrvdata struct */ | ||
| 115 | struct list_head list; | ||
| 116 | |||
| 111 | /* | 117 | /* |
| 112 | * Workqueue handlers where we process deferred work after | 118 | * Workqueue handlers where we process deferred work after |
| 113 | * notification | 119 | * notification |
| @@ -178,15 +184,21 @@ struct port { | |||
| 178 | struct console cons; | 184 | struct console cons; |
| 179 | 185 | ||
| 180 | /* Each port associates with a separate char device */ | 186 | /* Each port associates with a separate char device */ |
| 181 | struct cdev cdev; | 187 | struct cdev *cdev; |
| 182 | struct device *dev; | 188 | struct device *dev; |
| 183 | 189 | ||
| 190 | /* Reference-counting to handle port hot-unplugs and file operations */ | ||
| 191 | struct kref kref; | ||
| 192 | |||
| 184 | /* A waitqueue for poll() or blocking read operations */ | 193 | /* A waitqueue for poll() or blocking read operations */ |
| 185 | wait_queue_head_t waitqueue; | 194 | wait_queue_head_t waitqueue; |
| 186 | 195 | ||
| 187 | /* The 'name' of the port that we expose via sysfs properties */ | 196 | /* The 'name' of the port that we expose via sysfs properties */ |
| 188 | char *name; | 197 | char *name; |
| 189 | 198 | ||
| 199 | /* We can notify apps of host connect / disconnect events via SIGIO */ | ||
| 200 | struct fasync_struct *async_queue; | ||
| 201 | |||
| 190 | /* The 'id' to identify the port with the Host */ | 202 | /* The 'id' to identify the port with the Host */ |
| 191 | u32 id; | 203 | u32 id; |
| 192 | 204 | ||
| @@ -221,6 +233,41 @@ out: | |||
| 221 | return port; | 233 | return port; |
| 222 | } | 234 | } |
| 223 | 235 | ||
| 236 | static struct port *find_port_by_devt_in_portdev(struct ports_device *portdev, | ||
| 237 | dev_t dev) | ||
| 238 | { | ||
| 239 | struct port *port; | ||
| 240 | unsigned long flags; | ||
| 241 | |||
| 242 | spin_lock_irqsave(&portdev->ports_lock, flags); | ||
| 243 | list_for_each_entry(port, &portdev->ports, list) | ||
| 244 | if (port->cdev->dev == dev) | ||
| 245 | goto out; | ||
| 246 | port = NULL; | ||
| 247 | out: | ||
| 248 | spin_unlock_irqrestore(&portdev->ports_lock, flags); | ||
| 249 | |||
| 250 | return port; | ||
| 251 | } | ||
| 252 | |||
| 253 | static struct port *find_port_by_devt(dev_t dev) | ||
| 254 | { | ||
| 255 | struct ports_device *portdev; | ||
| 256 | struct port *port; | ||
| 257 | unsigned long flags; | ||
| 258 | |||
| 259 | spin_lock_irqsave(&pdrvdata_lock, flags); | ||
| 260 | list_for_each_entry(portdev, &pdrvdata.portdevs, list) { | ||
| 261 | port = find_port_by_devt_in_portdev(portdev, dev); | ||
| 262 | if (port) | ||
| 263 | goto out; | ||
| 264 | } | ||
| 265 | port = NULL; | ||
| 266 | out: | ||
| 267 | spin_unlock_irqrestore(&pdrvdata_lock, flags); | ||
| 268 | return port; | ||
| 269 | } | ||
| 270 | |||
| 224 | static struct port *find_port_by_id(struct ports_device *portdev, u32 id) | 271 | static struct port *find_port_by_id(struct ports_device *portdev, u32 id) |
| 225 | { | 272 | { |
| 226 | struct port *port; | 273 | struct port *port; |
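Because the cdev is no longer embedded in struct port (see the cdev_alloc() hunk further down), open() can no longer recover the port with container_of() on the inode's cdev; it has to look it up by dev_t, which is what these two helpers provide. Note the lock nesting they establish: pdrvdata_lock (the list of port devices) is taken first and each portdev->ports_lock inside it, an order any other dual-lock path must follow to stay deadlock-free. A hedged usage sketch:

	struct port *port = find_port_by_devt(inode->i_cdev->dev);
	if (!port)
		return -ENXIO;   /* port vanished before open() ran */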
| @@ -410,7 +457,10 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id, | |||
| 410 | static ssize_t send_control_msg(struct port *port, unsigned int event, | 457 | static ssize_t send_control_msg(struct port *port, unsigned int event, |
| 411 | unsigned int value) | 458 | unsigned int value) |
| 412 | { | 459 | { |
| 413 | return __send_control_msg(port->portdev, port->id, event, value); | 460 | /* Did the port get unplugged before userspace closed it? */ |
| 461 | if (port->portdev) | ||
| 462 | return __send_control_msg(port->portdev, port->id, event, value); | ||
| 463 | return 0; | ||
| 414 | } | 464 | } |
| 415 | 465 | ||
| 416 | /* Callers must take the port->outvq_lock */ | 466 | /* Callers must take the port->outvq_lock */ |
| @@ -525,6 +575,10 @@ static ssize_t fill_readbuf(struct port *port, char *out_buf, size_t out_count, | |||
| 525 | /* The condition that must be true for polling to end */ | 575 | /* The condition that must be true for polling to end */ |
| 526 | static bool will_read_block(struct port *port) | 576 | static bool will_read_block(struct port *port) |
| 527 | { | 577 | { |
| 578 | if (!port->guest_connected) { | ||
| 579 | /* Port got hot-unplugged. Let's exit. */ | ||
| 580 | return false; | ||
| 581 | } | ||
| 528 | return !port_has_data(port) && port->host_connected; | 582 | return !port_has_data(port) && port->host_connected; |
| 529 | } | 583 | } |
| 530 | 584 | ||
| @@ -575,6 +629,9 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf, | |||
| 575 | if (ret < 0) | 629 | if (ret < 0) |
| 576 | return ret; | 630 | return ret; |
| 577 | } | 631 | } |
| 632 | /* Port got hot-unplugged. */ | ||
| 633 | if (!port->guest_connected) | ||
| 634 | return -ENODEV; | ||
| 578 | /* | 635 | /* |
| 579 | * We could've received a disconnection message while we were | 636 | * We could've received a disconnection message while we were |
| 580 | * waiting for more data. | 637 | * waiting for more data. |
| @@ -616,6 +673,9 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, | |||
| 616 | if (ret < 0) | 673 | if (ret < 0) |
| 617 | return ret; | 674 | return ret; |
| 618 | } | 675 | } |
| 676 | /* Port got hot-unplugged. */ | ||
| 677 | if (!port->guest_connected) | ||
| 678 | return -ENODEV; | ||
| 619 | 679 | ||
| 620 | count = min((size_t)(32 * 1024), count); | 680 | count = min((size_t)(32 * 1024), count); |
| 621 | 681 | ||
| @@ -656,6 +716,10 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait) | |||
| 656 | port = filp->private_data; | 716 | port = filp->private_data; |
| 657 | poll_wait(filp, &port->waitqueue, wait); | 717 | poll_wait(filp, &port->waitqueue, wait); |
| 658 | 718 | ||
| 719 | if (!port->guest_connected) { | ||
| 720 | /* Port got unplugged */ | ||
| 721 | return POLLHUP; | ||
| 722 | } | ||
| 659 | ret = 0; | 723 | ret = 0; |
| 660 | if (!will_read_block(port)) | 724 | if (!will_read_block(port)) |
| 661 | ret |= POLLIN | POLLRDNORM; | 725 | ret |= POLLIN | POLLRDNORM; |
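Several small hunks above add the same hot-unplug guard to will_read_block(), read, write and poll: guest_connected doubles as a "port still plugged in" flag, so each entry point bails out before touching virtqueues that may be going away. The recurring shape, as a sketch:

	if (!port->guest_connected)
		return -ENODEV;   /* read()/write(); poll() reports POLLHUP instead */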
| @@ -667,6 +731,8 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait) | |||
| 667 | return ret; | 731 | return ret; |
| 668 | } | 732 | } |
| 669 | 733 | ||
| 734 | static void remove_port(struct kref *kref); | ||
| 735 | |||
| 670 | static int port_fops_release(struct inode *inode, struct file *filp) | 736 | static int port_fops_release(struct inode *inode, struct file *filp) |
| 671 | { | 737 | { |
| 672 | struct port *port; | 738 | struct port *port; |
| @@ -687,6 +753,16 @@ static int port_fops_release(struct inode *inode, struct file *filp) | |||
| 687 | reclaim_consumed_buffers(port); | 753 | reclaim_consumed_buffers(port); |
| 688 | spin_unlock_irq(&port->outvq_lock); | 754 | spin_unlock_irq(&port->outvq_lock); |
| 689 | 755 | ||
| 756 | /* | ||
| 757 | * Locks aren't necessary here as a port can't be opened after | ||
| 758 | * unplug, and if a port isn't unplugged, a kref would already | ||
| 759 | * exist for the port. Plus, taking ports_lock here would | ||
| 760 | * create a dependency on other locks taken by functions | ||
| 761 | * inside remove_port if we're the last holder of the port, | ||
| 762 | * creating many problems. | ||
| 763 | */ | ||
| 764 | kref_put(&port->kref, remove_port); | ||
| 765 | |||
| 690 | return 0; | 766 | return 0; |
| 691 | } | 767 | } |
| 692 | 768 | ||
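The kref_put() in the release path above is one end of the reference-counting scheme this series introduces. A generic model of the kref idiom it relies on (a sketch, not the driver's code):

	#include <linux/kref.h>
	#include <linux/slab.h>

	struct obj {
		struct kref kref;
		/* ... payload ... */
	};

	static void obj_release(struct kref *kref)
	{
		struct obj *o = container_of(kref, struct obj, kref);

		kfree(o);               /* last reference dropped: safe to free */
	}

	/*
	 * creation:       kref_init(&o->kref)              -- count starts at 1
	 * open():         kref_get(&o->kref)               -- one ref per opener
	 * close()/unplug: kref_put(&o->kref, obj_release)  -- final put frees
	 */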
| @@ -694,22 +770,31 @@ static int port_fops_open(struct inode *inode, struct file *filp) | |||
| 694 | { | 770 | { |
| 695 | struct cdev *cdev = inode->i_cdev; | 771 | struct cdev *cdev = inode->i_cdev; |
| 696 | struct port *port; | 772 | struct port *port; |
| 773 | int ret; | ||
| 697 | 774 | ||
| 698 | port = container_of(cdev, struct port, cdev); | 775 | port = find_port_by_devt(cdev->dev); |
| 699 | filp->private_data = port; | 776 | filp->private_data = port; |
| 700 | 777 | ||
| 778 | /* Prevent against a port getting hot-unplugged at the same time */ | ||
| 779 | spin_lock_irq(&port->portdev->ports_lock); | ||
| 780 | kref_get(&port->kref); | ||
| 781 | spin_unlock_irq(&port->portdev->ports_lock); | ||
| 782 | |||
| 701 | /* | 783 | /* |
| 702 | * Don't allow opening of console port devices -- that's done | 784 | * Don't allow opening of console port devices -- that's done |
| 703 | * via /dev/hvc | 785 | * via /dev/hvc |
| 704 | */ | 786 | */ |
| 705 | if (is_console_port(port)) | 787 | if (is_console_port(port)) { |
| 706 | return -ENXIO; | 788 | ret = -ENXIO; |
| 789 | goto out; | ||
| 790 | } | ||
| 707 | 791 | ||
| 708 | /* Allow only one process to open a particular port at a time */ | 792 | /* Allow only one process to open a particular port at a time */ |
| 709 | spin_lock_irq(&port->inbuf_lock); | 793 | spin_lock_irq(&port->inbuf_lock); |
| 710 | if (port->guest_connected) { | 794 | if (port->guest_connected) { |
| 711 | spin_unlock_irq(&port->inbuf_lock); | 795 | spin_unlock_irq(&port->inbuf_lock); |
| 712 | return -EMFILE; | 796 | ret = -EMFILE; |
| 797 | goto out; | ||
| 713 | } | 798 | } |
| 714 | 799 | ||
| 715 | port->guest_connected = true; | 800 | port->guest_connected = true; |
| @@ -724,10 +809,23 @@ static int port_fops_open(struct inode *inode, struct file *filp) | |||
| 724 | reclaim_consumed_buffers(port); | 809 | reclaim_consumed_buffers(port); |
| 725 | spin_unlock_irq(&port->outvq_lock); | 810 | spin_unlock_irq(&port->outvq_lock); |
| 726 | 811 | ||
| 812 | nonseekable_open(inode, filp); | ||
| 813 | |||
| 727 | /* Notify host of port being opened */ | 814 | /* Notify host of port being opened */ |
| 728 | send_control_msg(filp->private_data, VIRTIO_CONSOLE_PORT_OPEN, 1); | 815 | send_control_msg(filp->private_data, VIRTIO_CONSOLE_PORT_OPEN, 1); |
| 729 | 816 | ||
| 730 | return 0; | 817 | return 0; |
| 818 | out: | ||
| 819 | kref_put(&port->kref, remove_port); | ||
| 820 | return ret; | ||
| 821 | } | ||
| 822 | |||
| 823 | static int port_fops_fasync(int fd, struct file *filp, int mode) | ||
| 824 | { | ||
| 825 | struct port *port; | ||
| 826 | |||
| 827 | port = filp->private_data; | ||
| 828 | return fasync_helper(fd, filp, mode, &port->async_queue); | ||
| 731 | } | 829 | } |
| 732 | 830 | ||
| 733 | /* | 831 | /* |
| @@ -743,6 +841,8 @@ static const struct file_operations port_fops = { | |||
| 743 | .write = port_fops_write, | 841 | .write = port_fops_write, |
| 744 | .poll = port_fops_poll, | 842 | .poll = port_fops_poll, |
| 745 | .release = port_fops_release, | 843 | .release = port_fops_release, |
| 844 | .fasync = port_fops_fasync, | ||
| 845 | .llseek = no_llseek, | ||
| 746 | }; | 846 | }; |
| 747 | 847 | ||
| 748 | /* | 848 | /* |
| @@ -1001,6 +1101,12 @@ static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock) | |||
| 1001 | return nr_added_bufs; | 1101 | return nr_added_bufs; |
| 1002 | } | 1102 | } |
| 1003 | 1103 | ||
| 1104 | static void send_sigio_to_port(struct port *port) | ||
| 1105 | { | ||
| 1106 | if (port->async_queue && port->guest_connected) | ||
| 1107 | kill_fasync(&port->async_queue, SIGIO, POLL_OUT); | ||
| 1108 | } | ||
| 1109 | |||
| 1004 | static int add_port(struct ports_device *portdev, u32 id) | 1110 | static int add_port(struct ports_device *portdev, u32 id) |
| 1005 | { | 1111 | { |
| 1006 | char debugfs_name[16]; | 1112 | char debugfs_name[16]; |
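send_sigio_to_port() is the kernel half of the new asynchronous notification; the ->fasync handler added above is what user space arms. An illustrative user-space program (device path assumed from the "vport%up%u" naming used by device_create() in this file):

	#include <fcntl.h>
	#include <signal.h>
	#include <unistd.h>

	static void on_sigio(int sig)
	{
		/* new data arrived, or the host connect state changed */
	}

	int main(void)
	{
		int fd = open("/dev/vport0p1", O_RDWR);

		if (fd < 0)
			return 1;
		signal(SIGIO, on_sigio);
		fcntl(fd, F_SETOWN, getpid());                    /* route SIGIO here  */
		fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC); /* calls ->fasync(1) */
		pause();                                          /* wait for signals  */
		return 0;
	}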
| @@ -1015,6 +1121,7 @@ static int add_port(struct ports_device *portdev, u32 id) | |||
| 1015 | err = -ENOMEM; | 1121 | err = -ENOMEM; |
| 1016 | goto fail; | 1122 | goto fail; |
| 1017 | } | 1123 | } |
| 1124 | kref_init(&port->kref); | ||
| 1018 | 1125 | ||
| 1019 | port->portdev = portdev; | 1126 | port->portdev = portdev; |
| 1020 | port->id = id; | 1127 | port->id = id; |
| @@ -1022,6 +1129,7 @@ static int add_port(struct ports_device *portdev, u32 id) | |||
| 1022 | port->name = NULL; | 1129 | port->name = NULL; |
| 1023 | port->inbuf = NULL; | 1130 | port->inbuf = NULL; |
| 1024 | port->cons.hvc = NULL; | 1131 | port->cons.hvc = NULL; |
| 1132 | port->async_queue = NULL; | ||
| 1025 | 1133 | ||
| 1026 | port->cons.ws.ws_row = port->cons.ws.ws_col = 0; | 1134 | port->cons.ws.ws_row = port->cons.ws.ws_col = 0; |
| 1027 | 1135 | ||
| @@ -1032,14 +1140,20 @@ static int add_port(struct ports_device *portdev, u32 id) | |||
| 1032 | port->in_vq = portdev->in_vqs[port->id]; | 1140 | port->in_vq = portdev->in_vqs[port->id]; |
| 1033 | port->out_vq = portdev->out_vqs[port->id]; | 1141 | port->out_vq = portdev->out_vqs[port->id]; |
| 1034 | 1142 | ||
| 1035 | cdev_init(&port->cdev, &port_fops); | 1143 | port->cdev = cdev_alloc(); |
| 1144 | if (!port->cdev) { | ||
| 1145 | dev_err(&port->portdev->vdev->dev, "Error allocating cdev\n"); | ||
| 1146 | err = -ENOMEM; | ||
| 1147 | goto free_port; | ||
| 1148 | } | ||
| 1149 | port->cdev->ops = &port_fops; | ||
| 1036 | 1150 | ||
| 1037 | devt = MKDEV(portdev->chr_major, id); | 1151 | devt = MKDEV(portdev->chr_major, id); |
| 1038 | err = cdev_add(&port->cdev, devt, 1); | 1152 | err = cdev_add(port->cdev, devt, 1); |
| 1039 | if (err < 0) { | 1153 | if (err < 0) { |
| 1040 | dev_err(&port->portdev->vdev->dev, | 1154 | dev_err(&port->portdev->vdev->dev, |
| 1041 | "Error %d adding cdev for port %u\n", err, id); | 1155 | "Error %d adding cdev for port %u\n", err, id); |
| 1042 | goto free_port; | 1156 | goto free_cdev; |
| 1043 | } | 1157 | } |
| 1044 | port->dev = device_create(pdrvdata.class, &port->portdev->vdev->dev, | 1158 | port->dev = device_create(pdrvdata.class, &port->portdev->vdev->dev, |
| 1045 | devt, port, "vport%up%u", | 1159 | devt, port, "vport%up%u", |
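Switching from an embedded cdev to cdev_alloc() decouples the character device's lifetime from the port structure: an embedded cdev disappears with kfree(port) even if the device node is still referenced, while an allocated cdev carries its own kobject and lives until cdev_del() drops it. The two idioms side by side, as a sketch:

	/* (a) embedded -- freed together with its container: */
	cdev_init(&port->cdev, &port_fops);
	err = cdev_add(&port->cdev, devt, 1);

	/* (b) separately allocated -- independently refcounted: */
	port->cdev = cdev_alloc();
	if (!port->cdev)
		return -ENOMEM;
	port->cdev->ops = &port_fops;
	err = cdev_add(port->cdev, devt, 1);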
| @@ -1104,7 +1218,7 @@ free_inbufs: | |||
| 1104 | free_device: | 1218 | free_device: |
| 1105 | device_destroy(pdrvdata.class, port->dev->devt); | 1219 | device_destroy(pdrvdata.class, port->dev->devt); |
| 1106 | free_cdev: | 1220 | free_cdev: |
| 1107 | cdev_del(&port->cdev); | 1221 | cdev_del(port->cdev); |
| 1108 | free_port: | 1222 | free_port: |
| 1109 | kfree(port); | 1223 | kfree(port); |
| 1110 | fail: | 1224 | fail: |
| @@ -1113,21 +1227,45 @@ fail: | |||
| 1113 | return err; | 1227 | return err; |
| 1114 | } | 1228 | } |
| 1115 | 1229 | ||
| 1116 | /* Remove all port-specific data. */ | 1230 | /* No users remain, remove all port-specific data. */ |
| 1117 | static int remove_port(struct port *port) | 1231 | static void remove_port(struct kref *kref) |
| 1232 | { | ||
| 1233 | struct port *port; | ||
| 1234 | |||
| 1235 | port = container_of(kref, struct port, kref); | ||
| 1236 | |||
| 1237 | sysfs_remove_group(&port->dev->kobj, &port_attribute_group); | ||
| 1238 | device_destroy(pdrvdata.class, port->dev->devt); | ||
| 1239 | cdev_del(port->cdev); | ||
| 1240 | |||
| 1241 | kfree(port->name); | ||
| 1242 | |||
| 1243 | debugfs_remove(port->debugfs_file); | ||
| 1244 | |||
| 1245 | kfree(port); | ||
| 1246 | } | ||
| 1247 | |||
| 1248 | /* | ||
| 1249 | * Port got unplugged. Remove port from portdev's list and drop the | ||
| 1250 | * kref reference. If no userspace has this port opened, it will | ||
| 1251 | * result in immediate removal of the port. | ||

| 1252 | */ | ||
| 1253 | static void unplug_port(struct port *port) | ||
| 1118 | { | 1254 | { |
| 1119 | struct port_buffer *buf; | 1255 | struct port_buffer *buf; |
| 1120 | 1256 | ||
| 1257 | spin_lock_irq(&port->portdev->ports_lock); | ||
| 1258 | list_del(&port->list); | ||
| 1259 | spin_unlock_irq(&port->portdev->ports_lock); | ||
| 1260 | |||
| 1121 | if (port->guest_connected) { | 1261 | if (port->guest_connected) { |
| 1122 | port->guest_connected = false; | 1262 | port->guest_connected = false; |
| 1123 | port->host_connected = false; | 1263 | port->host_connected = false; |
| 1124 | wake_up_interruptible(&port->waitqueue); | 1264 | wake_up_interruptible(&port->waitqueue); |
| 1125 | send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 0); | ||
| 1126 | } | ||
| 1127 | 1265 | ||
| 1128 | spin_lock_irq(&port->portdev->ports_lock); | 1266 | /* Let the app know the port is going down. */ |
| 1129 | list_del(&port->list); | 1267 | send_sigio_to_port(port); |
| 1130 | spin_unlock_irq(&port->portdev->ports_lock); | 1268 | } |
| 1131 | 1269 | ||
| 1132 | if (is_console_port(port)) { | 1270 | if (is_console_port(port)) { |
| 1133 | spin_lock_irq(&pdrvdata_lock); | 1271 | spin_lock_irq(&pdrvdata_lock); |
| @@ -1146,9 +1284,6 @@ static int remove_port(struct port *port) | |||
| 1146 | hvc_remove(port->cons.hvc); | 1284 | hvc_remove(port->cons.hvc); |
| 1147 | #endif | 1285 | #endif |
| 1148 | } | 1286 | } |
| 1149 | sysfs_remove_group(&port->dev->kobj, &port_attribute_group); | ||
| 1150 | device_destroy(pdrvdata.class, port->dev->devt); | ||
| 1151 | cdev_del(&port->cdev); | ||
| 1152 | 1287 | ||
| 1153 | /* Remove unused data this port might have received. */ | 1288 | /* Remove unused data this port might have received. */ |
| 1154 | discard_port_data(port); | 1289 | discard_port_data(port); |
| @@ -1159,12 +1294,19 @@ static int remove_port(struct port *port) | |||
| 1159 | while ((buf = virtqueue_detach_unused_buf(port->in_vq))) | 1294 | while ((buf = virtqueue_detach_unused_buf(port->in_vq))) |
| 1160 | free_buf(buf); | 1295 | free_buf(buf); |
| 1161 | 1296 | ||
| 1162 | kfree(port->name); | 1297 | /* |
| 1163 | 1298 | * We should just assume the device itself has gone off -- | |
| 1164 | debugfs_remove(port->debugfs_file); | 1299 | * else a close on an open port later will try to send out a |
| 1300 | * control message. | ||
| 1301 | */ | ||
| 1302 | port->portdev = NULL; | ||
| 1165 | 1303 | ||
| 1166 | kfree(port); | 1304 | /* |
| 1167 | return 0; | 1305 | * Locks around here are not necessary - a port can't be |
| 1306 | * opened after we removed the port struct from ports_list | ||
| 1307 | * above. | ||
| 1308 | */ | ||
| 1309 | kref_put(&port->kref, remove_port); | ||
| 1168 | } | 1310 | } |
| 1169 | 1311 | ||
| 1170 | /* Any private messages that the Host and Guest want to share */ | 1312 | /* Any private messages that the Host and Guest want to share */ |
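The old remove_port() is split in two above, so that hot-unplug work happens immediately while the final teardown waits for the last reference. A summary of the division of labour, as comments:

	/*
	 * unplug_port(): runs at hot-unplug time -- unlinks the port from
	 *                its portdev, wakes sleepers, sends SIGIO, tears
	 *                down any hvc console, discards queued buffers,
	 *                clears port->portdev and drops the "plugged in"
	 *                kref.
	 * remove_port(): runs as the kref release, possibly much later --
	 *                removes the sysfs group, device and cdev, then
	 *                frees the name, the debugfs file and the port.
	 */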
| @@ -1203,7 +1345,7 @@ static void handle_control_message(struct ports_device *portdev, | |||
| 1203 | add_port(portdev, cpkt->id); | 1345 | add_port(portdev, cpkt->id); |
| 1204 | break; | 1346 | break; |
| 1205 | case VIRTIO_CONSOLE_PORT_REMOVE: | 1347 | case VIRTIO_CONSOLE_PORT_REMOVE: |
| 1206 | remove_port(port); | 1348 | unplug_port(port); |
| 1207 | break; | 1349 | break; |
| 1208 | case VIRTIO_CONSOLE_CONSOLE_PORT: | 1350 | case VIRTIO_CONSOLE_CONSOLE_PORT: |
| 1209 | if (!cpkt->value) | 1351 | if (!cpkt->value) |
| @@ -1245,6 +1387,12 @@ static void handle_control_message(struct ports_device *portdev, | |||
| 1245 | spin_lock_irq(&port->outvq_lock); | 1387 | spin_lock_irq(&port->outvq_lock); |
| 1246 | reclaim_consumed_buffers(port); | 1388 | reclaim_consumed_buffers(port); |
| 1247 | spin_unlock_irq(&port->outvq_lock); | 1389 | spin_unlock_irq(&port->outvq_lock); |
| 1390 | |||
| 1391 | /* | ||
| 1392 | * If the guest is connected, it'll be interested in | ||
| 1393 | * knowing the host connection state changed. | ||
| 1394 | */ | ||
| 1395 | send_sigio_to_port(port); | ||
| 1248 | break; | 1396 | break; |
| 1249 | case VIRTIO_CONSOLE_PORT_NAME: | 1397 | case VIRTIO_CONSOLE_PORT_NAME: |
| 1250 | /* | 1398 | /* |
| @@ -1341,6 +1489,9 @@ static void in_intr(struct virtqueue *vq) | |||
| 1341 | 1489 | ||
| 1342 | wake_up_interruptible(&port->waitqueue); | 1490 | wake_up_interruptible(&port->waitqueue); |
| 1343 | 1491 | ||
| 1492 | /* Send a SIGIO indicating new data in case the process asked for it */ | ||
| 1493 | send_sigio_to_port(port); | ||
| 1494 | |||
| 1344 | if (is_console_port(port) && hvc_poll(port->cons.hvc)) | 1495 | if (is_console_port(port) && hvc_poll(port->cons.hvc)) |
| 1345 | hvc_kick(); | 1496 | hvc_kick(); |
| 1346 | } | 1497 | } |
| @@ -1577,6 +1728,10 @@ static int __devinit virtcons_probe(struct virtio_device *vdev) | |||
| 1577 | add_port(portdev, 0); | 1728 | add_port(portdev, 0); |
| 1578 | } | 1729 | } |
| 1579 | 1730 | ||
| 1731 | spin_lock_irq(&pdrvdata_lock); | ||
| 1732 | list_add_tail(&portdev->list, &pdrvdata.portdevs); | ||
| 1733 | spin_unlock_irq(&pdrvdata_lock); | ||
| 1734 | |||
| 1580 | __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID, | 1735 | __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID, |
| 1581 | VIRTIO_CONSOLE_DEVICE_READY, 1); | 1736 | VIRTIO_CONSOLE_DEVICE_READY, 1); |
| 1582 | return 0; | 1737 | return 0; |
| @@ -1600,23 +1755,41 @@ static void virtcons_remove(struct virtio_device *vdev) | |||
| 1600 | { | 1755 | { |
| 1601 | struct ports_device *portdev; | 1756 | struct ports_device *portdev; |
| 1602 | struct port *port, *port2; | 1757 | struct port *port, *port2; |
| 1603 | struct port_buffer *buf; | ||
| 1604 | unsigned int len; | ||
| 1605 | 1758 | ||
| 1606 | portdev = vdev->priv; | 1759 | portdev = vdev->priv; |
| 1607 | 1760 | ||
| 1761 | spin_lock_irq(&pdrvdata_lock); | ||
| 1762 | list_del(&portdev->list); | ||
| 1763 | spin_unlock_irq(&pdrvdata_lock); | ||
| 1764 | |||
| 1765 | /* Disable interrupts for vqs */ | ||
| 1766 | vdev->config->reset(vdev); | ||
| 1767 | /* Finish up work that's lined up */ | ||
| 1608 | cancel_work_sync(&portdev->control_work); | 1768 | cancel_work_sync(&portdev->control_work); |
| 1609 | 1769 | ||
| 1610 | list_for_each_entry_safe(port, port2, &portdev->ports, list) | 1770 | list_for_each_entry_safe(port, port2, &portdev->ports, list) |
| 1611 | remove_port(port); | 1771 | unplug_port(port); |
| 1612 | 1772 | ||
| 1613 | unregister_chrdev(portdev->chr_major, "virtio-portsdev"); | 1773 | unregister_chrdev(portdev->chr_major, "virtio-portsdev"); |
| 1614 | 1774 | ||
| 1615 | while ((buf = virtqueue_get_buf(portdev->c_ivq, &len))) | 1775 | /* |
| 1616 | free_buf(buf); | 1776 | * When yanking out a device, we immediately lose the |
| 1777 | * (device-side) queues. So there's no point in keeping the | ||
| 1778 | * guest side around till we drop our final reference. This | ||
| 1779 | * also means that any ports which are in an open state will | ||
| 1780 | * have to just stop using the port, as the vqs are going | ||
| 1781 | * away. | ||
| 1782 | */ | ||
| 1783 | if (use_multiport(portdev)) { | ||
| 1784 | struct port_buffer *buf; | ||
| 1785 | unsigned int len; | ||
| 1617 | 1786 | ||
| 1618 | while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) | 1787 | while ((buf = virtqueue_get_buf(portdev->c_ivq, &len))) |
| 1619 | free_buf(buf); | 1788 | free_buf(buf); |
| 1789 | |||
| 1790 | while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) | ||
| 1791 | free_buf(buf); | ||
| 1792 | } | ||
| 1620 | 1793 | ||
| 1621 | vdev->config->del_vqs(vdev); | 1794 | vdev->config->del_vqs(vdev); |
| 1622 | kfree(portdev->in_vqs); | 1795 | kfree(portdev->in_vqs); |
| @@ -1663,6 +1836,7 @@ static int __init init(void) | |||
| 1663 | PTR_ERR(pdrvdata.debugfs_dir)); | 1836 | PTR_ERR(pdrvdata.debugfs_dir)); |
| 1664 | } | 1837 | } |
| 1665 | INIT_LIST_HEAD(&pdrvdata.consoles); | 1838 | INIT_LIST_HEAD(&pdrvdata.consoles); |
| 1839 | INIT_LIST_HEAD(&pdrvdata.portdevs); | ||
| 1666 | 1840 | ||
| 1667 | return register_virtio_driver(&virtio_console); | 1841 | return register_virtio_driver(&virtio_console); |
| 1668 | } | 1842 | } |
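virtcons_remove() now quiesces in a safe order: reset the device (so no more vq interrupts), flush deferred work, unplug every port, and only then drain the control queue. The drain idiom collects both buffer populations of a dying virtqueue:

	/* buffers the host has already completed: */
	while ((buf = virtqueue_get_buf(portdev->c_ivq, &len)))
		free_buf(buf);

	/* buffers we queued but the host never consumed: */
	while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq)))
		free_buf(buf);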
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 70bb350de996..9dbb28b9559f 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig | |||
| @@ -39,7 +39,7 @@ config EDAC_DEBUG | |||
| 39 | there're four debug levels (x=0,1,2,3 from low to high). | 39 | there're four debug levels (x=0,1,2,3 from low to high). |
| 40 | Usually you should select 'N'. | 40 | Usually you should select 'N'. |
| 41 | 41 | ||
| 42 | config EDAC_DECODE_MCE | 42 | config EDAC_DECODE_MCE |
| 43 | tristate "Decode MCEs in human-readable form (only on AMD for now)" | 43 | tristate "Decode MCEs in human-readable form (only on AMD for now)" |
| 44 | depends on CPU_SUP_AMD && X86_MCE | 44 | depends on CPU_SUP_AMD && X86_MCE |
| 45 | default y | 45 | default y |
| @@ -51,6 +51,16 @@ config EDAC_DEBUG | |||
| 51 | which occur really early upon boot, before the module infrastructure | 51 | which occur really early upon boot, before the module infrastructure |
| 52 | has been initialized. | 52 | has been initialized. |
| 53 | 53 | ||
| 54 | config EDAC_MCE_INJ | ||
| 55 | tristate "Simple MCE injection interface over /sysfs" | ||
| 56 | depends on EDAC_DECODE_MCE | ||
| 57 | default n | ||
| 58 | help | ||
| 59 | This is a simple interface to inject MCEs over /sysfs and test | ||
| 60 | the MCE decoding code in EDAC. | ||
| 61 | |||
| 62 | This is currently AMD-only. | ||
| 63 | |||
| 54 | config EDAC_MM_EDAC | 64 | config EDAC_MM_EDAC |
| 55 | tristate "Main Memory EDAC (Error Detection And Correction) reporting" | 65 | tristate "Main Memory EDAC (Error Detection And Correction) reporting" |
| 56 | help | 66 | help |
| @@ -66,13 +76,13 @@ config EDAC_MCE | |||
| 66 | 76 | ||
| 67 | config EDAC_AMD64 | 77 | config EDAC_AMD64 |
| 68 | tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h" | 78 | tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h" |
| 69 | depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && EDAC_DECODE_MCE | 79 | depends on EDAC_MM_EDAC && AMD_NB && X86_64 && PCI && EDAC_DECODE_MCE |
| 70 | help | 80 | help |
| 71 | Support for error detection and correction on the AMD 64 | 81 | Support for error detection and correction on the AMD 64 |
| 72 | Families of Memory Controllers (K8, F10h and F11h) | 82 | Families of Memory Controllers (K8, F10h and F11h) |
| 73 | 83 | ||
| 74 | config EDAC_AMD64_ERROR_INJECTION | 84 | config EDAC_AMD64_ERROR_INJECTION |
| 75 | bool "Sysfs Error Injection facilities" | 85 | bool "Sysfs HW Error injection facilities" |
| 76 | depends on EDAC_AMD64 | 86 | depends on EDAC_AMD64 |
| 77 | help | 87 | help |
| 78 | Recent Opterons (Family 10h and later) provide for Memory Error | 88 | Recent Opterons (Family 10h and later) provide for Memory Error |
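For reference, enabling the new injector on an AMD build would look like this .config fragment (illustrative; both symbols are tristate, so =y is equally valid where the dependencies allow):

	CONFIG_EDAC_DECODE_MCE=m
	CONFIG_EDAC_MCE_INJ=m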
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index ca6b1bb24ccc..32c7bc93c525 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile | |||
| @@ -17,6 +17,9 @@ ifdef CONFIG_PCI | |||
| 17 | edac_core-objs += edac_pci.o edac_pci_sysfs.o | 17 | edac_core-objs += edac_pci.o edac_pci_sysfs.o |
| 18 | endif | 18 | endif |
| 19 | 19 | ||
| 20 | obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o | ||
| 21 | |||
| 22 | edac_mce_amd-objs := mce_amd.o | ||
| 20 | obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o | 23 | obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o |
| 21 | 24 | ||
| 22 | obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o | 25 | obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o |
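The new `edac_mce_amd-objs := mce_amd.o` line is the standard kbuild composite-object idiom: the module keeps its externally visible name (edac_mce_amd.ko, presumably so existing configs and scripts keep working) while the source file moves to mce_amd.c. The generic form of the pattern:

	# module <name>.ko built from one or more differently named objects:
	<name>-objs := <file1>.o <file2>.o
	obj-$(CONFIG_FOO) += <name>.o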
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index e7d5d6b5dcf6..8521401bbd75 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | #include "amd64_edac.h" | 1 | #include "amd64_edac.h" |
| 2 | #include <asm/k8.h> | 2 | #include <asm/amd_nb.h> |
| 3 | 3 | ||
| 4 | static struct edac_pci_ctl_info *amd64_ctl_pci; | 4 | static struct edac_pci_ctl_info *amd64_ctl_pci; |
| 5 | 5 | ||
| @@ -2073,11 +2073,18 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, | |||
| 2073 | amd64_handle_ue(mci, info); | 2073 | amd64_handle_ue(mci, info); |
| 2074 | } | 2074 | } |
| 2075 | 2075 | ||
| 2076 | void amd64_decode_bus_error(int node_id, struct err_regs *regs) | 2076 | void amd64_decode_bus_error(int node_id, struct mce *m, u32 nbcfg) |
| 2077 | { | 2077 | { |
| 2078 | struct mem_ctl_info *mci = mci_lookup[node_id]; | 2078 | struct mem_ctl_info *mci = mci_lookup[node_id]; |
| 2079 | struct err_regs regs; | ||
| 2079 | 2080 | ||
| 2080 | __amd64_decode_bus_error(mci, regs); | 2081 | regs.nbsl = (u32) m->status; |
| 2082 | regs.nbsh = (u32)(m->status >> 32); | ||
| 2083 | regs.nbeal = (u32) m->addr; | ||
| 2084 | regs.nbeah = (u32)(m->addr >> 32); | ||
| 2085 | regs.nbcfg = nbcfg; | ||
| 2086 | |||
| 2087 | __amd64_decode_bus_error(mci, ®s); | ||
| 2081 | 2088 | ||
| 2082 | /* | 2089 | /* |
| 2083 | * Check the UE bit of the NB status high register, if set generate some | 2090 | * Check the UE bit of the NB status high register, if set generate some |
| @@ -2086,7 +2093,7 @@ void amd64_decode_bus_error(int node_id, struct err_regs *regs) | |||
| 2086 | * | 2093 | * |
| 2087 | * FIXME: this should go somewhere else, if at all. | 2094 | * FIXME: this should go somewhere else, if at all. |
| 2088 | */ | 2095 | */ |
| 2089 | if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors) | 2096 | if (regs.nbsh & K8_NBSH_UC_ERR && !report_gart_errors) |
| 2090 | edac_mc_handle_ue_no_info(mci, "UE bit is set"); | 2097 | edac_mc_handle_ue_no_info(mci, "UE bit is set"); |
| 2091 | 2098 | ||
| 2092 | } | 2099 | } |
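amd64_decode_bus_error() now accepts the raw struct mce and rebuilds the legacy err_regs view on the stack, splitting each 64-bit MCA register into the 32-bit high/low pair the older decode code expects:

	regs.nbsl  = (u32) m->status;         /* MC4_STATUS[31:0]  */
	regs.nbsh  = (u32)(m->status >> 32);  /* MC4_STATUS[63:32] */
	regs.nbeal = (u32) m->addr;           /* MC4_ADDR[31:0]    */
	regs.nbeah = (u32)(m->addr >> 32);    /* MC4_ADDR[63:32]   */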
| @@ -2927,7 +2934,7 @@ static int __init amd64_edac_init(void) | |||
| 2927 | * to finish initialization of the MC instances. | 2934 | * to finish initialization of the MC instances. |
| 2928 | */ | 2935 | */ |
| 2929 | err = -ENODEV; | 2936 | err = -ENODEV; |
| 2930 | for (nb = 0; nb < num_k8_northbridges; nb++) { | 2937 | for (nb = 0; nb < k8_northbridges.num; nb++) { |
| 2931 | if (!pvt_lookup[nb]) | 2938 | if (!pvt_lookup[nb]) |
| 2932 | continue; | 2939 | continue; |
| 2933 | 2940 | ||
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 613b9381e71a..044aee4f944d 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h | |||
| @@ -72,7 +72,7 @@ | |||
| 72 | #include <linux/edac.h> | 72 | #include <linux/edac.h> |
| 73 | #include <asm/msr.h> | 73 | #include <asm/msr.h> |
| 74 | #include "edac_core.h" | 74 | #include "edac_core.h" |
| 75 | #include "edac_mce_amd.h" | 75 | #include "mce_amd.h" |
| 76 | 76 | ||
| 77 | #define amd64_printk(level, fmt, arg...) \ | 77 | #define amd64_printk(level, fmt, arg...) \ |
| 78 | edac_printk(level, "amd64", fmt, ##arg) | 78 | edac_printk(level, "amd64", fmt, ##arg) |
| @@ -482,11 +482,10 @@ extern const char *rrrr_msgs[16]; | |||
| 482 | extern const char *to_msgs[2]; | 482 | extern const char *to_msgs[2]; |
| 483 | extern const char *pp_msgs[4]; | 483 | extern const char *pp_msgs[4]; |
| 484 | extern const char *ii_msgs[4]; | 484 | extern const char *ii_msgs[4]; |
| 485 | extern const char *ext_msgs[32]; | ||
| 486 | extern const char *htlink_msgs[8]; | 485 | extern const char *htlink_msgs[8]; |
| 487 | 486 | ||
| 488 | #ifdef CONFIG_EDAC_DEBUG | 487 | #ifdef CONFIG_EDAC_DEBUG |
| 489 | #define NUM_DBG_ATTRS 9 | 488 | #define NUM_DBG_ATTRS 5 |
| 490 | #else | 489 | #else |
| 491 | #define NUM_DBG_ATTRS 0 | 490 | #define NUM_DBG_ATTRS 0 |
| 492 | #endif | 491 | #endif |
diff --git a/drivers/edac/amd64_edac_dbg.c b/drivers/edac/amd64_edac_dbg.c index 59cf2cf6e11e..e3562288f4ce 100644 --- a/drivers/edac/amd64_edac_dbg.c +++ b/drivers/edac/amd64_edac_dbg.c | |||
| @@ -1,167 +1,16 @@ | |||
| 1 | #include "amd64_edac.h" | 1 | #include "amd64_edac.h" |
| 2 | 2 | ||
| 3 | /* | 3 | #define EDAC_DCT_ATTR_SHOW(reg) \ |
| 4 | * accept a hex value and store it into the virtual error register file, field: | 4 | static ssize_t amd64_##reg##_show(struct mem_ctl_info *mci, char *data) \ |
| 5 | * nbeal and nbeah. Assume virtual error values have already been set for: NBSL, | 5 | { \ |
| 6 | * NBSH and NBCFG. Then proceed to map the error values to a MC, CSROW and | 6 | struct amd64_pvt *pvt = mci->pvt_info; \ |
| 7 | * CHANNEL | 7 | return sprintf(data, "0x%016llx\n", (u64)pvt->reg); \ |
| 8 | */ | ||
| 9 | static ssize_t amd64_nbea_store(struct mem_ctl_info *mci, const char *data, | ||
| 10 | size_t count) | ||
| 11 | { | ||
| 12 | struct amd64_pvt *pvt = mci->pvt_info; | ||
| 13 | unsigned long long value; | ||
| 14 | int ret = 0; | ||
| 15 | |||
| 16 | ret = strict_strtoull(data, 16, &value); | ||
| 17 | if (ret != -EINVAL) { | ||
| 18 | debugf0("received NBEA= 0x%llx\n", value); | ||
| 19 | |||
| 20 | /* place the value into the virtual error packet */ | ||
| 21 | pvt->ctl_error_info.nbeal = (u32) value; | ||
| 22 | value >>= 32; | ||
| 23 | pvt->ctl_error_info.nbeah = (u32) value; | ||
| 24 | |||
| 25 | /* Process the Mapping request */ | ||
| 26 | /* TODO: Add race prevention */ | ||
| 27 | amd_decode_nb_mce(pvt->mc_node_id, &pvt->ctl_error_info, 1); | ||
| 28 | |||
| 29 | return count; | ||
| 30 | } | ||
| 31 | return ret; | ||
| 32 | } | ||
| 33 | |||
| 34 | /* display back what the last NBEA (MCA NB Address (MC4_ADDR)) was written */ | ||
| 35 | static ssize_t amd64_nbea_show(struct mem_ctl_info *mci, char *data) | ||
| 36 | { | ||
| 37 | struct amd64_pvt *pvt = mci->pvt_info; | ||
| 38 | u64 value; | ||
| 39 | |||
| 40 | value = pvt->ctl_error_info.nbeah; | ||
| 41 | value <<= 32; | ||
| 42 | value |= pvt->ctl_error_info.nbeal; | ||
| 43 | |||
| 44 | return sprintf(data, "%llx\n", value); | ||
| 45 | } | ||
| 46 | |||
| 47 | /* store the NBSL (MCA NB Status Low (MC4_STATUS)) value user desires */ | ||
| 48 | static ssize_t amd64_nbsl_store(struct mem_ctl_info *mci, const char *data, | ||
| 49 | size_t count) | ||
| 50 | { | ||
| 51 | struct amd64_pvt *pvt = mci->pvt_info; | ||
| 52 | unsigned long value; | ||
| 53 | int ret = 0; | ||
| 54 | |||
| 55 | ret = strict_strtoul(data, 16, &value); | ||
| 56 | if (ret != -EINVAL) { | ||
| 57 | debugf0("received NBSL= 0x%lx\n", value); | ||
| 58 | |||
| 59 | pvt->ctl_error_info.nbsl = (u32) value; | ||
| 60 | |||
| 61 | return count; | ||
| 62 | } | ||
| 63 | return ret; | ||
| 64 | } | ||
| 65 | |||
| 66 | /* display back what the last NBSL value written */ | ||
| 67 | static ssize_t amd64_nbsl_show(struct mem_ctl_info *mci, char *data) | ||
| 68 | { | ||
| 69 | struct amd64_pvt *pvt = mci->pvt_info; | ||
| 70 | u32 value; | ||
| 71 | |||
| 72 | value = pvt->ctl_error_info.nbsl; | ||
| 73 | |||
| 74 | return sprintf(data, "%x\n", value); | ||
| 75 | } | ||
| 76 | |||
| 77 | /* store the NBSH (MCA NB Status High) value user desires */ | ||
| 78 | static ssize_t amd64_nbsh_store(struct mem_ctl_info *mci, const char *data, | ||
| 79 | size_t count) | ||
| 80 | { | ||
| 81 | struct amd64_pvt *pvt = mci->pvt_info; | ||
| 82 | unsigned long value; | ||
| 83 | int ret = 0; | ||
| 84 | |||
| 85 | ret = strict_strtoul(data, 16, &value); | ||
| 86 | if (ret != -EINVAL) { | ||
| 87 | debugf0("received NBSH= 0x%lx\n", value); | ||
| 88 | |||
| 89 | pvt->ctl_error_info.nbsh = (u32) value; | ||
| 90 | |||
| 91 | return count; | ||
| 92 | } | ||
| 93 | return ret; | ||
| 94 | } | ||
| 95 | |||
| 96 | /* display back what the last NBSH value written */ | ||
| 97 | static ssize_t amd64_nbsh_show(struct mem_ctl_info *mci, char *data) | ||
| 98 | { | ||
| 99 | struct amd64_pvt *pvt = mci->pvt_info; | ||
| 100 | u32 value; | ||
| 101 | |||
| 102 | value = pvt->ctl_error_info.nbsh; | ||
| 103 | |||
| 104 | return sprintf(data, "%x\n", value); | ||
| 105 | } | 8 | } |
| 106 | 9 | ||
| 107 | /* accept and store the NBCFG (MCA NB Configuration) value user desires */ | 10 | EDAC_DCT_ATTR_SHOW(dhar); |
| 108 | static ssize_t amd64_nbcfg_store(struct mem_ctl_info *mci, | 11 | EDAC_DCT_ATTR_SHOW(dbam0); |
| 109 | const char *data, size_t count) | 12 | EDAC_DCT_ATTR_SHOW(top_mem); |
| 110 | { | 13 | EDAC_DCT_ATTR_SHOW(top_mem2); |
| 111 | struct amd64_pvt *pvt = mci->pvt_info; | ||
| 112 | unsigned long value; | ||
| 113 | int ret = 0; | ||
| 114 | |||
| 115 | ret = strict_strtoul(data, 16, &value); | ||
| 116 | if (ret != -EINVAL) { | ||
| 117 | debugf0("received NBCFG= 0x%lx\n", value); | ||
| 118 | |||
| 119 | pvt->ctl_error_info.nbcfg = (u32) value; | ||
| 120 | |||
| 121 | return count; | ||
| 122 | } | ||
| 123 | return ret; | ||
| 124 | } | ||
| 125 | |||
| 126 | /* various show routines for the controls of a MCI */ | ||
| 127 | static ssize_t amd64_nbcfg_show(struct mem_ctl_info *mci, char *data) | ||
| 128 | { | ||
| 129 | struct amd64_pvt *pvt = mci->pvt_info; | ||
| 130 | |||
| 131 | return sprintf(data, "%x\n", pvt->ctl_error_info.nbcfg); | ||
| 132 | } | ||
| 133 | |||
| 134 | |||
| 135 | static ssize_t amd64_dhar_show(struct mem_ctl_info *mci, char *data) | ||
| 136 | { | ||
| 137 | struct amd64_pvt *pvt = mci->pvt_info; | ||
| 138 | |||
| 139 | return sprintf(data, "%x\n", pvt->dhar); | ||
| 140 | } | ||
| 141 | |||
| 142 | |||
| 143 | static ssize_t amd64_dbam_show(struct mem_ctl_info *mci, char *data) | ||
| 144 | { | ||
| 145 | struct amd64_pvt *pvt = mci->pvt_info; | ||
| 146 | |||
| 147 | return sprintf(data, "%x\n", pvt->dbam0); | ||
| 148 | } | ||
| 149 | |||
| 150 | |||
| 151 | static ssize_t amd64_topmem_show(struct mem_ctl_info *mci, char *data) | ||
| 152 | { | ||
| 153 | struct amd64_pvt *pvt = mci->pvt_info; | ||
| 154 | |||
| 155 | return sprintf(data, "%llx\n", pvt->top_mem); | ||
| 156 | } | ||
| 157 | |||
| 158 | |||
| 159 | static ssize_t amd64_topmem2_show(struct mem_ctl_info *mci, char *data) | ||
| 160 | { | ||
| 161 | struct amd64_pvt *pvt = mci->pvt_info; | ||
| 162 | |||
| 163 | return sprintf(data, "%llx\n", pvt->top_mem2); | ||
| 164 | } | ||
| 165 | 14 | ||
| 166 | static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data) | 15 | static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data) |
| 167 | { | 16 | { |
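EDAC_DCT_ATTR_SHOW() collapses four nearly identical show routines into one token-pasting template. Expanding EDAC_DCT_ATTR_SHOW(dhar) by hand yields exactly the boilerplate being deleted, with a consistently widened output format:

	static ssize_t amd64_dhar_show(struct mem_ctl_info *mci, char *data)
	{
		struct amd64_pvt *pvt = mci->pvt_info;
		return sprintf(data, "0x%016llx\n", (u64)pvt->dhar);
	}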
| @@ -182,38 +31,6 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = { | |||
| 182 | 31 | ||
| 183 | { | 32 | { |
| 184 | .attr = { | 33 | .attr = { |
| 185 | .name = "nbea_ctl", | ||
| 186 | .mode = (S_IRUGO | S_IWUSR) | ||
| 187 | }, | ||
| 188 | .show = amd64_nbea_show, | ||
| 189 | .store = amd64_nbea_store, | ||
| 190 | }, | ||
| 191 | { | ||
| 192 | .attr = { | ||
| 193 | .name = "nbsl_ctl", | ||
| 194 | .mode = (S_IRUGO | S_IWUSR) | ||
| 195 | }, | ||
| 196 | .show = amd64_nbsl_show, | ||
| 197 | .store = amd64_nbsl_store, | ||
| 198 | }, | ||
| 199 | { | ||
| 200 | .attr = { | ||
| 201 | .name = "nbsh_ctl", | ||
| 202 | .mode = (S_IRUGO | S_IWUSR) | ||
| 203 | }, | ||
| 204 | .show = amd64_nbsh_show, | ||
| 205 | .store = amd64_nbsh_store, | ||
| 206 | }, | ||
| 207 | { | ||
| 208 | .attr = { | ||
| 209 | .name = "nbcfg_ctl", | ||
| 210 | .mode = (S_IRUGO | S_IWUSR) | ||
| 211 | }, | ||
| 212 | .show = amd64_nbcfg_show, | ||
| 213 | .store = amd64_nbcfg_store, | ||
| 214 | }, | ||
| 215 | { | ||
| 216 | .attr = { | ||
| 217 | .name = "dhar", | 34 | .name = "dhar", |
| 218 | .mode = (S_IRUGO) | 35 | .mode = (S_IRUGO) |
| 219 | }, | 36 | }, |
| @@ -225,7 +42,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = { | |||
| 225 | .name = "dbam", | 42 | .name = "dbam", |
| 226 | .mode = (S_IRUGO) | 43 | .mode = (S_IRUGO) |
| 227 | }, | 44 | }, |
| 228 | .show = amd64_dbam_show, | 45 | .show = amd64_dbam0_show, |
| 229 | .store = NULL, | 46 | .store = NULL, |
| 230 | }, | 47 | }, |
| 231 | { | 48 | { |
| @@ -233,7 +50,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = { | |||
| 233 | .name = "topmem", | 50 | .name = "topmem", |
| 234 | .mode = (S_IRUGO) | 51 | .mode = (S_IRUGO) |
| 235 | }, | 52 | }, |
| 236 | .show = amd64_topmem_show, | 53 | .show = amd64_top_mem_show, |
| 237 | .store = NULL, | 54 | .store = NULL, |
| 238 | }, | 55 | }, |
| 239 | { | 56 | { |
| @@ -241,7 +58,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = { | |||
| 241 | .name = "topmem2", | 58 | .name = "topmem2", |
| 242 | .mode = (S_IRUGO) | 59 | .mode = (S_IRUGO) |
| 243 | }, | 60 | }, |
| 244 | .show = amd64_topmem2_show, | 61 | .show = amd64_top_mem2_show, |
| 245 | .store = NULL, | 62 | .store = NULL, |
| 246 | }, | 63 | }, |
| 247 | { | 64 | { |
diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c index 070968178a24..2941dca91aae 100644 --- a/drivers/edac/edac_device_sysfs.c +++ b/drivers/edac/edac_device_sysfs.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/ctype.h> | 13 | #include <linux/ctype.h> |
| 14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
| 15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
| 16 | #include <linux/edac.h> | ||
| 16 | 17 | ||
| 17 | #include "edac_core.h" | 18 | #include "edac_core.h" |
| 18 | #include "edac_module.h" | 19 | #include "edac_module.h" |
| @@ -235,7 +236,7 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev) | |||
| 235 | debugf1("%s()\n", __func__); | 236 | debugf1("%s()\n", __func__); |
| 236 | 237 | ||
| 237 | /* get the /sys/devices/system/edac reference */ | 238 | /* get the /sys/devices/system/edac reference */ |
| 238 | edac_class = edac_get_edac_class(); | 239 | edac_class = edac_get_sysfs_class(); |
| 239 | if (edac_class == NULL) { | 240 | if (edac_class == NULL) { |
| 240 | debugf1("%s() no edac_class error\n", __func__); | 241 | debugf1("%s() no edac_class error\n", __func__); |
| 241 | err = -ENODEV; | 242 | err = -ENODEV; |
| @@ -255,7 +256,7 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev) | |||
| 255 | 256 | ||
| 256 | if (!try_module_get(edac_dev->owner)) { | 257 | if (!try_module_get(edac_dev->owner)) { |
| 257 | err = -ENODEV; | 258 | err = -ENODEV; |
| 258 | goto err_out; | 259 | goto err_mod_get; |
| 259 | } | 260 | } |
| 260 | 261 | ||
| 261 | /* register */ | 262 | /* register */ |
| @@ -282,6 +283,9 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev) | |||
| 282 | err_kobj_reg: | 283 | err_kobj_reg: |
| 283 | module_put(edac_dev->owner); | 284 | module_put(edac_dev->owner); |
| 284 | 285 | ||
| 286 | err_mod_get: | ||
| 287 | edac_put_sysfs_class(); | ||
| 288 | |||
| 285 | err_out: | 289 | err_out: |
| 286 | return err; | 290 | return err; |
| 287 | } | 291 | } |
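The new err_mod_get label restores the usual kernel unwind discipline: every failure jumps to the label that undoes exactly the steps that had already succeeded, in reverse order, so the sysfs class reference taken at the top is always balanced. Schematically (names from the function above):

	edac_class = edac_get_sysfs_class();
	if (edac_class == NULL)
		goto err_out;          /* nothing acquired yet */
	if (!try_module_get(edac_dev->owner))
		goto err_mod_get;      /* undo: put the class  */
	/* ... a kobject registration failure jumps to err_kobj_reg ... */

	err_kobj_reg:
		module_put(edac_dev->owner);
	err_mod_get:
		edac_put_sysfs_class();
	err_out:
		return err;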
| @@ -290,12 +294,11 @@ err_out: | |||
| 290 | * edac_device_unregister_sysfs_main_kobj: | 294 | * edac_device_unregister_sysfs_main_kobj: |
| 291 | * the '..../edac/<name>' kobject | 295 | * the '..../edac/<name>' kobject |
| 292 | */ | 296 | */ |
| 293 | void edac_device_unregister_sysfs_main_kobj( | 297 | void edac_device_unregister_sysfs_main_kobj(struct edac_device_ctl_info *dev) |
| 294 | struct edac_device_ctl_info *edac_dev) | ||
| 295 | { | 298 | { |
| 296 | debugf0("%s()\n", __func__); | 299 | debugf0("%s()\n", __func__); |
| 297 | debugf4("%s() name of kobject is: %s\n", | 300 | debugf4("%s() name of kobject is: %s\n", |
| 298 | __func__, kobject_name(&edac_dev->kobj)); | 301 | __func__, kobject_name(&dev->kobj)); |
| 299 | 302 | ||
| 300 | /* | 303 | /* |
| 301 | * Unregister the edac device's kobject and | 304 | * Unregister the edac device's kobject and |
| @@ -304,7 +307,8 @@ void edac_device_unregister_sysfs_main_kobj( | |||
| 304 | * a) module_put() this module | 307 | * a) module_put() this module |
| 305 | * b) 'kfree' the memory | 308 | * b) 'kfree' the memory |
| 306 | */ | 309 | */ |
| 307 | kobject_put(&edac_dev->kobj); | 310 | kobject_put(&dev->kobj); |
| 311 | edac_put_sysfs_class(); | ||
| 308 | } | 312 | } |
| 309 | 313 | ||
| 310 | /* edac_dev -> instance information */ | 314 | /* edac_dev -> instance information */ |
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 8aad94d10c0c..a4135860149b 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | 11 | ||
| 12 | #include <linux/ctype.h> | 12 | #include <linux/ctype.h> |
| 13 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
| 14 | #include <linux/edac.h> | ||
| 14 | #include <linux/bug.h> | 15 | #include <linux/bug.h> |
| 15 | 16 | ||
| 16 | #include "edac_core.h" | 17 | #include "edac_core.h" |
| @@ -1011,13 +1012,13 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) | |||
| 1011 | */ | 1012 | */ |
| 1012 | int edac_sysfs_setup_mc_kset(void) | 1013 | int edac_sysfs_setup_mc_kset(void) |
| 1013 | { | 1014 | { |
| 1014 | int err = 0; | 1015 | int err = -EINVAL; |
| 1015 | struct sysdev_class *edac_class; | 1016 | struct sysdev_class *edac_class; |
| 1016 | 1017 | ||
| 1017 | debugf1("%s()\n", __func__); | 1018 | debugf1("%s()\n", __func__); |
| 1018 | 1019 | ||
| 1019 | /* get the /sys/devices/system/edac class reference */ | 1020 | /* get the /sys/devices/system/edac class reference */ |
| 1020 | edac_class = edac_get_edac_class(); | 1021 | edac_class = edac_get_sysfs_class(); |
| 1021 | if (edac_class == NULL) { | 1022 | if (edac_class == NULL) { |
| 1022 | debugf1("%s() no edac_class error=%d\n", __func__, err); | 1023 | debugf1("%s() no edac_class error=%d\n", __func__, err); |
| 1023 | goto fail_out; | 1024 | goto fail_out; |
| @@ -1028,15 +1029,16 @@ int edac_sysfs_setup_mc_kset(void) | |||
| 1028 | if (!mc_kset) { | 1029 | if (!mc_kset) { |
| 1029 | err = -ENOMEM; | 1030 | err = -ENOMEM; |
| 1030 | debugf1("%s() Failed to register '.../edac/mc'\n", __func__); | 1031 | debugf1("%s() Failed to register '.../edac/mc'\n", __func__); |
| 1031 | goto fail_out; | 1032 | goto fail_kset; |
| 1032 | } | 1033 | } |
| 1033 | 1034 | ||
| 1034 | debugf1("%s() Registered '.../edac/mc' kobject\n", __func__); | 1035 | debugf1("%s() Registered '.../edac/mc' kobject\n", __func__); |
| 1035 | 1036 | ||
| 1036 | return 0; | 1037 | return 0; |
| 1037 | 1038 | ||
| 1039 | fail_kset: | ||
| 1040 | edac_put_sysfs_class(); | ||
| 1038 | 1041 | ||
| 1039 | /* error unwind stack */ | ||
| 1040 | fail_out: | 1042 | fail_out: |
| 1041 | return err; | 1043 | return err; |
| 1042 | } | 1044 | } |
| @@ -1049,5 +1051,6 @@ fail_out: | |||
| 1049 | void edac_sysfs_teardown_mc_kset(void) | 1051 | void edac_sysfs_teardown_mc_kset(void) |
| 1050 | { | 1052 | { |
| 1051 | kset_unregister(mc_kset); | 1053 | kset_unregister(mc_kset); |
| 1054 | edac_put_sysfs_class(); | ||
| 1052 | } | 1055 | } |
| 1053 | 1056 | ||
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c deleted file mode 100644 index 9014df6f605d..000000000000 --- a/drivers/edac/edac_mce_amd.c +++ /dev/null | |||
| @@ -1,452 +0,0 @@ | |||
| 1 | #include <linux/module.h> | ||
| 2 | #include "edac_mce_amd.h" | ||
| 3 | |||
| 4 | static bool report_gart_errors; | ||
| 5 | static void (*nb_bus_decoder)(int node_id, struct err_regs *regs); | ||
| 6 | |||
| 7 | void amd_report_gart_errors(bool v) | ||
| 8 | { | ||
| 9 | report_gart_errors = v; | ||
| 10 | } | ||
| 11 | EXPORT_SYMBOL_GPL(amd_report_gart_errors); | ||
| 12 | |||
| 13 | void amd_register_ecc_decoder(void (*f)(int, struct err_regs *)) | ||
| 14 | { | ||
| 15 | nb_bus_decoder = f; | ||
| 16 | } | ||
| 17 | EXPORT_SYMBOL_GPL(amd_register_ecc_decoder); | ||
| 18 | |||
| 19 | void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *)) | ||
| 20 | { | ||
| 21 | if (nb_bus_decoder) { | ||
| 22 | WARN_ON(nb_bus_decoder != f); | ||
| 23 | |||
| 24 | nb_bus_decoder = NULL; | ||
| 25 | } | ||
| 26 | } | ||
| 27 | EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder); | ||
| 28 | |||
| 29 | /* | ||
| 30 | * string representation for the different MCA reported error types, see F3x48 | ||
| 31 | * or MSR0000_0411. | ||
| 32 | */ | ||
| 33 | const char *tt_msgs[] = { /* transaction type */ | ||
| 34 | "instruction", | ||
| 35 | "data", | ||
| 36 | "generic", | ||
| 37 | "reserved" | ||
| 38 | }; | ||
| 39 | EXPORT_SYMBOL_GPL(tt_msgs); | ||
| 40 | |||
| 41 | const char *ll_msgs[] = { /* cache level */ | ||
| 42 | "L0", | ||
| 43 | "L1", | ||
| 44 | "L2", | ||
| 45 | "L3/generic" | ||
| 46 | }; | ||
| 47 | EXPORT_SYMBOL_GPL(ll_msgs); | ||
| 48 | |||
| 49 | const char *rrrr_msgs[] = { | ||
| 50 | "generic", | ||
| 51 | "generic read", | ||
| 52 | "generic write", | ||
| 53 | "data read", | ||
| 54 | "data write", | ||
| 55 | "inst fetch", | ||
| 56 | "prefetch", | ||
| 57 | "evict", | ||
| 58 | "snoop", | ||
| 59 | "reserved RRRR= 9", | ||
| 60 | "reserved RRRR= 10", | ||
| 61 | "reserved RRRR= 11", | ||
| 62 | "reserved RRRR= 12", | ||
| 63 | "reserved RRRR= 13", | ||
| 64 | "reserved RRRR= 14", | ||
| 65 | "reserved RRRR= 15" | ||
| 66 | }; | ||
| 67 | EXPORT_SYMBOL_GPL(rrrr_msgs); | ||
| 68 | |||
| 69 | const char *pp_msgs[] = { /* participating processor */ | ||
| 70 | "local node originated (SRC)", | ||
| 71 | "local node responded to request (RES)", | ||
| 72 | "local node observed as 3rd party (OBS)", | ||
| 73 | "generic" | ||
| 74 | }; | ||
| 75 | EXPORT_SYMBOL_GPL(pp_msgs); | ||
| 76 | |||
| 77 | const char *to_msgs[] = { | ||
| 78 | "no timeout", | ||
| 79 | "timed out" | ||
| 80 | }; | ||
| 81 | EXPORT_SYMBOL_GPL(to_msgs); | ||
| 82 | |||
| 83 | const char *ii_msgs[] = { /* memory or i/o */ | ||
| 84 | "mem access", | ||
| 85 | "reserved", | ||
| 86 | "i/o access", | ||
| 87 | "generic" | ||
| 88 | }; | ||
| 89 | EXPORT_SYMBOL_GPL(ii_msgs); | ||
| 90 | |||
| 91 | /* | ||
| 92 | * Map the 4 or 5 (family-specific) bits of Extended Error code to the | ||
| 93 | * string table. | ||
| 94 | */ | ||
| 95 | const char *ext_msgs[] = { | ||
| 96 | "K8 ECC error", /* 0_0000b */ | ||
| 97 | "CRC error on link", /* 0_0001b */ | ||
| 98 | "Sync error packets on link", /* 0_0010b */ | ||
| 99 | "Master Abort during link operation", /* 0_0011b */ | ||
| 100 | "Target Abort during link operation", /* 0_0100b */ | ||
| 101 | "Invalid GART PTE entry during table walk", /* 0_0101b */ | ||
| 102 | "Unsupported atomic RMW command received", /* 0_0110b */ | ||
| 103 | "WDT error: NB transaction timeout", /* 0_0111b */ | ||
| 104 | "ECC/ChipKill ECC error", /* 0_1000b */ | ||
| 105 | "SVM DEV Error", /* 0_1001b */ | ||
| 106 | "Link Data error", /* 0_1010b */ | ||
| 107 | "Link/L3/Probe Filter Protocol error", /* 0_1011b */ | ||
| 108 | "NB Internal Arrays Parity error", /* 0_1100b */ | ||
| 109 | "DRAM Address/Control Parity error", /* 0_1101b */ | ||
| 110 | "Link Transmission error", /* 0_1110b */ | ||
| 111 | "GART/DEV Table Walk Data error" /* 0_1111b */ | ||
| 112 | "Res 0x100 error", /* 1_0000b */ | ||
| 113 | "Res 0x101 error", /* 1_0001b */ | ||
| 114 | "Res 0x102 error", /* 1_0010b */ | ||
| 115 | "Res 0x103 error", /* 1_0011b */ | ||
| 116 | "Res 0x104 error", /* 1_0100b */ | ||
| 117 | "Res 0x105 error", /* 1_0101b */ | ||
| 118 | "Res 0x106 error", /* 1_0110b */ | ||
| 119 | "Res 0x107 error", /* 1_0111b */ | ||
| 120 | "Res 0x108 error", /* 1_1000b */ | ||
| 121 | "Res 0x109 error", /* 1_1001b */ | ||
| 122 | "Res 0x10A error", /* 1_1010b */ | ||
| 123 | "Res 0x10B error", /* 1_1011b */ | ||
| 124 | "ECC error in L3 Cache Data", /* 1_1100b */ | ||
| 125 | "L3 Cache Tag error", /* 1_1101b */ | ||
| 126 | "L3 Cache LRU Parity error", /* 1_1110b */ | ||
| 127 | "Probe Filter error" /* 1_1111b */ | ||
| 128 | }; | ||
| 129 | EXPORT_SYMBOL_GPL(ext_msgs); | ||
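As a rough illustration of how ext_msgs[] is meant to be indexed, here is a minimal userspace-compilable sketch; the bit positions follow the EXT_ERROR_CODE() macro in edac_mce_amd.h, and the sample NBSL value is invented for the example:

```c
#include <stdio.h>
#include <stdint.h>

/* Extended error code sits in NBSL bits [20:16] (cf. EXT_ERROR_CODE()). */
static unsigned int ext_error_code(uint32_t nbsl)
{
	return (nbsl >> 16) & 0x1f;
}

int main(void)
{
	uint32_t nbsl = 0x000d0000;	/* hypothetical sample value */

	/* index 0xd selects "DRAM Address/Control Parity error" above */
	printf("extended error code: 0x%x\n", ext_error_code(nbsl));
	return 0;
}
```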
| 130 | |||
| 131 | static void amd_decode_dc_mce(u64 mc0_status) | ||
| 132 | { | ||
| 133 | u32 ec = mc0_status & 0xffff; | ||
| 134 | u32 xec = (mc0_status >> 16) & 0xf; | ||
| 135 | |||
| 136 | pr_emerg("Data Cache Error"); | ||
| 137 | |||
| 138 | if (xec == 1 && TLB_ERROR(ec)) | ||
| 139 | pr_cont(": %s TLB multimatch.\n", LL_MSG(ec)); | ||
| 140 | else if (xec == 0) { | ||
| 141 | if (mc0_status & (1ULL << 40)) | ||
| 142 | pr_cont(" during Data Scrub.\n"); | ||
| 143 | else if (TLB_ERROR(ec)) | ||
| 144 | pr_cont(": %s TLB parity error.\n", LL_MSG(ec)); | ||
| 145 | else if (MEM_ERROR(ec)) { | ||
| 146 | u8 ll = ec & 0x3; | ||
| 147 | u8 tt = (ec >> 2) & 0x3; | ||
| 148 | u8 rrrr = (ec >> 4) & 0xf; | ||
| 149 | |||
| 150 | /* see F10h BKDG (31116), Table 92. */ | ||
| 151 | if (ll == 0x1) { | ||
| 152 | if (tt != 0x1) | ||
| 153 | goto wrong_dc_mce; | ||
| 154 | |||
| 155 | pr_cont(": Data/Tag %s error.\n", RRRR_MSG(ec)); | ||
| 156 | |||
| 157 | } else if (ll == 0x2 && rrrr == 0x3) | ||
| 158 | pr_cont(" during L1 linefill from L2.\n"); | ||
| 159 | else | ||
| 160 | goto wrong_dc_mce; | ||
| 161 | } else if (BUS_ERROR(ec) && boot_cpu_data.x86 == 0xf) | ||
| 162 | pr_cont(" during system linefill.\n"); | ||
| 163 | else | ||
| 164 | goto wrong_dc_mce; | ||
| 165 | } else | ||
| 166 | goto wrong_dc_mce; | ||
| 167 | |||
| 168 | return; | ||
| 169 | |||
| 170 | wrong_dc_mce: | ||
| 171 | pr_warning("Corrupted DC MCE info?\n"); | ||
| 172 | } | ||
| 173 | |||
| 174 | static void amd_decode_ic_mce(u64 mc1_status) | ||
| 175 | { | ||
| 176 | u32 ec = mc1_status & 0xffff; | ||
| 177 | u32 xec = (mc1_status >> 16) & 0xf; | ||
| 178 | |||
| 179 | pr_emerg("Instruction Cache Error"); | ||
| 180 | |||
| 181 | if (xec == 1 && TLB_ERROR(ec)) | ||
| 182 | pr_cont(": %s TLB multimatch.\n", LL_MSG(ec)); | ||
| 183 | else if (xec == 0) { | ||
| 184 | if (TLB_ERROR(ec)) | ||
| 185 | pr_cont(": %s TLB Parity error.\n", LL_MSG(ec)); | ||
| 186 | else if (BUS_ERROR(ec)) { | ||
| 187 | if (boot_cpu_data.x86 == 0xf && | ||
| 188 | (mc1_status & (1ULL << 58))) | ||
| 189 | pr_cont(" during system linefill.\n"); | ||
| 190 | else | ||
| 191 | pr_cont(" during attempted NB data read.\n"); | ||
| 192 | } else if (MEM_ERROR(ec)) { | ||
| 193 | u8 ll = ec & 0x3; | ||
| 194 | u8 rrrr = (ec >> 4) & 0xf; | ||
| 195 | |||
| 196 | if (ll == 0x2) | ||
| 197 | pr_cont(" during a linefill from L2.\n"); | ||
| 198 | else if (ll == 0x1) { | ||
| 199 | |||
| 200 | switch (rrrr) { | ||
| 201 | case 0x5: | ||
| 202 | pr_cont(": Parity error during " | ||
| 203 | "data load.\n"); | ||
| 204 | break; | ||
| 205 | |||
| 206 | case 0x7: | ||
| 207 | pr_cont(": Copyback Parity/Victim" | ||
| 208 | " error.\n"); | ||
| 209 | break; | ||
| 210 | |||
| 211 | case 0x8: | ||
| 212 | pr_cont(": Tag Snoop error.\n"); | ||
| 213 | break; | ||
| 214 | |||
| 215 | default: | ||
| 216 | goto wrong_ic_mce; | ||
| 217 | break; | ||
| 218 | } | ||
| 219 | } | ||
| 220 | } else | ||
| 221 | goto wrong_ic_mce; | ||
| 222 | } else | ||
| 223 | goto wrong_ic_mce; | ||
| 224 | |||
| 225 | return; | ||
| 226 | |||
| 227 | wrong_ic_mce: | ||
| 228 | pr_warning("Corrupted IC MCE info?\n"); | ||
| 229 | } | ||
| 230 | |||
| 231 | static void amd_decode_bu_mce(u64 mc2_status) | ||
| 232 | { | ||
| 233 | u32 ec = mc2_status & 0xffff; | ||
| 234 | u32 xec = (mc2_status >> 16) & 0xf; | ||
| 235 | |||
| 236 | pr_emerg("Bus Unit Error"); | ||
| 237 | |||
| 238 | if (xec == 0x1) | ||
| 239 | pr_cont(" in the write data buffers.\n"); | ||
| 240 | else if (xec == 0x3) | ||
| 241 | pr_cont(" in the victim data buffers.\n"); | ||
| 242 | else if (xec == 0x2 && MEM_ERROR(ec)) | ||
| 243 | pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec)); | ||
| 244 | else if (xec == 0x0) { | ||
| 245 | if (TLB_ERROR(ec)) | ||
| 246 | pr_cont(": %s error in a Page Descriptor Cache or " | ||
| 247 | "Guest TLB.\n", TT_MSG(ec)); | ||
| 248 | else if (BUS_ERROR(ec)) | ||
| 249 | pr_cont(": %s/ECC error in data read from NB: %s.\n", | ||
| 250 | RRRR_MSG(ec), PP_MSG(ec)); | ||
| 251 | else if (MEM_ERROR(ec)) { | ||
| 252 | u8 rrrr = (ec >> 4) & 0xf; | ||
| 253 | |||
| 254 | if (rrrr >= 0x7) | ||
| 255 | pr_cont(": %s error during data copyback.\n", | ||
| 256 | RRRR_MSG(ec)); | ||
| 257 | else if (rrrr <= 0x1) | ||
| 258 | pr_cont(": %s parity/ECC error during data " | ||
| 259 | "access from L2.\n", RRRR_MSG(ec)); | ||
| 260 | else | ||
| 261 | goto wrong_bu_mce; | ||
| 262 | } else | ||
| 263 | goto wrong_bu_mce; | ||
| 264 | } else | ||
| 265 | goto wrong_bu_mce; | ||
| 266 | |||
| 267 | return; | ||
| 268 | |||
| 269 | wrong_bu_mce: | ||
| 270 | pr_warning("Corrupted BU MCE info?\n"); | ||
| 271 | } | ||
| 272 | |||
| 273 | static void amd_decode_ls_mce(u64 mc3_status) | ||
| 274 | { | ||
| 275 | u32 ec = mc3_status & 0xffff; | ||
| 276 | u32 xec = (mc3_status >> 16) & 0xf; | ||
| 277 | |||
| 278 | pr_emerg("Load Store Error"); | ||
| 279 | |||
| 280 | if (xec == 0x0) { | ||
| 281 | u8 rrrr = (ec >> 4) & 0xf; | ||
| 282 | |||
| 283 | if (!BUS_ERROR(ec) || (rrrr != 0x3 && rrrr != 0x4)) | ||
| 284 | goto wrong_ls_mce; | ||
| 285 | |||
| 286 | pr_cont(" during %s.\n", RRRR_MSG(ec)); | ||
| 287 | } | ||
| 288 | return; | ||
| 289 | |||
| 290 | wrong_ls_mce: | ||
| 291 | pr_warning("Corrupted LS MCE info?\n"); | ||
| 292 | } | ||
| 293 | |||
| 294 | void amd_decode_nb_mce(int node_id, struct err_regs *regs, int handle_errors) | ||
| 295 | { | ||
| 296 | u32 ec = ERROR_CODE(regs->nbsl); | ||
| 297 | |||
| 298 | if (!handle_errors) | ||
| 299 | return; | ||
| 300 | |||
| 301 | /* | ||
| 302 | * GART TLB error reporting is disabled by default. Bail out early. | ||
| 303 | */ | ||
| 304 | if (TLB_ERROR(ec) && !report_gart_errors) | ||
| 305 | return; | ||
| 306 | |||
| 307 | pr_emerg("Northbridge Error, node %d", node_id); | ||
| 308 | |||
| 309 | /* | ||
| 310 | * F10h revD and later can disable ErrCpu[3:0]; check that first. The | ||
| 311 | * value encoding has also changed there, so interpret those bits differently. | ||
| 312 | */ | ||
| 313 | if ((boot_cpu_data.x86 == 0x10) && | ||
| 314 | (boot_cpu_data.x86_model > 7)) { | ||
| 315 | if (regs->nbsh & K8_NBSH_ERR_CPU_VAL) | ||
| 316 | pr_cont(", core: %u\n", (u8)(regs->nbsh & 0xf)); | ||
| 317 | } else { | ||
| 318 | u8 assoc_cpus = regs->nbsh & 0xf; | ||
| 319 | |||
| 320 | if (assoc_cpus > 0) | ||
| 321 | pr_cont(", core: %d", fls(assoc_cpus) - 1); | ||
| 322 | |||
| 323 | pr_cont("\n"); | ||
| 324 | } | ||
| 325 | |||
| 326 | pr_emerg("%s.\n", EXT_ERR_MSG(regs->nbsl)); | ||
| 327 | |||
| 328 | if (BUS_ERROR(ec) && nb_bus_decoder) | ||
| 329 | nb_bus_decoder(node_id, regs); | ||
| 330 | } | ||
| 331 | EXPORT_SYMBOL_GPL(amd_decode_nb_mce); | ||
| 332 | |||
| 333 | static void amd_decode_fr_mce(u64 mc5_status) | ||
| 334 | { | ||
| 335 | /* we have only one error signature so match all fields at once. */ | ||
| 336 | if ((mc5_status & 0xffff) == 0x0f0f) | ||
| 337 | pr_emerg(" FR Error: CPU Watchdog timer expire.\n"); | ||
| 338 | else | ||
| 339 | pr_warning("Corrupted FR MCE info?\n"); | ||
| 340 | } | ||
| 341 | |||
| 342 | static inline void amd_decode_err_code(unsigned int ec) | ||
| 343 | { | ||
| 344 | if (TLB_ERROR(ec)) { | ||
| 345 | pr_emerg("Transaction: %s, Cache Level %s\n", | ||
| 346 | TT_MSG(ec), LL_MSG(ec)); | ||
| 347 | } else if (MEM_ERROR(ec)) { | ||
| 348 | pr_emerg("Transaction: %s, Type: %s, Cache Level: %s", | ||
| 349 | RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec)); | ||
| 350 | } else if (BUS_ERROR(ec)) { | ||
| 351 | pr_emerg("Transaction type: %s(%s), %s, Cache Level: %s, " | ||
| 352 | "Participating Processor: %s\n", | ||
| 353 | RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec), | ||
| 354 | PP_MSG(ec)); | ||
| 355 | } else | ||
| 356 | pr_warning("Huh? Unknown MCE error 0x%x\n", ec); | ||
| 357 | } | ||
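The field extraction behind the TT/LL/RRRR/TO/PP macros used above can be replayed in plain C. A minimal sketch with an invented error-code value (II shares bits [3:2] with TT and only applies to bus errors, so it is omitted here):

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint16_t ec = 0x0813;	/* hypothetical MCA error code, low 16 bits */

	printf("LL   (cache level)   = %u\n", ec & 0x3);		/* bits [1:0]  */
	printf("TT   (transaction)   = %u\n", (ec >> 2) & 0x3);	/* bits [3:2]  */
	printf("RRRR (memory op)     = %u\n", (ec >> 4) & 0xf);	/* bits [7:4]  */
	printf("TO   (timeout)       = %u\n", (ec >> 8) & 0x1);	/* bit  [8]    */
	printf("PP   (participation) = %u\n", (ec >> 9) & 0x3);	/* bits [10:9] */
	return 0;
}
```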
| 358 | |||
| 359 | static int amd_decode_mce(struct notifier_block *nb, unsigned long val, | ||
| 360 | void *data) | ||
| 361 | { | ||
| 362 | struct mce *m = (struct mce *)data; | ||
| 363 | struct err_regs regs; | ||
| 364 | int node, ecc; | ||
| 365 | |||
| 366 | pr_emerg("MC%d_STATUS: ", m->bank); | ||
| 367 | |||
| 368 | pr_cont("%sorrected error, other errors lost: %s, " | ||
| 369 | "CPU context corrupt: %s", | ||
| 370 | ((m->status & MCI_STATUS_UC) ? "Unc" : "C"), | ||
| 371 | ((m->status & MCI_STATUS_OVER) ? "yes" : "no"), | ||
| 372 | ((m->status & MCI_STATUS_PCC) ? "yes" : "no")); | ||
| 373 | |||
| 374 | /* decode the UECC/CECC pair, MCi_STATUS bits [46:45] (NBSH bits [14:13]) */ | ||
| 375 | ecc = (m->status >> 45) & 0x3; | ||
| 376 | if (ecc) | ||
| 377 | pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U")); | ||
| 378 | |||
| 379 | pr_cont("\n"); | ||
| 380 | |||
| 381 | switch (m->bank) { | ||
| 382 | case 0: | ||
| 383 | amd_decode_dc_mce(m->status); | ||
| 384 | break; | ||
| 385 | |||
| 386 | case 1: | ||
| 387 | amd_decode_ic_mce(m->status); | ||
| 388 | break; | ||
| 389 | |||
| 390 | case 2: | ||
| 391 | amd_decode_bu_mce(m->status); | ||
| 392 | break; | ||
| 393 | |||
| 394 | case 3: | ||
| 395 | amd_decode_ls_mce(m->status); | ||
| 396 | break; | ||
| 397 | |||
| 398 | case 4: | ||
| 399 | regs.nbsl = (u32) m->status; | ||
| 400 | regs.nbsh = (u32)(m->status >> 32); | ||
| 401 | regs.nbeal = (u32) m->addr; | ||
| 402 | regs.nbeah = (u32)(m->addr >> 32); | ||
| 403 | node = amd_get_nb_id(m->extcpu); | ||
| 404 | |||
| 405 | amd_decode_nb_mce(node, ®s, 1); | ||
| 406 | break; | ||
| 407 | |||
| 408 | case 5: | ||
| 409 | amd_decode_fr_mce(m->status); | ||
| 410 | break; | ||
| 411 | |||
| 412 | default: | ||
| 413 | break; | ||
| 414 | } | ||
| 415 | |||
| 416 | amd_decode_err_code(m->status & 0xffff); | ||
| 417 | |||
| 418 | return NOTIFY_STOP; | ||
| 419 | } | ||
| 420 | |||
| 421 | static struct notifier_block amd_mce_dec_nb = { | ||
| 422 | .notifier_call = amd_decode_mce, | ||
| 423 | }; | ||
| 424 | |||
| 425 | static int __init mce_amd_init(void) | ||
| 426 | { | ||
| 427 | /* | ||
| 428 | * We can decode MCEs for K8, F10h and F11h CPUs: | ||
| 429 | */ | ||
| 430 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) | ||
| 431 | return 0; | ||
| 432 | |||
| 433 | if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) | ||
| 434 | return 0; | ||
| 435 | |||
| 436 | atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb); | ||
| 437 | |||
| 438 | return 0; | ||
| 439 | } | ||
| 440 | early_initcall(mce_amd_init); | ||
| 441 | |||
| 442 | #ifdef MODULE | ||
| 443 | static void __exit mce_amd_exit(void) | ||
| 444 | { | ||
| 445 | atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb); | ||
| 446 | } | ||
| 447 | |||
| 448 | MODULE_DESCRIPTION("AMD MCE decoder"); | ||
| 449 | MODULE_ALIAS("edac-mce-amd"); | ||
| 450 | MODULE_LICENSE("GPL"); | ||
| 451 | module_exit(mce_amd_exit); | ||
| 452 | #endif | ||
diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c index 7e1374afd967..be4b075c3098 100644 --- a/drivers/edac/edac_module.c +++ b/drivers/edac/edac_module.c | |||
| @@ -27,15 +27,6 @@ EXPORT_SYMBOL_GPL(edac_debug_level); | |||
| 27 | struct workqueue_struct *edac_workqueue; | 27 | struct workqueue_struct *edac_workqueue; |
| 28 | 28 | ||
| 29 | /* | 29 | /* |
| 30 | * sysfs object: /sys/devices/system/edac | ||
| 31 | * need to export to other files in this module | ||
| 32 | */ | ||
| 33 | static struct sysdev_class edac_class = { | ||
| 34 | .name = "edac", | ||
| 35 | }; | ||
| 36 | static int edac_class_valid; | ||
| 37 | |||
| 38 | /* | ||
| 39 | * edac_op_state_to_string() | 30 | * edac_op_state_to_string() |
| 40 | */ | 31 | */ |
| 41 | char *edac_op_state_to_string(int opstate) | 32 | char *edac_op_state_to_string(int opstate) |
| @@ -55,60 +46,6 @@ char *edac_op_state_to_string(int opstate) | |||
| 55 | } | 46 | } |
| 56 | 47 | ||
| 57 | /* | 48 | /* |
| 58 | * edac_get_edac_class() | ||
| 59 | * | ||
| 60 | * return pointer to the edac class of 'edac' | ||
| 61 | */ | ||
| 62 | struct sysdev_class *edac_get_edac_class(void) | ||
| 63 | { | ||
| 64 | struct sysdev_class *classptr = NULL; | ||
| 65 | |||
| 66 | if (edac_class_valid) | ||
| 67 | classptr = &edac_class; | ||
| 68 | |||
| 69 | return classptr; | ||
| 70 | } | ||
| 71 | |||
| 72 | /* | ||
| 73 | * edac_register_sysfs_edac_name() | ||
| 74 | * | ||
| 75 | * register the 'edac' into /sys/devices/system | ||
| 76 | * | ||
| 77 | * return: | ||
| 78 | * 0 success | ||
| 79 | * !0 error | ||
| 80 | */ | ||
| 81 | static int edac_register_sysfs_edac_name(void) | ||
| 82 | { | ||
| 83 | int err; | ||
| 84 | |||
| 85 | /* create the /sys/devices/system/edac directory */ | ||
| 86 | err = sysdev_class_register(&edac_class); | ||
| 87 | |||
| 88 | if (err) { | ||
| 89 | debugf1("%s() error=%d\n", __func__, err); | ||
| 90 | return err; | ||
| 91 | } | ||
| 92 | |||
| 93 | edac_class_valid = 1; | ||
| 94 | return 0; | ||
| 95 | } | ||
| 96 | |||
| 97 | /* | ||
| 98 | * sysdev_class_unregister() | ||
| 99 | * | ||
| 100 | * unregister the 'edac' from /sys/devices/system | ||
| 101 | */ | ||
| 102 | static void edac_unregister_sysfs_edac_name(void) | ||
| 103 | { | ||
| 104 | /* only if currently registered, then unregister it */ | ||
| 105 | if (edac_class_valid) | ||
| 106 | sysdev_class_unregister(&edac_class); | ||
| 107 | |||
| 108 | edac_class_valid = 0; | ||
| 109 | } | ||
| 110 | |||
| 111 | /* | ||
| 112 | * edac_workqueue_setup | 49 | * edac_workqueue_setup |
| 113 | * initialize the edac work queue for polling operations | 50 | * initialize the edac work queue for polling operations |
| 114 | */ | 51 | */ |
| @@ -154,21 +91,11 @@ static int __init edac_init(void) | |||
| 154 | edac_pci_clear_parity_errors(); | 91 | edac_pci_clear_parity_errors(); |
| 155 | 92 | ||
| 156 | /* | 93 | /* |
| 157 | * perform the registration of the /sys/devices/system/edac class object | ||
| 158 | */ | ||
| 159 | if (edac_register_sysfs_edac_name()) { | ||
| 160 | edac_printk(KERN_ERR, EDAC_MC, | ||
| 161 | "Error initializing 'edac' kobject\n"); | ||
| 162 | err = -ENODEV; | ||
| 163 | goto error; | ||
| 164 | } | ||
| 165 | |||
| 166 | /* | ||
| 167 | * now set up the mc_kset under the edac class object | 94 | * now set up the mc_kset under the edac class object |
| 168 | */ | 95 | */ |
| 169 | err = edac_sysfs_setup_mc_kset(); | 96 | err = edac_sysfs_setup_mc_kset(); |
| 170 | if (err) | 97 | if (err) |
| 171 | goto sysfs_setup_fail; | 98 | goto error; |
| 172 | 99 | ||
| 173 | /* Setup/Initialize the workq for this core */ | 100 | /* Setup/Initialize the workq for this core */ |
| 174 | err = edac_workqueue_setup(); | 101 | err = edac_workqueue_setup(); |
| @@ -183,9 +110,6 @@ static int __init edac_init(void) | |||
| 183 | workq_fail: | 110 | workq_fail: |
| 184 | edac_sysfs_teardown_mc_kset(); | 111 | edac_sysfs_teardown_mc_kset(); |
| 185 | 112 | ||
| 186 | sysfs_setup_fail: | ||
| 187 | edac_unregister_sysfs_edac_name(); | ||
| 188 | |||
| 189 | error: | 113 | error: |
| 190 | return err; | 114 | return err; |
| 191 | } | 115 | } |
| @@ -201,7 +125,6 @@ static void __exit edac_exit(void) | |||
| 201 | /* tear down the various subsystems */ | 125 | /* tear down the various subsystems */ |
| 202 | edac_workqueue_teardown(); | 126 | edac_workqueue_teardown(); |
| 203 | edac_sysfs_teardown_mc_kset(); | 127 | edac_sysfs_teardown_mc_kset(); |
| 204 | edac_unregister_sysfs_edac_name(); | ||
| 205 | } | 128 | } |
| 206 | 129 | ||
| 207 | /* | 130 | /* |
diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h index 233d4798c3aa..17aabb7b90ec 100644 --- a/drivers/edac/edac_module.h +++ b/drivers/edac/edac_module.h | |||
| @@ -42,7 +42,6 @@ extern void edac_device_unregister_sysfs_main_kobj( | |||
| 42 | struct edac_device_ctl_info *edac_dev); | 42 | struct edac_device_ctl_info *edac_dev); |
| 43 | extern int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev); | 43 | extern int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev); |
| 44 | extern void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev); | 44 | extern void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev); |
| 45 | extern struct sysdev_class *edac_get_edac_class(void); | ||
| 46 | 45 | ||
| 47 | /* edac core workqueue: single CPU mode */ | 46 | /* edac core workqueue: single CPU mode */ |
| 48 | extern struct workqueue_struct *edac_workqueue; | 47 | extern struct workqueue_struct *edac_workqueue; |
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index c39697df9cb4..023b01cb5175 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | * | 7 | * |
| 8 | */ | 8 | */ |
| 9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
| 10 | #include <linux/sysdev.h> | 10 | #include <linux/edac.h> |
| 11 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
| 12 | #include <linux/ctype.h> | 12 | #include <linux/ctype.h> |
| 13 | 13 | ||
| @@ -354,7 +354,7 @@ static int edac_pci_main_kobj_setup(void) | |||
| 354 | /* First time, so create the main kobject and its | 354 | /* First time, so create the main kobject and its |
| 355 | * controls and attributes | 355 | * controls and attributes |
| 356 | */ | 356 | */ |
| 357 | edac_class = edac_get_edac_class(); | 357 | edac_class = edac_get_sysfs_class(); |
| 358 | if (edac_class == NULL) { | 358 | if (edac_class == NULL) { |
| 359 | debugf1("%s() no edac_class\n", __func__); | 359 | debugf1("%s() no edac_class\n", __func__); |
| 360 | err = -ENODEV; | 360 | err = -ENODEV; |
| @@ -368,7 +368,7 @@ static int edac_pci_main_kobj_setup(void) | |||
| 368 | if (!try_module_get(THIS_MODULE)) { | 368 | if (!try_module_get(THIS_MODULE)) { |
| 369 | debugf1("%s() try_module_get() failed\n", __func__); | 369 | debugf1("%s() try_module_get() failed\n", __func__); |
| 370 | err = -ENODEV; | 370 | err = -ENODEV; |
| 371 | goto decrement_count_fail; | 371 | goto mod_get_fail; |
| 372 | } | 372 | } |
| 373 | 373 | ||
| 374 | edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); | 374 | edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); |
| @@ -403,6 +403,9 @@ kobject_init_and_add_fail: | |||
| 403 | kzalloc_fail: | 403 | kzalloc_fail: |
| 404 | module_put(THIS_MODULE); | 404 | module_put(THIS_MODULE); |
| 405 | 405 | ||
| 406 | mod_get_fail: | ||
| 407 | edac_put_sysfs_class(); | ||
| 408 | |||
| 406 | decrement_count_fail: | 409 | decrement_count_fail: |
| 407 | /* if we are on this error exit, nothing to tear down */ | 410 | /* if we are on this error exit, nothing to tear down */ |
| 408 | atomic_dec(&edac_pci_sysfs_refcount); | 411 | atomic_dec(&edac_pci_sysfs_refcount); |
| @@ -429,6 +432,7 @@ static void edac_pci_main_kobj_teardown(void) | |||
| 429 | __func__); | 432 | __func__); |
| 430 | kobject_put(edac_pci_top_main_kobj); | 433 | kobject_put(edac_pci_top_main_kobj); |
| 431 | } | 434 | } |
| 435 | edac_put_sysfs_class(); | ||
| 432 | } | 436 | } |
| 433 | 437 | ||
| 434 | /* | 438 | /* |
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c index 20b428aa155e..aab970760b75 100644 --- a/drivers/edac/edac_stub.c +++ b/drivers/edac/edac_stub.c | |||
| @@ -3,10 +3,13 @@ | |||
| 3 | * | 3 | * |
| 4 | * Author: Dave Jiang <djiang@mvista.com> | 4 | * Author: Dave Jiang <djiang@mvista.com> |
| 5 | * | 5 | * |
| 6 | * 2007 (c) MontaVista Software, Inc. This file is licensed under | 6 | * 2007 (c) MontaVista Software, Inc. |
| 7 | * the terms of the GNU General Public License version 2. This program | 7 | * 2010 (c) Advanced Micro Devices Inc. |
| 8 | * is licensed "as is" without any warranty of any kind, whether express | 8 | * Borislav Petkov <borislav.petkov@amd.com> |
| 9 | * or implied. | 9 | * |
| 10 | * This file is licensed under the terms of the GNU General Public | ||
| 11 | * License version 2. This program is licensed "as is" without any | ||
| 12 | * warranty of any kind, whether express or implied. | ||
| 10 | * | 13 | * |
| 11 | */ | 14 | */ |
| 12 | #include <linux/module.h> | 15 | #include <linux/module.h> |
| @@ -23,6 +26,8 @@ EXPORT_SYMBOL_GPL(edac_handlers); | |||
| 23 | int edac_err_assert = 0; | 26 | int edac_err_assert = 0; |
| 24 | EXPORT_SYMBOL_GPL(edac_err_assert); | 27 | EXPORT_SYMBOL_GPL(edac_err_assert); |
| 25 | 28 | ||
| 29 | static atomic_t edac_class_valid = ATOMIC_INIT(0); | ||
| 30 | |||
| 26 | /* | 31 | /* |
| 27 | * called to determine if there is an EDAC driver interested in | 32 | * called to determine if there is an EDAC driver interested in |
| 28 | * knowing an event (such as NMI) occurred | 33 | * knowing an event (such as NMI) occurred |
| @@ -44,3 +49,41 @@ void edac_atomic_assert_error(void) | |||
| 44 | edac_err_assert++; | 49 | edac_err_assert++; |
| 45 | } | 50 | } |
| 46 | EXPORT_SYMBOL_GPL(edac_atomic_assert_error); | 51 | EXPORT_SYMBOL_GPL(edac_atomic_assert_error); |
| 52 | |||
| 53 | /* | ||
| 54 | * sysfs object: /sys/devices/system/edac | ||
| 55 | * needs to be exported to other files | ||
| 56 | */ | ||
| 57 | struct sysdev_class edac_class = { | ||
| 58 | .name = "edac", | ||
| 59 | }; | ||
| 60 | EXPORT_SYMBOL_GPL(edac_class); | ||
| 61 | |||
| 62 | /* return pointer to the 'edac' node in sysfs */ | ||
| 63 | struct sysdev_class *edac_get_sysfs_class(void) | ||
| 64 | { | ||
| 65 | int err = 0; | ||
| 66 | |||
| 67 | if (atomic_read(&edac_class_valid)) | ||
| 68 | goto out; | ||
| 69 | |||
| 70 | /* create the /sys/devices/system/edac directory */ | ||
| 71 | err = sysdev_class_register(&edac_class); | ||
| 72 | if (err) { | ||
| 73 | printk(KERN_ERR "Error registering toplevel EDAC sysfs dir\n"); | ||
| 74 | return NULL; | ||
| 75 | } | ||
| 76 | |||
| 77 | out: | ||
| 78 | atomic_inc(&edac_class_valid); | ||
| 79 | return &edac_class; | ||
| 80 | } | ||
| 81 | EXPORT_SYMBOL_GPL(edac_get_sysfs_class); | ||
| 82 | |||
| 83 | void edac_put_sysfs_class(void) | ||
| 84 | { | ||
| 85 | /* last user unregisters it */ | ||
| 86 | if (atomic_dec_and_test(&edac_class_valid)) | ||
| 87 | sysdev_class_unregister(&edac_class); | ||
| 88 | } | ||
| 89 | EXPORT_SYMBOL_GPL(edac_put_sysfs_class); | ||
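A minimal sketch of the intended get/use/put discipline from a client module, modeled on the edac_pci_sysfs.c and mce_amd_inj.c callers elsewhere in this commit (kernel-side pseudocode, not a complete module; my_edac_setup/my_edac_teardown are invented names):

```c
static struct sysdev_class *edac_class;

static int my_edac_setup(void)
{
	edac_class = edac_get_sysfs_class();	/* first get registers the class */
	if (!edac_class)
		return -ENODEV;

	/* ... create kobjects under edac_class here ... */
	return 0;
}

static void my_edac_teardown(void)
{
	/* ... remove those kobjects first ... */
	edac_put_sysfs_class();			/* last put unregisters the class */
}
```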
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c new file mode 100644 index 000000000000..c0181093b490 --- /dev/null +++ b/drivers/edac/mce_amd.c | |||
| @@ -0,0 +1,680 @@ | |||
| 1 | #include <linux/module.h> | ||
| 2 | #include <linux/slab.h> | ||
| 3 | |||
| 4 | #include "mce_amd.h" | ||
| 5 | |||
| 6 | static struct amd_decoder_ops *fam_ops; | ||
| 7 | |||
| 8 | static u8 nb_err_cpumask = 0xf; | ||
| 9 | |||
| 10 | static bool report_gart_errors; | ||
| 11 | static void (*nb_bus_decoder)(int node_id, struct mce *m, u32 nbcfg); | ||
| 12 | |||
| 13 | void amd_report_gart_errors(bool v) | ||
| 14 | { | ||
| 15 | report_gart_errors = v; | ||
| 16 | } | ||
| 17 | EXPORT_SYMBOL_GPL(amd_report_gart_errors); | ||
| 18 | |||
| 19 | void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32)) | ||
| 20 | { | ||
| 21 | nb_bus_decoder = f; | ||
| 22 | } | ||
| 23 | EXPORT_SYMBOL_GPL(amd_register_ecc_decoder); | ||
| 24 | |||
| 25 | void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32)) | ||
| 26 | { | ||
| 27 | if (nb_bus_decoder) { | ||
| 28 | WARN_ON(nb_bus_decoder != f); | ||
| 29 | |||
| 30 | nb_bus_decoder = NULL; | ||
| 31 | } | ||
| 32 | } | ||
| 33 | EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder); | ||
| 34 | |||
| 35 | /* | ||
| 36 | * string representations for the different MCA-reported error types; see F3x48 | ||
| 37 | * or MSR0000_0411. | ||
| 38 | */ | ||
| 39 | |||
| 40 | /* transaction type */ | ||
| 41 | const char *tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" }; | ||
| 42 | EXPORT_SYMBOL_GPL(tt_msgs); | ||
| 43 | |||
| 44 | /* cache level */ | ||
| 45 | const char *ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" }; | ||
| 46 | EXPORT_SYMBOL_GPL(ll_msgs); | ||
| 47 | |||
| 48 | /* memory transaction type */ | ||
| 49 | const char *rrrr_msgs[] = { | ||
| 50 | "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP" | ||
| 51 | }; | ||
| 52 | EXPORT_SYMBOL_GPL(rrrr_msgs); | ||
| 53 | |||
| 54 | /* participating processor */ | ||
| 55 | const char *pp_msgs[] = { "SRC", "RES", "OBS", "GEN" }; | ||
| 56 | EXPORT_SYMBOL_GPL(pp_msgs); | ||
| 57 | |||
| 58 | /* request timeout */ | ||
| 59 | const char *to_msgs[] = { "no timeout", "timed out" }; | ||
| 60 | EXPORT_SYMBOL_GPL(to_msgs); | ||
| 61 | |||
| 62 | /* memory or i/o */ | ||
| 63 | const char *ii_msgs[] = { "MEM", "RESV", "IO", "GEN" }; | ||
| 64 | EXPORT_SYMBOL_GPL(ii_msgs); | ||
| 65 | |||
| 66 | static const char *f10h_nb_mce_desc[] = { | ||
| 67 | "HT link data error", | ||
| 68 | "Protocol error (link, L3, probe filter, etc.)", | ||
| 69 | "Parity error in NB-internal arrays", | ||
| 70 | "Link Retry due to IO link transmission error", | ||
| 71 | "L3 ECC data cache error", | ||
| 72 | "ECC error in L3 cache tag", | ||
| 73 | "L3 LRU parity bits error", | ||
| 74 | "ECC Error in the Probe Filter directory" | ||
| 75 | }; | ||
| 76 | |||
| 77 | static bool f12h_dc_mce(u16 ec) | ||
| 78 | { | ||
| 79 | bool ret = false; | ||
| 80 | |||
| 81 | if (MEM_ERROR(ec)) { | ||
| 82 | u8 ll = ec & 0x3; | ||
| 83 | ret = true; | ||
| 84 | |||
| 85 | if (ll == LL_L2) | ||
| 86 | pr_cont("during L1 linefill from L2.\n"); | ||
| 87 | else if (ll == LL_L1) | ||
| 88 | pr_cont("Data/Tag %s error.\n", RRRR_MSG(ec)); | ||
| 89 | else | ||
| 90 | ret = false; | ||
| 91 | } | ||
| 92 | return ret; | ||
| 93 | } | ||
| 94 | |||
| 95 | static bool f10h_dc_mce(u16 ec) | ||
| 96 | { | ||
| 97 | u8 r4 = (ec >> 4) & 0xf; | ||
| 98 | u8 ll = ec & 0x3; | ||
| 99 | |||
| 100 | if (r4 == R4_GEN && ll == LL_L1) { | ||
| 101 | pr_cont("during data scrub.\n"); | ||
| 102 | return true; | ||
| 103 | } | ||
| 104 | return f12h_dc_mce(ec); | ||
| 105 | } | ||
| 106 | |||
| 107 | static bool k8_dc_mce(u16 ec) | ||
| 108 | { | ||
| 109 | if (BUS_ERROR(ec)) { | ||
| 110 | pr_cont("during system linefill.\n"); | ||
| 111 | return true; | ||
| 112 | } | ||
| 113 | |||
| 114 | return f10h_dc_mce(ec); | ||
| 115 | } | ||
| 116 | |||
| 117 | static bool f14h_dc_mce(u16 ec) | ||
| 118 | { | ||
| 119 | u8 r4 = (ec >> 4) & 0xf; | ||
| 120 | u8 ll = ec & 0x3; | ||
| 121 | u8 tt = (ec >> 2) & 0x3; | ||
| 122 | u8 ii = tt; | ||
| 123 | bool ret = true; | ||
| 124 | |||
| 125 | if (MEM_ERROR(ec)) { | ||
| 126 | |||
| 127 | if (tt != TT_DATA || ll != LL_L1) | ||
| 128 | return false; | ||
| 129 | |||
| 130 | switch (r4) { | ||
| 131 | case R4_DRD: | ||
| 132 | case R4_DWR: | ||
| 133 | pr_cont("Data/Tag parity error due to %s.\n", | ||
| 134 | (r4 == R4_DRD ? "load/hw prf" : "store")); | ||
| 135 | break; | ||
| 136 | case R4_EVICT: | ||
| 137 | pr_cont("Copyback parity error on a tag miss.\n"); | ||
| 138 | break; | ||
| 139 | case R4_SNOOP: | ||
| 140 | pr_cont("Tag parity error during snoop.\n"); | ||
| 141 | break; | ||
| 142 | default: | ||
| 143 | ret = false; | ||
| 144 | } | ||
| 145 | } else if (BUS_ERROR(ec)) { | ||
| 146 | |||
| 147 | if ((ii != II_MEM && ii != II_IO) || ll != LL_LG) | ||
| 148 | return false; | ||
| 149 | |||
| 150 | pr_cont("System read data error on a "); | ||
| 151 | |||
| 152 | switch (r4) { | ||
| 153 | case R4_RD: | ||
| 154 | pr_cont("TLB reload.\n"); | ||
| 155 | break; | ||
| 156 | case R4_DWR: | ||
| 157 | pr_cont("store.\n"); | ||
| 158 | break; | ||
| 159 | case R4_DRD: | ||
| 160 | pr_cont("load.\n"); | ||
| 161 | break; | ||
| 162 | default: | ||
| 163 | ret = false; | ||
| 164 | } | ||
| 165 | } else { | ||
| 166 | ret = false; | ||
| 167 | } | ||
| 168 | |||
| 169 | return ret; | ||
| 170 | } | ||
| 171 | |||
| 172 | static void amd_decode_dc_mce(struct mce *m) | ||
| 173 | { | ||
| 174 | u16 ec = m->status & 0xffff; | ||
| 175 | u8 xec = (m->status >> 16) & 0xf; | ||
| 176 | |||
| 177 | pr_emerg(HW_ERR "Data Cache Error: "); | ||
| 178 | |||
| 179 | /* TLB error signatures are the same across families */ | ||
| 180 | if (TLB_ERROR(ec)) { | ||
| 181 | u8 tt = (ec >> 2) & 0x3; | ||
| 182 | |||
| 183 | if (tt == TT_DATA) { | ||
| 184 | pr_cont("%s TLB %s.\n", LL_MSG(ec), | ||
| 185 | (xec ? "multimatch" : "parity error")); | ||
| 186 | return; | ||
| 187 | } | ||
| 188 | else | ||
| 189 | goto wrong_dc_mce; | ||
| 190 | } | ||
| 191 | |||
| 192 | if (!fam_ops->dc_mce(ec)) | ||
| 193 | goto wrong_dc_mce; | ||
| 194 | |||
| 195 | return; | ||
| 196 | |||
| 197 | wrong_dc_mce: | ||
| 198 | pr_emerg(HW_ERR "Corrupted DC MCE info?\n"); | ||
| 199 | } | ||
| 200 | |||
| 201 | static bool k8_ic_mce(u16 ec) | ||
| 202 | { | ||
| 203 | u8 ll = ec & 0x3; | ||
| 204 | u8 r4 = (ec >> 4) & 0xf; | ||
| 205 | bool ret = true; | ||
| 206 | |||
| 207 | if (!MEM_ERROR(ec)) | ||
| 208 | return false; | ||
| 209 | |||
| 210 | if (ll == 0x2) | ||
| 211 | pr_cont("during a linefill from L2.\n"); | ||
| 212 | else if (ll == 0x1) { | ||
| 213 | switch (r4) { | ||
| 214 | case R4_IRD: | ||
| 215 | pr_cont("Parity error during data load.\n"); | ||
| 216 | break; | ||
| 217 | |||
| 218 | case R4_EVICT: | ||
| 219 | pr_cont("Copyback Parity/Victim error.\n"); | ||
| 220 | break; | ||
| 221 | |||
| 222 | case R4_SNOOP: | ||
| 223 | pr_cont("Tag Snoop error.\n"); | ||
| 224 | break; | ||
| 225 | |||
| 226 | default: | ||
| 227 | ret = false; | ||
| 228 | break; | ||
| 229 | } | ||
| 230 | } else | ||
| 231 | ret = false; | ||
| 232 | |||
| 233 | return ret; | ||
| 234 | } | ||
| 235 | |||
| 236 | static bool f14h_ic_mce(u16 ec) | ||
| 237 | { | ||
| 238 | u8 ll = ec & 0x3; | ||
| 239 | u8 tt = (ec >> 2) & 0x3; | ||
| 240 | u8 r4 = (ec >> 4) & 0xf; | ||
| 241 | bool ret = true; | ||
| 242 | |||
| 243 | if (MEM_ERROR(ec)) { | ||
| 244 | if (tt != TT_INSTR || ll != LL_L1) | ||
| 245 | return false; | ||
| 246 | |||
| 247 | if (r4 == R4_IRD) | ||
| 248 | pr_cont("Data/tag array parity error for a tag hit.\n"); | ||
| 249 | else if (r4 == R4_SNOOP) | ||
| 250 | pr_cont("Tag error during snoop/victimization.\n"); | ||
| 251 | else | ||
| 252 | ret = false; | ||
| 253 | } | ||
| 254 | return ret; | ||
| 255 | } | ||
| 256 | |||
| 257 | static void amd_decode_ic_mce(struct mce *m) | ||
| 258 | { | ||
| 259 | u16 ec = m->status & 0xffff; | ||
| 260 | u8 xec = (m->status >> 16) & 0xf; | ||
| 261 | |||
| 262 | pr_emerg(HW_ERR "Instruction Cache Error: "); | ||
| 263 | |||
| 264 | if (TLB_ERROR(ec)) | ||
| 265 | pr_cont("%s TLB %s.\n", LL_MSG(ec), | ||
| 266 | (xec ? "multimatch" : "parity error")); | ||
| 267 | else if (BUS_ERROR(ec)) { | ||
| 268 | bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58))); | ||
| 269 | |||
| 270 | pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read")); | ||
| 271 | } else if (fam_ops->ic_mce(ec)) | ||
| 272 | ; | ||
| 273 | else | ||
| 274 | pr_emerg(HW_ERR "Corrupted IC MCE info?\n"); | ||
| 275 | } | ||
| 276 | |||
| 277 | static void amd_decode_bu_mce(struct mce *m) | ||
| 278 | { | ||
| 279 | u32 ec = m->status & 0xffff; | ||
| 280 | u32 xec = (m->status >> 16) & 0xf; | ||
| 281 | |||
| 282 | pr_emerg(HW_ERR "Bus Unit Error"); | ||
| 283 | |||
| 284 | if (xec == 0x1) | ||
| 285 | pr_cont(" in the write data buffers.\n"); | ||
| 286 | else if (xec == 0x3) | ||
| 287 | pr_cont(" in the victim data buffers.\n"); | ||
| 288 | else if (xec == 0x2 && MEM_ERROR(ec)) | ||
| 289 | pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec)); | ||
| 290 | else if (xec == 0x0) { | ||
| 291 | if (TLB_ERROR(ec)) | ||
| 292 | pr_cont(": %s error in a Page Descriptor Cache or " | ||
| 293 | "Guest TLB.\n", TT_MSG(ec)); | ||
| 294 | else if (BUS_ERROR(ec)) | ||
| 295 | pr_cont(": %s/ECC error in data read from NB: %s.\n", | ||
| 296 | RRRR_MSG(ec), PP_MSG(ec)); | ||
| 297 | else if (MEM_ERROR(ec)) { | ||
| 298 | u8 rrrr = (ec >> 4) & 0xf; | ||
| 299 | |||
| 300 | if (rrrr >= 0x7) | ||
| 301 | pr_cont(": %s error during data copyback.\n", | ||
| 302 | RRRR_MSG(ec)); | ||
| 303 | else if (rrrr <= 0x1) | ||
| 304 | pr_cont(": %s parity/ECC error during data " | ||
| 305 | "access from L2.\n", RRRR_MSG(ec)); | ||
| 306 | else | ||
| 307 | goto wrong_bu_mce; | ||
| 308 | } else | ||
| 309 | goto wrong_bu_mce; | ||
| 310 | } else | ||
| 311 | goto wrong_bu_mce; | ||
| 312 | |||
| 313 | return; | ||
| 314 | |||
| 315 | wrong_bu_mce: | ||
| 316 | pr_emerg(HW_ERR "Corrupted BU MCE info?\n"); | ||
| 317 | } | ||
| 318 | |||
| 319 | static void amd_decode_ls_mce(struct mce *m) | ||
| 320 | { | ||
| 321 | u16 ec = m->status & 0xffff; | ||
| 322 | u8 xec = (m->status >> 16) & 0xf; | ||
| 323 | |||
| 324 | if (boot_cpu_data.x86 == 0x14) { | ||
| 325 | pr_emerg("You shouldn't be seeing an LS MCE on this cpu family," | ||
| 326 | " please report on LKML.\n"); | ||
| 327 | return; | ||
| 328 | } | ||
| 329 | |||
| 330 | pr_emerg(HW_ERR "Load Store Error"); | ||
| 331 | |||
| 332 | if (xec == 0x0) { | ||
| 333 | u8 r4 = (ec >> 4) & 0xf; | ||
| 334 | |||
| 335 | if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR)) | ||
| 336 | goto wrong_ls_mce; | ||
| 337 | |||
| 338 | pr_cont(" during %s.\n", RRRR_MSG(ec)); | ||
| 339 | } else | ||
| 340 | goto wrong_ls_mce; | ||
| 341 | |||
| 342 | return; | ||
| 343 | |||
| 344 | wrong_ls_mce: | ||
| 345 | pr_emerg(HW_ERR "Corrupted LS MCE info?\n"); | ||
| 346 | } | ||
| 347 | |||
| 348 | static bool k8_nb_mce(u16 ec, u8 xec) | ||
| 349 | { | ||
| 350 | bool ret = true; | ||
| 351 | |||
| 352 | switch (xec) { | ||
| 353 | case 0x1: | ||
| 354 | pr_cont("CRC error detected on HT link.\n"); | ||
| 355 | break; | ||
| 356 | |||
| 357 | case 0x5: | ||
| 358 | pr_cont("Invalid GART PTE entry during GART table walk.\n"); | ||
| 359 | break; | ||
| 360 | |||
| 361 | case 0x6: | ||
| 362 | pr_cont("Unsupported atomic RMW received from an IO link.\n"); | ||
| 363 | break; | ||
| 364 | |||
| 365 | case 0x0: | ||
| 366 | case 0x8: | ||
| 367 | if (boot_cpu_data.x86 == 0x11) | ||
| 368 | return false; | ||
| 369 | |||
| 370 | pr_cont("DRAM ECC error detected on the NB.\n"); | ||
| 371 | break; | ||
| 372 | |||
| 373 | case 0xd: | ||
| 374 | pr_cont("Parity error on the DRAM addr/ctl signals.\n"); | ||
| 375 | break; | ||
| 376 | |||
| 377 | default: | ||
| 378 | ret = false; | ||
| 379 | break; | ||
| 380 | } | ||
| 381 | |||
| 382 | return ret; | ||
| 383 | } | ||
| 384 | |||
| 385 | static bool f10h_nb_mce(u16 ec, u8 xec) | ||
| 386 | { | ||
| 387 | bool ret = true; | ||
| 388 | u8 offset = 0; | ||
| 389 | |||
| 390 | if (k8_nb_mce(ec, xec)) | ||
| 391 | return true; | ||
| 392 | |||
| 393 | switch (xec) { | ||
| 394 | case 0xa ... 0xc: | ||
| 395 | offset = 10; | ||
| 396 | break; | ||
| 397 | |||
| 398 | case 0xe: | ||
| 399 | offset = 11; | ||
| 400 | break; | ||
| 401 | |||
| 402 | case 0xf: | ||
| 403 | if (TLB_ERROR(ec)) | ||
| 404 | pr_cont("GART Table Walk data error.\n"); | ||
| 405 | else if (BUS_ERROR(ec)) | ||
| 406 | pr_cont("DMA Exclusion Vector Table Walk error.\n"); | ||
| 407 | else | ||
| 408 | ret = false; | ||
| 409 | |||
| 410 | goto out; | ||
| 411 | break; | ||
| 412 | |||
| 413 | case 0x1c ... 0x1f: | ||
| 414 | offset = 24; | ||
| 415 | break; | ||
| 416 | |||
| 417 | default: | ||
| 418 | ret = false; | ||
| 419 | |||
| 420 | goto out; | ||
| 421 | break; | ||
| 422 | } | ||
| 423 | |||
| 424 | pr_cont("%s.\n", f10h_nb_mce_desc[xec - offset]); | ||
| 425 | |||
| 426 | out: | ||
| 427 | return ret; | ||
| 428 | } | ||
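A worked example of the offset arithmetic above, mapping extended error codes onto f10h_nb_mce_desc[] (the indices follow directly from the switch; nothing here is new behavior):

```c
/*
 * xec -> f10h_nb_mce_desc[] index, per the switch above:
 *
 *   xec 0xa..0xc  : index = xec - 10 -> entries 0..2
 *   xec 0xe       : index = xec - 11 -> entry  3
 *   xec 0x1c..0x1f: index = xec - 24 -> entries 4..7
 *
 * e.g. xec = 0x1d gives 0x1d - 24 = 5, i.e.
 * "ECC error in L3 cache tag".
 */
```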
| 429 | |||
| 430 | static bool nb_noop_mce(u16 ec, u8 xec) | ||
| 431 | { | ||
| 432 | return false; | ||
| 433 | } | ||
| 434 | |||
| 435 | void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg) | ||
| 436 | { | ||
| 437 | u8 xec = (m->status >> 16) & 0x1f; | ||
| 438 | u16 ec = m->status & 0xffff; | ||
| 439 | u32 nbsh = (u32)(m->status >> 32); | ||
| 440 | |||
| 441 | pr_emerg(HW_ERR "Northbridge Error, node %d: ", node_id); | ||
| 442 | |||
| 443 | /* | ||
| 444 | * F10h revD and later can disable ErrCpu[3:0]; check that first. The | ||
| 445 | * value encoding has also changed there, so interpret those bits differently. | ||
| 446 | */ | ||
| 447 | if ((boot_cpu_data.x86 == 0x10) && | ||
| 448 | (boot_cpu_data.x86_model > 7)) { | ||
| 449 | if (nbsh & K8_NBSH_ERR_CPU_VAL) | ||
| 450 | pr_cont(", core: %u", (u8)(nbsh & nb_err_cpumask)); | ||
| 451 | } else { | ||
| 452 | u8 assoc_cpus = nbsh & nb_err_cpumask; | ||
| 453 | |||
| 454 | if (assoc_cpus > 0) | ||
| 455 | pr_cont(", core: %d", fls(assoc_cpus) - 1); | ||
| 456 | } | ||
| 457 | |||
| 458 | switch (xec) { | ||
| 459 | case 0x2: | ||
| 460 | pr_cont("Sync error (sync packets on HT link detected).\n"); | ||
| 461 | return; | ||
| 462 | |||
| 463 | case 0x3: | ||
| 464 | pr_cont("HT Master abort.\n"); | ||
| 465 | return; | ||
| 466 | |||
| 467 | case 0x4: | ||
| 468 | pr_cont("HT Target abort.\n"); | ||
| 469 | return; | ||
| 470 | |||
| 471 | case 0x7: | ||
| 472 | pr_cont("NB Watchdog timeout.\n"); | ||
| 473 | return; | ||
| 474 | |||
| 475 | case 0x9: | ||
| 476 | pr_cont("SVM DMA Exclusion Vector error.\n"); | ||
| 477 | return; | ||
| 478 | |||
| 479 | default: | ||
| 480 | break; | ||
| 481 | } | ||
| 482 | |||
| 483 | if (!fam_ops->nb_mce(ec, xec)) | ||
| 484 | goto wrong_nb_mce; | ||
| 485 | |||
| 486 | if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10) | ||
| 487 | if ((xec == 0x8 || xec == 0x0) && nb_bus_decoder) | ||
| 488 | nb_bus_decoder(node_id, m, nbcfg); | ||
| 489 | |||
| 490 | return; | ||
| 491 | |||
| 492 | wrong_nb_mce: | ||
| 493 | pr_emerg(HW_ERR "Corrupted NB MCE info?\n"); | ||
| 494 | } | ||
| 495 | EXPORT_SYMBOL_GPL(amd_decode_nb_mce); | ||
| 496 | |||
| 497 | static void amd_decode_fr_mce(struct mce *m) | ||
| 498 | { | ||
| 499 | if (boot_cpu_data.x86 == 0xf || | ||
| 500 | boot_cpu_data.x86 == 0x11) | ||
| 501 | goto wrong_fr_mce; | ||
| 502 | |||
| 503 | /* we have only one error signature so match all fields at once. */ | ||
| 504 | if ((m->status & 0xffff) == 0x0f0f) { | ||
| 505 | pr_emerg(HW_ERR "FR Error: CPU Watchdog timer expire.\n"); | ||
| 506 | return; | ||
| 507 | } | ||
| 508 | |||
| 509 | wrong_fr_mce: | ||
| 510 | pr_emerg(HW_ERR "Corrupted FR MCE info?\n"); | ||
| 511 | } | ||
| 512 | |||
| 513 | static inline void amd_decode_err_code(u16 ec) | ||
| 514 | { | ||
| 515 | if (TLB_ERROR(ec)) { | ||
| 516 | pr_emerg(HW_ERR "Transaction: %s, Cache Level: %s\n", | ||
| 517 | TT_MSG(ec), LL_MSG(ec)); | ||
| 518 | } else if (MEM_ERROR(ec)) { | ||
| 519 | pr_emerg(HW_ERR "Transaction: %s, Type: %s, Cache Level: %s\n", | ||
| 520 | RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec)); | ||
| 521 | } else if (BUS_ERROR(ec)) { | ||
| 522 | pr_emerg(HW_ERR "Transaction: %s (%s), %s, Cache Level: %s, " | ||
| 523 | "Participating Processor: %s\n", | ||
| 524 | RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec), | ||
| 525 | PP_MSG(ec)); | ||
| 526 | } else | ||
| 527 | pr_emerg(HW_ERR "Huh? Unknown MCE error 0x%x\n", ec); | ||
| 528 | } | ||
| 529 | |||
| 530 | /* | ||
| 531 | * Filter out unwanted MCE signatures here. | ||
| 532 | */ | ||
| 533 | static bool amd_filter_mce(struct mce *m) | ||
| 534 | { | ||
| 535 | u8 xec = (m->status >> 16) & 0x1f; | ||
| 536 | |||
| 537 | /* | ||
| 538 | * NB GART TLB error reporting is disabled by default. | ||
| 539 | */ | ||
| 540 | if (m->bank == 4 && xec == 0x5 && !report_gart_errors) | ||
| 541 | return true; | ||
| 542 | |||
| 543 | return false; | ||
| 544 | } | ||
| 545 | |||
| 546 | int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) | ||
| 547 | { | ||
| 548 | struct mce *m = (struct mce *)data; | ||
| 549 | int node, ecc; | ||
| 550 | |||
| 551 | if (amd_filter_mce(m)) | ||
| 552 | return NOTIFY_STOP; | ||
| 553 | |||
| 554 | pr_emerg(HW_ERR "MC%d_STATUS: ", m->bank); | ||
| 555 | |||
| 556 | pr_cont("%sorrected error, other errors lost: %s, " | ||
| 557 | "CPU context corrupt: %s", | ||
| 558 | ((m->status & MCI_STATUS_UC) ? "Unc" : "C"), | ||
| 559 | ((m->status & MCI_STATUS_OVER) ? "yes" : "no"), | ||
| 560 | ((m->status & MCI_STATUS_PCC) ? "yes" : "no")); | ||
| 561 | |||
| 562 | /* decode the UECC/CECC pair, MCi_STATUS bits [46:45] (NBSH bits [14:13]) */ | ||
| 563 | ecc = (m->status >> 45) & 0x3; | ||
| 564 | if (ecc) | ||
| 565 | pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U")); | ||
| 566 | |||
| 567 | pr_cont("\n"); | ||
| 568 | |||
| 569 | switch (m->bank) { | ||
| 570 | case 0: | ||
| 571 | amd_decode_dc_mce(m); | ||
| 572 | break; | ||
| 573 | |||
| 574 | case 1: | ||
| 575 | amd_decode_ic_mce(m); | ||
| 576 | break; | ||
| 577 | |||
| 578 | case 2: | ||
| 579 | amd_decode_bu_mce(m); | ||
| 580 | break; | ||
| 581 | |||
| 582 | case 3: | ||
| 583 | amd_decode_ls_mce(m); | ||
| 584 | break; | ||
| 585 | |||
| 586 | case 4: | ||
| 587 | node = amd_get_nb_id(m->extcpu); | ||
| 588 | amd_decode_nb_mce(node, m, 0); | ||
| 589 | break; | ||
| 590 | |||
| 591 | case 5: | ||
| 592 | amd_decode_fr_mce(m); | ||
| 593 | break; | ||
| 594 | |||
| 595 | default: | ||
| 596 | break; | ||
| 597 | } | ||
| 598 | |||
| 599 | amd_decode_err_code(m->status & 0xffff); | ||
| 600 | |||
| 601 | return NOTIFY_STOP; | ||
| 602 | } | ||
| 603 | EXPORT_SYMBOL_GPL(amd_decode_mce); | ||
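The status-word summary bits printed at the top of amd_decode_mce() can be checked in isolation. A userspace-compilable sketch with an invented status value (bit positions are the architectural MCi_STATUS OVER/UC/PCC bits plus the CECC/UECC pair at [46:45] used above):

```c
#include <stdio.h>
#include <stdint.h>

#define BIT_64(n)	(1ULL << (n))

int main(void)
{
	uint64_t status = BIT_64(63) | BIT_64(61) | BIT_64(45);	/* hypothetical */

	printf("uncorrected:     %s\n", (status & BIT_64(61)) ? "yes" : "no");
	printf("overflow:        %s\n", (status & BIT_64(62)) ? "yes" : "no");
	printf("context corrupt: %s\n", (status & BIT_64(57)) ? "yes" : "no");

	/* the CECC/UECC pair, exactly as the decoder reads it */
	switch ((status >> 45) & 0x3) {
	case 2:
		printf("corrected ECC error\n");
		break;
	case 1:
		printf("uncorrected ECC error\n");
		break;
	}
	return 0;
}
```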
| 604 | |||
| 605 | static struct notifier_block amd_mce_dec_nb = { | ||
| 606 | .notifier_call = amd_decode_mce, | ||
| 607 | }; | ||
| 608 | |||
| 609 | static int __init mce_amd_init(void) | ||
| 610 | { | ||
| 611 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) | ||
| 612 | return 0; | ||
| 613 | |||
| 614 | if ((boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x12) && | ||
| 615 | (boot_cpu_data.x86 != 0x14 || boot_cpu_data.x86_model > 0xf)) | ||
| 616 | return 0; | ||
| 617 | |||
| 618 | fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); | ||
| 619 | if (!fam_ops) | ||
| 620 | return -ENOMEM; | ||
| 621 | |||
| 622 | switch (boot_cpu_data.x86) { | ||
| 623 | case 0xf: | ||
| 624 | fam_ops->dc_mce = k8_dc_mce; | ||
| 625 | fam_ops->ic_mce = k8_ic_mce; | ||
| 626 | fam_ops->nb_mce = k8_nb_mce; | ||
| 627 | break; | ||
| 628 | |||
| 629 | case 0x10: | ||
| 630 | fam_ops->dc_mce = f10h_dc_mce; | ||
| 631 | fam_ops->ic_mce = k8_ic_mce; | ||
| 632 | fam_ops->nb_mce = f10h_nb_mce; | ||
| 633 | break; | ||
| 634 | |||
| 635 | case 0x11: | ||
| 636 | fam_ops->dc_mce = k8_dc_mce; | ||
| 637 | fam_ops->ic_mce = k8_ic_mce; | ||
| 638 | fam_ops->nb_mce = f10h_nb_mce; | ||
| 639 | break; | ||
| 640 | |||
| 641 | case 0x12: | ||
| 642 | fam_ops->dc_mce = f12h_dc_mce; | ||
| 643 | fam_ops->ic_mce = k8_ic_mce; | ||
| 644 | fam_ops->nb_mce = nb_noop_mce; | ||
| 645 | break; | ||
| 646 | |||
| 647 | case 0x14: | ||
| 648 | nb_err_cpumask = 0x3; | ||
| 649 | fam_ops->dc_mce = f14h_dc_mce; | ||
| 650 | fam_ops->ic_mce = f14h_ic_mce; | ||
| 651 | fam_ops->nb_mce = nb_noop_mce; | ||
| 652 | break; | ||
| 653 | |||
| 654 | default: | ||
| 655 | printk(KERN_WARNING "Huh? What family is that: %d?!\n", | ||
| 656 | boot_cpu_data.x86); | ||
| 657 | kfree(fam_ops); | ||
| 658 | return -EINVAL; | ||
| 659 | } | ||
| 660 | |||
| 661 | pr_info("MCE: In-kernel MCE decoding enabled.\n"); | ||
| 662 | |||
| 663 | atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb); | ||
| 664 | |||
| 665 | return 0; | ||
| 666 | } | ||
| 667 | early_initcall(mce_amd_init); | ||
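For contrast, a hedged sketch of how another kernel module would hook the same decode chain (pseudocode modeled on the registration above; my_decode_mce is an invented name, and a cooperating decoder would typically return NOTIFY_DONE so later chain entries still run):

```c
static int my_decode_mce(struct notifier_block *nb, unsigned long val,
			 void *data)
{
	struct mce *m = (struct mce *)data;

	/* inspect m->bank / m->status / m->addr here */

	return NOTIFY_DONE;
}

static struct notifier_block my_mce_dec_nb = {
	.notifier_call	= my_decode_mce,
};

/* in the module init path:
 *	atomic_notifier_chain_register(&x86_mce_decoder_chain, &my_mce_dec_nb);
 */
```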
| 668 | |||
| 669 | #ifdef MODULE | ||
| 670 | static void __exit mce_amd_exit(void) | ||
| 671 | { | ||
| 672 | atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb); | ||
| 673 | kfree(fam_ops); | ||
| 674 | } | ||
| 675 | |||
| 676 | MODULE_DESCRIPTION("AMD MCE decoder"); | ||
| 677 | MODULE_ALIAS("edac-mce-amd"); | ||
| 678 | MODULE_LICENSE("GPL"); | ||
| 679 | module_exit(mce_amd_exit); | ||
| 680 | #endif | ||
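The fam_ops indirection keeps family quirks out of the common decode paths, so supporting a new family is a matter of three predicates plus one switch case. A hypothetical sketch (the f15h names are invented and not part of this commit):

```c
/* Invented example callbacks for a hypothetical future family. */
static bool f15h_dc_mce(u16 ec)
{
	/* family-specific DC signature checks would go here */
	return false;
}

static bool f15h_ic_mce(u16 ec)
{
	return false;
}

static bool f15h_nb_mce(u16 ec, u8 xec)
{
	return false;
}

/* ...and one more case in mce_amd_init():
 *	case 0x15:
 *		fam_ops->dc_mce = f15h_dc_mce;
 *		fam_ops->ic_mce = f15h_ic_mce;
 *		fam_ops->nb_mce = f15h_nb_mce;
 *		break;
 */
```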
diff --git a/drivers/edac/edac_mce_amd.h b/drivers/edac/mce_amd.h index df23ee065f79..35f6e0e3b297 100644 --- a/drivers/edac/edac_mce_amd.h +++ b/drivers/edac/mce_amd.h | |||
| @@ -1,11 +1,14 @@ | |||
| 1 | #ifndef _EDAC_MCE_AMD_H | 1 | #ifndef _EDAC_MCE_AMD_H |
| 2 | #define _EDAC_MCE_AMD_H | 2 | #define _EDAC_MCE_AMD_H |
| 3 | 3 | ||
| 4 | #include <linux/notifier.h> | ||
| 5 | |||
| 4 | #include <asm/mce.h> | 6 | #include <asm/mce.h> |
| 5 | 7 | ||
| 8 | #define BIT_64(n) (U64_C(1) << (n)) | ||
| 9 | |||
| 6 | #define ERROR_CODE(x) ((x) & 0xffff) | 10 | #define ERROR_CODE(x) ((x) & 0xffff) |
| 7 | #define EXT_ERROR_CODE(x) (((x) >> 16) & 0x1f) | 11 | #define EXT_ERROR_CODE(x) (((x) >> 16) & 0x1f) |
| 8 | #define EXT_ERR_MSG(x) ext_msgs[EXT_ERROR_CODE(x)] | ||
| 9 | 12 | ||
| 10 | #define LOW_SYNDROME(x) (((x) >> 15) & 0xff) | 13 | #define LOW_SYNDROME(x) (((x) >> 15) & 0xff) |
| 11 | #define HIGH_SYNDROME(x) (((x) >> 24) & 0xff) | 14 | #define HIGH_SYNDROME(x) (((x) >> 24) & 0xff) |
| @@ -20,13 +23,14 @@ | |||
| 20 | #define II_MSG(x) ii_msgs[II(x)] | 23 | #define II_MSG(x) ii_msgs[II(x)] |
| 21 | #define LL(x) (((x) >> 0) & 0x3) | 24 | #define LL(x) (((x) >> 0) & 0x3) |
| 22 | #define LL_MSG(x) ll_msgs[LL(x)] | 25 | #define LL_MSG(x) ll_msgs[LL(x)] |
| 23 | #define RRRR(x) (((x) >> 4) & 0xf) | ||
| 24 | #define RRRR_MSG(x) rrrr_msgs[RRRR(x)] | ||
| 25 | #define TO(x) (((x) >> 8) & 0x1) | 26 | #define TO(x) (((x) >> 8) & 0x1) |
| 26 | #define TO_MSG(x) to_msgs[TO(x)] | 27 | #define TO_MSG(x) to_msgs[TO(x)] |
| 27 | #define PP(x) (((x) >> 9) & 0x3) | 28 | #define PP(x) (((x) >> 9) & 0x3) |
| 28 | #define PP_MSG(x) pp_msgs[PP(x)] | 29 | #define PP_MSG(x) pp_msgs[PP(x)] |
| 29 | 30 | ||
| 31 | #define RRRR(x) (((x) >> 4) & 0xf) | ||
| 32 | #define RRRR_MSG(x) ((RRRR(x) < 9) ? rrrr_msgs[RRRR(x)] : "Wrong R4!") | ||
| 33 | |||
| 30 | #define K8_NBSH 0x4C | 34 | #define K8_NBSH 0x4C |
| 31 | 35 | ||
| 32 | #define K8_NBSH_VALID_BIT BIT(31) | 36 | #define K8_NBSH_VALID_BIT BIT(31) |
| @@ -41,13 +45,45 @@ | |||
| 41 | #define K8_NBSH_UECC BIT(13) | 45 | #define K8_NBSH_UECC BIT(13) |
| 42 | #define K8_NBSH_ERR_SCRUBER BIT(8) | 46 | #define K8_NBSH_ERR_SCRUBER BIT(8) |
| 43 | 47 | ||
| 48 | enum tt_ids { | ||
| 49 | TT_INSTR = 0, | ||
| 50 | TT_DATA, | ||
| 51 | TT_GEN, | ||
| 52 | TT_RESV, | ||
| 53 | }; | ||
| 54 | |||
| 55 | enum ll_ids { | ||
| 56 | LL_RESV = 0, | ||
| 57 | LL_L1, | ||
| 58 | LL_L2, | ||
| 59 | LL_LG, | ||
| 60 | }; | ||
| 61 | |||
| 62 | enum ii_ids { | ||
| 63 | II_MEM = 0, | ||
| 64 | II_RESV, | ||
| 65 | II_IO, | ||
| 66 | II_GEN, | ||
| 67 | }; | ||
| 68 | |||
| 69 | enum rrrr_ids { | ||
| 70 | R4_GEN = 0, | ||
| 71 | R4_RD, | ||
| 72 | R4_WR, | ||
| 73 | R4_DRD, | ||
| 74 | R4_DWR, | ||
| 75 | R4_IRD, | ||
| 76 | R4_PREF, | ||
| 77 | R4_EVICT, | ||
| 78 | R4_SNOOP, | ||
| 79 | }; | ||
| 80 | |||
| 44 | extern const char *tt_msgs[]; | 81 | extern const char *tt_msgs[]; |
| 45 | extern const char *ll_msgs[]; | 82 | extern const char *ll_msgs[]; |
| 46 | extern const char *rrrr_msgs[]; | 83 | extern const char *rrrr_msgs[]; |
| 47 | extern const char *pp_msgs[]; | 84 | extern const char *pp_msgs[]; |
| 48 | extern const char *to_msgs[]; | 85 | extern const char *to_msgs[]; |
| 49 | extern const char *ii_msgs[]; | 86 | extern const char *ii_msgs[]; |
| 50 | extern const char *ext_msgs[]; | ||
| 51 | 87 | ||
| 52 | /* | 88 | /* |
| 53 | * relevant NB regs | 89 | * relevant NB regs |
| @@ -60,10 +96,19 @@ struct err_regs { | |||
| 60 | u32 nbeal; | 96 | u32 nbeal; |
| 61 | }; | 97 | }; |
| 62 | 98 | ||
| 99 | /* | ||
| 100 | * per-family decoder ops | ||
| 101 | */ | ||
| 102 | struct amd_decoder_ops { | ||
| 103 | bool (*dc_mce)(u16); | ||
| 104 | bool (*ic_mce)(u16); | ||
| 105 | bool (*nb_mce)(u16, u8); | ||
| 106 | }; | ||
| 63 | 107 | ||
| 64 | void amd_report_gart_errors(bool); | 108 | void amd_report_gart_errors(bool); |
| 65 | void amd_register_ecc_decoder(void (*f)(int, struct err_regs *)); | 109 | void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32)); |
| 66 | void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *)); | 110 | void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32)); |
| 67 | void amd_decode_nb_mce(int, struct err_regs *, int); | 111 | void amd_decode_nb_mce(int, struct mce *, u32); |
| 112 | int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data); | ||
| 68 | 113 | ||
| 69 | #endif /* _EDAC_MCE_AMD_H */ | 114 | #endif /* _EDAC_MCE_AMD_H */ |
diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c new file mode 100644 index 000000000000..8d0688f36d4c --- /dev/null +++ b/drivers/edac/mce_amd_inj.c | |||
| @@ -0,0 +1,171 @@ | |||
| 1 | /* | ||
| 2 | * A simple MCE injection facility for testing the MCE decoding code. This | ||
| 3 | * driver should be built as module so that it can be loaded on production | ||
| 4 | * kernels for testing purposes. | ||
| 5 | * | ||
| 6 | * This file may be distributed under the terms of the GNU General Public | ||
| 7 | * License version 2. | ||
| 8 | * | ||
| 9 | * Copyright (c) 2010: Borislav Petkov <borislav.petkov@amd.com> | ||
| 10 | * Advanced Micro Devices Inc. | ||
| 11 | */ | ||
| 12 | |||
| 13 | #include <linux/kobject.h> | ||
| 14 | #include <linux/sysdev.h> | ||
| 15 | #include <linux/edac.h> | ||
| 16 | #include <asm/mce.h> | ||
| 17 | |||
| 18 | #include "mce_amd.h" | ||
| 19 | |||
| 20 | struct edac_mce_attr { | ||
| 21 | struct attribute attr; | ||
| 22 | ssize_t (*show) (struct kobject *kobj, struct edac_mce_attr *attr, char *buf); | ||
| 23 | ssize_t (*store)(struct kobject *kobj, struct edac_mce_attr *attr, | ||
| 24 | const char *buf, size_t count); | ||
| 25 | }; | ||
| 26 | |||
| 27 | #define EDAC_MCE_ATTR(_name, _mode, _show, _store) \ | ||
| 28 | static struct edac_mce_attr mce_attr_##_name = __ATTR(_name, _mode, _show, _store) | ||
| 29 | |||
| 30 | static struct kobject *mce_kobj; | ||
| 31 | |||
| 32 | /* | ||
| 33 | * Collect all the MCi_XXX settings | ||
| 34 | */ | ||
| 35 | static struct mce i_mce; | ||
| 36 | |||
| 37 | #define MCE_INJECT_STORE(reg) \ | ||
| 38 | static ssize_t edac_inject_##reg##_store(struct kobject *kobj, \ | ||
| 39 | struct edac_mce_attr *attr, \ | ||
| 40 | const char *data, size_t count)\ | ||
| 41 | { \ | ||
| 42 | int ret = 0; \ | ||
| 43 | unsigned long value; \ | ||
| 44 | \ | ||
| 45 | ret = strict_strtoul(data, 16, &value); \ | ||
| 46 | if (ret < 0) \ | ||
| 47 | printk(KERN_ERR "Error writing MCE " #reg " field.\n"); \ | ||
| 48 | \ | ||
| 49 | i_mce.reg = value; \ | ||
| 50 | \ | ||
| 51 | return count; \ | ||
| 52 | } | ||
| 53 | |||
| 54 | MCE_INJECT_STORE(status); | ||
| 55 | MCE_INJECT_STORE(misc); | ||
| 56 | MCE_INJECT_STORE(addr); | ||
| 57 | |||
| 58 | #define MCE_INJECT_SHOW(reg) \ | ||
| 59 | static ssize_t edac_inject_##reg##_show(struct kobject *kobj, \ | ||
| 60 | struct edac_mce_attr *attr, \ | ||
| 61 | char *buf) \ | ||
| 62 | { \ | ||
| 63 | return sprintf(buf, "0x%016llx\n", i_mce.reg); \ | ||
| 64 | } | ||
| 65 | |||
| 66 | MCE_INJECT_SHOW(status); | ||
| 67 | MCE_INJECT_SHOW(misc); | ||
| 68 | MCE_INJECT_SHOW(addr); | ||
| 69 | |||
| 70 | EDAC_MCE_ATTR(status, 0644, edac_inject_status_show, edac_inject_status_store); | ||
| 71 | EDAC_MCE_ATTR(misc, 0644, edac_inject_misc_show, edac_inject_misc_store); | ||
| 72 | EDAC_MCE_ATTR(addr, 0644, edac_inject_addr_show, edac_inject_addr_store); | ||
| 73 | |||
| 74 | /* | ||
| 75 | * This denotes into which bank we're injecting and triggers | ||
| 76 | * the injection, at the same time. | ||
| 77 | */ | ||
| 78 | static ssize_t edac_inject_bank_store(struct kobject *kobj, | ||
| 79 | struct edac_mce_attr *attr, | ||
| 80 | const char *data, size_t count) | ||
| 81 | { | ||
| 82 | int ret = 0; | ||
| 83 | unsigned long value; | ||
| 84 | |||
| 85 | ret = strict_strtoul(data, 10, &value); | ||
| 86 | if (ret < 0) { | ||
| 87 | printk(KERN_ERR "Invalid bank value!\n"); | ||
| 88 | return -EINVAL; | ||
| 89 | } | ||
| 90 | |||
| 91 | if (value > 5) { | ||
| 92 | printk(KERN_ERR "Non-existant MCE bank: %lu\n", value); | ||
| 93 | return -EINVAL; | ||
| 94 | } | ||
| 95 | |||
| 96 | i_mce.bank = value; | ||
| 97 | |||
| 98 | amd_decode_mce(NULL, 0, &i_mce); | ||
| 99 | |||
| 100 | return count; | ||
| 101 | } | ||
| 102 | |||
| 103 | static ssize_t edac_inject_bank_show(struct kobject *kobj, | ||
| 104 | struct edac_mce_attr *attr, char *buf) | ||
| 105 | { | ||
| 106 | return sprintf(buf, "%d\n", i_mce.bank); | ||
| 107 | } | ||
| 108 | |||
| 109 | EDAC_MCE_ATTR(bank, 0644, edac_inject_bank_show, edac_inject_bank_store); | ||
| 110 | |||
| 111 | static struct edac_mce_attr *sysfs_attrs[] = { &mce_attr_status, &mce_attr_misc, | ||
| 112 | &mce_attr_addr, &mce_attr_bank | ||
| 113 | }; | ||
| 114 | |||
| 115 | static int __init edac_init_mce_inject(void) | ||
| 116 | { | ||
| 117 | struct sysdev_class *edac_class = NULL; | ||
| 118 | int i, err = 0; | ||
| 119 | |||
| 120 | edac_class = edac_get_sysfs_class(); | ||
| 121 | if (!edac_class) | ||
| 122 | return -EINVAL; | ||
| 123 | |||
| 124 | mce_kobj = kobject_create_and_add("mce", &edac_class->kset.kobj); | ||
| 125 | if (!mce_kobj) { | ||
| 126 | printk(KERN_ERR "Error creating a mce kset.\n"); | ||
| 127 | err = -ENOMEM; | ||
| 128 | goto err_mce_kobj; | ||
| 129 | } | ||
| 130 | |||
| 131 | for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++) { | ||
| 132 | err = sysfs_create_file(mce_kobj, &sysfs_attrs[i]->attr); | ||
| 133 | if (err) { | ||
| 134 | printk(KERN_ERR "Error creating %s in sysfs.\n", | ||
| 135 | sysfs_attrs[i]->attr.name); | ||
| 136 | goto err_sysfs_create; | ||
| 137 | } | ||
| 138 | } | ||
| 139 | return 0; | ||
| 140 | |||
| 141 | err_sysfs_create: | ||
| 142 | while (--i >= 0) | ||
| 143 | sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr); | ||
| 144 | |||
| 145 | kobject_del(mce_kobj); | ||
| 146 | |||
| 147 | err_mce_kobj: | ||
| 148 | edac_put_sysfs_class(); | ||
| 149 | |||
| 150 | return err; | ||
| 151 | } | ||
| 152 | |||
| 153 | static void __exit edac_exit_mce_inject(void) | ||
| 154 | { | ||
| 155 | int i; | ||
| 156 | |||
| 157 | for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++) | ||
| 158 | sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr); | ||
| 159 | |||
| 160 | kobject_del(mce_kobj); | ||
| 161 | |||
| 162 | edac_put_sysfs_class(); | ||
| 163 | } | ||
| 164 | |||
| 165 | module_init(edac_init_mce_inject); | ||
| 166 | module_exit(edac_exit_mce_inject); | ||
| 167 | |||
| 168 | MODULE_LICENSE("GPL"); | ||
| 169 | MODULE_AUTHOR("Borislav Petkov <borislav.petkov@amd.com>"); | ||
| 170 | MODULE_AUTHOR("AMD Inc."); | ||
| 171 | MODULE_DESCRIPTION("MCE injection facility for testing MCE decoding"); | ||
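From userspace, the injector is driven through the sysfs files the module creates. The paths below follow from the code (the "edac" sysdev class plus the "mce" kobject), but should be verified on a real system; the status value is an arbitrary example. Note that status/misc/addr parse as hex and bank as decimal, and that writing bank is what triggers the decode:

```c
#include <stdio.h>

#define MCE_DIR "/sys/devices/system/edac/mce/"

static void write_attr(const char *attr, const char *val)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path), MCE_DIR "%s", attr);
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return;
	}
	fputs(val, f);
	fclose(f);
}

int main(void)
{
	write_attr("status", "9c00410000010015");	/* hex (base 16 parse) */
	write_attr("misc", "0");
	write_attr("addr", "0");
	write_attr("bank", "4");	/* decimal; this write triggers the decode */
	return 0;
}
```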
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 280c9b5ad9e3..88a3ae6cd023 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig | |||
| @@ -125,7 +125,7 @@ config ISCSI_IBFT_FIND | |||
| 125 | config ISCSI_IBFT | 125 | config ISCSI_IBFT |
| 126 | tristate "iSCSI Boot Firmware Table Attributes module" | 126 | tristate "iSCSI Boot Firmware Table Attributes module" |
| 127 | select ISCSI_BOOT_SYSFS | 127 | select ISCSI_BOOT_SYSFS |
| 128 | depends on ISCSI_IBFT_FIND && SCSI | 128 | depends on ISCSI_IBFT_FIND && SCSI && SCSI_LOWLEVEL |
| 129 | default n | 129 | default n |
| 130 | help | 130 | help |
| 131 | This option enables support for detection and exposing of iSCSI | 131 | This option enables support for detection and exposing of iSCSI |
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index c37ef64d1465..cb3ccf3ed221 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c | |||
| @@ -59,18 +59,11 @@ | |||
| 59 | #include <linux/hrtimer.h> /* ktime_get_real() */ | 59 | #include <linux/hrtimer.h> /* ktime_get_real() */ |
| 60 | #include <trace/events/power.h> | 60 | #include <trace/events/power.h> |
| 61 | #include <linux/sched.h> | 61 | #include <linux/sched.h> |
| 62 | #include <asm/mwait.h> | ||
| 62 | 63 | ||
| 63 | #define INTEL_IDLE_VERSION "0.4" | 64 | #define INTEL_IDLE_VERSION "0.4" |
| 64 | #define PREFIX "intel_idle: " | 65 | #define PREFIX "intel_idle: " |
| 65 | 66 | ||
| 66 | #define MWAIT_SUBSTATE_MASK (0xf) | ||
| 67 | #define MWAIT_CSTATE_MASK (0xf) | ||
| 68 | #define MWAIT_SUBSTATE_SIZE (4) | ||
| 69 | #define MWAIT_MAX_NUM_CSTATES 8 | ||
| 70 | #define CPUID_MWAIT_LEAF (5) | ||
| 71 | #define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1) | ||
| 72 | #define CPUID5_ECX_INTERRUPT_BREAK (0x2) | ||
| 73 | |||
| 74 | static struct cpuidle_driver intel_idle_driver = { | 67 | static struct cpuidle_driver intel_idle_driver = { |
| 75 | .name = "intel_idle", | 68 | .name = "intel_idle", |
| 76 | .owner = THIS_MODULE, | 69 | .owner = THIS_MODULE, |
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 9ddafc30f432..af9ee313c10b 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c | |||
| @@ -28,7 +28,7 @@ struct evdev { | |||
| 28 | int minor; | 28 | int minor; |
| 29 | struct input_handle handle; | 29 | struct input_handle handle; |
| 30 | wait_queue_head_t wait; | 30 | wait_queue_head_t wait; |
| 31 | struct evdev_client *grab; | 31 | struct evdev_client __rcu *grab; |
| 32 | struct list_head client_list; | 32 | struct list_head client_list; |
| 33 | spinlock_t client_lock; /* protects client_list */ | 33 | spinlock_t client_lock; /* protects client_list */ |
| 34 | struct mutex mutex; | 34 | struct mutex mutex; |
diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c index c19066479057..7e2c12a5b839 100644 --- a/drivers/input/misc/hp_sdc_rtc.c +++ b/drivers/input/misc/hp_sdc_rtc.c | |||
| @@ -104,7 +104,7 @@ static int hp_sdc_rtc_do_read_bbrtc (struct rtc_time *rtctm) | |||
| 104 | t.endidx = 91; | 104 | t.endidx = 91; |
| 105 | t.seq = tseq; | 105 | t.seq = tseq; |
| 106 | t.act.semaphore = &tsem; | 106 | t.act.semaphore = &tsem; |
| 107 | init_MUTEX_LOCKED(&tsem); | 107 | sema_init(&tsem, 0); |
| 108 | 108 | ||
| 109 | if (hp_sdc_enqueue_transaction(&t)) return -1; | 109 | if (hp_sdc_enqueue_transaction(&t)) return -1; |
| 110 | 110 | ||
| @@ -698,7 +698,7 @@ static int __init hp_sdc_rtc_init(void) | |||
| 698 | return -ENODEV; | 698 | return -ENODEV; |
| 699 | #endif | 699 | #endif |
| 700 | 700 | ||
| 701 | init_MUTEX(&i8042tregs); | 701 | sema_init(&i8042tregs, 1); |
| 702 | 702 | ||
| 703 | if ((ret = hp_sdc_request_timer_irq(&hp_sdc_rtc_isr))) | 703 | if ((ret = hp_sdc_request_timer_irq(&hp_sdc_rtc_isr))) |
| 704 | return ret; | 704 | return ret; |
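Editor's note: this and the many similar hunks below are one mechanical conversion — init_MUTEX() and init_MUTEX_LOCKED() were thin wrappers around semaphore initialization, and are replaced by sema_init() with an explicit count. A minimal sketch, with an illustrative semaphore name:

    #include <linux/semaphore.h>

    static struct semaphore sem;

    static void init_example(void)
    {
            sema_init(&sem, 1);  /* was init_MUTEX(&sem): count 1, starts "unlocked" */
            sema_init(&sem, 0);  /* was init_MUTEX_LOCKED(&sem): count 0, starts "locked" */
    }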
diff --git a/drivers/input/serio/hil_mlc.c b/drivers/input/serio/hil_mlc.c index c92f4edfee7b..e5624d8f1709 100644 --- a/drivers/input/serio/hil_mlc.c +++ b/drivers/input/serio/hil_mlc.c | |||
| @@ -915,15 +915,15 @@ int hil_mlc_register(hil_mlc *mlc) | |||
| 915 | mlc->ostarted = 0; | 915 | mlc->ostarted = 0; |
| 916 | 916 | ||
| 917 | rwlock_init(&mlc->lock); | 917 | rwlock_init(&mlc->lock); |
| 918 | init_MUTEX(&mlc->osem); | 918 | sema_init(&mlc->osem, 1); |
| 919 | 919 | ||
| 920 | init_MUTEX(&mlc->isem); | 920 | sema_init(&mlc->isem, 1); |
| 921 | mlc->icount = -1; | 921 | mlc->icount = -1; |
| 922 | mlc->imatch = 0; | 922 | mlc->imatch = 0; |
| 923 | 923 | ||
| 924 | mlc->opercnt = 0; | 924 | mlc->opercnt = 0; |
| 925 | 925 | ||
| 926 | init_MUTEX_LOCKED(&(mlc->csem)); | 926 | sema_init(&(mlc->csem), 0); |
| 927 | 927 | ||
| 928 | hil_mlc_clear_di_scratch(mlc); | 928 | hil_mlc_clear_di_scratch(mlc); |
| 929 | hil_mlc_clear_di_map(mlc, 0); | 929 | hil_mlc_clear_di_map(mlc, 0); |
diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index bcc2d30ec245..8c0b51c31424 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c | |||
| @@ -905,7 +905,7 @@ static int __init hp_sdc_init(void) | |||
| 905 | ts_sync[1] = 0x0f; | 905 | ts_sync[1] = 0x0f; |
| 906 | ts_sync[2] = ts_sync[3] = ts_sync[4] = ts_sync[5] = 0; | 906 | ts_sync[2] = ts_sync[3] = ts_sync[4] = ts_sync[5] = 0; |
| 907 | t_sync.act.semaphore = &s_sync; | 907 | t_sync.act.semaphore = &s_sync; |
| 908 | init_MUTEX_LOCKED(&s_sync); | 908 | sema_init(&s_sync, 0); |
| 909 | hp_sdc_enqueue_transaction(&t_sync); | 909 | hp_sdc_enqueue_transaction(&t_sync); |
| 910 | down(&s_sync); /* Wait for t_sync to complete */ | 910 | down(&s_sync); /* Wait for t_sync to complete */ |
| 911 | 911 | ||
| @@ -1039,7 +1039,7 @@ static int __init hp_sdc_register(void) | |||
| 1039 | return hp_sdc.dev_err; | 1039 | return hp_sdc.dev_err; |
| 1040 | } | 1040 | } |
| 1041 | 1041 | ||
| 1042 | init_MUTEX_LOCKED(&tq_init_sem); | 1042 | sema_init(&tq_init_sem, 0); |
| 1043 | 1043 | ||
| 1044 | tq_init.actidx = 0; | 1044 | tq_init.actidx = 0; |
| 1045 | tq_init.idx = 1; | 1045 | tq_init.idx = 1; |
diff --git a/drivers/isdn/act2000/act2000.h b/drivers/isdn/act2000/act2000.h index d4c50512a1ff..88c9423500d8 100644 --- a/drivers/isdn/act2000/act2000.h +++ b/drivers/isdn/act2000/act2000.h | |||
| @@ -141,9 +141,9 @@ typedef struct irq_data_isa { | |||
| 141 | __u8 rcvhdr[8]; | 141 | __u8 rcvhdr[8]; |
| 142 | } irq_data_isa; | 142 | } irq_data_isa; |
| 143 | 143 | ||
| 144 | typedef union irq_data { | 144 | typedef union act2000_irq_data { |
| 145 | irq_data_isa isa; | 145 | irq_data_isa isa; |
| 146 | } irq_data; | 146 | } act2000_irq_data; |
| 147 | 147 | ||
| 148 | /* | 148 | /* |
| 149 | * Per card driver data | 149 | * Per card driver data |
| @@ -176,7 +176,7 @@ typedef struct act2000_card { | |||
| 176 | char *status_buf_read; | 176 | char *status_buf_read; |
| 177 | char *status_buf_write; | 177 | char *status_buf_write; |
| 178 | char *status_buf_end; | 178 | char *status_buf_end; |
| 179 | irq_data idat; /* Data used for IRQ handler */ | 179 | act2000_irq_data idat; /* Data used for IRQ handler */ |
| 180 | isdn_if interface; /* Interface to upper layer */ | 180 | isdn_if interface; /* Interface to upper layer */ |
| 181 | char regname[35]; /* Name used for request_region */ | 181 | char regname[35]; /* Name used for request_region */ |
| 182 | } act2000_card; | 182 | } act2000_card; |
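Editor's note: this rename is a namespace fix, not a behavior change — the generic IRQ layer now owns the name "irq_data" via <linux/irq.h>, so a driver-local type of the same name would clash. A sketch of the situation, with the driver's ISA struct reduced to its first field:

    #include <linux/types.h>
    #include <linux/irq.h>   /* now declares struct irq_data { unsigned int irq; ... }; */

    typedef struct irq_data_isa {   /* driver-local ISA variant, unchanged */
            __u8 rcvhdr[8];
            /* ... */
    } irq_data_isa;

    typedef union act2000_irq_data {   /* was "union irq_data": collided with core */
            irq_data_isa isa;
    } act2000_irq_data;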
diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c index 6f9afcd5ca4e..b133378d4dc9 100644 --- a/drivers/isdn/hisax/config.c +++ b/drivers/isdn/hisax/config.c | |||
| @@ -801,6 +801,16 @@ static void closecard(int cardnr) | |||
| 801 | ll_unload(csta); | 801 | ll_unload(csta); |
| 802 | } | 802 | } |
| 803 | 803 | ||
| 804 | static irqreturn_t card_irq(int intno, void *dev_id) | ||
| 805 | { | ||
| 806 | struct IsdnCardState *cs = dev_id; | ||
| 807 | irqreturn_t ret = cs->irq_func(intno, cs); | ||
| 808 | |||
| 809 | if (ret == IRQ_HANDLED) | ||
| 810 | cs->irq_cnt++; | ||
| 811 | return ret; | ||
| 812 | } | ||
| 813 | |||
| 804 | static int init_card(struct IsdnCardState *cs) | 814 | static int init_card(struct IsdnCardState *cs) |
| 805 | { | 815 | { |
| 806 | int irq_cnt, cnt = 3, ret; | 816 | int irq_cnt, cnt = 3, ret; |
| @@ -809,10 +819,10 @@ static int init_card(struct IsdnCardState *cs) | |||
| 809 | ret = cs->cardmsg(cs, CARD_INIT, NULL); | 819 | ret = cs->cardmsg(cs, CARD_INIT, NULL); |
| 810 | return(ret); | 820 | return(ret); |
| 811 | } | 821 | } |
| 812 | irq_cnt = kstat_irqs(cs->irq); | 822 | irq_cnt = cs->irq_cnt = 0; |
| 813 | printk(KERN_INFO "%s: IRQ %d count %d\n", CardType[cs->typ], | 823 | printk(KERN_INFO "%s: IRQ %d count %d\n", CardType[cs->typ], |
| 814 | cs->irq, irq_cnt); | 824 | cs->irq, irq_cnt); |
| 815 | if (request_irq(cs->irq, cs->irq_func, cs->irq_flags, "HiSax", cs)) { | 825 | if (request_irq(cs->irq, card_irq, cs->irq_flags, "HiSax", cs)) { |
| 816 | printk(KERN_WARNING "HiSax: couldn't get interrupt %d\n", | 826 | printk(KERN_WARNING "HiSax: couldn't get interrupt %d\n", |
| 817 | cs->irq); | 827 | cs->irq); |
| 818 | return 1; | 828 | return 1; |
| @@ -822,8 +832,8 @@ static int init_card(struct IsdnCardState *cs) | |||
| 822 | /* Timeout 10ms */ | 832 | /* Timeout 10ms */ |
| 823 | msleep(10); | 833 | msleep(10); |
| 824 | printk(KERN_INFO "%s: IRQ %d count %d\n", | 834 | printk(KERN_INFO "%s: IRQ %d count %d\n", |
| 825 | CardType[cs->typ], cs->irq, kstat_irqs(cs->irq)); | 835 | CardType[cs->typ], cs->irq, cs->irq_cnt); |
| 826 | if (kstat_irqs(cs->irq) == irq_cnt) { | 836 | if (cs->irq_cnt == irq_cnt) { |
| 827 | printk(KERN_WARNING | 837 | printk(KERN_WARNING |
| 828 | "%s: IRQ(%d) getting no interrupts during init %d\n", | 838 | "%s: IRQ(%d) getting no interrupts during init %d\n", |
| 829 | CardType[cs->typ], cs->irq, 4 - cnt); | 839 | CardType[cs->typ], cs->irq, 4 - cnt); |
diff --git a/drivers/isdn/hisax/hisax.h b/drivers/isdn/hisax/hisax.h index 832a87855ffb..32ab3924aa73 100644 --- a/drivers/isdn/hisax/hisax.h +++ b/drivers/isdn/hisax/hisax.h | |||
| @@ -959,6 +959,7 @@ struct IsdnCardState { | |||
| 959 | u_long event; | 959 | u_long event; |
| 960 | struct work_struct tqueue; | 960 | struct work_struct tqueue; |
| 961 | struct timer_list dbusytimer; | 961 | struct timer_list dbusytimer; |
| 962 | unsigned int irq_cnt; | ||
| 962 | #ifdef ERROR_STATISTIC | 963 | #ifdef ERROR_STATISTIC |
| 963 | int err_crc; | 964 | int err_crc; |
| 964 | int err_tx; | 965 | int err_tx; |
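Editor's note: kstat_irqs() reaches into irq_desc internals, so HiSax now interposes its own handler and counts only interrupts its real handler actually claimed. A sketch of that interposed-handler pattern, with illustrative names (my_card, real_handler):

    #include <linux/interrupt.h>

    struct my_card {
            irq_handler_t real_handler;
            unsigned int irq_cnt;
    };

    static irqreturn_t my_card_irq(int irq, void *dev_id)
    {
            struct my_card *card = dev_id;
            irqreturn_t ret = card->real_handler(irq, card);

            if (ret == IRQ_HANDLED)
                    card->irq_cnt++;   /* private count replaces kstat_irqs() */
            return ret;
    }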
diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 1c4ee6e77937..bf64e49d996a 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c | |||
| @@ -83,7 +83,7 @@ static struct adb_driver *adb_controller; | |||
| 83 | BLOCKING_NOTIFIER_HEAD(adb_client_list); | 83 | BLOCKING_NOTIFIER_HEAD(adb_client_list); |
| 84 | static int adb_got_sleep; | 84 | static int adb_got_sleep; |
| 85 | static int adb_inited; | 85 | static int adb_inited; |
| 86 | static DECLARE_MUTEX(adb_probe_mutex); | 86 | static DEFINE_SEMAPHORE(adb_probe_mutex); |
| 87 | static int sleepy_trackpad; | 87 | static int sleepy_trackpad; |
| 88 | static int autopoll_devs; | 88 | static int autopoll_devs; |
| 89 | int __adb_probe_sync; | 89 | int __adb_probe_sync; |
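Editor's note: DEFINE_SEMAPHORE is the static counterpart of sema_init; at this point in the tree it takes only a name and initializes the count to 1, matching the binary semaphore that DECLARE_MUTEX used to produce:

    #include <linux/semaphore.h>

    /* was: static DECLARE_MUTEX(probe_sem);  (illustrative name) */
    static DEFINE_SEMAPHORE(probe_sem);   /* compile-time init, count = 1 */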
diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c index 097f24d8bceb..b9fda7018cef 100644 --- a/drivers/mfd/twl4030-irq.c +++ b/drivers/mfd/twl4030-irq.c | |||
| @@ -78,7 +78,7 @@ struct sih { | |||
| 78 | u8 irq_lines; /* number of supported irq lines */ | 78 | u8 irq_lines; /* number of supported irq lines */ |
| 79 | 79 | ||
| 80 | /* SIR ignored -- set interrupt, for testing only */ | 80 | /* SIR ignored -- set interrupt, for testing only */ |
| 81 | struct irq_data { | 81 | struct sih_irq_data { |
| 82 | u8 isr_offset; | 82 | u8 isr_offset; |
| 83 | u8 imr_offset; | 83 | u8 imr_offset; |
| 84 | } mask[2]; | 84 | } mask[2]; |
| @@ -810,7 +810,7 @@ int twl4030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end) | |||
| 810 | twl4030_irq_chip = dummy_irq_chip; | 810 | twl4030_irq_chip = dummy_irq_chip; |
| 811 | twl4030_irq_chip.name = "twl4030"; | 811 | twl4030_irq_chip.name = "twl4030"; |
| 812 | 812 | ||
| 813 | twl4030_sih_irq_chip.ack = dummy_irq_chip.ack; | 813 | twl4030_sih_irq_chip.irq_ack = dummy_irq_chip.irq_ack; |
| 814 | 814 | ||
| 815 | for (i = irq_base; i < irq_end; i++) { | 815 | for (i = irq_base; i < irq_end; i++) { |
| 816 | set_irq_chip_and_handler(i, &twl4030_irq_chip, | 816 | set_irq_chip_and_handler(i, &twl4030_irq_chip, |
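Editor's note: irq_chip callbacks are migrating from taking a bare irq number (.ack) to taking a struct irq_data pointer (.irq_ack), which carries the irq number, chip data, and handler data in one place. A hedged sketch of a new-style callback; the chip state struct, register offset, and names are all illustrative:

    #include <linux/irq.h>
    #include <linux/io.h>

    struct my_chip_state {
            void __iomem *base;
            unsigned int irq_base;
    };

    #define MY_ACK_REG 0x10   /* illustrative register offset */

    static void my_irq_ack(struct irq_data *data)
    {
            struct my_chip_state *st = data->chip_data;   /* set via set_irq_chip_data() */

            /* the Linux irq number is still available as data->irq */
            writel(1 << (data->irq - st->irq_base), st->base + MY_ACK_REG);
    }

    static struct irq_chip my_chip = {
            .name    = "my-chip",
            .irq_ack = my_irq_ack,
    };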
diff --git a/drivers/net/3c527.c b/drivers/net/3c527.c index 70705d1306b9..eca55c52bdfd 100644 --- a/drivers/net/3c527.c +++ b/drivers/net/3c527.c | |||
| @@ -522,7 +522,7 @@ static int __init mc32_probe1(struct net_device *dev, int slot) | |||
| 522 | lp->tx_len = lp->exec_box->data[9]; /* Transmit list count */ | 522 | lp->tx_len = lp->exec_box->data[9]; /* Transmit list count */ |
| 523 | lp->rx_len = lp->exec_box->data[11]; /* Receive list count */ | 523 | lp->rx_len = lp->exec_box->data[11]; /* Receive list count */ |
| 524 | 524 | ||
| 525 | init_MUTEX_LOCKED(&lp->cmd_mutex); | 525 | sema_init(&lp->cmd_mutex, 0); |
| 526 | init_completion(&lp->execution_cmd); | 526 | init_completion(&lp->execution_cmd); |
| 527 | init_completion(&lp->xceiver_cmd); | 527 | init_completion(&lp->xceiver_cmd); |
| 528 | 528 | ||
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 4b52c767ad05..3e5d0b6b6516 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c | |||
| @@ -608,7 +608,7 @@ static int sixpack_open(struct tty_struct *tty) | |||
| 608 | 608 | ||
| 609 | spin_lock_init(&sp->lock); | 609 | spin_lock_init(&sp->lock); |
| 610 | atomic_set(&sp->refcnt, 1); | 610 | atomic_set(&sp->refcnt, 1); |
| 611 | init_MUTEX_LOCKED(&sp->dead_sem); | 611 | sema_init(&sp->dead_sem, 0); |
| 612 | 612 | ||
| 613 | /* !!! length of the buffers. MTU is IP MTU, not PACLEN! */ | 613 | /* !!! length of the buffers. MTU is IP MTU, not PACLEN! */ |
| 614 | 614 | ||
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 66e88bd59caa..4c628393c8b1 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c | |||
| @@ -747,7 +747,7 @@ static int mkiss_open(struct tty_struct *tty) | |||
| 747 | 747 | ||
| 748 | spin_lock_init(&ax->buflock); | 748 | spin_lock_init(&ax->buflock); |
| 749 | atomic_set(&ax->refcnt, 1); | 749 | atomic_set(&ax->refcnt, 1); |
| 750 | init_MUTEX_LOCKED(&ax->dead_sem); | 750 | sema_init(&ax->dead_sem, 0); |
| 751 | 751 | ||
| 752 | ax->tty = tty; | 752 | ax->tty = tty; |
| 753 | tty->disc_data = ax; | 753 | tty->disc_data = ax; |
diff --git a/drivers/net/irda/sir_dev.c b/drivers/net/irda/sir_dev.c index 1b051dab7b29..51d74447f8f8 100644 --- a/drivers/net/irda/sir_dev.c +++ b/drivers/net/irda/sir_dev.c | |||
| @@ -909,7 +909,7 @@ struct sir_dev * sirdev_get_instance(const struct sir_driver *drv, const char *n | |||
| 909 | dev->tx_skb = NULL; | 909 | dev->tx_skb = NULL; |
| 910 | 910 | ||
| 911 | spin_lock_init(&dev->tx_lock); | 911 | spin_lock_init(&dev->tx_lock); |
| 912 | init_MUTEX(&dev->fsm.sem); | 912 | sema_init(&dev->fsm.sem, 1); |
| 913 | 913 | ||
| 914 | dev->drv = drv; | 914 | dev->drv = drv; |
| 915 | dev->netdev = ndev; | 915 | dev->netdev = ndev; |
diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index af50a530daee..78d70a6481bf 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c | |||
| @@ -184,7 +184,7 @@ ppp_asynctty_open(struct tty_struct *tty) | |||
| 184 | tasklet_init(&ap->tsk, ppp_async_process, (unsigned long) ap); | 184 | tasklet_init(&ap->tsk, ppp_async_process, (unsigned long) ap); |
| 185 | 185 | ||
| 186 | atomic_set(&ap->refcnt, 1); | 186 | atomic_set(&ap->refcnt, 1); |
| 187 | init_MUTEX_LOCKED(&ap->dead_sem); | 187 | sema_init(&ap->dead_sem, 0); |
| 188 | 188 | ||
| 189 | ap->chan.private = ap; | 189 | ap->chan.private = ap; |
| 190 | ap->chan.ops = &async_ops; | 190 | ap->chan.ops = &async_ops; |
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 04c6cd4333f1..10bafd59f9c3 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c | |||
| @@ -575,7 +575,7 @@ static int cosa_probe(int base, int irq, int dma) | |||
| 575 | 575 | ||
| 576 | /* Initialize the chardev data structures */ | 576 | /* Initialize the chardev data structures */ |
| 577 | mutex_init(&chan->rlock); | 577 | mutex_init(&chan->rlock); |
| 578 | init_MUTEX(&chan->wsem); | 578 | sema_init(&chan->wsem, 1); |
| 579 | 579 | ||
| 580 | /* Register the network interface */ | 580 | /* Register the network interface */ |
| 581 | if (!(chan->netdev = alloc_hdlcdev(chan))) { | 581 | if (!(chan->netdev = alloc_hdlcdev(chan))) { |
diff --git a/drivers/parport/share.c b/drivers/parport/share.c index dffa5d4fb298..a2d9d1e59260 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c | |||
| @@ -306,7 +306,7 @@ struct parport *parport_register_port(unsigned long base, int irq, int dma, | |||
| 306 | spin_lock_init(&tmp->pardevice_lock); | 306 | spin_lock_init(&tmp->pardevice_lock); |
| 307 | tmp->ieee1284.mode = IEEE1284_MODE_COMPAT; | 307 | tmp->ieee1284.mode = IEEE1284_MODE_COMPAT; |
| 308 | tmp->ieee1284.phase = IEEE1284_PH_FWD_IDLE; | 308 | tmp->ieee1284.phase = IEEE1284_PH_FWD_IDLE; |
| 309 | init_MUTEX_LOCKED (&tmp->ieee1284.irq); /* actually a semaphore at 0 */ | 309 | sema_init(&tmp->ieee1284.irq, 0); |
| 310 | tmp->spintime = parport_default_spintime; | 310 | tmp->spintime = parport_default_spintime; |
| 311 | atomic_set (&tmp->ref_count, 1); | 311 | atomic_set (&tmp->ref_count, 1); |
| 312 | INIT_LIST_HEAD(&tmp->full_list); | 312 | INIT_LIST_HEAD(&tmp->full_list); |
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 0a19708074c2..3de3a436a432 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c | |||
| @@ -1221,9 +1221,9 @@ const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) | |||
| 1221 | } | 1221 | } |
| 1222 | } | 1222 | } |
| 1223 | 1223 | ||
| 1224 | void dmar_msi_unmask(unsigned int irq) | 1224 | void dmar_msi_unmask(struct irq_data *data) |
| 1225 | { | 1225 | { |
| 1226 | struct intel_iommu *iommu = get_irq_data(irq); | 1226 | struct intel_iommu *iommu = irq_data_get_irq_data(data); |
| 1227 | unsigned long flag; | 1227 | unsigned long flag; |
| 1228 | 1228 | ||
| 1229 | /* unmask it */ | 1229 | /* unmask it */ |
| @@ -1234,10 +1234,10 @@ void dmar_msi_unmask(unsigned int irq) | |||
| 1234 | spin_unlock_irqrestore(&iommu->register_lock, flag); | 1234 | spin_unlock_irqrestore(&iommu->register_lock, flag); |
| 1235 | } | 1235 | } |
| 1236 | 1236 | ||
| 1237 | void dmar_msi_mask(unsigned int irq) | 1237 | void dmar_msi_mask(struct irq_data *data) |
| 1238 | { | 1238 | { |
| 1239 | unsigned long flag; | 1239 | unsigned long flag; |
| 1240 | struct intel_iommu *iommu = get_irq_data(irq); | 1240 | struct intel_iommu *iommu = irq_data_get_irq_data(data); |
| 1241 | 1241 | ||
| 1242 | /* mask it */ | 1242 | /* mask it */ |
| 1243 | spin_lock_irqsave(&iommu->register_lock, flag); | 1243 | spin_lock_irqsave(&iommu->register_lock, flag); |
diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c index 98abf8b91294..834842aa5bbf 100644 --- a/drivers/pci/htirq.c +++ b/drivers/pci/htirq.c | |||
| @@ -57,28 +57,22 @@ void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg) | |||
| 57 | *msg = cfg->msg; | 57 | *msg = cfg->msg; |
| 58 | } | 58 | } |
| 59 | 59 | ||
| 60 | void mask_ht_irq(unsigned int irq) | 60 | void mask_ht_irq(struct irq_data *data) |
| 61 | { | 61 | { |
| 62 | struct ht_irq_cfg *cfg; | 62 | struct ht_irq_cfg *cfg = irq_data_get_irq_data(data); |
| 63 | struct ht_irq_msg msg; | 63 | struct ht_irq_msg msg = cfg->msg; |
| 64 | |||
| 65 | cfg = get_irq_data(irq); | ||
| 66 | 64 | ||
| 67 | msg = cfg->msg; | ||
| 68 | msg.address_lo |= 1; | 65 | msg.address_lo |= 1; |
| 69 | write_ht_irq_msg(irq, &msg); | 66 | write_ht_irq_msg(data->irq, &msg); |
| 70 | } | 67 | } |
| 71 | 68 | ||
| 72 | void unmask_ht_irq(unsigned int irq) | 69 | void unmask_ht_irq(struct irq_data *data) |
| 73 | { | 70 | { |
| 74 | struct ht_irq_cfg *cfg; | 71 | struct ht_irq_cfg *cfg = irq_data_get_irq_data(data); |
| 75 | struct ht_irq_msg msg; | 72 | struct ht_irq_msg msg = cfg->msg; |
| 76 | |||
| 77 | cfg = get_irq_data(irq); | ||
| 78 | 73 | ||
| 79 | msg = cfg->msg; | ||
| 80 | msg.address_lo &= ~1; | 74 | msg.address_lo &= ~1; |
| 81 | write_ht_irq_msg(irq, &msg); | 75 | write_ht_irq_msg(data->irq, &msg); |
| 82 | } | 76 | } |
| 83 | 77 | ||
| 84 | /** | 78 | /** |
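Editor's note: both the dmar and htirq conversions above follow the same shape — the per-irq cookie that used to be fetched with get_irq_data(irq) now comes straight from the irq_data argument via irq_data_get_irq_data(), skipping the irq -> desc lookup. A sketch with an illustrative cookie type:

    #include <linux/irq.h>
    #include <linux/types.h>

    struct my_cookie {
            u32 mask_bits;
    };

    static void my_mask(struct irq_data *data)
    {
            /* same pointer the old get_irq_data(data->irq) would return */
            struct my_cookie *c = irq_data_get_irq_data(data);

            c->mask_bits |= 1;   /* stand-in for the real masking write */
    }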
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index fd1d2867cdcc..ec87cd66f3eb 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c | |||
| @@ -46,109 +46,24 @@ static __init int setup_intremap(char *str) | |||
| 46 | } | 46 | } |
| 47 | early_param("intremap", setup_intremap); | 47 | early_param("intremap", setup_intremap); |
| 48 | 48 | ||
| 49 | struct irq_2_iommu { | ||
| 50 | struct intel_iommu *iommu; | ||
| 51 | u16 irte_index; | ||
| 52 | u16 sub_handle; | ||
| 53 | u8 irte_mask; | ||
| 54 | }; | ||
| 55 | |||
| 56 | #ifdef CONFIG_GENERIC_HARDIRQS | ||
| 57 | static struct irq_2_iommu *get_one_free_irq_2_iommu(int node) | ||
| 58 | { | ||
| 59 | struct irq_2_iommu *iommu; | ||
| 60 | |||
| 61 | iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node); | ||
| 62 | printk(KERN_DEBUG "alloc irq_2_iommu on node %d\n", node); | ||
| 63 | |||
| 64 | return iommu; | ||
| 65 | } | ||
| 66 | |||
| 67 | static struct irq_2_iommu *irq_2_iommu(unsigned int irq) | ||
| 68 | { | ||
| 69 | struct irq_desc *desc; | ||
| 70 | |||
| 71 | desc = irq_to_desc(irq); | ||
| 72 | |||
| 73 | if (WARN_ON_ONCE(!desc)) | ||
| 74 | return NULL; | ||
| 75 | |||
| 76 | return desc->irq_2_iommu; | ||
| 77 | } | ||
| 78 | |||
| 79 | static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq) | ||
| 80 | { | ||
| 81 | struct irq_desc *desc; | ||
| 82 | struct irq_2_iommu *irq_iommu; | ||
| 83 | |||
| 84 | desc = irq_to_desc(irq); | ||
| 85 | if (!desc) { | ||
| 86 | printk(KERN_INFO "can not get irq_desc for %d\n", irq); | ||
| 87 | return NULL; | ||
| 88 | } | ||
| 89 | |||
| 90 | irq_iommu = desc->irq_2_iommu; | ||
| 91 | |||
| 92 | if (!irq_iommu) | ||
| 93 | desc->irq_2_iommu = get_one_free_irq_2_iommu(irq_node(irq)); | ||
| 94 | |||
| 95 | return desc->irq_2_iommu; | ||
| 96 | } | ||
| 97 | |||
| 98 | #else /* !CONFIG_SPARSE_IRQ */ | ||
| 99 | |||
| 100 | static struct irq_2_iommu irq_2_iommuX[NR_IRQS]; | ||
| 101 | |||
| 102 | static struct irq_2_iommu *irq_2_iommu(unsigned int irq) | ||
| 103 | { | ||
| 104 | if (irq < nr_irqs) | ||
| 105 | return &irq_2_iommuX[irq]; | ||
| 106 | |||
| 107 | return NULL; | ||
| 108 | } | ||
| 109 | static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq) | ||
| 110 | { | ||
| 111 | return irq_2_iommu(irq); | ||
| 112 | } | ||
| 113 | #endif | ||
| 114 | |||
| 115 | static DEFINE_SPINLOCK(irq_2_ir_lock); | 49 | static DEFINE_SPINLOCK(irq_2_ir_lock); |
| 116 | 50 | ||
| 117 | static struct irq_2_iommu *valid_irq_2_iommu(unsigned int irq) | 51 | static struct irq_2_iommu *irq_2_iommu(unsigned int irq) |
| 118 | { | ||
| 119 | struct irq_2_iommu *irq_iommu; | ||
| 120 | |||
| 121 | irq_iommu = irq_2_iommu(irq); | ||
| 122 | |||
| 123 | if (!irq_iommu) | ||
| 124 | return NULL; | ||
| 125 | |||
| 126 | if (!irq_iommu->iommu) | ||
| 127 | return NULL; | ||
| 128 | |||
| 129 | return irq_iommu; | ||
| 130 | } | ||
| 131 | |||
| 132 | int irq_remapped(int irq) | ||
| 133 | { | 52 | { |
| 134 | return valid_irq_2_iommu(irq) != NULL; | 53 | struct irq_cfg *cfg = get_irq_chip_data(irq); |
| 54 | return cfg ? &cfg->irq_2_iommu : NULL; | ||
| 135 | } | 55 | } |
| 136 | 56 | ||
| 137 | int get_irte(int irq, struct irte *entry) | 57 | int get_irte(int irq, struct irte *entry) |
| 138 | { | 58 | { |
| 139 | int index; | 59 | struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); |
| 140 | struct irq_2_iommu *irq_iommu; | ||
| 141 | unsigned long flags; | 60 | unsigned long flags; |
| 61 | int index; | ||
| 142 | 62 | ||
| 143 | if (!entry) | 63 | if (!entry || !irq_iommu) |
| 144 | return -1; | 64 | return -1; |
| 145 | 65 | ||
| 146 | spin_lock_irqsave(&irq_2_ir_lock, flags); | 66 | spin_lock_irqsave(&irq_2_ir_lock, flags); |
| 147 | irq_iommu = valid_irq_2_iommu(irq); | ||
| 148 | if (!irq_iommu) { | ||
| 149 | spin_unlock_irqrestore(&irq_2_ir_lock, flags); | ||
| 150 | return -1; | ||
| 151 | } | ||
| 152 | 67 | ||
| 153 | index = irq_iommu->irte_index + irq_iommu->sub_handle; | 68 | index = irq_iommu->irte_index + irq_iommu->sub_handle; |
| 154 | *entry = *(irq_iommu->iommu->ir_table->base + index); | 69 | *entry = *(irq_iommu->iommu->ir_table->base + index); |
| @@ -160,20 +75,14 @@ int get_irte(int irq, struct irte *entry) | |||
| 160 | int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) | 75 | int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) |
| 161 | { | 76 | { |
| 162 | struct ir_table *table = iommu->ir_table; | 77 | struct ir_table *table = iommu->ir_table; |
| 163 | struct irq_2_iommu *irq_iommu; | 78 | struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); |
| 164 | u16 index, start_index; | 79 | u16 index, start_index; |
| 165 | unsigned int mask = 0; | 80 | unsigned int mask = 0; |
| 166 | unsigned long flags; | 81 | unsigned long flags; |
| 167 | int i; | 82 | int i; |
| 168 | 83 | ||
| 169 | if (!count) | 84 | if (!count || !irq_iommu) |
| 170 | return -1; | ||
| 171 | |||
| 172 | #ifndef CONFIG_SPARSE_IRQ | ||
| 173 | /* protect irq_2_iommu_alloc later */ | ||
| 174 | if (irq >= nr_irqs) | ||
| 175 | return -1; | 85 | return -1; |
| 176 | #endif | ||
| 177 | 86 | ||
| 178 | /* | 87 | /* |
| 179 | * start the IRTE search from index 0. | 88 | * start the IRTE search from index 0. |
| @@ -214,13 +123,6 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) | |||
| 214 | for (i = index; i < index + count; i++) | 123 | for (i = index; i < index + count; i++) |
| 215 | table->base[i].present = 1; | 124 | table->base[i].present = 1; |
| 216 | 125 | ||
| 217 | irq_iommu = irq_2_iommu_alloc(irq); | ||
| 218 | if (!irq_iommu) { | ||
| 219 | spin_unlock_irqrestore(&irq_2_ir_lock, flags); | ||
| 220 | printk(KERN_ERR "can't allocate irq_2_iommu\n"); | ||
| 221 | return -1; | ||
| 222 | } | ||
| 223 | |||
| 224 | irq_iommu->iommu = iommu; | 126 | irq_iommu->iommu = iommu; |
| 225 | irq_iommu->irte_index = index; | 127 | irq_iommu->irte_index = index; |
| 226 | irq_iommu->sub_handle = 0; | 128 | irq_iommu->sub_handle = 0; |
| @@ -244,17 +146,14 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask) | |||
| 244 | 146 | ||
| 245 | int map_irq_to_irte_handle(int irq, u16 *sub_handle) | 147 | int map_irq_to_irte_handle(int irq, u16 *sub_handle) |
| 246 | { | 148 | { |
| 247 | int index; | 149 | struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); |
| 248 | struct irq_2_iommu *irq_iommu; | ||
| 249 | unsigned long flags; | 150 | unsigned long flags; |
| 151 | int index; | ||
| 250 | 152 | ||
| 251 | spin_lock_irqsave(&irq_2_ir_lock, flags); | 153 | if (!irq_iommu) |
| 252 | irq_iommu = valid_irq_2_iommu(irq); | ||
| 253 | if (!irq_iommu) { | ||
| 254 | spin_unlock_irqrestore(&irq_2_ir_lock, flags); | ||
| 255 | return -1; | 154 | return -1; |
| 256 | } | ||
| 257 | 155 | ||
| 156 | spin_lock_irqsave(&irq_2_ir_lock, flags); | ||
| 258 | *sub_handle = irq_iommu->sub_handle; | 157 | *sub_handle = irq_iommu->sub_handle; |
| 259 | index = irq_iommu->irte_index; | 158 | index = irq_iommu->irte_index; |
| 260 | spin_unlock_irqrestore(&irq_2_ir_lock, flags); | 159 | spin_unlock_irqrestore(&irq_2_ir_lock, flags); |
| @@ -263,18 +162,13 @@ int map_irq_to_irte_handle(int irq, u16 *sub_handle) | |||
| 263 | 162 | ||
| 264 | int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) | 163 | int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) |
| 265 | { | 164 | { |
| 266 | struct irq_2_iommu *irq_iommu; | 165 | struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); |
| 267 | unsigned long flags; | 166 | unsigned long flags; |
| 268 | 167 | ||
| 269 | spin_lock_irqsave(&irq_2_ir_lock, flags); | 168 | if (!irq_iommu) |
| 270 | |||
| 271 | irq_iommu = irq_2_iommu_alloc(irq); | ||
| 272 | |||
| 273 | if (!irq_iommu) { | ||
| 274 | spin_unlock_irqrestore(&irq_2_ir_lock, flags); | ||
| 275 | printk(KERN_ERR "can't allocate irq_2_iommu\n"); | ||
| 276 | return -1; | 169 | return -1; |
| 277 | } | 170 | |
| 171 | spin_lock_irqsave(&irq_2_ir_lock, flags); | ||
| 278 | 172 | ||
| 279 | irq_iommu->iommu = iommu; | 173 | irq_iommu->iommu = iommu; |
| 280 | irq_iommu->irte_index = index; | 174 | irq_iommu->irte_index = index; |
| @@ -286,43 +180,18 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) | |||
| 286 | return 0; | 180 | return 0; |
| 287 | } | 181 | } |
| 288 | 182 | ||
| 289 | int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index) | ||
| 290 | { | ||
| 291 | struct irq_2_iommu *irq_iommu; | ||
| 292 | unsigned long flags; | ||
| 293 | |||
| 294 | spin_lock_irqsave(&irq_2_ir_lock, flags); | ||
| 295 | irq_iommu = valid_irq_2_iommu(irq); | ||
| 296 | if (!irq_iommu) { | ||
| 297 | spin_unlock_irqrestore(&irq_2_ir_lock, flags); | ||
| 298 | return -1; | ||
| 299 | } | ||
| 300 | |||
| 301 | irq_iommu->iommu = NULL; | ||
| 302 | irq_iommu->irte_index = 0; | ||
| 303 | irq_iommu->sub_handle = 0; | ||
| 304 | irq_2_iommu(irq)->irte_mask = 0; | ||
| 305 | |||
| 306 | spin_unlock_irqrestore(&irq_2_ir_lock, flags); | ||
| 307 | |||
| 308 | return 0; | ||
| 309 | } | ||
| 310 | |||
| 311 | int modify_irte(int irq, struct irte *irte_modified) | 183 | int modify_irte(int irq, struct irte *irte_modified) |
| 312 | { | 184 | { |
| 313 | int rc; | 185 | struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); |
| 314 | int index; | ||
| 315 | struct irte *irte; | ||
| 316 | struct intel_iommu *iommu; | 186 | struct intel_iommu *iommu; |
| 317 | struct irq_2_iommu *irq_iommu; | ||
| 318 | unsigned long flags; | 187 | unsigned long flags; |
| 188 | struct irte *irte; | ||
| 189 | int rc, index; | ||
| 319 | 190 | ||
| 320 | spin_lock_irqsave(&irq_2_ir_lock, flags); | 191 | if (!irq_iommu) |
| 321 | irq_iommu = valid_irq_2_iommu(irq); | ||
| 322 | if (!irq_iommu) { | ||
| 323 | spin_unlock_irqrestore(&irq_2_ir_lock, flags); | ||
| 324 | return -1; | 192 | return -1; |
| 325 | } | 193 | |
| 194 | spin_lock_irqsave(&irq_2_ir_lock, flags); | ||
| 326 | 195 | ||
| 327 | iommu = irq_iommu->iommu; | 196 | iommu = irq_iommu->iommu; |
| 328 | 197 | ||
| @@ -339,31 +208,6 @@ int modify_irte(int irq, struct irte *irte_modified) | |||
| 339 | return rc; | 208 | return rc; |
| 340 | } | 209 | } |
| 341 | 210 | ||
| 342 | int flush_irte(int irq) | ||
| 343 | { | ||
| 344 | int rc; | ||
| 345 | int index; | ||
| 346 | struct intel_iommu *iommu; | ||
| 347 | struct irq_2_iommu *irq_iommu; | ||
| 348 | unsigned long flags; | ||
| 349 | |||
| 350 | spin_lock_irqsave(&irq_2_ir_lock, flags); | ||
| 351 | irq_iommu = valid_irq_2_iommu(irq); | ||
| 352 | if (!irq_iommu) { | ||
| 353 | spin_unlock_irqrestore(&irq_2_ir_lock, flags); | ||
| 354 | return -1; | ||
| 355 | } | ||
| 356 | |||
| 357 | iommu = irq_iommu->iommu; | ||
| 358 | |||
| 359 | index = irq_iommu->irte_index + irq_iommu->sub_handle; | ||
| 360 | |||
| 361 | rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask); | ||
| 362 | spin_unlock_irqrestore(&irq_2_ir_lock, flags); | ||
| 363 | |||
| 364 | return rc; | ||
| 365 | } | ||
| 366 | |||
| 367 | struct intel_iommu *map_hpet_to_ir(u8 hpet_id) | 211 | struct intel_iommu *map_hpet_to_ir(u8 hpet_id) |
| 368 | { | 212 | { |
| 369 | int i; | 213 | int i; |
| @@ -420,16 +264,14 @@ static int clear_entries(struct irq_2_iommu *irq_iommu) | |||
| 420 | 264 | ||
| 421 | int free_irte(int irq) | 265 | int free_irte(int irq) |
| 422 | { | 266 | { |
| 423 | int rc = 0; | 267 | struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); |
| 424 | struct irq_2_iommu *irq_iommu; | ||
| 425 | unsigned long flags; | 268 | unsigned long flags; |
| 269 | int rc; | ||
| 426 | 270 | ||
| 427 | spin_lock_irqsave(&irq_2_ir_lock, flags); | 271 | if (!irq_iommu) |
| 428 | irq_iommu = valid_irq_2_iommu(irq); | ||
| 429 | if (!irq_iommu) { | ||
| 430 | spin_unlock_irqrestore(&irq_2_ir_lock, flags); | ||
| 431 | return -1; | 272 | return -1; |
| 432 | } | 273 | |
| 274 | spin_lock_irqsave(&irq_2_ir_lock, flags); | ||
| 433 | 275 | ||
| 434 | rc = clear_entries(irq_iommu); | 276 | rc = clear_entries(irq_iommu); |
| 435 | 277 | ||
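Editor's note: with irq_2_iommu embedded in the x86 per-irq config (reached through get_irq_chip_data()) instead of living in a side table, the separate allocation path disappears and every entry point collapses to the same shape: fetch, NULL-check before taking the lock, then operate. A sketch of that shape with an illustrative operation:

    /* Common shape of the refactored entry points (illustrative body). */
    static int my_irte_index(int irq)
    {
            struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
            unsigned long flags;
            int index;

            if (!irq_iommu)   /* no on-demand allocation path anymore */
                    return -1;

            spin_lock_irqsave(&irq_2_ir_lock, flags);
            index = irq_iommu->irte_index + irq_iommu->sub_handle;
            spin_unlock_irqrestore(&irq_2_ir_lock, flags);

            return index;
    }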
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 69b7be33b3a2..5fcf5aec680f 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c | |||
| @@ -170,33 +170,31 @@ static void msix_mask_irq(struct msi_desc *desc, u32 flag) | |||
| 170 | desc->masked = __msix_mask_irq(desc, flag); | 170 | desc->masked = __msix_mask_irq(desc, flag); |
| 171 | } | 171 | } |
| 172 | 172 | ||
| 173 | static void msi_set_mask_bit(unsigned irq, u32 flag) | 173 | static void msi_set_mask_bit(struct irq_data *data, u32 flag) |
| 174 | { | 174 | { |
| 175 | struct msi_desc *desc = get_irq_msi(irq); | 175 | struct msi_desc *desc = irq_data_get_msi(data); |
| 176 | 176 | ||
| 177 | if (desc->msi_attrib.is_msix) { | 177 | if (desc->msi_attrib.is_msix) { |
| 178 | msix_mask_irq(desc, flag); | 178 | msix_mask_irq(desc, flag); |
| 179 | readl(desc->mask_base); /* Flush write to device */ | 179 | readl(desc->mask_base); /* Flush write to device */ |
| 180 | } else { | 180 | } else { |
| 181 | unsigned offset = irq - desc->dev->irq; | 181 | unsigned offset = data->irq - desc->dev->irq; |
| 182 | msi_mask_irq(desc, 1 << offset, flag << offset); | 182 | msi_mask_irq(desc, 1 << offset, flag << offset); |
| 183 | } | 183 | } |
| 184 | } | 184 | } |
| 185 | 185 | ||
| 186 | void mask_msi_irq(unsigned int irq) | 186 | void mask_msi_irq(struct irq_data *data) |
| 187 | { | 187 | { |
| 188 | msi_set_mask_bit(irq, 1); | 188 | msi_set_mask_bit(data, 1); |
| 189 | } | 189 | } |
| 190 | 190 | ||
| 191 | void unmask_msi_irq(unsigned int irq) | 191 | void unmask_msi_irq(struct irq_data *data) |
| 192 | { | 192 | { |
| 193 | msi_set_mask_bit(irq, 0); | 193 | msi_set_mask_bit(data, 0); |
| 194 | } | 194 | } |
| 195 | 195 | ||
| 196 | void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) | 196 | void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) |
| 197 | { | 197 | { |
| 198 | struct msi_desc *entry = get_irq_desc_msi(desc); | ||
| 199 | |||
| 200 | BUG_ON(entry->dev->current_state != PCI_D0); | 198 | BUG_ON(entry->dev->current_state != PCI_D0); |
| 201 | 199 | ||
| 202 | if (entry->msi_attrib.is_msix) { | 200 | if (entry->msi_attrib.is_msix) { |
| @@ -227,15 +225,13 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) | |||
| 227 | 225 | ||
| 228 | void read_msi_msg(unsigned int irq, struct msi_msg *msg) | 226 | void read_msi_msg(unsigned int irq, struct msi_msg *msg) |
| 229 | { | 227 | { |
| 230 | struct irq_desc *desc = irq_to_desc(irq); | 228 | struct msi_desc *entry = get_irq_msi(irq); |
| 231 | 229 | ||
| 232 | read_msi_msg_desc(desc, msg); | 230 | __read_msi_msg(entry, msg); |
| 233 | } | 231 | } |
| 234 | 232 | ||
| 235 | void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) | 233 | void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg) |
| 236 | { | 234 | { |
| 237 | struct msi_desc *entry = get_irq_desc_msi(desc); | ||
| 238 | |||
| 239 | /* Assert that the cache is valid, assuming that | 235 | /* Assert that the cache is valid, assuming that |
| 240 | * valid messages are not all-zeroes. */ | 236 | * valid messages are not all-zeroes. */ |
| 241 | BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo | | 237 | BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo | |
| @@ -246,15 +242,13 @@ void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) | |||
| 246 | 242 | ||
| 247 | void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) | 243 | void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) |
| 248 | { | 244 | { |
| 249 | struct irq_desc *desc = irq_to_desc(irq); | 245 | struct msi_desc *entry = get_irq_msi(irq); |
| 250 | 246 | ||
| 251 | get_cached_msi_msg_desc(desc, msg); | 247 | __get_cached_msi_msg(entry, msg); |
| 252 | } | 248 | } |
| 253 | 249 | ||
| 254 | void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) | 250 | void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) |
| 255 | { | 251 | { |
| 256 | struct msi_desc *entry = get_irq_desc_msi(desc); | ||
| 257 | |||
| 258 | if (entry->dev->current_state != PCI_D0) { | 252 | if (entry->dev->current_state != PCI_D0) { |
| 259 | /* Don't touch the hardware now */ | 253 | /* Don't touch the hardware now */ |
| 260 | } else if (entry->msi_attrib.is_msix) { | 254 | } else if (entry->msi_attrib.is_msix) { |
| @@ -292,9 +286,9 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) | |||
| 292 | 286 | ||
| 293 | void write_msi_msg(unsigned int irq, struct msi_msg *msg) | 287 | void write_msi_msg(unsigned int irq, struct msi_msg *msg) |
| 294 | { | 288 | { |
| 295 | struct irq_desc *desc = irq_to_desc(irq); | 289 | struct msi_desc *entry = get_irq_msi(irq); |
| 296 | 290 | ||
| 297 | write_msi_msg_desc(desc, msg); | 291 | __write_msi_msg(entry, msg); |
| 298 | } | 292 | } |
| 299 | 293 | ||
| 300 | static void free_msi_irqs(struct pci_dev *dev) | 294 | static void free_msi_irqs(struct pci_dev *dev) |
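Editor's note: the *_desc variants keyed on irq_desc become __-prefixed helpers keyed on msi_desc, so a new-style chip callback that already holds irq_data can reach the msi_desc directly and skip a second irq -> desc lookup. An illustrative caller (my_msi_resend is a made-up name; the helpers are the ones renamed above):

    #include <linux/msi.h>
    #include <linux/irq.h>

    static void my_msi_resend(struct irq_data *data)
    {
            struct msi_desc *desc = irq_data_get_msi(data);
            struct msi_msg msg;

            __get_cached_msi_msg(desc, &msg);   /* read the cached copy */
            __write_msi_msg(desc, &msg);        /* replay it to the device */
    }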
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 7c8008225ee3..17927b1f9334 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c | |||
| @@ -127,7 +127,10 @@ static void handle_tx(struct vhost_net *net) | |||
| 127 | size_t len, total_len = 0; | 127 | size_t len, total_len = 0; |
| 128 | int err, wmem; | 128 | int err, wmem; |
| 129 | size_t hdr_size; | 129 | size_t hdr_size; |
| 130 | struct socket *sock = rcu_dereference(vq->private_data); | 130 | struct socket *sock; |
| 131 | |||
| 132 | sock = rcu_dereference_check(vq->private_data, | ||
| 133 | lockdep_is_held(&vq->mutex)); | ||
| 131 | if (!sock) | 134 | if (!sock) |
| 132 | return; | 135 | return; |
| 133 | 136 | ||
| @@ -582,7 +585,10 @@ static void vhost_net_disable_vq(struct vhost_net *n, | |||
| 582 | static void vhost_net_enable_vq(struct vhost_net *n, | 585 | static void vhost_net_enable_vq(struct vhost_net *n, |
| 583 | struct vhost_virtqueue *vq) | 586 | struct vhost_virtqueue *vq) |
| 584 | { | 587 | { |
| 585 | struct socket *sock = vq->private_data; | 588 | struct socket *sock; |
| 589 | |||
| 590 | sock = rcu_dereference_protected(vq->private_data, | ||
| 591 | lockdep_is_held(&vq->mutex)); | ||
| 586 | if (!sock) | 592 | if (!sock) |
| 587 | return; | 593 | return; |
| 588 | if (vq == n->vqs + VHOST_NET_VQ_TX) { | 594 | if (vq == n->vqs + VHOST_NET_VQ_TX) { |
| @@ -598,7 +604,8 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n, | |||
| 598 | struct socket *sock; | 604 | struct socket *sock; |
| 599 | 605 | ||
| 600 | mutex_lock(&vq->mutex); | 606 | mutex_lock(&vq->mutex); |
| 601 | sock = vq->private_data; | 607 | sock = rcu_dereference_protected(vq->private_data, |
| 608 | lockdep_is_held(&vq->mutex)); | ||
| 602 | vhost_net_disable_vq(n, vq); | 609 | vhost_net_disable_vq(n, vq); |
| 603 | rcu_assign_pointer(vq->private_data, NULL); | 610 | rcu_assign_pointer(vq->private_data, NULL); |
| 604 | mutex_unlock(&vq->mutex); | 611 | mutex_unlock(&vq->mutex); |
| @@ -736,7 +743,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) | |||
| 736 | } | 743 | } |
| 737 | 744 | ||
| 738 | /* start polling new socket */ | 745 | /* start polling new socket */ |
| 739 | oldsock = vq->private_data; | 746 | oldsock = rcu_dereference_protected(vq->private_data, |
| 747 | lockdep_is_held(&vq->mutex)); | ||
| 740 | if (sock != oldsock) { | 748 | if (sock != oldsock) { |
| 741 | vhost_net_disable_vq(n, vq); | 749 | vhost_net_disable_vq(n, vq); |
| 742 | rcu_assign_pointer(vq->private_data, sock); | 750 | rcu_assign_pointer(vq->private_data, sock); |
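Editor's note: the vhost conversions pick the RCU accessor by context — rcu_dereference_check() for paths that may run under either rcu_read_lock() or the guarding mutex, rcu_dereference_protected() for pure update-side access where no RCU read lock is held. A sketch assuming an __rcu pointer guarded by a mutex on the write side (struct holder is illustrative):

    #include <linux/rcupdate.h>
    #include <linux/mutex.h>

    struct holder {
            struct mutex mutex;
            void __rcu *private_data;
    };

    static void *reader_or_owner(struct holder *h)
    {
            /* legal under rcu_read_lock() OR h->mutex; lockdep verifies */
            return rcu_dereference_check(h->private_data,
                                         lockdep_is_held(&h->mutex));
    }

    static void *owner_only(struct holder *h)
    {
            /* update side: mutex must be held, no rcu_read_lock() needed */
            return rcu_dereference_protected(h->private_data,
                                             lockdep_is_held(&h->mutex));
    }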
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index dd3d6f7406f8..8b5a1b33d0fe 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c | |||
| @@ -320,7 +320,7 @@ long vhost_dev_reset_owner(struct vhost_dev *dev) | |||
| 320 | vhost_dev_cleanup(dev); | 320 | vhost_dev_cleanup(dev); |
| 321 | 321 | ||
| 322 | memory->nregions = 0; | 322 | memory->nregions = 0; |
| 323 | dev->memory = memory; | 323 | RCU_INIT_POINTER(dev->memory, memory); |
| 324 | return 0; | 324 | return 0; |
| 325 | } | 325 | } |
| 326 | 326 | ||
| @@ -352,8 +352,9 @@ void vhost_dev_cleanup(struct vhost_dev *dev) | |||
| 352 | fput(dev->log_file); | 352 | fput(dev->log_file); |
| 353 | dev->log_file = NULL; | 353 | dev->log_file = NULL; |
| 354 | /* No one will access memory at this point */ | 354 | /* No one will access memory at this point */ |
| 355 | kfree(dev->memory); | 355 | kfree(rcu_dereference_protected(dev->memory, |
| 356 | dev->memory = NULL; | 356 | lockdep_is_held(&dev->mutex))); |
| 357 | RCU_INIT_POINTER(dev->memory, NULL); | ||
| 357 | if (dev->mm) | 358 | if (dev->mm) |
| 358 | mmput(dev->mm); | 359 | mmput(dev->mm); |
| 359 | dev->mm = NULL; | 360 | dev->mm = NULL; |
| @@ -440,14 +441,22 @@ static int vq_access_ok(unsigned int num, | |||
| 440 | /* Caller should have device mutex but not vq mutex */ | 441 | /* Caller should have device mutex but not vq mutex */ |
| 441 | int vhost_log_access_ok(struct vhost_dev *dev) | 442 | int vhost_log_access_ok(struct vhost_dev *dev) |
| 442 | { | 443 | { |
| 443 | return memory_access_ok(dev, dev->memory, 1); | 444 | struct vhost_memory *mp; |
| 445 | |||
| 446 | mp = rcu_dereference_protected(dev->memory, | ||
| 447 | lockdep_is_held(&dev->mutex)); | ||
| 448 | return memory_access_ok(dev, mp, 1); | ||
| 444 | } | 449 | } |
| 445 | 450 | ||
| 446 | /* Verify access for write logging. */ | 451 | /* Verify access for write logging. */ |
| 447 | /* Caller should have vq mutex and device mutex */ | 452 | /* Caller should have vq mutex and device mutex */ |
| 448 | static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) | 453 | static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) |
| 449 | { | 454 | { |
| 450 | return vq_memory_access_ok(log_base, vq->dev->memory, | 455 | struct vhost_memory *mp; |
| 456 | |||
| 457 | mp = rcu_dereference_protected(vq->dev->memory, | ||
| 458 | lockdep_is_held(&vq->mutex)); | ||
| 459 | return vq_memory_access_ok(log_base, mp, | ||
| 451 | vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) && | 460 | vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) && |
| 452 | (!vq->log_used || log_access_ok(log_base, vq->log_addr, | 461 | (!vq->log_used || log_access_ok(log_base, vq->log_addr, |
| 453 | sizeof *vq->used + | 462 | sizeof *vq->used + |
| @@ -487,7 +496,8 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) | |||
| 487 | kfree(newmem); | 496 | kfree(newmem); |
| 488 | return -EFAULT; | 497 | return -EFAULT; |
| 489 | } | 498 | } |
| 490 | oldmem = d->memory; | 499 | oldmem = rcu_dereference_protected(d->memory, |
| 500 | lockdep_is_held(&d->mutex)); | ||
| 491 | rcu_assign_pointer(d->memory, newmem); | 501 | rcu_assign_pointer(d->memory, newmem); |
| 492 | synchronize_rcu(); | 502 | synchronize_rcu(); |
| 493 | kfree(oldmem); | 503 | kfree(oldmem); |
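Editor's note: the vhost_set_memory hunk above is the canonical writer-side sequence — fetch the old pointer under the lock, publish the new one, then wait out existing readers before freeing. A sketch reusing the holder struct from the previous note (names illustrative):

    #include <linux/slab.h>

    static void replace_data(struct holder *h, void *newp)
    {
            void *oldp;

            mutex_lock(&h->mutex);
            oldp = rcu_dereference_protected(h->private_data,
                                             lockdep_is_held(&h->mutex));
            rcu_assign_pointer(h->private_data, newp);   /* publish */
            mutex_unlock(&h->mutex);

            synchronize_rcu();   /* wait for pre-existing readers */
            kfree(oldp);         /* now safe to reclaim */
    }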
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index afd77295971c..af3c11ded5fd 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h | |||
| @@ -106,7 +106,7 @@ struct vhost_virtqueue { | |||
| 106 | * vhost_work execution acts instead of rcu_read_lock() and the end of | 106 | * vhost_work execution acts instead of rcu_read_lock() and the end of |
| 107 | * vhost_work execution acts instead of rcu_read_lock(). | 107 | * vhost_work execution acts instead of rcu_read_lock(). |
| 108 | * Writers use virtqueue mutex. */ | 108 | * Writers use virtqueue mutex. */ |
| 109 | void *private_data; | 109 | void __rcu *private_data; |
| 110 | /* Log write descriptors */ | 110 | /* Log write descriptors */ |
| 111 | void __user *log_base; | 111 | void __user *log_base; |
| 112 | struct vhost_log log[VHOST_NET_MAX_SG]; | 112 | struct vhost_log log[VHOST_NET_MAX_SG]; |
| @@ -116,7 +116,7 @@ struct vhost_dev { | |||
| 116 | /* Readers use RCU to access memory table pointer | 116 | /* Readers use RCU to access memory table pointer |
| 117 | * log base pointer and features. | 117 | * log base pointer and features. |
| 118 | * Writers use mutex below.*/ | 118 | * Writers use mutex below.*/ |
| 119 | struct vhost_memory *memory; | 119 | struct vhost_memory __rcu *memory; |
| 120 | struct mm_struct *mm; | 120 | struct mm_struct *mm; |
| 121 | struct mutex mutex; | 121 | struct mutex mutex; |
| 122 | unsigned acked_features; | 122 | unsigned acked_features; |
| @@ -173,7 +173,11 @@ enum { | |||
| 173 | 173 | ||
| 174 | static inline int vhost_has_feature(struct vhost_dev *dev, int bit) | 174 | static inline int vhost_has_feature(struct vhost_dev *dev, int bit) |
| 175 | { | 175 | { |
| 176 | unsigned acked_features = rcu_dereference(dev->acked_features); | 176 | unsigned acked_features; |
| 177 | |||
| 178 | acked_features = | ||
| 179 | rcu_dereference_index_check(dev->acked_features, | ||
| 180 | lockdep_is_held(&dev->mutex)); | ||
| 177 | return acked_features & (1 << bit); | 181 | return acked_features & (1 << bit); |
| 178 | } | 182 | } |
| 179 | 183 | ||
diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 13365ba35218..7d24b0d94ed4 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c | |||
| @@ -338,30 +338,29 @@ static void unmask_evtchn(int port) | |||
| 338 | 338 | ||
| 339 | static int find_unbound_irq(void) | 339 | static int find_unbound_irq(void) |
| 340 | { | 340 | { |
| 341 | int irq; | 341 | struct irq_data *data; |
| 342 | struct irq_desc *desc; | 342 | int irq, res; |
| 343 | 343 | ||
| 344 | for (irq = 0; irq < nr_irqs; irq++) { | 344 | for (irq = 0; irq < nr_irqs; irq++) { |
| 345 | desc = irq_to_desc(irq); | 345 | data = irq_get_irq_data(irq); |
| 346 | /* only 0->15 have init'd desc; handle irq > 16 */ | 346 | /* only 0->15 have init'd desc; handle irq > 16 */ |
| 347 | if (desc == NULL) | 347 | if (!data) |
| 348 | break; | 348 | break; |
| 349 | if (desc->chip == &no_irq_chip) | 349 | if (data->chip == &no_irq_chip) |
| 350 | break; | 350 | break; |
| 351 | if (desc->chip != &xen_dynamic_chip) | 351 | if (data->chip != &xen_dynamic_chip) |
| 352 | continue; | 352 | continue; |
| 353 | if (irq_info[irq].type == IRQT_UNBOUND) | 353 | if (irq_info[irq].type == IRQT_UNBOUND) |
| 354 | break; | 354 | return irq; |
| 355 | } | 355 | } |
| 356 | 356 | ||
| 357 | if (irq == nr_irqs) | 357 | if (irq == nr_irqs) |
| 358 | panic("No available IRQ to bind to: increase nr_irqs!\n"); | 358 | panic("No available IRQ to bind to: increase nr_irqs!\n"); |
| 359 | 359 | ||
| 360 | desc = irq_to_desc_alloc_node(irq, 0); | 360 | res = irq_alloc_desc_at(irq, 0); |
| 361 | if (WARN_ON(desc == NULL)) | ||
| 362 | return -1; | ||
| 363 | 361 | ||
| 364 | dynamic_irq_init_keep_chip_data(irq); | 362 | if (WARN_ON(res != irq)) |
| 363 | return -1; | ||
| 365 | 364 | ||
| 366 | return irq; | 365 | return irq; |
| 367 | } | 366 | } |
| @@ -495,7 +494,7 @@ static void unbind_from_irq(unsigned int irq) | |||
| 495 | if (irq_info[irq].type != IRQT_UNBOUND) { | 494 | if (irq_info[irq].type != IRQT_UNBOUND) { |
| 496 | irq_info[irq] = mk_unbound_info(); | 495 | irq_info[irq] = mk_unbound_info(); |
| 497 | 496 | ||
| 498 | dynamic_irq_cleanup(irq); | 497 | irq_free_desc(irq); |
| 499 | } | 498 | } |
| 500 | 499 | ||
| 501 | spin_unlock(&irq_mapping_update_lock); | 500 | spin_unlock(&irq_mapping_update_lock); |
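Editor's note: the Xen conversion trades irq_to_desc_alloc_node()/dynamic_irq_cleanup() for the sparse-irq allocator API — irq_alloc_desc_at(irq, node) returns the requested irq number on success (negative errno such as -EEXIST otherwise), and irq_free_desc() releases it. A sketch of the pairing, with illustrative wrappers:

    #include <linux/irq.h>

    static int grab_irq(unsigned int irq)
    {
            int res = irq_alloc_desc_at(irq, 0);   /* node 0 */

            if (res != irq)
                    return -1;   /* descriptor exists or allocation failed */
            /* ... bind and use the irq ... */
            return irq;
    }

    static void release_irq(unsigned int irq)
    {
            irq_free_desc(irq);   /* replaces dynamic_irq_cleanup() */
    }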
diff --git a/fs/affs/super.c b/fs/affs/super.c index 33c4e7eef470..9581ea94d5a1 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c | |||
| @@ -109,8 +109,8 @@ static void init_once(void *foo) | |||
| 109 | { | 109 | { |
| 110 | struct affs_inode_info *ei = (struct affs_inode_info *) foo; | 110 | struct affs_inode_info *ei = (struct affs_inode_info *) foo; |
| 111 | 111 | ||
| 112 | init_MUTEX(&ei->i_link_lock); | 112 | sema_init(&ei->i_link_lock, 1); |
| 113 | init_MUTEX(&ei->i_ext_lock); | 113 | sema_init(&ei->i_ext_lock, 1); |
| 114 | inode_init_once(&ei->vfs_inode); | 114 | inode_init_once(&ei->vfs_inode); |
| 115 | } | 115 | } |
| 116 | 116 | ||
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index 0fcd2640c23f..9eb134ea6eb2 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig | |||
| @@ -1,9 +1,11 @@ | |||
| 1 | config CEPH_FS | 1 | config CEPH_FS |
| 2 | tristate "Ceph distributed file system (EXPERIMENTAL)" | 2 | tristate "Ceph distributed file system (EXPERIMENTAL)" |
| 3 | depends on INET && EXPERIMENTAL | 3 | depends on INET && EXPERIMENTAL |
| 4 | select CEPH_LIB | ||
| 4 | select LIBCRC32C | 5 | select LIBCRC32C |
| 5 | select CRYPTO_AES | 6 | select CRYPTO_AES |
| 6 | select CRYPTO | 7 | select CRYPTO |
| 8 | default n | ||
| 7 | help | 9 | help |
| 8 | Choose Y or M here to include support for mounting the | 10 | Choose Y or M here to include support for mounting the |
| 9 | experimental Ceph distributed file system. Ceph is an extremely | 11 | experimental Ceph distributed file system. Ceph is an extremely |
| @@ -14,15 +16,3 @@ config CEPH_FS | |||
| 14 | 16 | ||
| 15 | If unsure, say N. | 17 | If unsure, say N. |
| 16 | 18 | ||
| 17 | config CEPH_FS_PRETTYDEBUG | ||
| 18 | bool "Include file:line in ceph debug output" | ||
| 19 | depends on CEPH_FS | ||
| 20 | default n | ||
| 21 | help | ||
| 22 | If you say Y here, debug output will include a filename and | ||
| 23 | line to aid debugging. This icnreases kernel size and slows | ||
| 24 | execution slightly when debug call sites are enabled (e.g., | ||
| 25 | via CONFIG_DYNAMIC_DEBUG). | ||
| 26 | |||
| 27 | If unsure, say N. | ||
| 28 | |||
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 278e1172600d..9e6c4f2e8ff1 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile | |||
| @@ -8,15 +8,8 @@ obj-$(CONFIG_CEPH_FS) += ceph.o | |||
| 8 | 8 | ||
| 9 | ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ | 9 | ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ |
| 10 | export.o caps.o snap.o xattr.o \ | 10 | export.o caps.o snap.o xattr.o \ |
| 11 | messenger.o msgpool.o buffer.o pagelist.o \ | 11 | mds_client.o mdsmap.o strings.o ceph_frag.o \ |
| 12 | mds_client.o mdsmap.o \ | 12 | debugfs.o |
| 13 | mon_client.o \ | ||
| 14 | osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ | ||
| 15 | debugfs.o \ | ||
| 16 | auth.o auth_none.o \ | ||
| 17 | crypto.o armor.o \ | ||
| 18 | auth_x.o \ | ||
| 19 | ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o | ||
| 20 | 13 | ||
| 21 | else | 14 | else |
| 22 | #Otherwise we were called directly from the command | 15 | #Otherwise we were called directly from the command |
diff --git a/fs/ceph/README b/fs/ceph/README deleted file mode 100644 index 18352fab37c0..000000000000 --- a/fs/ceph/README +++ /dev/null | |||
| @@ -1,20 +0,0 @@ | |||
| 1 | # | ||
| 2 | # The following files are shared by (and manually synchronized | ||
| 3 | # between) the Ceph userland and kernel client. | ||
| 4 | # | ||
| 5 | # userland kernel | ||
| 6 | src/include/ceph_fs.h fs/ceph/ceph_fs.h | ||
| 7 | src/include/ceph_fs.cc fs/ceph/ceph_fs.c | ||
| 8 | src/include/msgr.h fs/ceph/msgr.h | ||
| 9 | src/include/rados.h fs/ceph/rados.h | ||
| 10 | src/include/ceph_strings.cc fs/ceph/ceph_strings.c | ||
| 11 | src/include/ceph_frag.h fs/ceph/ceph_frag.h | ||
| 12 | src/include/ceph_frag.cc fs/ceph/ceph_frag.c | ||
| 13 | src/include/ceph_hash.h fs/ceph/ceph_hash.h | ||
| 14 | src/include/ceph_hash.cc fs/ceph/ceph_hash.c | ||
| 15 | src/crush/crush.c fs/ceph/crush/crush.c | ||
| 16 | src/crush/crush.h fs/ceph/crush/crush.h | ||
| 17 | src/crush/mapper.c fs/ceph/crush/mapper.c | ||
| 18 | src/crush/mapper.h fs/ceph/crush/mapper.h | ||
| 19 | src/crush/hash.h fs/ceph/crush/hash.h | ||
| 20 | src/crush/hash.c fs/ceph/crush/hash.c | ||
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index efbc604001c8..51bcc5ce3230 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/backing-dev.h> | 3 | #include <linux/backing-dev.h> |
| 4 | #include <linux/fs.h> | 4 | #include <linux/fs.h> |
| @@ -10,7 +10,8 @@ | |||
| 10 | #include <linux/task_io_accounting_ops.h> | 10 | #include <linux/task_io_accounting_ops.h> |
| 11 | 11 | ||
| 12 | #include "super.h" | 12 | #include "super.h" |
| 13 | #include "osd_client.h" | 13 | #include "mds_client.h" |
| 14 | #include <linux/ceph/osd_client.h> | ||
| 14 | 15 | ||
| 15 | /* | 16 | /* |
| 16 | * Ceph address space ops. | 17 | * Ceph address space ops. |
| @@ -193,7 +194,8 @@ static int readpage_nounlock(struct file *filp, struct page *page) | |||
| 193 | { | 194 | { |
| 194 | struct inode *inode = filp->f_dentry->d_inode; | 195 | struct inode *inode = filp->f_dentry->d_inode; |
| 195 | struct ceph_inode_info *ci = ceph_inode(inode); | 196 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 196 | struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; | 197 | struct ceph_osd_client *osdc = |
| 198 | &ceph_inode_to_client(inode)->client->osdc; | ||
| 197 | int err = 0; | 199 | int err = 0; |
| 198 | u64 len = PAGE_CACHE_SIZE; | 200 | u64 len = PAGE_CACHE_SIZE; |
| 199 | 201 | ||
| @@ -265,7 +267,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
| 265 | { | 267 | { |
| 266 | struct inode *inode = file->f_dentry->d_inode; | 268 | struct inode *inode = file->f_dentry->d_inode; |
| 267 | struct ceph_inode_info *ci = ceph_inode(inode); | 269 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 268 | struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; | 270 | struct ceph_osd_client *osdc = |
| 271 | &ceph_inode_to_client(inode)->client->osdc; | ||
| 269 | int rc = 0; | 272 | int rc = 0; |
| 270 | struct page **pages; | 273 | struct page **pages; |
| 271 | loff_t offset; | 274 | loff_t offset; |
| @@ -365,7 +368,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
| 365 | { | 368 | { |
| 366 | struct inode *inode; | 369 | struct inode *inode; |
| 367 | struct ceph_inode_info *ci; | 370 | struct ceph_inode_info *ci; |
| 368 | struct ceph_client *client; | 371 | struct ceph_fs_client *fsc; |
| 369 | struct ceph_osd_client *osdc; | 372 | struct ceph_osd_client *osdc; |
| 370 | loff_t page_off = page->index << PAGE_CACHE_SHIFT; | 373 | loff_t page_off = page->index << PAGE_CACHE_SHIFT; |
| 371 | int len = PAGE_CACHE_SIZE; | 374 | int len = PAGE_CACHE_SIZE; |
| @@ -383,8 +386,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
| 383 | } | 386 | } |
| 384 | inode = page->mapping->host; | 387 | inode = page->mapping->host; |
| 385 | ci = ceph_inode(inode); | 388 | ci = ceph_inode(inode); |
| 386 | client = ceph_inode_to_client(inode); | 389 | fsc = ceph_inode_to_client(inode); |
| 387 | osdc = &client->osdc; | 390 | osdc = &fsc->client->osdc; |
| 388 | 391 | ||
| 389 | /* verify this is a writeable snap context */ | 392 | /* verify this is a writeable snap context */ |
| 390 | snapc = (void *)page->private; | 393 | snapc = (void *)page->private; |
| @@ -414,10 +417,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
| 414 | dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", | 417 | dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", |
| 415 | inode, page, page->index, page_off, len, snapc); | 418 | inode, page, page->index, page_off, len, snapc); |
| 416 | 419 | ||
| 417 | writeback_stat = atomic_long_inc_return(&client->writeback_count); | 420 | writeback_stat = atomic_long_inc_return(&fsc->writeback_count); |
| 418 | if (writeback_stat > | 421 | if (writeback_stat > |
| 419 | CONGESTION_ON_THRESH(client->mount_args->congestion_kb)) | 422 | CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) |
| 420 | set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC); | 423 | set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC); |
| 421 | 424 | ||
| 422 | set_page_writeback(page); | 425 | set_page_writeback(page); |
| 423 | err = ceph_osdc_writepages(osdc, ceph_vino(inode), | 426 | err = ceph_osdc_writepages(osdc, ceph_vino(inode), |
| @@ -496,7 +499,7 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
| 496 | struct address_space *mapping = inode->i_mapping; | 499 | struct address_space *mapping = inode->i_mapping; |
| 497 | __s32 rc = -EIO; | 500 | __s32 rc = -EIO; |
| 498 | u64 bytes = 0; | 501 | u64 bytes = 0; |
| 499 | struct ceph_client *client = ceph_inode_to_client(inode); | 502 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
| 500 | long writeback_stat; | 503 | long writeback_stat; |
| 501 | unsigned issued = ceph_caps_issued(ci); | 504 | unsigned issued = ceph_caps_issued(ci); |
| 502 | 505 | ||
| @@ -529,10 +532,10 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
| 529 | WARN_ON(!PageUptodate(page)); | 532 | WARN_ON(!PageUptodate(page)); |
| 530 | 533 | ||
| 531 | writeback_stat = | 534 | writeback_stat = |
| 532 | atomic_long_dec_return(&client->writeback_count); | 535 | atomic_long_dec_return(&fsc->writeback_count); |
| 533 | if (writeback_stat < | 536 | if (writeback_stat < |
| 534 | CONGESTION_OFF_THRESH(client->mount_args->congestion_kb)) | 537 | CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb)) |
| 535 | clear_bdi_congested(&client->backing_dev_info, | 538 | clear_bdi_congested(&fsc->backing_dev_info, |
| 536 | BLK_RW_ASYNC); | 539 | BLK_RW_ASYNC); |
| 537 | 540 | ||
| 538 | ceph_put_snap_context((void *)page->private); | 541 | ceph_put_snap_context((void *)page->private); |
| @@ -569,13 +572,13 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
| 569 | * mempool. we avoid the mempool if we can because req->r_num_pages | 572 | * mempool. we avoid the mempool if we can because req->r_num_pages |
| 570 | * may be less than the maximum write size. | 573 | * may be less than the maximum write size. |
| 571 | */ | 574 | */ |
| 572 | static void alloc_page_vec(struct ceph_client *client, | 575 | static void alloc_page_vec(struct ceph_fs_client *fsc, |
| 573 | struct ceph_osd_request *req) | 576 | struct ceph_osd_request *req) |
| 574 | { | 577 | { |
| 575 | req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages, | 578 | req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages, |
| 576 | GFP_NOFS); | 579 | GFP_NOFS); |
| 577 | if (!req->r_pages) { | 580 | if (!req->r_pages) { |
| 578 | req->r_pages = mempool_alloc(client->wb_pagevec_pool, GFP_NOFS); | 581 | req->r_pages = mempool_alloc(fsc->wb_pagevec_pool, GFP_NOFS); |
| 579 | req->r_pages_from_pool = 1; | 582 | req->r_pages_from_pool = 1; |
| 580 | WARN_ON(!req->r_pages); | 583 | WARN_ON(!req->r_pages); |
| 581 | } | 584 | } |
| @@ -590,7 +593,7 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
| 590 | struct inode *inode = mapping->host; | 593 | struct inode *inode = mapping->host; |
| 591 | struct backing_dev_info *bdi = mapping->backing_dev_info; | 594 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
| 592 | struct ceph_inode_info *ci = ceph_inode(inode); | 595 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 593 | struct ceph_client *client; | 596 | struct ceph_fs_client *fsc; |
| 594 | pgoff_t index, start, end; | 597 | pgoff_t index, start, end; |
| 595 | int range_whole = 0; | 598 | int range_whole = 0; |
| 596 | int should_loop = 1; | 599 | int should_loop = 1; |
| @@ -617,13 +620,13 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
| 617 | wbc->sync_mode == WB_SYNC_NONE ? "NONE" : | 620 | wbc->sync_mode == WB_SYNC_NONE ? "NONE" : |
| 618 | (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); | 621 | (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); |
| 619 | 622 | ||
| 620 | client = ceph_inode_to_client(inode); | 623 | fsc = ceph_inode_to_client(inode); |
| 621 | if (client->mount_state == CEPH_MOUNT_SHUTDOWN) { | 624 | if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { |
| 622 | pr_warning("writepage_start %p on forced umount\n", inode); | 625 | pr_warning("writepage_start %p on forced umount\n", inode); |
| 623 | return -EIO; /* we're in a forced umount, don't write! */ | 626 | return -EIO; /* we're in a forced umount, don't write! */ |
| 624 | } | 627 | } |
| 625 | if (client->mount_args->wsize && client->mount_args->wsize < wsize) | 628 | if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize) |
| 626 | wsize = client->mount_args->wsize; | 629 | wsize = fsc->mount_options->wsize; |
| 627 | if (wsize < PAGE_CACHE_SIZE) | 630 | if (wsize < PAGE_CACHE_SIZE) |
| 628 | wsize = PAGE_CACHE_SIZE; | 631 | wsize = PAGE_CACHE_SIZE; |
| 629 | max_pages_ever = wsize >> PAGE_CACHE_SHIFT; | 632 | max_pages_ever = wsize >> PAGE_CACHE_SHIFT; |
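Two guards open ceph_writepages_start() in the hunk above: writes are refused outright with -EIO once the mount has been forcibly torn down, and the per-request write size is then bounded, honoring a smaller administrator-supplied wsize but never dropping below a single page. Annotated, the clamp reads:

    /* Sketch: derive the writeback request size from the mount options. */
    if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
            return -EIO;                            /* forced umount: don't write */
    if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize)
            wsize = fsc->mount_options->wsize;      /* honor a smaller option */
    if (wsize < PAGE_CACHE_SIZE)
            wsize = PAGE_CACHE_SIZE;                /* but at least one page */
    max_pages_ever = wsize >> PAGE_CACHE_SHIFT;     /* page budget per request */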
| @@ -769,7 +772,7 @@ get_more_pages: | |||
| 769 | offset = (unsigned long long)page->index | 772 | offset = (unsigned long long)page->index |
| 770 | << PAGE_CACHE_SHIFT; | 773 | << PAGE_CACHE_SHIFT; |
| 771 | len = wsize; | 774 | len = wsize; |
| 772 | req = ceph_osdc_new_request(&client->osdc, | 775 | req = ceph_osdc_new_request(&fsc->client->osdc, |
| 773 | &ci->i_layout, | 776 | &ci->i_layout, |
| 774 | ceph_vino(inode), | 777 | ceph_vino(inode), |
| 775 | offset, &len, | 778 | offset, &len, |
| @@ -782,7 +785,7 @@ get_more_pages: | |||
| 782 | &inode->i_mtime, true, 1); | 785 | &inode->i_mtime, true, 1); |
| 783 | max_pages = req->r_num_pages; | 786 | max_pages = req->r_num_pages; |
| 784 | 787 | ||
| 785 | alloc_page_vec(client, req); | 788 | alloc_page_vec(fsc, req); |
| 786 | req->r_callback = writepages_finish; | 789 | req->r_callback = writepages_finish; |
| 787 | req->r_inode = inode; | 790 | req->r_inode = inode; |
| 788 | } | 791 | } |
| @@ -794,10 +797,10 @@ get_more_pages: | |||
| 794 | inode, page, page->index); | 797 | inode, page, page->index); |
| 795 | 798 | ||
| 796 | writeback_stat = | 799 | writeback_stat = |
| 797 | atomic_long_inc_return(&client->writeback_count); | 800 | atomic_long_inc_return(&fsc->writeback_count); |
| 798 | if (writeback_stat > CONGESTION_ON_THRESH( | 801 | if (writeback_stat > CONGESTION_ON_THRESH( |
| 799 | client->mount_args->congestion_kb)) { | 802 | fsc->mount_options->congestion_kb)) { |
| 800 | set_bdi_congested(&client->backing_dev_info, | 803 | set_bdi_congested(&fsc->backing_dev_info, |
| 801 | BLK_RW_ASYNC); | 804 | BLK_RW_ASYNC); |
| 802 | } | 805 | } |
| 803 | 806 | ||
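The congestion hunks in this file (two increments, one decrement in writepages_finish) all share one mechanism: an atomic count of pages under writeback is compared against thresholds derived from the congestion_kb mount option, and the superblock's backing device is flagged congested or cleared accordingly. The on threshold sits above the off threshold (see fs/ceph/super.h), so the flag has hysteresis rather than flapping at a single boundary. Schematically, using the names from the hunks above:

    /* Sketch: writeback accounting with congestion hysteresis. */
    static void wb_page_started(struct ceph_fs_client *fsc)
    {
            long n = atomic_long_inc_return(&fsc->writeback_count);

            if (n > CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
                    set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
    }

    static void wb_page_finished(struct ceph_fs_client *fsc)
    {
            long n = atomic_long_dec_return(&fsc->writeback_count);

            if (n < CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
                    clear_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
    }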
| @@ -846,7 +849,7 @@ get_more_pages: | |||
| 846 | op->payload_len = cpu_to_le32(len); | 849 | op->payload_len = cpu_to_le32(len); |
| 847 | req->r_request->hdr.data_len = cpu_to_le32(len); | 850 | req->r_request->hdr.data_len = cpu_to_le32(len); |
| 848 | 851 | ||
| 849 | ceph_osdc_start_request(&client->osdc, req, true); | 852 | ceph_osdc_start_request(&fsc->client->osdc, req, true); |
| 850 | req = NULL; | 853 | req = NULL; |
| 851 | 854 | ||
| 852 | /* continue? */ | 855 | /* continue? */ |
| @@ -915,7 +918,7 @@ static int ceph_update_writeable_page(struct file *file, | |||
| 915 | { | 918 | { |
| 916 | struct inode *inode = file->f_dentry->d_inode; | 919 | struct inode *inode = file->f_dentry->d_inode; |
| 917 | struct ceph_inode_info *ci = ceph_inode(inode); | 920 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 918 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 921 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
| 919 | loff_t page_off = pos & PAGE_CACHE_MASK; | 922 | loff_t page_off = pos & PAGE_CACHE_MASK; |
| 920 | int pos_in_page = pos & ~PAGE_CACHE_MASK; | 923 | int pos_in_page = pos & ~PAGE_CACHE_MASK; |
| 921 | int end_in_page = pos_in_page + len; | 924 | int end_in_page = pos_in_page + len; |
| @@ -1053,8 +1056,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, | |||
| 1053 | struct page *page, void *fsdata) | 1056 | struct page *page, void *fsdata) |
| 1054 | { | 1057 | { |
| 1055 | struct inode *inode = file->f_dentry->d_inode; | 1058 | struct inode *inode = file->f_dentry->d_inode; |
| 1056 | struct ceph_client *client = ceph_inode_to_client(inode); | 1059 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
| 1057 | struct ceph_mds_client *mdsc = &client->mdsc; | 1060 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 1058 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); | 1061 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); |
| 1059 | int check_cap = 0; | 1062 | int check_cap = 0; |
| 1060 | 1063 | ||
| @@ -1123,7 +1126,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 1123 | { | 1126 | { |
| 1124 | struct inode *inode = vma->vm_file->f_dentry->d_inode; | 1127 | struct inode *inode = vma->vm_file->f_dentry->d_inode; |
| 1125 | struct page *page = vmf->page; | 1128 | struct page *page = vmf->page; |
| 1126 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 1129 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
| 1127 | loff_t off = page->index << PAGE_CACHE_SHIFT; | 1130 | loff_t off = page->index << PAGE_CACHE_SHIFT; |
| 1128 | loff_t size, len; | 1131 | loff_t size, len; |
| 1129 | int ret; | 1132 | int ret; |
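Nearly every hunk in this file is mechanical fallout from one layering change: the old monolithic struct ceph_client is split into a filesystem-level struct ceph_fs_client that points at a shared libceph core. The renames follow directly — mount_args becomes mount_options, the MDS client is reached through a pointer (fsc->mdsc) instead of an embedded member, and OSD traffic now goes through fsc->client->osdc. Pieced together from the accesses in these hunks (the authoritative definition lives in fs/ceph/super.h), the new type looks roughly like:

    /* Sketch of struct ceph_fs_client, reconstructed from usage above. */
    struct ceph_fs_client {
            struct super_block *sb;
            unsigned long mount_state;                /* e.g. CEPH_MOUNT_SHUTDOWN */
            struct ceph_mount_options *mount_options; /* was mount_args */
            struct ceph_client *client;               /* shared libceph core (osdc, monc) */
            struct ceph_mds_client *mdsc;             /* was embedded, now allocated */
            atomic_long_t writeback_count;
            struct backing_dev_info backing_dev_info;
            /* ... plus the debugfs dentries seen in fs/ceph/debugfs.c below */
    };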
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 5e9da996a151..98ab13e2b71d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/fs.h> | 3 | #include <linux/fs.h> |
| 4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
| @@ -9,8 +9,9 @@ | |||
| 9 | #include <linux/writeback.h> | 9 | #include <linux/writeback.h> |
| 10 | 10 | ||
| 11 | #include "super.h" | 11 | #include "super.h" |
| 12 | #include "decode.h" | 12 | #include "mds_client.h" |
| 13 | #include "messenger.h" | 13 | #include <linux/ceph/decode.h> |
| 14 | #include <linux/ceph/messenger.h> | ||
| 14 | 15 | ||
| 15 | /* | 16 | /* |
| 16 | * Capability management | 17 | * Capability management |
| @@ -287,11 +288,11 @@ void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap) | |||
| 287 | spin_unlock(&mdsc->caps_list_lock); | 288 | spin_unlock(&mdsc->caps_list_lock); |
| 288 | } | 289 | } |
| 289 | 290 | ||
| 290 | void ceph_reservation_status(struct ceph_client *client, | 291 | void ceph_reservation_status(struct ceph_fs_client *fsc, |
| 291 | int *total, int *avail, int *used, int *reserved, | 292 | int *total, int *avail, int *used, int *reserved, |
| 292 | int *min) | 293 | int *min) |
| 293 | { | 294 | { |
| 294 | struct ceph_mds_client *mdsc = &client->mdsc; | 295 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 295 | 296 | ||
| 296 | if (total) | 297 | if (total) |
| 297 | *total = mdsc->caps_total_count; | 298 | *total = mdsc->caps_total_count; |
| @@ -399,7 +400,7 @@ static void __insert_cap_node(struct ceph_inode_info *ci, | |||
| 399 | static void __cap_set_timeouts(struct ceph_mds_client *mdsc, | 400 | static void __cap_set_timeouts(struct ceph_mds_client *mdsc, |
| 400 | struct ceph_inode_info *ci) | 401 | struct ceph_inode_info *ci) |
| 401 | { | 402 | { |
| 402 | struct ceph_mount_args *ma = mdsc->client->mount_args; | 403 | struct ceph_mount_options *ma = mdsc->fsc->mount_options; |
| 403 | 404 | ||
| 404 | ci->i_hold_caps_min = round_jiffies(jiffies + | 405 | ci->i_hold_caps_min = round_jiffies(jiffies + |
| 405 | ma->caps_wanted_delay_min * HZ); | 406 | ma->caps_wanted_delay_min * HZ); |
| @@ -515,7 +516,7 @@ int ceph_add_cap(struct inode *inode, | |||
| 515 | unsigned seq, unsigned mseq, u64 realmino, int flags, | 516 | unsigned seq, unsigned mseq, u64 realmino, int flags, |
| 516 | struct ceph_cap_reservation *caps_reservation) | 517 | struct ceph_cap_reservation *caps_reservation) |
| 517 | { | 518 | { |
| 518 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 519 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
| 519 | struct ceph_inode_info *ci = ceph_inode(inode); | 520 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 520 | struct ceph_cap *new_cap = NULL; | 521 | struct ceph_cap *new_cap = NULL; |
| 521 | struct ceph_cap *cap; | 522 | struct ceph_cap *cap; |
| @@ -873,7 +874,7 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
| 873 | struct ceph_mds_session *session = cap->session; | 874 | struct ceph_mds_session *session = cap->session; |
| 874 | struct ceph_inode_info *ci = cap->ci; | 875 | struct ceph_inode_info *ci = cap->ci; |
| 875 | struct ceph_mds_client *mdsc = | 876 | struct ceph_mds_client *mdsc = |
| 876 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | 877 | ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
| 877 | int removed = 0; | 878 | int removed = 0; |
| 878 | 879 | ||
| 879 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); | 880 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); |
| @@ -1210,7 +1211,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci, | |||
| 1210 | int mds; | 1211 | int mds; |
| 1211 | struct ceph_cap_snap *capsnap; | 1212 | struct ceph_cap_snap *capsnap; |
| 1212 | u32 mseq; | 1213 | u32 mseq; |
| 1213 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 1214 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
| 1214 | struct ceph_mds_session *session = NULL; /* if session != NULL, we hold | 1215 | struct ceph_mds_session *session = NULL; /* if session != NULL, we hold |
| 1215 | session->s_mutex */ | 1216 | session->s_mutex */ |
| 1216 | u64 next_follows = 0; /* keep track of how far we've gotten through the | 1217 | u64 next_follows = 0; /* keep track of how far we've gotten through the |
| @@ -1336,7 +1337,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) | |||
| 1336 | void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | 1337 | void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) |
| 1337 | { | 1338 | { |
| 1338 | struct ceph_mds_client *mdsc = | 1339 | struct ceph_mds_client *mdsc = |
| 1339 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | 1340 | ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
| 1340 | struct inode *inode = &ci->vfs_inode; | 1341 | struct inode *inode = &ci->vfs_inode; |
| 1341 | int was = ci->i_dirty_caps; | 1342 | int was = ci->i_dirty_caps; |
| 1342 | int dirty = 0; | 1343 | int dirty = 0; |
| @@ -1378,7 +1379,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
| 1378 | static int __mark_caps_flushing(struct inode *inode, | 1379 | static int __mark_caps_flushing(struct inode *inode, |
| 1379 | struct ceph_mds_session *session) | 1380 | struct ceph_mds_session *session) |
| 1380 | { | 1381 | { |
| 1381 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 1382 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
| 1382 | struct ceph_inode_info *ci = ceph_inode(inode); | 1383 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 1383 | int flushing; | 1384 | int flushing; |
| 1384 | 1385 | ||
| @@ -1416,17 +1417,6 @@ static int __mark_caps_flushing(struct inode *inode, | |||
| 1416 | /* | 1417 | /* |
| 1417 | * try to invalidate mapping pages without blocking. | 1418 | * try to invalidate mapping pages without blocking. |
| 1418 | */ | 1419 | */ |
| 1419 | static int mapping_is_empty(struct address_space *mapping) | ||
| 1420 | { | ||
| 1421 | struct page *page = find_get_page(mapping, 0); | ||
| 1422 | |||
| 1423 | if (!page) | ||
| 1424 | return 1; | ||
| 1425 | |||
| 1426 | put_page(page); | ||
| 1427 | return 0; | ||
| 1428 | } | ||
| 1429 | |||
| 1430 | static int try_nonblocking_invalidate(struct inode *inode) | 1420 | static int try_nonblocking_invalidate(struct inode *inode) |
| 1431 | { | 1421 | { |
| 1432 | struct ceph_inode_info *ci = ceph_inode(inode); | 1422 | struct ceph_inode_info *ci = ceph_inode(inode); |
| @@ -1436,7 +1426,7 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
| 1436 | invalidate_mapping_pages(&inode->i_data, 0, -1); | 1426 | invalidate_mapping_pages(&inode->i_data, 0, -1); |
| 1437 | spin_lock(&inode->i_lock); | 1427 | spin_lock(&inode->i_lock); |
| 1438 | 1428 | ||
| 1439 | if (mapping_is_empty(&inode->i_data) && | 1429 | if (inode->i_data.nrpages == 0 && |
| 1440 | invalidating_gen == ci->i_rdcache_gen) { | 1430 | invalidating_gen == ci->i_rdcache_gen) { |
| 1441 | /* success. */ | 1431 | /* success. */ |
| 1442 | dout("try_nonblocking_invalidate %p success\n", inode); | 1432 | dout("try_nonblocking_invalidate %p success\n", inode); |
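The deleted mapping_is_empty() probed only page index 0 via find_get_page(), so a mapping that had dropped its first page but still cached later ones would be reported empty. Testing inode->i_data.nrpages under i_lock is both cheaper and exact. Combined with the generation check, the non-blocking invalidate amounts to:

    /* Sketch: optimistic, non-blocking page cache invalidation.
     * Caller holds inode->i_lock; i_rdcache_gen bumps when new reads
     * repopulate the cache. */
    u32 gen = ci->i_rdcache_gen;            /* snapshot before unlocking */

    spin_unlock(&inode->i_lock);
    invalidate_mapping_pages(&inode->i_data, 0, -1);
    spin_lock(&inode->i_lock);

    if (inode->i_data.nrpages == 0 && gen == ci->i_rdcache_gen)
            return 0;       /* fully empty, and no read raced in */
    return -1;              /* some pages could not be dropped, or a read raced */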
| @@ -1462,8 +1452,8 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
| 1462 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, | 1452 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, |
| 1463 | struct ceph_mds_session *session) | 1453 | struct ceph_mds_session *session) |
| 1464 | { | 1454 | { |
| 1465 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); | 1455 | struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode); |
| 1466 | struct ceph_mds_client *mdsc = &client->mdsc; | 1456 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 1467 | struct inode *inode = &ci->vfs_inode; | 1457 | struct inode *inode = &ci->vfs_inode; |
| 1468 | struct ceph_cap *cap; | 1458 | struct ceph_cap *cap; |
| 1469 | int file_wanted, used; | 1459 | int file_wanted, used; |
| @@ -1533,7 +1523,7 @@ retry_locked: | |||
| 1533 | */ | 1523 | */ |
| 1534 | if ((!is_delayed || mdsc->stopping) && | 1524 | if ((!is_delayed || mdsc->stopping) && |
| 1535 | ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ | 1525 | ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ |
| 1536 | ci->i_rdcache_gen && /* may have cached pages */ | 1526 | inode->i_data.nrpages && /* have cached pages */ |
| 1537 | (file_wanted == 0 || /* no open files */ | 1527 | (file_wanted == 0 || /* no open files */ |
| 1538 | (revoking & (CEPH_CAP_FILE_CACHE| | 1528 | (revoking & (CEPH_CAP_FILE_CACHE| |
| 1539 | CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ | 1529 | CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ |
| @@ -1706,7 +1696,7 @@ ack: | |||
| 1706 | static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, | 1696 | static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, |
| 1707 | unsigned *flush_tid) | 1697 | unsigned *flush_tid) |
| 1708 | { | 1698 | { |
| 1709 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 1699 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
| 1710 | struct ceph_inode_info *ci = ceph_inode(inode); | 1700 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 1711 | int unlock_session = session ? 0 : 1; | 1701 | int unlock_session = session ? 0 : 1; |
| 1712 | int flushing = 0; | 1702 | int flushing = 0; |
| @@ -1872,7 +1862,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 1872 | caps_are_flushed(inode, flush_tid)); | 1862 | caps_are_flushed(inode, flush_tid)); |
| 1873 | } else { | 1863 | } else { |
| 1874 | struct ceph_mds_client *mdsc = | 1864 | struct ceph_mds_client *mdsc = |
| 1875 | &ceph_sb_to_client(inode->i_sb)->mdsc; | 1865 | ceph_sb_to_client(inode->i_sb)->mdsc; |
| 1876 | 1866 | ||
| 1877 | spin_lock(&inode->i_lock); | 1867 | spin_lock(&inode->i_lock); |
| 1878 | if (__ceph_caps_dirty(ci)) | 1868 | if (__ceph_caps_dirty(ci)) |
| @@ -2465,7 +2455,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, | |||
| 2465 | __releases(inode->i_lock) | 2455 | __releases(inode->i_lock) |
| 2466 | { | 2456 | { |
| 2467 | struct ceph_inode_info *ci = ceph_inode(inode); | 2457 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 2468 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 2458 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
| 2469 | unsigned seq = le32_to_cpu(m->seq); | 2459 | unsigned seq = le32_to_cpu(m->seq); |
| 2470 | int dirty = le32_to_cpu(m->dirty); | 2460 | int dirty = le32_to_cpu(m->dirty); |
| 2471 | int cleaned = 0; | 2461 | int cleaned = 0; |
| @@ -2713,7 +2703,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
| 2713 | struct ceph_msg *msg) | 2703 | struct ceph_msg *msg) |
| 2714 | { | 2704 | { |
| 2715 | struct ceph_mds_client *mdsc = session->s_mdsc; | 2705 | struct ceph_mds_client *mdsc = session->s_mdsc; |
| 2716 | struct super_block *sb = mdsc->client->sb; | 2706 | struct super_block *sb = mdsc->fsc->sb; |
| 2717 | struct inode *inode; | 2707 | struct inode *inode; |
| 2718 | struct ceph_cap *cap; | 2708 | struct ceph_cap *cap; |
| 2719 | struct ceph_mds_caps *h; | 2709 | struct ceph_mds_caps *h; |
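The caps.c hunks repeat one tiny change dozens of times: `&ceph_sb_to_client(sb)->mdsc` becomes `ceph_sb_to_client(sb)->mdsc`. The `&` disappears because the MDS client is no longer a member embedded in the client structure but a separately allocated object reached through a pointer. In miniature (hypothetical types, purely to show the C-level difference):

    struct mdsc { int dummy; };

    struct old_client { struct mdsc m; };   /* embedded: take its address */
    struct new_client { struct mdsc *m; };  /* referenced: already a pointer */

    static struct mdsc *old_get(struct old_client *c) { return &c->m; }
    static struct mdsc *new_get(struct new_client *c) { return c->m; }

One visible consequence: fsc->mdsc can now legitimately be NULL before the MDS client is set up, which the new mdsmap_show() in fs/ceph/debugfs.c below checks explicitly.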
diff --git a/fs/ceph/ceph_frag.c b/fs/ceph/ceph_frag.c index ab6cf35c4091..bdce8b1fbd06 100644 --- a/fs/ceph/ceph_frag.c +++ b/fs/ceph/ceph_frag.c | |||
| @@ -1,7 +1,8 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Ceph 'frag' type | 2 | * Ceph 'frag' type |
| 3 | */ | 3 | */ |
| 4 | #include "types.h" | 4 | #include <linux/module.h> |
| 5 | #include <linux/ceph/types.h> | ||
| 5 | 6 | ||
| 6 | int ceph_frag_compare(__u32 a, __u32 b) | 7 | int ceph_frag_compare(__u32 a, __u32 b) |
| 7 | { | 8 | { |
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 6fd8b20a8611..7ae1b3d55b58 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/device.h> | 3 | #include <linux/device.h> |
| 4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
| @@ -7,143 +7,49 @@ | |||
| 7 | #include <linux/debugfs.h> | 7 | #include <linux/debugfs.h> |
| 8 | #include <linux/seq_file.h> | 8 | #include <linux/seq_file.h> |
| 9 | 9 | ||
| 10 | #include <linux/ceph/libceph.h> | ||
| 11 | #include <linux/ceph/mon_client.h> | ||
| 12 | #include <linux/ceph/auth.h> | ||
| 13 | #include <linux/ceph/debugfs.h> | ||
| 14 | |||
| 10 | #include "super.h" | 15 | #include "super.h" |
| 11 | #include "mds_client.h" | ||
| 12 | #include "mon_client.h" | ||
| 13 | #include "auth.h" | ||
| 14 | 16 | ||
| 15 | #ifdef CONFIG_DEBUG_FS | 17 | #ifdef CONFIG_DEBUG_FS |
| 16 | 18 | ||
| 17 | /* | 19 | #include "mds_client.h" |
| 18 | * Implement /sys/kernel/debug/ceph fun | ||
| 19 | * | ||
| 20 | * /sys/kernel/debug/ceph/client* - an instance of the ceph client | ||
| 21 | * .../osdmap - current osdmap | ||
| 22 | * .../mdsmap - current mdsmap | ||
| 23 | * .../monmap - current monmap | ||
| 24 | * .../osdc - active osd requests | ||
| 25 | * .../mdsc - active mds requests | ||
| 26 | * .../monc - mon client state | ||
| 27 | * .../dentry_lru - dump contents of dentry lru | ||
| 28 | * .../caps - expose cap (reservation) stats | ||
| 29 | * .../bdi - symlink to ../../bdi/something | ||
| 30 | */ | ||
| 31 | |||
| 32 | static struct dentry *ceph_debugfs_dir; | ||
| 33 | |||
| 34 | static int monmap_show(struct seq_file *s, void *p) | ||
| 35 | { | ||
| 36 | int i; | ||
| 37 | struct ceph_client *client = s->private; | ||
| 38 | |||
| 39 | if (client->monc.monmap == NULL) | ||
| 40 | return 0; | ||
| 41 | |||
| 42 | seq_printf(s, "epoch %d\n", client->monc.monmap->epoch); | ||
| 43 | for (i = 0; i < client->monc.monmap->num_mon; i++) { | ||
| 44 | struct ceph_entity_inst *inst = | ||
| 45 | &client->monc.monmap->mon_inst[i]; | ||
| 46 | |||
| 47 | seq_printf(s, "\t%s%lld\t%s\n", | ||
| 48 | ENTITY_NAME(inst->name), | ||
| 49 | pr_addr(&inst->addr.in_addr)); | ||
| 50 | } | ||
| 51 | return 0; | ||
| 52 | } | ||
| 53 | 20 | ||
| 54 | static int mdsmap_show(struct seq_file *s, void *p) | 21 | static int mdsmap_show(struct seq_file *s, void *p) |
| 55 | { | 22 | { |
| 56 | int i; | 23 | int i; |
| 57 | struct ceph_client *client = s->private; | 24 | struct ceph_fs_client *fsc = s->private; |
| 58 | 25 | ||
| 59 | if (client->mdsc.mdsmap == NULL) | 26 | if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL) |
| 60 | return 0; | 27 | return 0; |
| 61 | seq_printf(s, "epoch %d\n", client->mdsc.mdsmap->m_epoch); | 28 | seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch); |
| 62 | seq_printf(s, "root %d\n", client->mdsc.mdsmap->m_root); | 29 | seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root); |
| 63 | seq_printf(s, "session_timeout %d\n", | 30 | seq_printf(s, "session_timeout %d\n", |
| 64 | client->mdsc.mdsmap->m_session_timeout); | 31 | fsc->mdsc->mdsmap->m_session_timeout); |
| 65 | seq_printf(s, "session_autoclose %d\n", | 32 | seq_printf(s, "session_autoclose %d\n", |
| 66 | client->mdsc.mdsmap->m_session_autoclose); | 33 | fsc->mdsc->mdsmap->m_session_autoclose); |
| 67 | for (i = 0; i < client->mdsc.mdsmap->m_max_mds; i++) { | 34 | for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) { |
| 68 | struct ceph_entity_addr *addr = | 35 | struct ceph_entity_addr *addr = |
| 69 | &client->mdsc.mdsmap->m_info[i].addr; | 36 | &fsc->mdsc->mdsmap->m_info[i].addr; |
| 70 | int state = client->mdsc.mdsmap->m_info[i].state; | 37 | int state = fsc->mdsc->mdsmap->m_info[i].state; |
| 71 | 38 | ||
| 72 | seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, pr_addr(&addr->in_addr), | 39 | seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, |
| 40 | ceph_pr_addr(&addr->in_addr), | ||
| 73 | ceph_mds_state_name(state)); | 41 | ceph_mds_state_name(state)); |
| 74 | } | 42 | } |
| 75 | return 0; | 43 | return 0; |
| 76 | } | 44 | } |
| 77 | 45 | ||
| 78 | static int osdmap_show(struct seq_file *s, void *p) | 46 | /* |
| 79 | { | 47 | * mdsc debugfs |
| 80 | int i; | 48 | */ |
| 81 | struct ceph_client *client = s->private; | ||
| 82 | struct rb_node *n; | ||
| 83 | |||
| 84 | if (client->osdc.osdmap == NULL) | ||
| 85 | return 0; | ||
| 86 | seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch); | ||
| 87 | seq_printf(s, "flags%s%s\n", | ||
| 88 | (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ? | ||
| 89 | " NEARFULL" : "", | ||
| 90 | (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ? | ||
| 91 | " FULL" : ""); | ||
| 92 | for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { | ||
| 93 | struct ceph_pg_pool_info *pool = | ||
| 94 | rb_entry(n, struct ceph_pg_pool_info, node); | ||
| 95 | seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", | ||
| 96 | pool->id, pool->v.pg_num, pool->pg_num_mask, | ||
| 97 | pool->v.lpg_num, pool->lpg_num_mask); | ||
| 98 | } | ||
| 99 | for (i = 0; i < client->osdc.osdmap->max_osd; i++) { | ||
| 100 | struct ceph_entity_addr *addr = | ||
| 101 | &client->osdc.osdmap->osd_addr[i]; | ||
| 102 | int state = client->osdc.osdmap->osd_state[i]; | ||
| 103 | char sb[64]; | ||
| 104 | |||
| 105 | seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n", | ||
| 106 | i, pr_addr(&addr->in_addr), | ||
| 107 | ((client->osdc.osdmap->osd_weight[i]*100) >> 16), | ||
| 108 | ceph_osdmap_state_str(sb, sizeof(sb), state)); | ||
| 109 | } | ||
| 110 | return 0; | ||
| 111 | } | ||
| 112 | |||
| 113 | static int monc_show(struct seq_file *s, void *p) | ||
| 114 | { | ||
| 115 | struct ceph_client *client = s->private; | ||
| 116 | struct ceph_mon_generic_request *req; | ||
| 117 | struct ceph_mon_client *monc = &client->monc; | ||
| 118 | struct rb_node *rp; | ||
| 119 | |||
| 120 | mutex_lock(&monc->mutex); | ||
| 121 | |||
| 122 | if (monc->have_mdsmap) | ||
| 123 | seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap); | ||
| 124 | if (monc->have_osdmap) | ||
| 125 | seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap); | ||
| 126 | if (monc->want_next_osdmap) | ||
| 127 | seq_printf(s, "want next osdmap\n"); | ||
| 128 | |||
| 129 | for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { | ||
| 130 | __u16 op; | ||
| 131 | req = rb_entry(rp, struct ceph_mon_generic_request, node); | ||
| 132 | op = le16_to_cpu(req->request->hdr.type); | ||
| 133 | if (op == CEPH_MSG_STATFS) | ||
| 134 | seq_printf(s, "%lld statfs\n", req->tid); | ||
| 135 | else | ||
| 136 | seq_printf(s, "%lld unknown\n", req->tid); | ||
| 137 | } | ||
| 138 | |||
| 139 | mutex_unlock(&monc->mutex); | ||
| 140 | return 0; | ||
| 141 | } | ||
| 142 | |||
| 143 | static int mdsc_show(struct seq_file *s, void *p) | 49 | static int mdsc_show(struct seq_file *s, void *p) |
| 144 | { | 50 | { |
| 145 | struct ceph_client *client = s->private; | 51 | struct ceph_fs_client *fsc = s->private; |
| 146 | struct ceph_mds_client *mdsc = &client->mdsc; | 52 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 147 | struct ceph_mds_request *req; | 53 | struct ceph_mds_request *req; |
| 148 | struct rb_node *rp; | 54 | struct rb_node *rp; |
| 149 | int pathlen; | 55 | int pathlen; |
| @@ -214,61 +120,12 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
| 214 | return 0; | 120 | return 0; |
| 215 | } | 121 | } |
| 216 | 122 | ||
| 217 | static int osdc_show(struct seq_file *s, void *pp) | ||
| 218 | { | ||
| 219 | struct ceph_client *client = s->private; | ||
| 220 | struct ceph_osd_client *osdc = &client->osdc; | ||
| 221 | struct rb_node *p; | ||
| 222 | |||
| 223 | mutex_lock(&osdc->request_mutex); | ||
| 224 | for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { | ||
| 225 | struct ceph_osd_request *req; | ||
| 226 | struct ceph_osd_request_head *head; | ||
| 227 | struct ceph_osd_op *op; | ||
| 228 | int num_ops; | ||
| 229 | int opcode, olen; | ||
| 230 | int i; | ||
| 231 | |||
| 232 | req = rb_entry(p, struct ceph_osd_request, r_node); | ||
| 233 | |||
| 234 | seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, | ||
| 235 | req->r_osd ? req->r_osd->o_osd : -1, | ||
| 236 | le32_to_cpu(req->r_pgid.pool), | ||
| 237 | le16_to_cpu(req->r_pgid.ps)); | ||
| 238 | |||
| 239 | head = req->r_request->front.iov_base; | ||
| 240 | op = (void *)(head + 1); | ||
| 241 | |||
| 242 | num_ops = le16_to_cpu(head->num_ops); | ||
| 243 | olen = le32_to_cpu(head->object_len); | ||
| 244 | seq_printf(s, "%.*s", olen, | ||
| 245 | (const char *)(head->ops + num_ops)); | ||
| 246 | |||
| 247 | if (req->r_reassert_version.epoch) | ||
| 248 | seq_printf(s, "\t%u'%llu", | ||
| 249 | (unsigned)le32_to_cpu(req->r_reassert_version.epoch), | ||
| 250 | le64_to_cpu(req->r_reassert_version.version)); | ||
| 251 | else | ||
| 252 | seq_printf(s, "\t"); | ||
| 253 | |||
| 254 | for (i = 0; i < num_ops; i++) { | ||
| 255 | opcode = le16_to_cpu(op->op); | ||
| 256 | seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); | ||
| 257 | op++; | ||
| 258 | } | ||
| 259 | |||
| 260 | seq_printf(s, "\n"); | ||
| 261 | } | ||
| 262 | mutex_unlock(&osdc->request_mutex); | ||
| 263 | return 0; | ||
| 264 | } | ||
| 265 | |||
| 266 | static int caps_show(struct seq_file *s, void *p) | 123 | static int caps_show(struct seq_file *s, void *p) |
| 267 | { | 124 | { |
| 268 | struct ceph_client *client = s->private; | 125 | struct ceph_fs_client *fsc = s->private; |
| 269 | int total, avail, used, reserved, min; | 126 | int total, avail, used, reserved, min; |
| 270 | 127 | ||
| 271 | ceph_reservation_status(client, &total, &avail, &used, &reserved, &min); | 128 | ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min); |
| 272 | seq_printf(s, "total\t\t%d\n" | 129 | seq_printf(s, "total\t\t%d\n" |
| 273 | "avail\t\t%d\n" | 130 | "avail\t\t%d\n" |
| 274 | "used\t\t%d\n" | 131 | "used\t\t%d\n" |
| @@ -280,8 +137,8 @@ static int caps_show(struct seq_file *s, void *p) | |||
| 280 | 137 | ||
| 281 | static int dentry_lru_show(struct seq_file *s, void *ptr) | 138 | static int dentry_lru_show(struct seq_file *s, void *ptr) |
| 282 | { | 139 | { |
| 283 | struct ceph_client *client = s->private; | 140 | struct ceph_fs_client *fsc = s->private; |
| 284 | struct ceph_mds_client *mdsc = &client->mdsc; | 141 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 285 | struct ceph_dentry_info *di; | 142 | struct ceph_dentry_info *di; |
| 286 | 143 | ||
| 287 | spin_lock(&mdsc->dentry_lru_lock); | 144 | spin_lock(&mdsc->dentry_lru_lock); |
| @@ -295,199 +152,124 @@ static int dentry_lru_show(struct seq_file *s, void *ptr) | |||
| 295 | return 0; | 152 | return 0; |
| 296 | } | 153 | } |
| 297 | 154 | ||
| 298 | #define DEFINE_SHOW_FUNC(name) \ | 155 | CEPH_DEFINE_SHOW_FUNC(mdsmap_show) |
| 299 | static int name##_open(struct inode *inode, struct file *file) \ | 156 | CEPH_DEFINE_SHOW_FUNC(mdsc_show) |
| 300 | { \ | 157 | CEPH_DEFINE_SHOW_FUNC(caps_show) |
| 301 | struct seq_file *sf; \ | 158 | CEPH_DEFINE_SHOW_FUNC(dentry_lru_show) |
| 302 | int ret; \ | 159 | |
| 303 | \ | ||
| 304 | ret = single_open(file, name, NULL); \ | ||
| 305 | sf = file->private_data; \ | ||
| 306 | sf->private = inode->i_private; \ | ||
| 307 | return ret; \ | ||
| 308 | } \ | ||
| 309 | \ | ||
| 310 | static const struct file_operations name##_fops = { \ | ||
| 311 | .open = name##_open, \ | ||
| 312 | .read = seq_read, \ | ||
| 313 | .llseek = seq_lseek, \ | ||
| 314 | .release = single_release, \ | ||
| 315 | }; | ||
| 316 | |||
| 317 | DEFINE_SHOW_FUNC(monmap_show) | ||
| 318 | DEFINE_SHOW_FUNC(mdsmap_show) | ||
| 319 | DEFINE_SHOW_FUNC(osdmap_show) | ||
| 320 | DEFINE_SHOW_FUNC(monc_show) | ||
| 321 | DEFINE_SHOW_FUNC(mdsc_show) | ||
| 322 | DEFINE_SHOW_FUNC(osdc_show) | ||
| 323 | DEFINE_SHOW_FUNC(dentry_lru_show) | ||
| 324 | DEFINE_SHOW_FUNC(caps_show) | ||
| 325 | 160 | ||
| 161 | /* | ||
| 162 | * debugfs | ||
| 163 | */ | ||
| 326 | static int congestion_kb_set(void *data, u64 val) | 164 | static int congestion_kb_set(void *data, u64 val) |
| 327 | { | 165 | { |
| 328 | struct ceph_client *client = (struct ceph_client *)data; | 166 | struct ceph_fs_client *fsc = (struct ceph_fs_client *)data; |
| 329 | |||
| 330 | if (client) | ||
| 331 | client->mount_args->congestion_kb = (int)val; | ||
| 332 | 167 | ||
| 168 | fsc->mount_options->congestion_kb = (int)val; | ||
| 333 | return 0; | 169 | return 0; |
| 334 | } | 170 | } |
| 335 | 171 | ||
| 336 | static int congestion_kb_get(void *data, u64 *val) | 172 | static int congestion_kb_get(void *data, u64 *val) |
| 337 | { | 173 | { |
| 338 | struct ceph_client *client = (struct ceph_client *)data; | 174 | struct ceph_fs_client *fsc = (struct ceph_fs_client *)data; |
| 339 | |||
| 340 | if (client) | ||
| 341 | *val = (u64)client->mount_args->congestion_kb; | ||
| 342 | 175 | ||
| 176 | *val = (u64)fsc->mount_options->congestion_kb; | ||
| 343 | return 0; | 177 | return 0; |
| 344 | } | 178 | } |
| 345 | 179 | ||
| 346 | |||
| 347 | DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get, | 180 | DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get, |
| 348 | congestion_kb_set, "%llu\n"); | 181 | congestion_kb_set, "%llu\n"); |
| 349 | 182 | ||
| 350 | int __init ceph_debugfs_init(void) | ||
| 351 | { | ||
| 352 | ceph_debugfs_dir = debugfs_create_dir("ceph", NULL); | ||
| 353 | if (!ceph_debugfs_dir) | ||
| 354 | return -ENOMEM; | ||
| 355 | return 0; | ||
| 356 | } | ||
| 357 | 183 | ||
| 358 | void ceph_debugfs_cleanup(void) | 184 | void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) |
| 359 | { | 185 | { |
| 360 | debugfs_remove(ceph_debugfs_dir); | 186 | dout("ceph_fs_debugfs_cleanup\n"); |
| 187 | debugfs_remove(fsc->debugfs_bdi); | ||
| 188 | debugfs_remove(fsc->debugfs_congestion_kb); | ||
| 189 | debugfs_remove(fsc->debugfs_mdsmap); | ||
| 190 | debugfs_remove(fsc->debugfs_caps); | ||
| 191 | debugfs_remove(fsc->debugfs_mdsc); | ||
| 192 | debugfs_remove(fsc->debugfs_dentry_lru); | ||
| 361 | } | 193 | } |
| 362 | 194 | ||
| 363 | int ceph_debugfs_client_init(struct ceph_client *client) | 195 | int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) |
| 364 | { | 196 | { |
| 365 | int ret = 0; | 197 | char name[100]; |
| 366 | char name[80]; | 198 | int err = -ENOMEM; |
| 367 | |||
| 368 | snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, | ||
| 369 | client->monc.auth->global_id); | ||
| 370 | 199 | ||
| 371 | client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); | 200 | dout("ceph_fs_debugfs_init\n"); |
| 372 | if (!client->debugfs_dir) | 201 | fsc->debugfs_congestion_kb = |
| 373 | goto out; | 202 | debugfs_create_file("writeback_congestion_kb", |
| 374 | 203 | 0600, | |
| 375 | client->monc.debugfs_file = debugfs_create_file("monc", | 204 | fsc->client->debugfs_dir, |
| 376 | 0600, | 205 | fsc, |
| 377 | client->debugfs_dir, | 206 | &congestion_kb_fops); |
| 378 | client, | 207 | if (!fsc->debugfs_congestion_kb) |
| 379 | &monc_show_fops); | ||
| 380 | if (!client->monc.debugfs_file) | ||
| 381 | goto out; | 208 | goto out; |
| 382 | 209 | ||
| 383 | client->mdsc.debugfs_file = debugfs_create_file("mdsc", | 210 | dout("a\n"); |
| 384 | 0600, | ||
| 385 | client->debugfs_dir, | ||
| 386 | client, | ||
| 387 | &mdsc_show_fops); | ||
| 388 | if (!client->mdsc.debugfs_file) | ||
| 389 | goto out; | ||
| 390 | 211 | ||
| 391 | client->osdc.debugfs_file = debugfs_create_file("osdc", | 212 | snprintf(name, sizeof(name), "../../bdi/%s", |
| 392 | 0600, | 213 | dev_name(fsc->backing_dev_info.dev)); |
| 393 | client->debugfs_dir, | 214 | fsc->debugfs_bdi = |
| 394 | client, | 215 | debugfs_create_symlink("bdi", |
| 395 | &osdc_show_fops); | 216 | fsc->client->debugfs_dir, |
| 396 | if (!client->osdc.debugfs_file) | 217 | name); |
| 218 | if (!fsc->debugfs_bdi) | ||
| 397 | goto out; | 219 | goto out; |
| 398 | 220 | ||
| 399 | client->debugfs_monmap = debugfs_create_file("monmap", | 221 | dout("b\n"); |
| 222 | fsc->debugfs_mdsmap = debugfs_create_file("mdsmap", | ||
| 400 | 0600, | 223 | 0600, |
| 401 | client->debugfs_dir, | 224 | fsc->client->debugfs_dir, |
| 402 | client, | 225 | fsc, |
| 403 | &monmap_show_fops); | ||
| 404 | if (!client->debugfs_monmap) | ||
| 405 | goto out; | ||
| 406 | |||
| 407 | client->debugfs_mdsmap = debugfs_create_file("mdsmap", | ||
| 408 | 0600, | ||
| 409 | client->debugfs_dir, | ||
| 410 | client, | ||
| 411 | &mdsmap_show_fops); | 226 | &mdsmap_show_fops); |
| 412 | if (!client->debugfs_mdsmap) | 227 | if (!fsc->debugfs_mdsmap) |
| 413 | goto out; | ||
| 414 | |||
| 415 | client->debugfs_osdmap = debugfs_create_file("osdmap", | ||
| 416 | 0600, | ||
| 417 | client->debugfs_dir, | ||
| 418 | client, | ||
| 419 | &osdmap_show_fops); | ||
| 420 | if (!client->debugfs_osdmap) | ||
| 421 | goto out; | 228 | goto out; |
| 422 | 229 | ||
| 423 | client->debugfs_dentry_lru = debugfs_create_file("dentry_lru", | 230 | dout("ca\n"); |
| 424 | 0600, | 231 | fsc->debugfs_mdsc = debugfs_create_file("mdsc", |
| 425 | client->debugfs_dir, | 232 | 0600, |
| 426 | client, | 233 | fsc->client->debugfs_dir, |
| 427 | &dentry_lru_show_fops); | 234 | fsc, |
| 428 | if (!client->debugfs_dentry_lru) | 235 | &mdsc_show_fops); |
| 236 | if (!fsc->debugfs_mdsc) | ||
| 429 | goto out; | 237 | goto out; |
| 430 | 238 | ||
| 431 | client->debugfs_caps = debugfs_create_file("caps", | 239 | dout("da\n"); |
| 240 | fsc->debugfs_caps = debugfs_create_file("caps", | ||
| 432 | 0400, | 241 | 0400, |
| 433 | client->debugfs_dir, | 242 | fsc->client->debugfs_dir, |
| 434 | client, | 243 | fsc, |
| 435 | &caps_show_fops); | 244 | &caps_show_fops); |
| 436 | if (!client->debugfs_caps) | 245 | if (!fsc->debugfs_caps) |
| 437 | goto out; | 246 | goto out; |
| 438 | 247 | ||
| 439 | client->debugfs_congestion_kb = | 248 | dout("ea\n"); |
| 440 | debugfs_create_file("writeback_congestion_kb", | 249 | fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru", |
| 441 | 0600, | 250 | 0600, |
| 442 | client->debugfs_dir, | 251 | fsc->client->debugfs_dir, |
| 443 | client, | 252 | fsc, |
| 444 | &congestion_kb_fops); | 253 | &dentry_lru_show_fops); |
| 445 | if (!client->debugfs_congestion_kb) | 254 | if (!fsc->debugfs_dentry_lru) |
| 446 | goto out; | 255 | goto out; |
| 447 | 256 | ||
| 448 | sprintf(name, "../../bdi/%s", dev_name(client->sb->s_bdi->dev)); | ||
| 449 | client->debugfs_bdi = debugfs_create_symlink("bdi", client->debugfs_dir, | ||
| 450 | name); | ||
| 451 | |||
| 452 | return 0; | 257 | return 0; |
| 453 | 258 | ||
| 454 | out: | 259 | out: |
| 455 | ceph_debugfs_client_cleanup(client); | 260 | ceph_fs_debugfs_cleanup(fsc); |
| 456 | return ret; | 261 | return err; |
| 457 | } | 262 | } |
| 458 | 263 | ||
| 459 | void ceph_debugfs_client_cleanup(struct ceph_client *client) | ||
| 460 | { | ||
| 461 | debugfs_remove(client->debugfs_bdi); | ||
| 462 | debugfs_remove(client->debugfs_caps); | ||
| 463 | debugfs_remove(client->debugfs_dentry_lru); | ||
| 464 | debugfs_remove(client->debugfs_osdmap); | ||
| 465 | debugfs_remove(client->debugfs_mdsmap); | ||
| 466 | debugfs_remove(client->debugfs_monmap); | ||
| 467 | debugfs_remove(client->osdc.debugfs_file); | ||
| 468 | debugfs_remove(client->mdsc.debugfs_file); | ||
| 469 | debugfs_remove(client->monc.debugfs_file); | ||
| 470 | debugfs_remove(client->debugfs_congestion_kb); | ||
| 471 | debugfs_remove(client->debugfs_dir); | ||
| 472 | } | ||
| 473 | 264 | ||
| 474 | #else /* CONFIG_DEBUG_FS */ | 265 | #else /* CONFIG_DEBUG_FS */ |
| 475 | 266 | ||
| 476 | int __init ceph_debugfs_init(void) | 267 | int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) |
| 477 | { | ||
| 478 | return 0; | ||
| 479 | } | ||
| 480 | |||
| 481 | void ceph_debugfs_cleanup(void) | ||
| 482 | { | ||
| 483 | } | ||
| 484 | |||
| 485 | int ceph_debugfs_client_init(struct ceph_client *client) | ||
| 486 | { | 268 | { |
| 487 | return 0; | 269 | return 0; |
| 488 | } | 270 | } |
| 489 | 271 | ||
| 490 | void ceph_debugfs_client_cleanup(struct ceph_client *client) | 272 | void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) |
| 491 | { | 273 | { |
| 492 | } | 274 | } |
| 493 | 275 | ||
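The rewritten fs/ceph/debugfs.c leans on two stock debugfs idioms. Read-only dump files wrap a seq_file "show" function via single_open() — exactly what the removed DEFINE_SHOW_FUNC macro generated, and what its replacement CEPH_DEFINE_SHOW_FUNC, now pulled in via <linux/ceph/debugfs.h>, presumably generates too. A writable scalar such as writeback_congestion_kb needs only a getter and a setter plus DEFINE_SIMPLE_ATTRIBUTE. A condensed sketch of both shapes, with illustrative names:

    #include <linux/debugfs.h>
    #include <linux/seq_file.h>

    static int example_show(struct seq_file *s, void *p)
    {
            seq_printf(s, "state for %p\n", s->private);
            return 0;
    }

    static int example_open(struct inode *inode, struct file *file)
    {
            /* pass i_private through so ->show sees it as s->private */
            return single_open(file, example_show, inode->i_private);
    }

    static const struct file_operations example_fops = {
            .open    = example_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,
    };

    /* Writable u64 attribute: DEFINE_SIMPLE_ATTRIBUTE builds the fops. */
    static int val_get(void *data, u64 *v) { *v = *(u64 *)data; return 0; }
    static int val_set(void *data, u64 v) { *(u64 *)data = v; return 0; }
    DEFINE_SIMPLE_ATTRIBUTE(val_fops, val_get, val_set, "%llu\n");

This also explains why the error path can simply jump to ceph_fs_debugfs_cleanup(): debugfs_remove() ignores a NULL dentry, so — assuming the fs client is zero-initialized — one blanket cleanup covers every partially built state.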
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index a1986eb52045..e0a2dc6fcafc 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/spinlock.h> | 3 | #include <linux/spinlock.h> |
| 4 | #include <linux/fs_struct.h> | 4 | #include <linux/fs_struct.h> |
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
| 8 | 8 | ||
| 9 | #include "super.h" | 9 | #include "super.h" |
| 10 | #include "mds_client.h" | ||
| 10 | 11 | ||
| 11 | /* | 12 | /* |
| 12 | * Directory operations: readdir, lookup, create, link, unlink, | 13 | * Directory operations: readdir, lookup, create, link, unlink, |
| @@ -94,10 +95,7 @@ static unsigned fpos_off(loff_t p) | |||
| 94 | */ | 95 | */ |
| 95 | static int __dcache_readdir(struct file *filp, | 96 | static int __dcache_readdir(struct file *filp, |
| 96 | void *dirent, filldir_t filldir) | 97 | void *dirent, filldir_t filldir) |
| 97 | __releases(inode->i_lock) | ||
| 98 | __acquires(inode->i_lock) | ||
| 99 | { | 98 | { |
| 100 | struct inode *inode = filp->f_dentry->d_inode; | ||
| 101 | struct ceph_file_info *fi = filp->private_data; | 99 | struct ceph_file_info *fi = filp->private_data; |
| 102 | struct dentry *parent = filp->f_dentry; | 100 | struct dentry *parent = filp->f_dentry; |
| 103 | struct inode *dir = parent->d_inode; | 101 | struct inode *dir = parent->d_inode; |
| @@ -153,7 +151,6 @@ more: | |||
| 153 | 151 | ||
| 154 | atomic_inc(&dentry->d_count); | 152 | atomic_inc(&dentry->d_count); |
| 155 | spin_unlock(&dcache_lock); | 153 | spin_unlock(&dcache_lock); |
| 156 | spin_unlock(&inode->i_lock); | ||
| 157 | 154 | ||
| 158 | dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, | 155 | dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, |
| 159 | dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); | 156 | dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); |
| @@ -171,35 +168,30 @@ more: | |||
| 171 | } else { | 168 | } else { |
| 172 | dput(last); | 169 | dput(last); |
| 173 | } | 170 | } |
| 174 | last = NULL; | ||
| 175 | } | 171 | } |
| 176 | |||
| 177 | spin_lock(&inode->i_lock); | ||
| 178 | spin_lock(&dcache_lock); | ||
| 179 | |||
| 180 | last = dentry; | 172 | last = dentry; |
| 181 | 173 | ||
| 182 | if (err < 0) | 174 | if (err < 0) |
| 183 | goto out_unlock; | 175 | goto out; |
| 184 | 176 | ||
| 185 | p = p->prev; | ||
| 186 | filp->f_pos++; | 177 | filp->f_pos++; |
| 187 | 178 | ||
| 188 | /* make sure a dentry wasn't dropped while we didn't have dcache_lock */ | 179 | /* make sure a dentry wasn't dropped while we didn't have dcache_lock */ |
| 189 | if ((ceph_inode(dir)->i_ceph_flags & CEPH_I_COMPLETE)) | 180 | if (!ceph_i_test(dir, CEPH_I_COMPLETE)) { |
| 190 | goto more; | 181 | dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); |
| 191 | dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); | 182 | err = -EAGAIN; |
| 192 | err = -EAGAIN; | 183 | goto out; |
| 184 | } | ||
| 185 | |||
| 186 | spin_lock(&dcache_lock); | ||
| 187 | p = p->prev; /* advance to next dentry */ | ||
| 188 | goto more; | ||
| 193 | 189 | ||
| 194 | out_unlock: | 190 | out_unlock: |
| 195 | spin_unlock(&dcache_lock); | 191 | spin_unlock(&dcache_lock); |
| 196 | 192 | out: | |
| 197 | if (last) { | 193 | if (last) |
| 198 | spin_unlock(&inode->i_lock); | ||
| 199 | dput(last); | 194 | dput(last); |
| 200 | spin_lock(&inode->i_lock); | ||
| 201 | } | ||
| 202 | |||
| 203 | return err; | 195 | return err; |
| 204 | } | 196 | } |
| 205 | 197 | ||
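The __dcache_readdir() hunks stop holding inode->i_lock across the filldir callback (the caller now drops it first; see the ceph_readdir() hunk below) and tidy the remaining dance around dcache_lock. The shape is the standard one for walking a lock-protected list while calling something that may sleep: pin the current entry, drop the lock, do the work, then revalidate the invariant that makes resuming safe — here the directory's CEPH_I_COMPLETE flag — before re-taking the lock and advancing. Reduced to a skeleton (process() and still_complete() are illustrative stand-ins; the real code pins each entry with dget before unlocking):

    /* Sketch: walk a spinlock-protected list with a sleeping callback. */
    static int walk(struct list_head *head, spinlock_t *lock,
                    int (*process)(struct list_head *),
                    int (*still_complete)(void))
    {
            struct list_head *p;
            int err;

            spin_lock(lock);
            p = head->prev;                 /* newest first, like the LRU */
            while (p != head) {
                    spin_unlock(lock);      /* entry itself is pinned */
                    err = process(p);       /* may sleep */
                    if (err)
                            return err;
                    if (!still_complete())
                            return -EAGAIN; /* list changed; fall back */
                    spin_lock(lock);
                    p = p->prev;            /* advance under the lock */
            }
            spin_unlock(lock);
            return 0;
    }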
| @@ -227,15 +219,15 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 227 | struct ceph_file_info *fi = filp->private_data; | 219 | struct ceph_file_info *fi = filp->private_data; |
| 228 | struct inode *inode = filp->f_dentry->d_inode; | 220 | struct inode *inode = filp->f_dentry->d_inode; |
| 229 | struct ceph_inode_info *ci = ceph_inode(inode); | 221 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 230 | struct ceph_client *client = ceph_inode_to_client(inode); | 222 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
| 231 | struct ceph_mds_client *mdsc = &client->mdsc; | 223 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 232 | unsigned frag = fpos_frag(filp->f_pos); | 224 | unsigned frag = fpos_frag(filp->f_pos); |
| 233 | int off = fpos_off(filp->f_pos); | 225 | int off = fpos_off(filp->f_pos); |
| 234 | int err; | 226 | int err; |
| 235 | u32 ftype; | 227 | u32 ftype; |
| 236 | struct ceph_mds_reply_info_parsed *rinfo; | 228 | struct ceph_mds_reply_info_parsed *rinfo; |
| 237 | const int max_entries = client->mount_args->max_readdir; | 229 | const int max_entries = fsc->mount_options->max_readdir; |
| 238 | const int max_bytes = client->mount_args->max_readdir_bytes; | 230 | const int max_bytes = fsc->mount_options->max_readdir_bytes; |
| 239 | 231 | ||
| 240 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); | 232 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); |
| 241 | if (fi->at_end) | 233 | if (fi->at_end) |
| @@ -267,17 +259,17 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 267 | /* can we use the dcache? */ | 259 | /* can we use the dcache? */ |
| 268 | spin_lock(&inode->i_lock); | 260 | spin_lock(&inode->i_lock); |
| 269 | if ((filp->f_pos == 2 || fi->dentry) && | 261 | if ((filp->f_pos == 2 || fi->dentry) && |
| 270 | !ceph_test_opt(client, NOASYNCREADDIR) && | 262 | !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && |
| 271 | ceph_snap(inode) != CEPH_SNAPDIR && | 263 | ceph_snap(inode) != CEPH_SNAPDIR && |
| 272 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && | 264 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && |
| 273 | __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { | 265 | __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { |
| 266 | spin_unlock(&inode->i_lock); | ||
| 274 | err = __dcache_readdir(filp, dirent, filldir); | 267 | err = __dcache_readdir(filp, dirent, filldir); |
| 275 | if (err != -EAGAIN) { | 268 | if (err != -EAGAIN) |
| 276 | spin_unlock(&inode->i_lock); | ||
| 277 | return err; | 269 | return err; |
| 278 | } | 270 | } else { |
| 271 | spin_unlock(&inode->i_lock); | ||
| 279 | } | 272 | } |
| 280 | spin_unlock(&inode->i_lock); | ||
| 281 | if (fi->dentry) { | 273 | if (fi->dentry) { |
| 282 | err = note_last_dentry(fi, fi->dentry->d_name.name, | 274 | err = note_last_dentry(fi, fi->dentry->d_name.name, |
| 283 | fi->dentry->d_name.len); | 275 | fi->dentry->d_name.len); |
| @@ -487,14 +479,13 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) | |||
| 487 | struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | 479 | struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, |
| 488 | struct dentry *dentry, int err) | 480 | struct dentry *dentry, int err) |
| 489 | { | 481 | { |
| 490 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); | 482 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
| 491 | struct inode *parent = dentry->d_parent->d_inode; | 483 | struct inode *parent = dentry->d_parent->d_inode; |
| 492 | 484 | ||
| 493 | /* .snap dir? */ | 485 | /* .snap dir? */ |
| 494 | if (err == -ENOENT && | 486 | if (err == -ENOENT && |
| 495 | ceph_vino(parent).ino != CEPH_INO_ROOT && /* no .snap in root dir */ | ||
| 496 | strcmp(dentry->d_name.name, | 487 | strcmp(dentry->d_name.name, |
| 497 | client->mount_args->snapdir_name) == 0) { | 488 | fsc->mount_options->snapdir_name) == 0) { |
| 498 | struct inode *inode = ceph_get_snapdir(parent); | 489 | struct inode *inode = ceph_get_snapdir(parent); |
| 499 | dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", | 490 | dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", |
| 500 | dentry, dentry->d_name.len, dentry->d_name.name, inode); | 491 | dentry, dentry->d_name.len, dentry->d_name.name, inode); |
| @@ -539,8 +530,8 @@ static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry) | |||
| 539 | static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | 530 | static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, |
| 540 | struct nameidata *nd) | 531 | struct nameidata *nd) |
| 541 | { | 532 | { |
| 542 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 533 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
| 543 | struct ceph_mds_client *mdsc = &client->mdsc; | 534 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 544 | struct ceph_mds_request *req; | 535 | struct ceph_mds_request *req; |
| 545 | int op; | 536 | int op; |
| 546 | int err; | 537 | int err; |
| @@ -572,7 +563,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | |||
| 572 | spin_lock(&dir->i_lock); | 563 | spin_lock(&dir->i_lock); |
| 573 | dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); | 564 | dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); |
| 574 | if (strncmp(dentry->d_name.name, | 565 | if (strncmp(dentry->d_name.name, |
| 575 | client->mount_args->snapdir_name, | 566 | fsc->mount_options->snapdir_name, |
| 576 | dentry->d_name.len) && | 567 | dentry->d_name.len) && |
| 577 | !is_root_ceph_dentry(dir, dentry) && | 568 | !is_root_ceph_dentry(dir, dentry) && |
| 578 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && | 569 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && |
| @@ -629,8 +620,8 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) | |||
| 629 | static int ceph_mknod(struct inode *dir, struct dentry *dentry, | 620 | static int ceph_mknod(struct inode *dir, struct dentry *dentry, |
| 630 | int mode, dev_t rdev) | 621 | int mode, dev_t rdev) |
| 631 | { | 622 | { |
| 632 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 623 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
| 633 | struct ceph_mds_client *mdsc = &client->mdsc; | 624 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 634 | struct ceph_mds_request *req; | 625 | struct ceph_mds_request *req; |
| 635 | int err; | 626 | int err; |
| 636 | 627 | ||
| @@ -685,8 +676,8 @@ static int ceph_create(struct inode *dir, struct dentry *dentry, int mode, | |||
| 685 | static int ceph_symlink(struct inode *dir, struct dentry *dentry, | 676 | static int ceph_symlink(struct inode *dir, struct dentry *dentry, |
| 686 | const char *dest) | 677 | const char *dest) |
| 687 | { | 678 | { |
| 688 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 679 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
| 689 | struct ceph_mds_client *mdsc = &client->mdsc; | 680 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 690 | struct ceph_mds_request *req; | 681 | struct ceph_mds_request *req; |
| 691 | int err; | 682 | int err; |
| 692 | 683 | ||
| @@ -716,8 +707,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, | |||
| 716 | 707 | ||
| 717 | static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode) | 708 | static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode) |
| 718 | { | 709 | { |
| 719 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 710 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
| 720 | struct ceph_mds_client *mdsc = &client->mdsc; | 711 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 721 | struct ceph_mds_request *req; | 712 | struct ceph_mds_request *req; |
| 722 | int err = -EROFS; | 713 | int err = -EROFS; |
| 723 | int op; | 714 | int op; |
| @@ -758,8 +749,8 @@ out: | |||
| 758 | static int ceph_link(struct dentry *old_dentry, struct inode *dir, | 749 | static int ceph_link(struct dentry *old_dentry, struct inode *dir, |
| 759 | struct dentry *dentry) | 750 | struct dentry *dentry) |
| 760 | { | 751 | { |
| 761 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 752 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
| 762 | struct ceph_mds_client *mdsc = &client->mdsc; | 753 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 763 | struct ceph_mds_request *req; | 754 | struct ceph_mds_request *req; |
| 764 | int err; | 755 | int err; |
| 765 | 756 | ||
| @@ -813,8 +804,8 @@ static int drop_caps_for_unlink(struct inode *inode) | |||
| 813 | */ | 804 | */ |
| 814 | static int ceph_unlink(struct inode *dir, struct dentry *dentry) | 805 | static int ceph_unlink(struct inode *dir, struct dentry *dentry) |
| 815 | { | 806 | { |
| 816 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 807 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
| 817 | struct ceph_mds_client *mdsc = &client->mdsc; | 808 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 818 | struct inode *inode = dentry->d_inode; | 809 | struct inode *inode = dentry->d_inode; |
| 819 | struct ceph_mds_request *req; | 810 | struct ceph_mds_request *req; |
| 820 | int err = -EROFS; | 811 | int err = -EROFS; |
| @@ -854,8 +845,8 @@ out: | |||
| 854 | static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, | 845 | static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, |
| 855 | struct inode *new_dir, struct dentry *new_dentry) | 846 | struct inode *new_dir, struct dentry *new_dentry) |
| 856 | { | 847 | { |
| 857 | struct ceph_client *client = ceph_sb_to_client(old_dir->i_sb); | 848 | struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); |
| 858 | struct ceph_mds_client *mdsc = &client->mdsc; | 849 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 859 | struct ceph_mds_request *req; | 850 | struct ceph_mds_request *req; |
| 860 | int err; | 851 | int err; |
| 861 | 852 | ||
| @@ -1076,7 +1067,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, | |||
| 1076 | struct ceph_inode_info *ci = ceph_inode(inode); | 1067 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 1077 | int left; | 1068 | int left; |
| 1078 | 1069 | ||
| 1079 | if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) | 1070 | if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) |
| 1080 | return -EISDIR; | 1071 | return -EISDIR; |
| 1081 | 1072 | ||
| 1082 | if (!cf->dir_info) { | 1073 | if (!cf->dir_info) { |
| @@ -1177,7 +1168,7 @@ void ceph_dentry_lru_add(struct dentry *dn) | |||
| 1177 | dout("dentry_lru_add %p %p '%.*s'\n", di, dn, | 1168 | dout("dentry_lru_add %p %p '%.*s'\n", di, dn, |
| 1178 | dn->d_name.len, dn->d_name.name); | 1169 | dn->d_name.len, dn->d_name.name); |
| 1179 | if (di) { | 1170 | if (di) { |
| 1180 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; | 1171 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; |
| 1181 | spin_lock(&mdsc->dentry_lru_lock); | 1172 | spin_lock(&mdsc->dentry_lru_lock); |
| 1182 | list_add_tail(&di->lru, &mdsc->dentry_lru); | 1173 | list_add_tail(&di->lru, &mdsc->dentry_lru); |
| 1183 | mdsc->num_dentry++; | 1174 | mdsc->num_dentry++; |
| @@ -1193,7 +1184,7 @@ void ceph_dentry_lru_touch(struct dentry *dn) | |||
| 1193 | dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, | 1184 | dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, |
| 1194 | dn->d_name.len, dn->d_name.name, di->offset); | 1185 | dn->d_name.len, dn->d_name.name, di->offset); |
| 1195 | if (di) { | 1186 | if (di) { |
| 1196 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; | 1187 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; |
| 1197 | spin_lock(&mdsc->dentry_lru_lock); | 1188 | spin_lock(&mdsc->dentry_lru_lock); |
| 1198 | list_move_tail(&di->lru, &mdsc->dentry_lru); | 1189 | list_move_tail(&di->lru, &mdsc->dentry_lru); |
| 1199 | spin_unlock(&mdsc->dentry_lru_lock); | 1190 | spin_unlock(&mdsc->dentry_lru_lock); |
| @@ -1208,7 +1199,7 @@ void ceph_dentry_lru_del(struct dentry *dn) | |||
| 1208 | dout("dentry_lru_del %p %p '%.*s'\n", di, dn, | 1199 | dout("dentry_lru_del %p %p '%.*s'\n", di, dn, |
| 1209 | dn->d_name.len, dn->d_name.name); | 1200 | dn->d_name.len, dn->d_name.name); |
| 1210 | if (di) { | 1201 | if (di) { |
| 1211 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; | 1202 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; |
| 1212 | spin_lock(&mdsc->dentry_lru_lock); | 1203 | spin_lock(&mdsc->dentry_lru_lock); |
| 1213 | list_del_init(&di->lru); | 1204 | list_del_init(&di->lru); |
| 1214 | mdsc->num_dentry--; | 1205 | mdsc->num_dentry--; |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index e38423e82f2e..2297d9426992 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
| @@ -1,10 +1,11 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/exportfs.h> | 3 | #include <linux/exportfs.h> |
| 4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
| 5 | #include <asm/unaligned.h> | 5 | #include <asm/unaligned.h> |
| 6 | 6 | ||
| 7 | #include "super.h" | 7 | #include "super.h" |
| 8 | #include "mds_client.h" | ||
| 8 | 9 | ||
| 9 | /* | 10 | /* |
| 10 | * NFS export support | 11 | * NFS export support |
| @@ -120,7 +121,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
| 120 | static struct dentry *__cfh_to_dentry(struct super_block *sb, | 121 | static struct dentry *__cfh_to_dentry(struct super_block *sb, |
| 121 | struct ceph_nfs_confh *cfh) | 122 | struct ceph_nfs_confh *cfh) |
| 122 | { | 123 | { |
| 123 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc; | 124 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; |
| 124 | struct inode *inode; | 125 | struct inode *inode; |
| 125 | struct dentry *dentry; | 126 | struct dentry *dentry; |
| 126 | struct ceph_vino vino; | 127 | struct ceph_vino vino; |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 66e4da6dba22..e77c28cf3690 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/module.h> | ||
| 3 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
| 4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
| 5 | #include <linux/file.h> | 6 | #include <linux/file.h> |
| @@ -38,8 +39,8 @@ | |||
| 38 | static struct ceph_mds_request * | 39 | static struct ceph_mds_request * |
| 39 | prepare_open_request(struct super_block *sb, int flags, int create_mode) | 40 | prepare_open_request(struct super_block *sb, int flags, int create_mode) |
| 40 | { | 41 | { |
| 41 | struct ceph_client *client = ceph_sb_to_client(sb); | 42 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
| 42 | struct ceph_mds_client *mdsc = &client->mdsc; | 43 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 43 | struct ceph_mds_request *req; | 44 | struct ceph_mds_request *req; |
| 44 | int want_auth = USE_ANY_MDS; | 45 | int want_auth = USE_ANY_MDS; |
| 45 | int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; | 46 | int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; |
| @@ -117,8 +118,8 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) | |||
| 117 | int ceph_open(struct inode *inode, struct file *file) | 118 | int ceph_open(struct inode *inode, struct file *file) |
| 118 | { | 119 | { |
| 119 | struct ceph_inode_info *ci = ceph_inode(inode); | 120 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 120 | struct ceph_client *client = ceph_sb_to_client(inode->i_sb); | 121 | struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); |
| 121 | struct ceph_mds_client *mdsc = &client->mdsc; | 122 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 122 | struct ceph_mds_request *req; | 123 | struct ceph_mds_request *req; |
| 123 | struct ceph_file_info *cf = file->private_data; | 124 | struct ceph_file_info *cf = file->private_data; |
| 124 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; | 125 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; |
| @@ -216,8 +217,8 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, | |||
| 216 | struct nameidata *nd, int mode, | 217 | struct nameidata *nd, int mode, |
| 217 | int locked_dir) | 218 | int locked_dir) |
| 218 | { | 219 | { |
| 219 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 220 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
| 220 | struct ceph_mds_client *mdsc = &client->mdsc; | 221 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 221 | struct file *file = nd->intent.open.file; | 222 | struct file *file = nd->intent.open.file; |
| 222 | struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); | 223 | struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); |
| 223 | struct ceph_mds_request *req; | 224 | struct ceph_mds_request *req; |
| @@ -270,163 +271,6 @@ int ceph_release(struct inode *inode, struct file *file) | |||
| 270 | } | 271 | } |
| 271 | 272 | ||
| 272 | /* | 273 | /* |
| 273 | * build a vector of user pages | ||
| 274 | */ | ||
| 275 | static struct page **get_direct_page_vector(const char __user *data, | ||
| 276 | int num_pages, | ||
| 277 | loff_t off, size_t len) | ||
| 278 | { | ||
| 279 | struct page **pages; | ||
| 280 | int rc; | ||
| 281 | |||
| 282 | pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); | ||
| 283 | if (!pages) | ||
| 284 | return ERR_PTR(-ENOMEM); | ||
| 285 | |||
| 286 | down_read(¤t->mm->mmap_sem); | ||
| 287 | rc = get_user_pages(current, current->mm, (unsigned long)data, | ||
| 288 | num_pages, 0, 0, pages, NULL); | ||
| 289 | up_read(¤t->mm->mmap_sem); | ||
| 290 | if (rc < 0) | ||
| 291 | goto fail; | ||
| 292 | return pages; | ||
| 293 | |||
| 294 | fail: | ||
| 295 | kfree(pages); | ||
| 296 | return ERR_PTR(rc); | ||
| 297 | } | ||
| 298 | |||
| 299 | static void put_page_vector(struct page **pages, int num_pages) | ||
| 300 | { | ||
| 301 | int i; | ||
| 302 | |||
| 303 | for (i = 0; i < num_pages; i++) | ||
| 304 | put_page(pages[i]); | ||
| 305 | kfree(pages); | ||
| 306 | } | ||
| 307 | |||
| 308 | void ceph_release_page_vector(struct page **pages, int num_pages) | ||
| 309 | { | ||
| 310 | int i; | ||
| 311 | |||
| 312 | for (i = 0; i < num_pages; i++) | ||
| 313 | __free_pages(pages[i], 0); | ||
| 314 | kfree(pages); | ||
| 315 | } | ||
| 316 | |||
| 317 | /* | ||
| 318 | * allocate a vector of new pages | ||
| 319 | */ | ||
| 320 | static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) | ||
| 321 | { | ||
| 322 | struct page **pages; | ||
| 323 | int i; | ||
| 324 | |||
| 325 | pages = kmalloc(sizeof(*pages) * num_pages, flags); | ||
| 326 | if (!pages) | ||
| 327 | return ERR_PTR(-ENOMEM); | ||
| 328 | for (i = 0; i < num_pages; i++) { | ||
| 329 | pages[i] = __page_cache_alloc(flags); | ||
| 330 | if (pages[i] == NULL) { | ||
| 331 | ceph_release_page_vector(pages, i); | ||
| 332 | return ERR_PTR(-ENOMEM); | ||
| 333 | } | ||
| 334 | } | ||
| 335 | return pages; | ||
| 336 | } | ||
| 337 | |||
| 338 | /* | ||
| 339 | * copy user data into a page vector | ||
| 340 | */ | ||
| 341 | static int copy_user_to_page_vector(struct page **pages, | ||
| 342 | const char __user *data, | ||
| 343 | loff_t off, size_t len) | ||
| 344 | { | ||
| 345 | int i = 0; | ||
| 346 | int po = off & ~PAGE_CACHE_MASK; | ||
| 347 | int left = len; | ||
| 348 | int l, bad; | ||
| 349 | |||
| 350 | while (left > 0) { | ||
| 351 | l = min_t(int, PAGE_CACHE_SIZE-po, left); | ||
| 352 | bad = copy_from_user(page_address(pages[i]) + po, data, l); | ||
| 353 | if (bad == l) | ||
| 354 | return -EFAULT; | ||
| 355 | data += l - bad; | ||
| 356 | left -= l - bad; | ||
| 357 | po += l - bad; | ||
| 358 | if (po == PAGE_CACHE_SIZE) { | ||
| 359 | po = 0; | ||
| 360 | i++; | ||
| 361 | } | ||
| 362 | } | ||
| 363 | return len; | ||
| 364 | } | ||
| 365 | |||
| 366 | /* | ||
| 367 | * copy data from a page vector into a user pointer | ||
| 368 | */ | ||
| 369 | static int copy_page_vector_to_user(struct page **pages, char __user *data, | ||
| 370 | loff_t off, size_t len) | ||
| 371 | { | ||
| 372 | int i = 0; | ||
| 373 | int po = off & ~PAGE_CACHE_MASK; | ||
| 374 | int left = len; | ||
| 375 | int l, bad; | ||
| 376 | |||
| 377 | while (left > 0) { | ||
| 378 | l = min_t(int, left, PAGE_CACHE_SIZE-po); | ||
| 379 | bad = copy_to_user(data, page_address(pages[i]) + po, l); | ||
| 380 | if (bad == l) | ||
| 381 | return -EFAULT; | ||
| 382 | data += l - bad; | ||
| 383 | left -= l - bad; | ||
| 384 | if (po) { | ||
| 385 | po += l - bad; | ||
| 386 | if (po == PAGE_CACHE_SIZE) | ||
| 387 | po = 0; | ||
| 388 | } | ||
| 389 | i++; | ||
| 390 | } | ||
| 391 | return len; | ||
| 392 | } | ||
| 393 | |||
| 394 | /* | ||
| 395 | * Zero an extent within a page vector. Offset is relative to the | ||
| 396 | * start of the first page. | ||
| 397 | */ | ||
| 398 | static void zero_page_vector_range(int off, int len, struct page **pages) | ||
| 399 | { | ||
| 400 | int i = off >> PAGE_CACHE_SHIFT; | ||
| 401 | |||
| 402 | off &= ~PAGE_CACHE_MASK; | ||
| 403 | |||
| 404 | dout("zero_page_vector_page %u~%u\n", off, len); | ||
| 405 | |||
| 406 | /* leading partial page? */ | ||
| 407 | if (off) { | ||
| 408 | int end = min((int)PAGE_CACHE_SIZE, off + len); | ||
| 409 | dout("zeroing %d %p head from %d\n", i, pages[i], | ||
| 410 | (int)off); | ||
| 411 | zero_user_segment(pages[i], off, end); | ||
| 412 | len -= (end - off); | ||
| 413 | i++; | ||
| 414 | } | ||
| 415 | while (len >= PAGE_CACHE_SIZE) { | ||
| 416 | dout("zeroing %d %p len=%d\n", i, pages[i], len); | ||
| 417 | zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); | ||
| 418 | len -= PAGE_CACHE_SIZE; | ||
| 419 | i++; | ||
| 420 | } | ||
| 421 | /* trailing partial page? */ | ||
| 422 | if (len) { | ||
| 423 | dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len); | ||
| 424 | zero_user_segment(pages[i], 0, len); | ||
| 425 | } | ||
| 426 | } | ||
| 427 | |||
| 428 | |||
| 429 | /* | ||
| 430 | * Read a range of bytes striped over one or more objects. Iterate over | 274 | * Read a range of bytes striped over one or more objects. Iterate over |
| 431 | * objects we stripe over. (That's not atomic, but good enough for now.) | 275 | * objects we stripe over. (That's not atomic, but good enough for now.) |
| 432 | * | 276 | * |
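The page-vector helpers deleted above are not dropped from the kernel: this series moves them into the shared libceph module as exported, ceph_-prefixed symbols, which is why every caller below switches to ceph_get_direct_page_vector(), ceph_copy_page_vector_to_user(), ceph_put_page_vector(), and friends. A minimal sketch of the renamed allocation pair, assuming the declarations land in <linux/ceph/libceph.h> as this series arranges:

/* Sketch: driving the moved page-vector API from a libceph consumer.
 * Semantics follow the deleted static helpers above. */
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/ceph/libceph.h>

static int demo_page_vector(int num_pages)
{
	struct page **pages;

	pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
	if (IS_ERR(pages))
		return PTR_ERR(pages);		/* -ENOMEM */

	/* ... attach the vector to an OSD read/write request ... */

	ceph_release_page_vector(pages, num_pages);
	return 0;
}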
| @@ -438,7 +282,7 @@ static int striped_read(struct inode *inode, | |||
| 438 | struct page **pages, int num_pages, | 282 | struct page **pages, int num_pages, |
| 439 | int *checkeof) | 283 | int *checkeof) |
| 440 | { | 284 | { |
| 441 | struct ceph_client *client = ceph_inode_to_client(inode); | 285 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
| 442 | struct ceph_inode_info *ci = ceph_inode(inode); | 286 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 443 | u64 pos, this_len; | 287 | u64 pos, this_len; |
| 444 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ | 288 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ |
| @@ -459,7 +303,7 @@ static int striped_read(struct inode *inode, | |||
| 459 | 303 | ||
| 460 | more: | 304 | more: |
| 461 | this_len = left; | 305 | this_len = left; |
| 462 | ret = ceph_osdc_readpages(&client->osdc, ceph_vino(inode), | 306 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), |
| 463 | &ci->i_layout, pos, &this_len, | 307 | &ci->i_layout, pos, &this_len, |
| 464 | ci->i_truncate_seq, | 308 | ci->i_truncate_seq, |
| 465 | ci->i_truncate_size, | 309 | ci->i_truncate_size, |
| @@ -477,8 +321,8 @@ more: | |||
| 477 | 321 | ||
| 478 | if (read < pos - off) { | 322 | if (read < pos - off) { |
| 479 | dout(" zero gap %llu to %llu\n", off + read, pos); | 323 | dout(" zero gap %llu to %llu\n", off + read, pos); |
| 480 | zero_page_vector_range(page_off + read, | 324 | ceph_zero_page_vector_range(page_off + read, |
| 481 | pos - off - read, pages); | 325 | pos - off - read, pages); |
| 482 | } | 326 | } |
| 483 | pos += ret; | 327 | pos += ret; |
| 484 | read = pos - off; | 328 | read = pos - off; |
| @@ -495,8 +339,8 @@ more: | |||
| 495 | /* was original extent fully inside i_size? */ | 339 | /* was original extent fully inside i_size? */ |
| 496 | if (pos + left <= inode->i_size) { | 340 | if (pos + left <= inode->i_size) { |
| 497 | dout("zero tail\n"); | 341 | dout("zero tail\n"); |
| 498 | zero_page_vector_range(page_off + read, len - read, | 342 | ceph_zero_page_vector_range(page_off + read, len - read, |
| 499 | pages); | 343 | pages); |
| 500 | read = len; | 344 | read = len; |
| 501 | goto out; | 345 | goto out; |
| 502 | } | 346 | } |
| @@ -531,7 +375,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
| 531 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); | 375 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); |
| 532 | 376 | ||
| 533 | if (file->f_flags & O_DIRECT) { | 377 | if (file->f_flags & O_DIRECT) { |
| 534 | pages = get_direct_page_vector(data, num_pages, off, len); | 378 | pages = ceph_get_direct_page_vector(data, num_pages, off, len); |
| 535 | 379 | ||
| 536 | /* | 380 | /* |
| 537 | * flush any page cache pages in this range. this | 381 | * flush any page cache pages in this range. this |
| @@ -552,13 +396,13 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
| 552 | ret = striped_read(inode, off, len, pages, num_pages, checkeof); | 396 | ret = striped_read(inode, off, len, pages, num_pages, checkeof); |
| 553 | 397 | ||
| 554 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) | 398 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) |
| 555 | ret = copy_page_vector_to_user(pages, data, off, ret); | 399 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); |
| 556 | if (ret >= 0) | 400 | if (ret >= 0) |
| 557 | *poff = off + ret; | 401 | *poff = off + ret; |
| 558 | 402 | ||
| 559 | done: | 403 | done: |
| 560 | if (file->f_flags & O_DIRECT) | 404 | if (file->f_flags & O_DIRECT) |
| 561 | put_page_vector(pages, num_pages); | 405 | ceph_put_page_vector(pages, num_pages); |
| 562 | else | 406 | else |
| 563 | ceph_release_page_vector(pages, num_pages); | 407 | ceph_release_page_vector(pages, num_pages); |
| 564 | dout("sync_read result %d\n", ret); | 408 | dout("sync_read result %d\n", ret); |
| @@ -594,7 +438,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
| 594 | { | 438 | { |
| 595 | struct inode *inode = file->f_dentry->d_inode; | 439 | struct inode *inode = file->f_dentry->d_inode; |
| 596 | struct ceph_inode_info *ci = ceph_inode(inode); | 440 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 597 | struct ceph_client *client = ceph_inode_to_client(inode); | 441 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
| 598 | struct ceph_osd_request *req; | 442 | struct ceph_osd_request *req; |
| 599 | struct page **pages; | 443 | struct page **pages; |
| 600 | int num_pages; | 444 | int num_pages; |
| @@ -642,7 +486,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
| 642 | */ | 486 | */ |
| 643 | more: | 487 | more: |
| 644 | len = left; | 488 | len = left; |
| 645 | req = ceph_osdc_new_request(&client->osdc, &ci->i_layout, | 489 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, |
| 646 | ceph_vino(inode), pos, &len, | 490 | ceph_vino(inode), pos, &len, |
| 647 | CEPH_OSD_OP_WRITE, flags, | 491 | CEPH_OSD_OP_WRITE, flags, |
| 648 | ci->i_snap_realm->cached_context, | 492 | ci->i_snap_realm->cached_context, |
| @@ -655,7 +499,7 @@ more: | |||
| 655 | num_pages = calc_pages_for(pos, len); | 499 | num_pages = calc_pages_for(pos, len); |
| 656 | 500 | ||
| 657 | if (file->f_flags & O_DIRECT) { | 501 | if (file->f_flags & O_DIRECT) { |
| 658 | pages = get_direct_page_vector(data, num_pages, pos, len); | 502 | pages = ceph_get_direct_page_vector(data, num_pages, pos, len); |
| 659 | if (IS_ERR(pages)) { | 503 | if (IS_ERR(pages)) { |
| 660 | ret = PTR_ERR(pages); | 504 | ret = PTR_ERR(pages); |
| 661 | goto out; | 505 | goto out; |
| @@ -673,7 +517,7 @@ more: | |||
| 673 | ret = PTR_ERR(pages); | 517 | ret = PTR_ERR(pages); |
| 674 | goto out; | 518 | goto out; |
| 675 | } | 519 | } |
| 676 | ret = copy_user_to_page_vector(pages, data, pos, len); | 520 | ret = ceph_copy_user_to_page_vector(pages, data, pos, len); |
| 677 | if (ret < 0) { | 521 | if (ret < 0) { |
| 678 | ceph_release_page_vector(pages, num_pages); | 522 | ceph_release_page_vector(pages, num_pages); |
| 679 | goto out; | 523 | goto out; |
| @@ -689,7 +533,7 @@ more: | |||
| 689 | req->r_num_pages = num_pages; | 533 | req->r_num_pages = num_pages; |
| 690 | req->r_inode = inode; | 534 | req->r_inode = inode; |
| 691 | 535 | ||
| 692 | ret = ceph_osdc_start_request(&client->osdc, req, false); | 536 | ret = ceph_osdc_start_request(&fsc->client->osdc, req, false); |
| 693 | if (!ret) { | 537 | if (!ret) { |
| 694 | if (req->r_safe_callback) { | 538 | if (req->r_safe_callback) { |
| 695 | /* | 539 | /* |
| @@ -701,11 +545,11 @@ more: | |||
| 701 | spin_unlock(&ci->i_unsafe_lock); | 545 | spin_unlock(&ci->i_unsafe_lock); |
| 702 | ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); | 546 | ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); |
| 703 | } | 547 | } |
| 704 | ret = ceph_osdc_wait_request(&client->osdc, req); | 548 | ret = ceph_osdc_wait_request(&fsc->client->osdc, req); |
| 705 | } | 549 | } |
| 706 | 550 | ||
| 707 | if (file->f_flags & O_DIRECT) | 551 | if (file->f_flags & O_DIRECT) |
| 708 | put_page_vector(pages, num_pages); | 552 | ceph_put_page_vector(pages, num_pages); |
| 709 | else if (file->f_flags & O_SYNC) | 553 | else if (file->f_flags & O_SYNC) |
| 710 | ceph_release_page_vector(pages, num_pages); | 554 | ceph_release_page_vector(pages, num_pages); |
| 711 | 555 | ||
| @@ -814,7 +658,8 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 814 | struct ceph_file_info *fi = file->private_data; | 658 | struct ceph_file_info *fi = file->private_data; |
| 815 | struct inode *inode = file->f_dentry->d_inode; | 659 | struct inode *inode = file->f_dentry->d_inode; |
| 816 | struct ceph_inode_info *ci = ceph_inode(inode); | 660 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 817 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; | 661 | struct ceph_osd_client *osdc = |
| 662 | &ceph_sb_to_client(inode->i_sb)->client->osdc; | ||
| 818 | loff_t endoff = pos + iov->iov_len; | 663 | loff_t endoff = pos + iov->iov_len; |
| 819 | int want, got = 0; | 664 | int want, got = 0; |
| 820 | int ret, err; | 665 | int ret, err; |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 62377ec37edf..1d6a45b5a04c 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
| 4 | #include <linux/fs.h> | 4 | #include <linux/fs.h> |
| @@ -13,7 +13,8 @@ | |||
| 13 | #include <linux/pagevec.h> | 13 | #include <linux/pagevec.h> |
| 14 | 14 | ||
| 15 | #include "super.h" | 15 | #include "super.h" |
| 16 | #include "decode.h" | 16 | #include "mds_client.h" |
| 17 | #include <linux/ceph/decode.h> | ||
| 17 | 18 | ||
| 18 | /* | 19 | /* |
| 19 | * Ceph inode operations | 20 | * Ceph inode operations |
| @@ -384,7 +385,7 @@ void ceph_destroy_inode(struct inode *inode) | |||
| 384 | */ | 385 | */ |
| 385 | if (ci->i_snap_realm) { | 386 | if (ci->i_snap_realm) { |
| 386 | struct ceph_mds_client *mdsc = | 387 | struct ceph_mds_client *mdsc = |
| 387 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | 388 | ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
| 388 | struct ceph_snap_realm *realm = ci->i_snap_realm; | 389 | struct ceph_snap_realm *realm = ci->i_snap_realm; |
| 389 | 390 | ||
| 390 | dout(" dropping residual ref to snap realm %p\n", realm); | 391 | dout(" dropping residual ref to snap realm %p\n", realm); |
| @@ -685,7 +686,7 @@ static int fill_inode(struct inode *inode, | |||
| 685 | } | 686 | } |
| 686 | 687 | ||
| 687 | /* it may be better to set st_size in getattr instead? */ | 688 | /* it may be better to set st_size in getattr instead? */ |
| 688 | if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) | 689 | if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) |
| 689 | inode->i_size = ci->i_rbytes; | 690 | inode->i_size = ci->i_rbytes; |
| 690 | break; | 691 | break; |
| 691 | default: | 692 | default: |
| @@ -901,7 +902,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
| 901 | struct inode *in = NULL; | 902 | struct inode *in = NULL; |
| 902 | struct ceph_mds_reply_inode *ininfo; | 903 | struct ceph_mds_reply_inode *ininfo; |
| 903 | struct ceph_vino vino; | 904 | struct ceph_vino vino; |
| 904 | struct ceph_client *client = ceph_sb_to_client(sb); | 905 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
| 905 | int i = 0; | 906 | int i = 0; |
| 906 | int err = 0; | 907 | int err = 0; |
| 907 | 908 | ||
| @@ -965,7 +966,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
| 965 | */ | 966 | */ |
| 966 | if (rinfo->head->is_dentry && !req->r_aborted && | 967 | if (rinfo->head->is_dentry && !req->r_aborted && |
| 967 | (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, | 968 | (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, |
| 968 | client->mount_args->snapdir_name, | 969 | fsc->mount_options->snapdir_name, |
| 969 | req->r_dentry->d_name.len))) { | 970 | req->r_dentry->d_name.len))) { |
| 970 | /* | 971 | /* |
| 971 | * lookup link rename : null -> possibly existing inode | 972 | * lookup link rename : null -> possibly existing inode |
| @@ -1533,7 +1534,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 1533 | struct inode *parent_inode = dentry->d_parent->d_inode; | 1534 | struct inode *parent_inode = dentry->d_parent->d_inode; |
| 1534 | const unsigned int ia_valid = attr->ia_valid; | 1535 | const unsigned int ia_valid = attr->ia_valid; |
| 1535 | struct ceph_mds_request *req; | 1536 | struct ceph_mds_request *req; |
| 1536 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc; | 1537 | struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; |
| 1537 | int issued; | 1538 | int issued; |
| 1538 | int release = 0, dirtied = 0; | 1539 | int release = 0, dirtied = 0; |
| 1539 | int mask = 0; | 1540 | int mask = 0; |
| @@ -1728,8 +1729,8 @@ out: | |||
| 1728 | */ | 1729 | */ |
| 1729 | int ceph_do_getattr(struct inode *inode, int mask) | 1730 | int ceph_do_getattr(struct inode *inode, int mask) |
| 1730 | { | 1731 | { |
| 1731 | struct ceph_client *client = ceph_sb_to_client(inode->i_sb); | 1732 | struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); |
| 1732 | struct ceph_mds_client *mdsc = &client->mdsc; | 1733 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 1733 | struct ceph_mds_request *req; | 1734 | struct ceph_mds_request *req; |
| 1734 | int err; | 1735 | int err; |
| 1735 | 1736 | ||
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 76e307d2aba1..8888c9ba68db 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
| @@ -1,8 +1,10 @@ | |||
| 1 | #include <linux/in.h> | 1 | #include <linux/in.h> |
| 2 | 2 | ||
| 3 | #include "ioctl.h" | ||
| 4 | #include "super.h" | 3 | #include "super.h" |
| 5 | #include "ceph_debug.h" | 4 | #include "mds_client.h" |
| 5 | #include <linux/ceph/ceph_debug.h> | ||
| 6 | |||
| 7 | #include "ioctl.h" | ||
| 6 | 8 | ||
| 7 | 9 | ||
| 8 | /* | 10 | /* |
| @@ -37,7 +39,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | |||
| 37 | { | 39 | { |
| 38 | struct inode *inode = file->f_dentry->d_inode; | 40 | struct inode *inode = file->f_dentry->d_inode; |
| 39 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; | 41 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; |
| 40 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 42 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
| 41 | struct ceph_mds_request *req; | 43 | struct ceph_mds_request *req; |
| 42 | struct ceph_ioctl_layout l; | 44 | struct ceph_ioctl_layout l; |
| 43 | int err, i; | 45 | int err, i; |
| @@ -90,6 +92,68 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | |||
| 90 | } | 92 | } |
| 91 | 93 | ||
| 92 | /* | 94 | /* |
| 95 | * Set a layout policy on a directory inode. All items in the tree | ||
| 96 | * rooted at this inode will inherit this layout on creation | ||
| 97 | * (it doesn't apply retroactively), unless a subdirectory has | ||
| 98 | * its own layout policy. | ||
| 99 | */ | ||
| 100 | static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) | ||
| 101 | { | ||
| 102 | struct inode *inode = file->f_dentry->d_inode; | ||
| 103 | struct ceph_mds_request *req; | ||
| 104 | struct ceph_ioctl_layout l; | ||
| 105 | int err, i; | ||
| 106 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | ||
| 107 | |||
| 108 | /* copy and validate */ | ||
| 109 | if (copy_from_user(&l, arg, sizeof(l))) | ||
| 110 | return -EFAULT; | ||
| 111 | |||
| 112 | if ((l.object_size & ~PAGE_MASK) || | ||
| 113 | (l.stripe_unit & ~PAGE_MASK) || | ||
| 114 | !l.stripe_unit || | ||
| 115 | (l.object_size && | ||
| 116 | (unsigned)l.object_size % (unsigned)l.stripe_unit)) | ||
| 117 | return -EINVAL; | ||
| 118 | |||
| 119 | /* make sure it's a valid data pool */ | ||
| 120 | if (l.data_pool > 0) { | ||
| 121 | mutex_lock(&mdsc->mutex); | ||
| 122 | err = -EINVAL; | ||
| 123 | for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++) | ||
| 124 | if (mdsc->mdsmap->m_data_pg_pools[i] == l.data_pool) { | ||
| 125 | err = 0; | ||
| 126 | break; | ||
| 127 | } | ||
| 128 | mutex_unlock(&mdsc->mutex); | ||
| 129 | if (err) | ||
| 130 | return err; | ||
| 131 | } | ||
| 132 | |||
| 133 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETDIRLAYOUT, | ||
| 134 | USE_AUTH_MDS); | ||
| 135 | |||
| 136 | if (IS_ERR(req)) | ||
| 137 | return PTR_ERR(req); | ||
| 138 | req->r_inode = igrab(inode); | ||
| 139 | |||
| 140 | req->r_args.setlayout.layout.fl_stripe_unit = | ||
| 141 | cpu_to_le32(l.stripe_unit); | ||
| 142 | req->r_args.setlayout.layout.fl_stripe_count = | ||
| 143 | cpu_to_le32(l.stripe_count); | ||
| 144 | req->r_args.setlayout.layout.fl_object_size = | ||
| 145 | cpu_to_le32(l.object_size); | ||
| 146 | req->r_args.setlayout.layout.fl_pg_pool = | ||
| 147 | cpu_to_le32(l.data_pool); | ||
| 148 | req->r_args.setlayout.layout.fl_pg_preferred = | ||
| 149 | cpu_to_le32(l.preferred_osd); | ||
| 150 | |||
| 151 | err = ceph_mdsc_do_request(mdsc, inode, req); | ||
| 152 | ceph_mdsc_put_request(req); | ||
| 153 | return err; | ||
| 154 | } | ||
| 155 | |||
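For orientation, a hedged userspace sketch of driving the new ioctl on a directory (not part of this patch). The demo values are arbitrary but satisfy the validation above: a nonzero, page-aligned stripe_unit and an object_size that is a multiple of it; data_pool is left 0 so the pool check is skipped, and preferred_osd = -1 (no preference) is an assumption here:

/* Userspace sketch: apply a default file layout to a directory via
 * the new ioctl. Demo values only; field names from ioctl.h. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "ioctl.h"			/* CEPH_IOC_SET_LAYOUT_POLICY */

int set_dir_layout_policy(const char *dir)
{
	struct ceph_ioctl_layout l = {
		.stripe_unit   = 4 << 20,	/* page-aligned, nonzero */
		.stripe_count  = 1,
		.object_size   = 4 << 20,	/* multiple of stripe_unit */
		.data_pool     = 0,		/* 0: skip the pool check */
		.preferred_osd = -1,		/* no preference (assumption) */
	};
	int fd, ret;

	fd = open(dir, O_RDONLY);
	if (fd < 0)
		return -1;
	ret = ioctl(fd, CEPH_IOC_SET_LAYOUT_POLICY, &l);
	if (ret < 0)
		perror("CEPH_IOC_SET_LAYOUT_POLICY");
	close(fd);
	return ret;
}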
| 156 | /* | ||
| 93 | * Return object name, size/offset information, and location (OSD | 157 | * Return object name, size/offset information, and location (OSD |
| 94 | * number, network address) for a given file offset. | 158 | * number, network address) for a given file offset. |
| 95 | */ | 159 | */ |
| @@ -98,7 +162,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||
| 98 | struct ceph_ioctl_dataloc dl; | 162 | struct ceph_ioctl_dataloc dl; |
| 99 | struct inode *inode = file->f_dentry->d_inode; | 163 | struct inode *inode = file->f_dentry->d_inode; |
| 100 | struct ceph_inode_info *ci = ceph_inode(inode); | 164 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 101 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; | 165 | struct ceph_osd_client *osdc = |
| 166 | &ceph_sb_to_client(inode->i_sb)->client->osdc; | ||
| 102 | u64 len = 1, olen; | 167 | u64 len = 1, olen; |
| 103 | u64 tmp; | 168 | u64 tmp; |
| 104 | struct ceph_object_layout ol; | 169 | struct ceph_object_layout ol; |
| @@ -174,11 +239,15 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
| 174 | case CEPH_IOC_SET_LAYOUT: | 239 | case CEPH_IOC_SET_LAYOUT: |
| 175 | return ceph_ioctl_set_layout(file, (void __user *)arg); | 240 | return ceph_ioctl_set_layout(file, (void __user *)arg); |
| 176 | 241 | ||
| 242 | case CEPH_IOC_SET_LAYOUT_POLICY: | ||
| 243 | return ceph_ioctl_set_layout_policy(file, (void __user *)arg); | ||
| 244 | |||
| 177 | case CEPH_IOC_GET_DATALOC: | 245 | case CEPH_IOC_GET_DATALOC: |
| 178 | return ceph_ioctl_get_dataloc(file, (void __user *)arg); | 246 | return ceph_ioctl_get_dataloc(file, (void __user *)arg); |
| 179 | 247 | ||
| 180 | case CEPH_IOC_LAZYIO: | 248 | case CEPH_IOC_LAZYIO: |
| 181 | return ceph_ioctl_lazyio(file); | 249 | return ceph_ioctl_lazyio(file); |
| 182 | } | 250 | } |
| 251 | |||
| 183 | return -ENOTTY; | 252 | return -ENOTTY; |
| 184 | } | 253 | } |
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h index 88451a3b6857..a6ce54e94eb5 100644 --- a/fs/ceph/ioctl.h +++ b/fs/ceph/ioctl.h | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | #include <linux/ioctl.h> | 4 | #include <linux/ioctl.h> |
| 5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
| 6 | 6 | ||
| 7 | #define CEPH_IOCTL_MAGIC 0x97 | 7 | #define CEPH_IOCTL_MAGIC 0x98 |
| 8 | 8 | ||
| 9 | /* just use u64 to align sanely on all archs */ | 9 | /* just use u64 to align sanely on all archs */ |
| 10 | struct ceph_ioctl_layout { | 10 | struct ceph_ioctl_layout { |
| @@ -17,6 +17,8 @@ struct ceph_ioctl_layout { | |||
| 17 | struct ceph_ioctl_layout) | 17 | struct ceph_ioctl_layout) |
| 18 | #define CEPH_IOC_SET_LAYOUT _IOW(CEPH_IOCTL_MAGIC, 2, \ | 18 | #define CEPH_IOC_SET_LAYOUT _IOW(CEPH_IOCTL_MAGIC, 2, \ |
| 19 | struct ceph_ioctl_layout) | 19 | struct ceph_ioctl_layout) |
| 20 | #define CEPH_IOC_SET_LAYOUT_POLICY _IOW(CEPH_IOCTL_MAGIC, 5, \ | ||
| 21 | struct ceph_ioctl_layout) | ||
| 20 | 22 | ||
| 21 | /* | 23 | /* |
| 22 | * Extract identity, address of the OSD and object storing a given | 24 | * Extract identity, address of the OSD and object storing a given |
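Note the magic byte here changes from 0x97 to 0x98. Since _IOW() folds the magic into every command word, this silently renumbers all of the ceph ioctls at once, so userspace tools must be rebuilt against the new header. A tiny sketch showing that the command value is derived, not stable:

/* Sketch: _IOW() packs direction, argument size, magic, and command
 * number into one word, so bumping CEPH_IOCTL_MAGIC changes them all. */
#include <stdio.h>
#include <sys/ioctl.h>
#include "ioctl.h"

int main(void)
{
	printf("CEPH_IOC_SET_LAYOUT        = %#lx\n",
	       (unsigned long)CEPH_IOC_SET_LAYOUT);
	printf("CEPH_IOC_SET_LAYOUT_POLICY = %#lx\n",
	       (unsigned long)CEPH_IOC_SET_LAYOUT_POLICY);
	return 0;
}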
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index ff4e753aae92..40abde93c345 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
| @@ -1,11 +1,11 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/file.h> | 3 | #include <linux/file.h> |
| 4 | #include <linux/namei.h> | 4 | #include <linux/namei.h> |
| 5 | 5 | ||
| 6 | #include "super.h" | 6 | #include "super.h" |
| 7 | #include "mds_client.h" | 7 | #include "mds_client.h" |
| 8 | #include "pagelist.h" | 8 | #include <linux/ceph/pagelist.h> |
| 9 | 9 | ||
| 10 | /** | 10 | /** |
| 11 | * Implement fcntl and flock locking functions. | 11 | * Implement fcntl and flock locking functions. |
| @@ -16,7 +16,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |||
| 16 | { | 16 | { |
| 17 | struct inode *inode = file->f_dentry->d_inode; | 17 | struct inode *inode = file->f_dentry->d_inode; |
| 18 | struct ceph_mds_client *mdsc = | 18 | struct ceph_mds_client *mdsc = |
| 19 | &ceph_sb_to_client(inode->i_sb)->mdsc; | 19 | ceph_sb_to_client(inode->i_sb)->mdsc; |
| 20 | struct ceph_mds_request *req; | 20 | struct ceph_mds_request *req; |
| 21 | int err; | 21 | int err; |
| 22 | 22 | ||
| @@ -181,8 +181,9 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) | |||
| 181 | * Encode the flock and fcntl locks for the given inode into the pagelist. | 181 | * Encode the flock and fcntl locks for the given inode into the pagelist. |
| 182 | * Format is: #fcntl locks, sequential fcntl locks, #flock locks, | 182 | * Format is: #fcntl locks, sequential fcntl locks, #flock locks, |
| 183 | * sequential flock locks. | 183 | * sequential flock locks. |
| 184 | * Must be called with BLK already held, and the lock numbers should have | 184 | * Must be called with lock_flocks() already held. |
| 185 | * been gathered under the same lock holding window. | 185 | * If we encounter more of a specific lock type than expected, |
| 186 | * we return -ENOSPC. | ||
| 186 | */ | 187 | */ |
| 187 | int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | 188 | int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, |
| 188 | int num_fcntl_locks, int num_flock_locks) | 189 | int num_fcntl_locks, int num_flock_locks) |
| @@ -190,6 +191,8 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | |||
| 190 | struct file_lock *lock; | 191 | struct file_lock *lock; |
| 191 | struct ceph_filelock cephlock; | 192 | struct ceph_filelock cephlock; |
| 192 | int err = 0; | 193 | int err = 0; |
| 194 | int seen_fcntl = 0; | ||
| 195 | int seen_flock = 0; | ||
| 193 | 196 | ||
| 194 | dout("encoding %d flock and %d fcntl locks", num_flock_locks, | 197 | dout("encoding %d flock and %d fcntl locks", num_flock_locks, |
| 195 | num_fcntl_locks); | 198 | num_fcntl_locks); |
| @@ -198,6 +201,11 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | |||
| 198 | goto fail; | 201 | goto fail; |
| 199 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | 202 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { |
| 200 | if (lock->fl_flags & FL_POSIX) { | 203 | if (lock->fl_flags & FL_POSIX) { |
| 204 | ++seen_fcntl; | ||
| 205 | if (seen_fcntl > num_fcntl_locks) { | ||
| 206 | err = -ENOSPC; | ||
| 207 | goto fail; | ||
| 208 | } | ||
| 201 | err = lock_to_ceph_filelock(lock, &cephlock); | 209 | err = lock_to_ceph_filelock(lock, &cephlock); |
| 202 | if (err) | 210 | if (err) |
| 203 | goto fail; | 211 | goto fail; |
| @@ -213,6 +221,11 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | |||
| 213 | goto fail; | 221 | goto fail; |
| 214 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | 222 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { |
| 215 | if (lock->fl_flags & FL_FLOCK) { | 223 | if (lock->fl_flags & FL_FLOCK) { |
| 224 | ++seen_flock; | ||
| 225 | if (seen_flock > num_flock_locks) { | ||
| 226 | err = -ENOSPC; | ||
| 227 | goto fail; | ||
| 228 | } | ||
| 216 | err = lock_to_ceph_filelock(lock, &cephlock); | 229 | err = lock_to_ceph_filelock(lock, &cephlock); |
| 217 | if (err) | 230 | if (err) |
| 218 | goto fail; | 231 | goto fail; |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index fad95f8f2608..3142b15940c2 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
| @@ -1,17 +1,21 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/fs.h> | ||
| 3 | #include <linux/wait.h> | 4 | #include <linux/wait.h> |
| 4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
| 5 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
| 7 | #include <linux/debugfs.h> | ||
| 8 | #include <linux/seq_file.h> | ||
| 6 | #include <linux/smp_lock.h> | 9 | #include <linux/smp_lock.h> |
| 7 | 10 | ||
| 8 | #include "mds_client.h" | ||
| 9 | #include "mon_client.h" | ||
| 10 | #include "super.h" | 11 | #include "super.h" |
| 11 | #include "messenger.h" | 12 | #include "mds_client.h" |
| 12 | #include "decode.h" | 13 | |
| 13 | #include "auth.h" | 14 | #include <linux/ceph/messenger.h> |
| 14 | #include "pagelist.h" | 15 | #include <linux/ceph/decode.h> |
| 16 | #include <linux/ceph/pagelist.h> | ||
| 17 | #include <linux/ceph/auth.h> | ||
| 18 | #include <linux/ceph/debugfs.h> | ||
| 15 | 19 | ||
| 16 | /* | 20 | /* |
| 17 | * A cluster of MDS (metadata server) daemons is responsible for | 21 | * A cluster of MDS (metadata server) daemons is responsible for |
| @@ -286,8 +290,9 @@ void ceph_put_mds_session(struct ceph_mds_session *s) | |||
| 286 | atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1); | 290 | atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1); |
| 287 | if (atomic_dec_and_test(&s->s_ref)) { | 291 | if (atomic_dec_and_test(&s->s_ref)) { |
| 288 | if (s->s_authorizer) | 292 | if (s->s_authorizer) |
| 289 | s->s_mdsc->client->monc.auth->ops->destroy_authorizer( | 293 | s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer( |
| 290 | s->s_mdsc->client->monc.auth, s->s_authorizer); | 294 | s->s_mdsc->fsc->client->monc.auth, |
| 295 | s->s_authorizer); | ||
| 291 | kfree(s); | 296 | kfree(s); |
| 292 | } | 297 | } |
| 293 | } | 298 | } |
| @@ -344,7 +349,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, | |||
| 344 | s->s_seq = 0; | 349 | s->s_seq = 0; |
| 345 | mutex_init(&s->s_mutex); | 350 | mutex_init(&s->s_mutex); |
| 346 | 351 | ||
| 347 | ceph_con_init(mdsc->client->msgr, &s->s_con); | 352 | ceph_con_init(mdsc->fsc->client->msgr, &s->s_con); |
| 348 | s->s_con.private = s; | 353 | s->s_con.private = s; |
| 349 | s->s_con.ops = &mds_con_ops; | 354 | s->s_con.ops = &mds_con_ops; |
| 350 | s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; | 355 | s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; |
| @@ -599,7 +604,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, | |||
| 599 | } else if (req->r_dentry) { | 604 | } else if (req->r_dentry) { |
| 600 | struct inode *dir = req->r_dentry->d_parent->d_inode; | 605 | struct inode *dir = req->r_dentry->d_parent->d_inode; |
| 601 | 606 | ||
| 602 | if (dir->i_sb != mdsc->client->sb) { | 607 | if (dir->i_sb != mdsc->fsc->sb) { |
| 603 | /* not this fs! */ | 608 | /* not this fs! */ |
| 604 | inode = req->r_dentry->d_inode; | 609 | inode = req->r_dentry->d_inode; |
| 605 | } else if (ceph_snap(dir) != CEPH_NOSNAP) { | 610 | } else if (ceph_snap(dir) != CEPH_NOSNAP) { |
| @@ -884,7 +889,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
| 884 | __ceph_remove_cap(cap); | 889 | __ceph_remove_cap(cap); |
| 885 | if (!__ceph_is_any_real_caps(ci)) { | 890 | if (!__ceph_is_any_real_caps(ci)) { |
| 886 | struct ceph_mds_client *mdsc = | 891 | struct ceph_mds_client *mdsc = |
| 887 | &ceph_sb_to_client(inode->i_sb)->mdsc; | 892 | ceph_sb_to_client(inode->i_sb)->mdsc; |
| 888 | 893 | ||
| 889 | spin_lock(&mdsc->cap_dirty_lock); | 894 | spin_lock(&mdsc->cap_dirty_lock); |
| 890 | if (!list_empty(&ci->i_dirty_item)) { | 895 | if (!list_empty(&ci->i_dirty_item)) { |
| @@ -1146,7 +1151,7 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | |||
| 1146 | struct ceph_msg *msg, *partial = NULL; | 1151 | struct ceph_msg *msg, *partial = NULL; |
| 1147 | struct ceph_mds_cap_release *head; | 1152 | struct ceph_mds_cap_release *head; |
| 1148 | int err = -ENOMEM; | 1153 | int err = -ENOMEM; |
| 1149 | int extra = mdsc->client->mount_args->cap_release_safety; | 1154 | int extra = mdsc->fsc->mount_options->cap_release_safety; |
| 1150 | int num; | 1155 | int num; |
| 1151 | 1156 | ||
| 1152 | dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, | 1157 | dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, |
| @@ -2085,7 +2090,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
| 2085 | 2090 | ||
| 2086 | /* insert trace into our cache */ | 2091 | /* insert trace into our cache */ |
| 2087 | mutex_lock(&req->r_fill_mutex); | 2092 | mutex_lock(&req->r_fill_mutex); |
| 2088 | err = ceph_fill_trace(mdsc->client->sb, req, req->r_session); | 2093 | err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); |
| 2089 | if (err == 0) { | 2094 | if (err == 0) { |
| 2090 | if (result == 0 && rinfo->dir_nr) | 2095 | if (result == 0 && rinfo->dir_nr) |
| 2091 | ceph_readdir_prepopulate(req, req->r_session); | 2096 | ceph_readdir_prepopulate(req, req->r_session); |
| @@ -2361,19 +2366,35 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
| 2361 | 2366 | ||
| 2362 | if (recon_state->flock) { | 2367 | if (recon_state->flock) { |
| 2363 | int num_fcntl_locks, num_flock_locks; | 2368 | int num_fcntl_locks, num_flock_locks; |
| 2364 | 2369 | struct ceph_pagelist_cursor trunc_point; | |
| 2365 | lock_kernel(); | 2370 | |
| 2366 | ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); | 2371 | ceph_pagelist_set_cursor(pagelist, &trunc_point); |
| 2367 | rec.v2.flock_len = (2*sizeof(u32) + | 2372 | do { |
| 2368 | (num_fcntl_locks+num_flock_locks) * | 2373 | lock_flocks(); |
| 2369 | sizeof(struct ceph_filelock)); | 2374 | ceph_count_locks(inode, &num_fcntl_locks, |
| 2370 | 2375 | &num_flock_locks); | |
| 2371 | err = ceph_pagelist_append(pagelist, &rec, reclen); | 2376 | rec.v2.flock_len = (2*sizeof(u32) + |
| 2372 | if (!err) | 2377 | (num_fcntl_locks+num_flock_locks) * |
| 2373 | err = ceph_encode_locks(inode, pagelist, | 2378 | sizeof(struct ceph_filelock)); |
| 2374 | num_fcntl_locks, | 2379 | unlock_flocks(); |
| 2375 | num_flock_locks); | 2380 | |
| 2376 | unlock_kernel(); | 2381 | /* pre-alloc pagelist */ |
| 2382 | ceph_pagelist_truncate(pagelist, &trunc_point); | ||
| 2383 | err = ceph_pagelist_append(pagelist, &rec, reclen); | ||
| 2384 | if (!err) | ||
| 2385 | err = ceph_pagelist_reserve(pagelist, | ||
| 2386 | rec.v2.flock_len); | ||
| 2387 | |||
| 2388 | /* encode locks */ | ||
| 2389 | if (!err) { | ||
| 2390 | lock_flocks(); | ||
| 2391 | err = ceph_encode_locks(inode, | ||
| 2392 | pagelist, | ||
| 2393 | num_fcntl_locks, | ||
| 2394 | num_flock_locks); | ||
| 2395 | unlock_flocks(); | ||
| 2396 | } | ||
| 2397 | } while (err == -ENOSPC); | ||
| 2377 | } else { | 2398 | } else { |
| 2378 | err = ceph_pagelist_append(pagelist, &rec, reclen); | 2399 | err = ceph_pagelist_append(pagelist, &rec, reclen); |
| 2379 | } | 2400 | } |
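The rewrite above drops the BKL (lock_kernel()) in favor of lock_flocks() and turns the encode into an optimistic loop: count the locks under the lock, drop it to preallocate pagelist space (allocation may sleep), then retake it and encode; if the lock list grew in between, ceph_encode_locks() now reports -ENOSPC (see the locks.c hunk earlier) and the loop truncates back to the saved cursor and retries. A self-contained userspace sketch of the same count/preallocate/re-encode pattern, purely illustrative:

/* Sketch of the count -> preallocate -> re-encode retry pattern used
 * above. A generic concurrent list stands in for inode->i_flock. */
#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

extern pthread_mutex_t list_lock;		/* stands in for lock_flocks() */
extern int count_items(void);			/* cheap walk of the list */
extern int encode_items(char *buf, int max);	/* -ENOSPC if list grew */

char *encode_snapshot(int *out_len)
{
	char *buf = NULL;
	int n, err;

	do {
		pthread_mutex_lock(&list_lock);
		n = count_items();		/* count under the lock */
		pthread_mutex_unlock(&list_lock);

		free(buf);			/* drop any stale attempt */
		buf = malloc((size_t)n * 64);	/* may block: lock is dropped */
		if (!buf)
			return NULL;

		pthread_mutex_lock(&list_lock);
		err = encode_items(buf, n);	/* fails if the list grew */
		pthread_mutex_unlock(&list_lock);
	} while (err == -ENOSPC);		/* list changed: recount, retry */

	*out_len = n;
	return buf;
}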
| @@ -2613,7 +2634,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, | |||
| 2613 | struct ceph_mds_session *session, | 2634 | struct ceph_mds_session *session, |
| 2614 | struct ceph_msg *msg) | 2635 | struct ceph_msg *msg) |
| 2615 | { | 2636 | { |
| 2616 | struct super_block *sb = mdsc->client->sb; | 2637 | struct super_block *sb = mdsc->fsc->sb; |
| 2617 | struct inode *inode; | 2638 | struct inode *inode; |
| 2618 | struct ceph_inode_info *ci; | 2639 | struct ceph_inode_info *ci; |
| 2619 | struct dentry *parent, *dentry; | 2640 | struct dentry *parent, *dentry; |
| @@ -2891,10 +2912,16 @@ static void delayed_work(struct work_struct *work) | |||
| 2891 | schedule_delayed(mdsc); | 2912 | schedule_delayed(mdsc); |
| 2892 | } | 2913 | } |
| 2893 | 2914 | ||
| 2915 | int ceph_mdsc_init(struct ceph_fs_client *fsc) | ||
| 2894 | 2916 | ||
| 2895 | int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | ||
| 2896 | { | 2917 | { |
| 2897 | mdsc->client = client; | 2918 | struct ceph_mds_client *mdsc; |
| 2919 | |||
| 2920 | mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS); | ||
| 2921 | if (!mdsc) | ||
| 2922 | return -ENOMEM; | ||
| 2923 | mdsc->fsc = fsc; | ||
| 2924 | fsc->mdsc = mdsc; | ||
| 2898 | mutex_init(&mdsc->mutex); | 2925 | mutex_init(&mdsc->mutex); |
| 2899 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); | 2926 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); |
| 2900 | if (mdsc->mdsmap == NULL) | 2927 | if (mdsc->mdsmap == NULL) |
| @@ -2927,7 +2954,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
| 2927 | INIT_LIST_HEAD(&mdsc->dentry_lru); | 2954 | INIT_LIST_HEAD(&mdsc->dentry_lru); |
| 2928 | 2955 | ||
| 2929 | ceph_caps_init(mdsc); | 2956 | ceph_caps_init(mdsc); |
| 2930 | ceph_adjust_min_caps(mdsc, client->min_caps); | 2957 | ceph_adjust_min_caps(mdsc, fsc->min_caps); |
| 2931 | 2958 | ||
| 2932 | return 0; | 2959 | return 0; |
| 2933 | } | 2960 | } |
| @@ -2939,7 +2966,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
| 2939 | static void wait_requests(struct ceph_mds_client *mdsc) | 2966 | static void wait_requests(struct ceph_mds_client *mdsc) |
| 2940 | { | 2967 | { |
| 2941 | struct ceph_mds_request *req; | 2968 | struct ceph_mds_request *req; |
| 2942 | struct ceph_client *client = mdsc->client; | 2969 | struct ceph_fs_client *fsc = mdsc->fsc; |
| 2943 | 2970 | ||
| 2944 | mutex_lock(&mdsc->mutex); | 2971 | mutex_lock(&mdsc->mutex); |
| 2945 | if (__get_oldest_req(mdsc)) { | 2972 | if (__get_oldest_req(mdsc)) { |
| @@ -2947,7 +2974,7 @@ static void wait_requests(struct ceph_mds_client *mdsc) | |||
| 2947 | 2974 | ||
| 2948 | dout("wait_requests waiting for requests\n"); | 2975 | dout("wait_requests waiting for requests\n"); |
| 2949 | wait_for_completion_timeout(&mdsc->safe_umount_waiters, | 2976 | wait_for_completion_timeout(&mdsc->safe_umount_waiters, |
| 2950 | client->mount_args->mount_timeout * HZ); | 2977 | fsc->client->options->mount_timeout * HZ); |
| 2951 | 2978 | ||
| 2952 | /* tear down remaining requests */ | 2979 | /* tear down remaining requests */ |
| 2953 | mutex_lock(&mdsc->mutex); | 2980 | mutex_lock(&mdsc->mutex); |
| @@ -3030,7 +3057,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||
| 3030 | { | 3057 | { |
| 3031 | u64 want_tid, want_flush; | 3058 | u64 want_tid, want_flush; |
| 3032 | 3059 | ||
| 3033 | if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) | 3060 | if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) |
| 3034 | return; | 3061 | return; |
| 3035 | 3062 | ||
| 3036 | dout("sync\n"); | 3063 | dout("sync\n"); |
| @@ -3053,7 +3080,7 @@ bool done_closing_sessions(struct ceph_mds_client *mdsc) | |||
| 3053 | { | 3080 | { |
| 3054 | int i, n = 0; | 3081 | int i, n = 0; |
| 3055 | 3082 | ||
| 3056 | if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) | 3083 | if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) |
| 3057 | return true; | 3084 | return true; |
| 3058 | 3085 | ||
| 3059 | mutex_lock(&mdsc->mutex); | 3086 | mutex_lock(&mdsc->mutex); |
| @@ -3071,8 +3098,8 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||
| 3071 | { | 3098 | { |
| 3072 | struct ceph_mds_session *session; | 3099 | struct ceph_mds_session *session; |
| 3073 | int i; | 3100 | int i; |
| 3074 | struct ceph_client *client = mdsc->client; | 3101 | struct ceph_fs_client *fsc = mdsc->fsc; |
| 3075 | unsigned long timeout = client->mount_args->mount_timeout * HZ; | 3102 | unsigned long timeout = fsc->client->options->mount_timeout * HZ; |
| 3076 | 3103 | ||
| 3077 | dout("close_sessions\n"); | 3104 | dout("close_sessions\n"); |
| 3078 | 3105 | ||
| @@ -3119,7 +3146,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||
| 3119 | dout("stopped\n"); | 3146 | dout("stopped\n"); |
| 3120 | } | 3147 | } |
| 3121 | 3148 | ||
| 3122 | void ceph_mdsc_stop(struct ceph_mds_client *mdsc) | 3149 | static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) |
| 3123 | { | 3150 | { |
| 3124 | dout("stop\n"); | 3151 | dout("stop\n"); |
| 3125 | cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ | 3152 | cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ |
| @@ -3129,6 +3156,15 @@ void ceph_mdsc_stop(struct ceph_mds_client *mdsc) | |||
| 3129 | ceph_caps_finalize(mdsc); | 3156 | ceph_caps_finalize(mdsc); |
| 3130 | } | 3157 | } |
| 3131 | 3158 | ||
| 3159 | void ceph_mdsc_destroy(struct ceph_fs_client *fsc) | ||
| 3160 | { | ||
| 3161 | struct ceph_mds_client *mdsc = fsc->mdsc; | ||
| 3162 | |||
| 3163 | ceph_mdsc_stop(mdsc); | ||
| 3164 | fsc->mdsc = NULL; | ||
| 3165 | kfree(mdsc); | ||
| 3166 | } | ||
| 3167 | |||
| 3132 | 3168 | ||
| 3133 | /* | 3169 | /* |
| 3134 | * handle mds map update. | 3170 | * handle mds map update. |
| @@ -3145,14 +3181,14 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) | |||
| 3145 | 3181 | ||
| 3146 | ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad); | 3182 | ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad); |
| 3147 | ceph_decode_copy(&p, &fsid, sizeof(fsid)); | 3183 | ceph_decode_copy(&p, &fsid, sizeof(fsid)); |
| 3148 | if (ceph_check_fsid(mdsc->client, &fsid) < 0) | 3184 | if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0) |
| 3149 | return; | 3185 | return; |
| 3150 | epoch = ceph_decode_32(&p); | 3186 | epoch = ceph_decode_32(&p); |
| 3151 | maplen = ceph_decode_32(&p); | 3187 | maplen = ceph_decode_32(&p); |
| 3152 | dout("handle_map epoch %u len %d\n", epoch, (int)maplen); | 3188 | dout("handle_map epoch %u len %d\n", epoch, (int)maplen); |
| 3153 | 3189 | ||
| 3154 | /* do we need it? */ | 3190 | /* do we need it? */ |
| 3155 | ceph_monc_got_mdsmap(&mdsc->client->monc, epoch); | 3191 | ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch); |
| 3156 | mutex_lock(&mdsc->mutex); | 3192 | mutex_lock(&mdsc->mutex); |
| 3157 | if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { | 3193 | if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { |
| 3158 | dout("handle_map epoch %u <= our %u\n", | 3194 | dout("handle_map epoch %u <= our %u\n", |
| @@ -3176,7 +3212,7 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) | |||
| 3176 | } else { | 3212 | } else { |
| 3177 | mdsc->mdsmap = newmap; /* first mds map */ | 3213 | mdsc->mdsmap = newmap; /* first mds map */ |
| 3178 | } | 3214 | } |
| 3179 | mdsc->client->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; | 3215 | mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; |
| 3180 | 3216 | ||
| 3181 | __wake_requests(mdsc, &mdsc->waiting_for_map); | 3217 | __wake_requests(mdsc, &mdsc->waiting_for_map); |
| 3182 | 3218 | ||
| @@ -3277,7 +3313,7 @@ static int get_authorizer(struct ceph_connection *con, | |||
| 3277 | { | 3313 | { |
| 3278 | struct ceph_mds_session *s = con->private; | 3314 | struct ceph_mds_session *s = con->private; |
| 3279 | struct ceph_mds_client *mdsc = s->s_mdsc; | 3315 | struct ceph_mds_client *mdsc = s->s_mdsc; |
| 3280 | struct ceph_auth_client *ac = mdsc->client->monc.auth; | 3316 | struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; |
| 3281 | int ret = 0; | 3317 | int ret = 0; |
| 3282 | 3318 | ||
| 3283 | if (force_new && s->s_authorizer) { | 3319 | if (force_new && s->s_authorizer) { |
| @@ -3311,7 +3347,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len) | |||
| 3311 | { | 3347 | { |
| 3312 | struct ceph_mds_session *s = con->private; | 3348 | struct ceph_mds_session *s = con->private; |
| 3313 | struct ceph_mds_client *mdsc = s->s_mdsc; | 3349 | struct ceph_mds_client *mdsc = s->s_mdsc; |
| 3314 | struct ceph_auth_client *ac = mdsc->client->monc.auth; | 3350 | struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; |
| 3315 | 3351 | ||
| 3316 | return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len); | 3352 | return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len); |
| 3317 | } | 3353 | } |
| @@ -3320,12 +3356,12 @@ static int invalidate_authorizer(struct ceph_connection *con) | |||
| 3320 | { | 3356 | { |
| 3321 | struct ceph_mds_session *s = con->private; | 3357 | struct ceph_mds_session *s = con->private; |
| 3322 | struct ceph_mds_client *mdsc = s->s_mdsc; | 3358 | struct ceph_mds_client *mdsc = s->s_mdsc; |
| 3323 | struct ceph_auth_client *ac = mdsc->client->monc.auth; | 3359 | struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; |
| 3324 | 3360 | ||
| 3325 | if (ac->ops->invalidate_authorizer) | 3361 | if (ac->ops->invalidate_authorizer) |
| 3326 | ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); | 3362 | ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); |
| 3327 | 3363 | ||
| 3328 | return ceph_monc_validate_auth(&mdsc->client->monc); | 3364 | return ceph_monc_validate_auth(&mdsc->fsc->client->monc); |
| 3329 | } | 3365 | } |
| 3330 | 3366 | ||
| 3331 | static const struct ceph_connection_operations mds_con_ops = { | 3367 | static const struct ceph_connection_operations mds_con_ops = { |
| @@ -3338,7 +3374,4 @@ static const struct ceph_connection_operations mds_con_ops = { | |||
| 3338 | .peer_reset = peer_reset, | 3374 | .peer_reset = peer_reset, |
| 3339 | }; | 3375 | }; |
| 3340 | 3376 | ||
| 3341 | |||
| 3342 | |||
| 3343 | |||
| 3344 | /* eof */ | 3377 | /* eof */ |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index c98267ce6d2a..d66d63c72355 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
| @@ -8,9 +8,9 @@ | |||
| 8 | #include <linux/rbtree.h> | 8 | #include <linux/rbtree.h> |
| 9 | #include <linux/spinlock.h> | 9 | #include <linux/spinlock.h> |
| 10 | 10 | ||
| 11 | #include "types.h" | 11 | #include <linux/ceph/types.h> |
| 12 | #include "messenger.h" | 12 | #include <linux/ceph/messenger.h> |
| 13 | #include "mdsmap.h" | 13 | #include <linux/ceph/mdsmap.h> |
| 14 | 14 | ||
| 15 | /* | 15 | /* |
| 16 | * Some lock dependencies: | 16 | * Some lock dependencies: |
| @@ -26,7 +26,7 @@ | |||
| 26 | * | 26 | * |
| 27 | */ | 27 | */ |
| 28 | 28 | ||
| 29 | struct ceph_client; | 29 | struct ceph_fs_client; |
| 30 | struct ceph_cap; | 30 | struct ceph_cap; |
| 31 | 31 | ||
| 32 | /* | 32 | /* |
| @@ -230,7 +230,7 @@ struct ceph_mds_request { | |||
| 230 | * mds client state | 230 | * mds client state |
| 231 | */ | 231 | */ |
| 232 | struct ceph_mds_client { | 232 | struct ceph_mds_client { |
| 233 | struct ceph_client *client; | 233 | struct ceph_fs_client *fsc; |
| 234 | struct mutex mutex; /* all nested structures */ | 234 | struct mutex mutex; /* all nested structures */ |
| 235 | 235 | ||
| 236 | struct ceph_mdsmap *mdsmap; | 236 | struct ceph_mdsmap *mdsmap; |
| @@ -289,11 +289,6 @@ struct ceph_mds_client { | |||
| 289 | int caps_avail_count; /* unused, unreserved */ | 289 | int caps_avail_count; /* unused, unreserved */ |
| 290 | int caps_min_count; /* keep at least this many | 290 | int caps_min_count; /* keep at least this many |
| 291 | (unreserved) */ | 291 | (unreserved) */ |
| 292 | |||
| 293 | #ifdef CONFIG_DEBUG_FS | ||
| 294 | struct dentry *debugfs_file; | ||
| 295 | #endif | ||
| 296 | |||
| 297 | spinlock_t dentry_lru_lock; | 292 | spinlock_t dentry_lru_lock; |
| 298 | struct list_head dentry_lru; | 293 | struct list_head dentry_lru; |
| 299 | int num_dentry; | 294 | int num_dentry; |
| @@ -316,10 +311,9 @@ extern void ceph_put_mds_session(struct ceph_mds_session *s); | |||
| 316 | extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, | 311 | extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, |
| 317 | struct ceph_msg *msg, int mds); | 312 | struct ceph_msg *msg, int mds); |
| 318 | 313 | ||
| 319 | extern int ceph_mdsc_init(struct ceph_mds_client *mdsc, | 314 | extern int ceph_mdsc_init(struct ceph_fs_client *fsc); |
| 320 | struct ceph_client *client); | ||
| 321 | extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); | 315 | extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); |
| 322 | extern void ceph_mdsc_stop(struct ceph_mds_client *mdsc); | 316 | extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc); |
| 323 | 317 | ||
| 324 | extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); | 318 | extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); |
| 325 | 319 | ||
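Taken together with the mds_client.c changes, the prototype swap above completes a lifecycle change: ceph_mdsc_init() now takes the ceph_fs_client, allocates the mds client itself, and cross-links the two, while the formerly exported ceph_mdsc_stop() becomes static behind ceph_mdsc_destroy(), which also frees the structure. A short sketch of the caller's view; the fs_client mount/unmount path is assumed, not shown in these hunks:

/* Sketch: new mdsc lifecycle from the ceph_fs_client's perspective. */
#include "super.h"
#include "mds_client.h"

static int demo_fs_client_lifecycle(struct ceph_fs_client *fsc)
{
	int err;

	err = ceph_mdsc_init(fsc);	/* kzallocs the mdsc, sets fsc->mdsc */
	if (err)
		return err;

	/* ... mount, issue MDS requests through fsc->mdsc ... */

	ceph_mdsc_close_sessions(fsc->mdsc);
	ceph_mdsc_destroy(fsc);		/* stops delayed work, frees mdsc */
	return 0;
}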
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 040be6d1150b..73b7d44e8a35 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/bug.h> | 3 | #include <linux/bug.h> |
| 4 | #include <linux/err.h> | 4 | #include <linux/err.h> |
| @@ -6,9 +6,9 @@ | |||
| 6 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
| 7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
| 8 | 8 | ||
| 9 | #include "mdsmap.h" | 9 | #include <linux/ceph/mdsmap.h> |
| 10 | #include "messenger.h" | 10 | #include <linux/ceph/messenger.h> |
| 11 | #include "decode.h" | 11 | #include <linux/ceph/decode.h> |
| 12 | 12 | ||
| 13 | #include "super.h" | 13 | #include "super.h" |
| 14 | 14 | ||
| @@ -117,7 +117,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
| 117 | } | 117 | } |
| 118 | 118 | ||
| 119 | dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", | 119 | dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", |
| 120 | i+1, n, global_id, mds, inc, pr_addr(&addr.in_addr), | 120 | i+1, n, global_id, mds, inc, |
| 121 | ceph_pr_addr(&addr.in_addr), | ||
| 121 | ceph_mds_state_name(state)); | 122 | ceph_mds_state_name(state)); |
| 122 | if (mds >= 0 && mds < m->m_max_mds && state > 0) { | 123 | if (mds >= 0 && mds < m->m_max_mds && state > 0) { |
| 123 | m->m_info[mds].global_id = global_id; | 124 | m->m_info[mds].global_id = global_id; |
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c deleted file mode 100644 index 46a368b6dce5..000000000000 --- a/fs/ceph/pagelist.c +++ /dev/null | |||
| @@ -1,63 +0,0 @@ | |||
| 1 | |||
| 2 | #include <linux/gfp.h> | ||
| 3 | #include <linux/pagemap.h> | ||
| 4 | #include <linux/highmem.h> | ||
| 5 | |||
| 6 | #include "pagelist.h" | ||
| 7 | |||
| 8 | static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) | ||
| 9 | { | ||
| 10 | struct page *page = list_entry(pl->head.prev, struct page, | ||
| 11 | lru); | ||
| 12 | kunmap(page); | ||
| 13 | } | ||
| 14 | |||
| 15 | int ceph_pagelist_release(struct ceph_pagelist *pl) | ||
| 16 | { | ||
| 17 | if (pl->mapped_tail) | ||
| 18 | ceph_pagelist_unmap_tail(pl); | ||
| 19 | |||
| 20 | while (!list_empty(&pl->head)) { | ||
| 21 | struct page *page = list_first_entry(&pl->head, struct page, | ||
| 22 | lru); | ||
| 23 | list_del(&page->lru); | ||
| 24 | __free_page(page); | ||
| 25 | } | ||
| 26 | return 0; | ||
| 27 | } | ||
| 28 | |||
| 29 | static int ceph_pagelist_addpage(struct ceph_pagelist *pl) | ||
| 30 | { | ||
| 31 | struct page *page = __page_cache_alloc(GFP_NOFS); | ||
| 32 | if (!page) | ||
| 33 | return -ENOMEM; | ||
| 34 | pl->room += PAGE_SIZE; | ||
| 35 | list_add_tail(&page->lru, &pl->head); | ||
| 36 | if (pl->mapped_tail) | ||
| 37 | ceph_pagelist_unmap_tail(pl); | ||
| 38 | pl->mapped_tail = kmap(page); | ||
| 39 | return 0; | ||
| 40 | } | ||
| 41 | |||
| 42 | int ceph_pagelist_append(struct ceph_pagelist *pl, void *buf, size_t len) | ||
| 43 | { | ||
| 44 | while (pl->room < len) { | ||
| 45 | size_t bit = pl->room; | ||
| 46 | int ret; | ||
| 47 | |||
| 48 | memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), | ||
| 49 | buf, bit); | ||
| 50 | pl->length += bit; | ||
| 51 | pl->room -= bit; | ||
| 52 | buf += bit; | ||
| 53 | len -= bit; | ||
| 54 | ret = ceph_pagelist_addpage(pl); | ||
| 55 | if (ret) | ||
| 56 | return ret; | ||
| 57 | } | ||
| 58 | |||
| 59 | memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len); | ||
| 60 | pl->length += len; | ||
| 61 | pl->room -= len; | ||
| 62 | return 0; | ||
| 63 | } | ||
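fs/ceph/pagelist.c is deleted because the pagelist implementation also moves into libceph (callers now include <linux/ceph/pagelist.h>, as the locks.c hunk above shows), where this series additionally relies on the reserve/cursor operations used by the flock-encoding retry loop. A hedged sketch of the API as the deleted code defines it, plus the cursor calls assumed to exist on the libceph side:

/* Sketch: pagelist usage against the moved API. ceph_pagelist_init(),
 * _set_cursor(), _truncate(), and _reserve() are assumed to be the
 * libceph versions this series introduces alongside the move. */
#include <linux/ceph/pagelist.h>

static int demo_pagelist(void *hdr, size_t hdr_len, size_t body_max)
{
	struct ceph_pagelist pl;
	struct ceph_pagelist_cursor c;
	int err;

	ceph_pagelist_init(&pl);
	ceph_pagelist_set_cursor(&pl, &c);	/* remember a rollback point */

	err = ceph_pagelist_append(&pl, hdr, hdr_len);
	if (!err)
		err = ceph_pagelist_reserve(&pl, body_max); /* prealloc room */
	if (err)
		ceph_pagelist_truncate(&pl, &c);	/* roll back to cursor */

	ceph_pagelist_release(&pl);		/* frees all pages */
	return err;
}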
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 190b6c4a6f2b..39c243acd062 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
| @@ -1,10 +1,12 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/sort.h> | 3 | #include <linux/sort.h> |
| 4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
| 5 | 5 | ||
| 6 | #include "super.h" | 6 | #include "super.h" |
| 7 | #include "decode.h" | 7 | #include "mds_client.h" |
| 8 | |||
| 9 | #include <linux/ceph/decode.h> | ||
| 8 | 10 | ||
| 9 | /* | 11 | /* |
| 10 | * Snapshots in ceph are driven in large part by cooperation from the | 12 | * Snapshots in ceph are driven in large part by cooperation from the |
| @@ -526,7 +528,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, | |||
| 526 | struct ceph_cap_snap *capsnap) | 528 | struct ceph_cap_snap *capsnap) |
| 527 | { | 529 | { |
| 528 | struct inode *inode = &ci->vfs_inode; | 530 | struct inode *inode = &ci->vfs_inode; |
| 529 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 531 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
| 530 | 532 | ||
| 531 | BUG_ON(capsnap->writing); | 533 | BUG_ON(capsnap->writing); |
| 532 | capsnap->size = inode->i_size; | 534 | capsnap->size = inode->i_size; |
| @@ -747,7 +749,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
| 747 | struct ceph_mds_session *session, | 749 | struct ceph_mds_session *session, |
| 748 | struct ceph_msg *msg) | 750 | struct ceph_msg *msg) |
| 749 | { | 751 | { |
| 750 | struct super_block *sb = mdsc->client->sb; | 752 | struct super_block *sb = mdsc->fsc->sb; |
| 751 | int mds = session->s_mds; | 753 | int mds = session->s_mds; |
| 752 | u64 split; | 754 | u64 split; |
| 753 | int op; | 755 | int op; |
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/strings.c index c6179d3a26a2..cd5097d7c804 100644 --- a/fs/ceph/ceph_strings.c +++ b/fs/ceph/strings.c | |||
| @@ -1,71 +1,9 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Ceph string constants | 2 | * Ceph fs string constants |
| 3 | */ | 3 | */ |
| 4 | #include "types.h" | 4 | #include <linux/module.h> |
| 5 | #include <linux/ceph/types.h> | ||
| 5 | 6 | ||
| 6 | const char *ceph_entity_type_name(int type) | ||
| 7 | { | ||
| 8 | switch (type) { | ||
| 9 | case CEPH_ENTITY_TYPE_MDS: return "mds"; | ||
| 10 | case CEPH_ENTITY_TYPE_OSD: return "osd"; | ||
| 11 | case CEPH_ENTITY_TYPE_MON: return "mon"; | ||
| 12 | case CEPH_ENTITY_TYPE_CLIENT: return "client"; | ||
| 13 | case CEPH_ENTITY_TYPE_AUTH: return "auth"; | ||
| 14 | default: return "unknown"; | ||
| 15 | } | ||
| 16 | } | ||
| 17 | |||
| 18 | const char *ceph_osd_op_name(int op) | ||
| 19 | { | ||
| 20 | switch (op) { | ||
| 21 | case CEPH_OSD_OP_READ: return "read"; | ||
| 22 | case CEPH_OSD_OP_STAT: return "stat"; | ||
| 23 | |||
| 24 | case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; | ||
| 25 | |||
| 26 | case CEPH_OSD_OP_WRITE: return "write"; | ||
| 27 | case CEPH_OSD_OP_DELETE: return "delete"; | ||
| 28 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; | ||
| 29 | case CEPH_OSD_OP_ZERO: return "zero"; | ||
| 30 | case CEPH_OSD_OP_WRITEFULL: return "writefull"; | ||
| 31 | case CEPH_OSD_OP_ROLLBACK: return "rollback"; | ||
| 32 | |||
| 33 | case CEPH_OSD_OP_APPEND: return "append"; | ||
| 34 | case CEPH_OSD_OP_STARTSYNC: return "startsync"; | ||
| 35 | case CEPH_OSD_OP_SETTRUNC: return "settrunc"; | ||
| 36 | case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc"; | ||
| 37 | |||
| 38 | case CEPH_OSD_OP_TMAPUP: return "tmapup"; | ||
| 39 | case CEPH_OSD_OP_TMAPGET: return "tmapget"; | ||
| 40 | case CEPH_OSD_OP_TMAPPUT: return "tmapput"; | ||
| 41 | |||
| 42 | case CEPH_OSD_OP_GETXATTR: return "getxattr"; | ||
| 43 | case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; | ||
| 44 | case CEPH_OSD_OP_SETXATTR: return "setxattr"; | ||
| 45 | case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; | ||
| 46 | case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; | ||
| 47 | case CEPH_OSD_OP_RMXATTR: return "rmxattr"; | ||
| 48 | case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; | ||
| 49 | |||
| 50 | case CEPH_OSD_OP_PULL: return "pull"; | ||
| 51 | case CEPH_OSD_OP_PUSH: return "push"; | ||
| 52 | case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; | ||
| 53 | case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; | ||
| 54 | case CEPH_OSD_OP_SCRUB: return "scrub"; | ||
| 55 | |||
| 56 | case CEPH_OSD_OP_WRLOCK: return "wrlock"; | ||
| 57 | case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; | ||
| 58 | case CEPH_OSD_OP_RDLOCK: return "rdlock"; | ||
| 59 | case CEPH_OSD_OP_RDUNLOCK: return "rdunlock"; | ||
| 60 | case CEPH_OSD_OP_UPLOCK: return "uplock"; | ||
| 61 | case CEPH_OSD_OP_DNLOCK: return "dnlock"; | ||
| 62 | |||
| 63 | case CEPH_OSD_OP_CALL: return "call"; | ||
| 64 | |||
| 65 | case CEPH_OSD_OP_PGLS: return "pgls"; | ||
| 66 | } | ||
| 67 | return "???"; | ||
| 68 | } | ||
| 69 | 7 | ||
| 70 | const char *ceph_mds_state_name(int s) | 8 | const char *ceph_mds_state_name(int s) |
| 71 | { | 9 | { |
| @@ -177,17 +115,3 @@ const char *ceph_snap_op_name(int o) | |||
| 177 | } | 115 | } |
| 178 | return "???"; | 116 | return "???"; |
| 179 | } | 117 | } |
| 180 | |||
| 181 | const char *ceph_pool_op_name(int op) | ||
| 182 | { | ||
| 183 | switch (op) { | ||
| 184 | case POOL_OP_CREATE: return "create"; | ||
| 185 | case POOL_OP_DELETE: return "delete"; | ||
| 186 | case POOL_OP_AUID_CHANGE: return "auid change"; | ||
| 187 | case POOL_OP_CREATE_SNAP: return "create snap"; | ||
| 188 | case POOL_OP_DELETE_SNAP: return "delete snap"; | ||
| 189 | case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; | ||
| 190 | case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; | ||
| 191 | } | ||
| 192 | return "???"; | ||
| 193 | } | ||
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9922628532b2..d6e0e0421891 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | 1 | ||
| 2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
| 3 | 3 | ||
| 4 | #include <linux/backing-dev.h> | 4 | #include <linux/backing-dev.h> |
| 5 | #include <linux/ctype.h> | 5 | #include <linux/ctype.h> |
| @@ -15,10 +15,13 @@ | |||
| 15 | #include <linux/statfs.h> | 15 | #include <linux/statfs.h> |
| 16 | #include <linux/string.h> | 16 | #include <linux/string.h> |
| 17 | 17 | ||
| 18 | #include "decode.h" | ||
| 19 | #include "super.h" | 18 | #include "super.h" |
| 20 | #include "mon_client.h" | 19 | #include "mds_client.h" |
| 21 | #include "auth.h" | 20 | |
| 21 | #include <linux/ceph/decode.h> | ||
| 22 | #include <linux/ceph/mon_client.h> | ||
| 23 | #include <linux/ceph/auth.h> | ||
| 24 | #include <linux/ceph/debugfs.h> | ||
| 22 | 25 | ||
| 23 | /* | 26 | /* |
| 24 | * Ceph superblock operations | 27 | * Ceph superblock operations |
| @@ -26,36 +29,22 @@ | |||
| 26 | * Handle the basics of mounting, unmounting. | 29 | * Handle the basics of mounting, unmounting. |
| 27 | */ | 30 | */ |
| 28 | 31 | ||
| 29 | |||
| 30 | /* | ||
| 31 | * find filename portion of a path (/foo/bar/baz -> baz) | ||
| 32 | */ | ||
| 33 | const char *ceph_file_part(const char *s, int len) | ||
| 34 | { | ||
| 35 | const char *e = s + len; | ||
| 36 | |||
| 37 | while (e != s && *(e-1) != '/') | ||
| 38 | e--; | ||
| 39 | return e; | ||
| 40 | } | ||
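The ceph_file_part() helper deleted above scans backwards from the end of the string until it passes the last '/'. A small userspace usage sketch (the main() is illustrative only):

#include <stdio.h>
#include <string.h>

/* same body as the helper deleted above */
static const char *ceph_file_part(const char *s, int len)
{
	const char *e = s + len;

	while (e != s && *(e - 1) != '/')
		e--;
	return e;
}

int main(void)
{
	const char *p = "/foo/bar/baz";

	printf("%s\n", ceph_file_part(p, (int)strlen(p)));	/* "baz" */
	return 0;
}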
| 41 | |||
| 42 | |||
| 43 | /* | 32 | /* |
| 44 | * super ops | 33 | * super ops |
| 45 | */ | 34 | */ |
| 46 | static void ceph_put_super(struct super_block *s) | 35 | static void ceph_put_super(struct super_block *s) |
| 47 | { | 36 | { |
| 48 | struct ceph_client *client = ceph_sb_to_client(s); | 37 | struct ceph_fs_client *fsc = ceph_sb_to_client(s); |
| 49 | 38 | ||
| 50 | dout("put_super\n"); | 39 | dout("put_super\n"); |
| 51 | ceph_mdsc_close_sessions(&client->mdsc); | 40 | ceph_mdsc_close_sessions(fsc->mdsc); |
| 52 | 41 | ||
| 53 | /* | 42 | /* |
| 54 | * ensure we release the bdi before put_anon_super releases | 43 | * ensure we release the bdi before put_anon_super releases |
| 55 | * the device name. | 44 | * the device name. |
| 56 | */ | 45 | */ |
| 57 | if (s->s_bdi == &client->backing_dev_info) { | 46 | if (s->s_bdi == &fsc->backing_dev_info) { |
| 58 | bdi_unregister(&client->backing_dev_info); | 47 | bdi_unregister(&fsc->backing_dev_info); |
| 59 | s->s_bdi = NULL; | 48 | s->s_bdi = NULL; |
| 60 | } | 49 | } |
| 61 | 50 | ||
| @@ -64,14 +53,14 @@ static void ceph_put_super(struct super_block *s) | |||
| 64 | 53 | ||
| 65 | static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | 54 | static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) |
| 66 | { | 55 | { |
| 67 | struct ceph_client *client = ceph_inode_to_client(dentry->d_inode); | 56 | struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode); |
| 68 | struct ceph_monmap *monmap = client->monc.monmap; | 57 | struct ceph_monmap *monmap = fsc->client->monc.monmap; |
| 69 | struct ceph_statfs st; | 58 | struct ceph_statfs st; |
| 70 | u64 fsid; | 59 | u64 fsid; |
| 71 | int err; | 60 | int err; |
| 72 | 61 | ||
| 73 | dout("statfs\n"); | 62 | dout("statfs\n"); |
| 74 | err = ceph_monc_do_statfs(&client->monc, &st); | 63 | err = ceph_monc_do_statfs(&fsc->client->monc, &st); |
| 75 | if (err < 0) | 64 | if (err < 0) |
| 76 | return err; | 65 | return err; |
| 77 | 66 | ||
| @@ -104,238 +93,28 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 104 | 93 | ||
| 105 | static int ceph_sync_fs(struct super_block *sb, int wait) | 94 | static int ceph_sync_fs(struct super_block *sb, int wait) |
| 106 | { | 95 | { |
| 107 | struct ceph_client *client = ceph_sb_to_client(sb); | 96 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
| 108 | 97 | ||
| 109 | if (!wait) { | 98 | if (!wait) { |
| 110 | dout("sync_fs (non-blocking)\n"); | 99 | dout("sync_fs (non-blocking)\n"); |
| 111 | ceph_flush_dirty_caps(&client->mdsc); | 100 | ceph_flush_dirty_caps(fsc->mdsc); |
| 112 | dout("sync_fs (non-blocking) done\n"); | 101 | dout("sync_fs (non-blocking) done\n"); |
| 113 | return 0; | 102 | return 0; |
| 114 | } | 103 | } |
| 115 | 104 | ||
| 116 | dout("sync_fs (blocking)\n"); | 105 | dout("sync_fs (blocking)\n"); |
| 117 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); | 106 | ceph_osdc_sync(&fsc->client->osdc); |
| 118 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); | 107 | ceph_mdsc_sync(fsc->mdsc); |
| 119 | dout("sync_fs (blocking) done\n"); | 108 | dout("sync_fs (blocking) done\n"); |
| 120 | return 0; | 109 | return 0; |
| 121 | } | 110 | } |
| 122 | 111 | ||
| 123 | static int default_congestion_kb(void) | ||
| 124 | { | ||
| 125 | int congestion_kb; | ||
| 126 | |||
| 127 | /* | ||
| 128 | * Copied from NFS | ||
| 129 | * | ||
| 130 | * congestion size, scale with available memory. | ||
| 131 | * | ||
| 132 | * 64MB: 8192k | ||
| 133 | * 128MB: 11585k | ||
| 134 | * 256MB: 16384k | ||
| 135 | * 512MB: 23170k | ||
| 136 | * 1GB: 32768k | ||
| 137 | * 2GB: 46340k | ||
| 138 | * 4GB: 65536k | ||
| 139 | * 8GB: 92681k | ||
| 140 | * 16GB: 131072k | ||
| 141 | * | ||
| 142 | * This allows larger machines to have larger/more transfers. | ||
| 143 | * Limit the default to 256M | ||
| 144 | */ | ||
| 145 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
| 146 | if (congestion_kb > 256*1024) | ||
| 147 | congestion_kb = 256*1024; | ||
| 148 | |||
| 149 | return congestion_kb; | ||
| 150 | } | ||
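The scaling table in the comment above can be reproduced with a short standalone program. This sketch assumes 4 KiB pages (PAGE_SHIFT == 12) and uses a plain truncating integer square root, so a few rows come out one or two kilobytes below the comment's values (e.g. 11584k vs 11585k for 128MB):

#include <stdio.h>

static unsigned long int_sqrt(unsigned long x)
{
	unsigned long r = 0;

	while ((r + 1) * (r + 1) <= x)
		r++;
	return r;
}

int main(void)
{
	const int page_shift = 12;	/* assumed 4 KiB pages */
	const unsigned long mem_mb[] = { 64, 128, 256, 512, 1024,
					 2048, 4096, 8192, 16384 };

	for (size_t i = 0; i < sizeof(mem_mb) / sizeof(mem_mb[0]); i++) {
		unsigned long pages = mem_mb[i] << (20 - page_shift);
		unsigned long kb = (16 * int_sqrt(pages)) << (page_shift - 10);

		if (kb > 256 * 1024)	/* same 256M cap as above */
			kb = 256 * 1024;
		printf("%6luMB: %luk\n", mem_mb[i], kb);
	}
	return 0;
}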
| 151 | |||
| 152 | /** | ||
| 153 | * ceph_show_options - Show mount options in /proc/mounts | ||
| 154 | * @m: seq_file to write to | ||
| 155 | * @mnt: mount descriptor | ||
| 156 | */ | ||
| 157 | static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | ||
| 158 | { | ||
| 159 | struct ceph_client *client = ceph_sb_to_client(mnt->mnt_sb); | ||
| 160 | struct ceph_mount_args *args = client->mount_args; | ||
| 161 | |||
| 162 | if (args->flags & CEPH_OPT_FSID) | ||
| 163 | seq_printf(m, ",fsid=%pU", &args->fsid); | ||
| 164 | if (args->flags & CEPH_OPT_NOSHARE) | ||
| 165 | seq_puts(m, ",noshare"); | ||
| 166 | if (args->flags & CEPH_OPT_DIRSTAT) | ||
| 167 | seq_puts(m, ",dirstat"); | ||
| 168 | if ((args->flags & CEPH_OPT_RBYTES) == 0) | ||
| 169 | seq_puts(m, ",norbytes"); | ||
| 170 | if (args->flags & CEPH_OPT_NOCRC) | ||
| 171 | seq_puts(m, ",nocrc"); | ||
| 172 | if (args->flags & CEPH_OPT_NOASYNCREADDIR) | ||
| 173 | seq_puts(m, ",noasyncreaddir"); | ||
| 174 | |||
| 175 | if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | ||
| 176 | seq_printf(m, ",mount_timeout=%d", args->mount_timeout); | ||
| 177 | if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | ||
| 178 | seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl); | ||
| 179 | if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) | ||
| 180 | seq_printf(m, ",osdtimeout=%d", args->osd_timeout); | ||
| 181 | if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||
| 182 | seq_printf(m, ",osdkeepalivetimeout=%d", | ||
| 183 | args->osd_keepalive_timeout); | ||
| 184 | if (args->wsize) | ||
| 185 | seq_printf(m, ",wsize=%d", args->wsize); | ||
| 186 | if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT) | ||
| 187 | seq_printf(m, ",rsize=%d", args->rsize); | ||
| 188 | if (args->congestion_kb != default_congestion_kb()) | ||
| 189 | seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb); | ||
| 190 | if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) | ||
| 191 | seq_printf(m, ",caps_wanted_delay_min=%d", | ||
| 192 | args->caps_wanted_delay_min); | ||
| 193 | if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) | ||
| 194 | seq_printf(m, ",caps_wanted_delay_max=%d", | ||
| 195 | args->caps_wanted_delay_max); | ||
| 196 | if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) | ||
| 197 | seq_printf(m, ",cap_release_safety=%d", | ||
| 198 | args->cap_release_safety); | ||
| 199 | if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT) | ||
| 200 | seq_printf(m, ",readdir_max_entries=%d", args->max_readdir); | ||
| 201 | if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) | ||
| 202 | seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes); | ||
| 203 | if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) | ||
| 204 | seq_printf(m, ",snapdirname=%s", args->snapdir_name); | ||
| 205 | if (args->name) | ||
| 206 | seq_printf(m, ",name=%s", args->name); | ||
| 207 | if (args->secret) | ||
| 208 | seq_puts(m, ",secret=<hidden>"); | ||
| 209 | return 0; | ||
| 210 | } | ||
| 211 | |||
| 212 | /* | ||
| 213 | * caches | ||
| 214 | */ | ||
| 215 | struct kmem_cache *ceph_inode_cachep; | ||
| 216 | struct kmem_cache *ceph_cap_cachep; | ||
| 217 | struct kmem_cache *ceph_dentry_cachep; | ||
| 218 | struct kmem_cache *ceph_file_cachep; | ||
| 219 | |||
| 220 | static void ceph_inode_init_once(void *foo) | ||
| 221 | { | ||
| 222 | struct ceph_inode_info *ci = foo; | ||
| 223 | inode_init_once(&ci->vfs_inode); | ||
| 224 | } | ||
| 225 | |||
| 226 | static int __init init_caches(void) | ||
| 227 | { | ||
| 228 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", | ||
| 229 | sizeof(struct ceph_inode_info), | ||
| 230 | __alignof__(struct ceph_inode_info), | ||
| 231 | (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), | ||
| 232 | ceph_inode_init_once); | ||
| 233 | if (ceph_inode_cachep == NULL) | ||
| 234 | return -ENOMEM; | ||
| 235 | |||
| 236 | ceph_cap_cachep = KMEM_CACHE(ceph_cap, | ||
| 237 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
| 238 | if (ceph_cap_cachep == NULL) | ||
| 239 | goto bad_cap; | ||
| 240 | |||
| 241 | ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, | ||
| 242 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
| 243 | if (ceph_dentry_cachep == NULL) | ||
| 244 | goto bad_dentry; | ||
| 245 | |||
| 246 | ceph_file_cachep = KMEM_CACHE(ceph_file_info, | ||
| 247 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
| 248 | if (ceph_file_cachep == NULL) | ||
| 249 | goto bad_file; | ||
| 250 | |||
| 251 | return 0; | ||
| 252 | |||
| 253 | bad_file: | ||
| 254 | kmem_cache_destroy(ceph_dentry_cachep); | ||
| 255 | bad_dentry: | ||
| 256 | kmem_cache_destroy(ceph_cap_cachep); | ||
| 257 | bad_cap: | ||
| 258 | kmem_cache_destroy(ceph_inode_cachep); | ||
| 259 | return -ENOMEM; | ||
| 260 | } | ||
| 261 | |||
| 262 | static void destroy_caches(void) | ||
| 263 | { | ||
| 264 | kmem_cache_destroy(ceph_inode_cachep); | ||
| 265 | kmem_cache_destroy(ceph_cap_cachep); | ||
| 266 | kmem_cache_destroy(ceph_dentry_cachep); | ||
| 267 | kmem_cache_destroy(ceph_file_cachep); | ||
| 268 | } | ||
| 269 | |||
| 270 | |||
| 271 | /* | ||
| 272 | * ceph_umount_begin - initiate forced umount. Tear down the ||
| 273 | * mount, skipping steps that may hang while waiting for server(s). | ||
| 274 | */ | ||
| 275 | static void ceph_umount_begin(struct super_block *sb) | ||
| 276 | { | ||
| 277 | struct ceph_client *client = ceph_sb_to_client(sb); | ||
| 278 | |||
| 279 | dout("ceph_umount_begin - starting forced umount\n"); | ||
| 280 | if (!client) | ||
| 281 | return; | ||
| 282 | client->mount_state = CEPH_MOUNT_SHUTDOWN; | ||
| 283 | return; | ||
| 284 | } | ||
| 285 | |||
| 286 | static const struct super_operations ceph_super_ops = { | ||
| 287 | .alloc_inode = ceph_alloc_inode, | ||
| 288 | .destroy_inode = ceph_destroy_inode, | ||
| 289 | .write_inode = ceph_write_inode, | ||
| 290 | .sync_fs = ceph_sync_fs, | ||
| 291 | .put_super = ceph_put_super, | ||
| 292 | .show_options = ceph_show_options, | ||
| 293 | .statfs = ceph_statfs, | ||
| 294 | .umount_begin = ceph_umount_begin, | ||
| 295 | }; | ||
| 296 | |||
| 297 | |||
| 298 | const char *ceph_msg_type_name(int type) | ||
| 299 | { | ||
| 300 | switch (type) { | ||
| 301 | case CEPH_MSG_SHUTDOWN: return "shutdown"; | ||
| 302 | case CEPH_MSG_PING: return "ping"; | ||
| 303 | case CEPH_MSG_AUTH: return "auth"; | ||
| 304 | case CEPH_MSG_AUTH_REPLY: return "auth_reply"; | ||
| 305 | case CEPH_MSG_MON_MAP: return "mon_map"; | ||
| 306 | case CEPH_MSG_MON_GET_MAP: return "mon_get_map"; | ||
| 307 | case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe"; | ||
| 308 | case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack"; | ||
| 309 | case CEPH_MSG_STATFS: return "statfs"; | ||
| 310 | case CEPH_MSG_STATFS_REPLY: return "statfs_reply"; | ||
| 311 | case CEPH_MSG_MDS_MAP: return "mds_map"; | ||
| 312 | case CEPH_MSG_CLIENT_SESSION: return "client_session"; | ||
| 313 | case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; | ||
| 314 | case CEPH_MSG_CLIENT_REQUEST: return "client_request"; | ||
| 315 | case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward"; | ||
| 316 | case CEPH_MSG_CLIENT_REPLY: return "client_reply"; | ||
| 317 | case CEPH_MSG_CLIENT_CAPS: return "client_caps"; | ||
| 318 | case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release"; | ||
| 319 | case CEPH_MSG_CLIENT_SNAP: return "client_snap"; | ||
| 320 | case CEPH_MSG_CLIENT_LEASE: return "client_lease"; | ||
| 321 | case CEPH_MSG_OSD_MAP: return "osd_map"; | ||
| 322 | case CEPH_MSG_OSD_OP: return "osd_op"; | ||
| 323 | case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; | ||
| 324 | default: return "unknown"; | ||
| 325 | } | ||
| 326 | } | ||
| 327 | |||
| 328 | |||
| 329 | /* | 112 | /* |
| 330 | * mount options | 113 | * mount options |
| 331 | */ | 114 | */ |
| 332 | enum { | 115 | enum { |
| 333 | Opt_wsize, | 116 | Opt_wsize, |
| 334 | Opt_rsize, | 117 | Opt_rsize, |
| 335 | Opt_osdtimeout, | ||
| 336 | Opt_osdkeepalivetimeout, | ||
| 337 | Opt_mount_timeout, | ||
| 338 | Opt_osd_idle_ttl, | ||
| 339 | Opt_caps_wanted_delay_min, | 118 | Opt_caps_wanted_delay_min, |
| 340 | Opt_caps_wanted_delay_max, | 119 | Opt_caps_wanted_delay_max, |
| 341 | Opt_cap_release_safety, | 120 | Opt_cap_release_safety, |
| @@ -344,29 +123,19 @@ enum { | |||
| 344 | Opt_congestion_kb, | 123 | Opt_congestion_kb, |
| 345 | Opt_last_int, | 124 | Opt_last_int, |
| 346 | /* int args above */ | 125 | /* int args above */ |
| 347 | Opt_fsid, | ||
| 348 | Opt_snapdirname, | 126 | Opt_snapdirname, |
| 349 | Opt_name, | ||
| 350 | Opt_secret, | ||
| 351 | Opt_last_string, | 127 | Opt_last_string, |
| 352 | /* string args above */ | 128 | /* string args above */ |
| 353 | Opt_ip, | ||
| 354 | Opt_noshare, | ||
| 355 | Opt_dirstat, | 129 | Opt_dirstat, |
| 356 | Opt_nodirstat, | 130 | Opt_nodirstat, |
| 357 | Opt_rbytes, | 131 | Opt_rbytes, |
| 358 | Opt_norbytes, | 132 | Opt_norbytes, |
| 359 | Opt_nocrc, | ||
| 360 | Opt_noasyncreaddir, | 133 | Opt_noasyncreaddir, |
| 361 | }; | 134 | }; |
| 362 | 135 | ||
| 363 | static match_table_t arg_tokens = { | 136 | static match_table_t fsopt_tokens = { |
| 364 | {Opt_wsize, "wsize=%d"}, | 137 | {Opt_wsize, "wsize=%d"}, |
| 365 | {Opt_rsize, "rsize=%d"}, | 138 | {Opt_rsize, "rsize=%d"}, |
| 366 | {Opt_osdtimeout, "osdtimeout=%d"}, | ||
| 367 | {Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, | ||
| 368 | {Opt_mount_timeout, "mount_timeout=%d"}, | ||
| 369 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, | ||
| 370 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, | 139 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, |
| 371 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, | 140 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, |
| 372 | {Opt_cap_release_safety, "cap_release_safety=%d"}, | 141 | {Opt_cap_release_safety, "cap_release_safety=%d"}, |
| @@ -374,403 +143,459 @@ static match_table_t arg_tokens = { | |||
| 374 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, | 143 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, |
| 375 | {Opt_congestion_kb, "write_congestion_kb=%d"}, | 144 | {Opt_congestion_kb, "write_congestion_kb=%d"}, |
| 376 | /* int args above */ | 145 | /* int args above */ |
| 377 | {Opt_fsid, "fsid=%s"}, | ||
| 378 | {Opt_snapdirname, "snapdirname=%s"}, | 146 | {Opt_snapdirname, "snapdirname=%s"}, |
| 379 | {Opt_name, "name=%s"}, | ||
| 380 | {Opt_secret, "secret=%s"}, | ||
| 381 | /* string args above */ | 147 | /* string args above */ |
| 382 | {Opt_ip, "ip=%s"}, | ||
| 383 | {Opt_noshare, "noshare"}, | ||
| 384 | {Opt_dirstat, "dirstat"}, | 148 | {Opt_dirstat, "dirstat"}, |
| 385 | {Opt_nodirstat, "nodirstat"}, | 149 | {Opt_nodirstat, "nodirstat"}, |
| 386 | {Opt_rbytes, "rbytes"}, | 150 | {Opt_rbytes, "rbytes"}, |
| 387 | {Opt_norbytes, "norbytes"}, | 151 | {Opt_norbytes, "norbytes"}, |
| 388 | {Opt_nocrc, "nocrc"}, | ||
| 389 | {Opt_noasyncreaddir, "noasyncreaddir"}, | 152 | {Opt_noasyncreaddir, "noasyncreaddir"}, |
| 390 | {-1, NULL} | 153 | {-1, NULL} |
| 391 | }; | 154 | }; |
| 392 | 155 | ||
| 393 | static int parse_fsid(const char *str, struct ceph_fsid *fsid) | 156 | static int parse_fsopt_token(char *c, void *private) |
| 394 | { | 157 | { |
| 395 | int i = 0; | 158 | struct ceph_mount_options *fsopt = private; |
| 396 | char tmp[3]; | 159 | substring_t argstr[MAX_OPT_ARGS]; |
| 397 | int err = -EINVAL; | 160 | int token, intval, ret; |
| 398 | int d; | 161 | |
| 399 | 162 | token = match_token((char *)c, fsopt_tokens, argstr); | |
| 400 | dout("parse_fsid '%s'\n", str); | 163 | if (token < 0) |
| 401 | tmp[2] = 0; | 164 | return -EINVAL; |
| 402 | while (*str && i < 16) { | 165 | |
| 403 | if (ispunct(*str)) { | 166 | if (token < Opt_last_int) { |
| 404 | str++; | 167 | ret = match_int(&argstr[0], &intval); |
| 405 | continue; | 168 | if (ret < 0) { |
| 169 | pr_err("bad mount option arg (not int) " | ||
| 170 | "at '%s'\n", c); | ||
| 171 | return ret; | ||
| 406 | } | 172 | } |
| 407 | if (!isxdigit(str[0]) || !isxdigit(str[1])) | 173 | dout("got int token %d val %d\n", token, intval); |
| 408 | break; | 174 | } else if (token > Opt_last_int && token < Opt_last_string) { |
| 409 | tmp[0] = str[0]; | 175 | dout("got string token %d val %s\n", token, |
| 410 | tmp[1] = str[1]; | 176 | argstr[0].from); |
| 411 | if (sscanf(tmp, "%x", &d) < 1) | 177 | } else { |
| 412 | break; | 178 | dout("got token %d\n", token); |
| 413 | fsid->fsid[i] = d & 0xff; | ||
| 414 | i++; | ||
| 415 | str += 2; | ||
| 416 | } | 179 | } |
| 417 | 180 | ||
| 418 | if (i == 16) | 181 | switch (token) { |
| 419 | err = 0; | 182 | case Opt_snapdirname: |
| 420 | dout("parse_fsid ret %d got fsid %pU", err, fsid); | 183 | kfree(fsopt->snapdir_name); |
| 421 | return err; | 184 | fsopt->snapdir_name = kstrndup(argstr[0].from, |
| 185 | argstr[0].to-argstr[0].from, | ||
| 186 | GFP_KERNEL); | ||
| 187 | if (!fsopt->snapdir_name) | ||
| 188 | return -ENOMEM; | ||
| 189 | break; | ||
| 190 | |||
| 191 | /* misc */ | ||
| 192 | case Opt_wsize: | ||
| 193 | fsopt->wsize = intval; | ||
| 194 | break; | ||
| 195 | case Opt_rsize: | ||
| 196 | fsopt->rsize = intval; | ||
| 197 | break; | ||
| 198 | case Opt_caps_wanted_delay_min: | ||
| 199 | fsopt->caps_wanted_delay_min = intval; | ||
| 200 | break; | ||
| 201 | case Opt_caps_wanted_delay_max: | ||
| 202 | fsopt->caps_wanted_delay_max = intval; | ||
| 203 | break; | ||
| 204 | case Opt_readdir_max_entries: | ||
| 205 | fsopt->max_readdir = intval; | ||
| 206 | break; | ||
| 207 | case Opt_readdir_max_bytes: | ||
| 208 | fsopt->max_readdir_bytes = intval; | ||
| 209 | break; | ||
| 210 | case Opt_congestion_kb: | ||
| 211 | fsopt->congestion_kb = intval; | ||
| 212 | break; | ||
| 213 | case Opt_dirstat: | ||
| 214 | fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; | ||
| 215 | break; | ||
| 216 | case Opt_nodirstat: | ||
| 217 | fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; | ||
| 218 | break; | ||
| 219 | case Opt_rbytes: | ||
| 220 | fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; | ||
| 221 | break; | ||
| 222 | case Opt_norbytes: | ||
| 223 | fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; | ||
| 224 | break; | ||
| 225 | case Opt_noasyncreaddir: | ||
| 226 | fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; | ||
| 227 | break; | ||
| 228 | default: | ||
| 229 | BUG_ON(token); | ||
| 230 | } | ||
| 231 | return 0; | ||
| 422 | } | 232 | } |
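The new parse_fsopt_token() above only sees options the generic layer did not claim: ceph_parse_options() walks the comma-separated string itself and hands unrecognized tokens to this callback through the private pointer. A minimal userspace sketch of that split, with illustrative names and a strsep()-based walker standing in for the kernel's match_token() machinery:

#define _GNU_SOURCE	/* for strsep() on glibc */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef int (*opt_cb)(const char *name, const char *val, void *private);

/* generic layer: walk "a=1,b,c=x"; unclaimed tokens go to the callback */
static int parse_options(char *options, opt_cb fs_cb, void *private)
{
	char *c;

	while ((c = strsep(&options, ",")) != NULL) {
		char *val;

		if (!*c)
			continue;	/* tolerate empty ",," */
		val = strchr(c, '=');
		if (val)
			*val++ = '\0';
		/* a real generic layer would match its own table first */
		if (fs_cb(c, val, private) < 0) {
			fprintf(stderr, "bad mount option at '%s'\n", c);
			return -1;
		}
	}
	return 0;
}

struct fs_opts { int rsize; int dirstat; };

/* fs layer: the analogue of parse_fsopt_token() */
static int fs_token(const char *name, const char *val, void *private)
{
	struct fs_opts *o = private;

	if (!strcmp(name, "rsize") && val)
		o->rsize = atoi(val);
	else if (!strcmp(name, "dirstat"))
		o->dirstat = 1;
	else
		return -1;	/* neither layer understood the option */
	return 0;
}

int main(void)
{
	char opts[] = "rsize=524288,dirstat";
	struct fs_opts o = { 0, 0 };

	if (parse_options(opts, fs_token, &o) == 0)
		printf("rsize=%d dirstat=%d\n", o.rsize, o.dirstat);
	return 0;
}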
| 423 | 233 | ||
| 424 | static struct ceph_mount_args *parse_mount_args(int flags, char *options, | 234 | static void destroy_mount_options(struct ceph_mount_options *args) |
| 425 | const char *dev_name, | ||
| 426 | const char **path) | ||
| 427 | { | 235 | { |
| 428 | struct ceph_mount_args *args; | 236 | dout("destroy_mount_options %p\n", args); |
| 429 | const char *c; | 237 | kfree(args->snapdir_name); |
| 430 | int err = -ENOMEM; | 238 | kfree(args); |
| 431 | substring_t argstr[MAX_OPT_ARGS]; | 239 | } |
| 432 | 240 | ||
| 433 | args = kzalloc(sizeof(*args), GFP_KERNEL); | 241 | static int strcmp_null(const char *s1, const char *s2) |
| 434 | if (!args) | 242 | { |
| 435 | return ERR_PTR(-ENOMEM); | 243 | if (!s1 && !s2) |
| 436 | args->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*args->mon_addr), | 244 | return 0; |
| 437 | GFP_KERNEL); | 245 | if (s1 && !s2) |
| 438 | if (!args->mon_addr) | 246 | return -1; |
| 439 | goto out; | 247 | if (!s1 && s2) |
| 248 | return 1; | ||
| 249 | return strcmp(s1, s2); | ||
| 250 | } | ||
| 440 | 251 | ||
| 441 | dout("parse_mount_args %p, dev_name '%s'\n", args, dev_name); | 252 | static int compare_mount_options(struct ceph_mount_options *new_fsopt, |
| 442 | 253 | struct ceph_options *new_opt, | |
| 443 | /* start with defaults */ | 254 | struct ceph_fs_client *fsc) |
| 444 | args->sb_flags = flags; | 255 | { |
| 445 | args->flags = CEPH_OPT_DEFAULT; | 256 | struct ceph_mount_options *fsopt1 = new_fsopt; |
| 446 | args->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; | 257 | struct ceph_mount_options *fsopt2 = fsc->mount_options; |
| 447 | args->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; | 258 | int ofs = offsetof(struct ceph_mount_options, snapdir_name); |
| 448 | args->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ | 259 | int ret; |
| 449 | args->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ | ||
| 450 | args->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; | ||
| 451 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; | ||
| 452 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; | ||
| 453 | args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | ||
| 454 | args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; | ||
| 455 | args->max_readdir = CEPH_MAX_READDIR_DEFAULT; | ||
| 456 | args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; | ||
| 457 | args->congestion_kb = default_congestion_kb(); | ||
| 458 | |||
| 459 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ | ||
| 460 | err = -EINVAL; | ||
| 461 | if (!dev_name) | ||
| 462 | goto out; | ||
| 463 | *path = strstr(dev_name, ":/"); | ||
| 464 | if (*path == NULL) { | ||
| 465 | pr_err("device name is missing path (no :/ in %s)\n", | ||
| 466 | dev_name); | ||
| 467 | goto out; | ||
| 468 | } | ||
| 469 | 260 | ||
| 470 | /* get mon ip(s) */ | 261 | ret = memcmp(fsopt1, fsopt2, ofs); |
| 471 | err = ceph_parse_ips(dev_name, *path, args->mon_addr, | 262 | if (ret) |
| 472 | CEPH_MAX_MON, &args->num_mon); | 263 | return ret; |
| 473 | if (err < 0) | 264 | |
| 474 | goto out; | 265 | ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); |
| 266 | if (ret) | ||
| 267 | return ret; | ||
| 268 | |||
| 269 | return ceph_compare_options(new_opt, fsc->client); | ||
| 270 | } | ||
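compare_mount_options() above leans on struct layout: every plain scalar option sits before snapdir_name, so a single memcmp() up to offsetof(..., snapdir_name) covers all of them, and the pointer members are compared individually afterwards. A sketch of the idiom with an assumed illustrative layout (not the real ceph_mount_options); note it relies on both structs coming from zeroing allocators (kzalloc in the kernel) so padding bytes compare equal:

#include <stddef.h>
#include <string.h>

struct opts {
	int flags;
	int wsize, rsize;	/* ... all plain scalars first ... */
	char *snapdir_name;	/* pointer members go last */
};

static int strcmp_null(const char *s1, const char *s2)
{
	if (!s1 && !s2)
		return 0;
	if (s1 && !s2)
		return -1;
	if (!s1 && s2)
		return 1;
	return strcmp(s1, s2);
}

/* nonzero when equal; memcmp() on the prefix is safe only because the
 * compared structs are zero-initialized, padding included */
static int opts_equal(const struct opts *a, const struct opts *b)
{
	size_t ofs = offsetof(struct opts, snapdir_name);

	if (memcmp(a, b, ofs))
		return 0;
	return strcmp_null(a->snapdir_name, b->snapdir_name) == 0;
}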
| 271 | |||
| 272 | static int parse_mount_options(struct ceph_mount_options **pfsopt, | ||
| 273 | struct ceph_options **popt, | ||
| 274 | int flags, char *options, | ||
| 275 | const char *dev_name, | ||
| 276 | const char **path) | ||
| 277 | { | ||
| 278 | struct ceph_mount_options *fsopt; | ||
| 279 | const char *dev_name_end; | ||
| 280 | int err = -ENOMEM; | ||
| 281 | |||
| 282 | fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); | ||
| 283 | if (!fsopt) | ||
| 284 | return -ENOMEM; | ||
| 285 | |||
| 286 | dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name); | ||
| 287 | |||
| 288 | fsopt->sb_flags = flags; | ||
| 289 | fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; | ||
| 290 | |||
| 291 | fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT; | ||
| 292 | fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | ||
| 293 | fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; | ||
| 294 | fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; | ||
| 295 | fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; | ||
| 296 | fsopt->congestion_kb = default_congestion_kb(); | ||
| 297 | |||
| 298 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ | ||
| 299 | err = -EINVAL; | ||
| 300 | if (!dev_name) | ||
| 301 | goto out; | ||
| 302 | *path = strstr(dev_name, ":/"); | ||
| 303 | if (*path == NULL) { | ||
| 304 | pr_err("device name is missing path (no :/ in %s)\n", | ||
| 305 | dev_name); | ||
| 306 | goto out; | ||
| 307 | } | ||
| 308 | dev_name_end = *path; | ||
| 309 | dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); | ||
| 475 | 310 | ||
| 476 | /* path on server */ | 311 | /* path on server */ |
| 477 | *path += 2; | 312 | *path += 2; |
| 478 | dout("server path '%s'\n", *path); | 313 | dout("server path '%s'\n", *path); |
| 479 | 314 | ||
| 480 | /* parse mount options */ | 315 | err = ceph_parse_options(popt, options, dev_name, dev_name_end, |
| 481 | while ((c = strsep(&options, ",")) != NULL) { | 316 | parse_fsopt_token, (void *)fsopt); |
| 482 | int token, intval, ret; | 317 | if (err) |
| 483 | if (!*c) | 318 | goto out; |
| 484 | continue; | 319 | |
| 485 | err = -EINVAL; | 320 | /* success */ |
| 486 | token = match_token((char *)c, arg_tokens, argstr); | 321 | *pfsopt = fsopt; |
| 487 | if (token < 0) { | 322 | return 0; |
| 488 | pr_err("bad mount option at '%s'\n", c); | ||
| 489 | goto out; | ||
| 490 | } | ||
| 491 | if (token < Opt_last_int) { | ||
| 492 | ret = match_int(&argstr[0], &intval); | ||
| 493 | if (ret < 0) { | ||
| 494 | pr_err("bad mount option arg (not int) " | ||
| 495 | "at '%s'\n", c); | ||
| 496 | continue; | ||
| 497 | } | ||
| 498 | dout("got int token %d val %d\n", token, intval); | ||
| 499 | } else if (token > Opt_last_int && token < Opt_last_string) { | ||
| 500 | dout("got string token %d val %s\n", token, | ||
| 501 | argstr[0].from); | ||
| 502 | } else { | ||
| 503 | dout("got token %d\n", token); | ||
| 504 | } | ||
| 505 | switch (token) { | ||
| 506 | case Opt_ip: | ||
| 507 | err = ceph_parse_ips(argstr[0].from, | ||
| 508 | argstr[0].to, | ||
| 509 | &args->my_addr, | ||
| 510 | 1, NULL); | ||
| 511 | if (err < 0) | ||
| 512 | goto out; | ||
| 513 | args->flags |= CEPH_OPT_MYIP; | ||
| 514 | break; | ||
| 515 | |||
| 516 | case Opt_fsid: | ||
| 517 | err = parse_fsid(argstr[0].from, &args->fsid); | ||
| 518 | if (err == 0) | ||
| 519 | args->flags |= CEPH_OPT_FSID; | ||
| 520 | break; | ||
| 521 | case Opt_snapdirname: | ||
| 522 | kfree(args->snapdir_name); | ||
| 523 | args->snapdir_name = kstrndup(argstr[0].from, | ||
| 524 | argstr[0].to-argstr[0].from, | ||
| 525 | GFP_KERNEL); | ||
| 526 | break; | ||
| 527 | case Opt_name: | ||
| 528 | args->name = kstrndup(argstr[0].from, | ||
| 529 | argstr[0].to-argstr[0].from, | ||
| 530 | GFP_KERNEL); | ||
| 531 | break; | ||
| 532 | case Opt_secret: | ||
| 533 | args->secret = kstrndup(argstr[0].from, | ||
| 534 | argstr[0].to-argstr[0].from, | ||
| 535 | GFP_KERNEL); | ||
| 536 | break; | ||
| 537 | |||
| 538 | /* misc */ | ||
| 539 | case Opt_wsize: | ||
| 540 | args->wsize = intval; | ||
| 541 | break; | ||
| 542 | case Opt_rsize: | ||
| 543 | args->rsize = intval; | ||
| 544 | break; | ||
| 545 | case Opt_osdtimeout: | ||
| 546 | args->osd_timeout = intval; | ||
| 547 | break; | ||
| 548 | case Opt_osdkeepalivetimeout: | ||
| 549 | args->osd_keepalive_timeout = intval; | ||
| 550 | break; | ||
| 551 | case Opt_osd_idle_ttl: | ||
| 552 | args->osd_idle_ttl = intval; | ||
| 553 | break; | ||
| 554 | case Opt_mount_timeout: | ||
| 555 | args->mount_timeout = intval; | ||
| 556 | break; | ||
| 557 | case Opt_caps_wanted_delay_min: | ||
| 558 | args->caps_wanted_delay_min = intval; | ||
| 559 | break; | ||
| 560 | case Opt_caps_wanted_delay_max: | ||
| 561 | args->caps_wanted_delay_max = intval; | ||
| 562 | break; | ||
| 563 | case Opt_readdir_max_entries: | ||
| 564 | args->max_readdir = intval; | ||
| 565 | break; | ||
| 566 | case Opt_readdir_max_bytes: | ||
| 567 | args->max_readdir_bytes = intval; | ||
| 568 | break; | ||
| 569 | case Opt_congestion_kb: | ||
| 570 | args->congestion_kb = intval; | ||
| 571 | break; | ||
| 572 | |||
| 573 | case Opt_noshare: | ||
| 574 | args->flags |= CEPH_OPT_NOSHARE; | ||
| 575 | break; | ||
| 576 | |||
| 577 | case Opt_dirstat: | ||
| 578 | args->flags |= CEPH_OPT_DIRSTAT; | ||
| 579 | break; | ||
| 580 | case Opt_nodirstat: | ||
| 581 | args->flags &= ~CEPH_OPT_DIRSTAT; | ||
| 582 | break; | ||
| 583 | case Opt_rbytes: | ||
| 584 | args->flags |= CEPH_OPT_RBYTES; | ||
| 585 | break; | ||
| 586 | case Opt_norbytes: | ||
| 587 | args->flags &= ~CEPH_OPT_RBYTES; | ||
| 588 | break; | ||
| 589 | case Opt_nocrc: | ||
| 590 | args->flags |= CEPH_OPT_NOCRC; | ||
| 591 | break; | ||
| 592 | case Opt_noasyncreaddir: | ||
| 593 | args->flags |= CEPH_OPT_NOASYNCREADDIR; | ||
| 594 | break; | ||
| 595 | |||
| 596 | default: | ||
| 597 | BUG_ON(token); | ||
| 598 | } | ||
| 599 | } | ||
| 600 | return args; | ||
| 601 | 323 | ||
| 602 | out: | 324 | out: |
| 603 | kfree(args->mon_addr); | 325 | destroy_mount_options(fsopt); |
| 604 | kfree(args); | 326 | return err; |
| 605 | return ERR_PTR(err); | ||
| 606 | } | 327 | } |
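The dev_name handling above splits "ip1[:port1][,ip2[:port2]...]:/subdir/in/fs" at the first ":/": everything before it is the monitor list (now parsed by libceph via ceph_parse_options), everything after it is the path on the server. A standalone sketch of just that split:

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *dev_name = "1.2.3.4:6789,1.2.3.5:6789:/subdir/in/fs";
	const char *path = strstr(dev_name, ":/");

	if (!path) {
		fprintf(stderr, "device name is missing path (no :/ in %s)\n",
			dev_name);
		return 1;
	}
	printf("monitors: '%.*s'\n", (int)(path - dev_name), dev_name);
	printf("server path: '%s'\n", path + 2);	/* skip the ":/" */
	return 0;
}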
| 607 | 328 | ||
| 608 | static void destroy_mount_args(struct ceph_mount_args *args) | 329 | /** |
| 330 | * ceph_show_options - Show mount options in /proc/mounts | ||
| 331 | * @m: seq_file to write to | ||
| 332 | * @mnt: mount descriptor | ||
| 333 | */ | ||
| 334 | static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | ||
| 609 | { | 335 | { |
| 610 | dout("destroy_mount_args %p\n", args); | 336 | struct ceph_fs_client *fsc = ceph_sb_to_client(mnt->mnt_sb); |
| 611 | kfree(args->snapdir_name); | 337 | struct ceph_mount_options *fsopt = fsc->mount_options; |
| 612 | args->snapdir_name = NULL; | 338 | struct ceph_options *opt = fsc->client->options; |
| 613 | kfree(args->name); | 339 | |
| 614 | args->name = NULL; | 340 | if (opt->flags & CEPH_OPT_FSID) |
| 615 | kfree(args->secret); | 341 | seq_printf(m, ",fsid=%pU", &opt->fsid); |
| 616 | args->secret = NULL; | 342 | if (opt->flags & CEPH_OPT_NOSHARE) |
| 617 | kfree(args); | 343 | seq_puts(m, ",noshare"); |
| 344 | if (opt->flags & CEPH_OPT_NOCRC) | ||
| 345 | seq_puts(m, ",nocrc"); | ||
| 346 | |||
| 347 | if (opt->name) | ||
| 348 | seq_printf(m, ",name=%s", opt->name); | ||
| 349 | if (opt->secret) | ||
| 350 | seq_puts(m, ",secret=<hidden>"); | ||
| 351 | |||
| 352 | if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | ||
| 353 | seq_printf(m, ",mount_timeout=%d", opt->mount_timeout); | ||
| 354 | if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | ||
| 355 | seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl); | ||
| 356 | if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) | ||
| 357 | seq_printf(m, ",osdtimeout=%d", opt->osd_timeout); | ||
| 358 | if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||
| 359 | seq_printf(m, ",osdkeepalivetimeout=%d", | ||
| 360 | opt->osd_keepalive_timeout); | ||
| 361 | |||
| 362 | if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) | ||
| 363 | seq_puts(m, ",dirstat"); | ||
| 364 | if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0) | ||
| 365 | seq_puts(m, ",norbytes"); | ||
| 366 | if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) | ||
| 367 | seq_puts(m, ",noasyncreaddir"); | ||
| 368 | |||
| 369 | if (fsopt->wsize) | ||
| 370 | seq_printf(m, ",wsize=%d", fsopt->wsize); | ||
| 371 | if (fsopt->rsize != CEPH_MOUNT_RSIZE_DEFAULT) | ||
| 372 | seq_printf(m, ",rsize=%d", fsopt->rsize); | ||
| 373 | if (fsopt->congestion_kb != default_congestion_kb()) | ||
| 374 | seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); | ||
| 375 | if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) | ||
| 376 | seq_printf(m, ",caps_wanted_delay_min=%d", | ||
| 377 | fsopt->caps_wanted_delay_min); | ||
| 378 | if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) | ||
| 379 | seq_printf(m, ",caps_wanted_delay_max=%d", | ||
| 380 | fsopt->caps_wanted_delay_max); | ||
| 381 | if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) | ||
| 382 | seq_printf(m, ",cap_release_safety=%d", | ||
| 383 | fsopt->cap_release_safety); | ||
| 384 | if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) | ||
| 385 | seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir); | ||
| 386 | if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) | ||
| 387 | seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); | ||
| 388 | if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) | ||
| 389 | seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name); | ||
| 390 | return 0; | ||
| 618 | } | 391 | } |
| 619 | 392 | ||
| 620 | /* | 393 | /* |
| 621 | * create a fresh client instance | 394 | * handle any mon messages the standard library doesn't understand. |
| 395 | * return an error if we don't understand it either. ||
| 622 | */ | 396 | */ |
| 623 | static struct ceph_client *ceph_create_client(struct ceph_mount_args *args) | 397 | static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) |
| 624 | { | 398 | { |
| 625 | struct ceph_client *client; | 399 | struct ceph_fs_client *fsc = client->private; |
| 400 | int type = le16_to_cpu(msg->hdr.type); | ||
| 401 | |||
| 402 | switch (type) { | ||
| 403 | case CEPH_MSG_MDS_MAP: | ||
| 404 | ceph_mdsc_handle_map(fsc->mdsc, msg); | ||
| 405 | return 0; | ||
| 406 | |||
| 407 | default: | ||
| 408 | return -1; | ||
| 409 | } | ||
| 410 | } | ||
| 411 | |||
| 412 | /* | ||
| 413 | * create a new fs client | ||
| 414 | */ | ||
| 415 | struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, | ||
| 416 | struct ceph_options *opt) | ||
| 417 | { | ||
| 418 | struct ceph_fs_client *fsc; | ||
| 626 | int err = -ENOMEM; | 419 | int err = -ENOMEM; |
| 627 | 420 | ||
| 628 | client = kzalloc(sizeof(*client), GFP_KERNEL); | 421 | fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); |
| 629 | if (client == NULL) | 422 | if (!fsc) |
| 630 | return ERR_PTR(-ENOMEM); | 423 | return ERR_PTR(-ENOMEM); |
| 631 | 424 | ||
| 632 | mutex_init(&client->mount_mutex); | 425 | fsc->client = ceph_create_client(opt, fsc); |
| 633 | 426 | if (IS_ERR(fsc->client)) { | |
| 634 | init_waitqueue_head(&client->auth_wq); | 427 | err = PTR_ERR(fsc->client); |
| 428 | goto fail; | ||
| 429 | } | ||
| 430 | fsc->client->extra_mon_dispatch = extra_mon_dispatch; | ||
| 431 | fsc->client->supported_features |= CEPH_FEATURE_FLOCK; | ||
| 432 | fsc->client->monc.want_mdsmap = 1; | ||
| 635 | 433 | ||
| 636 | client->sb = NULL; | 434 | fsc->mount_options = fsopt; |
| 637 | client->mount_state = CEPH_MOUNT_MOUNTING; | ||
| 638 | client->mount_args = args; | ||
| 639 | 435 | ||
| 640 | client->msgr = NULL; | 436 | fsc->sb = NULL; |
| 437 | fsc->mount_state = CEPH_MOUNT_MOUNTING; | ||
| 641 | 438 | ||
| 642 | client->auth_err = 0; | 439 | atomic_long_set(&fsc->writeback_count, 0); |
| 643 | atomic_long_set(&client->writeback_count, 0); | ||
| 644 | 440 | ||
| 645 | err = bdi_init(&client->backing_dev_info); | 441 | err = bdi_init(&fsc->backing_dev_info); |
| 646 | if (err < 0) | 442 | if (err < 0) |
| 647 | goto fail; | 443 | goto fail_client; |
| 648 | 444 | ||
| 649 | err = -ENOMEM; | 445 | err = -ENOMEM; |
| 650 | client->wb_wq = create_workqueue("ceph-writeback"); | 446 | fsc->wb_wq = create_workqueue("ceph-writeback"); |
| 651 | if (client->wb_wq == NULL) | 447 | if (fsc->wb_wq == NULL) |
| 652 | goto fail_bdi; | 448 | goto fail_bdi; |
| 653 | client->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid"); | 449 | fsc->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid"); |
| 654 | if (client->pg_inv_wq == NULL) | 450 | if (fsc->pg_inv_wq == NULL) |
| 655 | goto fail_wb_wq; | 451 | goto fail_wb_wq; |
| 656 | client->trunc_wq = create_singlethread_workqueue("ceph-trunc"); | 452 | fsc->trunc_wq = create_singlethread_workqueue("ceph-trunc"); |
| 657 | if (client->trunc_wq == NULL) | 453 | if (fsc->trunc_wq == NULL) |
| 658 | goto fail_pg_inv_wq; | 454 | goto fail_pg_inv_wq; |
| 659 | 455 | ||
| 660 | /* set up mempools */ | 456 | /* set up mempools */ |
| 661 | err = -ENOMEM; | 457 | err = -ENOMEM; |
| 662 | client->wb_pagevec_pool = mempool_create_kmalloc_pool(10, | 458 | fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, |
| 663 | client->mount_args->wsize >> PAGE_CACHE_SHIFT); | 459 | fsc->mount_options->wsize >> PAGE_CACHE_SHIFT); |
| 664 | if (!client->wb_pagevec_pool) | 460 | if (!fsc->wb_pagevec_pool) |
| 665 | goto fail_trunc_wq; | 461 | goto fail_trunc_wq; |
| 666 | 462 | ||
| 667 | /* caps */ | 463 | /* caps */ |
| 668 | client->min_caps = args->max_readdir; | 464 | fsc->min_caps = fsopt->max_readdir; |
| 465 | |||
| 466 | return fsc; | ||
| 669 | 467 | ||
| 670 | /* subsystems */ | ||
| 671 | err = ceph_monc_init(&client->monc, client); | ||
| 672 | if (err < 0) | ||
| 673 | goto fail_mempool; | ||
| 674 | err = ceph_osdc_init(&client->osdc, client); | ||
| 675 | if (err < 0) | ||
| 676 | goto fail_monc; | ||
| 677 | err = ceph_mdsc_init(&client->mdsc, client); | ||
| 678 | if (err < 0) | ||
| 679 | goto fail_osdc; | ||
| 680 | return client; | ||
| 681 | |||
| 682 | fail_osdc: | ||
| 683 | ceph_osdc_stop(&client->osdc); | ||
| 684 | fail_monc: | ||
| 685 | ceph_monc_stop(&client->monc); | ||
| 686 | fail_mempool: | ||
| 687 | mempool_destroy(client->wb_pagevec_pool); | ||
| 688 | fail_trunc_wq: | 468 | fail_trunc_wq: |
| 689 | destroy_workqueue(client->trunc_wq); | 469 | destroy_workqueue(fsc->trunc_wq); |
| 690 | fail_pg_inv_wq: | 470 | fail_pg_inv_wq: |
| 691 | destroy_workqueue(client->pg_inv_wq); | 471 | destroy_workqueue(fsc->pg_inv_wq); |
| 692 | fail_wb_wq: | 472 | fail_wb_wq: |
| 693 | destroy_workqueue(client->wb_wq); | 473 | destroy_workqueue(fsc->wb_wq); |
| 694 | fail_bdi: | 474 | fail_bdi: |
| 695 | bdi_destroy(&client->backing_dev_info); | 475 | bdi_destroy(&fsc->backing_dev_info); |
| 476 | fail_client: | ||
| 477 | ceph_destroy_client(fsc->client); | ||
| 696 | fail: | 478 | fail: |
| 697 | kfree(client); | 479 | kfree(fsc); |
| 698 | return ERR_PTR(err); | 480 | return ERR_PTR(err); |
| 699 | } | 481 | } |
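create_fs_client() above uses the usual kernel unwind ladder: each allocation failure jumps to a label that releases only what was already set up, in reverse order of acquisition. A generic userspace sketch of the idiom (the resources are illustrative):

#include <stdlib.h>

struct thing { void *a, *b, *c; };

struct thing *thing_create(void)
{
	struct thing *t = calloc(1, sizeof(*t));

	if (!t)
		return NULL;
	t->a = malloc(16);
	if (!t->a)
		goto fail;	/* nothing but t itself to undo */
	t->b = malloc(16);
	if (!t->b)
		goto fail_a;
	t->c = malloc(16);
	if (!t->c)
		goto fail_b;
	return t;

fail_b:				/* unwind in reverse order */
	free(t->b);
fail_a:
	free(t->a);
fail:
	free(t);
	return NULL;
}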
| 700 | 482 | ||
| 701 | static void ceph_destroy_client(struct ceph_client *client) | 483 | void destroy_fs_client(struct ceph_fs_client *fsc) |
| 702 | { | 484 | { |
| 703 | dout("destroy_client %p\n", client); | 485 | dout("destroy_fs_client %p\n", fsc); |
| 704 | 486 | ||
| 705 | /* unmount */ | 487 | destroy_workqueue(fsc->wb_wq); |
| 706 | ceph_mdsc_stop(&client->mdsc); | 488 | destroy_workqueue(fsc->pg_inv_wq); |
| 707 | ceph_osdc_stop(&client->osdc); | 489 | destroy_workqueue(fsc->trunc_wq); |
| 708 | 490 | ||
| 709 | /* | 491 | bdi_destroy(&fsc->backing_dev_info); |
| 710 | * make sure mds and osd connections close out before destroying | ||
| 711 | * the auth module, which is needed to free those connections' | ||
| 712 | * ceph_authorizers. | ||
| 713 | */ | ||
| 714 | ceph_msgr_flush(); | ||
| 715 | |||
| 716 | ceph_monc_stop(&client->monc); | ||
| 717 | 492 | ||
| 718 | ceph_debugfs_client_cleanup(client); | 493 | mempool_destroy(fsc->wb_pagevec_pool); |
| 719 | destroy_workqueue(client->wb_wq); | ||
| 720 | destroy_workqueue(client->pg_inv_wq); | ||
| 721 | destroy_workqueue(client->trunc_wq); | ||
| 722 | 494 | ||
| 723 | bdi_destroy(&client->backing_dev_info); | 495 | destroy_mount_options(fsc->mount_options); |
| 724 | 496 | ||
| 725 | if (client->msgr) | 497 | ceph_fs_debugfs_cleanup(fsc); |
| 726 | ceph_messenger_destroy(client->msgr); | ||
| 727 | mempool_destroy(client->wb_pagevec_pool); | ||
| 728 | 498 | ||
| 729 | destroy_mount_args(client->mount_args); | 499 | ceph_destroy_client(fsc->client); |
| 730 | 500 | ||
| 731 | kfree(client); | 501 | kfree(fsc); |
| 732 | dout("destroy_client %p done\n", client); | 502 | dout("destroy_fs_client %p done\n", fsc); |
| 733 | } | 503 | } |
| 734 | 504 | ||
| 735 | /* | 505 | /* |
| 736 | * Initially learn our fsid, or verify an fsid matches. | 506 | * caches |
| 737 | */ | 507 | */ |
| 738 | int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | 508 | struct kmem_cache *ceph_inode_cachep; |
| 509 | struct kmem_cache *ceph_cap_cachep; | ||
| 510 | struct kmem_cache *ceph_dentry_cachep; | ||
| 511 | struct kmem_cache *ceph_file_cachep; | ||
| 512 | |||
| 513 | static void ceph_inode_init_once(void *foo) | ||
| 739 | { | 514 | { |
| 740 | if (client->have_fsid) { | 515 | struct ceph_inode_info *ci = foo; |
| 741 | if (ceph_fsid_compare(&client->fsid, fsid)) { | 516 | inode_init_once(&ci->vfs_inode); |
| 742 | pr_err("bad fsid, had %pU got %pU\n", | 517 | } |
| 743 | &client->fsid, fsid); | 518 | |
| 744 | return -1; | 519 | static int __init init_caches(void) |
| 745 | } | 520 | { |
| 746 | } else { | 521 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", |
| 747 | pr_info("client%lld fsid %pU\n", client->monc.auth->global_id, | 522 | sizeof(struct ceph_inode_info), |
| 748 | fsid); | 523 | __alignof__(struct ceph_inode_info), |
| 749 | memcpy(&client->fsid, fsid, sizeof(*fsid)); | 524 | (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), |
| 750 | ceph_debugfs_client_init(client); | 525 | ceph_inode_init_once); |
| 751 | client->have_fsid = true; | 526 | if (ceph_inode_cachep == NULL) |
| 752 | } | 527 | return -ENOMEM; |
| 528 | |||
| 529 | ceph_cap_cachep = KMEM_CACHE(ceph_cap, | ||
| 530 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
| 531 | if (ceph_cap_cachep == NULL) | ||
| 532 | goto bad_cap; | ||
| 533 | |||
| 534 | ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, | ||
| 535 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
| 536 | if (ceph_dentry_cachep == NULL) | ||
| 537 | goto bad_dentry; | ||
| 538 | |||
| 539 | ceph_file_cachep = KMEM_CACHE(ceph_file_info, | ||
| 540 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
| 541 | if (ceph_file_cachep == NULL) | ||
| 542 | goto bad_file; | ||
| 543 | |||
| 753 | return 0; | 544 | return 0; |
| 545 | |||
| 546 | bad_file: | ||
| 547 | kmem_cache_destroy(ceph_dentry_cachep); | ||
| 548 | bad_dentry: | ||
| 549 | kmem_cache_destroy(ceph_cap_cachep); | ||
| 550 | bad_cap: | ||
| 551 | kmem_cache_destroy(ceph_inode_cachep); | ||
| 552 | return -ENOMEM; | ||
| 754 | } | 553 | } |
| 755 | 554 | ||
| 555 | static void destroy_caches(void) | ||
| 556 | { | ||
| 557 | kmem_cache_destroy(ceph_inode_cachep); | ||
| 558 | kmem_cache_destroy(ceph_cap_cachep); | ||
| 559 | kmem_cache_destroy(ceph_dentry_cachep); | ||
| 560 | kmem_cache_destroy(ceph_file_cachep); | ||
| 561 | } | ||
| 562 | |||
| 563 | |||
| 756 | /* | 564 | /* |
| 757 | * true if we have the mon map (and have thus joined the cluster) | 565 | * ceph_umount_begin - initiate forced umount. Tear down down the |
| 566 | * mount, skipping steps that may hang while waiting for server(s). | ||
| 758 | */ | 567 | */ |
| 759 | static int have_mon_and_osd_map(struct ceph_client *client) | 568 | static void ceph_umount_begin(struct super_block *sb) |
| 760 | { | 569 | { |
| 761 | return client->monc.monmap && client->monc.monmap->epoch && | 570 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
| 762 | client->osdc.osdmap && client->osdc.osdmap->epoch; | 571 | |
| 572 | dout("ceph_umount_begin - starting forced umount\n"); | ||
| 573 | if (!fsc) | ||
| 574 | return; | ||
| 575 | fsc->mount_state = CEPH_MOUNT_SHUTDOWN; | ||
| 576 | return; | ||
| 763 | } | 577 | } |
| 764 | 578 | ||
| 579 | static const struct super_operations ceph_super_ops = { | ||
| 580 | .alloc_inode = ceph_alloc_inode, | ||
| 581 | .destroy_inode = ceph_destroy_inode, | ||
| 582 | .write_inode = ceph_write_inode, | ||
| 583 | .sync_fs = ceph_sync_fs, | ||
| 584 | .put_super = ceph_put_super, | ||
| 585 | .show_options = ceph_show_options, | ||
| 586 | .statfs = ceph_statfs, | ||
| 587 | .umount_begin = ceph_umount_begin, | ||
| 588 | }; | ||
| 589 | |||
| 765 | /* | 590 | /* |
| 766 | * Bootstrap mount by opening the root directory. Note the mount | 591 | * Bootstrap mount by opening the root directory. Note the mount |
| 767 | * @started time from caller, and time out if this takes too long. | 592 | * @started time from caller, and time out if this takes too long. |
| 768 | */ | 593 | */ |
| 769 | static struct dentry *open_root_dentry(struct ceph_client *client, | 594 | static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, |
| 770 | const char *path, | 595 | const char *path, |
| 771 | unsigned long started) | 596 | unsigned long started) |
| 772 | { | 597 | { |
| 773 | struct ceph_mds_client *mdsc = &client->mdsc; | 598 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 774 | struct ceph_mds_request *req = NULL; | 599 | struct ceph_mds_request *req = NULL; |
| 775 | int err; | 600 | int err; |
| 776 | struct dentry *root; | 601 | struct dentry *root; |
| @@ -784,14 +609,14 @@ static struct dentry *open_root_dentry(struct ceph_client *client, | |||
| 784 | req->r_ino1.ino = CEPH_INO_ROOT; | 609 | req->r_ino1.ino = CEPH_INO_ROOT; |
| 785 | req->r_ino1.snap = CEPH_NOSNAP; | 610 | req->r_ino1.snap = CEPH_NOSNAP; |
| 786 | req->r_started = started; | 611 | req->r_started = started; |
| 787 | req->r_timeout = client->mount_args->mount_timeout * HZ; | 612 | req->r_timeout = fsc->client->options->mount_timeout * HZ; |
| 788 | req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); | 613 | req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); |
| 789 | req->r_num_caps = 2; | 614 | req->r_num_caps = 2; |
| 790 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 615 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
| 791 | if (err == 0) { | 616 | if (err == 0) { |
| 792 | dout("open_root_inode success\n"); | 617 | dout("open_root_inode success\n"); |
| 793 | if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT && | 618 | if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT && |
| 794 | client->sb->s_root == NULL) | 619 | fsc->sb->s_root == NULL) |
| 795 | root = d_alloc_root(req->r_target_inode); | 620 | root = d_alloc_root(req->r_target_inode); |
| 796 | else | 621 | else |
| 797 | root = d_obtain_alias(req->r_target_inode); | 622 | root = d_obtain_alias(req->r_target_inode); |
| @@ -804,105 +629,86 @@ static struct dentry *open_root_dentry(struct ceph_client *client, | |||
| 804 | return root; | 629 | return root; |
| 805 | } | 630 | } |
| 806 | 631 | ||
| 632 | |||
| 633 | |||
| 634 | |||
| 807 | /* | 635 | /* |
| 808 | * mount: join the ceph cluster, and open root directory. | 636 | * mount: join the ceph cluster, and open root directory. |
| 809 | */ | 637 | */ |
| 810 | static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, | 638 | static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt, |
| 811 | const char *path) | 639 | const char *path) |
| 812 | { | 640 | { |
| 813 | struct ceph_entity_addr *myaddr = NULL; | ||
| 814 | int err; | 641 | int err; |
| 815 | unsigned long timeout = client->mount_args->mount_timeout * HZ; | ||
| 816 | unsigned long started = jiffies; /* note the start time */ | 642 | unsigned long started = jiffies; /* note the start time */ |
| 817 | struct dentry *root; | 643 | struct dentry *root; |
| 644 | int first = 0; /* first vfsmount for this super_block */ | ||
| 818 | 645 | ||
| 819 | dout("mount start\n"); | 646 | dout("mount start\n"); |
| 820 | mutex_lock(&client->mount_mutex); | 647 | mutex_lock(&fsc->client->mount_mutex); |
| 821 | |||
| 822 | /* initialize the messenger */ | ||
| 823 | if (client->msgr == NULL) { | ||
| 824 | if (ceph_test_opt(client, MYIP)) | ||
| 825 | myaddr = &client->mount_args->my_addr; | ||
| 826 | client->msgr = ceph_messenger_create(myaddr); | ||
| 827 | if (IS_ERR(client->msgr)) { | ||
| 828 | err = PTR_ERR(client->msgr); | ||
| 829 | client->msgr = NULL; | ||
| 830 | goto out; | ||
| 831 | } | ||
| 832 | client->msgr->nocrc = ceph_test_opt(client, NOCRC); | ||
| 833 | } | ||
| 834 | 648 | ||
| 835 | /* open session, and wait for mon, mds, and osd maps */ | 649 | err = __ceph_open_session(fsc->client, started); |
| 836 | err = ceph_monc_open_session(&client->monc); | ||
| 837 | if (err < 0) | 650 | if (err < 0) |
| 838 | goto out; | 651 | goto out; |
| 839 | 652 | ||
| 840 | while (!have_mon_and_osd_map(client)) { | ||
| 841 | err = -EIO; | ||
| 842 | if (timeout && time_after_eq(jiffies, started + timeout)) | ||
| 843 | goto out; | ||
| 844 | |||
| 845 | /* wait */ | ||
| 846 | dout("mount waiting for mon_map\n"); | ||
| 847 | err = wait_event_interruptible_timeout(client->auth_wq, | ||
| 848 | have_mon_and_osd_map(client) || (client->auth_err < 0), | ||
| 849 | timeout); | ||
| 850 | if (err == -EINTR || err == -ERESTARTSYS) | ||
| 851 | goto out; | ||
| 852 | if (client->auth_err < 0) { | ||
| 853 | err = client->auth_err; | ||
| 854 | goto out; | ||
| 855 | } | ||
| 856 | } | ||
| 857 | |||
| 858 | dout("mount opening root\n"); | 653 | dout("mount opening root\n"); |
| 859 | root = open_root_dentry(client, "", started); | 654 | root = open_root_dentry(fsc, "", started); |
| 860 | if (IS_ERR(root)) { | 655 | if (IS_ERR(root)) { |
| 861 | err = PTR_ERR(root); | 656 | err = PTR_ERR(root); |
| 862 | goto out; | 657 | goto out; |
| 863 | } | 658 | } |
| 864 | if (client->sb->s_root) | 659 | if (fsc->sb->s_root) { |
| 865 | dput(root); | 660 | dput(root); |
| 866 | else | 661 | } else { |
| 867 | client->sb->s_root = root; | 662 | fsc->sb->s_root = root; |
| 663 | first = 1; | ||
| 664 | |||
| 665 | err = ceph_fs_debugfs_init(fsc); | ||
| 666 | if (err < 0) | ||
| 667 | goto fail; | ||
| 668 | } | ||
| 868 | 669 | ||
| 869 | if (path[0] == 0) { | 670 | if (path[0] == 0) { |
| 870 | dget(root); | 671 | dget(root); |
| 871 | } else { | 672 | } else { |
| 872 | dout("mount opening base mountpoint\n"); | 673 | dout("mount opening base mountpoint\n"); |
| 873 | root = open_root_dentry(client, path, started); | 674 | root = open_root_dentry(fsc, path, started); |
| 874 | if (IS_ERR(root)) { | 675 | if (IS_ERR(root)) { |
| 875 | err = PTR_ERR(root); | 676 | err = PTR_ERR(root); |
| 876 | dput(client->sb->s_root); | 677 | goto fail; |
| 877 | client->sb->s_root = NULL; | ||
| 878 | goto out; | ||
| 879 | } | 678 | } |
| 880 | } | 679 | } |
| 881 | 680 | ||
| 882 | mnt->mnt_root = root; | 681 | mnt->mnt_root = root; |
| 883 | mnt->mnt_sb = client->sb; | 682 | mnt->mnt_sb = fsc->sb; |
| 884 | 683 | ||
| 885 | client->mount_state = CEPH_MOUNT_MOUNTED; | 684 | fsc->mount_state = CEPH_MOUNT_MOUNTED; |
| 886 | dout("mount success\n"); | 685 | dout("mount success\n"); |
| 887 | err = 0; | 686 | err = 0; |
| 888 | 687 | ||
| 889 | out: | 688 | out: |
| 890 | mutex_unlock(&client->mount_mutex); | 689 | mutex_unlock(&fsc->client->mount_mutex); |
| 891 | return err; | 690 | return err; |
| 691 | |||
| 692 | fail: | ||
| 693 | if (first) { | ||
| 694 | dput(fsc->sb->s_root); | ||
| 695 | fsc->sb->s_root = NULL; | ||
| 696 | } | ||
| 697 | goto out; | ||
| 892 | } | 698 | } |
| 893 | 699 | ||
| 894 | static int ceph_set_super(struct super_block *s, void *data) | 700 | static int ceph_set_super(struct super_block *s, void *data) |
| 895 | { | 701 | { |
| 896 | struct ceph_client *client = data; | 702 | struct ceph_fs_client *fsc = data; |
| 897 | int ret; | 703 | int ret; |
| 898 | 704 | ||
| 899 | dout("set_super %p data %p\n", s, data); | 705 | dout("set_super %p data %p\n", s, data); |
| 900 | 706 | ||
| 901 | s->s_flags = client->mount_args->sb_flags; | 707 | s->s_flags = fsc->mount_options->sb_flags; |
| 902 | s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ | 708 | s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ |
| 903 | 709 | ||
| 904 | s->s_fs_info = client; | 710 | s->s_fs_info = fsc; |
| 905 | client->sb = s; | 711 | fsc->sb = s; |
| 906 | 712 | ||
| 907 | s->s_op = &ceph_super_ops; | 713 | s->s_op = &ceph_super_ops; |
| 908 | s->s_export_op = &ceph_export_ops; | 714 | s->s_export_op = &ceph_export_ops; |
| @@ -917,7 +723,7 @@ static int ceph_set_super(struct super_block *s, void *data) | |||
| 917 | 723 | ||
| 918 | fail: | 724 | fail: |
| 919 | s->s_fs_info = NULL; | 725 | s->s_fs_info = NULL; |
| 920 | client->sb = NULL; | 726 | fsc->sb = NULL; |
| 921 | return ret; | 727 | return ret; |
| 922 | } | 728 | } |
| 923 | 729 | ||
| @@ -926,30 +732,23 @@ fail: | |||
| 926 | */ | 732 | */ |
| 927 | static int ceph_compare_super(struct super_block *sb, void *data) | 733 | static int ceph_compare_super(struct super_block *sb, void *data) |
| 928 | { | 734 | { |
| 929 | struct ceph_client *new = data; | 735 | struct ceph_fs_client *new = data; |
| 930 | struct ceph_mount_args *args = new->mount_args; | 736 | struct ceph_mount_options *fsopt = new->mount_options; |
| 931 | struct ceph_client *other = ceph_sb_to_client(sb); | 737 | struct ceph_options *opt = new->client->options; |
| 932 | int i; | 738 | struct ceph_fs_client *other = ceph_sb_to_client(sb); |
| 933 | 739 | ||
| 934 | dout("ceph_compare_super %p\n", sb); | 740 | dout("ceph_compare_super %p\n", sb); |
| 935 | if (args->flags & CEPH_OPT_FSID) { | 741 | |
| 936 | if (ceph_fsid_compare(&args->fsid, &other->fsid)) { | 742 | if (compare_mount_options(fsopt, opt, other)) { |
| 937 | dout("fsid doesn't match\n"); | 743 | dout("monitor(s)/mount options don't match\n"); |
| 938 | return 0; | 744 | return 0; |
| 939 | } | ||
| 940 | } else { | ||
| 941 | /* do we share (a) monitor? */ | ||
| 942 | for (i = 0; i < new->monc.monmap->num_mon; i++) | ||
| 943 | if (ceph_monmap_contains(other->monc.monmap, | ||
| 944 | &new->monc.monmap->mon_inst[i].addr)) | ||
| 945 | break; | ||
| 946 | if (i == new->monc.monmap->num_mon) { | ||
| 947 | dout("mon ip not part of monmap\n"); | ||
| 948 | return 0; | ||
| 949 | } | ||
| 950 | dout("mon ip matches existing sb %p\n", sb); | ||
| 951 | } | 745 | } |
| 952 | if (args->sb_flags != other->mount_args->sb_flags) { | 746 | if ((opt->flags & CEPH_OPT_FSID) && |
| 747 | ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { | ||
| 748 | dout("fsid doesn't match\n"); | ||
| 749 | return 0; | ||
| 750 | } | ||
| 751 | if (fsopt->sb_flags != other->mount_options->sb_flags) { | ||
| 953 | dout("flags differ\n"); | 752 | dout("flags differ\n"); |
| 954 | return 0; | 753 | return 0; |
| 955 | } | 754 | } |
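The ceph_compare_super()/ceph_set_super() pair above are the callbacks sget() uses to decide whether a new mount may share an existing superblock; passing a NULL compare callback (as the NOSHARE option does later in this diff) forces a fresh one. The following is only a sketch of that control flow under simplified assumptions — the real sget() also handles locking and reference counting.

#include <stdio.h>

#define MAX_SB 8

struct sb { int in_use; int sb_flags; void *fs_info; };
static struct sb table[MAX_SB];

static struct sb *sget_like(int (*compare)(struct sb *, void *),
                            int (*set)(struct sb *, void *), void *data)
{
        for (int i = 0; i < MAX_SB; i++)
                if (table[i].in_use && compare && compare(&table[i], data))
                        return &table[i];       /* share the existing sb */
        for (int i = 0; i < MAX_SB; i++)
                if (!table[i].in_use) {
                        table[i].in_use = 1;
                        if (set(&table[i], data) < 0) {
                                table[i].in_use = 0;
                                return NULL;
                        }
                        return &table[i];       /* brand new sb */
                }
        return NULL;
}

struct opts { int sb_flags; };

static int my_compare(struct sb *s, void *data)  /* cf. ceph_compare_super() */
{
        struct opts *o = data;
        return s->sb_flags == o->sb_flags;
}

static int my_set(struct sb *s, void *data)      /* cf. ceph_set_super() */
{
        struct opts *o = data;
        s->sb_flags = o->sb_flags;
        s->fs_info = data;
        return 0;
}

int main(void)
{
        struct opts a = { 1 }, b = { 1 };
        struct sb *s1 = sget_like(my_compare, my_set, &a);
        struct sb *s2 = sget_like(my_compare, my_set, &b);

        printf("second mount shares sb: %s\n", s1 == s2 ? "yes" : "no");
        return 0;
}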
| @@ -961,19 +760,20 @@ static int ceph_compare_super(struct super_block *sb, void *data) | |||
| 961 | */ | 760 | */ |
| 962 | static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); | 761 | static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); |
| 963 | 762 | ||
| 964 | static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | 763 | static int ceph_register_bdi(struct super_block *sb, |
| 764 | struct ceph_fs_client *fsc) | ||
| 965 | { | 765 | { |
| 966 | int err; | 766 | int err; |
| 967 | 767 | ||
| 968 | /* set ra_pages based on rsize mount option? */ | 768 | /* set ra_pages based on rsize mount option? */ |
| 969 | if (client->mount_args->rsize >= PAGE_CACHE_SIZE) | 769 | if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE) |
| 970 | client->backing_dev_info.ra_pages = | 770 | fsc->backing_dev_info.ra_pages = |
| 971 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) | 771 | (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1) |
| 972 | >> PAGE_SHIFT; | 772 | >> PAGE_SHIFT; |
| 973 | err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d", | 773 | err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d", |
| 974 | atomic_long_inc_return(&bdi_seq)); | 774 | atomic_long_inc_return(&bdi_seq)); |
| 975 | if (!err) | 775 | if (!err) |
| 976 | sb->s_bdi = &client->backing_dev_info; | 776 | sb->s_bdi = &fsc->backing_dev_info; |
| 977 | return err; | 777 | return err; |
| 978 | } | 778 | } |
| 979 | 779 | ||
| @@ -982,46 +782,52 @@ static int ceph_get_sb(struct file_system_type *fs_type, | |||
| 982 | struct vfsmount *mnt) | 782 | struct vfsmount *mnt) |
| 983 | { | 783 | { |
| 984 | struct super_block *sb; | 784 | struct super_block *sb; |
| 985 | struct ceph_client *client; | 785 | struct ceph_fs_client *fsc; |
| 986 | int err; | 786 | int err; |
| 987 | int (*compare_super)(struct super_block *, void *) = ceph_compare_super; | 787 | int (*compare_super)(struct super_block *, void *) = ceph_compare_super; |
| 988 | const char *path = NULL; | 788 | const char *path = NULL; |
| 989 | struct ceph_mount_args *args; | 789 | struct ceph_mount_options *fsopt = NULL; |
| 790 | struct ceph_options *opt = NULL; | ||
| 990 | 791 | ||
| 991 | dout("ceph_get_sb\n"); | 792 | dout("ceph_get_sb\n"); |
| 992 | args = parse_mount_args(flags, data, dev_name, &path); | 793 | err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path); |
| 993 | if (IS_ERR(args)) { | 794 | if (err < 0) |
| 994 | err = PTR_ERR(args); | ||
| 995 | goto out_final; | 795 | goto out_final; |
| 996 | } | ||
| 997 | 796 | ||
| 998 | /* create client (which we may/may not use) */ | 797 | /* create client (which we may/may not use) */ |
| 999 | client = ceph_create_client(args); | 798 | fsc = create_fs_client(fsopt, opt); |
| 1000 | if (IS_ERR(client)) { | 799 | if (IS_ERR(fsc)) { |
| 1001 | err = PTR_ERR(client); | 800 | err = PTR_ERR(fsc); |
| 801 | kfree(fsopt); | ||
| 802 | kfree(opt); | ||
| 1002 | goto out_final; | 803 | goto out_final; |
| 1003 | } | 804 | } |
| 1004 | 805 | ||
| 1005 | if (client->mount_args->flags & CEPH_OPT_NOSHARE) | 806 | err = ceph_mdsc_init(fsc); |
| 807 | if (err < 0) | ||
| 808 | goto out; | ||
| 809 | |||
| 810 | if (ceph_test_opt(fsc->client, NOSHARE)) | ||
| 1006 | compare_super = NULL; | 811 | compare_super = NULL; |
| 1007 | sb = sget(fs_type, compare_super, ceph_set_super, client); | 812 | sb = sget(fs_type, compare_super, ceph_set_super, fsc); |
| 1008 | if (IS_ERR(sb)) { | 813 | if (IS_ERR(sb)) { |
| 1009 | err = PTR_ERR(sb); | 814 | err = PTR_ERR(sb); |
| 1010 | goto out; | 815 | goto out; |
| 1011 | } | 816 | } |
| 1012 | 817 | ||
| 1013 | if (ceph_sb_to_client(sb) != client) { | 818 | if (ceph_sb_to_client(sb) != fsc) { |
| 1014 | ceph_destroy_client(client); | 819 | ceph_mdsc_destroy(fsc); |
| 1015 | client = ceph_sb_to_client(sb); | 820 | destroy_fs_client(fsc); |
| 1016 | dout("get_sb got existing client %p\n", client); | 821 | fsc = ceph_sb_to_client(sb); |
| 822 | dout("get_sb got existing client %p\n", fsc); | ||
| 1017 | } else { | 823 | } else { |
| 1018 | dout("get_sb using new client %p\n", client); | 824 | dout("get_sb using new client %p\n", fsc); |
| 1019 | err = ceph_register_bdi(sb, client); | 825 | err = ceph_register_bdi(sb, fsc); |
| 1020 | if (err < 0) | 826 | if (err < 0) |
| 1021 | goto out_splat; | 827 | goto out_splat; |
| 1022 | } | 828 | } |
| 1023 | 829 | ||
| 1024 | err = ceph_mount(client, mnt, path); | 830 | err = ceph_mount(fsc, mnt, path); |
| 1025 | if (err < 0) | 831 | if (err < 0) |
| 1026 | goto out_splat; | 832 | goto out_splat; |
| 1027 | dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root, | 833 | dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root, |
| @@ -1029,12 +835,13 @@ static int ceph_get_sb(struct file_system_type *fs_type, | |||
| 1029 | return 0; | 835 | return 0; |
| 1030 | 836 | ||
| 1031 | out_splat: | 837 | out_splat: |
| 1032 | ceph_mdsc_close_sessions(&client->mdsc); | 838 | ceph_mdsc_close_sessions(fsc->mdsc); |
| 1033 | deactivate_locked_super(sb); | 839 | deactivate_locked_super(sb); |
| 1034 | goto out_final; | 840 | goto out_final; |
| 1035 | 841 | ||
| 1036 | out: | 842 | out: |
| 1037 | ceph_destroy_client(client); | 843 | ceph_mdsc_destroy(fsc); |
| 844 | destroy_fs_client(fsc); | ||
| 1038 | out_final: | 845 | out_final: |
| 1039 | dout("ceph_get_sb fail %d\n", err); | 846 | dout("ceph_get_sb fail %d\n", err); |
| 1040 | return err; | 847 | return err; |
| @@ -1042,11 +849,12 @@ out_final: | |||
| 1042 | 849 | ||
| 1043 | static void ceph_kill_sb(struct super_block *s) | 850 | static void ceph_kill_sb(struct super_block *s) |
| 1044 | { | 851 | { |
| 1045 | struct ceph_client *client = ceph_sb_to_client(s); | 852 | struct ceph_fs_client *fsc = ceph_sb_to_client(s); |
| 1046 | dout("kill_sb %p\n", s); | 853 | dout("kill_sb %p\n", s); |
| 1047 | ceph_mdsc_pre_umount(&client->mdsc); | 854 | ceph_mdsc_pre_umount(fsc->mdsc); |
| 1048 | kill_anon_super(s); /* will call put_super after sb is r/o */ | 855 | kill_anon_super(s); /* will call put_super after sb is r/o */ |
| 1049 | ceph_destroy_client(client); | 856 | ceph_mdsc_destroy(fsc); |
| 857 | destroy_fs_client(fsc); | ||
| 1050 | } | 858 | } |
| 1051 | 859 | ||
| 1052 | static struct file_system_type ceph_fs_type = { | 860 | static struct file_system_type ceph_fs_type = { |
| @@ -1062,36 +870,20 @@ static struct file_system_type ceph_fs_type = { | |||
| 1062 | 870 | ||
| 1063 | static int __init init_ceph(void) | 871 | static int __init init_ceph(void) |
| 1064 | { | 872 | { |
| 1065 | int ret = 0; | 873 | int ret = init_caches(); |
| 1066 | |||
| 1067 | ret = ceph_debugfs_init(); | ||
| 1068 | if (ret < 0) | ||
| 1069 | goto out; | ||
| 1070 | |||
| 1071 | ret = ceph_msgr_init(); | ||
| 1072 | if (ret < 0) | ||
| 1073 | goto out_debugfs; | ||
| 1074 | |||
| 1075 | ret = init_caches(); | ||
| 1076 | if (ret) | 874 | if (ret) |
| 1077 | goto out_msgr; | 875 | goto out; |
| 1078 | 876 | ||
| 1079 | ret = register_filesystem(&ceph_fs_type); | 877 | ret = register_filesystem(&ceph_fs_type); |
| 1080 | if (ret) | 878 | if (ret) |
| 1081 | goto out_icache; | 879 | goto out_icache; |
| 1082 | 880 | ||
| 1083 | pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n", | 881 | pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); |
| 1084 | CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL, | 882 | |
| 1085 | CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, | ||
| 1086 | CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); | ||
| 1087 | return 0; | 883 | return 0; |
| 1088 | 884 | ||
| 1089 | out_icache: | 885 | out_icache: |
| 1090 | destroy_caches(); | 886 | destroy_caches(); |
| 1091 | out_msgr: | ||
| 1092 | ceph_msgr_exit(); | ||
| 1093 | out_debugfs: | ||
| 1094 | ceph_debugfs_cleanup(); | ||
| 1095 | out: | 887 | out: |
| 1096 | return ret; | 888 | return ret; |
| 1097 | } | 889 | } |
| @@ -1101,8 +893,6 @@ static void __exit exit_ceph(void) | |||
| 1101 | dout("exit_ceph\n"); | 893 | dout("exit_ceph\n"); |
| 1102 | unregister_filesystem(&ceph_fs_type); | 894 | unregister_filesystem(&ceph_fs_type); |
| 1103 | destroy_caches(); | 895 | destroy_caches(); |
| 1104 | ceph_msgr_exit(); | ||
| 1105 | ceph_debugfs_cleanup(); | ||
| 1106 | } | 896 | } |
| 1107 | 897 | ||
| 1108 | module_init(init_ceph); | 898 | module_init(init_ceph); |
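The trimmed init_ceph()/exit_ceph() above keep the usual module rule: each successfully completed stage gets its own unwind label, and exit tears things down in the reverse order of init (the messenger and debugfs stages have simply moved into the shared libceph module). A compact sketch of the idiom, with made-up stage functions:

#include <stdio.h>

static int  init_a(void) { puts("init a"); return 0; }
static void exit_a(void) { puts("exit a"); }
static int  init_b(void) { puts("init b"); return -1; /* simulate failure */ }

static int module_init_like(void)
{
        int ret = init_a();

        if (ret)
                goto out;
        ret = init_b();
        if (ret)
                goto out_a;     /* undo only the stages that succeeded */
        return 0;
out_a:
        exit_a();
out:
        return ret;
}

int main(void)
{
        printf("init -> %d\n", module_init_like());
        return 0;
}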
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b87638e84c4b..1886294e12f7 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | #ifndef _FS_CEPH_SUPER_H | 1 | #ifndef _FS_CEPH_SUPER_H |
| 2 | #define _FS_CEPH_SUPER_H | 2 | #define _FS_CEPH_SUPER_H |
| 3 | 3 | ||
| 4 | #include "ceph_debug.h" | 4 | #include <linux/ceph/ceph_debug.h> |
| 5 | 5 | ||
| 6 | #include <asm/unaligned.h> | 6 | #include <asm/unaligned.h> |
| 7 | #include <linux/backing-dev.h> | 7 | #include <linux/backing-dev.h> |
| @@ -14,13 +14,7 @@ | |||
| 14 | #include <linux/writeback.h> | 14 | #include <linux/writeback.h> |
| 15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
| 16 | 16 | ||
| 17 | #include "types.h" | 17 | #include <linux/ceph/libceph.h> |
| 18 | #include "messenger.h" | ||
| 19 | #include "msgpool.h" | ||
| 20 | #include "mon_client.h" | ||
| 21 | #include "mds_client.h" | ||
| 22 | #include "osd_client.h" | ||
| 23 | #include "ceph_fs.h" | ||
| 24 | 18 | ||
| 25 | /* f_type in struct statfs */ | 19 | /* f_type in struct statfs */ |
| 26 | #define CEPH_SUPER_MAGIC 0x00c36400 | 20 | #define CEPH_SUPER_MAGIC 0x00c36400 |
| @@ -30,42 +24,25 @@ | |||
| 30 | #define CEPH_BLOCK_SHIFT 20 /* 1 MB */ | 24 | #define CEPH_BLOCK_SHIFT 20 /* 1 MB */ |
| 31 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) | 25 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) |
| 32 | 26 | ||
| 33 | /* | 27 | #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ |
| 34 | * Supported features | 28 | #define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ |
| 35 | */ | 29 | #define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ |
| 36 | #define CEPH_FEATURE_SUPPORTED CEPH_FEATURE_NOSRCADDR | CEPH_FEATURE_FLOCK | ||
| 37 | #define CEPH_FEATURE_REQUIRED CEPH_FEATURE_NOSRCADDR | ||
| 38 | 30 | ||
| 39 | /* | 31 | #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) |
| 40 | * mount options | ||
| 41 | */ | ||
| 42 | #define CEPH_OPT_FSID (1<<0) | ||
| 43 | #define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ | ||
| 44 | #define CEPH_OPT_MYIP (1<<2) /* specified my ip */ | ||
| 45 | #define CEPH_OPT_DIRSTAT (1<<4) /* funky `cat dirname` for stats */ | ||
| 46 | #define CEPH_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ | ||
| 47 | #define CEPH_OPT_NOCRC (1<<6) /* no data crc on writes */ | ||
| 48 | #define CEPH_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ | ||
| 49 | 32 | ||
| 50 | #define CEPH_OPT_DEFAULT (CEPH_OPT_RBYTES) | 33 | #define ceph_set_mount_opt(fsc, opt) \ |
| 34 | (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt; | ||
| 35 | #define ceph_test_mount_opt(fsc, opt) \ | ||
| 36 | (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt)) | ||
| 51 | 37 | ||
| 52 | #define ceph_set_opt(client, opt) \ | 38 | #define CEPH_MAX_READDIR_DEFAULT 1024 |
| 53 | (client)->mount_args->flags |= CEPH_OPT_##opt; | 39 | #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) |
| 54 | #define ceph_test_opt(client, opt) \ | 40 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" |
| 55 | (!!((client)->mount_args->flags & CEPH_OPT_##opt)) | ||
| 56 | 41 | ||
| 57 | 42 | struct ceph_mount_options { | |
| 58 | struct ceph_mount_args { | ||
| 59 | int sb_flags; | ||
| 60 | int flags; | 43 | int flags; |
| 61 | struct ceph_fsid fsid; | 44 | int sb_flags; |
| 62 | struct ceph_entity_addr my_addr; | 45 | |
| 63 | int num_mon; | ||
| 64 | struct ceph_entity_addr *mon_addr; | ||
| 65 | int mount_timeout; | ||
| 66 | int osd_idle_ttl; | ||
| 67 | int osd_timeout; | ||
| 68 | int osd_keepalive_timeout; | ||
| 69 | int wsize; | 46 | int wsize; |
| 70 | int rsize; /* max readahead */ | 47 | int rsize; /* max readahead */ |
| 71 | int congestion_kb; /* max writeback in flight */ | 48 | int congestion_kb; /* max writeback in flight */ |
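The new CEPH_MOUNT_OPT_* bits and the ceph_set_mount_opt()/ceph_test_mount_opt() macros above rely on two small C tricks: the ## operator pastes the caller's short option name onto the flag prefix, and !! folds any non-zero bit pattern down to 0 or 1. A standalone illustration (flag names invented for the demo):

#include <stdio.h>

#define OPT_DIRSTAT (1 << 4)
#define OPT_RBYTES  (1 << 5)

struct mount_options { int flags; };

#define set_opt(mo, opt)  ((mo)->flags |= OPT_##opt)    /* pastes OPT_<name> */
#define test_opt(mo, opt) (!!((mo)->flags & OPT_##opt)) /* normalize to 0/1 */

int main(void)
{
        struct mount_options mo = { 0 };

        set_opt(&mo, RBYTES);           /* expands to ... |= OPT_RBYTES */
        printf("rbytes=%d dirstat=%d\n",
               test_opt(&mo, RBYTES), test_opt(&mo, DIRSTAT));
        return 0;
}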
| @@ -73,82 +50,25 @@ struct ceph_mount_args { | |||
| 73 | int cap_release_safety; | 50 | int cap_release_safety; |
| 74 | int max_readdir; /* max readdir result (entries) */ | 51 | int max_readdir; /* max readdir result (entries) */ |
| 75 | int max_readdir_bytes; /* max readdir result (bytes) */ | 52 | int max_readdir_bytes; /* max readdir result (bytes) */ |
| 76 | char *snapdir_name; /* default ".snap" */ | ||
| 77 | char *name; | ||
| 78 | char *secret; | ||
| 79 | }; | ||
| 80 | 53 | ||
| 81 | /* | 54 | /* |
| 82 | * defaults | 55 | * everything above this point can be memcmp'd; everything below |
| 83 | */ | 56 | * is handled in compare_mount_options() |
| 84 | #define CEPH_MOUNT_TIMEOUT_DEFAULT 60 | 57 | */ |
| 85 | #define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */ | ||
| 86 | #define CEPH_OSD_KEEPALIVE_DEFAULT 5 | ||
| 87 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 | ||
| 88 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ | ||
| 89 | #define CEPH_MAX_READDIR_DEFAULT 1024 | ||
| 90 | #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) | ||
| 91 | |||
| 92 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | ||
| 93 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) | ||
| 94 | |||
| 95 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" | ||
| 96 | #define CEPH_AUTH_NAME_DEFAULT "guest" | ||
| 97 | /* | ||
| 98 | * Delay telling the MDS we no longer want caps, in case we reopen | ||
| 99 | * the file. Delay a minimum amount of time, even if we send a cap | ||
| 100 | * message for some other reason. Otherwise, take the opportunity to | ||
| 101 | * update the mds to avoid sending another message later. | ||
| 102 | */ | ||
| 103 | #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ | ||
| 104 | #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ | ||
| 105 | |||
| 106 | #define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) | ||
| 107 | |||
| 108 | /* mount state */ | ||
| 109 | enum { | ||
| 110 | CEPH_MOUNT_MOUNTING, | ||
| 111 | CEPH_MOUNT_MOUNTED, | ||
| 112 | CEPH_MOUNT_UNMOUNTING, | ||
| 113 | CEPH_MOUNT_UNMOUNTED, | ||
| 114 | CEPH_MOUNT_SHUTDOWN, | ||
| 115 | }; | ||
| 116 | |||
| 117 | /* | ||
| 118 | * subtract jiffies | ||
| 119 | */ | ||
| 120 | static inline unsigned long time_sub(unsigned long a, unsigned long b) | ||
| 121 | { | ||
| 122 | BUG_ON(time_after(b, a)); | ||
| 123 | return (long)a - (long)b; | ||
| 124 | } | ||
| 125 | |||
| 126 | /* | ||
| 127 | * per-filesystem client state | ||
| 128 | * | ||
| 129 | * possibly shared by multiple mount points, if they are | ||
| 130 | * mounting the same ceph filesystem/cluster. | ||
| 131 | */ | ||
| 132 | struct ceph_client { | ||
| 133 | struct ceph_fsid fsid; | ||
| 134 | bool have_fsid; | ||
| 135 | 58 | ||
| 136 | struct mutex mount_mutex; /* serialize mount attempts */ | 59 | char *snapdir_name; /* default ".snap" */ |
| 137 | struct ceph_mount_args *mount_args; | 60 | }; |
| 138 | 61 | ||
| 62 | struct ceph_fs_client { | ||
| 139 | struct super_block *sb; | 63 | struct super_block *sb; |
| 140 | 64 | ||
| 141 | unsigned long mount_state; | 65 | struct ceph_mount_options *mount_options; |
| 142 | wait_queue_head_t auth_wq; | 66 | struct ceph_client *client; |
| 143 | |||
| 144 | int auth_err; | ||
| 145 | 67 | ||
| 68 | unsigned long mount_state; | ||
| 146 | int min_caps; /* min caps i added */ | 69 | int min_caps; /* min caps i added */ |
| 147 | 70 | ||
| 148 | struct ceph_messenger *msgr; /* messenger instance */ | 71 | struct ceph_mds_client *mdsc; |
| 149 | struct ceph_mon_client monc; | ||
| 150 | struct ceph_mds_client mdsc; | ||
| 151 | struct ceph_osd_client osdc; | ||
| 152 | 72 | ||
| 153 | /* writeback */ | 73 | /* writeback */ |
| 154 | mempool_t *wb_pagevec_pool; | 74 | mempool_t *wb_pagevec_pool; |
| @@ -160,14 +80,14 @@ struct ceph_client { | |||
| 160 | struct backing_dev_info backing_dev_info; | 80 | struct backing_dev_info backing_dev_info; |
| 161 | 81 | ||
| 162 | #ifdef CONFIG_DEBUG_FS | 82 | #ifdef CONFIG_DEBUG_FS |
| 163 | struct dentry *debugfs_monmap; | 83 | struct dentry *debugfs_dentry_lru, *debugfs_caps; |
| 164 | struct dentry *debugfs_mdsmap, *debugfs_osdmap; | ||
| 165 | struct dentry *debugfs_dir, *debugfs_dentry_lru, *debugfs_caps; | ||
| 166 | struct dentry *debugfs_congestion_kb; | 84 | struct dentry *debugfs_congestion_kb; |
| 167 | struct dentry *debugfs_bdi; | 85 | struct dentry *debugfs_bdi; |
| 86 | struct dentry *debugfs_mdsc, *debugfs_mdsmap; | ||
| 168 | #endif | 87 | #endif |
| 169 | }; | 88 | }; |
| 170 | 89 | ||
| 90 | |||
| 171 | /* | 91 | /* |
| 172 | * File i/o capability. This tracks shared state with the metadata | 92 | * File i/o capability. This tracks shared state with the metadata |
| 173 | * server that allows us to cache or writeback attributes or to read | 93 | * server that allows us to cache or writeback attributes or to read |
| @@ -275,6 +195,20 @@ struct ceph_inode_xattr { | |||
| 275 | int should_free_val; | 195 | int should_free_val; |
| 276 | }; | 196 | }; |
| 277 | 197 | ||
| 198 | /* | ||
| 199 | * Ceph dentry state | ||
| 200 | */ | ||
| 201 | struct ceph_dentry_info { | ||
| 202 | struct ceph_mds_session *lease_session; | ||
| 203 | u32 lease_gen, lease_shared_gen; | ||
| 204 | u32 lease_seq; | ||
| 205 | unsigned long lease_renew_after, lease_renew_from; | ||
| 206 | struct list_head lru; | ||
| 207 | struct dentry *dentry; | ||
| 208 | u64 time; | ||
| 209 | u64 offset; | ||
| 210 | }; | ||
| 211 | |||
| 278 | struct ceph_inode_xattrs_info { | 212 | struct ceph_inode_xattrs_info { |
| 279 | /* | 213 | /* |
| 280 | * (still encoded) xattr blob. we avoid the overhead of parsing | 214 | * (still encoded) xattr blob. we avoid the overhead of parsing |
| @@ -296,11 +230,6 @@ struct ceph_inode_xattrs_info { | |||
| 296 | /* | 230 | /* |
| 297 | * Ceph inode. | 231 | * Ceph inode. |
| 298 | */ | 232 | */ |
| 299 | #define CEPH_I_COMPLETE 1 /* we have complete directory cached */ | ||
| 300 | #define CEPH_I_NODELAY 4 /* do not delay cap release */ | ||
| 301 | #define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ | ||
| 302 | #define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ | ||
| 303 | |||
| 304 | struct ceph_inode_info { | 233 | struct ceph_inode_info { |
| 305 | struct ceph_vino i_vino; /* ceph ino + snap */ | 234 | struct ceph_vino i_vino; /* ceph ino + snap */ |
| 306 | 235 | ||
| @@ -391,6 +320,63 @@ static inline struct ceph_inode_info *ceph_inode(struct inode *inode) | |||
| 391 | return container_of(inode, struct ceph_inode_info, vfs_inode); | 320 | return container_of(inode, struct ceph_inode_info, vfs_inode); |
| 392 | } | 321 | } |
| 393 | 322 | ||
| 323 | static inline struct ceph_vino ceph_vino(struct inode *inode) | ||
| 324 | { | ||
| 325 | return ceph_inode(inode)->i_vino; | ||
| 326 | } | ||
| 327 | |||
| 328 | /* | ||
| 329 | * ino_t is <64 bits on many architectures, blech. | ||
| 330 | * | ||
| 331 | * don't include snap in ino hash, at least for now. | ||
| 332 | */ | ||
| 333 | static inline ino_t ceph_vino_to_ino(struct ceph_vino vino) | ||
| 334 | { | ||
| 335 | ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */ | ||
| 336 | #if BITS_PER_LONG == 32 | ||
| 337 | ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8; | ||
| 338 | if (!ino) | ||
| 339 | ino = 1; | ||
| 340 | #endif | ||
| 341 | return ino; | ||
| 342 | } | ||
| 343 | |||
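ceph_vino_to_ino() above has to squeeze a 64-bit Ceph inode number into an ino_t that may be only 32 bits wide: it XORs the upper half into the lower half so distinct inos still tend to land on distinct hash keys, and reserves 0 as invalid. A userspace demo with the widths pinned so the fold is visible on any host:

#include <stdio.h>
#include <stdint.h>

static uint32_t fold_ino(uint64_t ino64)
{
        uint32_t ino = (uint32_t)ino64;

        /* fold the high 32 bits into the low 32 bits */
        ino ^= ino64 >> (sizeof(uint64_t) - sizeof(uint32_t)) * 8;
        if (!ino)
                ino = 1;        /* 0 is not a valid inode number */
        return ino;
}

int main(void)
{
        /* 0x9abcdef0 ^ 0x12345678 == 0x88888888 */
        printf("%#x\n", fold_ino(0x123456789abcdef0ULL));
        /* low and high halves cancel: 0x1 ^ 0x1 == 0, forced to 1 */
        printf("%#x\n", fold_ino(0x0000000100000001ULL));
        return 0;
}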
| 344 | /* for printf-style formatting */ | ||
| 345 | #define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap | ||
| 346 | |||
| 347 | static inline u64 ceph_ino(struct inode *inode) | ||
| 348 | { | ||
| 349 | return ceph_inode(inode)->i_vino.ino; | ||
| 350 | } | ||
| 351 | static inline u64 ceph_snap(struct inode *inode) | ||
| 352 | { | ||
| 353 | return ceph_inode(inode)->i_vino.snap; | ||
| 354 | } | ||
| 355 | |||
| 356 | static inline int ceph_ino_compare(struct inode *inode, void *data) | ||
| 357 | { | ||
| 358 | struct ceph_vino *pvino = (struct ceph_vino *)data; | ||
| 359 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
| 360 | return ci->i_vino.ino == pvino->ino && | ||
| 361 | ci->i_vino.snap == pvino->snap; | ||
| 362 | } | ||
| 363 | |||
| 364 | static inline struct inode *ceph_find_inode(struct super_block *sb, | ||
| 365 | struct ceph_vino vino) | ||
| 366 | { | ||
| 367 | ino_t t = ceph_vino_to_ino(vino); | ||
| 368 | return ilookup5(sb, t, ceph_ino_compare, &vino); | ||
| 369 | } | ||
| 370 | |||
| 371 | |||
| 372 | /* | ||
| 373 | * Ceph inode. | ||
| 374 | */ | ||
| 375 | #define CEPH_I_COMPLETE 1 /* we have complete directory cached */ | ||
| 376 | #define CEPH_I_NODELAY 4 /* do not delay cap release */ | ||
| 377 | #define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ | ||
| 378 | #define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ | ||
| 379 | |||
| 394 | static inline void ceph_i_clear(struct inode *inode, unsigned mask) | 380 | static inline void ceph_i_clear(struct inode *inode, unsigned mask) |
| 395 | { | 381 | { |
| 396 | struct ceph_inode_info *ci = ceph_inode(inode); | 382 | struct ceph_inode_info *ci = ceph_inode(inode); |
| @@ -414,8 +400,9 @@ static inline bool ceph_i_test(struct inode *inode, unsigned mask) | |||
| 414 | struct ceph_inode_info *ci = ceph_inode(inode); | 400 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 415 | bool r; | 401 | bool r; |
| 416 | 402 | ||
| 417 | smp_mb(); | 403 | spin_lock(&inode->i_lock); |
| 418 | r = (ci->i_ceph_flags & mask) == mask; | 404 | r = (ci->i_ceph_flags & mask) == mask; |
| 405 | spin_unlock(&inode->i_lock); | ||
| 419 | return r; | 406 | return r; |
| 420 | } | 407 | } |
| 421 | 408 | ||
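The ceph_i_test() hunk above swaps a bare smp_mb() for taking inode->i_lock around the read, so the flags word is read under the same lock its writers hold instead of relying on a barrier with nothing matching on the read side. A pthread analogue of "test the flags under the writers' lock" — structure and names are illustrative only:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
        pthread_mutex_t lock;
        unsigned flags;
};

static void obj_set(struct obj *o, unsigned mask)
{
        pthread_mutex_lock(&o->lock);
        o->flags |= mask;
        pthread_mutex_unlock(&o->lock);
}

static bool obj_test(struct obj *o, unsigned mask)
{
        bool r;

        pthread_mutex_lock(&o->lock);   /* same lock the writers take */
        r = (o->flags & mask) == mask;
        pthread_mutex_unlock(&o->lock);
        return r;
}

int main(void)
{
        struct obj o = { PTHREAD_MUTEX_INITIALIZER, 0 };

        obj_set(&o, 1 | 4);
        printf("%d %d\n", obj_test(&o, 1 | 4), obj_test(&o, 8));
        return 0;
}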
| @@ -432,20 +419,6 @@ extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, | |||
| 432 | struct ceph_inode_frag *pfrag, | 419 | struct ceph_inode_frag *pfrag, |
| 433 | int *found); | 420 | int *found); |
| 434 | 421 | ||
| 435 | /* | ||
| 436 | * Ceph dentry state | ||
| 437 | */ | ||
| 438 | struct ceph_dentry_info { | ||
| 439 | struct ceph_mds_session *lease_session; | ||
| 440 | u32 lease_gen, lease_shared_gen; | ||
| 441 | u32 lease_seq; | ||
| 442 | unsigned long lease_renew_after, lease_renew_from; | ||
| 443 | struct list_head lru; | ||
| 444 | struct dentry *dentry; | ||
| 445 | u64 time; | ||
| 446 | u64 offset; | ||
| 447 | }; | ||
| 448 | |||
| 449 | static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry) | 422 | static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry) |
| 450 | { | 423 | { |
| 451 | return (struct ceph_dentry_info *)dentry->d_fsdata; | 424 | return (struct ceph_dentry_info *)dentry->d_fsdata; |
| @@ -456,22 +429,6 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off) | |||
| 456 | return ((loff_t)frag << 32) | (loff_t)off; | 429 | return ((loff_t)frag << 32) | (loff_t)off; |
| 457 | } | 430 | } |
| 458 | 431 | ||
| 459 | /* | ||
| 460 | * ino_t is <64 bits on many architectures, blech. | ||
| 461 | * | ||
| 462 | * don't include snap in ino hash, at least for now. | ||
| 463 | */ | ||
| 464 | static inline ino_t ceph_vino_to_ino(struct ceph_vino vino) | ||
| 465 | { | ||
| 466 | ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */ | ||
| 467 | #if BITS_PER_LONG == 32 | ||
| 468 | ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8; | ||
| 469 | if (!ino) | ||
| 470 | ino = 1; | ||
| 471 | #endif | ||
| 472 | return ino; | ||
| 473 | } | ||
| 474 | |||
| 475 | static inline int ceph_set_ino_cb(struct inode *inode, void *data) | 432 | static inline int ceph_set_ino_cb(struct inode *inode, void *data) |
| 476 | { | 433 | { |
| 477 | ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; | 434 | ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; |
| @@ -479,39 +436,6 @@ static inline int ceph_set_ino_cb(struct inode *inode, void *data) | |||
| 479 | return 0; | 436 | return 0; |
| 480 | } | 437 | } |
| 481 | 438 | ||
| 482 | static inline struct ceph_vino ceph_vino(struct inode *inode) | ||
| 483 | { | ||
| 484 | return ceph_inode(inode)->i_vino; | ||
| 485 | } | ||
| 486 | |||
| 487 | /* for printf-style formatting */ | ||
| 488 | #define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap | ||
| 489 | |||
| 490 | static inline u64 ceph_ino(struct inode *inode) | ||
| 491 | { | ||
| 492 | return ceph_inode(inode)->i_vino.ino; | ||
| 493 | } | ||
| 494 | static inline u64 ceph_snap(struct inode *inode) | ||
| 495 | { | ||
| 496 | return ceph_inode(inode)->i_vino.snap; | ||
| 497 | } | ||
| 498 | |||
| 499 | static inline int ceph_ino_compare(struct inode *inode, void *data) | ||
| 500 | { | ||
| 501 | struct ceph_vino *pvino = (struct ceph_vino *)data; | ||
| 502 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
| 503 | return ci->i_vino.ino == pvino->ino && | ||
| 504 | ci->i_vino.snap == pvino->snap; | ||
| 505 | } | ||
| 506 | |||
| 507 | static inline struct inode *ceph_find_inode(struct super_block *sb, | ||
| 508 | struct ceph_vino vino) | ||
| 509 | { | ||
| 510 | ino_t t = ceph_vino_to_ino(vino); | ||
| 511 | return ilookup5(sb, t, ceph_ino_compare, &vino); | ||
| 512 | } | ||
| 513 | |||
| 514 | |||
| 515 | /* | 439 | /* |
| 516 | * caps helpers | 440 | * caps helpers |
| 517 | */ | 441 | */ |
| @@ -576,18 +500,18 @@ extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, | |||
| 576 | struct ceph_cap_reservation *ctx, int need); | 500 | struct ceph_cap_reservation *ctx, int need); |
| 577 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, | 501 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, |
| 578 | struct ceph_cap_reservation *ctx); | 502 | struct ceph_cap_reservation *ctx); |
| 579 | extern void ceph_reservation_status(struct ceph_client *client, | 503 | extern void ceph_reservation_status(struct ceph_fs_client *client, |
| 580 | int *total, int *avail, int *used, | 504 | int *total, int *avail, int *used, |
| 581 | int *reserved, int *min); | 505 | int *reserved, int *min); |
| 582 | 506 | ||
| 583 | static inline struct ceph_client *ceph_inode_to_client(struct inode *inode) | 507 | static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode) |
| 584 | { | 508 | { |
| 585 | return (struct ceph_client *)inode->i_sb->s_fs_info; | 509 | return (struct ceph_fs_client *)inode->i_sb->s_fs_info; |
| 586 | } | 510 | } |
| 587 | 511 | ||
| 588 | static inline struct ceph_client *ceph_sb_to_client(struct super_block *sb) | 512 | static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb) |
| 589 | { | 513 | { |
| 590 | return (struct ceph_client *)sb->s_fs_info; | 514 | return (struct ceph_fs_client *)sb->s_fs_info; |
| 591 | } | 515 | } |
| 592 | 516 | ||
| 593 | 517 | ||
| @@ -617,51 +541,6 @@ struct ceph_file_info { | |||
| 617 | 541 | ||
| 618 | 542 | ||
| 619 | /* | 543 | /* |
| 620 | * snapshots | ||
| 621 | */ | ||
| 622 | |||
| 623 | /* | ||
| 624 | * A "snap context" is the set of existing snapshots when we | ||
| 625 | * write data. It is used by the OSD to guide its COW behavior. | ||
| 626 | * | ||
| 627 | * The ceph_snap_context is refcounted, and attached to each dirty | ||
| 628 | * page, indicating which context the dirty data belonged to when it was | ||
| 629 | * dirtied. | ||
| 630 | */ | ||
| 631 | struct ceph_snap_context { | ||
| 632 | atomic_t nref; | ||
| 633 | u64 seq; | ||
| 634 | int num_snaps; | ||
| 635 | u64 snaps[]; | ||
| 636 | }; | ||
| 637 | |||
| 638 | static inline struct ceph_snap_context * | ||
| 639 | ceph_get_snap_context(struct ceph_snap_context *sc) | ||
| 640 | { | ||
| 641 | /* | ||
| 642 | printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||
| 643 | atomic_read(&sc->nref)+1); | ||
| 644 | */ | ||
| 645 | if (sc) | ||
| 646 | atomic_inc(&sc->nref); | ||
| 647 | return sc; | ||
| 648 | } | ||
| 649 | |||
| 650 | static inline void ceph_put_snap_context(struct ceph_snap_context *sc) | ||
| 651 | { | ||
| 652 | if (!sc) | ||
| 653 | return; | ||
| 654 | /* | ||
| 655 | printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||
| 656 | atomic_read(&sc->nref)-1); | ||
| 657 | */ | ||
| 658 | if (atomic_dec_and_test(&sc->nref)) { | ||
| 659 | /*printk(" deleting snap_context %p\n", sc);*/ | ||
| 660 | kfree(sc); | ||
| 661 | } | ||
| 662 | } | ||
| 663 | |||
| 664 | /* | ||
| 665 | * A "snap realm" describes a subset of the file hierarchy sharing | 544 | * A "snap realm" describes a subset of the file hierarchy sharing |
| 666 | * the same set of snapshots that apply to it. The realms themselves | 545 | * the same set of snapshots that apply to it. The realms themselves |
| 667 | * are organized into a hierarchy, such that children inherit (some of) | 546 | * are organized into a hierarchy, such that children inherit (some of) |
| @@ -699,16 +578,33 @@ struct ceph_snap_realm { | |||
| 699 | spinlock_t inodes_with_caps_lock; | 578 | spinlock_t inodes_with_caps_lock; |
| 700 | }; | 579 | }; |
| 701 | 580 | ||
| 702 | 581 | static inline int default_congestion_kb(void) | |
| 703 | |||
| 704 | /* | ||
| 705 | * calculate the number of pages a given length and offset map onto, | ||
| 706 | * if we align the data. | ||
| 707 | */ | ||
| 708 | static inline int calc_pages_for(u64 off, u64 len) | ||
| 709 | { | 582 | { |
| 710 | return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) - | 583 | int congestion_kb; |
| 711 | (off >> PAGE_CACHE_SHIFT); | 584 | |
| 585 | /* | ||
| 586 | * Copied from NFS | ||
| 587 | * | ||
| 588 | * congestion size, scale with available memory. | ||
| 589 | * | ||
| 590 | * 64MB: 8192k | ||
| 591 | * 128MB: 11585k | ||
| 592 | * 256MB: 16384k | ||
| 593 | * 512MB: 23170k | ||
| 594 | * 1GB: 32768k | ||
| 595 | * 2GB: 46340k | ||
| 596 | * 4GB: 65536k | ||
| 597 | * 8GB: 92681k | ||
| 598 | * 16GB: 131072k | ||
| 599 | * | ||
| 600 | * This allows larger machines to have larger/more transfers. | ||
| 601 | * Limit the default to 256M | ||
| 602 | */ | ||
| 603 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
| 604 | if (congestion_kb > 256*1024) | ||
| 605 | congestion_kb = 256*1024; | ||
| 606 | |||
| 607 | return congestion_kb; | ||
| 712 | } | 608 | } |
| 713 | 609 | ||
| 714 | 610 | ||
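default_congestion_kb() above scales the writeback congestion threshold with the square root of total RAM (a heuristic borrowed from NFS), capped at 256 MB. The table in its comment can be reproduced in userspace; values may differ by a few KB from the comment because of integer square-root truncation. Assumes 4 KiB pages; build with -lm.

#include <stdio.h>
#include <math.h>

static long long congestion_kb(unsigned long long totalram_pages,
                               int page_shift)
{
        long long kb = (16 * (long long)sqrt((double)totalram_pages))
                        << (page_shift - 10);

        if (kb > 256 * 1024)
                kb = 256 * 1024;        /* cap the default at 256 MB */
        return kb;
}

int main(void)
{
        int page_shift = 12;            /* assume 4 KiB pages */

        for (long long mb = 64; mb <= 16384; mb *= 2) {
                unsigned long long pages = (mb << 20) >> page_shift;

                printf("%6lld MB -> %lldk\n",
                       mb, congestion_kb(pages, page_shift));
        }
        return 0;
}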
| @@ -741,16 +637,6 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci) | |||
| 741 | ci_item)->writing; | 637 | ci_item)->writing; |
| 742 | } | 638 | } |
| 743 | 639 | ||
| 744 | |||
| 745 | /* super.c */ | ||
| 746 | extern struct kmem_cache *ceph_inode_cachep; | ||
| 747 | extern struct kmem_cache *ceph_cap_cachep; | ||
| 748 | extern struct kmem_cache *ceph_dentry_cachep; | ||
| 749 | extern struct kmem_cache *ceph_file_cachep; | ||
| 750 | |||
| 751 | extern const char *ceph_msg_type_name(int type); | ||
| 752 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); | ||
| 753 | |||
| 754 | /* inode.c */ | 640 | /* inode.c */ |
| 755 | extern const struct inode_operations ceph_file_iops; | 641 | extern const struct inode_operations ceph_file_iops; |
| 756 | 642 | ||
| @@ -857,12 +743,18 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); | |||
| 857 | /* file.c */ | 743 | /* file.c */ |
| 858 | extern const struct file_operations ceph_file_fops; | 744 | extern const struct file_operations ceph_file_fops; |
| 859 | extern const struct address_space_operations ceph_aops; | 745 | extern const struct address_space_operations ceph_aops; |
| 746 | extern int ceph_copy_to_page_vector(struct page **pages, | ||
| 747 | const char *data, | ||
| 748 | loff_t off, size_t len); | ||
| 749 | extern int ceph_copy_from_page_vector(struct page **pages, | ||
| 750 | char *data, | ||
| 751 | loff_t off, size_t len); | ||
| 752 | extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); | ||
| 860 | extern int ceph_open(struct inode *inode, struct file *file); | 753 | extern int ceph_open(struct inode *inode, struct file *file); |
| 861 | extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, | 754 | extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, |
| 862 | struct nameidata *nd, int mode, | 755 | struct nameidata *nd, int mode, |
| 863 | int locked_dir); | 756 | int locked_dir); |
| 864 | extern int ceph_release(struct inode *inode, struct file *filp); | 757 | extern int ceph_release(struct inode *inode, struct file *filp); |
| 865 | extern void ceph_release_page_vector(struct page **pages, int num_pages); | ||
| 866 | 758 | ||
| 867 | /* dir.c */ | 759 | /* dir.c */ |
| 868 | extern const struct file_operations ceph_dir_fops; | 760 | extern const struct file_operations ceph_dir_fops; |
| @@ -892,12 +784,6 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | |||
| 892 | /* export.c */ | 784 | /* export.c */ |
| 893 | extern const struct export_operations ceph_export_ops; | 785 | extern const struct export_operations ceph_export_ops; |
| 894 | 786 | ||
| 895 | /* debugfs.c */ | ||
| 896 | extern int ceph_debugfs_init(void); | ||
| 897 | extern void ceph_debugfs_cleanup(void); | ||
| 898 | extern int ceph_debugfs_client_init(struct ceph_client *client); | ||
| 899 | extern void ceph_debugfs_client_cleanup(struct ceph_client *client); | ||
| 900 | |||
| 901 | /* locks.c */ | 787 | /* locks.c */ |
| 902 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); | 788 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); |
| 903 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); | 789 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); |
| @@ -914,4 +800,8 @@ static inline struct inode *get_dentry_parent_inode(struct dentry *dentry) | |||
| 914 | return NULL; | 800 | return NULL; |
| 915 | } | 801 | } |
| 916 | 802 | ||
| 803 | /* debugfs.c */ | ||
| 804 | extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); | ||
| 805 | extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); | ||
| 806 | |||
| 917 | #endif /* _FS_CEPH_SUPER_H */ | 807 | #endif /* _FS_CEPH_SUPER_H */ |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 9578af610b73..6e12a6ba5f79 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
| @@ -1,6 +1,9 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | |||
| 2 | #include "super.h" | 3 | #include "super.h" |
| 3 | #include "decode.h" | 4 | #include "mds_client.h" |
| 5 | |||
| 6 | #include <linux/ceph/decode.h> | ||
| 4 | 7 | ||
| 5 | #include <linux/xattr.h> | 8 | #include <linux/xattr.h> |
| 6 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
| @@ -620,12 +623,12 @@ out: | |||
| 620 | static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | 623 | static int ceph_sync_setxattr(struct dentry *dentry, const char *name, |
| 621 | const char *value, size_t size, int flags) | 624 | const char *value, size_t size, int flags) |
| 622 | { | 625 | { |
| 623 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); | 626 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
| 624 | struct inode *inode = dentry->d_inode; | 627 | struct inode *inode = dentry->d_inode; |
| 625 | struct ceph_inode_info *ci = ceph_inode(inode); | 628 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 626 | struct inode *parent_inode = dentry->d_parent->d_inode; | 629 | struct inode *parent_inode = dentry->d_parent->d_inode; |
| 627 | struct ceph_mds_request *req; | 630 | struct ceph_mds_request *req; |
| 628 | struct ceph_mds_client *mdsc = &client->mdsc; | 631 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 629 | int err; | 632 | int err; |
| 630 | int i, nr_pages; | 633 | int i, nr_pages; |
| 631 | struct page **pages = NULL; | 634 | struct page **pages = NULL; |
| @@ -713,10 +716,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name, | |||
| 713 | 716 | ||
| 714 | /* preallocate memory for xattr name, value, index node */ | 717 | /* preallocate memory for xattr name, value, index node */ |
| 715 | err = -ENOMEM; | 718 | err = -ENOMEM; |
| 716 | newname = kmalloc(name_len + 1, GFP_NOFS); | 719 | newname = kmemdup(name, name_len + 1, GFP_NOFS); |
| 717 | if (!newname) | 720 | if (!newname) |
| 718 | goto out; | 721 | goto out; |
| 719 | memcpy(newname, name, name_len + 1); | ||
| 720 | 722 | ||
| 721 | if (val_len) { | 723 | if (val_len) { |
| 722 | newval = kmalloc(val_len + 1, GFP_NOFS); | 724 | newval = kmalloc(val_len + 1, GFP_NOFS); |
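The ceph_setxattr() hunk above replaces an open-coded kmalloc()+memcpy() pair with kmemdup(), which allocates and copies in one step. The userspace equivalent is short enough to show in full (memdup here is our own helper, not a libc function):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* allocate-and-copy in one step, like kmemdup() */
static void *memdup(const void *src, size_t len)
{
        void *p = malloc(len);

        if (p)
                memcpy(p, src, len);
        return p;
}

int main(void)
{
        const char *name = "user.comment";
        char *newname = memdup(name, strlen(name) + 1); /* include the NUL */

        if (!newname)
                return 1;
        printf("%s\n", newname);
        free(newname);
        return 0;
}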
| @@ -777,8 +779,8 @@ out: | |||
| 777 | 779 | ||
| 778 | static int ceph_send_removexattr(struct dentry *dentry, const char *name) | 780 | static int ceph_send_removexattr(struct dentry *dentry, const char *name) |
| 779 | { | 781 | { |
| 780 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); | 782 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
| 781 | struct ceph_mds_client *mdsc = &client->mdsc; | 783 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 782 | struct inode *inode = dentry->d_inode; | 784 | struct inode *inode = dentry->d_inode; |
| 783 | struct inode *parent_inode = dentry->d_parent->d_inode; | 785 | struct inode *parent_inode = dentry->d_parent->d_inode; |
| 784 | struct ceph_mds_request *req; | 786 | struct ceph_mds_request *req; |
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index cc9665522148..c465ae066c62 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | config GFS2_FS | 1 | config GFS2_FS |
| 2 | tristate "GFS2 file system support" | 2 | tristate "GFS2 file system support" |
| 3 | depends on EXPERIMENTAL && (64BIT || LBDAF) | 3 | depends on (64BIT || LBDAF) |
| 4 | select DLM if GFS2_FS_LOCKING_DLM | 4 | select DLM if GFS2_FS_LOCKING_DLM |
| 5 | select CONFIGFS_FS if GFS2_FS_LOCKING_DLM | 5 | select CONFIGFS_FS if GFS2_FS_LOCKING_DLM |
| 6 | select SYSFS if GFS2_FS_LOCKING_DLM | 6 | select SYSFS if GFS2_FS_LOCKING_DLM |
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 194fe16d8418..6b24afb96aae 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
| @@ -36,8 +36,8 @@ | |||
| 36 | #include "glops.h" | 36 | #include "glops.h" |
| 37 | 37 | ||
| 38 | 38 | ||
| 39 | static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, | 39 | void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, |
| 40 | unsigned int from, unsigned int to) | 40 | unsigned int from, unsigned int to) |
| 41 | { | 41 | { |
| 42 | struct buffer_head *head = page_buffers(page); | 42 | struct buffer_head *head = page_buffers(page); |
| 43 | unsigned int bsize = head->b_size; | 43 | unsigned int bsize = head->b_size; |
| @@ -615,7 +615,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
| 615 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; | 615 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; |
| 616 | int alloc_required; | 616 | int alloc_required; |
| 617 | int error = 0; | 617 | int error = 0; |
| 618 | struct gfs2_alloc *al; | 618 | struct gfs2_alloc *al = NULL; |
| 619 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | 619 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
| 620 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); | 620 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); |
| 621 | unsigned to = from + len; | 621 | unsigned to = from + len; |
| @@ -663,6 +663,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
| 663 | rblocks += RES_STATFS + RES_QUOTA; | 663 | rblocks += RES_STATFS + RES_QUOTA; |
| 664 | if (&ip->i_inode == sdp->sd_rindex) | 664 | if (&ip->i_inode == sdp->sd_rindex) |
| 665 | rblocks += 2 * RES_STATFS; | 665 | rblocks += 2 * RES_STATFS; |
| 666 | if (alloc_required) | ||
| 667 | rblocks += gfs2_rg_blocks(al); | ||
| 666 | 668 | ||
| 667 | error = gfs2_trans_begin(sdp, rblocks, | 669 | error = gfs2_trans_begin(sdp, rblocks, |
| 668 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); | 670 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); |
| @@ -696,13 +698,11 @@ out: | |||
| 696 | 698 | ||
| 697 | page_cache_release(page); | 699 | page_cache_release(page); |
| 698 | 700 | ||
| 699 | /* | 701 | gfs2_trans_end(sdp); |
| 700 | * XXX(truncate): the call below should probably be replaced with | ||
| 701 | * a call to the gfs2-specific truncate blocks helper to actually | ||
| 702 | * release disk blocks.. | ||
| 703 | */ | ||
| 704 | if (pos + len > ip->i_inode.i_size) | 702 | if (pos + len > ip->i_inode.i_size) |
| 705 | truncate_setsize(&ip->i_inode, ip->i_inode.i_size); | 703 | gfs2_trim_blocks(&ip->i_inode); |
| 704 | goto out_trans_fail; | ||
| 705 | |||
| 706 | out_endtrans: | 706 | out_endtrans: |
| 707 | gfs2_trans_end(sdp); | 707 | gfs2_trans_end(sdp); |
| 708 | out_trans_fail: | 708 | out_trans_fail: |
| @@ -802,10 +802,8 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, | |||
| 802 | page_cache_release(page); | 802 | page_cache_release(page); |
| 803 | 803 | ||
| 804 | if (copied) { | 804 | if (copied) { |
| 805 | if (inode->i_size < to) { | 805 | if (inode->i_size < to) |
| 806 | i_size_write(inode, to); | 806 | i_size_write(inode, to); |
| 807 | ip->i_disksize = inode->i_size; | ||
| 808 | } | ||
| 809 | gfs2_dinode_out(ip, di); | 807 | gfs2_dinode_out(ip, di); |
| 810 | mark_inode_dirty(inode); | 808 | mark_inode_dirty(inode); |
| 811 | } | 809 | } |
| @@ -876,8 +874,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
| 876 | 874 | ||
| 877 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); | 875 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); |
| 878 | if (ret > 0) { | 876 | if (ret > 0) { |
| 879 | if (inode->i_size > ip->i_disksize) | ||
| 880 | ip->i_disksize = inode->i_size; | ||
| 881 | gfs2_dinode_out(ip, dibh->b_data); | 877 | gfs2_dinode_out(ip, dibh->b_data); |
| 882 | mark_inode_dirty(inode); | 878 | mark_inode_dirty(inode); |
| 883 | } | 879 | } |
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 6f482809d1a3..5476c066d4ee 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
| @@ -50,7 +50,7 @@ struct strip_mine { | |||
| 50 | * @ip: the inode | 50 | * @ip: the inode |
| 51 | * @dibh: the dinode buffer | 51 | * @dibh: the dinode buffer |
| 52 | * @block: the block number that was allocated | 52 | * @block: the block number that was allocated |
| 53 | * @private: any locked page held by the caller process | 53 | * @page: The (optional) page. This is looked up if @page is NULL |
| 54 | * | 54 | * |
| 55 | * Returns: errno | 55 | * Returns: errno |
| 56 | */ | 56 | */ |
| @@ -109,8 +109,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
| 109 | /** | 109 | /** |
| 110 | * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big | 110 | * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big |
| 111 | * @ip: The GFS2 inode to unstuff | 111 | * @ip: The GFS2 inode to unstuff |
| 112 | * @unstuffer: the routine that handles unstuffing a non-zero length file | 112 | * @page: The (optional) page. This is looked up if @page is NULL |
| 113 | * @private: private data for the unstuffer | ||
| 114 | * | 113 | * |
| 115 | * This routine unstuffs a dinode and returns it to a "normal" state such | 114 | * This routine unstuffs a dinode and returns it to a "normal" state such |
| 116 | * that the height can be grown in the traditional way. | 115 | * that the height can be grown in the traditional way. |
| @@ -132,7 +131,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) | |||
| 132 | if (error) | 131 | if (error) |
| 133 | goto out; | 132 | goto out; |
| 134 | 133 | ||
| 135 | if (ip->i_disksize) { | 134 | if (i_size_read(&ip->i_inode)) { |
| 136 | /* Get a free block, fill it with the stuffed data, | 135 | /* Get a free block, fill it with the stuffed data, |
| 137 | and write it out to disk */ | 136 | and write it out to disk */ |
| 138 | 137 | ||
| @@ -161,7 +160,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) | |||
| 161 | di = (struct gfs2_dinode *)dibh->b_data; | 160 | di = (struct gfs2_dinode *)dibh->b_data; |
| 162 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | 161 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); |
| 163 | 162 | ||
| 164 | if (ip->i_disksize) { | 163 | if (i_size_read(&ip->i_inode)) { |
| 165 | *(__be64 *)(di + 1) = cpu_to_be64(block); | 164 | *(__be64 *)(di + 1) = cpu_to_be64(block); |
| 166 | gfs2_add_inode_blocks(&ip->i_inode, 1); | 165 | gfs2_add_inode_blocks(&ip->i_inode, 1); |
| 167 | di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); | 166 | di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); |
| @@ -885,83 +884,14 @@ out: | |||
| 885 | } | 884 | } |
| 886 | 885 | ||
| 887 | /** | 886 | /** |
| 888 | * do_grow - Make a file look bigger than it is | ||
| 889 | * @ip: the inode | ||
| 890 | * @size: the size to set the file to | ||
| 891 | * | ||
| 892 | * Called with an exclusive lock on @ip. | ||
| 893 | * | ||
| 894 | * Returns: errno | ||
| 895 | */ | ||
| 896 | |||
| 897 | static int do_grow(struct gfs2_inode *ip, u64 size) | ||
| 898 | { | ||
| 899 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 900 | struct gfs2_alloc *al; | ||
| 901 | struct buffer_head *dibh; | ||
| 902 | int error; | ||
| 903 | |||
| 904 | al = gfs2_alloc_get(ip); | ||
| 905 | if (!al) | ||
| 906 | return -ENOMEM; | ||
| 907 | |||
| 908 | error = gfs2_quota_lock_check(ip); | ||
| 909 | if (error) | ||
| 910 | goto out; | ||
| 911 | |||
| 912 | al->al_requested = sdp->sd_max_height + RES_DATA; | ||
| 913 | |||
| 914 | error = gfs2_inplace_reserve(ip); | ||
| 915 | if (error) | ||
| 916 | goto out_gunlock_q; | ||
| 917 | |||
| 918 | error = gfs2_trans_begin(sdp, | ||
| 919 | sdp->sd_max_height + al->al_rgd->rd_length + | ||
| 920 | RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0); | ||
| 921 | if (error) | ||
| 922 | goto out_ipres; | ||
| 923 | |||
| 924 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
| 925 | if (error) | ||
| 926 | goto out_end_trans; | ||
| 927 | |||
| 928 | if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { | ||
| 929 | if (gfs2_is_stuffed(ip)) { | ||
| 930 | error = gfs2_unstuff_dinode(ip, NULL); | ||
| 931 | if (error) | ||
| 932 | goto out_brelse; | ||
| 933 | } | ||
| 934 | } | ||
| 935 | |||
| 936 | ip->i_disksize = size; | ||
| 937 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | ||
| 938 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 939 | gfs2_dinode_out(ip, dibh->b_data); | ||
| 940 | |||
| 941 | out_brelse: | ||
| 942 | brelse(dibh); | ||
| 943 | out_end_trans: | ||
| 944 | gfs2_trans_end(sdp); | ||
| 945 | out_ipres: | ||
| 946 | gfs2_inplace_release(ip); | ||
| 947 | out_gunlock_q: | ||
| 948 | gfs2_quota_unlock(ip); | ||
| 949 | out: | ||
| 950 | gfs2_alloc_put(ip); | ||
| 951 | return error; | ||
| 952 | } | ||
| 953 | |||
| 954 | |||
| 955 | /** | ||
| 956 | * gfs2_block_truncate_page - Deal with zeroing out data for truncate | 887 | * gfs2_block_truncate_page - Deal with zeroing out data for truncate |
| 957 | * | 888 | * |
| 958 | * This is partly borrowed from ext3. | 889 | * This is partly borrowed from ext3. |
| 959 | */ | 890 | */ |
| 960 | static int gfs2_block_truncate_page(struct address_space *mapping) | 891 | static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from) |
| 961 | { | 892 | { |
| 962 | struct inode *inode = mapping->host; | 893 | struct inode *inode = mapping->host; |
| 963 | struct gfs2_inode *ip = GFS2_I(inode); | 894 | struct gfs2_inode *ip = GFS2_I(inode); |
| 964 | loff_t from = inode->i_size; | ||
| 965 | unsigned long index = from >> PAGE_CACHE_SHIFT; | 895 | unsigned long index = from >> PAGE_CACHE_SHIFT; |
| 966 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 896 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
| 967 | unsigned blocksize, iblock, length, pos; | 897 | unsigned blocksize, iblock, length, pos; |
| @@ -1023,9 +953,11 @@ unlock: | |||
| 1023 | return err; | 953 | return err; |
| 1024 | } | 954 | } |
| 1025 | 955 | ||
| 1026 | static int trunc_start(struct gfs2_inode *ip, u64 size) | 956 | static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) |
| 1027 | { | 957 | { |
| 1028 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 958 | struct gfs2_inode *ip = GFS2_I(inode); |
| 959 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
| 960 | struct address_space *mapping = inode->i_mapping; | ||
| 1029 | struct buffer_head *dibh; | 961 | struct buffer_head *dibh; |
| 1030 | int journaled = gfs2_is_jdata(ip); | 962 | int journaled = gfs2_is_jdata(ip); |
| 1031 | int error; | 963 | int error; |
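gfs2_block_truncate_page() now takes the truncation point explicitly instead of reading inode->i_size, and splits it into a page index and an in-page offset with the usual shift/mask pair — zeroing is only needed in the tail page, and only when the new size is not block aligned. A small demo of the arithmetic, assuming 4 KiB pages:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
        uint64_t from = 1234567;                  /* new file size in bytes */
        uint64_t index = from >> PAGE_SHIFT;      /* page holding the tail */
        unsigned offset = from & (PAGE_SIZE - 1); /* where the tail ends */

        printf("page %llu, offset %u\n",
               (unsigned long long)index, offset);
        return 0;
}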
| @@ -1039,31 +971,26 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) | |||
| 1039 | if (error) | 971 | if (error) |
| 1040 | goto out; | 972 | goto out; |
| 1041 | 973 | ||
| 974 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 975 | |||
| 1042 | if (gfs2_is_stuffed(ip)) { | 976 | if (gfs2_is_stuffed(ip)) { |
| 1043 | u64 dsize = size + sizeof(struct gfs2_dinode); | 977 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize); |
| 1044 | ip->i_disksize = size; | ||
| 1045 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | ||
| 1046 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 1047 | gfs2_dinode_out(ip, dibh->b_data); | ||
| 1048 | if (dsize > dibh->b_size) | ||
| 1049 | dsize = dibh->b_size; | ||
| 1050 | gfs2_buffer_clear_tail(dibh, dsize); | ||
| 1051 | error = 1; | ||
| 1052 | } else { | 978 | } else { |
| 1053 | if (size & (u64)(sdp->sd_sb.sb_bsize - 1)) | 979 | if (newsize & (u64)(sdp->sd_sb.sb_bsize - 1)) { |
| 1054 | error = gfs2_block_truncate_page(ip->i_inode.i_mapping); | 980 | error = gfs2_block_truncate_page(mapping, newsize); |
| 1055 | 981 | if (error) | |
| 1056 | if (!error) { | 982 | goto out_brelse; |
| 1057 | ip->i_disksize = size; | ||
| 1058 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | ||
| 1059 | ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG; | ||
| 1060 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 1061 | gfs2_dinode_out(ip, dibh->b_data); | ||
| 1062 | } | 983 | } |
| 984 | ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG; | ||
| 1063 | } | 985 | } |
| 1064 | 986 | ||
| 1065 | brelse(dibh); | 987 | i_size_write(inode, newsize); |
| 988 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | ||
| 989 | gfs2_dinode_out(ip, dibh->b_data); | ||
| 1066 | 990 | ||
| 991 | truncate_pagecache(inode, oldsize, newsize); | ||
| 992 | out_brelse: | ||
| 993 | brelse(dibh); | ||
| 1067 | out: | 994 | out: |
| 1068 | gfs2_trans_end(sdp); | 995 | gfs2_trans_end(sdp); |
| 1069 | return error; | 996 | return error; |
| @@ -1123,7 +1050,7 @@ static int trunc_end(struct gfs2_inode *ip) | |||
| 1123 | if (error) | 1050 | if (error) |
| 1124 | goto out; | 1051 | goto out; |
| 1125 | 1052 | ||
| 1126 | if (!ip->i_disksize) { | 1053 | if (!i_size_read(&ip->i_inode)) { |
| 1127 | ip->i_height = 0; | 1054 | ip->i_height = 0; |
| 1128 | ip->i_goal = ip->i_no_addr; | 1055 | ip->i_goal = ip->i_no_addr; |
| 1129 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | 1056 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); |
| @@ -1143,92 +1070,154 @@ out: | |||
| 1143 | 1070 | ||
| 1144 | /** | 1071 | /** |
| 1145 | * do_shrink - make a file smaller | 1072 | * do_shrink - make a file smaller |
| 1146 | * @ip: the inode | 1073 | * @inode: the inode |
| 1147 | * @size: the size to make the file | 1074 | * @oldsize: the current inode size |
| 1148 | * @truncator: function to truncate the last partial block | 1075 | * @newsize: the size to make the file |
| 1149 | * | 1076 | * |
| 1150 | * Called with an exclusive lock on @ip. | 1077 | * Called with an exclusive lock on @inode. The @newsize must |
| 1078 | * be equal to or smaller than the current inode size. | ||
| 1151 | * | 1079 | * |
| 1152 | * Returns: errno | 1080 | * Returns: errno |
| 1153 | */ | 1081 | */ |
| 1154 | 1082 | ||
| 1155 | static int do_shrink(struct gfs2_inode *ip, u64 size) | 1083 | static int do_shrink(struct inode *inode, u64 oldsize, u64 newsize) |
| 1156 | { | 1084 | { |
| 1085 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 1157 | int error; | 1086 | int error; |
| 1158 | 1087 | ||
| 1159 | error = trunc_start(ip, size); | 1088 | error = trunc_start(inode, oldsize, newsize); |
| 1160 | if (error < 0) | 1089 | if (error < 0) |
| 1161 | return error; | 1090 | return error; |
| 1162 | if (error > 0) | 1091 | if (gfs2_is_stuffed(ip)) |
| 1163 | return 0; | 1092 | return 0; |
| 1164 | 1093 | ||
| 1165 | error = trunc_dealloc(ip, size); | 1094 | error = trunc_dealloc(ip, newsize); |
| 1166 | if (!error) | 1095 | if (error == 0) |
| 1167 | error = trunc_end(ip); | 1096 | error = trunc_end(ip); |
| 1168 | 1097 | ||
| 1169 | return error; | 1098 | return error; |
| 1170 | } | 1099 | } |
| 1171 | 1100 | ||
| 1172 | static int do_touch(struct gfs2_inode *ip, u64 size) | 1101 | void gfs2_trim_blocks(struct inode *inode) |
| 1173 | { | 1102 | { |
| 1174 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1103 | u64 size = inode->i_size; |
| 1104 | int ret; | ||
| 1105 | |||
| 1106 | ret = do_shrink(inode, size, size); | ||
| 1107 | WARN_ON(ret != 0); | ||
| 1108 | } | ||
| 1109 | |||
| 1110 | /** | ||
| 1111 | * do_grow - Touch and update inode size | ||
| 1112 | * @inode: The inode | ||
| 1113 | * @size: The new size | ||
| 1114 | * | ||
| 1115 | * This function updates the timestamps on the inode and | ||
| 1116 | * may also increase the size of the inode. This function | ||
| 1117 | * must not be called with @size any smaller than the current | ||
| 1118 | * inode size. | ||
| 1119 | * | ||
| 1120 | * Although it is not strictly required to unstuff files here, | ||
| 1121 | * earlier versions of GFS2 have a bug in the stuffed file reading | ||
| 1122 | * code which will result in a buffer overrun if the size is larger | ||
| 1123 | * than the max stuffed file size. In order to prevent this from | ||
| 1124 | * occurring, such files are unstuffed, but in other cases we can | ||
| 1125 | * just update the inode size directly. | ||
| 1126 | * | ||
| 1127 | * Returns: 0 on success, or -ve on error | ||
| 1128 | */ | ||
| 1129 | |||
| 1130 | static int do_grow(struct inode *inode, u64 size) | ||
| 1131 | { | ||
| 1132 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 1133 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
| 1175 | struct buffer_head *dibh; | 1134 | struct buffer_head *dibh; |
| 1135 | struct gfs2_alloc *al = NULL; | ||
| 1176 | int error; | 1136 | int error; |
| 1177 | 1137 | ||
| 1178 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | 1138 | if (gfs2_is_stuffed(ip) && |
| 1139 | (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { | ||
| 1140 | al = gfs2_alloc_get(ip); | ||
| 1141 | if (al == NULL) | ||
| 1142 | return -ENOMEM; | ||
| 1143 | |||
| 1144 | error = gfs2_quota_lock_check(ip); | ||
| 1145 | if (error) | ||
| 1146 | goto do_grow_alloc_put; | ||
| 1147 | |||
| 1148 | al->al_requested = 1; | ||
| 1149 | error = gfs2_inplace_reserve(ip); | ||
| 1150 | if (error) | ||
| 1151 | goto do_grow_qunlock; | ||
| 1152 | } | ||
| 1153 | |||
| 1154 | error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT, 0); | ||
| 1179 | if (error) | 1155 | if (error) |
| 1180 | return error; | 1156 | goto do_grow_release; |
| 1181 | 1157 | ||
| 1182 | down_write(&ip->i_rw_mutex); | 1158 | if (al) { |
| 1159 | error = gfs2_unstuff_dinode(ip, NULL); | ||
| 1160 | if (error) | ||
| 1161 | goto do_end_trans; | ||
| 1162 | } | ||
| 1183 | 1163 | ||
| 1184 | error = gfs2_meta_inode_buffer(ip, &dibh); | 1164 | error = gfs2_meta_inode_buffer(ip, &dibh); |
| 1185 | if (error) | 1165 | if (error) |
| 1186 | goto do_touch_out; | 1166 | goto do_end_trans; |
| 1187 | 1167 | ||
| 1168 | i_size_write(inode, size); | ||
| 1188 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 1169 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
| 1189 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 1170 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
| 1190 | gfs2_dinode_out(ip, dibh->b_data); | 1171 | gfs2_dinode_out(ip, dibh->b_data); |
| 1191 | brelse(dibh); | 1172 | brelse(dibh); |
| 1192 | 1173 | ||
| 1193 | do_touch_out: | 1174 | do_end_trans: |
| 1194 | up_write(&ip->i_rw_mutex); | ||
| 1195 | gfs2_trans_end(sdp); | 1175 | gfs2_trans_end(sdp); |
| 1176 | do_grow_release: | ||
| 1177 | if (al) { | ||
| 1178 | gfs2_inplace_release(ip); | ||
| 1179 | do_grow_qunlock: | ||
| 1180 | gfs2_quota_unlock(ip); | ||
| 1181 | do_grow_alloc_put: | ||
| 1182 | gfs2_alloc_put(ip); | ||
| 1183 | } | ||
| 1196 | return error; | 1184 | return error; |
| 1197 | } | 1185 | } |
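A file is "stuffed" when its data fits inside the dinode block, so the largest stuffed file is one filesystem block minus the dinode header. A minimal user-space sketch of the threshold test that decides whether do_grow() must reserve blocks and unstuff first; the block size and header size below are assumed illustrative values, not taken from the patch:

    #include <stdio.h>
    #include <stdint.h>

    /* Assumed stand-ins; the real values come from the superblock and
     * sizeof(struct gfs2_dinode) in the kernel sources. */
    #define SB_BSIZE        4096u
    #define DINODE_HDR_SIZE 232u

    /* Mirrors the test at the top of do_grow(): a stuffed inode must be
     * unstuffed before it can grow past one block minus the header. */
    static int grow_needs_unstuff(int is_stuffed, uint64_t newsize)
    {
            return is_stuffed && newsize > (SB_BSIZE - DINODE_HDR_SIZE);
    }

    int main(void)
    {
            printf("grow to 1000: unstuff=%d\n", grow_needs_unstuff(1, 1000));
            printf("grow to 8192: unstuff=%d\n", grow_needs_unstuff(1, 8192));
            return 0;
    }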
| 1198 | 1186 | ||
| 1199 | /** | 1187 | /** |
| 1200 | * gfs2_truncatei - make a file a given size | 1188 | * gfs2_setattr_size - make a file a given size |
| 1201 | * @ip: the inode | 1189 | * @inode: the inode |
| 1202 | * @size: the size to make the file | 1190 | * @newsize: the size to make the file |
| 1203 | * @truncator: function to truncate the last partial block | ||
| 1204 | * | 1191 | * |
| 1205 | * The file size can grow, shrink, or stay the same size. | 1192 | * The file size can grow, shrink, or stay the same size. This |
| 1193 | * is called holding i_mutex and an exclusive glock on the inode | ||
| 1194 | * in question. | ||
| 1206 | * | 1195 | * |
| 1207 | * Returns: errno | 1196 | * Returns: errno |
| 1208 | */ | 1197 | */ |
| 1209 | 1198 | ||
| 1210 | int gfs2_truncatei(struct gfs2_inode *ip, u64 size) | 1199 | int gfs2_setattr_size(struct inode *inode, u64 newsize) |
| 1211 | { | 1200 | { |
| 1212 | int error; | 1201 | int ret; |
| 1202 | u64 oldsize; | ||
| 1213 | 1203 | ||
| 1214 | if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_inode.i_mode))) | 1204 | BUG_ON(!S_ISREG(inode->i_mode)); |
| 1215 | return -EINVAL; | ||
| 1216 | 1205 | ||
| 1217 | if (size > ip->i_disksize) | 1206 | ret = inode_newsize_ok(inode, newsize); |
| 1218 | error = do_grow(ip, size); | 1207 | if (ret) |
| 1219 | else if (size < ip->i_disksize) | 1208 | return ret; |
| 1220 | error = do_shrink(ip, size); | ||
| 1221 | else | ||
| 1222 | /* update time stamps */ | ||
| 1223 | error = do_touch(ip, size); | ||
| 1224 | 1209 | ||
| 1225 | return error; | 1210 | oldsize = inode->i_size; |
| 1211 | if (newsize >= oldsize) | ||
| 1212 | return do_grow(inode, newsize); | ||
| 1213 | |||
| 1214 | return do_shrink(inode, oldsize, newsize); | ||
| 1226 | } | 1215 | } |
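The dispatch in gfs2_setattr_size() is deliberately simple: a new size at or above the current one takes the grow path (which also covers the pure timestamp-touch case), anything smaller shrinks. A self-contained sketch of that decision with stubs standing in for do_grow()/do_shrink():

    #include <stdio.h>
    #include <stdint.h>

    static int grow_stub(uint64_t newsize)
    {
            printf("grow to %llu\n", (unsigned long long)newsize);
            return 0;
    }

    static int shrink_stub(uint64_t oldsize, uint64_t newsize)
    {
            printf("shrink %llu -> %llu\n", (unsigned long long)oldsize,
                   (unsigned long long)newsize);
            return 0;
    }

    /* Mirrors gfs2_setattr_size(): >= grows (or just touches), < shrinks. */
    static int setattr_size(uint64_t oldsize, uint64_t newsize)
    {
            if (newsize >= oldsize)
                    return grow_stub(newsize);
            return shrink_stub(oldsize, newsize);
    }

    int main(void)
    {
            setattr_size(4096, 4096); /* equal: grow path updates timestamps */
            setattr_size(4096, 1024); /* smaller: shrink path */
            return 0;
    }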
| 1227 | 1216 | ||
| 1228 | int gfs2_truncatei_resume(struct gfs2_inode *ip) | 1217 | int gfs2_truncatei_resume(struct gfs2_inode *ip) |
| 1229 | { | 1218 | { |
| 1230 | int error; | 1219 | int error; |
| 1231 | error = trunc_dealloc(ip, ip->i_disksize); | 1220 | error = trunc_dealloc(ip, i_size_read(&ip->i_inode)); |
| 1232 | if (!error) | 1221 | if (!error) |
| 1233 | error = trunc_end(ip); | 1222 | error = trunc_end(ip); |
| 1234 | return error; | 1223 | return error; |
| @@ -1269,7 +1258,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | |||
| 1269 | 1258 | ||
| 1270 | shift = sdp->sd_sb.sb_bsize_shift; | 1259 | shift = sdp->sd_sb.sb_bsize_shift; |
| 1271 | BUG_ON(gfs2_is_dir(ip)); | 1260 | BUG_ON(gfs2_is_dir(ip)); |
| 1272 | end_of_file = (ip->i_disksize + sdp->sd_sb.sb_bsize - 1) >> shift; | 1261 | end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift; |
| 1273 | lblock = offset >> shift; | 1262 | lblock = offset >> shift; |
| 1274 | lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; | 1263 | lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; |
| 1275 | if (lblock_stop > end_of_file) | 1264 | if (lblock_stop > end_of_file) |
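The hunk above only swaps i_disksize for i_size_read() in the block-range computation; the arithmetic itself is a round-up division by the block size done with shifts. A self-contained sketch, assuming 4096-byte blocks (shift 12):

    #include <stdio.h>
    #include <stdint.h>

    #define BSIZE_SHIFT 12u                 /* assumed: 4096-byte blocks */
    #define BSIZE       (1u << BSIZE_SHIFT)

    int main(void)
    {
            uint64_t isize = 10000, offset = 9000, len = 2000;

            /* Round the file size up to whole blocks, as in
             * gfs2_write_alloc_required(). */
            uint64_t end_of_file = (isize + BSIZE - 1) >> BSIZE_SHIFT;
            uint64_t lblock      = offset >> BSIZE_SHIFT;
            uint64_t lblock_stop = (offset + len + BSIZE - 1) >> BSIZE_SHIFT;

            printf("end_of_file=%llu lblock=%llu lblock_stop=%llu past_eof=%d\n",
                   (unsigned long long)end_of_file, (unsigned long long)lblock,
                   (unsigned long long)lblock_stop, lblock_stop > end_of_file);
            return 0;
    }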
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index a20a5213135a..42fea03e2bd9 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h | |||
| @@ -44,14 +44,16 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip, | |||
| 44 | } | 44 | } |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page); | 47 | extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page); |
| 48 | int gfs2_block_map(struct inode *inode, sector_t lblock, struct buffer_head *bh, int create); | 48 | extern int gfs2_block_map(struct inode *inode, sector_t lblock, |
| 49 | int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen); | 49 | struct buffer_head *bh, int create); |
| 50 | 50 | extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, | |
| 51 | int gfs2_truncatei(struct gfs2_inode *ip, u64 size); | 51 | u64 *dblock, unsigned *extlen); |
| 52 | int gfs2_truncatei_resume(struct gfs2_inode *ip); | 52 | extern int gfs2_setattr_size(struct inode *inode, u64 size); |
| 53 | int gfs2_file_dealloc(struct gfs2_inode *ip); | 53 | extern void gfs2_trim_blocks(struct inode *inode); |
| 54 | int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | 54 | extern int gfs2_truncatei_resume(struct gfs2_inode *ip); |
| 55 | unsigned int len); | 55 | extern int gfs2_file_dealloc(struct gfs2_inode *ip); |
| 56 | extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | ||
| 57 | unsigned int len); | ||
| 56 | 58 | ||
| 57 | #endif /* __BMAP_DOT_H__ */ | 59 | #endif /* __BMAP_DOT_H__ */ |
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index bb7907bde3d8..6798755b3858 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c | |||
| @@ -49,7 +49,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) | |||
| 49 | ip = GFS2_I(inode); | 49 | ip = GFS2_I(inode); |
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | if (sdp->sd_args.ar_localcaching) | 52 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) |
| 53 | goto valid; | 53 | goto valid; |
| 54 | 54 | ||
| 55 | had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); | 55 | had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); |
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index b9dd88a78dd4..5c356d09c321 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
| @@ -79,6 +79,9 @@ | |||
| 79 | #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) | 79 | #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) |
| 80 | #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) | 80 | #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) |
| 81 | 81 | ||
| 82 | struct qstr gfs2_qdot __read_mostly; | ||
| 83 | struct qstr gfs2_qdotdot __read_mostly; | ||
| 84 | |||
| 82 | typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len, | 85 | typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len, |
| 83 | u64 leaf_no, void *data); | 86 | u64 leaf_no, void *data); |
| 84 | typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, | 87 | typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, |
| @@ -127,8 +130,8 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, | |||
| 127 | 130 | ||
| 128 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 131 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
| 129 | memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); | 132 | memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); |
| 130 | if (ip->i_disksize < offset + size) | 133 | if (ip->i_inode.i_size < offset + size) |
| 131 | ip->i_disksize = offset + size; | 134 | i_size_write(&ip->i_inode, offset + size); |
| 132 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 135 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
| 133 | gfs2_dinode_out(ip, dibh->b_data); | 136 | gfs2_dinode_out(ip, dibh->b_data); |
| 134 | 137 | ||
| @@ -225,8 +228,8 @@ out: | |||
| 225 | if (error) | 228 | if (error) |
| 226 | return error; | 229 | return error; |
| 227 | 230 | ||
| 228 | if (ip->i_disksize < offset + copied) | 231 | if (ip->i_inode.i_size < offset + copied) |
| 229 | ip->i_disksize = offset + copied; | 232 | i_size_write(&ip->i_inode, offset + copied); |
| 230 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 233 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
| 231 | 234 | ||
| 232 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 235 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
| @@ -275,12 +278,13 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset, | |||
| 275 | unsigned int o; | 278 | unsigned int o; |
| 276 | int copied = 0; | 279 | int copied = 0; |
| 277 | int error = 0; | 280 | int error = 0; |
| 281 | u64 disksize = i_size_read(&ip->i_inode); | ||
| 278 | 282 | ||
| 279 | if (offset >= ip->i_disksize) | 283 | if (offset >= disksize) |
| 280 | return 0; | 284 | return 0; |
| 281 | 285 | ||
| 282 | if (offset + size > ip->i_disksize) | 286 | if (offset + size > disksize) |
| 283 | size = ip->i_disksize - offset; | 287 | size = disksize - offset; |
| 284 | 288 | ||
| 285 | if (!size) | 289 | if (!size) |
| 286 | return 0; | 290 | return 0; |
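The clamping pattern above, reading the size once into a local and trimming the request against it, can be modeled in isolation:

    #include <stdio.h>
    #include <stdint.h>

    /* Mirrors the clamp in gfs2_dir_read_data(): a request past EOF reads
     * nothing, one crossing EOF is shortened. */
    static uint64_t clamp_read(uint64_t disksize, uint64_t offset, uint64_t size)
    {
            if (offset >= disksize)
                    return 0;
            if (offset + size > disksize)
                    size = disksize - offset;
            return size;
    }

    int main(void)
    {
            printf("%llu\n", (unsigned long long)clamp_read(100, 120, 10)); /* 0  */
            printf("%llu\n", (unsigned long long)clamp_read(100, 90, 20));  /* 10 */
            return 0;
    }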
| @@ -727,7 +731,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode, | |||
| 727 | unsigned hsize = 1 << ip->i_depth; | 731 | unsigned hsize = 1 << ip->i_depth; |
| 728 | unsigned index; | 732 | unsigned index; |
| 729 | u64 ln; | 733 | u64 ln; |
| 730 | if (hsize * sizeof(u64) != ip->i_disksize) { | 734 | if (hsize * sizeof(u64) != i_size_read(inode)) { |
| 731 | gfs2_consist_inode(ip); | 735 | gfs2_consist_inode(ip); |
| 732 | return ERR_PTR(-EIO); | 736 | return ERR_PTR(-EIO); |
| 733 | } | 737 | } |
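An exhash directory stores one 64-bit leaf pointer per hash-table slot, so the consistency check just above verifies that 2^i_depth pointers exactly fill the directory's data. A sketch of that invariant:

    #include <stdio.h>
    #include <stdint.h>

    /* Mirrors the check in gfs2_dirent_search() and friends: a table of
     * 2^depth u64 leaf pointers must match the directory size exactly. */
    static int exhash_consistent(unsigned depth, uint64_t dir_size)
    {
            uint64_t hsize = 1ull << depth;
            return hsize * sizeof(uint64_t) == dir_size;
    }

    int main(void)
    {
            printf("%d\n", exhash_consistent(9, 4096));  /* 512 * 8 == 4096: ok */
            printf("%d\n", exhash_consistent(9, 4100));  /* mismatch: corrupt  */
            return 0;
    }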
| @@ -879,7 +883,7 @@ static int dir_make_exhash(struct inode *inode) | |||
| 879 | for (x = sdp->sd_hash_ptrs; x--; lp++) | 883 | for (x = sdp->sd_hash_ptrs; x--; lp++) |
| 880 | *lp = cpu_to_be64(bn); | 884 | *lp = cpu_to_be64(bn); |
| 881 | 885 | ||
| 882 | dip->i_disksize = sdp->sd_sb.sb_bsize / 2; | 886 | i_size_write(inode, sdp->sd_sb.sb_bsize / 2); |
| 883 | gfs2_add_inode_blocks(&dip->i_inode, 1); | 887 | gfs2_add_inode_blocks(&dip->i_inode, 1); |
| 884 | dip->i_diskflags |= GFS2_DIF_EXHASH; | 888 | dip->i_diskflags |= GFS2_DIF_EXHASH; |
| 885 | 889 | ||
| @@ -1057,11 +1061,12 @@ static int dir_double_exhash(struct gfs2_inode *dip) | |||
| 1057 | u64 *buf; | 1061 | u64 *buf; |
| 1058 | u64 *from, *to; | 1062 | u64 *from, *to; |
| 1059 | u64 block; | 1063 | u64 block; |
| 1064 | u64 disksize = i_size_read(&dip->i_inode); | ||
| 1060 | int x; | 1065 | int x; |
| 1061 | int error = 0; | 1066 | int error = 0; |
| 1062 | 1067 | ||
| 1063 | hsize = 1 << dip->i_depth; | 1068 | hsize = 1 << dip->i_depth; |
| 1064 | if (hsize * sizeof(u64) != dip->i_disksize) { | 1069 | if (hsize * sizeof(u64) != disksize) { |
| 1065 | gfs2_consist_inode(dip); | 1070 | gfs2_consist_inode(dip); |
| 1066 | return -EIO; | 1071 | return -EIO; |
| 1067 | } | 1072 | } |
| @@ -1072,7 +1077,7 @@ static int dir_double_exhash(struct gfs2_inode *dip) | |||
| 1072 | if (!buf) | 1077 | if (!buf) |
| 1073 | return -ENOMEM; | 1078 | return -ENOMEM; |
| 1074 | 1079 | ||
| 1075 | for (block = dip->i_disksize >> sdp->sd_hash_bsize_shift; block--;) { | 1080 | for (block = disksize >> sdp->sd_hash_bsize_shift; block--;) { |
| 1076 | error = gfs2_dir_read_data(dip, (char *)buf, | 1081 | error = gfs2_dir_read_data(dip, (char *)buf, |
| 1077 | block * sdp->sd_hash_bsize, | 1082 | block * sdp->sd_hash_bsize, |
| 1078 | sdp->sd_hash_bsize, 1); | 1083 | sdp->sd_hash_bsize, 1); |
| @@ -1370,7 +1375,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | |||
| 1370 | unsigned depth = 0; | 1375 | unsigned depth = 0; |
| 1371 | 1376 | ||
| 1372 | hsize = 1 << dip->i_depth; | 1377 | hsize = 1 << dip->i_depth; |
| 1373 | if (hsize * sizeof(u64) != dip->i_disksize) { | 1378 | if (hsize * sizeof(u64) != i_size_read(inode)) { |
| 1374 | gfs2_consist_inode(dip); | 1379 | gfs2_consist_inode(dip); |
| 1375 | return -EIO; | 1380 | return -EIO; |
| 1376 | } | 1381 | } |
| @@ -1784,7 +1789,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data) | |||
| 1784 | int error = 0; | 1789 | int error = 0; |
| 1785 | 1790 | ||
| 1786 | hsize = 1 << dip->i_depth; | 1791 | hsize = 1 << dip->i_depth; |
| 1787 | if (hsize * sizeof(u64) != dip->i_disksize) { | 1792 | if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) { |
| 1788 | gfs2_consist_inode(dip); | 1793 | gfs2_consist_inode(dip); |
| 1789 | return -EIO; | 1794 | return -EIO; |
| 1790 | } | 1795 | } |
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 4f919440c3be..a98f644bd3df 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h | |||
| @@ -17,23 +17,24 @@ struct inode; | |||
| 17 | struct gfs2_inode; | 17 | struct gfs2_inode; |
| 18 | struct gfs2_inum; | 18 | struct gfs2_inum; |
| 19 | 19 | ||
| 20 | struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *filename); | 20 | extern struct inode *gfs2_dir_search(struct inode *dir, |
| 21 | int gfs2_dir_check(struct inode *dir, const struct qstr *filename, | 21 | const struct qstr *filename); |
| 22 | const struct gfs2_inode *ip); | 22 | extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, |
| 23 | int gfs2_dir_add(struct inode *inode, const struct qstr *filename, | 23 | const struct gfs2_inode *ip); |
| 24 | const struct gfs2_inode *ip, unsigned int type); | 24 | extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, |
| 25 | int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); | 25 | const struct gfs2_inode *ip, unsigned int type); |
| 26 | int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | 26 | extern int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); |
| 27 | filldir_t filldir); | 27 | extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, |
| 28 | int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, | 28 | filldir_t filldir); |
| 29 | const struct gfs2_inode *nip, unsigned int new_type); | 29 | extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, |
| 30 | const struct gfs2_inode *nip, unsigned int new_type); | ||
| 30 | 31 | ||
| 31 | int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); | 32 | extern int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); |
| 32 | 33 | ||
| 33 | int gfs2_diradd_alloc_required(struct inode *dir, | 34 | extern int gfs2_diradd_alloc_required(struct inode *dir, |
| 34 | const struct qstr *filename); | 35 | const struct qstr *filename); |
| 35 | int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, | 36 | extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, |
| 36 | struct buffer_head **bhp); | 37 | struct buffer_head **bhp); |
| 37 | 38 | ||
| 38 | static inline u32 gfs2_disk_hash(const char *data, int len) | 39 | static inline u32 gfs2_disk_hash(const char *data, int len) |
| 39 | { | 40 | { |
| @@ -61,4 +62,7 @@ static inline void gfs2_qstr2dirent(const struct qstr *name, u16 reclen, struct | |||
| 61 | memcpy(dent + 1, name->name, name->len); | 62 | memcpy(dent + 1, name->name, name->len); |
| 62 | } | 63 | } |
| 63 | 64 | ||
| 65 | extern struct qstr gfs2_qdot; | ||
| 66 | extern struct qstr gfs2_qdotdot; | ||
| 67 | |||
| 64 | #endif /* __DIR_DOT_H__ */ | 68 | #endif /* __DIR_DOT_H__ */ |
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index dfe237a3f8ad..06d582732d34 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c | |||
| @@ -126,16 +126,9 @@ static int gfs2_get_name(struct dentry *parent, char *name, | |||
| 126 | 126 | ||
| 127 | static struct dentry *gfs2_get_parent(struct dentry *child) | 127 | static struct dentry *gfs2_get_parent(struct dentry *child) |
| 128 | { | 128 | { |
| 129 | struct qstr dotdot; | ||
| 130 | struct dentry *dentry; | 129 | struct dentry *dentry; |
| 131 | 130 | ||
| 132 | /* | 131 | dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1)); |
| 133 | * XXX(hch): it would be a good idea to keep this around as a | ||
| 134 | * static variable. | ||
| 135 | */ | ||
| 136 | gfs2_str2qstr(&dotdot, ".."); | ||
| 137 | |||
| 138 | dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &dotdot, 1)); | ||
| 139 | if (!IS_ERR(dentry)) | 132 | if (!IS_ERR(dentry)) |
| 140 | dentry->d_op = &gfs2_dops; | 133 | dentry->d_op = &gfs2_dops; |
| 141 | return dentry; | 134 | return dentry; |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 4edd662c8232..237ee6a940df 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
| @@ -382,8 +382,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 382 | rblocks = RES_DINODE + ind_blocks; | 382 | rblocks = RES_DINODE + ind_blocks; |
| 383 | if (gfs2_is_jdata(ip)) | 383 | if (gfs2_is_jdata(ip)) |
| 384 | rblocks += data_blocks ? data_blocks : 1; | 384 | rblocks += data_blocks ? data_blocks : 1; |
| 385 | if (ind_blocks || data_blocks) | 385 | if (ind_blocks || data_blocks) { |
| 386 | rblocks += RES_STATFS + RES_QUOTA; | 386 | rblocks += RES_STATFS + RES_QUOTA; |
| 387 | rblocks += gfs2_rg_blocks(al); | ||
| 388 | } | ||
| 387 | ret = gfs2_trans_begin(sdp, rblocks, 0); | 389 | ret = gfs2_trans_begin(sdp, rblocks, 0); |
| 388 | if (ret) | 390 | if (ret) |
| 389 | goto out_trans_fail; | 391 | goto out_trans_fail; |
| @@ -491,7 +493,7 @@ static int gfs2_open(struct inode *inode, struct file *file) | |||
| 491 | goto fail; | 493 | goto fail; |
| 492 | 494 | ||
| 493 | if (!(file->f_flags & O_LARGEFILE) && | 495 | if (!(file->f_flags & O_LARGEFILE) && |
| 494 | ip->i_disksize > MAX_NON_LFS) { | 496 | i_size_read(inode) > MAX_NON_LFS) { |
| 495 | error = -EOVERFLOW; | 497 | error = -EOVERFLOW; |
| 496 | goto fail_gunlock; | 498 | goto fail_gunlock; |
| 497 | } | 499 | } |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9adf8f924e08..87778857f099 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
| @@ -441,6 +441,8 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) | |||
| 441 | else | 441 | else |
| 442 | gfs2_glock_put_nolock(gl); | 442 | gfs2_glock_put_nolock(gl); |
| 443 | } | 443 | } |
| 444 | if (held1 && held2 && list_empty(&gl->gl_holders)) | ||
| 445 | clear_bit(GLF_QUEUED, &gl->gl_flags); | ||
| 444 | 446 | ||
| 445 | gl->gl_state = new_state; | 447 | gl->gl_state = new_state; |
| 446 | gl->gl_tchange = jiffies; | 448 | gl->gl_tchange = jiffies; |
| @@ -1012,6 +1014,7 @@ fail: | |||
| 1012 | if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt)) | 1014 | if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt)) |
| 1013 | insert_pt = &gh2->gh_list; | 1015 | insert_pt = &gh2->gh_list; |
| 1014 | } | 1016 | } |
| 1017 | set_bit(GLF_QUEUED, &gl->gl_flags); | ||
| 1015 | if (likely(insert_pt == NULL)) { | 1018 | if (likely(insert_pt == NULL)) { |
| 1016 | list_add_tail(&gh->gh_list, &gl->gl_holders); | 1019 | list_add_tail(&gh->gh_list, &gl->gl_holders); |
| 1017 | if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) | 1020 | if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) |
| @@ -1310,10 +1313,12 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) | |||
| 1310 | 1313 | ||
| 1311 | gfs2_glock_hold(gl); | 1314 | gfs2_glock_hold(gl); |
| 1312 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; | 1315 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; |
| 1313 | if (time_before(now, holdtime)) | 1316 | if (test_bit(GLF_QUEUED, &gl->gl_flags)) { |
| 1314 | delay = holdtime - now; | 1317 | if (time_before(now, holdtime)) |
| 1315 | if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) | 1318 | delay = holdtime - now; |
| 1316 | delay = gl->gl_ops->go_min_hold_time; | 1319 | if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) |
| 1320 | delay = gl->gl_ops->go_min_hold_time; | ||
| 1321 | } | ||
| 1317 | 1322 | ||
| 1318 | spin_lock(&gl->gl_spin); | 1323 | spin_lock(&gl->gl_spin); |
| 1319 | handle_callback(gl, state, delay); | 1324 | handle_callback(gl, state, delay); |
| @@ -1512,7 +1517,7 @@ static void clear_glock(struct gfs2_glock *gl) | |||
| 1512 | spin_unlock(&lru_lock); | 1517 | spin_unlock(&lru_lock); |
| 1513 | 1518 | ||
| 1514 | spin_lock(&gl->gl_spin); | 1519 | spin_lock(&gl->gl_spin); |
| 1515 | if (find_first_holder(gl) == NULL && gl->gl_state != LM_ST_UNLOCKED) | 1520 | if (gl->gl_state != LM_ST_UNLOCKED) |
| 1516 | handle_callback(gl, LM_ST_UNLOCKED, 0); | 1521 | handle_callback(gl, LM_ST_UNLOCKED, 0); |
| 1517 | spin_unlock(&gl->gl_spin); | 1522 | spin_unlock(&gl->gl_spin); |
| 1518 | gfs2_glock_hold(gl); | 1523 | gfs2_glock_hold(gl); |
| @@ -1660,6 +1665,8 @@ static const char *gflags2str(char *buf, const unsigned long *gflags) | |||
| 1660 | *p++ = 'I'; | 1665 | *p++ = 'I'; |
| 1661 | if (test_bit(GLF_FROZEN, gflags)) | 1666 | if (test_bit(GLF_FROZEN, gflags)) |
| 1662 | *p++ = 'F'; | 1667 | *p++ = 'F'; |
| 1668 | if (test_bit(GLF_QUEUED, gflags)) | ||
| 1669 | *p++ = 'q'; | ||
| 1663 | *p = 0; | 1670 | *p = 0; |
| 1664 | return buf; | 1671 | return buf; |
| 1665 | } | 1672 | } |
| @@ -1776,10 +1783,12 @@ int __init gfs2_glock_init(void) | |||
| 1776 | } | 1783 | } |
| 1777 | #endif | 1784 | #endif |
| 1778 | 1785 | ||
| 1779 | glock_workqueue = create_workqueue("glock_workqueue"); | 1786 | glock_workqueue = alloc_workqueue("glock_workqueue", WQ_RESCUER | |
| 1787 | WQ_HIGHPRI | WQ_FREEZEABLE, 0); | ||
| 1780 | if (IS_ERR(glock_workqueue)) | 1788 | if (IS_ERR(glock_workqueue)) |
| 1781 | return PTR_ERR(glock_workqueue); | 1789 | return PTR_ERR(glock_workqueue); |
| 1782 | gfs2_delete_workqueue = create_workqueue("delete_workqueue"); | 1790 | gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_RESCUER | |
| 1791 | WQ_FREEZEABLE, 0); | ||
| 1783 | if (IS_ERR(gfs2_delete_workqueue)) { | 1792 | if (IS_ERR(gfs2_delete_workqueue)) { |
| 1784 | destroy_workqueue(glock_workqueue); | 1793 | destroy_workqueue(glock_workqueue); |
| 1785 | return PTR_ERR(gfs2_delete_workqueue); | 1794 | return PTR_ERR(gfs2_delete_workqueue); |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 2bda1911b156..db1c26d6d220 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
| @@ -215,7 +215,7 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); | |||
| 215 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); | 215 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); |
| 216 | 216 | ||
| 217 | /** | 217 | /** |
| 218 | * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock | 218 | * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock |
| 219 | * @gl: the glock | 219 | * @gl: the glock |
| 220 | * @state: the state we're requesting | 220 | * @state: the state we're requesting |
| 221 | * @flags: the modifier flags | 221 | * @flags: the modifier flags |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 49f97d3bb690..0d149dcc04e5 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
| @@ -262,13 +262,12 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) | |||
| 262 | const struct gfs2_inode *ip = gl->gl_object; | 262 | const struct gfs2_inode *ip = gl->gl_object; |
| 263 | if (ip == NULL) | 263 | if (ip == NULL) |
| 264 | return 0; | 264 | return 0; |
| 265 | gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu/%llu\n", | 265 | gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n", |
| 266 | (unsigned long long)ip->i_no_formal_ino, | 266 | (unsigned long long)ip->i_no_formal_ino, |
| 267 | (unsigned long long)ip->i_no_addr, | 267 | (unsigned long long)ip->i_no_addr, |
| 268 | IF2DT(ip->i_inode.i_mode), ip->i_flags, | 268 | IF2DT(ip->i_inode.i_mode), ip->i_flags, |
| 269 | (unsigned int)ip->i_diskflags, | 269 | (unsigned int)ip->i_diskflags, |
| 270 | (unsigned long long)ip->i_inode.i_size, | 270 | (unsigned long long)i_size_read(&ip->i_inode)); |
| 271 | (unsigned long long)ip->i_disksize); | ||
| 272 | return 0; | 271 | return 0; |
| 273 | } | 272 | } |
| 274 | 273 | ||
| @@ -453,7 +452,6 @@ const struct gfs2_glock_operations *gfs2_glops_list[] = { | |||
| 453 | [LM_TYPE_META] = &gfs2_meta_glops, | 452 | [LM_TYPE_META] = &gfs2_meta_glops, |
| 454 | [LM_TYPE_INODE] = &gfs2_inode_glops, | 453 | [LM_TYPE_INODE] = &gfs2_inode_glops, |
| 455 | [LM_TYPE_RGRP] = &gfs2_rgrp_glops, | 454 | [LM_TYPE_RGRP] = &gfs2_rgrp_glops, |
| 456 | [LM_TYPE_NONDISK] = &gfs2_trans_glops, | ||
| 457 | [LM_TYPE_IOPEN] = &gfs2_iopen_glops, | 455 | [LM_TYPE_IOPEN] = &gfs2_iopen_glops, |
| 458 | [LM_TYPE_FLOCK] = &gfs2_flock_glops, | 456 | [LM_TYPE_FLOCK] = &gfs2_flock_glops, |
| 459 | [LM_TYPE_NONDISK] = &gfs2_nondisk_glops, | 457 | [LM_TYPE_NONDISK] = &gfs2_nondisk_glops, |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index fdbf4b366fa5..764fbb49efc8 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
| @@ -196,6 +196,7 @@ enum { | |||
| 196 | GLF_REPLY_PENDING = 9, | 196 | GLF_REPLY_PENDING = 9, |
| 197 | GLF_INITIAL = 10, | 197 | GLF_INITIAL = 10, |
| 198 | GLF_FROZEN = 11, | 198 | GLF_FROZEN = 11, |
| 199 | GLF_QUEUED = 12, | ||
| 199 | }; | 200 | }; |
| 200 | 201 | ||
| 201 | struct gfs2_glock { | 202 | struct gfs2_glock { |
| @@ -267,7 +268,6 @@ struct gfs2_inode { | |||
| 267 | u64 i_no_formal_ino; | 268 | u64 i_no_formal_ino; |
| 268 | u64 i_generation; | 269 | u64 i_generation; |
| 269 | u64 i_eattr; | 270 | u64 i_eattr; |
| 270 | loff_t i_disksize; | ||
| 271 | unsigned long i_flags; /* GIF_... */ | 271 | unsigned long i_flags; /* GIF_... */ |
| 272 | struct gfs2_glock *i_gl; /* Move into i_gh? */ | 272 | struct gfs2_glock *i_gl; /* Move into i_gh? */ |
| 273 | struct gfs2_holder i_iopen_gh; | 273 | struct gfs2_holder i_iopen_gh; |
| @@ -416,11 +416,8 @@ struct gfs2_args { | |||
| 416 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ | 416 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ |
| 417 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ | 417 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ |
| 418 | unsigned int ar_spectator:1; /* Don't get a journal */ | 418 | unsigned int ar_spectator:1; /* Don't get a journal */ |
| 419 | unsigned int ar_ignore_local_fs:1; /* Ignore optimisations */ | ||
| 420 | unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */ | 419 | unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */ |
| 421 | unsigned int ar_localcaching:1; /* Local caching */ | ||
| 422 | unsigned int ar_debug:1; /* Oops on errors */ | 420 | unsigned int ar_debug:1; /* Oops on errors */ |
| 423 | unsigned int ar_upgrade:1; /* Upgrade ondisk format */ | ||
| 424 | unsigned int ar_posix_acl:1; /* Enable posix acls */ | 421 | unsigned int ar_posix_acl:1; /* Enable posix acls */ |
| 425 | unsigned int ar_quota:2; /* off/account/on */ | 422 | unsigned int ar_quota:2; /* off/account/on */ |
| 426 | unsigned int ar_suiddir:1; /* suiddir support */ | 423 | unsigned int ar_suiddir:1; /* suiddir support */ |
| @@ -497,7 +494,7 @@ struct gfs2_sb_host { | |||
| 497 | */ | 494 | */ |
| 498 | 495 | ||
| 499 | struct lm_lockstruct { | 496 | struct lm_lockstruct { |
| 500 | unsigned int ls_jid; | 497 | int ls_jid; |
| 501 | unsigned int ls_first; | 498 | unsigned int ls_first; |
| 502 | unsigned int ls_first_done; | 499 | unsigned int ls_first_done; |
| 503 | unsigned int ls_nodir; | 500 | unsigned int ls_nodir; |
| @@ -572,6 +569,7 @@ struct gfs2_sbd { | |||
| 572 | struct list_head sd_rindex_mru_list; | 569 | struct list_head sd_rindex_mru_list; |
| 573 | struct gfs2_rgrpd *sd_rindex_forward; | 570 | struct gfs2_rgrpd *sd_rindex_forward; |
| 574 | unsigned int sd_rgrps; | 571 | unsigned int sd_rgrps; |
| 572 | unsigned int sd_max_rg_data; | ||
| 575 | 573 | ||
| 576 | /* Journal index stuff */ | 574 | /* Journal index stuff */ |
| 577 | 575 | ||
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 08140f185a37..06370f8bd8cf 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
| @@ -359,8 +359,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
| 359 | * to do that. | 359 | * to do that. |
| 360 | */ | 360 | */ |
| 361 | ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); | 361 | ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); |
| 362 | ip->i_disksize = be64_to_cpu(str->di_size); | 362 | i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); |
| 363 | i_size_write(&ip->i_inode, ip->i_disksize); | ||
| 364 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); | 363 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); |
| 365 | atime.tv_sec = be64_to_cpu(str->di_atime); | 364 | atime.tv_sec = be64_to_cpu(str->di_atime); |
| 366 | atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); | 365 | atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); |
| @@ -1055,7 +1054,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) | |||
| 1055 | str->di_uid = cpu_to_be32(ip->i_inode.i_uid); | 1054 | str->di_uid = cpu_to_be32(ip->i_inode.i_uid); |
| 1056 | str->di_gid = cpu_to_be32(ip->i_inode.i_gid); | 1055 | str->di_gid = cpu_to_be32(ip->i_inode.i_gid); |
| 1057 | str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); | 1056 | str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); |
| 1058 | str->di_size = cpu_to_be64(ip->i_disksize); | 1057 | str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); |
| 1059 | str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); | 1058 | str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); |
| 1060 | str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); | 1059 | str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); |
| 1061 | str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); | 1060 | str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); |
| @@ -1085,8 +1084,8 @@ void gfs2_dinode_print(const struct gfs2_inode *ip) | |||
| 1085 | (unsigned long long)ip->i_no_formal_ino); | 1084 | (unsigned long long)ip->i_no_formal_ino); |
| 1086 | printk(KERN_INFO " no_addr = %llu\n", | 1085 | printk(KERN_INFO " no_addr = %llu\n", |
| 1087 | (unsigned long long)ip->i_no_addr); | 1086 | (unsigned long long)ip->i_no_addr); |
| 1088 | printk(KERN_INFO " i_disksize = %llu\n", | 1087 | printk(KERN_INFO " i_size = %llu\n", |
| 1089 | (unsigned long long)ip->i_disksize); | 1088 | (unsigned long long)i_size_read(&ip->i_inode)); |
| 1090 | printk(KERN_INFO " blocks = %llu\n", | 1089 | printk(KERN_INFO " blocks = %llu\n", |
| 1091 | (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); | 1090 | (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); |
| 1092 | printk(KERN_INFO " i_goal = %llu\n", | 1091 | printk(KERN_INFO " i_goal = %llu\n", |
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 300ada3f21de..6720d7d5fbc6 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
| @@ -19,6 +19,8 @@ extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); | |||
| 19 | extern int gfs2_internal_read(struct gfs2_inode *ip, | 19 | extern int gfs2_internal_read(struct gfs2_inode *ip, |
| 20 | struct file_ra_state *ra_state, | 20 | struct file_ra_state *ra_state, |
| 21 | char *buf, loff_t *pos, unsigned size); | 21 | char *buf, loff_t *pos, unsigned size); |
| 22 | extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, | ||
| 23 | unsigned int from, unsigned int to); | ||
| 22 | extern void gfs2_set_aops(struct inode *inode); | 24 | extern void gfs2_set_aops(struct inode *inode); |
| 23 | 25 | ||
| 24 | static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) | 26 | static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) |
| @@ -80,6 +82,19 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip, | |||
| 80 | dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr); | 82 | dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr); |
| 81 | } | 83 | } |
| 82 | 84 | ||
| 85 | static inline int gfs2_check_internal_file_size(struct inode *inode, | ||
| 86 | u64 minsize, u64 maxsize) | ||
| 87 | { | ||
| 88 | u64 size = i_size_read(inode); | ||
| 89 | if (size < minsize || size > maxsize) | ||
| 90 | goto err; | ||
| 91 | if (size & ((1 << inode->i_blkbits) - 1)) | ||
| 92 | goto err; | ||
| 93 | return 0; | ||
| 94 | err: | ||
| 95 | gfs2_consist_inode(GFS2_I(inode)); | ||
| 96 | return -EIO; | ||
| 97 | } | ||
| 83 | 98 | ||
| 84 | extern void gfs2_set_iop(struct inode *inode); | 99 | extern void gfs2_set_iop(struct inode *inode); |
| 85 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, | 100 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, |
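The new gfs2_check_internal_file_size() helper in the hunk above validates both a size range and block alignment; the alignment test masks with the block size derived from i_blkbits. A stand-alone sketch of the same checks:

    #include <stdio.h>
    #include <stdint.h>

    /* Mirrors gfs2_check_internal_file_size(): size must lie in [min,max]
     * and be a multiple of the block size (blkbits = log2(block size)). */
    static int internal_size_ok(uint64_t size, uint64_t minsize,
                                uint64_t maxsize, unsigned blkbits)
    {
            if (size < minsize || size > maxsize)
                    return 0;
            if (size & ((1ull << blkbits) - 1))   /* not block aligned */
                    return 0;
            return 1;
    }

    int main(void)
    {
            printf("%d\n", internal_size_ok(8192, 0, 1 << 20, 12)); /* ok        */
            printf("%d\n", internal_size_ok(8200, 0, 1 << 20, 12)); /* unaligned */
            return 0;
    }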
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 0e0470ed34c2..1c09425b45fd 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c | |||
| @@ -42,9 +42,9 @@ static void gdlm_ast(void *arg) | |||
| 42 | ret |= LM_OUT_CANCELED; | 42 | ret |= LM_OUT_CANCELED; |
| 43 | goto out; | 43 | goto out; |
| 44 | case -EAGAIN: /* Try lock fails */ | 44 | case -EAGAIN: /* Try lock fails */ |
| 45 | case -EDEADLK: /* Deadlock detected */ | ||
| 45 | goto out; | 46 | goto out; |
| 46 | case -EINVAL: /* Invalid */ | 47 | case -ETIMEDOUT: /* Canceled due to timeout */ |
| 47 | case -ENOMEM: /* Out of memory */ | ||
| 48 | ret |= LM_OUT_ERROR; | 48 | ret |= LM_OUT_ERROR; |
| 49 | goto out; | 49 | goto out; |
| 50 | case 0: /* Success */ | 50 | case 0: /* Success */ |
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index b1e9630eb46a..d7eb1e209aa8 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include "glock.h" | 24 | #include "glock.h" |
| 25 | #include "quota.h" | 25 | #include "quota.h" |
| 26 | #include "recovery.h" | 26 | #include "recovery.h" |
| 27 | #include "dir.h" | ||
| 27 | 28 | ||
| 28 | static struct shrinker qd_shrinker = { | 29 | static struct shrinker qd_shrinker = { |
| 29 | .shrink = gfs2_shrink_qd_memory, | 30 | .shrink = gfs2_shrink_qd_memory, |
| @@ -78,6 +79,9 @@ static int __init init_gfs2_fs(void) | |||
| 78 | { | 79 | { |
| 79 | int error; | 80 | int error; |
| 80 | 81 | ||
| 82 | gfs2_str2qstr(&gfs2_qdot, "."); | ||
| 83 | gfs2_str2qstr(&gfs2_qdotdot, ".."); | ||
| 84 | |||
| 81 | error = gfs2_sys_init(); | 85 | error = gfs2_sys_init(); |
| 82 | if (error) | 86 | if (error) |
| 83 | return error; | 87 | return error; |
| @@ -140,7 +144,7 @@ static int __init init_gfs2_fs(void) | |||
| 140 | 144 | ||
| 141 | error = -ENOMEM; | 145 | error = -ENOMEM; |
| 142 | gfs_recovery_wq = alloc_workqueue("gfs_recovery", | 146 | gfs_recovery_wq = alloc_workqueue("gfs_recovery", |
| 143 | WQ_NON_REENTRANT | WQ_RESCUER, 0); | 147 | WQ_RESCUER | WQ_FREEZEABLE, 0); |
| 144 | if (!gfs_recovery_wq) | 148 | if (!gfs_recovery_wq) |
| 145 | goto fail_wq; | 149 | goto fail_wq; |
| 146 | 150 | ||
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 4d4b1e8ac64c..aeafc233dc89 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
| @@ -38,14 +38,6 @@ | |||
| 38 | #define DO 0 | 38 | #define DO 0 |
| 39 | #define UNDO 1 | 39 | #define UNDO 1 |
| 40 | 40 | ||
| 41 | static const u32 gfs2_old_fs_formats[] = { | ||
| 42 | 0 | ||
| 43 | }; | ||
| 44 | |||
| 45 | static const u32 gfs2_old_multihost_formats[] = { | ||
| 46 | 0 | ||
| 47 | }; | ||
| 48 | |||
| 49 | /** | 41 | /** |
| 50 | * gfs2_tune_init - Fill a gfs2_tune structure with default values | 42 | * gfs2_tune_init - Fill a gfs2_tune structure with default values |
| 51 | * @gt: tune | 43 | * @gt: tune |
| @@ -135,8 +127,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
| 135 | 127 | ||
| 136 | static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) | 128 | static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) |
| 137 | { | 129 | { |
| 138 | unsigned int x; | ||
| 139 | |||
| 140 | if (sb->sb_magic != GFS2_MAGIC || | 130 | if (sb->sb_magic != GFS2_MAGIC || |
| 141 | sb->sb_type != GFS2_METATYPE_SB) { | 131 | sb->sb_type != GFS2_METATYPE_SB) { |
| 142 | if (!silent) | 132 | if (!silent) |
| @@ -150,55 +140,9 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int sile | |||
| 150 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) | 140 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) |
| 151 | return 0; | 141 | return 0; |
| 152 | 142 | ||
| 153 | if (sb->sb_fs_format != GFS2_FORMAT_FS) { | 143 | fs_warn(sdp, "Unknown on-disk format, unable to mount\n"); |
| 154 | for (x = 0; gfs2_old_fs_formats[x]; x++) | ||
| 155 | if (gfs2_old_fs_formats[x] == sb->sb_fs_format) | ||
| 156 | break; | ||
| 157 | 144 | ||
| 158 | if (!gfs2_old_fs_formats[x]) { | 145 | return -EINVAL; |
| 159 | printk(KERN_WARNING | ||
| 160 | "GFS2: code version (%u, %u) is incompatible " | ||
| 161 | "with ondisk format (%u, %u)\n", | ||
| 162 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
| 163 | sb->sb_fs_format, sb->sb_multihost_format); | ||
| 164 | printk(KERN_WARNING | ||
| 165 | "GFS2: I don't know how to upgrade this FS\n"); | ||
| 166 | return -EINVAL; | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { | ||
| 171 | for (x = 0; gfs2_old_multihost_formats[x]; x++) | ||
| 172 | if (gfs2_old_multihost_formats[x] == | ||
| 173 | sb->sb_multihost_format) | ||
| 174 | break; | ||
| 175 | |||
| 176 | if (!gfs2_old_multihost_formats[x]) { | ||
| 177 | printk(KERN_WARNING | ||
| 178 | "GFS2: code version (%u, %u) is incompatible " | ||
| 179 | "with ondisk format (%u, %u)\n", | ||
| 180 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
| 181 | sb->sb_fs_format, sb->sb_multihost_format); | ||
| 182 | printk(KERN_WARNING | ||
| 183 | "GFS2: I don't know how to upgrade this FS\n"); | ||
| 184 | return -EINVAL; | ||
| 185 | } | ||
| 186 | } | ||
| 187 | |||
| 188 | if (!sdp->sd_args.ar_upgrade) { | ||
| 189 | printk(KERN_WARNING | ||
| 190 | "GFS2: code version (%u, %u) is incompatible " | ||
| 191 | "with ondisk format (%u, %u)\n", | ||
| 192 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
| 193 | sb->sb_fs_format, sb->sb_multihost_format); | ||
| 194 | printk(KERN_INFO | ||
| 195 | "GFS2: Use the \"upgrade\" mount option to upgrade " | ||
| 196 | "the FS\n"); | ||
| 197 | printk(KERN_INFO "GFS2: See the manual for more details\n"); | ||
| 198 | return -EINVAL; | ||
| 199 | } | ||
| 200 | |||
| 201 | return 0; | ||
| 202 | } | 146 | } |
| 203 | 147 | ||
| 204 | static void end_bio_io_page(struct bio *bio, int error) | 148 | static void end_bio_io_page(struct bio *bio, int error) |
| @@ -586,7 +530,7 @@ static int map_journal_extents(struct gfs2_sbd *sdp) | |||
| 586 | 530 | ||
| 587 | prev_db = 0; | 531 | prev_db = 0; |
| 588 | 532 | ||
| 589 | for (lb = 0; lb < ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; lb++) { | 533 | for (lb = 0; lb < i_size_read(jd->jd_inode) >> sdp->sd_sb.sb_bsize_shift; lb++) { |
| 590 | bh.b_state = 0; | 534 | bh.b_state = 0; |
| 591 | bh.b_blocknr = 0; | 535 | bh.b_blocknr = 0; |
| 592 | bh.b_size = 1 << ip->i_inode.i_blkbits; | 536 | bh.b_size = 1 << ip->i_inode.i_blkbits; |
| @@ -1022,7 +966,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) | |||
| 1022 | if (!strcmp("lock_nolock", proto)) { | 966 | if (!strcmp("lock_nolock", proto)) { |
| 1023 | lm = &nolock_ops; | 967 | lm = &nolock_ops; |
| 1024 | sdp->sd_args.ar_localflocks = 1; | 968 | sdp->sd_args.ar_localflocks = 1; |
| 1025 | sdp->sd_args.ar_localcaching = 1; | ||
| 1026 | #ifdef CONFIG_GFS2_FS_LOCKING_DLM | 969 | #ifdef CONFIG_GFS2_FS_LOCKING_DLM |
| 1027 | } else if (!strcmp("lock_dlm", proto)) { | 970 | } else if (!strcmp("lock_dlm", proto)) { |
| 1028 | lm = &gfs2_dlm_ops; | 971 | lm = &gfs2_dlm_ops; |
| @@ -1113,8 +1056,6 @@ static int gfs2_journalid_wait(void *word) | |||
| 1113 | 1056 | ||
| 1114 | static int wait_on_journal(struct gfs2_sbd *sdp) | 1057 | static int wait_on_journal(struct gfs2_sbd *sdp) |
| 1115 | { | 1058 | { |
| 1116 | if (sdp->sd_args.ar_spectator) | ||
| 1117 | return 0; | ||
| 1118 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) | 1059 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) |
| 1119 | return 0; | 1060 | return 0; |
| 1120 | 1061 | ||
| @@ -1217,6 +1158,20 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent | |||
| 1217 | if (error) | 1158 | if (error) |
| 1218 | goto fail_sb; | 1159 | goto fail_sb; |
| 1219 | 1160 | ||
| 1161 | /* | ||
| 1162 | * If user space has failed to join the cluster or some similar | ||
| 1163 | * failure has occurred, then the journal id will contain a | ||
| 1164 | * negative (error) number. This will then be returned to the | ||
| 1165 | * caller (of the mount syscall). We do this even for spectator | ||
| 1166 | * mounts (which just write a jid of 0 to indicate "ok" even though | ||
| 1167 | * the jid is unused in the spectator case) | ||
| 1168 | */ | ||
| 1169 | if (sdp->sd_lockstruct.ls_jid < 0) { | ||
| 1170 | error = sdp->sd_lockstruct.ls_jid; | ||
| 1171 | sdp->sd_lockstruct.ls_jid = 0; | ||
| 1172 | goto fail_sb; | ||
| 1173 | } | ||
| 1174 | |||
| 1220 | error = init_inodes(sdp, DO); | 1175 | error = init_inodes(sdp, DO); |
| 1221 | if (error) | 1176 | if (error) |
| 1222 | goto fail_sb; | 1177 | goto fail_sb; |
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 1009be2c9737..0534510200d5 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
| @@ -18,6 +18,8 @@ | |||
| 18 | #include <linux/gfs2_ondisk.h> | 18 | #include <linux/gfs2_ondisk.h> |
| 19 | #include <linux/crc32.h> | 19 | #include <linux/crc32.h> |
| 20 | #include <linux/fiemap.h> | 20 | #include <linux/fiemap.h> |
| 21 | #include <linux/swap.h> | ||
| 22 | #include <linux/falloc.h> | ||
| 21 | #include <asm/uaccess.h> | 23 | #include <asm/uaccess.h> |
| 22 | 24 | ||
| 23 | #include "gfs2.h" | 25 | #include "gfs2.h" |
| @@ -217,7 +219,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
| 217 | goto out_gunlock_q; | 219 | goto out_gunlock_q; |
| 218 | 220 | ||
| 219 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 221 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + |
| 220 | al->al_rgd->rd_length + | 222 | gfs2_rg_blocks(al) + |
| 221 | 2 * RES_DINODE + RES_STATFS + | 223 | 2 * RES_DINODE + RES_STATFS + |
| 222 | RES_QUOTA, 0); | 224 | RES_QUOTA, 0); |
| 223 | if (error) | 225 | if (error) |
| @@ -406,7 +408,6 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, | |||
| 406 | 408 | ||
| 407 | ip = ghs[1].gh_gl->gl_object; | 409 | ip = ghs[1].gh_gl->gl_object; |
| 408 | 410 | ||
| 409 | ip->i_disksize = size; | ||
| 410 | i_size_write(inode, size); | 411 | i_size_write(inode, size); |
| 411 | 412 | ||
| 412 | error = gfs2_meta_inode_buffer(ip, &dibh); | 413 | error = gfs2_meta_inode_buffer(ip, &dibh); |
| @@ -461,7 +462,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 461 | ip = ghs[1].gh_gl->gl_object; | 462 | ip = ghs[1].gh_gl->gl_object; |
| 462 | 463 | ||
| 463 | ip->i_inode.i_nlink = 2; | 464 | ip->i_inode.i_nlink = 2; |
| 464 | ip->i_disksize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); | 465 | i_size_write(inode, sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)); |
| 465 | ip->i_diskflags |= GFS2_DIF_JDATA; | 466 | ip->i_diskflags |= GFS2_DIF_JDATA; |
| 466 | ip->i_entries = 2; | 467 | ip->i_entries = 2; |
| 467 | 468 | ||
| @@ -470,18 +471,15 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 470 | if (!gfs2_assert_withdraw(sdp, !error)) { | 471 | if (!gfs2_assert_withdraw(sdp, !error)) { |
| 471 | struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; | 472 | struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; |
| 472 | struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1); | 473 | struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1); |
| 473 | struct qstr str; | ||
| 474 | 474 | ||
| 475 | gfs2_str2qstr(&str, "."); | ||
| 476 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 475 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
| 477 | gfs2_qstr2dirent(&str, GFS2_DIRENT_SIZE(str.len), dent); | 476 | gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent); |
| 478 | dent->de_inum = di->di_num; /* already GFS2 endian */ | 477 | dent->de_inum = di->di_num; /* already GFS2 endian */ |
| 479 | dent->de_type = cpu_to_be16(DT_DIR); | 478 | dent->de_type = cpu_to_be16(DT_DIR); |
| 480 | di->di_entries = cpu_to_be32(1); | 479 | di->di_entries = cpu_to_be32(1); |
| 481 | 480 | ||
| 482 | gfs2_str2qstr(&str, ".."); | ||
| 483 | dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1)); | 481 | dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1)); |
| 484 | gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); | 482 | gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); |
| 485 | 483 | ||
| 486 | gfs2_inum_out(dip, dent); | 484 | gfs2_inum_out(dip, dent); |
| 487 | dent->de_type = cpu_to_be16(DT_DIR); | 485 | dent->de_type = cpu_to_be16(DT_DIR); |
| @@ -522,7 +520,6 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 522 | static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | 520 | static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, |
| 523 | struct gfs2_inode *ip) | 521 | struct gfs2_inode *ip) |
| 524 | { | 522 | { |
| 525 | struct qstr dotname; | ||
| 526 | int error; | 523 | int error; |
| 527 | 524 | ||
| 528 | if (ip->i_entries != 2) { | 525 | if (ip->i_entries != 2) { |
| @@ -539,13 +536,11 @@ static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | |||
| 539 | if (error) | 536 | if (error) |
| 540 | return error; | 537 | return error; |
| 541 | 538 | ||
| 542 | gfs2_str2qstr(&dotname, "."); | 539 | error = gfs2_dir_del(ip, &gfs2_qdot); |
| 543 | error = gfs2_dir_del(ip, &dotname); | ||
| 544 | if (error) | 540 | if (error) |
| 545 | return error; | 541 | return error; |
| 546 | 542 | ||
| 547 | gfs2_str2qstr(&dotname, ".."); | 543 | error = gfs2_dir_del(ip, &gfs2_qdotdot); |
| 548 | error = gfs2_dir_del(ip, &dotname); | ||
| 549 | if (error) | 544 | if (error) |
| 550 | return error; | 545 | return error; |
| 551 | 546 | ||
| @@ -694,11 +689,8 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | |||
| 694 | struct inode *dir = &to->i_inode; | 689 | struct inode *dir = &to->i_inode; |
| 695 | struct super_block *sb = dir->i_sb; | 690 | struct super_block *sb = dir->i_sb; |
| 696 | struct inode *tmp; | 691 | struct inode *tmp; |
| 697 | struct qstr dotdot; | ||
| 698 | int error = 0; | 692 | int error = 0; |
| 699 | 693 | ||
| 700 | gfs2_str2qstr(&dotdot, ".."); | ||
| 701 | |||
| 702 | igrab(dir); | 694 | igrab(dir); |
| 703 | 695 | ||
| 704 | for (;;) { | 696 | for (;;) { |
| @@ -711,7 +703,7 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | |||
| 711 | break; | 703 | break; |
| 712 | } | 704 | } |
| 713 | 705 | ||
| 714 | tmp = gfs2_lookupi(dir, &dotdot, 1); | 706 | tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1); |
| 715 | if (IS_ERR(tmp)) { | 707 | if (IS_ERR(tmp)) { |
| 716 | error = PTR_ERR(tmp); | 708 | error = PTR_ERR(tmp); |
| 717 | break; | 709 | break; |
| @@ -744,7 +736,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 744 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); | 736 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); |
| 745 | struct gfs2_inode *nip = NULL; | 737 | struct gfs2_inode *nip = NULL; |
| 746 | struct gfs2_sbd *sdp = GFS2_SB(odir); | 738 | struct gfs2_sbd *sdp = GFS2_SB(odir); |
| 747 | struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }; | 739 | struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh; |
| 748 | struct gfs2_rgrpd *nrgd; | 740 | struct gfs2_rgrpd *nrgd; |
| 749 | unsigned int num_gh; | 741 | unsigned int num_gh; |
| 750 | int dir_rename = 0; | 742 | int dir_rename = 0; |
| @@ -758,6 +750,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 758 | return 0; | 750 | return 0; |
| 759 | } | 751 | } |
| 760 | 752 | ||
| 753 | error = gfs2_rindex_hold(sdp, &ri_gh); | ||
| 754 | if (error) | ||
| 755 | return error; | ||
| 761 | 756 | ||
| 762 | if (odip != ndip) { | 757 | if (odip != ndip) { |
| 763 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, | 758 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, |
| @@ -887,12 +882,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 887 | 882 | ||
| 888 | al->al_requested = sdp->sd_max_dirres; | 883 | al->al_requested = sdp->sd_max_dirres; |
| 889 | 884 | ||
| 890 | error = gfs2_inplace_reserve(ndip); | 885 | error = gfs2_inplace_reserve_ri(ndip); |
| 891 | if (error) | 886 | if (error) |
| 892 | goto out_gunlock_q; | 887 | goto out_gunlock_q; |
| 893 | 888 | ||
| 894 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 889 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + |
| 895 | al->al_rgd->rd_length + | 890 | gfs2_rg_blocks(al) + |
| 896 | 4 * RES_DINODE + 4 * RES_LEAF + | 891 | 4 * RES_DINODE + 4 * RES_LEAF + |
| 897 | RES_STATFS + RES_QUOTA + 4, 0); | 892 | RES_STATFS + RES_QUOTA + 4, 0); |
| 898 | if (error) | 893 | if (error) |
| @@ -920,9 +915,6 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 920 | } | 915 | } |
| 921 | 916 | ||
| 922 | if (dir_rename) { | 917 | if (dir_rename) { |
| 923 | struct qstr name; | ||
| 924 | gfs2_str2qstr(&name, ".."); | ||
| 925 | |||
| 926 | error = gfs2_change_nlink(ndip, +1); | 918 | error = gfs2_change_nlink(ndip, +1); |
| 927 | if (error) | 919 | if (error) |
| 928 | goto out_end_trans; | 920 | goto out_end_trans; |
| @@ -930,7 +922,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 930 | if (error) | 922 | if (error) |
| 931 | goto out_end_trans; | 923 | goto out_end_trans; |
| 932 | 924 | ||
| 933 | error = gfs2_dir_mvino(ip, &name, ndip, DT_DIR); | 925 | error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR); |
| 934 | if (error) | 926 | if (error) |
| 935 | goto out_end_trans; | 927 | goto out_end_trans; |
| 936 | } else { | 928 | } else { |
| @@ -972,6 +964,7 @@ out_gunlock_r: | |||
| 972 | if (r_gh.gh_gl) | 964 | if (r_gh.gh_gl) |
| 973 | gfs2_glock_dq_uninit(&r_gh); | 965 | gfs2_glock_dq_uninit(&r_gh); |
| 974 | out: | 966 | out: |
| 967 | gfs2_glock_dq_uninit(&ri_gh); | ||
| 975 | return error; | 968 | return error; |
| 976 | } | 969 | } |
| 977 | 970 | ||
| @@ -990,7 +983,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
| 990 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); | 983 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); |
| 991 | struct gfs2_holder i_gh; | 984 | struct gfs2_holder i_gh; |
| 992 | struct buffer_head *dibh; | 985 | struct buffer_head *dibh; |
| 993 | unsigned int x; | 986 | unsigned int x, size; |
| 994 | char *buf; | 987 | char *buf; |
| 995 | int error; | 988 | int error; |
| 996 | 989 | ||
| @@ -1002,7 +995,8 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
| 1002 | return NULL; | 995 | return NULL; |
| 1003 | } | 996 | } |
| 1004 | 997 | ||
| 1005 | if (!ip->i_disksize) { | 998 | size = (unsigned int)i_size_read(&ip->i_inode); |
| 999 | if (size == 0) { | ||
| 1006 | gfs2_consist_inode(ip); | 1000 | gfs2_consist_inode(ip); |
| 1007 | buf = ERR_PTR(-EIO); | 1001 | buf = ERR_PTR(-EIO); |
| 1008 | goto out; | 1002 | goto out; |
| @@ -1014,7 +1008,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
| 1014 | goto out; | 1008 | goto out; |
| 1015 | } | 1009 | } |
| 1016 | 1010 | ||
| 1017 | x = ip->i_disksize + 1; | 1011 | x = size + 1; |
| 1018 | buf = kmalloc(x, GFP_NOFS); | 1012 | buf = kmalloc(x, GFP_NOFS); |
| 1019 | if (!buf) | 1013 | if (!buf) |
| 1020 | buf = ERR_PTR(-ENOMEM); | 1014 | buf = ERR_PTR(-ENOMEM); |
| @@ -1071,30 +1065,6 @@ int gfs2_permission(struct inode *inode, int mask) | |||
| 1071 | return error; | 1065 | return error; |
| 1072 | } | 1066 | } |
| 1073 | 1067 | ||
| 1074 | /* | ||
| 1075 | * XXX(truncate): the truncate_setsize calls should be moved to the end. | ||
| 1076 | */ | ||
| 1077 | static int setattr_size(struct inode *inode, struct iattr *attr) | ||
| 1078 | { | ||
| 1079 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 1080 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
| 1081 | int error; | ||
| 1082 | |||
| 1083 | if (attr->ia_size != ip->i_disksize) { | ||
| 1084 | error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); | ||
| 1085 | if (error) | ||
| 1086 | return error; | ||
| 1087 | truncate_setsize(inode, attr->ia_size); | ||
| 1088 | gfs2_trans_end(sdp); | ||
| 1089 | } | ||
| 1090 | |||
| 1091 | error = gfs2_truncatei(ip, attr->ia_size); | ||
| 1092 | if (error && (inode->i_size != ip->i_disksize)) | ||
| 1093 | i_size_write(inode, ip->i_disksize); | ||
| 1094 | |||
| 1095 | return error; | ||
| 1096 | } | ||
| 1097 | |||
| 1098 | static int setattr_chown(struct inode *inode, struct iattr *attr) | 1068 | static int setattr_chown(struct inode *inode, struct iattr *attr) |
| 1099 | { | 1069 | { |
| 1100 | struct gfs2_inode *ip = GFS2_I(inode); | 1070 | struct gfs2_inode *ip = GFS2_I(inode); |
| @@ -1195,7 +1165,7 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 1195 | goto out; | 1165 | goto out; |
| 1196 | 1166 | ||
| 1197 | if (attr->ia_valid & ATTR_SIZE) | 1167 | if (attr->ia_valid & ATTR_SIZE) |
| 1198 | error = setattr_size(inode, attr); | 1168 | error = gfs2_setattr_size(inode, attr->ia_size); |
| 1199 | else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) | 1169 | else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) |
| 1200 | error = setattr_chown(inode, attr); | 1170 | error = setattr_chown(inode, attr); |
| 1201 | else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) | 1171 | else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) |
| @@ -1301,6 +1271,257 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) | |||
| 1301 | return ret; | 1271 | return ret; |
| 1302 | } | 1272 | } |
| 1303 | 1273 | ||
| 1274 | static void empty_write_end(struct page *page, unsigned from, | ||
| 1275 | unsigned to) | ||
| 1276 | { | ||
| 1277 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | ||
| 1278 | |||
| 1279 | page_zero_new_buffers(page, from, to); | ||
| 1280 | flush_dcache_page(page); | ||
| 1281 | mark_page_accessed(page); | ||
| 1282 | |||
| 1283 | if (!gfs2_is_writeback(ip)) | ||
| 1284 | gfs2_page_add_databufs(ip, page, from, to); | ||
| 1285 | |||
| 1286 | block_commit_write(page, from, to); | ||
| 1287 | } | ||
| 1288 | |||
| 1289 | |||
| 1290 | static int write_empty_blocks(struct page *page, unsigned from, unsigned to) | ||
| 1291 | { | ||
| 1292 | unsigned start, end, next; | ||
| 1293 | struct buffer_head *bh, *head; | ||
| 1294 | int error; | ||
| 1295 | |||
| 1296 | if (!page_has_buffers(page)) { | ||
| 1297 | error = block_prepare_write(page, from, to, gfs2_block_map); | ||
| 1298 | if (unlikely(error)) | ||
| 1299 | return error; | ||
| 1300 | |||
| 1301 | empty_write_end(page, from, to); | ||
| 1302 | return 0; | ||
| 1303 | } | ||
| 1304 | |||
| 1305 | bh = head = page_buffers(page); | ||
| 1306 | next = end = 0; | ||
| 1307 | while (next < from) { | ||
| 1308 | next += bh->b_size; | ||
| 1309 | bh = bh->b_this_page; | ||
| 1310 | } | ||
| 1311 | start = next; | ||
| 1312 | do { | ||
| 1313 | next += bh->b_size; | ||
| 1314 | if (buffer_mapped(bh)) { | ||
| 1315 | if (end) { | ||
| 1316 | error = block_prepare_write(page, start, end, | ||
| 1317 | gfs2_block_map); | ||
| 1318 | if (unlikely(error)) | ||
| 1319 | return error; | ||
| 1320 | empty_write_end(page, start, end); | ||
| 1321 | end = 0; | ||
| 1322 | } | ||
| 1323 | start = next; | ||
| 1324 | } | ||
| 1325 | else | ||
| 1326 | end = next; | ||
| 1327 | bh = bh->b_this_page; | ||
| 1328 | } while (next < to); | ||
| 1329 | |||
| 1330 | if (end) { | ||
| 1331 | error = block_prepare_write(page, start, end, gfs2_block_map); | ||
| 1332 | if (unlikely(error)) | ||
| 1333 | return error; | ||
| 1334 | empty_write_end(page, start, end); | ||
| 1335 | } | ||
| 1336 | |||
| 1337 | return 0; | ||
| 1338 | } | ||
| 1339 | |||
| 1340 | static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, | ||
| 1341 | int mode) | ||
| 1342 | { | ||
| 1343 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 1344 | struct buffer_head *dibh; | ||
| 1345 | int error; | ||
| 1346 | u64 start = offset >> PAGE_CACHE_SHIFT; | ||
| 1347 | unsigned int start_offset = offset & ~PAGE_CACHE_MASK; | ||
| 1348 | u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT; | ||
| 1349 | pgoff_t curr; | ||
| 1350 | struct page *page; | ||
| 1351 | unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK; | ||
| 1352 | unsigned int from, to; | ||
| 1353 | |||
| 1354 | if (!end_offset) | ||
| 1355 | end_offset = PAGE_CACHE_SIZE; | ||
| 1356 | |||
| 1357 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
| 1358 | if (unlikely(error)) | ||
| 1359 | goto out; | ||
| 1360 | |||
| 1361 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 1362 | |||
| 1363 | if (gfs2_is_stuffed(ip)) { | ||
| 1364 | error = gfs2_unstuff_dinode(ip, NULL); | ||
| 1365 | if (unlikely(error)) | ||
| 1366 | goto out; | ||
| 1367 | } | ||
| 1368 | |||
| 1369 | curr = start; | ||
| 1370 | offset = start << PAGE_CACHE_SHIFT; | ||
| 1371 | from = start_offset; | ||
| 1372 | to = PAGE_CACHE_SIZE; | ||
| 1373 | while (curr <= end) { | ||
| 1374 | page = grab_cache_page_write_begin(inode->i_mapping, curr, | ||
| 1375 | AOP_FLAG_NOFS); | ||
| 1376 | if (unlikely(!page)) { | ||
| 1377 | error = -ENOMEM; | ||
| 1378 | goto out; | ||
| 1379 | } | ||
| 1380 | |||
| 1381 | if (curr == end) | ||
| 1382 | to = end_offset; | ||
| 1383 | error = write_empty_blocks(page, from, to); | ||
| 1384 | if (!error && offset + to > inode->i_size && | ||
| 1385 | !(mode & FALLOC_FL_KEEP_SIZE)) { | ||
| 1386 | i_size_write(inode, offset + to); | ||
| 1387 | } | ||
| 1388 | unlock_page(page); | ||
| 1389 | page_cache_release(page); | ||
| 1390 | if (error) | ||
| 1391 | goto out; | ||
| 1392 | curr++; | ||
| 1393 | offset += PAGE_CACHE_SIZE; | ||
| 1394 | from = 0; | ||
| 1395 | } | ||
| 1396 | |||
| 1397 | gfs2_dinode_out(ip, dibh->b_data); | ||
| 1398 | mark_inode_dirty(inode); | ||
| 1399 | |||
| 1400 | brelse(dibh); | ||
| 1401 | |||
| 1402 | out: | ||
| 1403 | return error; | ||
| 1404 | } | ||
| 1405 | |||
| 1406 | static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, | ||
| 1407 | unsigned int *data_blocks, unsigned int *ind_blocks) | ||
| 1408 | { | ||
| 1409 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 1410 | unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone; | ||
| 1411 | unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); | ||
| 1412 | |||
| 1413 | for (tmp = max_data; tmp > sdp->sd_diptrs;) { | ||
| 1414 | tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); | ||
| 1415 | max_data -= tmp; | ||
| 1416 | } | ||
| 1417 | /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, | ||
| 1418 | so it might end up with fewer data blocks */ | ||
| 1419 | if (max_data <= *data_blocks) | ||
| 1420 | return; | ||
| 1421 | *data_blocks = max_data; | ||
| 1422 | *ind_blocks = max_blocks - max_data; | ||
| 1423 | *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; | ||
| 1424 | if (*len > max) { | ||
| 1425 | *len = max; | ||
| 1426 | gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks); | ||
| 1427 | } | ||
| 1428 | } | ||
| 1429 | |||
| 1430 | static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset, | ||
| 1431 | loff_t len) | ||
| 1432 | { | ||
| 1433 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
| 1434 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 1435 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; | ||
| 1436 | loff_t bytes, max_bytes; | ||
| 1437 | struct gfs2_alloc *al; | ||
| 1438 | int error; | ||
| 1439 | loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; | ||
| 1440 | next = (next + 1) << sdp->sd_sb.sb_bsize_shift; | ||
| 1441 | |||
| 1442 | offset = (offset >> sdp->sd_sb.sb_bsize_shift) << | ||
| 1443 | sdp->sd_sb.sb_bsize_shift; | ||
| 1444 | |||
| 1445 | len = next - offset; | ||
| 1446 | bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; | ||
| 1447 | if (!bytes) | ||
| 1448 | bytes = UINT_MAX; | ||
| 1449 | |||
| 1450 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); | ||
| 1451 | error = gfs2_glock_nq(&ip->i_gh); | ||
| 1452 | if (unlikely(error)) | ||
| 1453 | goto out_uninit; | ||
| 1454 | |||
| 1455 | if (!gfs2_write_alloc_required(ip, offset, len)) | ||
| 1456 | goto out_unlock; | ||
| 1457 | |||
| 1458 | while (len > 0) { | ||
| 1459 | if (len < bytes) | ||
| 1460 | bytes = len; | ||
| 1461 | al = gfs2_alloc_get(ip); | ||
| 1462 | if (!al) { | ||
| 1463 | error = -ENOMEM; | ||
| 1464 | goto out_unlock; | ||
| 1465 | } | ||
| 1466 | |||
| 1467 | error = gfs2_quota_lock_check(ip); | ||
| 1468 | if (error) | ||
| 1469 | goto out_alloc_put; | ||
| 1470 | |||
| 1471 | retry: | ||
| 1472 | gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); | ||
| 1473 | |||
| 1474 | al->al_requested = data_blocks + ind_blocks; | ||
| 1475 | error = gfs2_inplace_reserve(ip); | ||
| 1476 | if (error) { | ||
| 1477 | if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { | ||
| 1478 | bytes >>= 1; | ||
| 1479 | goto retry; | ||
| 1480 | } | ||
| 1481 | goto out_qunlock; | ||
| 1482 | } | ||
| 1483 | max_bytes = bytes; | ||
| 1484 | calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks); | ||
| 1485 | al->al_requested = data_blocks + ind_blocks; | ||
| 1486 | |||
| 1487 | rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + | ||
| 1488 | RES_RG_HDR + gfs2_rg_blocks(al); | ||
| 1489 | if (gfs2_is_jdata(ip)) | ||
| 1490 | rblocks += data_blocks ? data_blocks : 1; | ||
| 1491 | |||
| 1492 | error = gfs2_trans_begin(sdp, rblocks, | ||
| 1493 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); | ||
| 1494 | if (error) | ||
| 1495 | goto out_trans_fail; | ||
| 1496 | |||
| 1497 | error = fallocate_chunk(inode, offset, max_bytes, mode); | ||
| 1498 | gfs2_trans_end(sdp); | ||
| 1499 | |||
| 1500 | if (error) | ||
| 1501 | goto out_trans_fail; | ||
| 1502 | |||
| 1503 | len -= max_bytes; | ||
| 1504 | offset += max_bytes; | ||
| 1505 | gfs2_inplace_release(ip); | ||
| 1506 | gfs2_quota_unlock(ip); | ||
| 1507 | gfs2_alloc_put(ip); | ||
| 1508 | } | ||
| 1509 | goto out_unlock; | ||
| 1510 | |||
| 1511 | out_trans_fail: | ||
| 1512 | gfs2_inplace_release(ip); | ||
| 1513 | out_qunlock: | ||
| 1514 | gfs2_quota_unlock(ip); | ||
| 1515 | out_alloc_put: | ||
| 1516 | gfs2_alloc_put(ip); | ||
| 1517 | out_unlock: | ||
| 1518 | gfs2_glock_dq(&ip->i_gh); | ||
| 1519 | out_uninit: | ||
| 1520 | gfs2_holder_uninit(&ip->i_gh); | ||
| 1521 | return error; | ||
| 1522 | } | ||
| 1523 | |||
| 1524 | |||
| 1304 | static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 1525 | static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
| 1305 | u64 start, u64 len) | 1526 | u64 start, u64 len) |
| 1306 | { | 1527 | { |
| @@ -1351,6 +1572,7 @@ const struct inode_operations gfs2_file_iops = { | |||
| 1351 | .getxattr = gfs2_getxattr, | 1572 | .getxattr = gfs2_getxattr, |
| 1352 | .listxattr = gfs2_listxattr, | 1573 | .listxattr = gfs2_listxattr, |
| 1353 | .removexattr = gfs2_removexattr, | 1574 | .removexattr = gfs2_removexattr, |
| 1575 | .fallocate = gfs2_fallocate, | ||
| 1354 | .fiemap = gfs2_fiemap, | 1576 | .fiemap = gfs2_fiemap, |
| 1355 | }; | 1577 | }; |
| 1356 | 1578 | ||
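The .fallocate hook wired up above preallocates in chunks: gfs2_fallocate() rounds the request out to whole filesystem blocks, reserves and zero-fills each chunk inside its own transaction, and on -ENOSPC halves the chunk size and retries rather than failing the whole call. A minimal user-space sketch of that control flow; reserve(), the chunk sizes, and fallocate_chunked() are stand-ins, not the real GFS2 interfaces:

```c
#include <stdint.h>
#include <stdio.h>

#define BSIZE_SHIFT 12                  /* assume 4 KiB filesystem blocks */

/* Stand-in for gfs2_inplace_reserve(): pretend big requests hit -ENOSPC. */
static int reserve(uint64_t bytes)
{
	return bytes > (1 << 20) ? -1 : 0;
}

static void fallocate_chunked(uint64_t offset, uint64_t len)
{
	/* Round the request out to whole blocks, as gfs2_fallocate() does. */
	uint64_t next = (((offset + len - 1) >> BSIZE_SHIFT) + 1) << BSIZE_SHIFT;

	offset = (offset >> BSIZE_SHIFT) << BSIZE_SHIFT;
	len = next - offset;

	uint64_t bytes = 1 << 22;       /* initial chunk guess; the real code
					   starts at half the largest rgrp */
	while (len > 0) {
		if (len < bytes)
			bytes = len;
		while (reserve(bytes)) {
			if (bytes <= (1 << BSIZE_SHIFT))
				return; /* genuinely out of space */
			bytes >>= 1;    /* halve the chunk and retry */
		}
		printf("allocated %llu bytes at %llu\n",
		       (unsigned long long)bytes, (unsigned long long)offset);
		len -= bytes;
		offset += bytes;
	}
}

int main(void)
{
	fallocate_chunked(4000, 10 << 20);
	return 0;
}
```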
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 1bc6b5695e6d..58a9b9998b42 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
| @@ -735,10 +735,8 @@ get_a_page: | |||
| 735 | goto out; | 735 | goto out; |
| 736 | 736 | ||
| 737 | size = loc + sizeof(struct gfs2_quota); | 737 | size = loc + sizeof(struct gfs2_quota); |
| 738 | if (size > inode->i_size) { | 738 | if (size > inode->i_size) |
| 739 | ip->i_disksize = size; | ||
| 740 | i_size_write(inode, size); | 739 | i_size_write(inode, size); |
| 741 | } | ||
| 742 | inode->i_mtime = inode->i_atime = CURRENT_TIME; | 740 | inode->i_mtime = inode->i_atime = CURRENT_TIME; |
| 743 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 741 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
| 744 | gfs2_dinode_out(ip, dibh->b_data); | 742 | gfs2_dinode_out(ip, dibh->b_data); |
| @@ -817,7 +815,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) | |||
| 817 | goto out_alloc; | 815 | goto out_alloc; |
| 818 | 816 | ||
| 819 | if (nalloc) | 817 | if (nalloc) |
| 820 | blocks += al->al_rgd->rd_length + nalloc * ind_blocks + RES_STATFS; | 818 | blocks += gfs2_rg_blocks(al) + nalloc * ind_blocks + RES_STATFS; |
| 821 | 819 | ||
| 822 | error = gfs2_trans_begin(sdp, blocks, 0); | 820 | error = gfs2_trans_begin(sdp, blocks, 0); |
| 823 | if (error) | 821 | if (error) |
| @@ -1190,18 +1188,17 @@ static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void * | |||
| 1190 | int gfs2_quota_init(struct gfs2_sbd *sdp) | 1188 | int gfs2_quota_init(struct gfs2_sbd *sdp) |
| 1191 | { | 1189 | { |
| 1192 | struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); | 1190 | struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); |
| 1193 | unsigned int blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; | 1191 | u64 size = i_size_read(sdp->sd_qc_inode); |
| 1192 | unsigned int blocks = size >> sdp->sd_sb.sb_bsize_shift; | ||
| 1194 | unsigned int x, slot = 0; | 1193 | unsigned int x, slot = 0; |
| 1195 | unsigned int found = 0; | 1194 | unsigned int found = 0; |
| 1196 | u64 dblock; | 1195 | u64 dblock; |
| 1197 | u32 extlen = 0; | 1196 | u32 extlen = 0; |
| 1198 | int error; | 1197 | int error; |
| 1199 | 1198 | ||
| 1200 | if (!ip->i_disksize || ip->i_disksize > (64 << 20) || | 1199 | if (gfs2_check_internal_file_size(sdp->sd_qc_inode, 1, 64 << 20)) |
| 1201 | ip->i_disksize & (sdp->sd_sb.sb_bsize - 1)) { | ||
| 1202 | gfs2_consist_inode(ip); | ||
| 1203 | return -EIO; | 1200 | return -EIO; |
| 1204 | } | 1201 | |
| 1205 | sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block; | 1202 | sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block; |
| 1206 | sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE); | 1203 | sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE); |
| 1207 | 1204 | ||
| @@ -1589,6 +1586,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, | |||
| 1589 | error = gfs2_inplace_reserve(ip); | 1586 | error = gfs2_inplace_reserve(ip); |
| 1590 | if (error) | 1587 | if (error) |
| 1591 | goto out_alloc; | 1588 | goto out_alloc; |
| 1589 | blocks += gfs2_rg_blocks(al); | ||
| 1592 | } | 1590 | } |
| 1593 | 1591 | ||
| 1594 | error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); | 1592 | error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); |
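gfs2_quota_init() now delegates its sanity checks to gfs2_check_internal_file_size(), the same helper the journal code uses further down. A plain-C sketch of what the call sites imply the helper verifies; the body is inferred from those call sites, not quoted from the patch (the real helper also marks the inode inconsistent before returning -EIO):

```c
#include <stdint.h>
#include <stdio.h>

/* Sketch: an internal file (quota change file, journal) must fall within
 * [minsize, maxsize] and be an exact multiple of the block size.
 * bsize is assumed to be a power of two. */
static int check_internal_file_size(uint64_t size, uint64_t minsize,
				    uint64_t maxsize, uint64_t bsize)
{
	if (size < minsize || size > maxsize)
		return -5;              /* models -EIO */
	if (size & (bsize - 1))
		return -5;              /* not block aligned */
	return 0;
}

int main(void)
{
	/* the quota-change check above: at least 1 byte, at most 64 MiB */
	printf("%d\n", check_internal_file_size(8 << 20, 1, 64 << 20, 4096));
	return 0;
}
```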
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index f7f89a94a5a4..f2a02edcac8f 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c | |||
| @@ -455,11 +455,13 @@ void gfs2_recover_func(struct work_struct *work) | |||
| 455 | int ro = 0; | 455 | int ro = 0; |
| 456 | unsigned int pass; | 456 | unsigned int pass; |
| 457 | int error; | 457 | int error; |
| 458 | int jlocked = 0; | ||
| 458 | 459 | ||
| 459 | if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { | 460 | if (sdp->sd_args.ar_spectator || |
| 461 | (jd->jd_jid != sdp->sd_lockstruct.ls_jid)) { | ||
| 460 | fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", | 462 | fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", |
| 461 | jd->jd_jid); | 463 | jd->jd_jid); |
| 462 | 464 | jlocked = 1; | |
| 463 | /* Acquire the journal lock so we can do recovery */ | 465 | /* Acquire the journal lock so we can do recovery */ |
| 464 | 466 | ||
| 465 | error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops, | 467 | error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops, |
| @@ -554,13 +556,12 @@ void gfs2_recover_func(struct work_struct *work) | |||
| 554 | jd->jd_jid, t); | 556 | jd->jd_jid, t); |
| 555 | } | 557 | } |
| 556 | 558 | ||
| 557 | if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) | ||
| 558 | gfs2_glock_dq_uninit(&ji_gh); | ||
| 559 | |||
| 560 | gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS); | 559 | gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS); |
| 561 | 560 | ||
| 562 | if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) | 561 | if (jlocked) { |
| 562 | gfs2_glock_dq_uninit(&ji_gh); | ||
| 563 | gfs2_glock_dq_uninit(&j_gh); | 563 | gfs2_glock_dq_uninit(&j_gh); |
| 564 | } | ||
| 564 | 565 | ||
| 565 | fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); | 566 | fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); |
| 566 | goto done; | 567 | goto done; |
| @@ -568,7 +569,7 @@ void gfs2_recover_func(struct work_struct *work) | |||
| 568 | fail_gunlock_tr: | 569 | fail_gunlock_tr: |
| 569 | gfs2_glock_dq_uninit(&t_gh); | 570 | gfs2_glock_dq_uninit(&t_gh); |
| 570 | fail_gunlock_ji: | 571 | fail_gunlock_ji: |
| 571 | if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { | 572 | if (jlocked) { |
| 572 | gfs2_glock_dq_uninit(&ji_gh); | 573 | gfs2_glock_dq_uninit(&ji_gh); |
| 573 | fail_gunlock_j: | 574 | fail_gunlock_j: |
| 574 | gfs2_glock_dq_uninit(&j_gh); | 575 | gfs2_glock_dq_uninit(&j_gh); |
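gfs2_recover_func() used to re-derive whether it had taken the journal glock from jd->jd_jid != ls_jid at each unlock site; with spectator mounts now recovering other nodes' journals too, a single jlocked flag is set where the lock is taken and tested everywhere else. The same pattern sketched with pthreads standing in for glocks:

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t journal_lock = PTHREAD_MUTEX_INITIALIZER;

/* "Record what you locked": the flag is set at the one place the
 * conditional lock is taken, and every exit path tests only the flag
 * instead of re-deriving the condition. */
static int recover(int need_lock)
{
	int jlocked = 0;

	if (need_lock) {
		pthread_mutex_lock(&journal_lock);
		jlocked = 1;
	}

	/* ... journal replay would happen here ... */

	if (jlocked)
		pthread_mutex_unlock(&journal_lock);
	return 0;
}

int main(void)
{
	recover(1);
	recover(0);
	puts("done");
	return 0;
}
```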
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 171a744f8e45..fb67f593f408 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
| @@ -500,7 +500,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp) | |||
| 500 | for (rgrps = 0;; rgrps++) { | 500 | for (rgrps = 0;; rgrps++) { |
| 501 | loff_t pos = rgrps * sizeof(struct gfs2_rindex); | 501 | loff_t pos = rgrps * sizeof(struct gfs2_rindex); |
| 502 | 502 | ||
| 503 | if (pos + sizeof(struct gfs2_rindex) >= ip->i_disksize) | 503 | if (pos + sizeof(struct gfs2_rindex) >= i_size_read(inode)) |
| 504 | break; | 504 | break; |
| 505 | error = gfs2_internal_read(ip, &ra_state, buf, &pos, | 505 | error = gfs2_internal_read(ip, &ra_state, buf, &pos, |
| 506 | sizeof(struct gfs2_rindex)); | 506 | sizeof(struct gfs2_rindex)); |
| @@ -588,7 +588,9 @@ static int gfs2_ri_update(struct gfs2_inode *ip) | |||
| 588 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 588 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 589 | struct inode *inode = &ip->i_inode; | 589 | struct inode *inode = &ip->i_inode; |
| 590 | struct file_ra_state ra_state; | 590 | struct file_ra_state ra_state; |
| 591 | u64 rgrp_count = ip->i_disksize; | 591 | u64 rgrp_count = i_size_read(inode); |
| 592 | struct gfs2_rgrpd *rgd; | ||
| 593 | unsigned int max_data = 0; | ||
| 592 | int error; | 594 | int error; |
| 593 | 595 | ||
| 594 | do_div(rgrp_count, sizeof(struct gfs2_rindex)); | 596 | do_div(rgrp_count, sizeof(struct gfs2_rindex)); |
| @@ -603,6 +605,10 @@ static int gfs2_ri_update(struct gfs2_inode *ip) | |||
| 603 | } | 605 | } |
| 604 | } | 606 | } |
| 605 | 607 | ||
| 608 | list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list) | ||
| 609 | if (rgd->rd_data > max_data) | ||
| 610 | max_data = rgd->rd_data; | ||
| 611 | sdp->sd_max_rg_data = max_data; | ||
| 606 | sdp->sd_rindex_uptodate = 1; | 612 | sdp->sd_rindex_uptodate = 1; |
| 607 | return 0; | 613 | return 0; |
| 608 | } | 614 | } |
| @@ -622,13 +628,15 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip) | |||
| 622 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 628 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 623 | struct inode *inode = &ip->i_inode; | 629 | struct inode *inode = &ip->i_inode; |
| 624 | struct file_ra_state ra_state; | 630 | struct file_ra_state ra_state; |
| 631 | struct gfs2_rgrpd *rgd; | ||
| 632 | unsigned int max_data = 0; | ||
| 625 | int error; | 633 | int error; |
| 626 | 634 | ||
| 627 | file_ra_state_init(&ra_state, inode->i_mapping); | 635 | file_ra_state_init(&ra_state, inode->i_mapping); |
| 628 | for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { | 636 | for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { |
| 629 | /* Ignore partials */ | 637 | /* Ignore partials */ |
| 630 | if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) > | 638 | if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) > |
| 631 | ip->i_disksize) | 639 | i_size_read(inode)) |
| 632 | break; | 640 | break; |
| 633 | error = read_rindex_entry(ip, &ra_state); | 641 | error = read_rindex_entry(ip, &ra_state); |
| 634 | if (error) { | 642 | if (error) { |
| @@ -636,6 +644,10 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip) | |||
| 636 | return error; | 644 | return error; |
| 637 | } | 645 | } |
| 638 | } | 646 | } |
| 647 | list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list) | ||
| 648 | if (rgd->rd_data > max_data) | ||
| 649 | max_data = rgd->rd_data; | ||
| 650 | sdp->sd_max_rg_data = max_data; | ||
| 639 | 651 | ||
| 640 | sdp->sd_rindex_uptodate = 1; | 652 | sdp->sd_rindex_uptodate = 1; |
| 641 | return 0; | 653 | return 0; |
| @@ -1188,7 +1200,8 @@ out: | |||
| 1188 | * Returns: errno | 1200 | * Returns: errno |
| 1189 | */ | 1201 | */ |
| 1190 | 1202 | ||
| 1191 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) | 1203 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, |
| 1204 | char *file, unsigned int line) | ||
| 1192 | { | 1205 | { |
| 1193 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1206 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 1194 | struct gfs2_alloc *al = ip->i_alloc; | 1207 | struct gfs2_alloc *al = ip->i_alloc; |
| @@ -1199,12 +1212,15 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) | |||
| 1199 | return -EINVAL; | 1212 | return -EINVAL; |
| 1200 | 1213 | ||
| 1201 | try_again: | 1214 | try_again: |
| 1202 | /* We need to hold the rindex unless the inode we're using is | 1215 | if (hold_rindex) { |
| 1203 | the rindex itself, in which case it's already held. */ | 1216 | /* We need to hold the rindex unless the inode we're using is |
| 1204 | if (ip != GFS2_I(sdp->sd_rindex)) | 1217 | the rindex itself, in which case it's already held. */ |
| 1205 | error = gfs2_rindex_hold(sdp, &al->al_ri_gh); | 1218 | if (ip != GFS2_I(sdp->sd_rindex)) |
| 1206 | else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */ | 1219 | error = gfs2_rindex_hold(sdp, &al->al_ri_gh); |
| 1207 | error = gfs2_ri_update_special(ip); | 1220 | else if (!sdp->sd_rgrps) /* We may not have the rindex read |
| 1221 | in, so: */ | ||
| 1222 | error = gfs2_ri_update_special(ip); | ||
| 1223 | } | ||
| 1208 | 1224 | ||
| 1209 | if (error) | 1225 | if (error) |
| 1210 | return error; | 1226 | return error; |
| @@ -1215,7 +1231,7 @@ try_again: | |||
| 1215 | try to free it, and try the allocation again. */ | 1231 | try to free it, and try the allocation again. */ |
| 1216 | error = get_local_rgrp(ip, &unlinked, &last_unlinked); | 1232 | error = get_local_rgrp(ip, &unlinked, &last_unlinked); |
| 1217 | if (error) { | 1233 | if (error) { |
| 1218 | if (ip != GFS2_I(sdp->sd_rindex)) | 1234 | if (hold_rindex && ip != GFS2_I(sdp->sd_rindex)) |
| 1219 | gfs2_glock_dq_uninit(&al->al_ri_gh); | 1235 | gfs2_glock_dq_uninit(&al->al_ri_gh); |
| 1220 | if (error != -EAGAIN) | 1236 | if (error != -EAGAIN) |
| 1221 | return error; | 1237 | return error; |
| @@ -1257,7 +1273,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip) | |||
| 1257 | al->al_rgd = NULL; | 1273 | al->al_rgd = NULL; |
| 1258 | if (al->al_rgd_gh.gh_gl) | 1274 | if (al->al_rgd_gh.gh_gl) |
| 1259 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1275 | gfs2_glock_dq_uninit(&al->al_rgd_gh); |
| 1260 | if (ip != GFS2_I(sdp->sd_rindex)) | 1276 | if (ip != GFS2_I(sdp->sd_rindex) && al->al_ri_gh.gh_gl) |
| 1261 | gfs2_glock_dq_uninit(&al->al_ri_gh); | 1277 | gfs2_glock_dq_uninit(&al->al_ri_gh); |
| 1262 | } | 1278 | } |
| 1263 | 1279 | ||
| @@ -1496,11 +1512,19 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) | |||
| 1496 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1512 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 1497 | struct buffer_head *dibh; | 1513 | struct buffer_head *dibh; |
| 1498 | struct gfs2_alloc *al = ip->i_alloc; | 1514 | struct gfs2_alloc *al = ip->i_alloc; |
| 1499 | struct gfs2_rgrpd *rgd = al->al_rgd; | 1515 | struct gfs2_rgrpd *rgd; |
| 1500 | u32 goal, blk; | 1516 | u32 goal, blk; |
| 1501 | u64 block; | 1517 | u64 block; |
| 1502 | int error; | 1518 | int error; |
| 1503 | 1519 | ||
| 1520 | /* Only happens if there is a bug in gfs2, return something distinctive | ||
| 1521 | * to ensure that it is noticed. | ||
| 1522 | */ | ||
| 1523 | if (al == NULL) | ||
| 1524 | return -ECANCELED; | ||
| 1525 | |||
| 1526 | rgd = al->al_rgd; | ||
| 1527 | |||
| 1504 | if (rgrp_contains_block(rgd, ip->i_goal)) | 1528 | if (rgrp_contains_block(rgd, ip->i_goal)) |
| 1505 | goal = ip->i_goal - rgd->rd_data0; | 1529 | goal = ip->i_goal - rgd->rd_data0; |
| 1506 | else | 1530 | else |
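Both rindex readers finish with the same new post-pass: walk sd_rindex_list and remember the largest rd_data, which gfs2_fallocate() uses to size its initial chunk. A stand-alone sketch of that walk, with an ordinary linked list in place of the kernel list:

```c
#include <stdio.h>

struct rgrpd {
	unsigned int rd_data;           /* data blocks in this rgrp */
	struct rgrpd *next;             /* stands in for sd_rindex_list */
};

/* Sketch of the sd_max_rg_data computation added to both rindex updaters:
 * a plain maximum over every resource group that was read in. */
static unsigned int max_rg_data(const struct rgrpd *head)
{
	unsigned int max = 0;
	const struct rgrpd *rgd;

	for (rgd = head; rgd; rgd = rgd->next)
		if (rgd->rd_data > max)
			max = rgd->rd_data;
	return max;
}

int main(void)
{
	struct rgrpd c = { 700, NULL }, b = { 900, &c }, a = { 400, &b };

	printf("max=%u\n", max_rg_data(&a));
	return 0;
}
```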
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index f07119d89557..0e35c0466f9a 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
| @@ -39,10 +39,12 @@ static inline void gfs2_alloc_put(struct gfs2_inode *ip) | |||
| 39 | ip->i_alloc = NULL; | 39 | ip->i_alloc = NULL; |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, | 42 | extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, |
| 43 | unsigned int line); | 43 | char *file, unsigned int line); |
| 44 | #define gfs2_inplace_reserve(ip) \ | 44 | #define gfs2_inplace_reserve(ip) \ |
| 45 | gfs2_inplace_reserve_i((ip), __FILE__, __LINE__) | 45 | gfs2_inplace_reserve_i((ip), 1, __FILE__, __LINE__) |
| 46 | #define gfs2_inplace_reserve_ri(ip) \ | ||
| 47 | gfs2_inplace_reserve_i((ip), 0, __FILE__, __LINE__) | ||
| 46 | 48 | ||
| 47 | extern void gfs2_inplace_release(struct gfs2_inode *ip); | 49 | extern void gfs2_inplace_release(struct gfs2_inode *ip); |
| 48 | 50 | ||
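The header keeps the established wrapper-macro pattern: gfs2_inplace_reserve_i() takes the caller's file and line for diagnostics, thin macros supply them, and the new take-no-rindex variant is just another macro passing a different flag. The pattern in isolation, with shortened names and an illustrative body:

```c
#include <stdio.h>

static int inplace_reserve_i(int hold_rindex, const char *file,
			     unsigned int line)
{
	printf("reserve (hold_rindex=%d) from %s:%u\n",
	       hold_rindex, file, line);
	return 0;
}

/* Each caller gets its own file:line in diagnostics for free, and adding
 * a variant costs one macro, not a new function. */
#define inplace_reserve()    inplace_reserve_i(1, __FILE__, __LINE__)
#define inplace_reserve_ri() inplace_reserve_i(0, __FILE__, __LINE__)

int main(void)
{
	inplace_reserve();
	inplace_reserve_ri();
	return 0;
}
```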
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 77cb9f830ee4..047d1176096c 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
| @@ -85,6 +85,7 @@ static const match_table_t tokens = { | |||
| 85 | {Opt_locktable, "locktable=%s"}, | 85 | {Opt_locktable, "locktable=%s"}, |
| 86 | {Opt_hostdata, "hostdata=%s"}, | 86 | {Opt_hostdata, "hostdata=%s"}, |
| 87 | {Opt_spectator, "spectator"}, | 87 | {Opt_spectator, "spectator"}, |
| 88 | {Opt_spectator, "norecovery"}, | ||
| 88 | {Opt_ignore_local_fs, "ignore_local_fs"}, | 89 | {Opt_ignore_local_fs, "ignore_local_fs"}, |
| 89 | {Opt_localflocks, "localflocks"}, | 90 | {Opt_localflocks, "localflocks"}, |
| 90 | {Opt_localcaching, "localcaching"}, | 91 | {Opt_localcaching, "localcaching"}, |
| @@ -159,13 +160,13 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
| 159 | args->ar_spectator = 1; | 160 | args->ar_spectator = 1; |
| 160 | break; | 161 | break; |
| 161 | case Opt_ignore_local_fs: | 162 | case Opt_ignore_local_fs: |
| 162 | args->ar_ignore_local_fs = 1; | 163 | /* Retained for backwards compat only */ |
| 163 | break; | 164 | break; |
| 164 | case Opt_localflocks: | 165 | case Opt_localflocks: |
| 165 | args->ar_localflocks = 1; | 166 | args->ar_localflocks = 1; |
| 166 | break; | 167 | break; |
| 167 | case Opt_localcaching: | 168 | case Opt_localcaching: |
| 168 | args->ar_localcaching = 1; | 169 | /* Retained for backwards compat only */ |
| 169 | break; | 170 | break; |
| 170 | case Opt_debug: | 171 | case Opt_debug: |
| 171 | if (args->ar_errors == GFS2_ERRORS_PANIC) { | 172 | if (args->ar_errors == GFS2_ERRORS_PANIC) { |
| @@ -179,7 +180,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
| 179 | args->ar_debug = 0; | 180 | args->ar_debug = 0; |
| 180 | break; | 181 | break; |
| 181 | case Opt_upgrade: | 182 | case Opt_upgrade: |
| 182 | args->ar_upgrade = 1; | 183 | /* Retained for backwards compat only */ |
| 183 | break; | 184 | break; |
| 184 | case Opt_acl: | 185 | case Opt_acl: |
| 185 | args->ar_posix_acl = 1; | 186 | args->ar_posix_acl = 1; |
| @@ -342,15 +343,14 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd) | |||
| 342 | { | 343 | { |
| 343 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); | 344 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); |
| 344 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | 345 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); |
| 346 | u64 size = i_size_read(jd->jd_inode); | ||
| 345 | 347 | ||
| 346 | if (ip->i_disksize < (8 << 20) || ip->i_disksize > (1 << 30) || | 348 | if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, 1 << 30)) |
| 347 | (ip->i_disksize & (sdp->sd_sb.sb_bsize - 1))) { | ||
| 348 | gfs2_consist_inode(ip); | ||
| 349 | return -EIO; | 349 | return -EIO; |
| 350 | } | ||
| 351 | jd->jd_blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; | ||
| 352 | 350 | ||
| 353 | if (gfs2_write_alloc_required(ip, 0, ip->i_disksize)) { | 351 | jd->jd_blocks = size >> sdp->sd_sb.sb_bsize_shift; |
| 352 | |||
| 353 | if (gfs2_write_alloc_required(ip, 0, size)) { | ||
| 354 | gfs2_consist_inode(ip); | 354 | gfs2_consist_inode(ip); |
| 355 | return -EIO; | 355 | return -EIO; |
| 356 | } | 356 | } |
| @@ -1129,9 +1129,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
| 1129 | 1129 | ||
| 1130 | /* Some flags must not be changed */ | 1130 | /* Some flags must not be changed */ |
| 1131 | if (args_neq(&args, &sdp->sd_args, spectator) || | 1131 | if (args_neq(&args, &sdp->sd_args, spectator) || |
| 1132 | args_neq(&args, &sdp->sd_args, ignore_local_fs) || | ||
| 1133 | args_neq(&args, &sdp->sd_args, localflocks) || | 1132 | args_neq(&args, &sdp->sd_args, localflocks) || |
| 1134 | args_neq(&args, &sdp->sd_args, localcaching) || | ||
| 1135 | args_neq(&args, &sdp->sd_args, meta)) | 1133 | args_neq(&args, &sdp->sd_args, meta)) |
| 1136 | return -EINVAL; | 1134 | return -EINVAL; |
| 1137 | 1135 | ||
| @@ -1234,16 +1232,10 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
| 1234 | seq_printf(s, ",hostdata=%s", args->ar_hostdata); | 1232 | seq_printf(s, ",hostdata=%s", args->ar_hostdata); |
| 1235 | if (args->ar_spectator) | 1233 | if (args->ar_spectator) |
| 1236 | seq_printf(s, ",spectator"); | 1234 | seq_printf(s, ",spectator"); |
| 1237 | if (args->ar_ignore_local_fs) | ||
| 1238 | seq_printf(s, ",ignore_local_fs"); | ||
| 1239 | if (args->ar_localflocks) | 1235 | if (args->ar_localflocks) |
| 1240 | seq_printf(s, ",localflocks"); | 1236 | seq_printf(s, ",localflocks"); |
| 1241 | if (args->ar_localcaching) | ||
| 1242 | seq_printf(s, ",localcaching"); | ||
| 1243 | if (args->ar_debug) | 1237 | if (args->ar_debug) |
| 1244 | seq_printf(s, ",debug"); | 1238 | seq_printf(s, ",debug"); |
| 1245 | if (args->ar_upgrade) | ||
| 1246 | seq_printf(s, ",upgrade"); | ||
| 1247 | if (args->ar_posix_acl) | 1239 | if (args->ar_posix_acl) |
| 1248 | seq_printf(s, ",acl"); | 1240 | seq_printf(s, ",acl"); |
| 1249 | if (args->ar_quota != GFS2_QUOTA_DEFAULT) { | 1241 | if (args->ar_quota != GFS2_QUOTA_DEFAULT) { |
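The mount-option policy here is worth noting: deprecated options stay in the token table so existing fstabs keep mounting, but parsing them is a no-op and they disappear from show_options, while "norecovery" becomes a plain alias for spectator. A sketch of that policy, with a string-compare parser standing in for the kernel's match_token():

```c
#include <stdio.h>
#include <string.h>

struct args { int spectator; };

static int parse_opt(struct args *a, const char *opt)
{
	if (!strcmp(opt, "spectator") || !strcmp(opt, "norecovery")) {
		a->spectator = 1;               /* alias shares one flag */
	} else if (!strcmp(opt, "ignore_local_fs") ||
		   !strcmp(opt, "localcaching") ||
		   !strcmp(opt, "upgrade")) {
		/* retained for backwards compat only: accepted, ignored */
	} else {
		return -1;                      /* models -EINVAL */
	}
	return 0;
}

int main(void)
{
	struct args a = { 0 };

	parse_opt(&a, "norecovery");
	printf("spectator=%d\n", a.spectator);
	return 0;
}
```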
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index ccacffd2faaa..748ccb557c18 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
| @@ -230,7 +230,10 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len | |||
| 230 | 230 | ||
| 231 | if (gltype > LM_TYPE_JOURNAL) | 231 | if (gltype > LM_TYPE_JOURNAL) |
| 232 | return -EINVAL; | 232 | return -EINVAL; |
| 233 | glops = gfs2_glops_list[gltype]; | 233 | if (gltype == LM_TYPE_NONDISK && glnum == GFS2_TRANS_LOCK) |
| 234 | glops = &gfs2_trans_glops; | ||
| 235 | else | ||
| 236 | glops = gfs2_glops_list[gltype]; | ||
| 234 | if (glops == NULL) | 237 | if (glops == NULL) |
| 235 | return -EINVAL; | 238 | return -EINVAL; |
| 236 | if (!test_and_set_bit(SDF_DEMOTE, &sdp->sd_flags)) | 239 | if (!test_and_set_bit(SDF_DEMOTE, &sdp->sd_flags)) |
| @@ -399,31 +402,32 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf) | |||
| 399 | 402 | ||
| 400 | static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) | 403 | static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) |
| 401 | { | 404 | { |
| 402 | return sprintf(buf, "%u\n", sdp->sd_lockstruct.ls_jid); | 405 | return sprintf(buf, "%d\n", sdp->sd_lockstruct.ls_jid); |
| 403 | } | 406 | } |
| 404 | 407 | ||
| 405 | static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) | 408 | static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) |
| 406 | { | 409 | { |
| 407 | unsigned jid; | 410 | int jid; |
| 408 | int rv; | 411 | int rv; |
| 409 | 412 | ||
| 410 | rv = sscanf(buf, "%u", &jid); | 413 | rv = sscanf(buf, "%d", &jid); |
| 411 | if (rv != 1) | 414 | if (rv != 1) |
| 412 | return -EINVAL; | 415 | return -EINVAL; |
| 413 | 416 | ||
| 414 | spin_lock(&sdp->sd_jindex_spin); | 417 | spin_lock(&sdp->sd_jindex_spin); |
| 415 | rv = -EINVAL; | 418 | rv = -EINVAL; |
| 416 | if (sdp->sd_args.ar_spectator) | ||
| 417 | goto out; | ||
| 418 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) | 419 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) |
| 419 | goto out; | 420 | goto out; |
| 420 | rv = -EBUSY; | 421 | rv = -EBUSY; |
| 421 | if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0) | 422 | if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0) |
| 422 | goto out; | 423 | goto out; |
| 424 | rv = 0; | ||
| 425 | if (sdp->sd_args.ar_spectator && jid > 0) | ||
| 426 | rv = jid = -EINVAL; | ||
| 423 | sdp->sd_lockstruct.ls_jid = jid; | 427 | sdp->sd_lockstruct.ls_jid = jid; |
| 428 | clear_bit(SDF_NOJOURNALID, &sdp->sd_flags); | ||
| 424 | smp_mb__after_clear_bit(); | 429 | smp_mb__after_clear_bit(); |
| 425 | wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); | 430 | wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); |
| 426 | rv = 0; | ||
| 427 | out: | 431 | out: |
| 428 | spin_unlock(&sdp->sd_jindex_spin); | 432 | spin_unlock(&sdp->sd_jindex_spin); |
| 429 | return rv ? rv : len; | 433 | return rv ? rv : len; |
| @@ -617,7 +621,7 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj, | |||
| 617 | add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); | 621 | add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); |
| 618 | add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); | 622 | add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); |
| 619 | if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) | 623 | if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) |
| 620 | add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid); | 624 | add_uevent_var(env, "JOURNALID=%d", sdp->sd_lockstruct.ls_jid); |
| 621 | if (gfs2_uuid_valid(uuid)) | 625 | if (gfs2_uuid_valid(uuid)) |
| 622 | add_uevent_var(env, "UUID=%pUB", uuid); | 626 | add_uevent_var(env, "UUID=%pUB", uuid); |
| 623 | return 0; | 627 | return 0; |
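jid_store() now publishes ls_jid before clearing SDF_NOJOURNALID, with smp_mb__after_clear_bit() between the clear and wake_up_bit(), so a woken waiter is guaranteed to see the new journal id. The same publish-then-wake ordering modeled with pthreads, where the mutex supplies the memory barrier and the condition variable stands in for wait_on_bit()/wake_up_bit():

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int no_journal_id = 1;
static int jid;

static void jid_store(int v)
{
	pthread_mutex_lock(&lock);
	jid = v;                        /* publish the value first */
	no_journal_id = 0;              /* then clear the flag */
	pthread_cond_broadcast(&cond);  /* then wake waiters */
	pthread_mutex_unlock(&lock);
}

static int jid_wait(void)
{
	pthread_mutex_lock(&lock);
	while (no_journal_id)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
	return jid;
}

static void *setter(void *arg)
{
	(void)arg;
	jid_store(3);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, setter, NULL);
	printf("jid=%d\n", jid_wait());
	pthread_join(t, NULL);
	return 0;
}
```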
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 148d55c14171..cedb0bb96d96 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h | |||
| @@ -39,7 +39,8 @@ | |||
| 39 | {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ | 39 | {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ |
| 40 | {(1UL << GLF_REPLY_PENDING), "r" }, \ | 40 | {(1UL << GLF_REPLY_PENDING), "r" }, \ |
| 41 | {(1UL << GLF_INITIAL), "I" }, \ | 41 | {(1UL << GLF_INITIAL), "I" }, \ |
| 42 | {(1UL << GLF_FROZEN), "F" }) | 42 | {(1UL << GLF_FROZEN), "F" }, \ |
| 43 | {(1UL << GLF_QUEUED), "q" }) | ||
| 43 | 44 | ||
| 44 | #ifndef NUMPTY | 45 | #ifndef NUMPTY |
| 45 | #define NUMPTY | 46 | #define NUMPTY |
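The tracepoint change adds a 'q' letter for the new GLF_QUEUED bit to the glock flag decoder. What that table evaluates to, as a tiny stand-alone decoder; the bit positions below are illustrative, not the kernel's GLF_* values:

```c
#include <stdio.h>

static const struct { unsigned long bit; char c; } glf[] = {
	{ 1UL << 0, 'i' }, { 1UL << 1, 'r' }, { 1UL << 2, 'I' },
	{ 1UL << 3, 'F' }, { 1UL << 4, 'q' },   /* the new GLF_QUEUED letter */
};

/* Each set flag contributes one letter, as __print_flags() does in the
 * trace output. */
static void print_glock_flags(unsigned long flags)
{
	unsigned int i;

	for (i = 0; i < sizeof(glf) / sizeof(glf[0]); i++)
		if (flags & glf[i].bit)
			putchar(glf[i].c);
	putchar('\n');
}

int main(void)
{
	print_glock_flags((1UL << 1) | (1UL << 4));   /* prints "rq" */
	return 0;
}
```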
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index edf9d4bd908e..fb56b783e028 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h | |||
| @@ -20,11 +20,20 @@ struct gfs2_glock; | |||
| 20 | #define RES_JDATA 1 | 20 | #define RES_JDATA 1 |
| 21 | #define RES_DATA 1 | 21 | #define RES_DATA 1 |
| 22 | #define RES_LEAF 1 | 22 | #define RES_LEAF 1 |
| 23 | #define RES_RG_HDR 1 | ||
| 23 | #define RES_RG_BIT 2 | 24 | #define RES_RG_BIT 2 |
| 24 | #define RES_EATTR 1 | 25 | #define RES_EATTR 1 |
| 25 | #define RES_STATFS 1 | 26 | #define RES_STATFS 1 |
| 26 | #define RES_QUOTA 2 | 27 | #define RES_QUOTA 2 |
| 27 | 28 | ||
| 29 | /* reserve either the number of blocks to be allocated plus the rg header | ||
| 30 | * block, or all of the blocks in the rg, whichever is smaller */ | ||
| 31 | static inline unsigned int gfs2_rg_blocks(const struct gfs2_alloc *al) | ||
| 32 | { | ||
| 33 | return (al->al_requested < al->al_rgd->rd_length)? | ||
| 34 | al->al_requested + 1 : al->al_rgd->rd_length; | ||
| 35 | } | ||
| 36 | |||
| 28 | int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, | 37 | int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, |
| 29 | unsigned int revokes); | 38 | unsigned int revokes); |
| 30 | 39 | ||
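gfs2_rg_blocks() caps the per-transaction reservation exactly as its comment says: the requested blocks plus one rgrp header block, but never more than the whole resource group. Restated as a stand-alone function with both cases exercised:

```c
#include <stdio.h>

/* Stand-alone restatement of the inline helper above: reserve the
 * requested blocks plus one rgrp header block, clamped to the size of
 * the resource group itself. */
static unsigned int rg_blocks(unsigned int al_requested,
			      unsigned int rd_length)
{
	return al_requested < rd_length ? al_requested + 1 : rd_length;
}

int main(void)
{
	printf("%u\n", rg_blocks(10, 100));   /* small request: 11 */
	printf("%u\n", rg_blocks(500, 100));  /* clamped: 100 */
	return 0;
}
```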
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 776af6eb4bcb..30b58f07c8a6 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c | |||
| @@ -734,7 +734,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
| 734 | goto out_gunlock_q; | 734 | goto out_gunlock_q; |
| 735 | 735 | ||
| 736 | error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), | 736 | error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), |
| 737 | blks + al->al_rgd->rd_length + | 737 | blks + gfs2_rg_blocks(al) + |
| 738 | RES_DINODE + RES_STATFS + RES_QUOTA, 0); | 738 | RES_DINODE + RES_STATFS + RES_QUOTA, 0); |
| 739 | if (error) | 739 | if (error) |
| 740 | goto out_ipres; | 740 | goto out_ipres; |
diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c index 4129cdb3f0d8..571abe97b42a 100644 --- a/fs/hfs/bfind.c +++ b/fs/hfs/bfind.c | |||
| @@ -23,7 +23,7 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) | |||
| 23 | fd->search_key = ptr; | 23 | fd->search_key = ptr; |
| 24 | fd->key = ptr + tree->max_key_len + 2; | 24 | fd->key = ptr + tree->max_key_len + 2; |
| 25 | dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); | 25 | dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); |
| 26 | down(&tree->tree_lock); | 26 | mutex_lock(&tree->tree_lock); |
| 27 | return 0; | 27 | return 0; |
| 28 | } | 28 | } |
| 29 | 29 | ||
| @@ -32,7 +32,7 @@ void hfs_find_exit(struct hfs_find_data *fd) | |||
| 32 | hfs_bnode_put(fd->bnode); | 32 | hfs_bnode_put(fd->bnode); |
| 33 | kfree(fd->search_key); | 33 | kfree(fd->search_key); |
| 34 | dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); | 34 | dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); |
| 35 | up(&fd->tree->tree_lock); | 35 | mutex_unlock(&fd->tree->tree_lock); |
| 36 | fd->tree = NULL; | 36 | fd->tree = NULL; |
| 37 | } | 37 | } |
| 38 | 38 | ||
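The HFS btree tree_lock was a semaphore used purely for mutual exclusion (down() in hfs_find_init(), up() in hfs_find_exit()), so the deprecated init_MUTEX()/down()/up() usage becomes a real mutex, which carries single-owner semantics and lockdep coverage in the kernel. The before/after shape, modeled with pthreads:

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;

/* Same locking shape as hfs_find_init()/hfs_find_exit(), but with a lock
 * type that enforces the single-owner use the code always assumed. */
static void find_init(void) { pthread_mutex_lock(&tree_lock); }
static void find_exit(void) { pthread_mutex_unlock(&tree_lock); }

int main(void)
{
	find_init();
	puts("btree search runs under tree_lock");
	find_exit();
	return 0;
}
```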
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 38a0a9917d7f..3ebc437736fe 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c | |||
| @@ -27,7 +27,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke | |||
| 27 | if (!tree) | 27 | if (!tree) |
| 28 | return NULL; | 28 | return NULL; |
| 29 | 29 | ||
| 30 | init_MUTEX(&tree->tree_lock); | 30 | mutex_init(&tree->tree_lock); |
| 31 | spin_lock_init(&tree->hash_lock); | 31 | spin_lock_init(&tree->hash_lock); |
| 32 | /* Set the correct compare function */ | 32 | /* Set the correct compare function */ |
| 33 | tree->sb = sb; | 33 | tree->sb = sb; |
diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h index cc51905ac21d..2a1d712f85dc 100644 --- a/fs/hfs/btree.h +++ b/fs/hfs/btree.h | |||
| @@ -33,7 +33,7 @@ struct hfs_btree { | |||
| 33 | unsigned int depth; | 33 | unsigned int depth; |
| 34 | 34 | ||
| 35 | //unsigned int map1_size, map_size; | 35 | //unsigned int map1_size, map_size; |
| 36 | struct semaphore tree_lock; | 36 | struct mutex tree_lock; |
| 37 | 37 | ||
| 38 | unsigned int pages_per_bnode; | 38 | unsigned int pages_per_bnode; |
| 39 | spinlock_t hash_lock; | 39 | spinlock_t hash_lock; |
diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c index 5007a41f1be9..d182438c7ae4 100644 --- a/fs/hfsplus/bfind.c +++ b/fs/hfsplus/bfind.c | |||
| @@ -23,7 +23,7 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) | |||
| 23 | fd->search_key = ptr; | 23 | fd->search_key = ptr; |
| 24 | fd->key = ptr + tree->max_key_len + 2; | 24 | fd->key = ptr + tree->max_key_len + 2; |
| 25 | dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); | 25 | dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); |
| 26 | down(&tree->tree_lock); | 26 | mutex_lock(&tree->tree_lock); |
| 27 | return 0; | 27 | return 0; |
| 28 | } | 28 | } |
| 29 | 29 | ||
| @@ -32,7 +32,7 @@ void hfs_find_exit(struct hfs_find_data *fd) | |||
| 32 | hfs_bnode_put(fd->bnode); | 32 | hfs_bnode_put(fd->bnode); |
| 33 | kfree(fd->search_key); | 33 | kfree(fd->search_key); |
| 34 | dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); | 34 | dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); |
| 35 | up(&fd->tree->tree_lock); | 35 | mutex_unlock(&fd->tree->tree_lock); |
| 36 | fd->tree = NULL; | 36 | fd->tree = NULL; |
| 37 | } | 37 | } |
| 38 | 38 | ||
| @@ -52,6 +52,10 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) | |||
| 52 | rec = (e + b) / 2; | 52 | rec = (e + b) / 2; |
| 53 | len = hfs_brec_lenoff(bnode, rec, &off); | 53 | len = hfs_brec_lenoff(bnode, rec, &off); |
| 54 | keylen = hfs_brec_keylen(bnode, rec); | 54 | keylen = hfs_brec_keylen(bnode, rec); |
| 55 | if (keylen == 0) { | ||
| 56 | res = -EINVAL; | ||
| 57 | goto fail; | ||
| 58 | } | ||
| 55 | hfs_bnode_read(bnode, fd->key, off, keylen); | 59 | hfs_bnode_read(bnode, fd->key, off, keylen); |
| 56 | cmpval = bnode->tree->keycmp(fd->key, fd->search_key); | 60 | cmpval = bnode->tree->keycmp(fd->key, fd->search_key); |
| 57 | if (!cmpval) { | 61 | if (!cmpval) { |
| @@ -67,6 +71,10 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) | |||
| 67 | if (rec != e && e >= 0) { | 71 | if (rec != e && e >= 0) { |
| 68 | len = hfs_brec_lenoff(bnode, e, &off); | 72 | len = hfs_brec_lenoff(bnode, e, &off); |
| 69 | keylen = hfs_brec_keylen(bnode, e); | 73 | keylen = hfs_brec_keylen(bnode, e); |
| 74 | if (keylen == 0) { | ||
| 75 | res = -EINVAL; | ||
| 76 | goto fail; | ||
| 77 | } | ||
| 70 | hfs_bnode_read(bnode, fd->key, off, keylen); | 78 | hfs_bnode_read(bnode, fd->key, off, keylen); |
| 71 | } | 79 | } |
| 72 | done: | 80 | done: |
| @@ -75,6 +83,7 @@ done: | |||
| 75 | fd->keylength = keylen; | 83 | fd->keylength = keylen; |
| 76 | fd->entryoffset = off + keylen; | 84 | fd->entryoffset = off + keylen; |
| 77 | fd->entrylength = len - keylen; | 85 | fd->entrylength = len - keylen; |
| 86 | fail: | ||
| 78 | return res; | 87 | return res; |
| 79 | } | 88 | } |
| 80 | 89 | ||
| @@ -198,6 +207,10 @@ int hfs_brec_goto(struct hfs_find_data *fd, int cnt) | |||
| 198 | 207 | ||
| 199 | len = hfs_brec_lenoff(bnode, fd->record, &off); | 208 | len = hfs_brec_lenoff(bnode, fd->record, &off); |
| 200 | keylen = hfs_brec_keylen(bnode, fd->record); | 209 | keylen = hfs_brec_keylen(bnode, fd->record); |
| 210 | if (keylen == 0) { | ||
| 211 | res = -EINVAL; | ||
| 212 | goto out; | ||
| 213 | } | ||
| 201 | fd->keyoffset = off; | 214 | fd->keyoffset = off; |
| 202 | fd->keylength = keylen; | 215 | fd->keylength = keylen; |
| 203 | fd->entryoffset = off + keylen; | 216 | fd->entryoffset = off + keylen; |
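All three record walks in bfind.c now treat a zero key length as corruption and return -EINVAL rather than reading a bogus key; combined with the clamp added to hfs_brec_keylen() below, the rule is that a length read from the media is used only after a range check. A sketch of that rule (516 is just an example maximum, not necessarily the on-disk HFS+ value):

```c
#include <stdio.h>

struct tree { unsigned int max_key_len; };

/* A key length read from on-disk data is usable only if it is non-zero
 * and fits the tree's declared maximum (+2 for the length field itself);
 * anything else is reported as 0, which callers turn into -EINVAL. */
static unsigned int checked_keylen(const struct tree *t, unsigned int raw)
{
	if (raw == 0 || raw > t->max_key_len + 2)
		return 0;
	return raw;
}

int main(void)
{
	struct tree t = { 516 };

	printf("%u %u %u\n",
	       checked_keylen(&t, 0),      /* corrupt: 0  */
	       checked_keylen(&t, 40),     /* ok:      40 */
	       checked_keylen(&t, 9999));  /* corrupt: 0  */
	return 0;
}
```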
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index ea30afc2a03c..ad57f5991eb1 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | 17 | ||
| 18 | int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *max) | 18 | int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *max) |
| 19 | { | 19 | { |
| 20 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 20 | struct page *page; | 21 | struct page *page; |
| 21 | struct address_space *mapping; | 22 | struct address_space *mapping; |
| 22 | __be32 *pptr, *curr, *end; | 23 | __be32 *pptr, *curr, *end; |
| @@ -29,8 +30,8 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma | |||
| 29 | return size; | 30 | return size; |
| 30 | 31 | ||
| 31 | dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); | 32 | dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); |
| 32 | mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 33 | mutex_lock(&sbi->alloc_mutex); |
| 33 | mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; | 34 | mapping = sbi->alloc_file->i_mapping; |
| 34 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); | 35 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); |
| 35 | if (IS_ERR(page)) { | 36 | if (IS_ERR(page)) { |
| 36 | start = size; | 37 | start = size; |
| @@ -150,16 +151,17 @@ done: | |||
| 150 | set_page_dirty(page); | 151 | set_page_dirty(page); |
| 151 | kunmap(page); | 152 | kunmap(page); |
| 152 | *max = offset + (curr - pptr) * 32 + i - start; | 153 | *max = offset + (curr - pptr) * 32 + i - start; |
| 153 | HFSPLUS_SB(sb).free_blocks -= *max; | 154 | sbi->free_blocks -= *max; |
| 154 | sb->s_dirt = 1; | 155 | sb->s_dirt = 1; |
| 155 | dprint(DBG_BITMAP, "-> %u,%u\n", start, *max); | 156 | dprint(DBG_BITMAP, "-> %u,%u\n", start, *max); |
| 156 | out: | 157 | out: |
| 157 | mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 158 | mutex_unlock(&sbi->alloc_mutex); |
| 158 | return start; | 159 | return start; |
| 159 | } | 160 | } |
| 160 | 161 | ||
| 161 | int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) | 162 | int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) |
| 162 | { | 163 | { |
| 164 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 163 | struct page *page; | 165 | struct page *page; |
| 164 | struct address_space *mapping; | 166 | struct address_space *mapping; |
| 165 | __be32 *pptr, *curr, *end; | 167 | __be32 *pptr, *curr, *end; |
| @@ -172,11 +174,11 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) | |||
| 172 | 174 | ||
| 173 | dprint(DBG_BITMAP, "block_free: %u,%u\n", offset, count); | 175 | dprint(DBG_BITMAP, "block_free: %u,%u\n", offset, count); |
| 174 | /* are all of the bits in range? */ | 176 | /* are all of the bits in range? */ |
| 175 | if ((offset + count) > HFSPLUS_SB(sb).total_blocks) | 177 | if ((offset + count) > sbi->total_blocks) |
| 176 | return -2; | 178 | return -2; |
| 177 | 179 | ||
| 178 | mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 180 | mutex_lock(&sbi->alloc_mutex); |
| 179 | mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; | 181 | mapping = sbi->alloc_file->i_mapping; |
| 180 | pnr = offset / PAGE_CACHE_BITS; | 182 | pnr = offset / PAGE_CACHE_BITS; |
| 181 | page = read_mapping_page(mapping, pnr, NULL); | 183 | page = read_mapping_page(mapping, pnr, NULL); |
| 182 | pptr = kmap(page); | 184 | pptr = kmap(page); |
| @@ -224,9 +226,9 @@ done: | |||
| 224 | out: | 226 | out: |
| 225 | set_page_dirty(page); | 227 | set_page_dirty(page); |
| 226 | kunmap(page); | 228 | kunmap(page); |
| 227 | HFSPLUS_SB(sb).free_blocks += len; | 229 | sbi->free_blocks += len; |
| 228 | sb->s_dirt = 1; | 230 | sb->s_dirt = 1; |
| 229 | mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 231 | mutex_unlock(&sbi->alloc_mutex); |
| 230 | 232 | ||
| 231 | return 0; | 233 | return 0; |
| 232 | } | 234 | } |
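Two cleanups run through the bitmap code: HFSPLUS_SB(sb) is evaluated once into a local sbi instead of on every use, and allocation is serialized by a dedicated alloc_mutex rather than by taking i_mutex on the allocation file, a lock the VFS already uses for its own purposes. The resulting shape, with the actual bitmap scan elided:

```c
#include <pthread.h>
#include <stdio.h>

struct sb_info {
	pthread_mutex_t alloc_mutex;    /* purpose-built allocator lock */
	unsigned int free_blocks;
};

/* One cached info pointer, one dedicated lock around the free-block
 * accounting; nothing else contends on this mutex. */
static unsigned int alloc_blocks(struct sb_info *sbi, unsigned int n)
{
	pthread_mutex_lock(&sbi->alloc_mutex);
	if (n > sbi->free_blocks)
		n = sbi->free_blocks;
	sbi->free_blocks -= n;
	pthread_mutex_unlock(&sbi->alloc_mutex);
	return n;
}

int main(void)
{
	struct sb_info sbi = { PTHREAD_MUTEX_INITIALIZER, 128 };

	printf("got %u, %u left\n", alloc_blocks(&sbi, 50), sbi.free_blocks);
	return 0;
}
```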
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index c88e5d72a402..2f39d05443e1 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c | |||
| @@ -42,10 +42,13 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) | |||
| 42 | recoff = hfs_bnode_read_u16(node, node->tree->node_size - (rec + 1) * 2); | 42 | recoff = hfs_bnode_read_u16(node, node->tree->node_size - (rec + 1) * 2); |
| 43 | if (!recoff) | 43 | if (!recoff) |
| 44 | return 0; | 44 | return 0; |
| 45 | if (node->tree->attributes & HFS_TREE_BIGKEYS) | 45 | |
| 46 | retval = hfs_bnode_read_u16(node, recoff) + 2; | 46 | retval = hfs_bnode_read_u16(node, recoff) + 2; |
| 47 | else | 47 | if (retval > node->tree->max_key_len + 2) { |
| 48 | retval = (hfs_bnode_read_u8(node, recoff) | 1) + 1; | 48 | printk(KERN_ERR "hfs: keylen %d too large\n", |
| 49 | retval); | ||
| 50 | retval = 0; | ||
| 51 | } | ||
| 49 | } | 52 | } |
| 50 | return retval; | 53 | return retval; |
| 51 | } | 54 | } |
| @@ -216,7 +219,7 @@ skip: | |||
| 216 | static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) | 219 | static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) |
| 217 | { | 220 | { |
| 218 | struct hfs_btree *tree; | 221 | struct hfs_btree *tree; |
| 219 | struct hfs_bnode *node, *new_node; | 222 | struct hfs_bnode *node, *new_node, *next_node; |
| 220 | struct hfs_bnode_desc node_desc; | 223 | struct hfs_bnode_desc node_desc; |
| 221 | int num_recs, new_rec_off, new_off, old_rec_off; | 224 | int num_recs, new_rec_off, new_off, old_rec_off; |
| 222 | int data_start, data_end, size; | 225 | int data_start, data_end, size; |
| @@ -235,6 +238,17 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) | |||
| 235 | new_node->type = node->type; | 238 | new_node->type = node->type; |
| 236 | new_node->height = node->height; | 239 | new_node->height = node->height; |
| 237 | 240 | ||
| 241 | if (node->next) | ||
| 242 | next_node = hfs_bnode_find(tree, node->next); | ||
| 243 | else | ||
| 244 | next_node = NULL; | ||
| 245 | |||
| 246 | if (IS_ERR(next_node)) { | ||
| 247 | hfs_bnode_put(node); | ||
| 248 | hfs_bnode_put(new_node); | ||
| 249 | return next_node; | ||
| 250 | } | ||
| 251 | |||
| 238 | size = tree->node_size / 2 - node->num_recs * 2 - 14; | 252 | size = tree->node_size / 2 - node->num_recs * 2 - 14; |
| 239 | old_rec_off = tree->node_size - 4; | 253 | old_rec_off = tree->node_size - 4; |
| 240 | num_recs = 1; | 254 | num_recs = 1; |
| @@ -248,6 +262,8 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) | |||
| 248 | /* panic? */ | 262 | /* panic? */ |
| 249 | hfs_bnode_put(node); | 263 | hfs_bnode_put(node); |
| 250 | hfs_bnode_put(new_node); | 264 | hfs_bnode_put(new_node); |
| 265 | if (next_node) | ||
| 266 | hfs_bnode_put(next_node); | ||
| 251 | return ERR_PTR(-ENOSPC); | 267 | return ERR_PTR(-ENOSPC); |
| 252 | } | 268 | } |
| 253 | 269 | ||
| @@ -302,8 +318,7 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) | |||
| 302 | hfs_bnode_write(node, &node_desc, 0, sizeof(node_desc)); | 318 | hfs_bnode_write(node, &node_desc, 0, sizeof(node_desc)); |
| 303 | 319 | ||
| 304 | /* update next bnode header */ | 320 | /* update next bnode header */ |
| 305 | if (new_node->next) { | 321 | if (next_node) { |
| 306 | struct hfs_bnode *next_node = hfs_bnode_find(tree, new_node->next); | ||
| 307 | next_node->prev = new_node->this; | 322 | next_node->prev = new_node->this; |
| 308 | hfs_bnode_read(next_node, &node_desc, 0, sizeof(node_desc)); | 323 | hfs_bnode_read(next_node, &node_desc, 0, sizeof(node_desc)); |
| 309 | node_desc.prev = cpu_to_be32(next_node->prev); | 324 | node_desc.prev = cpu_to_be32(next_node->prev); |
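hfs_bnode_split() previously fetched the next sibling only after the split had been written out, with no error check on the lookup; it now resolves next_node up front so a failed lookup unwinds before anything is modified. The lookup uses the kernel's error-pointer convention, sketched here after include/linux/err.h (the helpers below are user-space stand-ins; the struct and ids are invented):

```c
#include <stdio.h>

/* An errno is smuggled into the top 4095 values of the pointer range, so
 * one return value carries "pointer or error". */
#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct bnode { int id; };
static struct bnode nodes[4];

static struct bnode *bnode_find(int id)
{
	if (id < 0 || id >= 4)
		return ERR_PTR(-5);     /* models -EIO on a bad sibling id */
	return &nodes[id];
}

int main(void)
{
	/* Resolve the sibling first; bail before mutating anything. */
	struct bnode *next = bnode_find(7);

	if (IS_ERR(next)) {
		printf("lookup failed: %ld\n", PTR_ERR(next));
		return 1;
	}
	return 0;
}
```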
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index e49fcee1e293..22e4d4e32999 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c | |||
| @@ -30,7 +30,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
| 30 | if (!tree) | 30 | if (!tree) |
| 31 | return NULL; | 31 | return NULL; |
| 32 | 32 | ||
| 33 | init_MUTEX(&tree->tree_lock); | 33 | mutex_init(&tree->tree_lock); |
| 34 | spin_lock_init(&tree->hash_lock); | 34 | spin_lock_init(&tree->hash_lock); |
| 35 | tree->sb = sb; | 35 | tree->sb = sb; |
| 36 | tree->cnid = id; | 36 | tree->cnid = id; |
| @@ -39,10 +39,16 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
| 39 | goto free_tree; | 39 | goto free_tree; |
| 40 | tree->inode = inode; | 40 | tree->inode = inode; |
| 41 | 41 | ||
| 42 | if (!HFSPLUS_I(tree->inode)->first_blocks) { | ||
| 43 | printk(KERN_ERR | ||
| 44 | "hfs: invalid btree extent records (0 size).\n"); | ||
| 45 | goto free_inode; | ||
| 46 | } | ||
| 47 | |||
| 42 | mapping = tree->inode->i_mapping; | 48 | mapping = tree->inode->i_mapping; |
| 43 | page = read_mapping_page(mapping, 0, NULL); | 49 | page = read_mapping_page(mapping, 0, NULL); |
| 44 | if (IS_ERR(page)) | 50 | if (IS_ERR(page)) |
| 45 | goto free_tree; | 51 | goto free_inode; |
| 46 | 52 | ||
| 47 | /* Load the header */ | 53 | /* Load the header */ |
| 48 | head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); | 54 | head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); |
| @@ -57,27 +63,56 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
| 57 | tree->max_key_len = be16_to_cpu(head->max_key_len); | 63 | tree->max_key_len = be16_to_cpu(head->max_key_len); |
| 58 | tree->depth = be16_to_cpu(head->depth); | 64 | tree->depth = be16_to_cpu(head->depth); |
| 59 | 65 | ||
| 60 | /* Set the correct compare function */ | 66 | /* Verify the tree and set the correct compare function */ |
| 61 | if (id == HFSPLUS_EXT_CNID) { | 67 | switch (id) { |
| 68 | case HFSPLUS_EXT_CNID: | ||
| 69 | if (tree->max_key_len != HFSPLUS_EXT_KEYLEN - sizeof(u16)) { | ||
| 70 | printk(KERN_ERR "hfs: invalid extent max_key_len %d\n", | ||
| 71 | tree->max_key_len); | ||
| 72 | goto fail_page; | ||
| 73 | } | ||
| 74 | if (tree->attributes & HFS_TREE_VARIDXKEYS) { | ||
| 75 | printk(KERN_ERR "hfs: invalid extent btree flag\n"); | ||
| 76 | goto fail_page; | ||
| 77 | } | ||
| 78 | |||
| 62 | tree->keycmp = hfsplus_ext_cmp_key; | 79 | tree->keycmp = hfsplus_ext_cmp_key; |
| 63 | } else if (id == HFSPLUS_CAT_CNID) { | 80 | break; |
| 64 | if ((HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX) && | 81 | case HFSPLUS_CAT_CNID: |
| 82 | if (tree->max_key_len != HFSPLUS_CAT_KEYLEN - sizeof(u16)) { | ||
| 83 | printk(KERN_ERR "hfs: invalid catalog max_key_len %d\n", | ||
| 84 | tree->max_key_len); | ||
| 85 | goto fail_page; | ||
| 86 | } | ||
| 87 | if (!(tree->attributes & HFS_TREE_VARIDXKEYS)) { | ||
| 88 | printk(KERN_ERR "hfs: invalid catalog btree flag\n"); | ||
| 89 | goto fail_page; | ||
| 90 | } | ||
| 91 | |||
| 92 | if (test_bit(HFSPLUS_SB_HFSX, &HFSPLUS_SB(sb)->flags) && | ||
| 65 | (head->key_type == HFSPLUS_KEY_BINARY)) | 93 | (head->key_type == HFSPLUS_KEY_BINARY)) |
| 66 | tree->keycmp = hfsplus_cat_bin_cmp_key; | 94 | tree->keycmp = hfsplus_cat_bin_cmp_key; |
| 67 | else { | 95 | else { |
| 68 | tree->keycmp = hfsplus_cat_case_cmp_key; | 96 | tree->keycmp = hfsplus_cat_case_cmp_key; |
| 69 | HFSPLUS_SB(sb).flags |= HFSPLUS_SB_CASEFOLD; | 97 | set_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); |
| 70 | } | 98 | } |
| 71 | } else { | 99 | break; |
| 100 | default: | ||
| 72 | printk(KERN_ERR "hfs: unknown B*Tree requested\n"); | 101 | printk(KERN_ERR "hfs: unknown B*Tree requested\n"); |
| 73 | goto fail_page; | 102 | goto fail_page; |
| 74 | } | 103 | } |
| 75 | 104 | ||
| 105 | if (!(tree->attributes & HFS_TREE_BIGKEYS)) { | ||
| 106 | printk(KERN_ERR "hfs: invalid btree flag\n"); | ||
| 107 | goto fail_page; | ||
| 108 | } | ||
| 109 | |||
| 76 | size = tree->node_size; | 110 | size = tree->node_size; |
| 77 | if (!is_power_of_2(size)) | 111 | if (!is_power_of_2(size)) |
| 78 | goto fail_page; | 112 | goto fail_page; |
| 79 | if (!tree->node_count) | 113 | if (!tree->node_count) |
| 80 | goto fail_page; | 114 | goto fail_page; |
| 115 | |||
| 81 | tree->node_size_shift = ffs(size) - 1; | 116 | tree->node_size_shift = ffs(size) - 1; |
| 82 | 117 | ||
| 83 | tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 118 | tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
| @@ -87,10 +122,11 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
| 87 | return tree; | 122 | return tree; |
| 88 | 123 | ||
| 89 | fail_page: | 124 | fail_page: |
| 90 | tree->inode->i_mapping->a_ops = &hfsplus_aops; | ||
| 91 | page_cache_release(page); | 125 | page_cache_release(page); |
| 92 | free_tree: | 126 | free_inode: |
| 127 | tree->inode->i_mapping->a_ops = &hfsplus_aops; | ||
| 93 | iput(tree->inode); | 128 | iput(tree->inode); |
| 129 | free_tree: | ||
| 94 | kfree(tree); | 130 | kfree(tree); |
| 95 | return NULL; | 131 | return NULL; |
| 96 | } | 132 | } |
| @@ -192,17 +228,18 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) | |||
| 192 | 228 | ||
| 193 | while (!tree->free_nodes) { | 229 | while (!tree->free_nodes) { |
| 194 | struct inode *inode = tree->inode; | 230 | struct inode *inode = tree->inode; |
| 231 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 195 | u32 count; | 232 | u32 count; |
| 196 | int res; | 233 | int res; |
| 197 | 234 | ||
| 198 | res = hfsplus_file_extend(inode); | 235 | res = hfsplus_file_extend(inode); |
| 199 | if (res) | 236 | if (res) |
| 200 | return ERR_PTR(res); | 237 | return ERR_PTR(res); |
| 201 | HFSPLUS_I(inode).phys_size = inode->i_size = | 238 | hip->phys_size = inode->i_size = |
| 202 | (loff_t)HFSPLUS_I(inode).alloc_blocks << | 239 | (loff_t)hip->alloc_blocks << |
| 203 | HFSPLUS_SB(tree->sb).alloc_blksz_shift; | 240 | HFSPLUS_SB(tree->sb)->alloc_blksz_shift; |
| 204 | HFSPLUS_I(inode).fs_blocks = HFSPLUS_I(inode).alloc_blocks << | 241 | hip->fs_blocks = |
| 205 | HFSPLUS_SB(tree->sb).fs_shift; | 242 | hip->alloc_blocks << HFSPLUS_SB(tree->sb)->fs_shift; |
| 206 | inode_set_bytes(inode, inode->i_size); | 243 | inode_set_bytes(inode, inode->i_size); |
| 207 | count = inode->i_size >> tree->node_size_shift; | 244 | count = inode->i_size >> tree->node_size_shift; |
| 208 | tree->free_nodes = count - tree->node_count; | 245 | tree->free_nodes = count - tree->node_count; |
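hfs_btree_open() now refuses trees whose header contradicts their type: wrong max_key_len, wrong variable-key flag, missing HFS_TREE_BIGKEYS, zero node count, or a node size that is not a power of two. A sketch of that validation; the flag values and type encoding below are illustrative, not the on-disk HFS+ constants, and the max_key_len checks from the real code are elided:

```c
#include <stdio.h>

#define TREE_BIGKEYS    0x2
#define TREE_VARIDXKEYS 0x4

struct btree_hdr {
	unsigned int type;              /* 0 = extents, 1 = catalog */
	unsigned int attributes;
	unsigned int node_size;
	unsigned int node_count;
};

/* Every header field that later code depends on is checked against the
 * tree type before the tree is used; returns 1 if the header is sane. */
static int btree_valid(const struct btree_hdr *h)
{
	switch (h->type) {
	case 0:                         /* extents: fixed-size keys only */
		if (h->attributes & TREE_VARIDXKEYS)
			return 0;
		break;
	case 1:                         /* catalog: variable keys required */
		if (!(h->attributes & TREE_VARIDXKEYS))
			return 0;
		break;
	default:
		return 0;               /* unknown B*Tree */
	}
	if (!(h->attributes & TREE_BIGKEYS))
		return 0;
	if (!h->node_count)
		return 0;
	/* node size must be a non-zero power of two */
	return h->node_size && !(h->node_size & (h->node_size - 1));
}

int main(void)
{
	struct btree_hdr cat = { 1, TREE_BIGKEYS | TREE_VARIDXKEYS, 4096, 8 };

	printf("catalog valid: %d\n", btree_valid(&cat));
	return 0;
}
```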
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index f6874acb2cf2..8af45fc5b051 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c | |||
| @@ -67,7 +67,7 @@ static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent, | |||
| 67 | key->key_len = cpu_to_be16(6 + ustrlen); | 67 | key->key_len = cpu_to_be16(6 + ustrlen); |
| 68 | } | 68 | } |
| 69 | 69 | ||
| 70 | static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) | 70 | void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms) |
| 71 | { | 71 | { |
| 72 | if (inode->i_flags & S_IMMUTABLE) | 72 | if (inode->i_flags & S_IMMUTABLE) |
| 73 | perms->rootflags |= HFSPLUS_FLG_IMMUTABLE; | 73 | perms->rootflags |= HFSPLUS_FLG_IMMUTABLE; |
| @@ -77,15 +77,24 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) | |||
| 77 | perms->rootflags |= HFSPLUS_FLG_APPEND; | 77 | perms->rootflags |= HFSPLUS_FLG_APPEND; |
| 78 | else | 78 | else |
| 79 | perms->rootflags &= ~HFSPLUS_FLG_APPEND; | 79 | perms->rootflags &= ~HFSPLUS_FLG_APPEND; |
| 80 | HFSPLUS_I(inode).rootflags = perms->rootflags; | 80 | |
| 81 | HFSPLUS_I(inode).userflags = perms->userflags; | 81 | perms->userflags = HFSPLUS_I(inode)->userflags; |
| 82 | perms->mode = cpu_to_be16(inode->i_mode); | 82 | perms->mode = cpu_to_be16(inode->i_mode); |
| 83 | perms->owner = cpu_to_be32(inode->i_uid); | 83 | perms->owner = cpu_to_be32(inode->i_uid); |
| 84 | perms->group = cpu_to_be32(inode->i_gid); | 84 | perms->group = cpu_to_be32(inode->i_gid); |
| 85 | |||
| 86 | if (S_ISREG(inode->i_mode)) | ||
| 87 | perms->dev = cpu_to_be32(inode->i_nlink); | ||
| 88 | else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) | ||
| 89 | perms->dev = cpu_to_be32(inode->i_rdev); | ||
| 90 | else | ||
| 91 | perms->dev = 0; | ||
| 85 | } | 92 | } |
| 86 | 93 | ||
| 87 | static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct inode *inode) | 94 | static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct inode *inode) |
| 88 | { | 95 | { |
| 96 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); | ||
| 97 | |||
| 89 | if (S_ISDIR(inode->i_mode)) { | 98 | if (S_ISDIR(inode->i_mode)) { |
| 90 | struct hfsplus_cat_folder *folder; | 99 | struct hfsplus_cat_folder *folder; |
| 91 | 100 | ||
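
The added branch gives the on-disk `dev` field a double meaning: the link count for regular files, the device number for block and character nodes, and zero otherwise. A stand-alone sketch of that dispatch, using plain host-endian integers (the real record stores big-endian values via cpu_to_be32()):

#include <stdio.h>
#include <sys/stat.h>

/* host-endian stand-in for the be32 perms->dev field */
static unsigned int encode_dev_field(mode_t mode, unsigned int nlink,
				     unsigned int rdev)
{
	if (S_ISREG(mode))
		return nlink;	/* hardlink count for regular files */
	if (S_ISBLK(mode) || S_ISCHR(mode))
		return rdev;	/* device number for special files */
	return 0;		/* unused for directories, fifos, ... */
}

int main(void)
{
	printf("%u\n", encode_dev_field(S_IFREG | 0644, 2, 0));
	printf("%u\n", encode_dev_field(S_IFCHR | 0600, 1, 0x0501));
	return 0;
}
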
| @@ -93,13 +102,13 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i | |||
| 93 | memset(folder, 0, sizeof(*folder)); | 102 | memset(folder, 0, sizeof(*folder)); |
| 94 | folder->type = cpu_to_be16(HFSPLUS_FOLDER); | 103 | folder->type = cpu_to_be16(HFSPLUS_FOLDER); |
| 95 | folder->id = cpu_to_be32(inode->i_ino); | 104 | folder->id = cpu_to_be32(inode->i_ino); |
| 96 | HFSPLUS_I(inode).create_date = | 105 | HFSPLUS_I(inode)->create_date = |
| 97 | folder->create_date = | 106 | folder->create_date = |
| 98 | folder->content_mod_date = | 107 | folder->content_mod_date = |
| 99 | folder->attribute_mod_date = | 108 | folder->attribute_mod_date = |
| 100 | folder->access_date = hfsp_now2mt(); | 109 | folder->access_date = hfsp_now2mt(); |
| 101 | hfsplus_set_perms(inode, &folder->permissions); | 110 | hfsplus_cat_set_perms(inode, &folder->permissions); |
| 102 | if (inode == HFSPLUS_SB(inode->i_sb).hidden_dir) | 111 | if (inode == sbi->hidden_dir) |
| 103 | /* invisible and namelocked */ | 112 | /* invisible and namelocked */ |
| 104 | folder->user_info.frFlags = cpu_to_be16(0x5000); | 113 | folder->user_info.frFlags = cpu_to_be16(0x5000); |
| 105 | return sizeof(*folder); | 114 | return sizeof(*folder); |
| @@ -111,19 +120,19 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i | |||
| 111 | file->type = cpu_to_be16(HFSPLUS_FILE); | 120 | file->type = cpu_to_be16(HFSPLUS_FILE); |
| 112 | file->flags = cpu_to_be16(HFSPLUS_FILE_THREAD_EXISTS); | 121 | file->flags = cpu_to_be16(HFSPLUS_FILE_THREAD_EXISTS); |
| 113 | file->id = cpu_to_be32(cnid); | 122 | file->id = cpu_to_be32(cnid); |
| 114 | HFSPLUS_I(inode).create_date = | 123 | HFSPLUS_I(inode)->create_date = |
| 115 | file->create_date = | 124 | file->create_date = |
| 116 | file->content_mod_date = | 125 | file->content_mod_date = |
| 117 | file->attribute_mod_date = | 126 | file->attribute_mod_date = |
| 118 | file->access_date = hfsp_now2mt(); | 127 | file->access_date = hfsp_now2mt(); |
| 119 | if (cnid == inode->i_ino) { | 128 | if (cnid == inode->i_ino) { |
| 120 | hfsplus_set_perms(inode, &file->permissions); | 129 | hfsplus_cat_set_perms(inode, &file->permissions); |
| 121 | if (S_ISLNK(inode->i_mode)) { | 130 | if (S_ISLNK(inode->i_mode)) { |
| 122 | file->user_info.fdType = cpu_to_be32(HFSP_SYMLINK_TYPE); | 131 | file->user_info.fdType = cpu_to_be32(HFSP_SYMLINK_TYPE); |
| 123 | file->user_info.fdCreator = cpu_to_be32(HFSP_SYMLINK_CREATOR); | 132 | file->user_info.fdCreator = cpu_to_be32(HFSP_SYMLINK_CREATOR); |
| 124 | } else { | 133 | } else { |
| 125 | file->user_info.fdType = cpu_to_be32(HFSPLUS_SB(inode->i_sb).type); | 134 | file->user_info.fdType = cpu_to_be32(sbi->type); |
| 126 | file->user_info.fdCreator = cpu_to_be32(HFSPLUS_SB(inode->i_sb).creator); | 135 | file->user_info.fdCreator = cpu_to_be32(sbi->creator); |
| 127 | } | 136 | } |
| 128 | if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) | 137 | if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) |
| 129 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); | 138 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); |
| @@ -131,8 +140,8 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i | |||
| 131 | file->user_info.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE); | 140 | file->user_info.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE); |
| 132 | file->user_info.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR); | 141 | file->user_info.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR); |
| 133 | file->user_info.fdFlags = cpu_to_be16(0x100); | 142 | file->user_info.fdFlags = cpu_to_be16(0x100); |
| 134 | file->create_date = HFSPLUS_I(HFSPLUS_SB(inode->i_sb).hidden_dir).create_date; | 143 | file->create_date = HFSPLUS_I(sbi->hidden_dir)->create_date; |
| 135 | file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode).dev); | 144 | file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode)->linkid); |
| 136 | } | 145 | } |
| 137 | return sizeof(*file); | 146 | return sizeof(*file); |
| 138 | } | 147 | } |
| @@ -180,15 +189,14 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, | |||
| 180 | 189 | ||
| 181 | int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct inode *inode) | 190 | int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct inode *inode) |
| 182 | { | 191 | { |
| 192 | struct super_block *sb = dir->i_sb; | ||
| 183 | struct hfs_find_data fd; | 193 | struct hfs_find_data fd; |
| 184 | struct super_block *sb; | ||
| 185 | hfsplus_cat_entry entry; | 194 | hfsplus_cat_entry entry; |
| 186 | int entry_size; | 195 | int entry_size; |
| 187 | int err; | 196 | int err; |
| 188 | 197 | ||
| 189 | dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); | 198 | dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); |
| 190 | sb = dir->i_sb; | 199 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
| 191 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | ||
| 192 | 200 | ||
| 193 | hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); | 201 | hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); |
| 194 | entry_size = hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ? | 202 | entry_size = hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ? |
| @@ -234,7 +242,7 @@ err2: | |||
| 234 | 242 | ||
| 235 | int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) | 243 | int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) |
| 236 | { | 244 | { |
| 237 | struct super_block *sb; | 245 | struct super_block *sb = dir->i_sb; |
| 238 | struct hfs_find_data fd; | 246 | struct hfs_find_data fd; |
| 239 | struct hfsplus_fork_raw fork; | 247 | struct hfsplus_fork_raw fork; |
| 240 | struct list_head *pos; | 248 | struct list_head *pos; |
| @@ -242,8 +250,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) | |||
| 242 | u16 type; | 250 | u16 type; |
| 243 | 251 | ||
| 244 | dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); | 252 | dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); |
| 245 | sb = dir->i_sb; | 253 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
| 246 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | ||
| 247 | 254 | ||
| 248 | if (!str) { | 255 | if (!str) { |
| 249 | int len; | 256 | int len; |
| @@ -279,7 +286,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) | |||
| 279 | hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_RSRC); | 286 | hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_RSRC); |
| 280 | } | 287 | } |
| 281 | 288 | ||
| 282 | list_for_each(pos, &HFSPLUS_I(dir).open_dir_list) { | 289 | list_for_each(pos, &HFSPLUS_I(dir)->open_dir_list) { |
| 283 | struct hfsplus_readdir_data *rd = | 290 | struct hfsplus_readdir_data *rd = |
| 284 | list_entry(pos, struct hfsplus_readdir_data, list); | 291 | list_entry(pos, struct hfsplus_readdir_data, list); |
| 285 | if (fd.tree->keycmp(fd.search_key, (void *)&rd->key) < 0) | 292 | if (fd.tree->keycmp(fd.search_key, (void *)&rd->key) < 0) |
| @@ -312,7 +319,7 @@ int hfsplus_rename_cat(u32 cnid, | |||
| 312 | struct inode *src_dir, struct qstr *src_name, | 319 | struct inode *src_dir, struct qstr *src_name, |
| 313 | struct inode *dst_dir, struct qstr *dst_name) | 320 | struct inode *dst_dir, struct qstr *dst_name) |
| 314 | { | 321 | { |
| 315 | struct super_block *sb; | 322 | struct super_block *sb = src_dir->i_sb; |
| 316 | struct hfs_find_data src_fd, dst_fd; | 323 | struct hfs_find_data src_fd, dst_fd; |
| 317 | hfsplus_cat_entry entry; | 324 | hfsplus_cat_entry entry; |
| 318 | int entry_size, type; | 325 | int entry_size, type; |
| @@ -320,8 +327,7 @@ int hfsplus_rename_cat(u32 cnid, | |||
| 320 | 327 | ||
| 321 | dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, | 328 | dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, |
| 322 | dst_dir->i_ino, dst_name->name); | 329 | dst_dir->i_ino, dst_name->name); |
| 323 | sb = src_dir->i_sb; | 330 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); |
| 324 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &src_fd); | ||
| 325 | dst_fd = src_fd; | 331 | dst_fd = src_fd; |
| 326 | 332 | ||
| 327 | /* find the old dir entry and read the data */ | 333 | /* find the old dir entry and read the data */ |
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 764fd1bdca88..d236d85ec9d7 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c | |||
| @@ -39,7 +39,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, | |||
| 39 | 39 | ||
| 40 | dentry->d_op = &hfsplus_dentry_operations; | 40 | dentry->d_op = &hfsplus_dentry_operations; |
| 41 | dentry->d_fsdata = NULL; | 41 | dentry->d_fsdata = NULL; |
| 42 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 42 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
| 43 | hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); | 43 | hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); |
| 44 | again: | 44 | again: |
| 45 | err = hfs_brec_read(&fd, &entry, sizeof(entry)); | 45 | err = hfs_brec_read(&fd, &entry, sizeof(entry)); |
| @@ -68,9 +68,9 @@ again: | |||
| 68 | cnid = be32_to_cpu(entry.file.id); | 68 | cnid = be32_to_cpu(entry.file.id); |
| 69 | if (entry.file.user_info.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) && | 69 | if (entry.file.user_info.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) && |
| 70 | entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) && | 70 | entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) && |
| 71 | (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb).hidden_dir).create_date || | 71 | (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)->create_date || |
| 72 | entry.file.create_date == HFSPLUS_I(sb->s_root->d_inode).create_date) && | 72 | entry.file.create_date == HFSPLUS_I(sb->s_root->d_inode)->create_date) && |
| 73 | HFSPLUS_SB(sb).hidden_dir) { | 73 | HFSPLUS_SB(sb)->hidden_dir) { |
| 74 | struct qstr str; | 74 | struct qstr str; |
| 75 | char name[32]; | 75 | char name[32]; |
| 76 | 76 | ||
| @@ -86,7 +86,8 @@ again: | |||
| 86 | linkid = be32_to_cpu(entry.file.permissions.dev); | 86 | linkid = be32_to_cpu(entry.file.permissions.dev); |
| 87 | str.len = sprintf(name, "iNode%d", linkid); | 87 | str.len = sprintf(name, "iNode%d", linkid); |
| 88 | str.name = name; | 88 | str.name = name; |
| 89 | hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_SB(sb).hidden_dir->i_ino, &str); | 89 | hfsplus_cat_build_key(sb, fd.search_key, |
| 90 | HFSPLUS_SB(sb)->hidden_dir->i_ino, &str); | ||
| 90 | goto again; | 91 | goto again; |
| 91 | } | 92 | } |
| 92 | } else if (!dentry->d_fsdata) | 93 | } else if (!dentry->d_fsdata) |
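
Hardlink targets live as files named "iNode<id>" inside the volume's hidden directory, so the lookup above rebuilds its search key from the link id and retries. A small sketch of that naming scheme, assuming only that the 32-byte buffer mirrors the stack buffer in the lookup path (the kernel uses sprintf(); snprintf() here is a userspace nicety):

#include <stdio.h>

/* builds the hidden-directory name a hardlink record points at */
static int build_hardlink_name(char *buf, size_t len, unsigned int linkid)
{
	return snprintf(buf, len, "iNode%u", linkid);
}

int main(void)
{
	char name[32];		/* same size as the lookup path's buffer */
	int n = build_hardlink_name(name, sizeof(name), 4711);

	printf("%.*s\n", n, name);
	return 0;
}
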
| @@ -101,7 +102,7 @@ again: | |||
| 101 | if (IS_ERR(inode)) | 102 | if (IS_ERR(inode)) |
| 102 | return ERR_CAST(inode); | 103 | return ERR_CAST(inode); |
| 103 | if (S_ISREG(inode->i_mode)) | 104 | if (S_ISREG(inode->i_mode)) |
| 104 | HFSPLUS_I(inode).dev = linkid; | 105 | HFSPLUS_I(inode)->linkid = linkid; |
| 105 | out: | 106 | out: |
| 106 | d_add(dentry, inode); | 107 | d_add(dentry, inode); |
| 107 | return NULL; | 108 | return NULL; |
| @@ -124,7 +125,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 124 | if (filp->f_pos >= inode->i_size) | 125 | if (filp->f_pos >= inode->i_size) |
| 125 | return 0; | 126 | return 0; |
| 126 | 127 | ||
| 127 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 128 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
| 128 | hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); | 129 | hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); |
| 129 | err = hfs_brec_find(&fd); | 130 | err = hfs_brec_find(&fd); |
| 130 | if (err) | 131 | if (err) |
| @@ -180,8 +181,9 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 180 | err = -EIO; | 181 | err = -EIO; |
| 181 | goto out; | 182 | goto out; |
| 182 | } | 183 | } |
| 183 | if (HFSPLUS_SB(sb).hidden_dir && | 184 | if (HFSPLUS_SB(sb)->hidden_dir && |
| 184 | HFSPLUS_SB(sb).hidden_dir->i_ino == be32_to_cpu(entry.folder.id)) | 185 | HFSPLUS_SB(sb)->hidden_dir->i_ino == |
| 186 | be32_to_cpu(entry.folder.id)) | ||
| 185 | goto next; | 187 | goto next; |
| 186 | if (filldir(dirent, strbuf, len, filp->f_pos, | 188 | if (filldir(dirent, strbuf, len, filp->f_pos, |
| 187 | be32_to_cpu(entry.folder.id), DT_DIR)) | 189 | be32_to_cpu(entry.folder.id), DT_DIR)) |
| @@ -217,7 +219,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 217 | } | 219 | } |
| 218 | filp->private_data = rd; | 220 | filp->private_data = rd; |
| 219 | rd->file = filp; | 221 | rd->file = filp; |
| 220 | list_add(&rd->list, &HFSPLUS_I(inode).open_dir_list); | 222 | list_add(&rd->list, &HFSPLUS_I(inode)->open_dir_list); |
| 221 | } | 223 | } |
| 222 | memcpy(&rd->key, fd.key, sizeof(struct hfsplus_cat_key)); | 224 | memcpy(&rd->key, fd.key, sizeof(struct hfsplus_cat_key)); |
| 223 | out: | 225 | out: |
| @@ -229,38 +231,18 @@ static int hfsplus_dir_release(struct inode *inode, struct file *file) | |||
| 229 | { | 231 | { |
| 230 | struct hfsplus_readdir_data *rd = file->private_data; | 232 | struct hfsplus_readdir_data *rd = file->private_data; |
| 231 | if (rd) { | 233 | if (rd) { |
| 234 | mutex_lock(&inode->i_mutex); | ||
| 232 | list_del(&rd->list); | 235 | list_del(&rd->list); |
| 236 | mutex_unlock(&inode->i_mutex); | ||
| 233 | kfree(rd); | 237 | kfree(rd); |
| 234 | } | 238 | } |
| 235 | return 0; | 239 | return 0; |
| 236 | } | 240 | } |
| 237 | 241 | ||
| 238 | static int hfsplus_create(struct inode *dir, struct dentry *dentry, int mode, | ||
| 239 | struct nameidata *nd) | ||
| 240 | { | ||
| 241 | struct inode *inode; | ||
| 242 | int res; | ||
| 243 | |||
| 244 | inode = hfsplus_new_inode(dir->i_sb, mode); | ||
| 245 | if (!inode) | ||
| 246 | return -ENOSPC; | ||
| 247 | |||
| 248 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); | ||
| 249 | if (res) { | ||
| 250 | inode->i_nlink = 0; | ||
| 251 | hfsplus_delete_inode(inode); | ||
| 252 | iput(inode); | ||
| 253 | return res; | ||
| 254 | } | ||
| 255 | hfsplus_instantiate(dentry, inode, inode->i_ino); | ||
| 256 | mark_inode_dirty(inode); | ||
| 257 | return 0; | ||
| 258 | } | ||
| 259 | |||
| 260 | static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, | 242 | static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, |
| 261 | struct dentry *dst_dentry) | 243 | struct dentry *dst_dentry) |
| 262 | { | 244 | { |
| 263 | struct super_block *sb = dst_dir->i_sb; | 245 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dst_dir->i_sb); |
| 264 | struct inode *inode = src_dentry->d_inode; | 246 | struct inode *inode = src_dentry->d_inode; |
| 265 | struct inode *src_dir = src_dentry->d_parent->d_inode; | 247 | struct inode *src_dir = src_dentry->d_parent->d_inode; |
| 266 | struct qstr str; | 248 | struct qstr str; |
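
Two things happen in the hunk above: hfsplus_create() is deleted only to reappear later as a wrapper, and dir_release now removes the readdir cursor from open_dir_list under the directory's i_mutex, the same lock the list's writers hold, closing a list-corruption race. A userspace sketch of the latter, with a pthread mutex standing in for i_mutex:

#include <pthread.h>
#include <stdio.h>

struct node { struct node *prev, *next; };

static struct node open_list = { &open_list, &open_list };
static pthread_mutex_t dir_lock = PTHREAD_MUTEX_INITIALIZER;	/* ~i_mutex */

static void cursor_add(struct node *n)
{
	pthread_mutex_lock(&dir_lock);
	n->next = open_list.next;
	n->prev = &open_list;
	open_list.next->prev = n;
	open_list.next = n;
	pthread_mutex_unlock(&dir_lock);
}

static void cursor_release(struct node *n)
{
	pthread_mutex_lock(&dir_lock);	/* the lock the patch adds */
	n->prev->next = n->next;
	n->next->prev = n->prev;
	pthread_mutex_unlock(&dir_lock);
}

int main(void)
{
	struct node n;

	cursor_add(&n);
	cursor_release(&n);
	puts(open_list.next == &open_list ? "list empty" : "list corrupt");
	return 0;
}
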
| @@ -270,7 +252,10 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, | |||
| 270 | 252 | ||
| 271 | if (HFSPLUS_IS_RSRC(inode)) | 253 | if (HFSPLUS_IS_RSRC(inode)) |
| 272 | return -EPERM; | 254 | return -EPERM; |
| 255 | if (!S_ISREG(inode->i_mode)) | ||
| 256 | return -EPERM; | ||
| 273 | 257 | ||
| 258 | mutex_lock(&sbi->vh_mutex); | ||
| 274 | if (inode->i_ino == (u32)(unsigned long)src_dentry->d_fsdata) { | 259 | if (inode->i_ino == (u32)(unsigned long)src_dentry->d_fsdata) { |
| 275 | for (;;) { | 260 | for (;;) { |
| 276 | get_random_bytes(&id, sizeof(cnid)); | 261 | get_random_bytes(&id, sizeof(cnid)); |
| @@ -279,40 +264,41 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, | |||
| 279 | str.len = sprintf(name, "iNode%d", id); | 264 | str.len = sprintf(name, "iNode%d", id); |
| 280 | res = hfsplus_rename_cat(inode->i_ino, | 265 | res = hfsplus_rename_cat(inode->i_ino, |
| 281 | src_dir, &src_dentry->d_name, | 266 | src_dir, &src_dentry->d_name, |
| 282 | HFSPLUS_SB(sb).hidden_dir, &str); | 267 | sbi->hidden_dir, &str); |
| 283 | if (!res) | 268 | if (!res) |
| 284 | break; | 269 | break; |
| 285 | if (res != -EEXIST) | 270 | if (res != -EEXIST) |
| 286 | return res; | 271 | goto out; |
| 287 | } | 272 | } |
| 288 | HFSPLUS_I(inode).dev = id; | 273 | HFSPLUS_I(inode)->linkid = id; |
| 289 | cnid = HFSPLUS_SB(sb).next_cnid++; | 274 | cnid = sbi->next_cnid++; |
| 290 | src_dentry->d_fsdata = (void *)(unsigned long)cnid; | 275 | src_dentry->d_fsdata = (void *)(unsigned long)cnid; |
| 291 | res = hfsplus_create_cat(cnid, src_dir, &src_dentry->d_name, inode); | 276 | res = hfsplus_create_cat(cnid, src_dir, &src_dentry->d_name, inode); |
| 292 | if (res) | 277 | if (res) |
| 293 | /* panic? */ | 278 | /* panic? */ |
| 294 | return res; | 279 | goto out; |
| 295 | HFSPLUS_SB(sb).file_count++; | 280 | sbi->file_count++; |
| 296 | } | 281 | } |
| 297 | cnid = HFSPLUS_SB(sb).next_cnid++; | 282 | cnid = sbi->next_cnid++; |
| 298 | res = hfsplus_create_cat(cnid, dst_dir, &dst_dentry->d_name, inode); | 283 | res = hfsplus_create_cat(cnid, dst_dir, &dst_dentry->d_name, inode); |
| 299 | if (res) | 284 | if (res) |
| 300 | return res; | 285 | goto out; |
| 301 | 286 | ||
| 302 | inc_nlink(inode); | 287 | inc_nlink(inode); |
| 303 | hfsplus_instantiate(dst_dentry, inode, cnid); | 288 | hfsplus_instantiate(dst_dentry, inode, cnid); |
| 304 | atomic_inc(&inode->i_count); | 289 | atomic_inc(&inode->i_count); |
| 305 | inode->i_ctime = CURRENT_TIME_SEC; | 290 | inode->i_ctime = CURRENT_TIME_SEC; |
| 306 | mark_inode_dirty(inode); | 291 | mark_inode_dirty(inode); |
| 307 | HFSPLUS_SB(sb).file_count++; | 292 | sbi->file_count++; |
| 308 | sb->s_dirt = 1; | 293 | dst_dir->i_sb->s_dirt = 1; |
| 309 | 294 | out: | |
| 310 | return 0; | 295 | mutex_unlock(&sbi->vh_mutex); |
| 296 | return res; | ||
| 311 | } | 297 | } |
| 312 | 298 | ||
| 313 | static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) | 299 | static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) |
| 314 | { | 300 | { |
| 315 | struct super_block *sb = dir->i_sb; | 301 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); |
| 316 | struct inode *inode = dentry->d_inode; | 302 | struct inode *inode = dentry->d_inode; |
| 317 | struct qstr str; | 303 | struct qstr str; |
| 318 | char name[32]; | 304 | char name[32]; |
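
hfsplus_link() now holds sbi->vh_mutex for the whole operation, so every early `return res` becomes `goto out` and the unlock sits on the single exit path. The shape of that idiom, sketched with stand-in helpers:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t vh_mutex = PTHREAD_MUTEX_INITIALIZER;

static int do_step(int fail) { return fail ? -1 : 0; }

static int link_like_op(int fail_first, int fail_second)
{
	int res;

	pthread_mutex_lock(&vh_mutex);
	res = do_step(fail_first);
	if (res)
		goto out;		/* was a bare "return res" before */
	res = do_step(fail_second);
	if (res)
		goto out;
	/* success path: bump counters, mark the superblock dirty, ... */
out:
	pthread_mutex_unlock(&vh_mutex);	/* reached on every path */
	return res;
}

int main(void)
{
	printf("%d %d\n", link_like_op(0, 0), link_like_op(1, 0));
	return 0;
}
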
| @@ -322,21 +308,22 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) | |||
| 322 | if (HFSPLUS_IS_RSRC(inode)) | 308 | if (HFSPLUS_IS_RSRC(inode)) |
| 323 | return -EPERM; | 309 | return -EPERM; |
| 324 | 310 | ||
| 311 | mutex_lock(&sbi->vh_mutex); | ||
| 325 | cnid = (u32)(unsigned long)dentry->d_fsdata; | 312 | cnid = (u32)(unsigned long)dentry->d_fsdata; |
| 326 | if (inode->i_ino == cnid && | 313 | if (inode->i_ino == cnid && |
| 327 | atomic_read(&HFSPLUS_I(inode).opencnt)) { | 314 | atomic_read(&HFSPLUS_I(inode)->opencnt)) { |
| 328 | str.name = name; | 315 | str.name = name; |
| 329 | str.len = sprintf(name, "temp%lu", inode->i_ino); | 316 | str.len = sprintf(name, "temp%lu", inode->i_ino); |
| 330 | res = hfsplus_rename_cat(inode->i_ino, | 317 | res = hfsplus_rename_cat(inode->i_ino, |
| 331 | dir, &dentry->d_name, | 318 | dir, &dentry->d_name, |
| 332 | HFSPLUS_SB(sb).hidden_dir, &str); | 319 | sbi->hidden_dir, &str); |
| 333 | if (!res) | 320 | if (!res) |
| 334 | inode->i_flags |= S_DEAD; | 321 | inode->i_flags |= S_DEAD; |
| 335 | return res; | 322 | goto out; |
| 336 | } | 323 | } |
| 337 | res = hfsplus_delete_cat(cnid, dir, &dentry->d_name); | 324 | res = hfsplus_delete_cat(cnid, dir, &dentry->d_name); |
| 338 | if (res) | 325 | if (res) |
| 339 | return res; | 326 | goto out; |
| 340 | 327 | ||
| 341 | if (inode->i_nlink > 0) | 328 | if (inode->i_nlink > 0) |
| 342 | drop_nlink(inode); | 329 | drop_nlink(inode); |
| @@ -344,10 +331,10 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) | |||
| 344 | clear_nlink(inode); | 331 | clear_nlink(inode); |
| 345 | if (!inode->i_nlink) { | 332 | if (!inode->i_nlink) { |
| 346 | if (inode->i_ino != cnid) { | 333 | if (inode->i_ino != cnid) { |
| 347 | HFSPLUS_SB(sb).file_count--; | 334 | sbi->file_count--; |
| 348 | if (!atomic_read(&HFSPLUS_I(inode).opencnt)) { | 335 | if (!atomic_read(&HFSPLUS_I(inode)->opencnt)) { |
| 349 | res = hfsplus_delete_cat(inode->i_ino, | 336 | res = hfsplus_delete_cat(inode->i_ino, |
| 350 | HFSPLUS_SB(sb).hidden_dir, | 337 | sbi->hidden_dir, |
| 351 | NULL); | 338 | NULL); |
| 352 | if (!res) | 339 | if (!res) |
| 353 | hfsplus_delete_inode(inode); | 340 | hfsplus_delete_inode(inode); |
| @@ -356,107 +343,108 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) | |||
| 356 | } else | 343 | } else |
| 357 | hfsplus_delete_inode(inode); | 344 | hfsplus_delete_inode(inode); |
| 358 | } else | 345 | } else |
| 359 | HFSPLUS_SB(sb).file_count--; | 346 | sbi->file_count--; |
| 360 | inode->i_ctime = CURRENT_TIME_SEC; | 347 | inode->i_ctime = CURRENT_TIME_SEC; |
| 361 | mark_inode_dirty(inode); | 348 | mark_inode_dirty(inode); |
| 362 | 349 | out: | |
| 350 | mutex_unlock(&sbi->vh_mutex); | ||
| 363 | return res; | 351 | return res; |
| 364 | } | 352 | } |
| 365 | 353 | ||
| 366 | static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, int mode) | ||
| 367 | { | ||
| 368 | struct inode *inode; | ||
| 369 | int res; | ||
| 370 | |||
| 371 | inode = hfsplus_new_inode(dir->i_sb, S_IFDIR | mode); | ||
| 372 | if (!inode) | ||
| 373 | return -ENOSPC; | ||
| 374 | |||
| 375 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); | ||
| 376 | if (res) { | ||
| 377 | inode->i_nlink = 0; | ||
| 378 | hfsplus_delete_inode(inode); | ||
| 379 | iput(inode); | ||
| 380 | return res; | ||
| 381 | } | ||
| 382 | hfsplus_instantiate(dentry, inode, inode->i_ino); | ||
| 383 | mark_inode_dirty(inode); | ||
| 384 | return 0; | ||
| 385 | } | ||
| 386 | |||
| 387 | static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) | 354 | static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) |
| 388 | { | 355 | { |
| 389 | struct inode *inode; | 356 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); |
| 357 | struct inode *inode = dentry->d_inode; | ||
| 390 | int res; | 358 | int res; |
| 391 | 359 | ||
| 392 | inode = dentry->d_inode; | ||
| 393 | if (inode->i_size != 2) | 360 | if (inode->i_size != 2) |
| 394 | return -ENOTEMPTY; | 361 | return -ENOTEMPTY; |
| 362 | |||
| 363 | mutex_lock(&sbi->vh_mutex); | ||
| 395 | res = hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name); | 364 | res = hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name); |
| 396 | if (res) | 365 | if (res) |
| 397 | return res; | 366 | goto out; |
| 398 | clear_nlink(inode); | 367 | clear_nlink(inode); |
| 399 | inode->i_ctime = CURRENT_TIME_SEC; | 368 | inode->i_ctime = CURRENT_TIME_SEC; |
| 400 | hfsplus_delete_inode(inode); | 369 | hfsplus_delete_inode(inode); |
| 401 | mark_inode_dirty(inode); | 370 | mark_inode_dirty(inode); |
| 402 | return 0; | 371 | out: |
| 372 | mutex_unlock(&sbi->vh_mutex); | ||
| 373 | return res; | ||
| 403 | } | 374 | } |
| 404 | 375 | ||
| 405 | static int hfsplus_symlink(struct inode *dir, struct dentry *dentry, | 376 | static int hfsplus_symlink(struct inode *dir, struct dentry *dentry, |
| 406 | const char *symname) | 377 | const char *symname) |
| 407 | { | 378 | { |
| 408 | struct super_block *sb; | 379 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); |
| 409 | struct inode *inode; | 380 | struct inode *inode; |
| 410 | int res; | 381 | int res = -ENOSPC; |
| 411 | 382 | ||
| 412 | sb = dir->i_sb; | 383 | mutex_lock(&sbi->vh_mutex); |
| 413 | inode = hfsplus_new_inode(sb, S_IFLNK | S_IRWXUGO); | 384 | inode = hfsplus_new_inode(dir->i_sb, S_IFLNK | S_IRWXUGO); |
| 414 | if (!inode) | 385 | if (!inode) |
| 415 | return -ENOSPC; | 386 | goto out; |
| 416 | 387 | ||
| 417 | res = page_symlink(inode, symname, strlen(symname) + 1); | 388 | res = page_symlink(inode, symname, strlen(symname) + 1); |
| 418 | if (res) { | 389 | if (res) |
| 419 | inode->i_nlink = 0; | 390 | goto out_err; |
| 420 | hfsplus_delete_inode(inode); | ||
| 421 | iput(inode); | ||
| 422 | return res; | ||
| 423 | } | ||
| 424 | 391 | ||
| 425 | mark_inode_dirty(inode); | ||
| 426 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); | 392 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); |
| 393 | if (res) | ||
| 394 | goto out_err; | ||
| 427 | 395 | ||
| 428 | if (!res) { | 396 | hfsplus_instantiate(dentry, inode, inode->i_ino); |
| 429 | hfsplus_instantiate(dentry, inode, inode->i_ino); | 397 | mark_inode_dirty(inode); |
| 430 | mark_inode_dirty(inode); | 398 | goto out; |
| 431 | } | ||
| 432 | 399 | ||
| 400 | out_err: | ||
| 401 | inode->i_nlink = 0; | ||
| 402 | hfsplus_delete_inode(inode); | ||
| 403 | iput(inode); | ||
| 404 | out: | ||
| 405 | mutex_unlock(&sbi->vh_mutex); | ||
| 433 | return res; | 406 | return res; |
| 434 | } | 407 | } |
| 435 | 408 | ||
| 436 | static int hfsplus_mknod(struct inode *dir, struct dentry *dentry, | 409 | static int hfsplus_mknod(struct inode *dir, struct dentry *dentry, |
| 437 | int mode, dev_t rdev) | 410 | int mode, dev_t rdev) |
| 438 | { | 411 | { |
| 439 | struct super_block *sb; | 412 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); |
| 440 | struct inode *inode; | 413 | struct inode *inode; |
| 441 | int res; | 414 | int res = -ENOSPC; |
| 442 | 415 | ||
| 443 | sb = dir->i_sb; | 416 | mutex_lock(&sbi->vh_mutex); |
| 444 | inode = hfsplus_new_inode(sb, mode); | 417 | inode = hfsplus_new_inode(dir->i_sb, mode); |
| 445 | if (!inode) | 418 | if (!inode) |
| 446 | return -ENOSPC; | 419 | goto out; |
| 420 | |||
| 421 | if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) | ||
| 422 | init_special_inode(inode, mode, rdev); | ||
| 447 | 423 | ||
| 448 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); | 424 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); |
| 449 | if (res) { | 425 | if (res) { |
| 450 | inode->i_nlink = 0; | 426 | inode->i_nlink = 0; |
| 451 | hfsplus_delete_inode(inode); | 427 | hfsplus_delete_inode(inode); |
| 452 | iput(inode); | 428 | iput(inode); |
| 453 | return res; | 429 | goto out; |
| 454 | } | 430 | } |
| 455 | init_special_inode(inode, mode, rdev); | 431 | |
| 456 | hfsplus_instantiate(dentry, inode, inode->i_ino); | 432 | hfsplus_instantiate(dentry, inode, inode->i_ino); |
| 457 | mark_inode_dirty(inode); | 433 | mark_inode_dirty(inode); |
| 434 | out: | ||
| 435 | mutex_unlock(&sbi->vh_mutex); | ||
| 436 | return res; | ||
| 437 | } | ||
| 458 | 438 | ||
| 459 | return 0; | 439 | static int hfsplus_create(struct inode *dir, struct dentry *dentry, int mode, |
| 440 | struct nameidata *nd) | ||
| 441 | { | ||
| 442 | return hfsplus_mknod(dir, dentry, mode, 0); | ||
| 443 | } | ||
| 444 | |||
| 445 | static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, int mode) | ||
| 446 | { | ||
| 447 | return hfsplus_mknod(dir, dentry, mode | S_IFDIR, 0); | ||
| 460 | } | 448 | } |
| 461 | 449 | ||
| 462 | static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, | 450 | static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, |
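
hfsplus_create() and hfsplus_mkdir() lose their duplicated bodies and become one-line wrappers around hfsplus_mknod(); only the mode bits differ. A stand-alone sketch of the consolidation, with hypothetical helper names:

#include <stdio.h>
#include <sys/stat.h>

static int generic_mknod(const char *name, mode_t mode, dev_t rdev)
{
	printf("mknod %s mode=%o rdev=%lu\n",
	       name, (unsigned int)mode, (unsigned long)rdev);
	return 0;
}

static int generic_create(const char *name, mode_t mode)
{
	return generic_mknod(name, mode, 0);		/* plain file */
}

static int generic_mkdir(const char *name, mode_t mode)
{
	return generic_mknod(name, mode | S_IFDIR, 0);	/* OR in the type */
}

int main(void)
{
	generic_create("file", 0644);
	generic_mkdir("dir", 0755);
	return 0;
}
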
| @@ -466,7 +454,10 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 466 | 454 | ||
| 467 | /* Unlink destination if it already exists */ | 455 | /* Unlink destination if it already exists */ |
| 468 | if (new_dentry->d_inode) { | 456 | if (new_dentry->d_inode) { |
| 469 | res = hfsplus_unlink(new_dir, new_dentry); | 457 | if (S_ISDIR(new_dentry->d_inode->i_mode)) |
| 458 | res = hfsplus_rmdir(new_dir, new_dentry); | ||
| 459 | else | ||
| 460 | res = hfsplus_unlink(new_dir, new_dentry); | ||
| 470 | if (res) | 461 | if (res) |
| 471 | return res; | 462 | return res; |
| 472 | } | 463 | } |
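
The rename fix routes removal of an existing target through hfsplus_rmdir() when that target is a directory, so the emptiness check and directory nlink handling are not skipped. Sketched with fake helpers:

#include <stdio.h>
#include <sys/stat.h>

static int fake_unlink(const char *n) { printf("unlink %s\n", n); return 0; }
static int fake_rmdir(const char *n)  { printf("rmdir %s\n", n);  return 0; }

/* rename over an existing name must pick the right removal path */
static int remove_target(const char *name, mode_t mode)
{
	if (S_ISDIR(mode))
		return fake_rmdir(name);	/* was always unlink before */
	return fake_unlink(name);
}

int main(void)
{
	remove_target("victim-file", S_IFREG | 0644);
	remove_target("victim-dir", S_IFDIR | 0755);
	return 0;
}
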
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 0022eec63cda..0c9cb1820a52 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c | |||
| @@ -85,35 +85,49 @@ static u32 hfsplus_ext_lastblock(struct hfsplus_extent *ext) | |||
| 85 | 85 | ||
| 86 | static void __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) | 86 | static void __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) |
| 87 | { | 87 | { |
| 88 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 88 | int res; | 89 | int res; |
| 89 | 90 | ||
| 90 | hfsplus_ext_build_key(fd->search_key, inode->i_ino, HFSPLUS_I(inode).cached_start, | 91 | WARN_ON(!mutex_is_locked(&hip->extents_lock)); |
| 91 | HFSPLUS_IS_RSRC(inode) ? HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); | 92 | |
| 93 | hfsplus_ext_build_key(fd->search_key, inode->i_ino, hip->cached_start, | ||
| 94 | HFSPLUS_IS_RSRC(inode) ? | ||
| 95 | HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); | ||
| 96 | |||
| 92 | res = hfs_brec_find(fd); | 97 | res = hfs_brec_find(fd); |
| 93 | if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_NEW) { | 98 | if (hip->flags & HFSPLUS_FLG_EXT_NEW) { |
| 94 | if (res != -ENOENT) | 99 | if (res != -ENOENT) |
| 95 | return; | 100 | return; |
| 96 | hfs_brec_insert(fd, HFSPLUS_I(inode).cached_extents, sizeof(hfsplus_extent_rec)); | 101 | hfs_brec_insert(fd, hip->cached_extents, |
| 97 | HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); | 102 | sizeof(hfsplus_extent_rec)); |
| 103 | hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); | ||
| 98 | } else { | 104 | } else { |
| 99 | if (res) | 105 | if (res) |
| 100 | return; | 106 | return; |
| 101 | hfs_bnode_write(fd->bnode, HFSPLUS_I(inode).cached_extents, fd->entryoffset, fd->entrylength); | 107 | hfs_bnode_write(fd->bnode, hip->cached_extents, |
| 102 | HFSPLUS_I(inode).flags &= ~HFSPLUS_FLG_EXT_DIRTY; | 108 | fd->entryoffset, fd->entrylength); |
| 109 | hip->flags &= ~HFSPLUS_FLG_EXT_DIRTY; | ||
| 103 | } | 110 | } |
| 104 | } | 111 | } |
| 105 | 112 | ||
| 106 | void hfsplus_ext_write_extent(struct inode *inode) | 113 | static void hfsplus_ext_write_extent_locked(struct inode *inode) |
| 107 | { | 114 | { |
| 108 | if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_DIRTY) { | 115 | if (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_EXT_DIRTY) { |
| 109 | struct hfs_find_data fd; | 116 | struct hfs_find_data fd; |
| 110 | 117 | ||
| 111 | hfs_find_init(HFSPLUS_SB(inode->i_sb).ext_tree, &fd); | 118 | hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); |
| 112 | __hfsplus_ext_write_extent(inode, &fd); | 119 | __hfsplus_ext_write_extent(inode, &fd); |
| 113 | hfs_find_exit(&fd); | 120 | hfs_find_exit(&fd); |
| 114 | } | 121 | } |
| 115 | } | 122 | } |
| 116 | 123 | ||
| 124 | void hfsplus_ext_write_extent(struct inode *inode) | ||
| 125 | { | ||
| 126 | mutex_lock(&HFSPLUS_I(inode)->extents_lock); | ||
| 127 | hfsplus_ext_write_extent_locked(inode); | ||
| 128 | mutex_unlock(&HFSPLUS_I(inode)->extents_lock); | ||
| 129 | } | ||
| 130 | |||
| 117 | static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, | 131 | static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, |
| 118 | struct hfsplus_extent *extent, | 132 | struct hfsplus_extent *extent, |
| 119 | u32 cnid, u32 block, u8 type) | 133 | u32 cnid, u32 block, u8 type) |
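
__hfsplus_ext_write_extent() now documents its locking contract with WARN_ON(!mutex_is_locked(&hip->extents_lock)), and a *_locked entry point is split out for callers that already own the lock. A userspace approximation, where a plain flag stands in for mutex_is_locked() and assert() for WARN_ON():

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t extents_lock = PTHREAD_MUTEX_INITIALIZER;
static int extents_lock_held;	/* stands in for mutex_is_locked() */

static void __write_extent(void)
{
	assert(extents_lock_held);	/* the WARN_ON() in the patch */
	puts("extent record written");
}

static void write_extent_locked(void)	/* caller already owns the lock */
{
	__write_extent();
}

static void write_extent(void)		/* unlocked entry point */
{
	pthread_mutex_lock(&extents_lock);
	extents_lock_held = 1;
	write_extent_locked();
	extents_lock_held = 0;
	pthread_mutex_unlock(&extents_lock);
}

int main(void)
{
	write_extent();
	return 0;
}
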
| @@ -136,33 +150,39 @@ static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, | |||
| 136 | 150 | ||
| 137 | static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, struct inode *inode, u32 block) | 151 | static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, struct inode *inode, u32 block) |
| 138 | { | 152 | { |
| 153 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 139 | int res; | 154 | int res; |
| 140 | 155 | ||
| 141 | if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_DIRTY) | 156 | WARN_ON(!mutex_is_locked(&hip->extents_lock)); |
| 157 | |||
| 158 | if (hip->flags & HFSPLUS_FLG_EXT_DIRTY) | ||
| 142 | __hfsplus_ext_write_extent(inode, fd); | 159 | __hfsplus_ext_write_extent(inode, fd); |
| 143 | 160 | ||
| 144 | res = __hfsplus_ext_read_extent(fd, HFSPLUS_I(inode).cached_extents, inode->i_ino, | 161 | res = __hfsplus_ext_read_extent(fd, hip->cached_extents, inode->i_ino, |
| 145 | block, HFSPLUS_IS_RSRC(inode) ? HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); | 162 | block, HFSPLUS_IS_RSRC(inode) ? |
| 163 | HFSPLUS_TYPE_RSRC : | ||
| 164 | HFSPLUS_TYPE_DATA); | ||
| 146 | if (!res) { | 165 | if (!res) { |
| 147 | HFSPLUS_I(inode).cached_start = be32_to_cpu(fd->key->ext.start_block); | 166 | hip->cached_start = be32_to_cpu(fd->key->ext.start_block); |
| 148 | HFSPLUS_I(inode).cached_blocks = hfsplus_ext_block_count(HFSPLUS_I(inode).cached_extents); | 167 | hip->cached_blocks = hfsplus_ext_block_count(hip->cached_extents); |
| 149 | } else { | 168 | } else { |
| 150 | HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).cached_blocks = 0; | 169 | hip->cached_start = hip->cached_blocks = 0; |
| 151 | HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); | 170 | hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); |
| 152 | } | 171 | } |
| 153 | return res; | 172 | return res; |
| 154 | } | 173 | } |
| 155 | 174 | ||
| 156 | static int hfsplus_ext_read_extent(struct inode *inode, u32 block) | 175 | static int hfsplus_ext_read_extent(struct inode *inode, u32 block) |
| 157 | { | 176 | { |
| 177 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 158 | struct hfs_find_data fd; | 178 | struct hfs_find_data fd; |
| 159 | int res; | 179 | int res; |
| 160 | 180 | ||
| 161 | if (block >= HFSPLUS_I(inode).cached_start && | 181 | if (block >= hip->cached_start && |
| 162 | block < HFSPLUS_I(inode).cached_start + HFSPLUS_I(inode).cached_blocks) | 182 | block < hip->cached_start + hip->cached_blocks) |
| 163 | return 0; | 183 | return 0; |
| 164 | 184 | ||
| 165 | hfs_find_init(HFSPLUS_SB(inode->i_sb).ext_tree, &fd); | 185 | hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); |
| 166 | res = __hfsplus_ext_cache_extent(&fd, inode, block); | 186 | res = __hfsplus_ext_cache_extent(&fd, inode, block); |
| 167 | hfs_find_exit(&fd); | 187 | hfs_find_exit(&fd); |
| 168 | return res; | 188 | return res; |
| @@ -172,21 +192,21 @@ static int hfsplus_ext_read_extent(struct inode *inode, u32 block) | |||
| 172 | int hfsplus_get_block(struct inode *inode, sector_t iblock, | 192 | int hfsplus_get_block(struct inode *inode, sector_t iblock, |
| 173 | struct buffer_head *bh_result, int create) | 193 | struct buffer_head *bh_result, int create) |
| 174 | { | 194 | { |
| 175 | struct super_block *sb; | 195 | struct super_block *sb = inode->i_sb; |
| 196 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 197 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 176 | int res = -EIO; | 198 | int res = -EIO; |
| 177 | u32 ablock, dblock, mask; | 199 | u32 ablock, dblock, mask; |
| 178 | int shift; | 200 | int shift; |
| 179 | 201 | ||
| 180 | sb = inode->i_sb; | ||
| 181 | |||
| 182 | /* Convert inode block to disk allocation block */ | 202 | /* Convert inode block to disk allocation block */ |
| 183 | shift = HFSPLUS_SB(sb).alloc_blksz_shift - sb->s_blocksize_bits; | 203 | shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits; |
| 184 | ablock = iblock >> HFSPLUS_SB(sb).fs_shift; | 204 | ablock = iblock >> sbi->fs_shift; |
| 185 | 205 | ||
| 186 | if (iblock >= HFSPLUS_I(inode).fs_blocks) { | 206 | if (iblock >= hip->fs_blocks) { |
| 187 | if (iblock > HFSPLUS_I(inode).fs_blocks || !create) | 207 | if (iblock > hip->fs_blocks || !create) |
| 188 | return -EIO; | 208 | return -EIO; |
| 189 | if (ablock >= HFSPLUS_I(inode).alloc_blocks) { | 209 | if (ablock >= hip->alloc_blocks) { |
| 190 | res = hfsplus_file_extend(inode); | 210 | res = hfsplus_file_extend(inode); |
| 191 | if (res) | 211 | if (res) |
| 192 | return res; | 212 | return res; |
| @@ -194,33 +214,33 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, | |||
| 194 | } else | 214 | } else |
| 195 | create = 0; | 215 | create = 0; |
| 196 | 216 | ||
| 197 | if (ablock < HFSPLUS_I(inode).first_blocks) { | 217 | if (ablock < hip->first_blocks) { |
| 198 | dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).first_extents, ablock); | 218 | dblock = hfsplus_ext_find_block(hip->first_extents, ablock); |
| 199 | goto done; | 219 | goto done; |
| 200 | } | 220 | } |
| 201 | 221 | ||
| 202 | if (inode->i_ino == HFSPLUS_EXT_CNID) | 222 | if (inode->i_ino == HFSPLUS_EXT_CNID) |
| 203 | return -EIO; | 223 | return -EIO; |
| 204 | 224 | ||
| 205 | mutex_lock(&HFSPLUS_I(inode).extents_lock); | 225 | mutex_lock(&hip->extents_lock); |
| 206 | res = hfsplus_ext_read_extent(inode, ablock); | 226 | res = hfsplus_ext_read_extent(inode, ablock); |
| 207 | if (!res) { | 227 | if (!res) { |
| 208 | dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock - | 228 | dblock = hfsplus_ext_find_block(hip->cached_extents, |
| 209 | HFSPLUS_I(inode).cached_start); | 229 | ablock - hip->cached_start); |
| 210 | } else { | 230 | } else { |
| 211 | mutex_unlock(&HFSPLUS_I(inode).extents_lock); | 231 | mutex_unlock(&hip->extents_lock); |
| 212 | return -EIO; | 232 | return -EIO; |
| 213 | } | 233 | } |
| 214 | mutex_unlock(&HFSPLUS_I(inode).extents_lock); | 234 | mutex_unlock(&hip->extents_lock); |
| 215 | 235 | ||
| 216 | done: | 236 | done: |
| 217 | dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); | 237 | dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); |
| 218 | mask = (1 << HFSPLUS_SB(sb).fs_shift) - 1; | 238 | mask = (1 << sbi->fs_shift) - 1; |
| 219 | map_bh(bh_result, sb, (dblock << HFSPLUS_SB(sb).fs_shift) + HFSPLUS_SB(sb).blockoffset + (iblock & mask)); | 239 | map_bh(bh_result, sb, (dblock << sbi->fs_shift) + sbi->blockoffset + (iblock & mask)); |
| 220 | if (create) { | 240 | if (create) { |
| 221 | set_buffer_new(bh_result); | 241 | set_buffer_new(bh_result); |
| 222 | HFSPLUS_I(inode).phys_size += sb->s_blocksize; | 242 | hip->phys_size += sb->s_blocksize; |
| 223 | HFSPLUS_I(inode).fs_blocks++; | 243 | hip->fs_blocks++; |
| 224 | inode_add_bytes(inode, sb->s_blocksize); | 244 | inode_add_bytes(inode, sb->s_blocksize); |
| 225 | mark_inode_dirty(inode); | 245 | mark_inode_dirty(inode); |
| 226 | } | 246 | } |
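
The mapping math is unchanged but easier to read with the `sbi`/`hip` locals: an allocation block covers 2^fs_shift filesystem blocks, so the extent lookup translates the high bits of iblock and the low bits pick the sub-block within the mapped allocation block. Worked through with made-up numbers:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int fs_shift = 3;		/* 4K alloc blocks over 512B fs blocks */
	uint32_t blockoffset = 64;	/* volume start, in fs blocks */
	uint64_t iblock = 21;		/* file-relative fs block */

	uint32_t ablock = iblock >> fs_shift;	/* allocation block index */
	uint32_t dblock = 1000 + ablock;	/* pretend extent lookup */
	uint32_t mask = (1u << fs_shift) - 1;
	uint64_t sector = ((uint64_t)dblock << fs_shift)
			  + blockoffset + (iblock & mask);

	printf("ablock=%u dblock=%u sector=%llu\n",
	       ablock, dblock, (unsigned long long)sector);
	return 0;
}
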
| @@ -327,7 +347,7 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid, struct hfsplus_fork_raw | |||
| 327 | if (total_blocks == blocks) | 347 | if (total_blocks == blocks) |
| 328 | return 0; | 348 | return 0; |
| 329 | 349 | ||
| 330 | hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd); | 350 | hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); |
| 331 | do { | 351 | do { |
| 332 | res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid, | 352 | res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid, |
| 333 | total_blocks, type); | 353 | total_blocks, type); |
| @@ -348,29 +368,33 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid, struct hfsplus_fork_raw | |||
| 348 | int hfsplus_file_extend(struct inode *inode) | 368 | int hfsplus_file_extend(struct inode *inode) |
| 349 | { | 369 | { |
| 350 | struct super_block *sb = inode->i_sb; | 370 | struct super_block *sb = inode->i_sb; |
| 371 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 372 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 351 | u32 start, len, goal; | 373 | u32 start, len, goal; |
| 352 | int res; | 374 | int res; |
| 353 | 375 | ||
| 354 | if (HFSPLUS_SB(sb).alloc_file->i_size * 8 < HFSPLUS_SB(sb).total_blocks - HFSPLUS_SB(sb).free_blocks + 8) { | 376 | if (sbi->alloc_file->i_size * 8 < |
| 377 | sbi->total_blocks - sbi->free_blocks + 8) { | ||
| 355 | // extend alloc file | 378 | // extend alloc file |
| 356 | printk(KERN_ERR "hfs: extend alloc file! (%Lu,%u,%u)\n", HFSPLUS_SB(sb).alloc_file->i_size * 8, | 379 | printk(KERN_ERR "hfs: extend alloc file! (%Lu,%u,%u)\n", |
| 357 | HFSPLUS_SB(sb).total_blocks, HFSPLUS_SB(sb).free_blocks); | 380 | sbi->alloc_file->i_size * 8, |
| 381 | sbi->total_blocks, sbi->free_blocks); | ||
| 358 | return -ENOSPC; | 382 | return -ENOSPC; |
| 359 | } | 383 | } |
| 360 | 384 | ||
| 361 | mutex_lock(&HFSPLUS_I(inode).extents_lock); | 385 | mutex_lock(&hip->extents_lock); |
| 362 | if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks) | 386 | if (hip->alloc_blocks == hip->first_blocks) |
| 363 | goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents); | 387 | goal = hfsplus_ext_lastblock(hip->first_extents); |
| 364 | else { | 388 | else { |
| 365 | res = hfsplus_ext_read_extent(inode, HFSPLUS_I(inode).alloc_blocks); | 389 | res = hfsplus_ext_read_extent(inode, hip->alloc_blocks); |
| 366 | if (res) | 390 | if (res) |
| 367 | goto out; | 391 | goto out; |
| 368 | goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).cached_extents); | 392 | goal = hfsplus_ext_lastblock(hip->cached_extents); |
| 369 | } | 393 | } |
| 370 | 394 | ||
| 371 | len = HFSPLUS_I(inode).clump_blocks; | 395 | len = hip->clump_blocks; |
| 372 | start = hfsplus_block_allocate(sb, HFSPLUS_SB(sb).total_blocks, goal, &len); | 396 | start = hfsplus_block_allocate(sb, sbi->total_blocks, goal, &len); |
| 373 | if (start >= HFSPLUS_SB(sb).total_blocks) { | 397 | if (start >= sbi->total_blocks) { |
| 374 | start = hfsplus_block_allocate(sb, goal, 0, &len); | 398 | start = hfsplus_block_allocate(sb, goal, 0, &len); |
| 375 | if (start >= goal) { | 399 | if (start >= goal) { |
| 376 | res = -ENOSPC; | 400 | res = -ENOSPC; |
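
hfsplus_block_allocate() is tried twice here: first from the goal to the end of the volume, then wrapped around from block 0 up to the goal, with `start >= goal` as the out-of-space signal. A toy bitmap version of that two-pass search (find_free() is a stand-in, not the kernel allocator):

#include <stdio.h>
#include <stdint.h>

/* stand-in for hfsplus_block_allocate(): scan [start, end) for a free slot */
static uint32_t find_free(const uint8_t *map, uint32_t start, uint32_t end)
{
	for (uint32_t i = start; i < end; i++)
		if (!map[i])
			return i;
	return end;	/* returning the end marks failure */
}

int main(void)
{
	uint8_t map[16];
	uint32_t total = 16, goal = 8, start;

	for (uint32_t i = 0; i < total; i++)
		map[i] = 1;	/* everything in use... */
	map[2] = 0;		/* ...except one block before the goal */

	start = find_free(map, goal, total);		/* pass 1: goal to end */
	if (start >= total) {
		start = find_free(map, 0, goal);	/* pass 2: wrap around */
		if (start >= goal) {
			puts("-ENOSPC");
			return 1;
		}
	}
	printf("allocated block %u\n", start);
	return 0;
}
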
| @@ -379,56 +403,56 @@ int hfsplus_file_extend(struct inode *inode) | |||
| 379 | } | 403 | } |
| 380 | 404 | ||
| 381 | dprint(DBG_EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); | 405 | dprint(DBG_EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); |
| 382 | if (HFSPLUS_I(inode).alloc_blocks <= HFSPLUS_I(inode).first_blocks) { | 406 | |
| 383 | if (!HFSPLUS_I(inode).first_blocks) { | 407 | if (hip->alloc_blocks <= hip->first_blocks) { |
| 408 | if (!hip->first_blocks) { | ||
| 384 | dprint(DBG_EXTENT, "first extents\n"); | 409 | dprint(DBG_EXTENT, "first extents\n"); |
| 385 | /* no extents yet */ | 410 | /* no extents yet */ |
| 386 | HFSPLUS_I(inode).first_extents[0].start_block = cpu_to_be32(start); | 411 | hip->first_extents[0].start_block = cpu_to_be32(start); |
| 387 | HFSPLUS_I(inode).first_extents[0].block_count = cpu_to_be32(len); | 412 | hip->first_extents[0].block_count = cpu_to_be32(len); |
| 388 | res = 0; | 413 | res = 0; |
| 389 | } else { | 414 | } else { |
| 390 | /* try to append to extents in inode */ | 415 | /* try to append to extents in inode */ |
| 391 | res = hfsplus_add_extent(HFSPLUS_I(inode).first_extents, | 416 | res = hfsplus_add_extent(hip->first_extents, |
| 392 | HFSPLUS_I(inode).alloc_blocks, | 417 | hip->alloc_blocks, |
| 393 | start, len); | 418 | start, len); |
| 394 | if (res == -ENOSPC) | 419 | if (res == -ENOSPC) |
| 395 | goto insert_extent; | 420 | goto insert_extent; |
| 396 | } | 421 | } |
| 397 | if (!res) { | 422 | if (!res) { |
| 398 | hfsplus_dump_extent(HFSPLUS_I(inode).first_extents); | 423 | hfsplus_dump_extent(hip->first_extents); |
| 399 | HFSPLUS_I(inode).first_blocks += len; | 424 | hip->first_blocks += len; |
| 400 | } | 425 | } |
| 401 | } else { | 426 | } else { |
| 402 | res = hfsplus_add_extent(HFSPLUS_I(inode).cached_extents, | 427 | res = hfsplus_add_extent(hip->cached_extents, |
| 403 | HFSPLUS_I(inode).alloc_blocks - | 428 | hip->alloc_blocks - hip->cached_start, |
| 404 | HFSPLUS_I(inode).cached_start, | ||
| 405 | start, len); | 429 | start, len); |
| 406 | if (!res) { | 430 | if (!res) { |
| 407 | hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents); | 431 | hfsplus_dump_extent(hip->cached_extents); |
| 408 | HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY; | 432 | hip->flags |= HFSPLUS_FLG_EXT_DIRTY; |
| 409 | HFSPLUS_I(inode).cached_blocks += len; | 433 | hip->cached_blocks += len; |
| 410 | } else if (res == -ENOSPC) | 434 | } else if (res == -ENOSPC) |
| 411 | goto insert_extent; | 435 | goto insert_extent; |
| 412 | } | 436 | } |
| 413 | out: | 437 | out: |
| 414 | mutex_unlock(&HFSPLUS_I(inode).extents_lock); | 438 | mutex_unlock(&hip->extents_lock); |
| 415 | if (!res) { | 439 | if (!res) { |
| 416 | HFSPLUS_I(inode).alloc_blocks += len; | 440 | hip->alloc_blocks += len; |
| 417 | mark_inode_dirty(inode); | 441 | mark_inode_dirty(inode); |
| 418 | } | 442 | } |
| 419 | return res; | 443 | return res; |
| 420 | 444 | ||
| 421 | insert_extent: | 445 | insert_extent: |
| 422 | dprint(DBG_EXTENT, "insert new extent\n"); | 446 | dprint(DBG_EXTENT, "insert new extent\n"); |
| 423 | hfsplus_ext_write_extent(inode); | 447 | hfsplus_ext_write_extent_locked(inode); |
| 424 | 448 | ||
| 425 | memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec)); | 449 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); |
| 426 | HFSPLUS_I(inode).cached_extents[0].start_block = cpu_to_be32(start); | 450 | hip->cached_extents[0].start_block = cpu_to_be32(start); |
| 427 | HFSPLUS_I(inode).cached_extents[0].block_count = cpu_to_be32(len); | 451 | hip->cached_extents[0].block_count = cpu_to_be32(len); |
| 428 | hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents); | 452 | hfsplus_dump_extent(hip->cached_extents); |
| 429 | HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW; | 453 | hip->flags |= HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW; |
| 430 | HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).alloc_blocks; | 454 | hip->cached_start = hip->alloc_blocks; |
| 431 | HFSPLUS_I(inode).cached_blocks = len; | 455 | hip->cached_blocks = len; |
| 432 | 456 | ||
| 433 | res = 0; | 457 | res = 0; |
| 434 | goto out; | 458 | goto out; |
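
The insert_extent path runs with extents_lock already held, which is why it now flushes through hfsplus_ext_write_extent_locked(): kernel mutexes are not recursive, so calling the plain entry point from under the lock would self-deadlock. A pthread sketch of the distinction:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t extents_lock = PTHREAD_MUTEX_INITIALIZER;

static void flush_locked(void)		/* caller owns extents_lock */
{
	puts("flushed under the caller's lock");
}

static void flush(void)			/* takes the lock itself */
{
	pthread_mutex_lock(&extents_lock);
	flush_locked();
	pthread_mutex_unlock(&extents_lock);
}

static void extend_file(void)
{
	pthread_mutex_lock(&extents_lock);
	/* calling flush() here would block forever on extents_lock */
	flush_locked();
	pthread_mutex_unlock(&extents_lock);
}

int main(void)
{
	flush();
	extend_file();
	return 0;
}
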
| @@ -437,13 +461,15 @@ insert_extent: | |||
| 437 | void hfsplus_file_truncate(struct inode *inode) | 461 | void hfsplus_file_truncate(struct inode *inode) |
| 438 | { | 462 | { |
| 439 | struct super_block *sb = inode->i_sb; | 463 | struct super_block *sb = inode->i_sb; |
| 464 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 440 | struct hfs_find_data fd; | 465 | struct hfs_find_data fd; |
| 441 | u32 alloc_cnt, blk_cnt, start; | 466 | u32 alloc_cnt, blk_cnt, start; |
| 442 | int res; | 467 | int res; |
| 443 | 468 | ||
| 444 | dprint(DBG_INODE, "truncate: %lu, %Lu -> %Lu\n", inode->i_ino, | 469 | dprint(DBG_INODE, "truncate: %lu, %Lu -> %Lu\n", |
| 445 | (long long)HFSPLUS_I(inode).phys_size, inode->i_size); | 470 | inode->i_ino, (long long)hip->phys_size, inode->i_size); |
| 446 | if (inode->i_size > HFSPLUS_I(inode).phys_size) { | 471 | |
| 472 | if (inode->i_size > hip->phys_size) { | ||
| 447 | struct address_space *mapping = inode->i_mapping; | 473 | struct address_space *mapping = inode->i_mapping; |
| 448 | struct page *page; | 474 | struct page *page; |
| 449 | void *fsdata; | 475 | void *fsdata; |
| @@ -460,47 +486,48 @@ void hfsplus_file_truncate(struct inode *inode) | |||
| 460 | return; | 486 | return; |
| 461 | mark_inode_dirty(inode); | 487 | mark_inode_dirty(inode); |
| 462 | return; | 488 | return; |
| 463 | } else if (inode->i_size == HFSPLUS_I(inode).phys_size) | 489 | } else if (inode->i_size == hip->phys_size) |
| 464 | return; | 490 | return; |
| 465 | 491 | ||
| 466 | blk_cnt = (inode->i_size + HFSPLUS_SB(sb).alloc_blksz - 1) >> HFSPLUS_SB(sb).alloc_blksz_shift; | 492 | blk_cnt = (inode->i_size + HFSPLUS_SB(sb)->alloc_blksz - 1) >> |
| 467 | alloc_cnt = HFSPLUS_I(inode).alloc_blocks; | 493 | HFSPLUS_SB(sb)->alloc_blksz_shift; |
| 494 | alloc_cnt = hip->alloc_blocks; | ||
| 468 | if (blk_cnt == alloc_cnt) | 495 | if (blk_cnt == alloc_cnt) |
| 469 | goto out; | 496 | goto out; |
| 470 | 497 | ||
| 471 | mutex_lock(&HFSPLUS_I(inode).extents_lock); | 498 | mutex_lock(&hip->extents_lock); |
| 472 | hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd); | 499 | hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); |
| 473 | while (1) { | 500 | while (1) { |
| 474 | if (alloc_cnt == HFSPLUS_I(inode).first_blocks) { | 501 | if (alloc_cnt == hip->first_blocks) { |
| 475 | hfsplus_free_extents(sb, HFSPLUS_I(inode).first_extents, | 502 | hfsplus_free_extents(sb, hip->first_extents, |
| 476 | alloc_cnt, alloc_cnt - blk_cnt); | 503 | alloc_cnt, alloc_cnt - blk_cnt); |
| 477 | hfsplus_dump_extent(HFSPLUS_I(inode).first_extents); | 504 | hfsplus_dump_extent(hip->first_extents); |
| 478 | HFSPLUS_I(inode).first_blocks = blk_cnt; | 505 | hip->first_blocks = blk_cnt; |
| 479 | break; | 506 | break; |
| 480 | } | 507 | } |
| 481 | res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt); | 508 | res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt); |
| 482 | if (res) | 509 | if (res) |
| 483 | break; | 510 | break; |
| 484 | start = HFSPLUS_I(inode).cached_start; | 511 | start = hip->cached_start; |
| 485 | hfsplus_free_extents(sb, HFSPLUS_I(inode).cached_extents, | 512 | hfsplus_free_extents(sb, hip->cached_extents, |
| 486 | alloc_cnt - start, alloc_cnt - blk_cnt); | 513 | alloc_cnt - start, alloc_cnt - blk_cnt); |
| 487 | hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents); | 514 | hfsplus_dump_extent(hip->cached_extents); |
| 488 | if (blk_cnt > start) { | 515 | if (blk_cnt > start) { |
| 489 | HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY; | 516 | hip->flags |= HFSPLUS_FLG_EXT_DIRTY; |
| 490 | break; | 517 | break; |
| 491 | } | 518 | } |
| 492 | alloc_cnt = start; | 519 | alloc_cnt = start; |
| 493 | HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).cached_blocks = 0; | 520 | hip->cached_start = hip->cached_blocks = 0; |
| 494 | HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); | 521 | hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); |
| 495 | hfs_brec_remove(&fd); | 522 | hfs_brec_remove(&fd); |
| 496 | } | 523 | } |
| 497 | hfs_find_exit(&fd); | 524 | hfs_find_exit(&fd); |
| 498 | mutex_unlock(&HFSPLUS_I(inode).extents_lock); | 525 | mutex_unlock(&hip->extents_lock); |
| 499 | 526 | ||
| 500 | HFSPLUS_I(inode).alloc_blocks = blk_cnt; | 527 | hip->alloc_blocks = blk_cnt; |
| 501 | out: | 528 | out: |
| 502 | HFSPLUS_I(inode).phys_size = inode->i_size; | 529 | hip->phys_size = inode->i_size; |
| 503 | HFSPLUS_I(inode).fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; | 530 | hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; |
| 504 | inode_set_bytes(inode, HFSPLUS_I(inode).fs_blocks << sb->s_blocksize_bits); | 531 | inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits); |
| 505 | mark_inode_dirty(inode); | 532 | mark_inode_dirty(inode); |
| 506 | } | 533 | } |
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index dc856be3c2b0..cb3653efb57a 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h | |||
| @@ -62,7 +62,7 @@ struct hfs_btree { | |||
| 62 | unsigned int depth; | 62 | unsigned int depth; |
| 63 | 63 | ||
| 64 | //unsigned int map1_size, map_size; | 64 | //unsigned int map1_size, map_size; |
| 65 | struct semaphore tree_lock; | 65 | struct mutex tree_lock; |
| 66 | 66 | ||
| 67 | unsigned int pages_per_bnode; | 67 | unsigned int pages_per_bnode; |
| 68 | spinlock_t hash_lock; | 68 | spinlock_t hash_lock; |
| @@ -121,16 +121,21 @@ struct hfsplus_sb_info { | |||
| 121 | u32 sect_count; | 121 | u32 sect_count; |
| 122 | int fs_shift; | 122 | int fs_shift; |
| 123 | 123 | ||
| 124 | /* Stuff in host order from Vol Header */ | 124 | /* immutable data from the volume header */ |
| 125 | u32 alloc_blksz; | 125 | u32 alloc_blksz; |
| 126 | int alloc_blksz_shift; | 126 | int alloc_blksz_shift; |
| 127 | u32 total_blocks; | 127 | u32 total_blocks; |
| 128 | u32 data_clump_blocks, rsrc_clump_blocks; | ||
| 129 | |||
| 130 | /* mutable data from the volume header, protected by alloc_mutex */ | ||
| 128 | u32 free_blocks; | 131 | u32 free_blocks; |
| 129 | u32 next_alloc; | 132 | struct mutex alloc_mutex; |
| 133 | |||
| 134 | /* mutable data from the volume header, protected by vh_mutex */ | ||
| 130 | u32 next_cnid; | 135 | u32 next_cnid; |
| 131 | u32 file_count; | 136 | u32 file_count; |
| 132 | u32 folder_count; | 137 | u32 folder_count; |
| 133 | u32 data_clump_blocks, rsrc_clump_blocks; | 138 | struct mutex vh_mutex; |
| 134 | 139 | ||
| 135 | /* Config options */ | 140 | /* Config options */ |
| 136 | u32 creator; | 141 | u32 creator; |
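
The flat field list splits into three locking domains: data that never changes after mount, allocator state under the new alloc_mutex, and volume-header counters under vh_mutex. An illustrative, non-kernel layout with one accessor showing which lock a CNID allocation needs:

#include <pthread.h>
#include <stdio.h>
#include <stdint.h>

struct sbi_sketch {
	/* immutable after mount: no lock needed */
	uint32_t alloc_blksz;
	uint32_t total_blocks;

	/* allocator state: take alloc_mutex */
	pthread_mutex_t alloc_mutex;
	uint32_t free_blocks;

	/* volume-header counters: take vh_mutex */
	pthread_mutex_t vh_mutex;
	uint32_t next_cnid;
	uint32_t file_count;
	uint32_t folder_count;
};

/* a CNID allocation only touches vh_mutex state */
static uint32_t alloc_cnid(struct sbi_sketch *s)
{
	pthread_mutex_lock(&s->vh_mutex);
	uint32_t cnid = s->next_cnid++;
	pthread_mutex_unlock(&s->vh_mutex);
	return cnid;
}

int main(void)
{
	static struct sbi_sketch s = {
		.alloc_mutex = PTHREAD_MUTEX_INITIALIZER,
		.vh_mutex = PTHREAD_MUTEX_INITIALIZER,
		.next_cnid = 16,
	};

	printf("new cnid: %u\n", (unsigned int)alloc_cnid(&s));
	return 0;
}
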
| @@ -143,40 +148,50 @@ struct hfsplus_sb_info { | |||
| 143 | int part, session; | 148 | int part, session; |
| 144 | 149 | ||
| 145 | unsigned long flags; | 150 | unsigned long flags; |
| 146 | |||
| 147 | struct hlist_head rsrc_inodes; | ||
| 148 | }; | 151 | }; |
| 149 | 152 | ||
| 150 | #define HFSPLUS_SB_WRITEBACKUP 0x0001 | 153 | #define HFSPLUS_SB_WRITEBACKUP 0 |
| 151 | #define HFSPLUS_SB_NODECOMPOSE 0x0002 | 154 | #define HFSPLUS_SB_NODECOMPOSE 1 |
| 152 | #define HFSPLUS_SB_FORCE 0x0004 | 155 | #define HFSPLUS_SB_FORCE 2 |
| 153 | #define HFSPLUS_SB_HFSX 0x0008 | 156 | #define HFSPLUS_SB_HFSX 3 |
| 154 | #define HFSPLUS_SB_CASEFOLD 0x0010 | 157 | #define HFSPLUS_SB_CASEFOLD 4 |
| 155 | 158 | ||
| 156 | 159 | ||
| 157 | struct hfsplus_inode_info { | 160 | struct hfsplus_inode_info { |
| 158 | struct mutex extents_lock; | ||
| 159 | u32 clump_blocks, alloc_blocks; | ||
| 160 | sector_t fs_blocks; | ||
| 161 | /* Allocation extents from catalog record or volume header */ | ||
| 162 | hfsplus_extent_rec first_extents; | ||
| 163 | u32 first_blocks; | ||
| 164 | hfsplus_extent_rec cached_extents; | ||
| 165 | u32 cached_start, cached_blocks; | ||
| 166 | atomic_t opencnt; | 161 | atomic_t opencnt; |
| 167 | 162 | ||
| 168 | struct inode *rsrc_inode; | 163 | /* |
| 164 | * Extent allocation information, protected by extents_lock. | ||
| 165 | */ | ||
| 166 | u32 first_blocks; | ||
| 167 | u32 clump_blocks; | ||
| 168 | u32 alloc_blocks; | ||
| 169 | u32 cached_start; | ||
| 170 | u32 cached_blocks; | ||
| 171 | hfsplus_extent_rec first_extents; | ||
| 172 | hfsplus_extent_rec cached_extents; | ||
| 169 | unsigned long flags; | 173 | unsigned long flags; |
| 174 | struct mutex extents_lock; | ||
| 170 | 175 | ||
| 176 | /* | ||
| 177 | * Immutable data. | ||
| 178 | */ | ||
| 179 | struct inode *rsrc_inode; | ||
| 171 | __be32 create_date; | 180 | __be32 create_date; |
| 172 | /* Device number in hfsplus_permissions in catalog */ | ||
| 173 | u32 dev; | ||
| 174 | /* BSD system and user file flags */ | ||
| 175 | u8 rootflags; | ||
| 176 | u8 userflags; | ||
| 177 | 181 | ||
| 182 | /* | ||
| 183 | * Protected by sbi->vh_mutex. | ||
| 184 | */ | ||
| 185 | u32 linkid; | ||
| 186 | |||
| 187 | /* | ||
| 188 | * Protected by i_mutex. | ||
| 189 | */ | ||
| 190 | sector_t fs_blocks; | ||
| 191 | u8 userflags; /* BSD user file flags */ | ||
| 178 | struct list_head open_dir_list; | 192 | struct list_head open_dir_list; |
| 179 | loff_t phys_size; | 193 | loff_t phys_size; |
| 194 | |||
| 180 | struct inode vfs_inode; | 195 | struct inode vfs_inode; |
| 181 | }; | 196 | }; |
| 182 | 197 | ||
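
In the same hunk the HFSPLUS_SB_* constants change from bitmasks (0x0001, 0x0002, ...) to bit numbers (0, 1, ...), the calling convention of set_bit()/test_bit() and friends, so the flags word can be updated atomically. A sketch with simplified, non-atomic stand-ins for those helpers:

#include <stdio.h>

#define HFSPLUS_SB_WRITEBACKUP	0	/* bit numbers, not masks */
#define HFSPLUS_SB_NODECOMPOSE	1
#define HFSPLUS_SB_FORCE	2

/* simplified, non-atomic stand-ins for set_bit()/test_bit() */
static void set_bit_nr(int nr, unsigned long *addr)
{
	*addr |= 1UL << nr;
}

static int test_bit_nr(int nr, const unsigned long *addr)
{
	return (*addr >> nr) & 1;
}

int main(void)
{
	unsigned long flags = 0;

	set_bit_nr(HFSPLUS_SB_NODECOMPOSE, &flags);
	printf("nodecompose=%d force=%d\n",
	       test_bit_nr(HFSPLUS_SB_NODECOMPOSE, &flags),
	       test_bit_nr(HFSPLUS_SB_FORCE, &flags));
	return 0;
}
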
| @@ -184,8 +199,8 @@ struct hfsplus_inode_info { | |||
| 184 | #define HFSPLUS_FLG_EXT_DIRTY 0x0002 | 199 | #define HFSPLUS_FLG_EXT_DIRTY 0x0002 |
| 185 | #define HFSPLUS_FLG_EXT_NEW 0x0004 | 200 | #define HFSPLUS_FLG_EXT_NEW 0x0004 |
| 186 | 201 | ||
| 187 | #define HFSPLUS_IS_DATA(inode) (!(HFSPLUS_I(inode).flags & HFSPLUS_FLG_RSRC)) | 202 | #define HFSPLUS_IS_DATA(inode) (!(HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC)) |
| 188 | #define HFSPLUS_IS_RSRC(inode) (HFSPLUS_I(inode).flags & HFSPLUS_FLG_RSRC) | 203 | #define HFSPLUS_IS_RSRC(inode) (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC) |
| 189 | 204 | ||
| 190 | struct hfs_find_data { | 205 | struct hfs_find_data { |
| 191 | /* filled by caller */ | 206 | /* filled by caller */ |
| @@ -311,6 +326,7 @@ int hfsplus_create_cat(u32, struct inode *, struct qstr *, struct inode *); | |||
| 311 | int hfsplus_delete_cat(u32, struct inode *, struct qstr *); | 326 | int hfsplus_delete_cat(u32, struct inode *, struct qstr *); |
| 312 | int hfsplus_rename_cat(u32, struct inode *, struct qstr *, | 327 | int hfsplus_rename_cat(u32, struct inode *, struct qstr *, |
| 313 | struct inode *, struct qstr *); | 328 | struct inode *, struct qstr *); |
| 329 | void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms); | ||
| 314 | 330 | ||
| 315 | /* dir.c */ | 331 | /* dir.c */ |
| 316 | extern const struct inode_operations hfsplus_dir_inode_operations; | 332 | extern const struct inode_operations hfsplus_dir_inode_operations; |
| @@ -372,26 +388,15 @@ int hfsplus_read_wrapper(struct super_block *); | |||
| 372 | int hfs_part_find(struct super_block *, sector_t *, sector_t *); | 388 | int hfs_part_find(struct super_block *, sector_t *, sector_t *); |
| 373 | 389 | ||
| 374 | /* access macros */ | 390 | /* access macros */ |
| 375 | /* | ||
| 376 | static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb) | 391 | static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb) |
| 377 | { | 392 | { |
| 378 | return sb->s_fs_info; | 393 | return sb->s_fs_info; |
| 379 | } | 394 | } |
| 395 | |||
| 380 | static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) | 396 | static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) |
| 381 | { | 397 | { |
| 382 | return list_entry(inode, struct hfsplus_inode_info, vfs_inode); | 398 | return list_entry(inode, struct hfsplus_inode_info, vfs_inode); |
| 383 | } | 399 | } |
| 384 | */ | ||
| 385 | #define HFSPLUS_SB(super) (*(struct hfsplus_sb_info *)(super)->s_fs_info) | ||
| 386 | #define HFSPLUS_I(inode) (*list_entry(inode, struct hfsplus_inode_info, vfs_inode)) | ||
| 387 | |||
| 388 | #if 1 | ||
| 389 | #define hfsplus_kmap(p) ({ struct page *__p = (p); kmap(__p); }) | ||
| 390 | #define hfsplus_kunmap(p) ({ struct page *__p = (p); kunmap(__p); __p; }) | ||
| 391 | #else | ||
| 392 | #define hfsplus_kmap(p) kmap(p) | ||
| 393 | #define hfsplus_kunmap(p) kunmap(p) | ||
| 394 | #endif | ||
| 395 | 400 | ||
| 396 | #define sb_bread512(sb, sec, data) ({ \ | 401 | #define sb_bread512(sb, sec, data) ({ \ |
| 397 | struct buffer_head *__bh; \ | 402 | struct buffer_head *__bh; \ |
| @@ -419,6 +424,4 @@ static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) | |||
| 419 | #define hfsp_ut2mt(t) __hfsp_ut2mt((t).tv_sec) | 424 | #define hfsp_ut2mt(t) __hfsp_ut2mt((t).tv_sec) |
| 420 | #define hfsp_now2mt() __hfsp_ut2mt(get_seconds()) | 425 | #define hfsp_now2mt() __hfsp_ut2mt(get_seconds()) |
| 421 | 426 | ||
| 422 | #define kdev_t_to_nr(x) (x) | ||
| 423 | |||
| 424 | #endif | 427 | #endif |
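The accessor change above is the backbone of this series: the old `HFSPLUS_SB()`/`HFSPLUS_I()` macros dereferenced straight through to the embedded structures, which is why every call site below switches from `.` to `->`. A minimal out-of-tree sketch of the pattern follows; the `_stub` names and simplified fields are illustrative, not from the kernel.

```c
#include <stddef.h>

/* container_of(): recover the enclosing structure from a pointer to
 * one of its members; the kernel's list_entry() is the same thing. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct inode_stub { int dummy; };	/* stand-in for struct inode */

struct hfsplus_inode_info_stub {	/* stand-in, most fields elided */
	unsigned long flags;
	struct inode_stub vfs_inode;	/* VFS inode embedded in the fs inode */
};

/* Pointer-returning accessor, mirroring the new inline HFSPLUS_I(). */
static inline struct hfsplus_inode_info_stub *
HFSPLUS_I_stub(struct inode_stub *inode)
{
	return container_of(inode, struct hfsplus_inode_info_stub, vfs_inode);
}
```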
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index fe99fe8db61a..6892899fd6fb 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h | |||
| @@ -200,6 +200,7 @@ struct hfsplus_cat_key { | |||
| 200 | struct hfsplus_unistr name; | 200 | struct hfsplus_unistr name; |
| 201 | } __packed; | 201 | } __packed; |
| 202 | 202 | ||
| 203 | #define HFSPLUS_CAT_KEYLEN (sizeof(struct hfsplus_cat_key)) | ||
| 203 | 204 | ||
| 204 | /* Structs from hfs.h */ | 205 | /* Structs from hfs.h */ |
| 205 | struct hfsp_point { | 206 | struct hfsp_point { |
| @@ -323,7 +324,7 @@ struct hfsplus_ext_key { | |||
| 323 | __be32 start_block; | 324 | __be32 start_block; |
| 324 | } __packed; | 325 | } __packed; |
| 325 | 326 | ||
| 326 | #define HFSPLUS_EXT_KEYLEN 12 | 327 | #define HFSPLUS_EXT_KEYLEN sizeof(struct hfsplus_ext_key) |
| 327 | 328 | ||
| 328 | /* HFS+ generic BTree key */ | 329 | /* HFS+ generic BTree key */ |
| 329 | typedef union { | 330 | typedef union { |
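Replacing the literal 12 with `sizeof(struct hfsplus_ext_key)` (and adding `HFSPLUS_CAT_KEYLEN`) only stays correct while the structure remains `__packed`. A compile-time check along these lines would pin that invariant down; the stub layout is illustrative and uses plain fixed-width integers instead of `__be16`/`__be32`.

```c
#include <stdint.h>

/* Illustrative stand-in for the packed on-disk extent key. */
struct hfsplus_ext_key_stub {
	uint16_t key_len;
	uint8_t  fork_type;
	uint8_t  pad;
	uint32_t cnid;
	uint32_t start_block;
} __attribute__((packed));

/* sizeof() now *is* the on-disk key length, but only without padding;
 * a static assert catches any accidental layout change at build time. */
_Static_assert(sizeof(struct hfsplus_ext_key_stub) == 12,
	       "on-disk extent key must stay 12 bytes");
```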
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index c5a979d62c65..78449280dae0 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c | |||
| @@ -36,7 +36,7 @@ static int hfsplus_write_begin(struct file *file, struct address_space *mapping, | |||
| 36 | *pagep = NULL; | 36 | *pagep = NULL; |
| 37 | ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 37 | ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, |
| 38 | hfsplus_get_block, | 38 | hfsplus_get_block, |
| 39 | &HFSPLUS_I(mapping->host).phys_size); | 39 | &HFSPLUS_I(mapping->host)->phys_size); |
| 40 | if (unlikely(ret)) { | 40 | if (unlikely(ret)) { |
| 41 | loff_t isize = mapping->host->i_size; | 41 | loff_t isize = mapping->host->i_size; |
| 42 | if (pos + len > isize) | 42 | if (pos + len > isize) |
| @@ -62,13 +62,13 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) | |||
| 62 | 62 | ||
| 63 | switch (inode->i_ino) { | 63 | switch (inode->i_ino) { |
| 64 | case HFSPLUS_EXT_CNID: | 64 | case HFSPLUS_EXT_CNID: |
| 65 | tree = HFSPLUS_SB(sb).ext_tree; | 65 | tree = HFSPLUS_SB(sb)->ext_tree; |
| 66 | break; | 66 | break; |
| 67 | case HFSPLUS_CAT_CNID: | 67 | case HFSPLUS_CAT_CNID: |
| 68 | tree = HFSPLUS_SB(sb).cat_tree; | 68 | tree = HFSPLUS_SB(sb)->cat_tree; |
| 69 | break; | 69 | break; |
| 70 | case HFSPLUS_ATTR_CNID: | 70 | case HFSPLUS_ATTR_CNID: |
| 71 | tree = HFSPLUS_SB(sb).attr_tree; | 71 | tree = HFSPLUS_SB(sb)->attr_tree; |
| 72 | break; | 72 | break; |
| 73 | default: | 73 | default: |
| 74 | BUG(); | 74 | BUG(); |
| @@ -172,12 +172,13 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent | |||
| 172 | struct hfs_find_data fd; | 172 | struct hfs_find_data fd; |
| 173 | struct super_block *sb = dir->i_sb; | 173 | struct super_block *sb = dir->i_sb; |
| 174 | struct inode *inode = NULL; | 174 | struct inode *inode = NULL; |
| 175 | struct hfsplus_inode_info *hip; | ||
| 175 | int err; | 176 | int err; |
| 176 | 177 | ||
| 177 | if (HFSPLUS_IS_RSRC(dir) || strcmp(dentry->d_name.name, "rsrc")) | 178 | if (HFSPLUS_IS_RSRC(dir) || strcmp(dentry->d_name.name, "rsrc")) |
| 178 | goto out; | 179 | goto out; |
| 179 | 180 | ||
| 180 | inode = HFSPLUS_I(dir).rsrc_inode; | 181 | inode = HFSPLUS_I(dir)->rsrc_inode; |
| 181 | if (inode) | 182 | if (inode) |
| 182 | goto out; | 183 | goto out; |
| 183 | 184 | ||
| @@ -185,12 +186,13 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent | |||
| 185 | if (!inode) | 186 | if (!inode) |
| 186 | return ERR_PTR(-ENOMEM); | 187 | return ERR_PTR(-ENOMEM); |
| 187 | 188 | ||
| 189 | hip = HFSPLUS_I(inode); | ||
| 188 | inode->i_ino = dir->i_ino; | 190 | inode->i_ino = dir->i_ino; |
| 189 | INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); | 191 | INIT_LIST_HEAD(&hip->open_dir_list); |
| 190 | mutex_init(&HFSPLUS_I(inode).extents_lock); | 192 | mutex_init(&hip->extents_lock); |
| 191 | HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC; | 193 | hip->flags = HFSPLUS_FLG_RSRC; |
| 192 | 194 | ||
| 193 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 195 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
| 194 | err = hfsplus_find_cat(sb, dir->i_ino, &fd); | 196 | err = hfsplus_find_cat(sb, dir->i_ino, &fd); |
| 195 | if (!err) | 197 | if (!err) |
| 196 | err = hfsplus_cat_read_inode(inode, &fd); | 198 | err = hfsplus_cat_read_inode(inode, &fd); |
| @@ -199,10 +201,18 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent | |||
| 199 | iput(inode); | 201 | iput(inode); |
| 200 | return ERR_PTR(err); | 202 | return ERR_PTR(err); |
| 201 | } | 203 | } |
| 202 | HFSPLUS_I(inode).rsrc_inode = dir; | 204 | hip->rsrc_inode = dir; |
| 203 | HFSPLUS_I(dir).rsrc_inode = inode; | 205 | HFSPLUS_I(dir)->rsrc_inode = inode; |
| 204 | igrab(dir); | 206 | igrab(dir); |
| 205 | hlist_add_head(&inode->i_hash, &HFSPLUS_SB(sb).rsrc_inodes); | 207 | |
| 208 | /* | ||
| 209 | * __mark_inode_dirty expects inodes to be hashed. Since we don't | ||
| 210 | * want resource fork inodes in the regular inode space, we make them | ||
| 211 | * appear hashed, but do not put on any lists. hlist_del() | ||
| 212 | * will work fine and require no locking. | ||
| 213 | */ | ||
| 214 | inode->i_hash.pprev = &inode->i_hash.next; | ||
| 215 | |||
| 206 | mark_inode_dirty(inode); | 216 | mark_inode_dirty(inode); |
| 207 | out: | 217 | out: |
| 208 | d_add(dentry, inode); | 218 | d_add(dentry, inode); |
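The new comment in the lookup hunk is worth unpacking: pointing `i_hash.pprev` at the node's own `next` field makes `hlist_unhashed()` report the inode as hashed (which satisfies `__mark_inode_dirty`) without putting it on any shared hash chain. A standalone sketch of why the eventual `hlist_del()` then needs no locking, using a minimal hlist with the same layout as the kernel's:

```c
#include <stdbool.h>
#include <stddef.h>

struct hlist_node {
	struct hlist_node *next, **pprev;
};

/* hlist_unhashed() only tests pprev. */
static bool hlist_unhashed(const struct hlist_node *h)
{
	return !h->pprev;
}

/* hlist_del() writes through pprev and, if set, next->pprev. */
static void hlist_del(struct hlist_node *n)
{
	*n->pprev = n->next;
	if (n->next)
		n->next->pprev = n->pprev;
}

/* The trick from the hunk above: with next NULL (a freshly allocated
 * inode in the kernel case) and pprev aimed at our own next field,
 * hlist_del() touches only this node -- no list, hence no lock. */
static void fake_hash(struct hlist_node *n)
{
	n->next = NULL;			/* assumed zeroed, as for a new inode */
	n->pprev = &n->next;		/* i_hash.pprev = &i_hash.next */
}

int main(void)
{
	struct hlist_node n;

	fake_hash(&n);
	if (!hlist_unhashed(&n))	/* the node now looks hashed */
		hlist_del(&n);		/* safe: touches only this node */
	return 0;
}
```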
| @@ -211,30 +221,27 @@ out: | |||
| 211 | 221 | ||
| 212 | static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, int dir) | 222 | static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, int dir) |
| 213 | { | 223 | { |
| 214 | struct super_block *sb = inode->i_sb; | 224 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); |
| 215 | u16 mode; | 225 | u16 mode; |
| 216 | 226 | ||
| 217 | mode = be16_to_cpu(perms->mode); | 227 | mode = be16_to_cpu(perms->mode); |
| 218 | 228 | ||
| 219 | inode->i_uid = be32_to_cpu(perms->owner); | 229 | inode->i_uid = be32_to_cpu(perms->owner); |
| 220 | if (!inode->i_uid && !mode) | 230 | if (!inode->i_uid && !mode) |
| 221 | inode->i_uid = HFSPLUS_SB(sb).uid; | 231 | inode->i_uid = sbi->uid; |
| 222 | 232 | ||
| 223 | inode->i_gid = be32_to_cpu(perms->group); | 233 | inode->i_gid = be32_to_cpu(perms->group); |
| 224 | if (!inode->i_gid && !mode) | 234 | if (!inode->i_gid && !mode) |
| 225 | inode->i_gid = HFSPLUS_SB(sb).gid; | 235 | inode->i_gid = sbi->gid; |
| 226 | 236 | ||
| 227 | if (dir) { | 237 | if (dir) { |
| 228 | mode = mode ? (mode & S_IALLUGO) : | 238 | mode = mode ? (mode & S_IALLUGO) : (S_IRWXUGO & ~(sbi->umask)); |
| 229 | (S_IRWXUGO & ~(HFSPLUS_SB(sb).umask)); | ||
| 230 | mode |= S_IFDIR; | 239 | mode |= S_IFDIR; |
| 231 | } else if (!mode) | 240 | } else if (!mode) |
| 232 | mode = S_IFREG | ((S_IRUGO|S_IWUGO) & | 241 | mode = S_IFREG | ((S_IRUGO|S_IWUGO) & ~(sbi->umask)); |
| 233 | ~(HFSPLUS_SB(sb).umask)); | ||
| 234 | inode->i_mode = mode; | 242 | inode->i_mode = mode; |
| 235 | 243 | ||
| 236 | HFSPLUS_I(inode).rootflags = perms->rootflags; | 244 | HFSPLUS_I(inode)->userflags = perms->userflags; |
| 237 | HFSPLUS_I(inode).userflags = perms->userflags; | ||
| 238 | if (perms->rootflags & HFSPLUS_FLG_IMMUTABLE) | 245 | if (perms->rootflags & HFSPLUS_FLG_IMMUTABLE) |
| 239 | inode->i_flags |= S_IMMUTABLE; | 246 | inode->i_flags |= S_IMMUTABLE; |
| 240 | else | 247 | else |
| @@ -245,30 +252,13 @@ static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, i | |||
| 245 | inode->i_flags &= ~S_APPEND; | 252 | inode->i_flags &= ~S_APPEND; |
| 246 | } | 253 | } |
| 247 | 254 | ||
| 248 | static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) | ||
| 249 | { | ||
| 250 | if (inode->i_flags & S_IMMUTABLE) | ||
| 251 | perms->rootflags |= HFSPLUS_FLG_IMMUTABLE; | ||
| 252 | else | ||
| 253 | perms->rootflags &= ~HFSPLUS_FLG_IMMUTABLE; | ||
| 254 | if (inode->i_flags & S_APPEND) | ||
| 255 | perms->rootflags |= HFSPLUS_FLG_APPEND; | ||
| 256 | else | ||
| 257 | perms->rootflags &= ~HFSPLUS_FLG_APPEND; | ||
| 258 | perms->userflags = HFSPLUS_I(inode).userflags; | ||
| 259 | perms->mode = cpu_to_be16(inode->i_mode); | ||
| 260 | perms->owner = cpu_to_be32(inode->i_uid); | ||
| 261 | perms->group = cpu_to_be32(inode->i_gid); | ||
| 262 | perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); | ||
| 263 | } | ||
| 264 | |||
| 265 | static int hfsplus_file_open(struct inode *inode, struct file *file) | 255 | static int hfsplus_file_open(struct inode *inode, struct file *file) |
| 266 | { | 256 | { |
| 267 | if (HFSPLUS_IS_RSRC(inode)) | 257 | if (HFSPLUS_IS_RSRC(inode)) |
| 268 | inode = HFSPLUS_I(inode).rsrc_inode; | 258 | inode = HFSPLUS_I(inode)->rsrc_inode; |
| 269 | if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) | 259 | if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) |
| 270 | return -EOVERFLOW; | 260 | return -EOVERFLOW; |
| 271 | atomic_inc(&HFSPLUS_I(inode).opencnt); | 261 | atomic_inc(&HFSPLUS_I(inode)->opencnt); |
| 272 | return 0; | 262 | return 0; |
| 273 | } | 263 | } |
| 274 | 264 | ||
| @@ -277,12 +267,13 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) | |||
| 277 | struct super_block *sb = inode->i_sb; | 267 | struct super_block *sb = inode->i_sb; |
| 278 | 268 | ||
| 279 | if (HFSPLUS_IS_RSRC(inode)) | 269 | if (HFSPLUS_IS_RSRC(inode)) |
| 280 | inode = HFSPLUS_I(inode).rsrc_inode; | 270 | inode = HFSPLUS_I(inode)->rsrc_inode; |
| 281 | if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) { | 271 | if (atomic_dec_and_test(&HFSPLUS_I(inode)->opencnt)) { |
| 282 | mutex_lock(&inode->i_mutex); | 272 | mutex_lock(&inode->i_mutex); |
| 283 | hfsplus_file_truncate(inode); | 273 | hfsplus_file_truncate(inode); |
| 284 | if (inode->i_flags & S_DEAD) { | 274 | if (inode->i_flags & S_DEAD) { |
| 285 | hfsplus_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL); | 275 | hfsplus_delete_cat(inode->i_ino, |
| 276 | HFSPLUS_SB(sb)->hidden_dir, NULL); | ||
| 286 | hfsplus_delete_inode(inode); | 277 | hfsplus_delete_inode(inode); |
| 287 | } | 278 | } |
| 288 | mutex_unlock(&inode->i_mutex); | 279 | mutex_unlock(&inode->i_mutex); |
| @@ -361,47 +352,52 @@ static const struct file_operations hfsplus_file_operations = { | |||
| 361 | 352 | ||
| 362 | struct inode *hfsplus_new_inode(struct super_block *sb, int mode) | 353 | struct inode *hfsplus_new_inode(struct super_block *sb, int mode) |
| 363 | { | 354 | { |
| 355 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 364 | struct inode *inode = new_inode(sb); | 356 | struct inode *inode = new_inode(sb); |
| 357 | struct hfsplus_inode_info *hip; | ||
| 358 | |||
| 365 | if (!inode) | 359 | if (!inode) |
| 366 | return NULL; | 360 | return NULL; |
| 367 | 361 | ||
| 368 | inode->i_ino = HFSPLUS_SB(sb).next_cnid++; | 362 | inode->i_ino = sbi->next_cnid++; |
| 369 | inode->i_mode = mode; | 363 | inode->i_mode = mode; |
| 370 | inode->i_uid = current_fsuid(); | 364 | inode->i_uid = current_fsuid(); |
| 371 | inode->i_gid = current_fsgid(); | 365 | inode->i_gid = current_fsgid(); |
| 372 | inode->i_nlink = 1; | 366 | inode->i_nlink = 1; |
| 373 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; | 367 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; |
| 374 | INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); | 368 | |
| 375 | mutex_init(&HFSPLUS_I(inode).extents_lock); | 369 | hip = HFSPLUS_I(inode); |
| 376 | atomic_set(&HFSPLUS_I(inode).opencnt, 0); | 370 | INIT_LIST_HEAD(&hip->open_dir_list); |
| 377 | HFSPLUS_I(inode).flags = 0; | 371 | mutex_init(&hip->extents_lock); |
| 378 | memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec)); | 372 | atomic_set(&hip->opencnt, 0); |
| 379 | memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec)); | 373 | hip->flags = 0; |
| 380 | HFSPLUS_I(inode).alloc_blocks = 0; | 374 | memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); |
| 381 | HFSPLUS_I(inode).first_blocks = 0; | 375 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); |
| 382 | HFSPLUS_I(inode).cached_start = 0; | 376 | hip->alloc_blocks = 0; |
| 383 | HFSPLUS_I(inode).cached_blocks = 0; | 377 | hip->first_blocks = 0; |
| 384 | HFSPLUS_I(inode).phys_size = 0; | 378 | hip->cached_start = 0; |
| 385 | HFSPLUS_I(inode).fs_blocks = 0; | 379 | hip->cached_blocks = 0; |
| 386 | HFSPLUS_I(inode).rsrc_inode = NULL; | 380 | hip->phys_size = 0; |
| 381 | hip->fs_blocks = 0; | ||
| 382 | hip->rsrc_inode = NULL; | ||
| 387 | if (S_ISDIR(inode->i_mode)) { | 383 | if (S_ISDIR(inode->i_mode)) { |
| 388 | inode->i_size = 2; | 384 | inode->i_size = 2; |
| 389 | HFSPLUS_SB(sb).folder_count++; | 385 | sbi->folder_count++; |
| 390 | inode->i_op = &hfsplus_dir_inode_operations; | 386 | inode->i_op = &hfsplus_dir_inode_operations; |
| 391 | inode->i_fop = &hfsplus_dir_operations; | 387 | inode->i_fop = &hfsplus_dir_operations; |
| 392 | } else if (S_ISREG(inode->i_mode)) { | 388 | } else if (S_ISREG(inode->i_mode)) { |
| 393 | HFSPLUS_SB(sb).file_count++; | 389 | sbi->file_count++; |
| 394 | inode->i_op = &hfsplus_file_inode_operations; | 390 | inode->i_op = &hfsplus_file_inode_operations; |
| 395 | inode->i_fop = &hfsplus_file_operations; | 391 | inode->i_fop = &hfsplus_file_operations; |
| 396 | inode->i_mapping->a_ops = &hfsplus_aops; | 392 | inode->i_mapping->a_ops = &hfsplus_aops; |
| 397 | HFSPLUS_I(inode).clump_blocks = HFSPLUS_SB(sb).data_clump_blocks; | 393 | hip->clump_blocks = sbi->data_clump_blocks; |
| 398 | } else if (S_ISLNK(inode->i_mode)) { | 394 | } else if (S_ISLNK(inode->i_mode)) { |
| 399 | HFSPLUS_SB(sb).file_count++; | 395 | sbi->file_count++; |
| 400 | inode->i_op = &page_symlink_inode_operations; | 396 | inode->i_op = &page_symlink_inode_operations; |
| 401 | inode->i_mapping->a_ops = &hfsplus_aops; | 397 | inode->i_mapping->a_ops = &hfsplus_aops; |
| 402 | HFSPLUS_I(inode).clump_blocks = 1; | 398 | hip->clump_blocks = 1; |
| 403 | } else | 399 | } else |
| 404 | HFSPLUS_SB(sb).file_count++; | 400 | sbi->file_count++; |
| 405 | insert_inode_hash(inode); | 401 | insert_inode_hash(inode); |
| 406 | mark_inode_dirty(inode); | 402 | mark_inode_dirty(inode); |
| 407 | sb->s_dirt = 1; | 403 | sb->s_dirt = 1; |
| @@ -414,11 +410,11 @@ void hfsplus_delete_inode(struct inode *inode) | |||
| 414 | struct super_block *sb = inode->i_sb; | 410 | struct super_block *sb = inode->i_sb; |
| 415 | 411 | ||
| 416 | if (S_ISDIR(inode->i_mode)) { | 412 | if (S_ISDIR(inode->i_mode)) { |
| 417 | HFSPLUS_SB(sb).folder_count--; | 413 | HFSPLUS_SB(sb)->folder_count--; |
| 418 | sb->s_dirt = 1; | 414 | sb->s_dirt = 1; |
| 419 | return; | 415 | return; |
| 420 | } | 416 | } |
| 421 | HFSPLUS_SB(sb).file_count--; | 417 | HFSPLUS_SB(sb)->file_count--; |
| 422 | if (S_ISREG(inode->i_mode)) { | 418 | if (S_ISREG(inode->i_mode)) { |
| 423 | if (!inode->i_nlink) { | 419 | if (!inode->i_nlink) { |
| 424 | inode->i_size = 0; | 420 | inode->i_size = 0; |
| @@ -434,34 +430,39 @@ void hfsplus_delete_inode(struct inode *inode) | |||
| 434 | void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork) | 430 | void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork) |
| 435 | { | 431 | { |
| 436 | struct super_block *sb = inode->i_sb; | 432 | struct super_block *sb = inode->i_sb; |
| 433 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 434 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 437 | u32 count; | 435 | u32 count; |
| 438 | int i; | 436 | int i; |
| 439 | 437 | ||
| 440 | memcpy(&HFSPLUS_I(inode).first_extents, &fork->extents, | 438 | memcpy(&hip->first_extents, &fork->extents, sizeof(hfsplus_extent_rec)); |
| 441 | sizeof(hfsplus_extent_rec)); | ||
| 442 | for (count = 0, i = 0; i < 8; i++) | 439 | for (count = 0, i = 0; i < 8; i++) |
| 443 | count += be32_to_cpu(fork->extents[i].block_count); | 440 | count += be32_to_cpu(fork->extents[i].block_count); |
| 444 | HFSPLUS_I(inode).first_blocks = count; | 441 | hip->first_blocks = count; |
| 445 | memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec)); | 442 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); |
| 446 | HFSPLUS_I(inode).cached_start = 0; | 443 | hip->cached_start = 0; |
| 447 | HFSPLUS_I(inode).cached_blocks = 0; | 444 | hip->cached_blocks = 0; |
| 448 | 445 | ||
| 449 | HFSPLUS_I(inode).alloc_blocks = be32_to_cpu(fork->total_blocks); | 446 | hip->alloc_blocks = be32_to_cpu(fork->total_blocks); |
| 450 | inode->i_size = HFSPLUS_I(inode).phys_size = be64_to_cpu(fork->total_size); | 447 | hip->phys_size = inode->i_size = be64_to_cpu(fork->total_size); |
| 451 | HFSPLUS_I(inode).fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; | 448 | hip->fs_blocks = |
| 452 | inode_set_bytes(inode, HFSPLUS_I(inode).fs_blocks << sb->s_blocksize_bits); | 449 | (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; |
| 453 | HFSPLUS_I(inode).clump_blocks = be32_to_cpu(fork->clump_size) >> HFSPLUS_SB(sb).alloc_blksz_shift; | 450 | inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits); |
| 454 | if (!HFSPLUS_I(inode).clump_blocks) | 451 | hip->clump_blocks = |
| 455 | HFSPLUS_I(inode).clump_blocks = HFSPLUS_IS_RSRC(inode) ? HFSPLUS_SB(sb).rsrc_clump_blocks : | 452 | be32_to_cpu(fork->clump_size) >> sbi->alloc_blksz_shift; |
| 456 | HFSPLUS_SB(sb).data_clump_blocks; | 453 | if (!hip->clump_blocks) { |
| 454 | hip->clump_blocks = HFSPLUS_IS_RSRC(inode) ? | ||
| 455 | sbi->rsrc_clump_blocks : | ||
| 456 | sbi->data_clump_blocks; | ||
| 457 | } | ||
| 457 | } | 458 | } |
| 458 | 459 | ||
| 459 | void hfsplus_inode_write_fork(struct inode *inode, struct hfsplus_fork_raw *fork) | 460 | void hfsplus_inode_write_fork(struct inode *inode, struct hfsplus_fork_raw *fork) |
| 460 | { | 461 | { |
| 461 | memcpy(&fork->extents, &HFSPLUS_I(inode).first_extents, | 462 | memcpy(&fork->extents, &HFSPLUS_I(inode)->first_extents, |
| 462 | sizeof(hfsplus_extent_rec)); | 463 | sizeof(hfsplus_extent_rec)); |
| 463 | fork->total_size = cpu_to_be64(inode->i_size); | 464 | fork->total_size = cpu_to_be64(inode->i_size); |
| 464 | fork->total_blocks = cpu_to_be32(HFSPLUS_I(inode).alloc_blocks); | 465 | fork->total_blocks = cpu_to_be32(HFSPLUS_I(inode)->alloc_blocks); |
| 465 | } | 466 | } |
| 466 | 467 | ||
| 467 | int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | 468 | int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) |
| @@ -472,7 +473,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | |||
| 472 | 473 | ||
| 473 | type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset); | 474 | type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset); |
| 474 | 475 | ||
| 475 | HFSPLUS_I(inode).dev = 0; | 476 | HFSPLUS_I(inode)->linkid = 0; |
| 476 | if (type == HFSPLUS_FOLDER) { | 477 | if (type == HFSPLUS_FOLDER) { |
| 477 | struct hfsplus_cat_folder *folder = &entry.folder; | 478 | struct hfsplus_cat_folder *folder = &entry.folder; |
| 478 | 479 | ||
| @@ -486,8 +487,8 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | |||
| 486 | inode->i_atime = hfsp_mt2ut(folder->access_date); | 487 | inode->i_atime = hfsp_mt2ut(folder->access_date); |
| 487 | inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); | 488 | inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); |
| 488 | inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); | 489 | inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); |
| 489 | HFSPLUS_I(inode).create_date = folder->create_date; | 490 | HFSPLUS_I(inode)->create_date = folder->create_date; |
| 490 | HFSPLUS_I(inode).fs_blocks = 0; | 491 | HFSPLUS_I(inode)->fs_blocks = 0; |
| 491 | inode->i_op = &hfsplus_dir_inode_operations; | 492 | inode->i_op = &hfsplus_dir_inode_operations; |
| 492 | inode->i_fop = &hfsplus_dir_operations; | 493 | inode->i_fop = &hfsplus_dir_operations; |
| 493 | } else if (type == HFSPLUS_FILE) { | 494 | } else if (type == HFSPLUS_FILE) { |
| @@ -518,7 +519,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | |||
| 518 | inode->i_atime = hfsp_mt2ut(file->access_date); | 519 | inode->i_atime = hfsp_mt2ut(file->access_date); |
| 519 | inode->i_mtime = hfsp_mt2ut(file->content_mod_date); | 520 | inode->i_mtime = hfsp_mt2ut(file->content_mod_date); |
| 520 | inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date); | 521 | inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date); |
| 521 | HFSPLUS_I(inode).create_date = file->create_date; | 522 | HFSPLUS_I(inode)->create_date = file->create_date; |
| 522 | } else { | 523 | } else { |
| 523 | printk(KERN_ERR "hfs: bad catalog entry used to create inode\n"); | 524 | printk(KERN_ERR "hfs: bad catalog entry used to create inode\n"); |
| 524 | res = -EIO; | 525 | res = -EIO; |
| @@ -533,12 +534,12 @@ int hfsplus_cat_write_inode(struct inode *inode) | |||
| 533 | hfsplus_cat_entry entry; | 534 | hfsplus_cat_entry entry; |
| 534 | 535 | ||
| 535 | if (HFSPLUS_IS_RSRC(inode)) | 536 | if (HFSPLUS_IS_RSRC(inode)) |
| 536 | main_inode = HFSPLUS_I(inode).rsrc_inode; | 537 | main_inode = HFSPLUS_I(inode)->rsrc_inode; |
| 537 | 538 | ||
| 538 | if (!main_inode->i_nlink) | 539 | if (!main_inode->i_nlink) |
| 539 | return 0; | 540 | return 0; |
| 540 | 541 | ||
| 541 | if (hfs_find_init(HFSPLUS_SB(main_inode->i_sb).cat_tree, &fd)) | 542 | if (hfs_find_init(HFSPLUS_SB(main_inode->i_sb)->cat_tree, &fd)) |
| 542 | /* panic? */ | 543 | /* panic? */ |
| 543 | return -EIO; | 544 | return -EIO; |
| 544 | 545 | ||
| @@ -554,7 +555,7 @@ int hfsplus_cat_write_inode(struct inode *inode) | |||
| 554 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, | 555 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, |
| 555 | sizeof(struct hfsplus_cat_folder)); | 556 | sizeof(struct hfsplus_cat_folder)); |
| 556 | /* simple node checks? */ | 557 | /* simple node checks? */ |
| 557 | hfsplus_set_perms(inode, &folder->permissions); | 558 | hfsplus_cat_set_perms(inode, &folder->permissions); |
| 558 | folder->access_date = hfsp_ut2mt(inode->i_atime); | 559 | folder->access_date = hfsp_ut2mt(inode->i_atime); |
| 559 | folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); | 560 | folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); |
| 560 | folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); | 561 | folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); |
| @@ -576,11 +577,7 @@ int hfsplus_cat_write_inode(struct inode *inode) | |||
| 576 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, | 577 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, |
| 577 | sizeof(struct hfsplus_cat_file)); | 578 | sizeof(struct hfsplus_cat_file)); |
| 578 | hfsplus_inode_write_fork(inode, &file->data_fork); | 579 | hfsplus_inode_write_fork(inode, &file->data_fork); |
| 579 | if (S_ISREG(inode->i_mode)) | 580 | hfsplus_cat_set_perms(inode, &file->permissions); |
| 580 | HFSPLUS_I(inode).dev = inode->i_nlink; | ||
| 581 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) | ||
| 582 | HFSPLUS_I(inode).dev = kdev_t_to_nr(inode->i_rdev); | ||
| 583 | hfsplus_set_perms(inode, &file->permissions); | ||
| 584 | if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) | 581 | if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) |
| 585 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); | 582 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); |
| 586 | else | 583 | else |
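`hfsplus_set_perms()` disappears from inode.c here and resurfaces as `hfsplus_cat_set_perms()`, declared in the hfsplus_fs.h hunk near the top of this diff. catalog.c itself is not part of this excerpt, so the following reconstruction, derived from the deleted body minus the dropped `dev` bookkeeping, is an assumption about the relocated helper rather than a quote of it:

```c
/* Assumed shape of the relocated helper (catalog.c is not shown in
 * this diff); reconstructed from the deleted hfsplus_set_perms(). */
void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms)
{
	if (inode->i_flags & S_IMMUTABLE)
		perms->rootflags |= HFSPLUS_FLG_IMMUTABLE;
	else
		perms->rootflags &= ~HFSPLUS_FLG_IMMUTABLE;

	if (inode->i_flags & S_APPEND)
		perms->rootflags |= HFSPLUS_FLG_APPEND;
	else
		perms->rootflags &= ~HFSPLUS_FLG_APPEND;

	perms->userflags = HFSPLUS_I(inode)->userflags;
	perms->mode = cpu_to_be16(inode->i_mode);
	perms->owner = cpu_to_be32(inode->i_uid);
	perms->group = cpu_to_be32(inode->i_gid);
}
```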
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index ac405f099026..5b4667e08ef7 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c | |||
| @@ -17,83 +17,98 @@ | |||
| 17 | #include <linux/mount.h> | 17 | #include <linux/mount.h> |
| 18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
| 19 | #include <linux/xattr.h> | 19 | #include <linux/xattr.h> |
| 20 | #include <linux/smp_lock.h> | ||
| 21 | #include <asm/uaccess.h> | 20 | #include <asm/uaccess.h> |
| 22 | #include "hfsplus_fs.h" | 21 | #include "hfsplus_fs.h" |
| 23 | 22 | ||
| 24 | long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 23 | static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) |
| 25 | { | 24 | { |
| 26 | struct inode *inode = filp->f_path.dentry->d_inode; | 25 | struct inode *inode = file->f_path.dentry->d_inode; |
| 26 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 27 | unsigned int flags = 0; | ||
| 28 | |||
| 29 | if (inode->i_flags & S_IMMUTABLE) | ||
| 30 | flags |= FS_IMMUTABLE_FL; | ||
| 31 | if (inode->i_flags & S_APPEND) | ||
| 32 | flags |= FS_APPEND_FL; | ||
| 33 | if (hip->userflags & HFSPLUS_FLG_NODUMP) | ||
| 34 | flags |= FS_NODUMP_FL; | ||
| 35 | |||
| 36 | return put_user(flags, user_flags); | ||
| 37 | } | ||
| 38 | |||
| 39 | static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags) | ||
| 40 | { | ||
| 41 | struct inode *inode = file->f_path.dentry->d_inode; | ||
| 42 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
| 27 | unsigned int flags; | 43 | unsigned int flags; |
| 44 | int err = 0; | ||
| 28 | 45 | ||
| 29 | lock_kernel(); | 46 | err = mnt_want_write(file->f_path.mnt); |
| 30 | switch (cmd) { | 47 | if (err) |
| 31 | case HFSPLUS_IOC_EXT2_GETFLAGS: | 48 | goto out; |
| 32 | flags = 0; | ||
| 33 | if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_IMMUTABLE) | ||
| 34 | flags |= FS_IMMUTABLE_FL; /* EXT2_IMMUTABLE_FL */ | ||
| 35 | if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_APPEND) | ||
| 36 | flags |= FS_APPEND_FL; /* EXT2_APPEND_FL */ | ||
| 37 | if (HFSPLUS_I(inode).userflags & HFSPLUS_FLG_NODUMP) | ||
| 38 | flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */ | ||
| 39 | return put_user(flags, (int __user *)arg); | ||
| 40 | case HFSPLUS_IOC_EXT2_SETFLAGS: { | ||
| 41 | int err = 0; | ||
| 42 | err = mnt_want_write(filp->f_path.mnt); | ||
| 43 | if (err) { | ||
| 44 | unlock_kernel(); | ||
| 45 | return err; | ||
| 46 | } | ||
| 47 | 49 | ||
| 48 | if (!is_owner_or_cap(inode)) { | 50 | if (!is_owner_or_cap(inode)) { |
| 49 | err = -EACCES; | 51 | err = -EACCES; |
| 50 | goto setflags_out; | 52 | goto out_drop_write; |
| 51 | } | 53 | } |
| 52 | if (get_user(flags, (int __user *)arg)) { | ||
| 53 | err = -EFAULT; | ||
| 54 | goto setflags_out; | ||
| 55 | } | ||
| 56 | if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) || | ||
| 57 | HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) { | ||
| 58 | if (!capable(CAP_LINUX_IMMUTABLE)) { | ||
| 59 | err = -EPERM; | ||
| 60 | goto setflags_out; | ||
| 61 | } | ||
| 62 | } | ||
| 63 | 54 | ||
| 64 | /* don't silently ignore unsupported ext2 flags */ | 55 | if (get_user(flags, user_flags)) { |
| 65 | if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) { | 56 | err = -EFAULT; |
| 66 | err = -EOPNOTSUPP; | 57 | goto out_drop_write; |
| 67 | goto setflags_out; | 58 | } |
| 68 | } | 59 | |
| 69 | if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */ | 60 | mutex_lock(&inode->i_mutex); |
| 70 | inode->i_flags |= S_IMMUTABLE; | 61 | |
| 71 | HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE; | 62 | if ((flags & (FS_IMMUTABLE_FL|FS_APPEND_FL)) || |
| 72 | } else { | 63 | inode->i_flags & (S_IMMUTABLE|S_APPEND)) { |
| 73 | inode->i_flags &= ~S_IMMUTABLE; | 64 | if (!capable(CAP_LINUX_IMMUTABLE)) { |
| 74 | HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_IMMUTABLE; | 65 | err = -EPERM; |
| 75 | } | 66 | goto out_unlock_inode; |
| 76 | if (flags & FS_APPEND_FL) { /* EXT2_APPEND_FL */ | ||
| 77 | inode->i_flags |= S_APPEND; | ||
| 78 | HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_APPEND; | ||
| 79 | } else { | ||
| 80 | inode->i_flags &= ~S_APPEND; | ||
| 81 | HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_APPEND; | ||
| 82 | } | 67 | } |
| 83 | if (flags & FS_NODUMP_FL) /* EXT2_NODUMP_FL */ | ||
| 84 | HFSPLUS_I(inode).userflags |= HFSPLUS_FLG_NODUMP; | ||
| 85 | else | ||
| 86 | HFSPLUS_I(inode).userflags &= ~HFSPLUS_FLG_NODUMP; | ||
| 87 | |||
| 88 | inode->i_ctime = CURRENT_TIME_SEC; | ||
| 89 | mark_inode_dirty(inode); | ||
| 90 | setflags_out: | ||
| 91 | mnt_drop_write(filp->f_path.mnt); | ||
| 92 | unlock_kernel(); | ||
| 93 | return err; | ||
| 94 | } | 68 | } |
| 69 | |||
| 70 | /* don't silently ignore unsupported ext2 flags */ | ||
| 71 | if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) { | ||
| 72 | err = -EOPNOTSUPP; | ||
| 73 | goto out_unlock_inode; | ||
| 74 | } | ||
| 75 | |||
| 76 | if (flags & FS_IMMUTABLE_FL) | ||
| 77 | inode->i_flags |= S_IMMUTABLE; | ||
| 78 | else | ||
| 79 | inode->i_flags &= ~S_IMMUTABLE; | ||
| 80 | |||
| 81 | if (flags & FS_APPEND_FL) | ||
| 82 | inode->i_flags |= S_APPEND; | ||
| 83 | else | ||
| 84 | inode->i_flags &= ~S_APPEND; | ||
| 85 | |||
| 86 | if (flags & FS_NODUMP_FL) | ||
| 87 | hip->userflags |= HFSPLUS_FLG_NODUMP; | ||
| 88 | else | ||
| 89 | hip->userflags &= ~HFSPLUS_FLG_NODUMP; | ||
| 90 | |||
| 91 | inode->i_ctime = CURRENT_TIME_SEC; | ||
| 92 | mark_inode_dirty(inode); | ||
| 93 | |||
| 94 | out_unlock_inode: | ||
| 95 | mutex_unlock(&inode->i_mutex); | ||
| 96 | out_drop_write: | ||
| 97 | mnt_drop_write(file->f_path.mnt); | ||
| 98 | out: | ||
| 99 | return err; | ||
| 100 | } | ||
| 101 | |||
| 102 | long hfsplus_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
| 103 | { | ||
| 104 | void __user *argp = (void __user *)arg; | ||
| 105 | |||
| 106 | switch (cmd) { | ||
| 107 | case HFSPLUS_IOC_EXT2_GETFLAGS: | ||
| 108 | return hfsplus_ioctl_getflags(file, argp); | ||
| 109 | case HFSPLUS_IOC_EXT2_SETFLAGS: | ||
| 110 | return hfsplus_ioctl_setflags(file, argp); | ||
| 95 | default: | 111 | default: |
| 96 | unlock_kernel(); | ||
| 97 | return -ENOTTY; | 112 | return -ENOTTY; |
| 98 | } | 113 | } |
| 99 | } | 114 | } |
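The rewritten dispatcher keeps the command numbers unchanged, and `HFSPLUS_IOC_EXT2_GETFLAGS`/`_SETFLAGS` are the generic `FS_IOC_GETFLAGS`/`FS_IOC_SETFLAGS` values, so the path is exercised from user space exactly like the ext2 flag ioctls. A hedged user-space sketch, assuming `argv[1]` names a file on a mounted hfsplus volume:

```c
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
	int fd, flags;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) {
		perror("getflags");
		return 1;
	}
	/* Only IMMUTABLE, APPEND and NODUMP survive the EOPNOTSUPP
	 * check in hfsplus_ioctl_setflags() above. */
	flags |= FS_NODUMP_FL;
	if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0)
		perror("setflags");
	return 0;
}
```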
| @@ -110,7 +125,7 @@ int hfsplus_setxattr(struct dentry *dentry, const char *name, | |||
| 110 | if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode)) | 125 | if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode)) |
| 111 | return -EOPNOTSUPP; | 126 | return -EOPNOTSUPP; |
| 112 | 127 | ||
| 113 | res = hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd); | 128 | res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); |
| 114 | if (res) | 129 | if (res) |
| 115 | return res; | 130 | return res; |
| 116 | res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); | 131 | res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); |
| @@ -153,7 +168,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, | |||
| 153 | return -EOPNOTSUPP; | 168 | return -EOPNOTSUPP; |
| 154 | 169 | ||
| 155 | if (size) { | 170 | if (size) { |
| 156 | res = hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd); | 171 | res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); |
| 157 | if (res) | 172 | if (res) |
| 158 | return res; | 173 | return res; |
| 159 | res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); | 174 | res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); |
| @@ -177,7 +192,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, | |||
| 177 | } else | 192 | } else |
| 178 | res = size ? -ERANGE : 4; | 193 | res = size ? -ERANGE : 4; |
| 179 | } else | 194 | } else |
| 180 | res = -ENODATA; | 195 | res = -EOPNOTSUPP; |
| 181 | out: | 196 | out: |
| 182 | if (size) | 197 | if (size) |
| 183 | hfs_find_exit(&fd); | 198 | hfs_find_exit(&fd); |
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 572628b4b07d..f9ab276a4d8d 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c | |||
| @@ -143,13 +143,13 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) | |||
| 143 | kfree(p); | 143 | kfree(p); |
| 144 | break; | 144 | break; |
| 145 | case opt_decompose: | 145 | case opt_decompose: |
| 146 | sbi->flags &= ~HFSPLUS_SB_NODECOMPOSE; | 146 | clear_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags); |
| 147 | break; | 147 | break; |
| 148 | case opt_nodecompose: | 148 | case opt_nodecompose: |
| 149 | sbi->flags |= HFSPLUS_SB_NODECOMPOSE; | 149 | set_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags); |
| 150 | break; | 150 | break; |
| 151 | case opt_force: | 151 | case opt_force: |
| 152 | sbi->flags |= HFSPLUS_SB_FORCE; | 152 | set_bit(HFSPLUS_SB_FORCE, &sbi->flags); |
| 153 | break; | 153 | break; |
| 154 | default: | 154 | default: |
| 155 | return 0; | 155 | return 0; |
| @@ -171,7 +171,7 @@ done: | |||
| 171 | 171 | ||
| 172 | int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) | 172 | int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) |
| 173 | { | 173 | { |
| 174 | struct hfsplus_sb_info *sbi = &HFSPLUS_SB(mnt->mnt_sb); | 174 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(mnt->mnt_sb); |
| 175 | 175 | ||
| 176 | if (sbi->creator != HFSPLUS_DEF_CR_TYPE) | 176 | if (sbi->creator != HFSPLUS_DEF_CR_TYPE) |
| 177 | seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); | 177 | seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); |
| @@ -184,7 +184,7 @@ int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) | |||
| 184 | seq_printf(seq, ",session=%u", sbi->session); | 184 | seq_printf(seq, ",session=%u", sbi->session); |
| 185 | if (sbi->nls) | 185 | if (sbi->nls) |
| 186 | seq_printf(seq, ",nls=%s", sbi->nls->charset); | 186 | seq_printf(seq, ",nls=%s", sbi->nls->charset); |
| 187 | if (sbi->flags & HFSPLUS_SB_NODECOMPOSE) | 187 | if (test_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags)) |
| 188 | seq_printf(seq, ",nodecompose"); | 188 | seq_printf(seq, ",nodecompose"); |
| 189 | return 0; | 189 | return 0; |
| 190 | } | 190 | } |
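This hunk is why the `HFSPLUS_SB_*` constants at the top of the diff changed from masks (`0x0002`, `0x0004`, ...) to bit numbers (1, 2, ...): `set_bit()`/`clear_bit()`/`test_bit()` take a bit index into an `unsigned long` and are atomic, unlike the old open-coded `|=`/`&=`. A rough user-space analogue of the semantics, with GCC builtins standing in for the kernel bitops:

```c
#define HFSPLUS_SB_NODECOMPOSE	1	/* bit number, no longer the 0x0002 mask */

/* Atomic single-bit OR; kernel set_bit() has the same effect. */
static void set_bit_stub(int nr, unsigned long *addr)
{
	__atomic_fetch_or(addr, 1UL << nr, __ATOMIC_RELAXED);
}

static void clear_bit_stub(int nr, unsigned long *addr)
{
	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_RELAXED);
}

static int test_bit_stub(int nr, const unsigned long *addr)
{
	return (*addr >> nr) & 1UL;
}

int main(void)
{
	unsigned long flags = 0;

	set_bit_stub(HFSPLUS_SB_NODECOMPOSE, &flags);	/* opt_nodecompose */
	clear_bit_stub(HFSPLUS_SB_NODECOMPOSE, &flags);	/* opt_decompose */
	return test_bit_stub(HFSPLUS_SB_NODECOMPOSE, &flags);
}
```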
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c index 1528a6fd0299..208b16c645cc 100644 --- a/fs/hfsplus/part_tbl.c +++ b/fs/hfsplus/part_tbl.c | |||
| @@ -74,6 +74,7 @@ struct old_pmap { | |||
| 74 | int hfs_part_find(struct super_block *sb, | 74 | int hfs_part_find(struct super_block *sb, |
| 75 | sector_t *part_start, sector_t *part_size) | 75 | sector_t *part_start, sector_t *part_size) |
| 76 | { | 76 | { |
| 77 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 77 | struct buffer_head *bh; | 78 | struct buffer_head *bh; |
| 78 | __be16 *data; | 79 | __be16 *data; |
| 79 | int i, size, res; | 80 | int i, size, res; |
| @@ -95,7 +96,7 @@ int hfs_part_find(struct super_block *sb, | |||
| 95 | for (i = 0; i < size; p++, i++) { | 96 | for (i = 0; i < size; p++, i++) { |
| 96 | if (p->pdStart && p->pdSize && | 97 | if (p->pdStart && p->pdSize && |
| 97 | p->pdFSID == cpu_to_be32(0x54465331)/*"TFS1"*/ && | 98 | p->pdFSID == cpu_to_be32(0x54465331)/*"TFS1"*/ && |
| 98 | (HFSPLUS_SB(sb).part < 0 || HFSPLUS_SB(sb).part == i)) { | 99 | (sbi->part < 0 || sbi->part == i)) { |
| 99 | *part_start += be32_to_cpu(p->pdStart); | 100 | *part_start += be32_to_cpu(p->pdStart); |
| 100 | *part_size = be32_to_cpu(p->pdSize); | 101 | *part_size = be32_to_cpu(p->pdSize); |
| 101 | res = 0; | 102 | res = 0; |
| @@ -111,7 +112,7 @@ int hfs_part_find(struct super_block *sb, | |||
| 111 | size = be32_to_cpu(pm->pmMapBlkCnt); | 112 | size = be32_to_cpu(pm->pmMapBlkCnt); |
| 112 | for (i = 0; i < size;) { | 113 | for (i = 0; i < size;) { |
| 113 | if (!memcmp(pm->pmPartType,"Apple_HFS", 9) && | 114 | if (!memcmp(pm->pmPartType,"Apple_HFS", 9) && |
| 114 | (HFSPLUS_SB(sb).part < 0 || HFSPLUS_SB(sb).part == i)) { | 115 | (sbi->part < 0 || sbi->part == i)) { |
| 115 | *part_start += be32_to_cpu(pm->pmPyPartStart); | 116 | *part_start += be32_to_cpu(pm->pmPyPartStart); |
| 116 | *part_size = be32_to_cpu(pm->pmPartBlkCnt); | 117 | *part_size = be32_to_cpu(pm->pmPartBlkCnt); |
| 117 | res = 0; | 118 | res = 0; |
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 3b55c050c742..9a88d7536103 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
| @@ -12,7 +12,6 @@ | |||
| 12 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
| 13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
| 14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
| 15 | #include <linux/smp_lock.h> | ||
| 16 | #include <linux/vfs.h> | 15 | #include <linux/vfs.h> |
| 17 | #include <linux/nls.h> | 16 | #include <linux/nls.h> |
| 18 | 17 | ||
| @@ -21,40 +20,11 @@ static void hfsplus_destroy_inode(struct inode *inode); | |||
| 21 | 20 | ||
| 22 | #include "hfsplus_fs.h" | 21 | #include "hfsplus_fs.h" |
| 23 | 22 | ||
| 24 | struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) | 23 | static int hfsplus_system_read_inode(struct inode *inode) |
| 25 | { | 24 | { |
| 26 | struct hfs_find_data fd; | 25 | struct hfsplus_vh *vhdr = HFSPLUS_SB(inode->i_sb)->s_vhdr; |
| 27 | struct hfsplus_vh *vhdr; | ||
| 28 | struct inode *inode; | ||
| 29 | long err = -EIO; | ||
| 30 | |||
| 31 | inode = iget_locked(sb, ino); | ||
| 32 | if (!inode) | ||
| 33 | return ERR_PTR(-ENOMEM); | ||
| 34 | if (!(inode->i_state & I_NEW)) | ||
| 35 | return inode; | ||
| 36 | 26 | ||
| 37 | INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); | 27 | switch (inode->i_ino) { |
| 38 | mutex_init(&HFSPLUS_I(inode).extents_lock); | ||
| 39 | HFSPLUS_I(inode).flags = 0; | ||
| 40 | HFSPLUS_I(inode).rsrc_inode = NULL; | ||
| 41 | atomic_set(&HFSPLUS_I(inode).opencnt, 0); | ||
| 42 | |||
| 43 | if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID) { | ||
| 44 | read_inode: | ||
| 45 | hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd); | ||
| 46 | err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); | ||
| 47 | if (!err) | ||
| 48 | err = hfsplus_cat_read_inode(inode, &fd); | ||
| 49 | hfs_find_exit(&fd); | ||
| 50 | if (err) | ||
| 51 | goto bad_inode; | ||
| 52 | goto done; | ||
| 53 | } | ||
| 54 | vhdr = HFSPLUS_SB(inode->i_sb).s_vhdr; | ||
| 55 | switch(inode->i_ino) { | ||
| 56 | case HFSPLUS_ROOT_CNID: | ||
| 57 | goto read_inode; | ||
| 58 | case HFSPLUS_EXT_CNID: | 28 | case HFSPLUS_EXT_CNID: |
| 59 | hfsplus_inode_read_fork(inode, &vhdr->ext_file); | 29 | hfsplus_inode_read_fork(inode, &vhdr->ext_file); |
| 60 | inode->i_mapping->a_ops = &hfsplus_btree_aops; | 30 | inode->i_mapping->a_ops = &hfsplus_btree_aops; |
| @@ -75,74 +45,101 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) | |||
| 75 | inode->i_mapping->a_ops = &hfsplus_btree_aops; | 45 | inode->i_mapping->a_ops = &hfsplus_btree_aops; |
| 76 | break; | 46 | break; |
| 77 | default: | 47 | default: |
| 78 | goto bad_inode; | 48 | return -EIO; |
| 49 | } | ||
| 50 | |||
| 51 | return 0; | ||
| 52 | } | ||
| 53 | |||
| 54 | struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) | ||
| 55 | { | ||
| 56 | struct hfs_find_data fd; | ||
| 57 | struct inode *inode; | ||
| 58 | int err; | ||
| 59 | |||
| 60 | inode = iget_locked(sb, ino); | ||
| 61 | if (!inode) | ||
| 62 | return ERR_PTR(-ENOMEM); | ||
| 63 | if (!(inode->i_state & I_NEW)) | ||
| 64 | return inode; | ||
| 65 | |||
| 66 | INIT_LIST_HEAD(&HFSPLUS_I(inode)->open_dir_list); | ||
| 67 | mutex_init(&HFSPLUS_I(inode)->extents_lock); | ||
| 68 | HFSPLUS_I(inode)->flags = 0; | ||
| 69 | HFSPLUS_I(inode)->rsrc_inode = NULL; | ||
| 70 | atomic_set(&HFSPLUS_I(inode)->opencnt, 0); | ||
| 71 | |||
| 72 | if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || | ||
| 73 | inode->i_ino == HFSPLUS_ROOT_CNID) { | ||
| 74 | hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); | ||
| 75 | err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); | ||
| 76 | if (!err) | ||
| 77 | err = hfsplus_cat_read_inode(inode, &fd); | ||
| 78 | hfs_find_exit(&fd); | ||
| 79 | } else { | ||
| 80 | err = hfsplus_system_read_inode(inode); | ||
| 81 | } | ||
| 82 | |||
| 83 | if (err) { | ||
| 84 | iget_failed(inode); | ||
| 85 | return ERR_PTR(err); | ||
| 79 | } | 86 | } |
| 80 | 87 | ||
| 81 | done: | ||
| 82 | unlock_new_inode(inode); | 88 | unlock_new_inode(inode); |
| 83 | return inode; | 89 | return inode; |
| 84 | |||
| 85 | bad_inode: | ||
| 86 | iget_failed(inode); | ||
| 87 | return ERR_PTR(err); | ||
| 88 | } | 90 | } |
| 89 | 91 | ||
| 90 | static int hfsplus_write_inode(struct inode *inode, | 92 | static int hfsplus_system_write_inode(struct inode *inode) |
| 91 | struct writeback_control *wbc) | ||
| 92 | { | 93 | { |
| 93 | struct hfsplus_vh *vhdr; | 94 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); |
| 94 | int ret = 0; | 95 | struct hfsplus_vh *vhdr = sbi->s_vhdr; |
| 96 | struct hfsplus_fork_raw *fork; | ||
| 97 | struct hfs_btree *tree = NULL; | ||
| 95 | 98 | ||
| 96 | dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); | ||
| 97 | hfsplus_ext_write_extent(inode); | ||
| 98 | if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID) { | ||
| 99 | return hfsplus_cat_write_inode(inode); | ||
| 100 | } | ||
| 101 | vhdr = HFSPLUS_SB(inode->i_sb).s_vhdr; | ||
| 102 | switch (inode->i_ino) { | 99 | switch (inode->i_ino) { |
| 103 | case HFSPLUS_ROOT_CNID: | ||
| 104 | ret = hfsplus_cat_write_inode(inode); | ||
| 105 | break; | ||
| 106 | case HFSPLUS_EXT_CNID: | 100 | case HFSPLUS_EXT_CNID: |
| 107 | if (vhdr->ext_file.total_size != cpu_to_be64(inode->i_size)) { | 101 | fork = &vhdr->ext_file; |
| 108 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | 102 | tree = sbi->ext_tree; |
| 109 | inode->i_sb->s_dirt = 1; | ||
| 110 | } | ||
| 111 | hfsplus_inode_write_fork(inode, &vhdr->ext_file); | ||
| 112 | hfs_btree_write(HFSPLUS_SB(inode->i_sb).ext_tree); | ||
| 113 | break; | 103 | break; |
| 114 | case HFSPLUS_CAT_CNID: | 104 | case HFSPLUS_CAT_CNID: |
| 115 | if (vhdr->cat_file.total_size != cpu_to_be64(inode->i_size)) { | 105 | fork = &vhdr->cat_file; |
| 116 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | 106 | tree = sbi->cat_tree; |
| 117 | inode->i_sb->s_dirt = 1; | ||
| 118 | } | ||
| 119 | hfsplus_inode_write_fork(inode, &vhdr->cat_file); | ||
| 120 | hfs_btree_write(HFSPLUS_SB(inode->i_sb).cat_tree); | ||
| 121 | break; | 107 | break; |
| 122 | case HFSPLUS_ALLOC_CNID: | 108 | case HFSPLUS_ALLOC_CNID: |
| 123 | if (vhdr->alloc_file.total_size != cpu_to_be64(inode->i_size)) { | 109 | fork = &vhdr->alloc_file; |
| 124 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | ||
| 125 | inode->i_sb->s_dirt = 1; | ||
| 126 | } | ||
| 127 | hfsplus_inode_write_fork(inode, &vhdr->alloc_file); | ||
| 128 | break; | 110 | break; |
| 129 | case HFSPLUS_START_CNID: | 111 | case HFSPLUS_START_CNID: |
| 130 | if (vhdr->start_file.total_size != cpu_to_be64(inode->i_size)) { | 112 | fork = &vhdr->start_file; |
| 131 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | ||
| 132 | inode->i_sb->s_dirt = 1; | ||
| 133 | } | ||
| 134 | hfsplus_inode_write_fork(inode, &vhdr->start_file); | ||
| 135 | break; | 113 | break; |
| 136 | case HFSPLUS_ATTR_CNID: | 114 | case HFSPLUS_ATTR_CNID: |
| 137 | if (vhdr->attr_file.total_size != cpu_to_be64(inode->i_size)) { | 115 | fork = &vhdr->attr_file; |
| 138 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | 116 | tree = sbi->attr_tree; |
| | break; | ||
| 139 | inode->i_sb->s_dirt = 1; | 117 | default: |
| 140 | } | 118 | return -EIO; |
| 141 | hfsplus_inode_write_fork(inode, &vhdr->attr_file); | 119 | } |
| 142 | hfs_btree_write(HFSPLUS_SB(inode->i_sb).attr_tree); | 120 | |
| 143 | break; | 121 | if (fork->total_size != cpu_to_be64(inode->i_size)) { |
| 122 | set_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags); | ||
| 123 | inode->i_sb->s_dirt = 1; | ||
| 144 | } | 124 | } |
| 145 | return ret; | 125 | hfsplus_inode_write_fork(inode, fork); |
| 126 | if (tree) | ||
| 127 | hfs_btree_write(tree); | ||
| 128 | return 0; | ||
| 129 | } | ||
| 130 | |||
| 131 | static int hfsplus_write_inode(struct inode *inode, | ||
| 132 | struct writeback_control *wbc) | ||
| 133 | { | ||
| 134 | dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); | ||
| 135 | |||
| 136 | hfsplus_ext_write_extent(inode); | ||
| 137 | |||
| 138 | if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || | ||
| 139 | inode->i_ino == HFSPLUS_ROOT_CNID) | ||
| 140 | return hfsplus_cat_write_inode(inode); | ||
| 141 | else | ||
| 142 | return hfsplus_system_write_inode(inode); | ||
| 146 | } | 143 | } |
| 147 | 144 | ||
| 148 | static void hfsplus_evict_inode(struct inode *inode) | 145 | static void hfsplus_evict_inode(struct inode *inode) |
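Both the read path (`hfsplus_iget()` above) and this write path now share one routing rule: the root directory and everything at or above `HFSPLUS_FIRSTUSER_CNID` live in the catalog tree, while the low reserved CNIDs are the system files described by the volume header. Expressed as a predicate, purely for illustration:

```c
/* Reserved CNIDs per the HFS+ format: the root folder is 2, user files
 * and directories start at 16.  Restated here for a standalone sketch. */
#define HFSPLUS_ROOT_CNID	2
#define HFSPLUS_FIRSTUSER_CNID	16

/* Illustration of the test both paths open-code. */
static int hfsplus_is_catalog_backed(unsigned long ino)
{
	return ino >= HFSPLUS_FIRSTUSER_CNID || ino == HFSPLUS_ROOT_CNID;
}
```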
| @@ -151,51 +148,53 @@ static void hfsplus_evict_inode(struct inode *inode) | |||
| 151 | truncate_inode_pages(&inode->i_data, 0); | 148 | truncate_inode_pages(&inode->i_data, 0); |
| 152 | end_writeback(inode); | 149 | end_writeback(inode); |
| 153 | if (HFSPLUS_IS_RSRC(inode)) { | 150 | if (HFSPLUS_IS_RSRC(inode)) { |
| 154 | HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL; | 151 | HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL; |
| 155 | iput(HFSPLUS_I(inode).rsrc_inode); | 152 | iput(HFSPLUS_I(inode)->rsrc_inode); |
| 156 | } | 153 | } |
| 157 | } | 154 | } |
| 158 | 155 | ||
| 159 | int hfsplus_sync_fs(struct super_block *sb, int wait) | 156 | int hfsplus_sync_fs(struct super_block *sb, int wait) |
| 160 | { | 157 | { |
| 161 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; | 158 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); |
| 159 | struct hfsplus_vh *vhdr = sbi->s_vhdr; | ||
| 162 | 160 | ||
| 163 | dprint(DBG_SUPER, "hfsplus_write_super\n"); | 161 | dprint(DBG_SUPER, "hfsplus_write_super\n"); |
| 164 | 162 | ||
| 165 | lock_super(sb); | 163 | mutex_lock(&sbi->vh_mutex); |
| 164 | mutex_lock(&sbi->alloc_mutex); | ||
| 166 | sb->s_dirt = 0; | 165 | sb->s_dirt = 0; |
| 167 | 166 | ||
| 168 | vhdr->free_blocks = cpu_to_be32(HFSPLUS_SB(sb).free_blocks); | 167 | vhdr->free_blocks = cpu_to_be32(sbi->free_blocks); |
| 169 | vhdr->next_alloc = cpu_to_be32(HFSPLUS_SB(sb).next_alloc); | 168 | vhdr->next_cnid = cpu_to_be32(sbi->next_cnid); |
| 170 | vhdr->next_cnid = cpu_to_be32(HFSPLUS_SB(sb).next_cnid); | 169 | vhdr->folder_count = cpu_to_be32(sbi->folder_count); |
| 171 | vhdr->folder_count = cpu_to_be32(HFSPLUS_SB(sb).folder_count); | 170 | vhdr->file_count = cpu_to_be32(sbi->file_count); |
| 172 | vhdr->file_count = cpu_to_be32(HFSPLUS_SB(sb).file_count); | ||
| 173 | 171 | ||
| 174 | mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); | 172 | mark_buffer_dirty(sbi->s_vhbh); |
| 175 | if (HFSPLUS_SB(sb).flags & HFSPLUS_SB_WRITEBACKUP) { | 173 | if (test_and_clear_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags)) { |
| 176 | if (HFSPLUS_SB(sb).sect_count) { | 174 | if (sbi->sect_count) { |
| 177 | struct buffer_head *bh; | 175 | struct buffer_head *bh; |
| 178 | u32 block, offset; | 176 | u32 block, offset; |
| 179 | 177 | ||
| 180 | block = HFSPLUS_SB(sb).blockoffset; | 178 | block = sbi->blockoffset; |
| 181 | block += (HFSPLUS_SB(sb).sect_count - 2) >> (sb->s_blocksize_bits - 9); | 179 | block += (sbi->sect_count - 2) >> (sb->s_blocksize_bits - 9); |
| 182 | offset = ((HFSPLUS_SB(sb).sect_count - 2) << 9) & (sb->s_blocksize - 1); | 180 | offset = ((sbi->sect_count - 2) << 9) & (sb->s_blocksize - 1); |
| 183 | printk(KERN_DEBUG "hfs: backup: %u,%u,%u,%u\n", HFSPLUS_SB(sb).blockoffset, | 181 | printk(KERN_DEBUG "hfs: backup: %u,%u,%u,%u\n", |
| 184 | HFSPLUS_SB(sb).sect_count, block, offset); | 182 | sbi->blockoffset, sbi->sect_count, |
| 183 | block, offset); | ||
| 185 | bh = sb_bread(sb, block); | 184 | bh = sb_bread(sb, block); |
| 186 | if (bh) { | 185 | if (bh) { |
| 187 | vhdr = (struct hfsplus_vh *)(bh->b_data + offset); | 186 | vhdr = (struct hfsplus_vh *)(bh->b_data + offset); |
| 188 | if (be16_to_cpu(vhdr->signature) == HFSPLUS_VOLHEAD_SIG) { | 187 | if (be16_to_cpu(vhdr->signature) == HFSPLUS_VOLHEAD_SIG) { |
| 189 | memcpy(vhdr, HFSPLUS_SB(sb).s_vhdr, sizeof(*vhdr)); | 188 | memcpy(vhdr, sbi->s_vhdr, sizeof(*vhdr)); |
| 190 | mark_buffer_dirty(bh); | 189 | mark_buffer_dirty(bh); |
| 191 | brelse(bh); | 190 | brelse(bh); |
| 192 | } else | 191 | } else |
| 193 | printk(KERN_WARNING "hfs: backup not found!\n"); | 192 | printk(KERN_WARNING "hfs: backup not found!\n"); |
| 194 | } | 193 | } |
| 195 | } | 194 | } |
| 196 | HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP; | ||
| 197 | } | 195 | } |
| 198 | unlock_super(sb); | 196 | mutex_unlock(&sbi->alloc_mutex); |
| 197 | mutex_unlock(&sbi->vh_mutex); | ||
| 199 | return 0; | 198 | return 0; |
| 200 | } | 199 | } |
| 201 | 200 | ||
| @@ -209,48 +208,48 @@ static void hfsplus_write_super(struct super_block *sb) | |||
| 209 | 208 | ||
| 210 | static void hfsplus_put_super(struct super_block *sb) | 209 | static void hfsplus_put_super(struct super_block *sb) |
| 211 | { | 210 | { |
| 211 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 212 | |||
| 212 | dprint(DBG_SUPER, "hfsplus_put_super\n"); | 213 | dprint(DBG_SUPER, "hfsplus_put_super\n"); |
| 214 | |||
| 213 | if (!sb->s_fs_info) | 215 | if (!sb->s_fs_info) |
| 214 | return; | 216 | return; |
| 215 | 217 | ||
| 216 | lock_kernel(); | ||
| 217 | |||
| 218 | if (sb->s_dirt) | 218 | if (sb->s_dirt) |
| 219 | hfsplus_write_super(sb); | 219 | hfsplus_write_super(sb); |
| 220 | if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) { | 220 | if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) { |
| 221 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; | 221 | struct hfsplus_vh *vhdr = sbi->s_vhdr; |
| 222 | 222 | ||
| 223 | vhdr->modify_date = hfsp_now2mt(); | 223 | vhdr->modify_date = hfsp_now2mt(); |
| 224 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT); | 224 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT); |
| 225 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT); | 225 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT); |
| 226 | mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); | 226 | mark_buffer_dirty(sbi->s_vhbh); |
| 227 | sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh); | 227 | sync_dirty_buffer(sbi->s_vhbh); |
| 228 | } | 228 | } |
| 229 | 229 | ||
| 230 | hfs_btree_close(HFSPLUS_SB(sb).cat_tree); | 230 | hfs_btree_close(sbi->cat_tree); |
| 231 | hfs_btree_close(HFSPLUS_SB(sb).ext_tree); | 231 | hfs_btree_close(sbi->ext_tree); |
| 232 | iput(HFSPLUS_SB(sb).alloc_file); | 232 | iput(sbi->alloc_file); |
| 233 | iput(HFSPLUS_SB(sb).hidden_dir); | 233 | iput(sbi->hidden_dir); |
| 234 | brelse(HFSPLUS_SB(sb).s_vhbh); | 234 | brelse(sbi->s_vhbh); |
| 235 | unload_nls(HFSPLUS_SB(sb).nls); | 235 | unload_nls(sbi->nls); |
| 236 | kfree(sb->s_fs_info); | 236 | kfree(sb->s_fs_info); |
| 237 | sb->s_fs_info = NULL; | 237 | sb->s_fs_info = NULL; |
| 238 | |||
| 239 | unlock_kernel(); | ||
| 240 | } | 238 | } |
| 241 | 239 | ||
| 242 | static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) | 240 | static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) |
| 243 | { | 241 | { |
| 244 | struct super_block *sb = dentry->d_sb; | 242 | struct super_block *sb = dentry->d_sb; |
| 243 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 245 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | 244 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); |
| 246 | 245 | ||
| 247 | buf->f_type = HFSPLUS_SUPER_MAGIC; | 246 | buf->f_type = HFSPLUS_SUPER_MAGIC; |
| 248 | buf->f_bsize = sb->s_blocksize; | 247 | buf->f_bsize = sb->s_blocksize; |
| 249 | buf->f_blocks = HFSPLUS_SB(sb).total_blocks << HFSPLUS_SB(sb).fs_shift; | 248 | buf->f_blocks = sbi->total_blocks << sbi->fs_shift; |
| 250 | buf->f_bfree = HFSPLUS_SB(sb).free_blocks << HFSPLUS_SB(sb).fs_shift; | 249 | buf->f_bfree = sbi->free_blocks << sbi->fs_shift; |
| 251 | buf->f_bavail = buf->f_bfree; | 250 | buf->f_bavail = buf->f_bfree; |
| 252 | buf->f_files = 0xFFFFFFFF; | 251 | buf->f_files = 0xFFFFFFFF; |
| 253 | buf->f_ffree = 0xFFFFFFFF - HFSPLUS_SB(sb).next_cnid; | 252 | buf->f_ffree = 0xFFFFFFFF - sbi->next_cnid; |
| 254 | buf->f_fsid.val[0] = (u32)id; | 253 | buf->f_fsid.val[0] = (u32)id; |
| 255 | buf->f_fsid.val[1] = (u32)(id >> 32); | 254 | buf->f_fsid.val[1] = (u32)(id >> 32); |
| 256 | buf->f_namelen = HFSPLUS_MAX_STRLEN; | 255 | buf->f_namelen = HFSPLUS_MAX_STRLEN; |
| @@ -263,11 +262,11 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data) | |||
| 263 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | 262 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) |
| 264 | return 0; | 263 | return 0; |
| 265 | if (!(*flags & MS_RDONLY)) { | 264 | if (!(*flags & MS_RDONLY)) { |
| 266 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; | 265 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb)->s_vhdr; |
| 267 | struct hfsplus_sb_info sbi; | 266 | struct hfsplus_sb_info sbi; |
| 268 | 267 | ||
| 269 | memset(&sbi, 0, sizeof(struct hfsplus_sb_info)); | 268 | memset(&sbi, 0, sizeof(struct hfsplus_sb_info)); |
| 270 | sbi.nls = HFSPLUS_SB(sb).nls; | 269 | sbi.nls = HFSPLUS_SB(sb)->nls; |
| 271 | if (!hfsplus_parse_options(data, &sbi)) | 270 | if (!hfsplus_parse_options(data, &sbi)) |
| 272 | return -EINVAL; | 271 | return -EINVAL; |
| 273 | 272 | ||
| @@ -276,7 +275,7 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data) | |||
| 276 | "running fsck.hfsplus is recommended. leaving read-only.\n"); | 275 | "running fsck.hfsplus is recommended. leaving read-only.\n"); |
| 277 | sb->s_flags |= MS_RDONLY; | 276 | sb->s_flags |= MS_RDONLY; |
| 278 | *flags |= MS_RDONLY; | 277 | *flags |= MS_RDONLY; |
| 279 | } else if (sbi.flags & HFSPLUS_SB_FORCE) { | 278 | } else if (test_bit(HFSPLUS_SB_FORCE, &sbi.flags)) { |
| 280 | /* nothing */ | 279 | /* nothing */ |
| 281 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { | 280 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { |
| 282 | printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n"); | 281 | printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n"); |
| @@ -320,7 +319,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 320 | return -ENOMEM; | 319 | return -ENOMEM; |
| 321 | 320 | ||
| 322 | sb->s_fs_info = sbi; | 321 | sb->s_fs_info = sbi; |
| 323 | INIT_HLIST_HEAD(&sbi->rsrc_inodes); | 322 | mutex_init(&sbi->alloc_mutex); |
| 323 | mutex_init(&sbi->vh_mutex); | ||
| 324 | hfsplus_fill_defaults(sbi); | 324 | hfsplus_fill_defaults(sbi); |
| 325 | if (!hfsplus_parse_options(data, sbi)) { | 325 | if (!hfsplus_parse_options(data, sbi)) { |
| 326 | printk(KERN_ERR "hfs: unable to parse mount options\n"); | 326 | printk(KERN_ERR "hfs: unable to parse mount options\n"); |
| @@ -344,7 +344,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 344 | err = -EINVAL; | 344 | err = -EINVAL; |
| 345 | goto cleanup; | 345 | goto cleanup; |
| 346 | } | 346 | } |
| 347 | vhdr = HFSPLUS_SB(sb).s_vhdr; | 347 | vhdr = sbi->s_vhdr; |
| 348 | 348 | ||
| 349 | /* Copy parts of the volume header into the superblock */ | 349 | /* Copy parts of the volume header into the superblock */ |
| 350 | sb->s_magic = HFSPLUS_VOLHEAD_SIG; | 350 | sb->s_magic = HFSPLUS_VOLHEAD_SIG; |
| @@ -353,18 +353,19 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 353 | printk(KERN_ERR "hfs: wrong filesystem version\n"); | 353 | printk(KERN_ERR "hfs: wrong filesystem version\n"); |
| 354 | goto cleanup; | 354 | goto cleanup; |
| 355 | } | 355 | } |
| 356 | HFSPLUS_SB(sb).total_blocks = be32_to_cpu(vhdr->total_blocks); | 356 | sbi->total_blocks = be32_to_cpu(vhdr->total_blocks); |
| 357 | HFSPLUS_SB(sb).free_blocks = be32_to_cpu(vhdr->free_blocks); | 357 | sbi->free_blocks = be32_to_cpu(vhdr->free_blocks); |
| 358 | HFSPLUS_SB(sb).next_alloc = be32_to_cpu(vhdr->next_alloc); | 358 | sbi->next_cnid = be32_to_cpu(vhdr->next_cnid); |
| 359 | HFSPLUS_SB(sb).next_cnid = be32_to_cpu(vhdr->next_cnid); | 359 | sbi->file_count = be32_to_cpu(vhdr->file_count); |
| 360 | HFSPLUS_SB(sb).file_count = be32_to_cpu(vhdr->file_count); | 360 | sbi->folder_count = be32_to_cpu(vhdr->folder_count); |
| 361 | HFSPLUS_SB(sb).folder_count = be32_to_cpu(vhdr->folder_count); | 361 | sbi->data_clump_blocks = |
| 362 | HFSPLUS_SB(sb).data_clump_blocks = be32_to_cpu(vhdr->data_clump_sz) >> HFSPLUS_SB(sb).alloc_blksz_shift; | 362 | be32_to_cpu(vhdr->data_clump_sz) >> sbi->alloc_blksz_shift; |
| 363 | if (!HFSPLUS_SB(sb).data_clump_blocks) | 363 | if (!sbi->data_clump_blocks) |
| 364 | HFSPLUS_SB(sb).data_clump_blocks = 1; | 364 | sbi->data_clump_blocks = 1; |
| 365 | HFSPLUS_SB(sb).rsrc_clump_blocks = be32_to_cpu(vhdr->rsrc_clump_sz) >> HFSPLUS_SB(sb).alloc_blksz_shift; | 365 | sbi->rsrc_clump_blocks = |
| 366 | if (!HFSPLUS_SB(sb).rsrc_clump_blocks) | 366 | be32_to_cpu(vhdr->rsrc_clump_sz) >> sbi->alloc_blksz_shift; |
| 367 | HFSPLUS_SB(sb).rsrc_clump_blocks = 1; | 367 | if (!sbi->rsrc_clump_blocks) |
| 368 | sbi->rsrc_clump_blocks = 1; | ||
| 368 | 369 | ||
| 369 | /* Set up operations so we can load metadata */ | 370 | /* Set up operations so we can load metadata */ |
| 370 | sb->s_op = &hfsplus_sops; | 371 | sb->s_op = &hfsplus_sops; |
| @@ -374,7 +375,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 374 | printk(KERN_WARNING "hfs: Filesystem was not cleanly unmounted, " | 375 | printk(KERN_WARNING "hfs: Filesystem was not cleanly unmounted, " |
| 375 | "running fsck.hfsplus is recommended. mounting read-only.\n"); | 376 | "running fsck.hfsplus is recommended. mounting read-only.\n"); |
| 376 | sb->s_flags |= MS_RDONLY; | 377 | sb->s_flags |= MS_RDONLY; |
| 377 | } else if (sbi->flags & HFSPLUS_SB_FORCE) { | 378 | } else if (test_and_clear_bit(HFSPLUS_SB_FORCE, &sbi->flags)) { |
| 378 | /* nothing */ | 379 | /* nothing */ |
| 379 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { | 380 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { |
| 380 | printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); | 381 | printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); |
| @@ -384,16 +385,15 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 384 | "use the force option at your own risk, mounting read-only.\n"); | 385 | "use the force option at your own risk, mounting read-only.\n"); |
| 385 | sb->s_flags |= MS_RDONLY; | 386 | sb->s_flags |= MS_RDONLY; |
| 386 | } | 387 | } |
| 387 | sbi->flags &= ~HFSPLUS_SB_FORCE; | ||
| 388 | 388 | ||
| 389 | /* Load metadata objects (B*Trees) */ | 389 | /* Load metadata objects (B*Trees) */ |
| 390 | HFSPLUS_SB(sb).ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); | 390 | sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); |
| 391 | if (!HFSPLUS_SB(sb).ext_tree) { | 391 | if (!sbi->ext_tree) { |
| 392 | printk(KERN_ERR "hfs: failed to load extents file\n"); | 392 | printk(KERN_ERR "hfs: failed to load extents file\n"); |
| 393 | goto cleanup; | 393 | goto cleanup; |
| 394 | } | 394 | } |
| 395 | HFSPLUS_SB(sb).cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); | 395 | sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); |
| 396 | if (!HFSPLUS_SB(sb).cat_tree) { | 396 | if (!sbi->cat_tree) { |
| 397 | printk(KERN_ERR "hfs: failed to load catalog file\n"); | 397 | printk(KERN_ERR "hfs: failed to load catalog file\n"); |
| 398 | goto cleanup; | 398 | goto cleanup; |
| 399 | } | 399 | } |
| @@ -404,7 +404,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 404 | err = PTR_ERR(inode); | 404 | err = PTR_ERR(inode); |
| 405 | goto cleanup; | 405 | goto cleanup; |
| 406 | } | 406 | } |
| 407 | HFSPLUS_SB(sb).alloc_file = inode; | 407 | sbi->alloc_file = inode; |
| 408 | 408 | ||
| 409 | /* Load the root directory */ | 409 | /* Load the root directory */ |
| 410 | root = hfsplus_iget(sb, HFSPLUS_ROOT_CNID); | 410 | root = hfsplus_iget(sb, HFSPLUS_ROOT_CNID); |
| @@ -423,7 +423,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 423 | 423 | ||
| 424 | str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; | 424 | str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; |
| 425 | str.name = HFSP_HIDDENDIR_NAME; | 425 | str.name = HFSP_HIDDENDIR_NAME; |
| 426 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 426 | hfs_find_init(sbi->cat_tree, &fd); |
| 427 | hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); | 427 | hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); |
| 428 | if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { | 428 | if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { |
| 429 | hfs_find_exit(&fd); | 429 | hfs_find_exit(&fd); |
| @@ -434,7 +434,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 434 | err = PTR_ERR(inode); | 434 | err = PTR_ERR(inode); |
| 435 | goto cleanup; | 435 | goto cleanup; |
| 436 | } | 436 | } |
| 437 | HFSPLUS_SB(sb).hidden_dir = inode; | 437 | sbi->hidden_dir = inode; |
| 438 | } else | 438 | } else |
| 439 | hfs_find_exit(&fd); | 439 | hfs_find_exit(&fd); |
| 440 | 440 | ||
| @@ -449,15 +449,19 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 449 | be32_add_cpu(&vhdr->write_count, 1); | 449 | be32_add_cpu(&vhdr->write_count, 1); |
| 450 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); | 450 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); |
| 451 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); | 451 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); |
| 452 | mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); | 452 | mark_buffer_dirty(sbi->s_vhbh); |
| 453 | sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh); | 453 | sync_dirty_buffer(sbi->s_vhbh); |
| 454 | 454 | ||
| 455 | if (!HFSPLUS_SB(sb).hidden_dir) { | 455 | if (!sbi->hidden_dir) { |
| 456 | printk(KERN_DEBUG "hfs: create hidden dir...\n"); | 456 | printk(KERN_DEBUG "hfs: create hidden dir...\n"); |
| 457 | HFSPLUS_SB(sb).hidden_dir = hfsplus_new_inode(sb, S_IFDIR); | 457 | |
| 458 | hfsplus_create_cat(HFSPLUS_SB(sb).hidden_dir->i_ino, sb->s_root->d_inode, | 458 | mutex_lock(&sbi->vh_mutex); |
| 459 | &str, HFSPLUS_SB(sb).hidden_dir); | 459 | sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); |
| 460 | mark_inode_dirty(HFSPLUS_SB(sb).hidden_dir); | 460 | hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode, |
| 461 | &str, sbi->hidden_dir); | ||
| 462 | mutex_unlock(&sbi->vh_mutex); | ||
| 463 | |||
| 464 | mark_inode_dirty(sbi->hidden_dir); | ||
| 461 | } | 465 | } |
| 462 | out: | 466 | out: |
| 463 | unload_nls(sbi->nls); | 467 | unload_nls(sbi->nls); |
| @@ -486,7 +490,7 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb) | |||
| 486 | 490 | ||
| 487 | static void hfsplus_destroy_inode(struct inode *inode) | 491 | static void hfsplus_destroy_inode(struct inode *inode) |
| 488 | { | 492 | { |
| 489 | kmem_cache_free(hfsplus_inode_cachep, &HFSPLUS_I(inode)); | 493 | kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode)); |
| 490 | } | 494 | } |
| 491 | 495 | ||
| 492 | #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) | 496 | #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) |
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index 628ccf6fa402..b66d67de882c 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c | |||
| @@ -121,7 +121,7 @@ static u16 *hfsplus_compose_lookup(u16 *p, u16 cc) | |||
| 121 | int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p) | 121 | int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p) |
| 122 | { | 122 | { |
| 123 | const hfsplus_unichr *ip; | 123 | const hfsplus_unichr *ip; |
| 124 | struct nls_table *nls = HFSPLUS_SB(sb).nls; | 124 | struct nls_table *nls = HFSPLUS_SB(sb)->nls; |
| 125 | u8 *op; | 125 | u8 *op; |
| 126 | u16 cc, c0, c1; | 126 | u16 cc, c0, c1; |
| 127 | u16 *ce1, *ce2; | 127 | u16 *ce1, *ce2; |
| @@ -132,7 +132,7 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c | |||
| 132 | ustrlen = be16_to_cpu(ustr->length); | 132 | ustrlen = be16_to_cpu(ustr->length); |
| 133 | len = *len_p; | 133 | len = *len_p; |
| 134 | ce1 = NULL; | 134 | ce1 = NULL; |
| 135 | compose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 135 | compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); |
| 136 | 136 | ||
| 137 | while (ustrlen > 0) { | 137 | while (ustrlen > 0) { |
| 138 | c0 = be16_to_cpu(*ip++); | 138 | c0 = be16_to_cpu(*ip++); |
| @@ -246,7 +246,7 @@ out: | |||
| 246 | static inline int asc2unichar(struct super_block *sb, const char *astr, int len, | 246 | static inline int asc2unichar(struct super_block *sb, const char *astr, int len, |
| 247 | wchar_t *uc) | 247 | wchar_t *uc) |
| 248 | { | 248 | { |
| 249 | int size = HFSPLUS_SB(sb).nls->char2uni(astr, len, uc); | 249 | int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc); |
| 250 | if (size <= 0) { | 250 | if (size <= 0) { |
| 251 | *uc = '?'; | 251 | *uc = '?'; |
| 252 | size = 1; | 252 | size = 1; |
| @@ -293,7 +293,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, | |||
| 293 | u16 *dstr, outlen = 0; | 293 | u16 *dstr, outlen = 0; |
| 294 | wchar_t c; | 294 | wchar_t c; |
| 295 | 295 | ||
| 296 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 296 | decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); |
| 297 | while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { | 297 | while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { |
| 298 | size = asc2unichar(sb, astr, len, &c); | 298 | size = asc2unichar(sb, astr, len, &c); |
| 299 | 299 | ||
| @@ -330,8 +330,8 @@ int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) | |||
| 330 | wchar_t c; | 330 | wchar_t c; |
| 331 | u16 c2; | 331 | u16 c2; |
| 332 | 332 | ||
| 333 | casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD); | 333 | casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); |
| 334 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 334 | decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); |
| 335 | hash = init_name_hash(); | 335 | hash = init_name_hash(); |
| 336 | astr = str->name; | 336 | astr = str->name; |
| 337 | len = str->len; | 337 | len = str->len; |
| @@ -373,8 +373,8 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr * | |||
| 373 | u16 c1, c2; | 373 | u16 c1, c2; |
| 374 | wchar_t c; | 374 | wchar_t c; |
| 375 | 375 | ||
| 376 | casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD); | 376 | casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); |
| 377 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 377 | decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); |
| 378 | astr1 = s1->name; | 378 | astr1 = s1->name; |
| 379 | len1 = s1->len; | 379 | len1 = s1->len; |
| 380 | astr2 = s2->name; | 380 | astr2 = s2->name; |
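
The unicode.c and super.c changes also switch the sb_info flags from mask tests on a plain integer to <linux/bitops.h> operations on an unsigned long, so the HFSPLUS_SB_* constants become bit numbers rather than masks and updates are atomic. A sketch of the idiom, assuming `flags` is the unsigned long field used in the hunks above:

    #include <linux/bitops.h>

    static bool flag_examples(unsigned long *flags)
    {
            set_bit(HFSPLUS_SB_NODECOMPOSE, flags);         /* atomic set */
            if (test_and_clear_bit(HFSPLUS_SB_FORCE, flags))
                    return true;                            /* was set; now cleared */
            return test_bit(HFSPLUS_SB_CASEFOLD, flags);    /* atomic test */
    }
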
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index bed78ac8f6d1..8972c20b3216 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c | |||
| @@ -65,8 +65,8 @@ static int hfsplus_get_last_session(struct super_block *sb, | |||
| 65 | *start = 0; | 65 | *start = 0; |
| 66 | *size = sb->s_bdev->bd_inode->i_size >> 9; | 66 | *size = sb->s_bdev->bd_inode->i_size >> 9; |
| 67 | 67 | ||
| 68 | if (HFSPLUS_SB(sb).session >= 0) { | 68 | if (HFSPLUS_SB(sb)->session >= 0) { |
| 69 | te.cdte_track = HFSPLUS_SB(sb).session; | 69 | te.cdte_track = HFSPLUS_SB(sb)->session; |
| 70 | te.cdte_format = CDROM_LBA; | 70 | te.cdte_format = CDROM_LBA; |
| 71 | res = ioctl_by_bdev(sb->s_bdev, CDROMREADTOCENTRY, (unsigned long)&te); | 71 | res = ioctl_by_bdev(sb->s_bdev, CDROMREADTOCENTRY, (unsigned long)&te); |
| 72 | if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) { | 72 | if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) { |
| @@ -87,6 +87,7 @@ static int hfsplus_get_last_session(struct super_block *sb, | |||
| 87 | /* Takes in super block, returns true if good data read */ | 87 | /* Takes in super block, returns true if good data read */ |
| 88 | int hfsplus_read_wrapper(struct super_block *sb) | 88 | int hfsplus_read_wrapper(struct super_block *sb) |
| 89 | { | 89 | { |
| 90 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
| 90 | struct buffer_head *bh; | 91 | struct buffer_head *bh; |
| 91 | struct hfsplus_vh *vhdr; | 92 | struct hfsplus_vh *vhdr; |
| 92 | struct hfsplus_wd wd; | 93 | struct hfsplus_wd wd; |
| @@ -122,7 +123,7 @@ int hfsplus_read_wrapper(struct super_block *sb) | |||
| 122 | if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIG)) | 123 | if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIG)) |
| 123 | break; | 124 | break; |
| 124 | if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) { | 125 | if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) { |
| 125 | HFSPLUS_SB(sb).flags |= HFSPLUS_SB_HFSX; | 126 | set_bit(HFSPLUS_SB_HFSX, &sbi->flags); |
| 126 | break; | 127 | break; |
| 127 | } | 128 | } |
| 128 | brelse(bh); | 129 | brelse(bh); |
| @@ -143,11 +144,11 @@ int hfsplus_read_wrapper(struct super_block *sb) | |||
| 143 | if (blocksize < HFSPLUS_SECTOR_SIZE || | 144 | if (blocksize < HFSPLUS_SECTOR_SIZE || |
| 144 | ((blocksize - 1) & blocksize)) | 145 | ((blocksize - 1) & blocksize)) |
| 145 | return -EINVAL; | 146 | return -EINVAL; |
| 146 | HFSPLUS_SB(sb).alloc_blksz = blocksize; | 147 | sbi->alloc_blksz = blocksize; |
| 147 | HFSPLUS_SB(sb).alloc_blksz_shift = 0; | 148 | sbi->alloc_blksz_shift = 0; |
| 148 | while ((blocksize >>= 1) != 0) | 149 | while ((blocksize >>= 1) != 0) |
| 149 | HFSPLUS_SB(sb).alloc_blksz_shift++; | 150 | sbi->alloc_blksz_shift++; |
| 150 | blocksize = min(HFSPLUS_SB(sb).alloc_blksz, (u32)PAGE_SIZE); | 151 | blocksize = min(sbi->alloc_blksz, (u32)PAGE_SIZE); |
| 151 | 152 | ||
| 152 | /* align block size to block offset */ | 153 | /* align block size to block offset */ |
| 153 | while (part_start & ((blocksize >> HFSPLUS_SECTOR_SHIFT) - 1)) | 154 | while (part_start & ((blocksize >> HFSPLUS_SECTOR_SHIFT) - 1)) |
| @@ -158,23 +159,26 @@ int hfsplus_read_wrapper(struct super_block *sb) | |||
| 158 | return -EINVAL; | 159 | return -EINVAL; |
| 159 | } | 160 | } |
| 160 | 161 | ||
| 161 | HFSPLUS_SB(sb).blockoffset = part_start >> | 162 | sbi->blockoffset = |
| 162 | (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT); | 163 | part_start >> (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT); |
| 163 | HFSPLUS_SB(sb).sect_count = part_size; | 164 | sbi->sect_count = part_size; |
| 164 | HFSPLUS_SB(sb).fs_shift = HFSPLUS_SB(sb).alloc_blksz_shift - | 165 | sbi->fs_shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits; |
| 165 | sb->s_blocksize_bits; | ||
| 166 | 166 | ||
| 167 | bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr); | 167 | bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr); |
| 168 | if (!bh) | 168 | if (!bh) |
| 169 | return -EIO; | 169 | return -EIO; |
| 170 | 170 | ||
| 171 | /* should still be the same... */ | 171 | /* should still be the same... */ |
| 172 | if (vhdr->signature != (HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX ? | 172 | if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) { |
| 173 | cpu_to_be16(HFSPLUS_VOLHEAD_SIGX) : | 173 | if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) |
| 174 | cpu_to_be16(HFSPLUS_VOLHEAD_SIG))) | 174 | goto error; |
| 175 | goto error; | 175 | } else { |
| 176 | HFSPLUS_SB(sb).s_vhbh = bh; | 176 | if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIG)) |
| 177 | HFSPLUS_SB(sb).s_vhdr = vhdr; | 177 | goto error; |
| 178 | } | ||
| 179 | |||
| 180 | sbi->s_vhbh = bh; | ||
| 181 | sbi->s_vhdr = vhdr; | ||
| 178 | 182 | ||
| 179 | return 0; | 183 | return 0; |
| 180 | error: | 184 | error: |
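
In the wrapper.c hunk above, the blocksize sanity test "(blocksize - 1) & blocksize" is the standard power-of-two check: the expression is zero exactly when a single bit is set. For example, 4096 passes (4095 & 4096 == 0) while 4100 fails. A small sketch of the same test as a helper (the kernel also provides is_power_of_2() in <linux/log2.h>):

    static inline bool power_of_two(u32 x)
    {
            return x && !((x - 1) & x);     /* 4096 -> true, 4100 -> false */
    }
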
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index e2bd73e8f9c0..f4d4120e5128 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h | |||
| @@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres | |||
| 129 | #define move_pte(pte, prot, old_addr, new_addr) (pte) | 129 | #define move_pte(pte, prot, old_addr, new_addr) (pte) |
| 130 | #endif | 130 | #endif |
| 131 | 131 | ||
| 132 | #ifndef flush_tlb_fix_spurious_fault | ||
| 133 | #define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address) | ||
| 134 | #endif | ||
| 135 | |||
| 132 | #ifndef pgprot_noncached | 136 | #ifndef pgprot_noncached |
| 133 | #define pgprot_noncached(prot) (prot) | 137 | #define pgprot_noncached(prot) (prot) |
| 134 | #endif | 138 | #endif |
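
The new flush_tlb_fix_spurious_fault() hook follows asm-generic's usual override pattern: the generic fallback flushes the single page via flush_tlb_page(), and an architecture that can handle spurious faults more cheaply supplies its own definition before this header is included. A hypothetical no-op override would look like:

    /* in an architecture's asm/pgtable.h (hypothetical) */
    #define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
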
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index ef2af9948eac..f4229fb315e1 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h | |||
| @@ -687,7 +687,9 @@ | |||
| 687 | - LOAD_OFFSET) { \ | 687 | - LOAD_OFFSET) { \ |
| 688 | VMLINUX_SYMBOL(__per_cpu_start) = .; \ | 688 | VMLINUX_SYMBOL(__per_cpu_start) = .; \ |
| 689 | *(.data..percpu..first) \ | 689 | *(.data..percpu..first) \ |
| 690 | . = ALIGN(PAGE_SIZE); \ | ||
| 690 | *(.data..percpu..page_aligned) \ | 691 | *(.data..percpu..page_aligned) \ |
| 692 | *(.data..percpu..readmostly) \ | ||
| 691 | *(.data..percpu) \ | 693 | *(.data..percpu) \ |
| 692 | *(.data..percpu..shared_aligned) \ | 694 | *(.data..percpu..shared_aligned) \ |
| 693 | VMLINUX_SYMBOL(__per_cpu_end) = .; \ | 695 | VMLINUX_SYMBOL(__per_cpu_end) = .; \ |
| @@ -713,7 +715,9 @@ | |||
| 713 | VMLINUX_SYMBOL(__per_cpu_load) = .; \ | 715 | VMLINUX_SYMBOL(__per_cpu_load) = .; \ |
| 714 | VMLINUX_SYMBOL(__per_cpu_start) = .; \ | 716 | VMLINUX_SYMBOL(__per_cpu_start) = .; \ |
| 715 | *(.data..percpu..first) \ | 717 | *(.data..percpu..first) \ |
| 718 | . = ALIGN(PAGE_SIZE); \ | ||
| 716 | *(.data..percpu..page_aligned) \ | 719 | *(.data..percpu..page_aligned) \ |
| 720 | *(.data..percpu..readmostly) \ | ||
| 717 | *(.data..percpu) \ | 721 | *(.data..percpu) \ |
| 718 | *(.data..percpu..shared_aligned) \ | 722 | *(.data..percpu..shared_aligned) \ |
| 719 | VMLINUX_SYMBOL(__per_cpu_end) = .; \ | 723 | VMLINUX_SYMBOL(__per_cpu_end) = .; \ |
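
The linker-script change gives .data..percpu..readmostly its own slot and page-aligns the page_aligned section before it. Per-cpu variables land in the readmostly section when declared with the read-mostly variant of the per-cpu macros; a sketch with a hypothetical variable:

    #include <linux/percpu.h>

    /* emitted into .data..percpu..readmostly */
    static DEFINE_PER_CPU_READ_MOSTLY(int, example_counter);
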
diff --git a/include/linux/acpi_pmtmr.h b/include/linux/acpi_pmtmr.h index 7e3d2859be50..1d0ef1ae8036 100644 --- a/include/linux/acpi_pmtmr.h +++ b/include/linux/acpi_pmtmr.h | |||
| @@ -25,8 +25,6 @@ static inline u32 acpi_pm_read_early(void) | |||
| 25 | return acpi_pm_read_verified() & ACPI_PM_MASK; | 25 | return acpi_pm_read_verified() & ACPI_PM_MASK; |
| 26 | } | 26 | } |
| 27 | 27 | ||
| 28 | extern void pmtimer_wait(unsigned); | ||
| 29 | |||
| 30 | #else | 28 | #else |
| 31 | 29 | ||
| 32 | static inline u32 acpi_pm_read_early(void) | 30 | static inline u32 acpi_pm_read_early(void) |
diff --git a/fs/ceph/auth.h b/include/linux/ceph/auth.h index d38a2fb4a137..7fff521d7eb5 100644 --- a/fs/ceph/auth.h +++ b/include/linux/ceph/auth.h | |||
| @@ -1,8 +1,8 @@ | |||
| 1 | #ifndef _FS_CEPH_AUTH_H | 1 | #ifndef _FS_CEPH_AUTH_H |
| 2 | #define _FS_CEPH_AUTH_H | 2 | #define _FS_CEPH_AUTH_H |
| 3 | 3 | ||
| 4 | #include "types.h" | 4 | #include <linux/ceph/types.h> |
| 5 | #include "buffer.h" | 5 | #include <linux/ceph/buffer.h> |
| 6 | 6 | ||
| 7 | /* | 7 | /* |
| 8 | * Abstract interface for communicating with the authentication module. | 8 | * Abstract interface for communicating with the authentication module. |
diff --git a/fs/ceph/buffer.h b/include/linux/ceph/buffer.h index 58d19014068f..58d19014068f 100644 --- a/fs/ceph/buffer.h +++ b/include/linux/ceph/buffer.h | |||
diff --git a/fs/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h index 1818c2305610..aa2e19182d99 100644 --- a/fs/ceph/ceph_debug.h +++ b/include/linux/ceph/ceph_debug.h | |||
| @@ -3,7 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 4 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| 5 | 5 | ||
| 6 | #ifdef CONFIG_CEPH_FS_PRETTYDEBUG | 6 | #ifdef CONFIG_CEPH_LIB_PRETTYDEBUG |
| 7 | 7 | ||
| 8 | /* | 8 | /* |
| 9 | * wrap pr_debug to include a filename:lineno prefix on each line. | 9 | * wrap pr_debug to include a filename:lineno prefix on each line. |
| @@ -14,7 +14,8 @@ | |||
| 14 | # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) | 14 | # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) |
| 15 | extern const char *ceph_file_part(const char *s, int len); | 15 | extern const char *ceph_file_part(const char *s, int len); |
| 16 | # define dout(fmt, ...) \ | 16 | # define dout(fmt, ...) \ |
| 17 | pr_debug(" %12.12s:%-4d : " fmt, \ | 17 | pr_debug("%.*s %12.12s:%-4d : " fmt, \ |
| 18 | 8 - (int)sizeof(KBUILD_MODNAME), " ", \ | ||
| 18 | ceph_file_part(__FILE__, sizeof(__FILE__)), \ | 19 | ceph_file_part(__FILE__, sizeof(__FILE__)), \ |
| 19 | __LINE__, ##__VA_ARGS__) | 20 | __LINE__, ##__VA_ARGS__) |
| 20 | # else | 21 | # else |
diff --git a/fs/ceph/ceph_frag.h b/include/linux/ceph/ceph_frag.h index 5babb8e95352..5babb8e95352 100644 --- a/fs/ceph/ceph_frag.h +++ b/include/linux/ceph/ceph_frag.h | |||
diff --git a/fs/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index d5619ac86711..c3c74aef289d 100644 --- a/fs/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h | |||
| @@ -299,6 +299,7 @@ enum { | |||
| 299 | CEPH_MDS_OP_SETATTR = 0x01108, | 299 | CEPH_MDS_OP_SETATTR = 0x01108, |
| 300 | CEPH_MDS_OP_SETFILELOCK= 0x01109, | 300 | CEPH_MDS_OP_SETFILELOCK= 0x01109, |
| 301 | CEPH_MDS_OP_GETFILELOCK= 0x00110, | 301 | CEPH_MDS_OP_GETFILELOCK= 0x00110, |
| 302 | CEPH_MDS_OP_SETDIRLAYOUT=0x0110a, | ||
| 302 | 303 | ||
| 303 | CEPH_MDS_OP_MKNOD = 0x01201, | 304 | CEPH_MDS_OP_MKNOD = 0x01201, |
| 304 | CEPH_MDS_OP_LINK = 0x01202, | 305 | CEPH_MDS_OP_LINK = 0x01202, |
diff --git a/fs/ceph/ceph_hash.h b/include/linux/ceph/ceph_hash.h index d099c3f90236..d099c3f90236 100644 --- a/fs/ceph/ceph_hash.h +++ b/include/linux/ceph/ceph_hash.h | |||
diff --git a/include/linux/ceph/debugfs.h b/include/linux/ceph/debugfs.h new file mode 100644 index 000000000000..2a79702e092b --- /dev/null +++ b/include/linux/ceph/debugfs.h | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | #ifndef _FS_CEPH_DEBUGFS_H | ||
| 2 | #define _FS_CEPH_DEBUGFS_H | ||
| 3 | |||
| 4 | #include "ceph_debug.h" | ||
| 5 | #include "types.h" | ||
| 6 | |||
| 7 | #define CEPH_DEFINE_SHOW_FUNC(name) \ | ||
| 8 | static int name##_open(struct inode *inode, struct file *file) \ | ||
| 9 | { \ | ||
| 10 | struct seq_file *sf; \ | ||
| 11 | int ret; \ | ||
| 12 | \ | ||
| 13 | ret = single_open(file, name, NULL); \ | ||
| 14 | sf = file->private_data; \ | ||
| 15 | sf->private = inode->i_private; \ | ||
| 16 | return ret; \ | ||
| 17 | } \ | ||
| 18 | \ | ||
| 19 | static const struct file_operations name##_fops = { \ | ||
| 20 | .open = name##_open, \ | ||
| 21 | .read = seq_read, \ | ||
| 22 | .llseek = seq_lseek, \ | ||
| 23 | .release = single_release, \ | ||
| 24 | }; | ||
| 25 | |||
| 26 | /* debugfs.c */ | ||
| 27 | extern int ceph_debugfs_init(void); | ||
| 28 | extern void ceph_debugfs_cleanup(void); | ||
| 29 | extern int ceph_debugfs_client_init(struct ceph_client *client); | ||
| 30 | extern void ceph_debugfs_client_cleanup(struct ceph_client *client); | ||
| 31 | |||
| 32 | #endif | ||
| 33 | |||
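
CEPH_DEFINE_SHOW_FUNC() above wraps a plain seq_file show routine into single_open()-based file operations named <name>_fops. A minimal hypothetical use:

    static int monmap_show(struct seq_file *s, void *p)
    {
            seq_printf(s, "monmap goes here\n");    /* placeholder output */
            return 0;
    }
    CEPH_DEFINE_SHOW_FUNC(monmap_show)

    /* monmap_show_fops can then be passed to debugfs_create_file(). */
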
diff --git a/fs/ceph/decode.h b/include/linux/ceph/decode.h index 3d25415afe63..c5b6939fb32a 100644 --- a/fs/ceph/decode.h +++ b/include/linux/ceph/decode.h | |||
| @@ -191,6 +191,11 @@ static inline void ceph_encode_string(void **p, void *end, | |||
| 191 | ceph_encode_need(p, end, n, bad); \ | 191 | ceph_encode_need(p, end, n, bad); \ |
| 192 | ceph_encode_copy(p, pv, n); \ | 192 | ceph_encode_copy(p, pv, n); \ |
| 193 | } while (0) | 193 | } while (0) |
| 194 | #define ceph_encode_string_safe(p, end, s, n, bad) \ | ||
| 195 | do { \ | ||
| 196 | ceph_encode_need(p, end, n, bad); \ | ||
| 197 | ceph_encode_string(p, end, s, n); \ | ||
| 198 | } while (0) | ||
| 194 | 199 | ||
| 195 | 200 | ||
| 196 | #endif | 201 | #endif |
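
The new ceph_encode_string_safe() pairs a bounds check with the length-prefixed string encoding, jumping to a caller-supplied label on overflow like the other *_safe macros in this header. A hedged usage sketch (hypothetical helper; buffer layout assumed):

    static int encode_name(void *buf, size_t buflen, const char *name)
    {
            void *p = buf, *end = buf + buflen;

            ceph_encode_string_safe(&p, end, name, strlen(name), bad);
            return 0;
    bad:
            return -ERANGE;
    }
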
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h new file mode 100644 index 000000000000..f22b2e941686 --- /dev/null +++ b/include/linux/ceph/libceph.h | |||
| @@ -0,0 +1,249 @@ | |||
| 1 | #ifndef _FS_CEPH_LIBCEPH_H | ||
| 2 | #define _FS_CEPH_LIBCEPH_H | ||
| 3 | |||
| 4 | #include "ceph_debug.h" | ||
| 5 | |||
| 6 | #include <asm/unaligned.h> | ||
| 7 | #include <linux/backing-dev.h> | ||
| 8 | #include <linux/completion.h> | ||
| 9 | #include <linux/exportfs.h> | ||
| 10 | #include <linux/fs.h> | ||
| 11 | #include <linux/mempool.h> | ||
| 12 | #include <linux/pagemap.h> | ||
| 13 | #include <linux/wait.h> | ||
| 14 | #include <linux/writeback.h> | ||
| 15 | #include <linux/slab.h> | ||
| 16 | |||
| 17 | #include "types.h" | ||
| 18 | #include "messenger.h" | ||
| 19 | #include "msgpool.h" | ||
| 20 | #include "mon_client.h" | ||
| 21 | #include "osd_client.h" | ||
| 22 | #include "ceph_fs.h" | ||
| 23 | |||
| 24 | /* | ||
| 25 | * Supported features | ||
| 26 | */ | ||
| 27 | #define CEPH_FEATURE_SUPPORTED_DEFAULT CEPH_FEATURE_NOSRCADDR | ||
| 28 | #define CEPH_FEATURE_REQUIRED_DEFAULT CEPH_FEATURE_NOSRCADDR | ||
| 29 | |||
| 30 | /* | ||
| 31 | * mount options | ||
| 32 | */ | ||
| 33 | #define CEPH_OPT_FSID (1<<0) | ||
| 34 | #define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ | ||
| 35 | #define CEPH_OPT_MYIP (1<<2) /* specified my ip */ | ||
| 36 | #define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */ | ||
| 37 | |||
| 38 | #define CEPH_OPT_DEFAULT (0) | ||
| 39 | |||
| 40 | #define ceph_set_opt(client, opt) \ | ||
| 41 | (client)->options->flags |= CEPH_OPT_##opt; | ||
| 42 | #define ceph_test_opt(client, opt) \ | ||
| 43 | (!!((client)->options->flags & CEPH_OPT_##opt)) | ||
| 44 | |||
| 45 | struct ceph_options { | ||
| 46 | int flags; | ||
| 47 | struct ceph_fsid fsid; | ||
| 48 | struct ceph_entity_addr my_addr; | ||
| 49 | int mount_timeout; | ||
| 50 | int osd_idle_ttl; | ||
| 51 | int osd_timeout; | ||
| 52 | int osd_keepalive_timeout; | ||
| 53 | |||
| 54 | /* | ||
| 55 | * any type that can't be simply compared or doesn't need | ||
| 56 | * to be compared should go beyond this point, | ||
| 57 | * ceph_compare_options() should be updated accordingly | ||
| 58 | */ | ||
| 59 | |||
| 60 | struct ceph_entity_addr *mon_addr; /* should be the first | ||
| 61 | pointer type of args */ | ||
| 62 | int num_mon; | ||
| 63 | char *name; | ||
| 64 | char *secret; | ||
| 65 | }; | ||
| 66 | |||
| 67 | /* | ||
| 68 | * defaults | ||
| 69 | */ | ||
| 70 | #define CEPH_MOUNT_TIMEOUT_DEFAULT 60 | ||
| 71 | #define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */ | ||
| 72 | #define CEPH_OSD_KEEPALIVE_DEFAULT 5 | ||
| 73 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 | ||
| 74 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ | ||
| 75 | |||
| 76 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | ||
| 77 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) | ||
| 78 | |||
| 79 | #define CEPH_AUTH_NAME_DEFAULT "guest" | ||
| 80 | |||
| 81 | /* | ||
| 82 | * Delay telling the MDS we no longer want caps, in case we reopen | ||
| 83 | * the file. Delay a minimum amount of time, even if we send a cap | ||
| 84 | * message for some other reason. Otherwise, take the opportunity to | ||
| 85 | * update the mds to avoid sending another message later. | ||
| 86 | */ | ||
| 87 | #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ | ||
| 88 | #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ | ||
| 89 | |||
| 90 | #define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) | ||
| 91 | |||
| 92 | /* mount state */ | ||
| 93 | enum { | ||
| 94 | CEPH_MOUNT_MOUNTING, | ||
| 95 | CEPH_MOUNT_MOUNTED, | ||
| 96 | CEPH_MOUNT_UNMOUNTING, | ||
| 97 | CEPH_MOUNT_UNMOUNTED, | ||
| 98 | CEPH_MOUNT_SHUTDOWN, | ||
| 99 | }; | ||
| 100 | |||
| 101 | /* | ||
| 102 | * subtract jiffies | ||
| 103 | */ | ||
| 104 | static inline unsigned long time_sub(unsigned long a, unsigned long b) | ||
| 105 | { | ||
| 106 | BUG_ON(time_after(b, a)); | ||
| 107 | return (long)a - (long)b; | ||
| 108 | } | ||
| 109 | |||
| 110 | struct ceph_mds_client; | ||
| 111 | |||
| 112 | /* | ||
| 113 | * per client state | ||
| 114 | * | ||
| 115 | * possibly shared by multiple mount points, if they are | ||
| 116 | * mounting the same ceph filesystem/cluster. | ||
| 117 | */ | ||
| 118 | struct ceph_client { | ||
| 119 | struct ceph_fsid fsid; | ||
| 120 | bool have_fsid; | ||
| 121 | |||
| 122 | void *private; | ||
| 123 | |||
| 124 | struct ceph_options *options; | ||
| 125 | |||
| 126 | struct mutex mount_mutex; /* serialize mount attempts */ | ||
| 127 | wait_queue_head_t auth_wq; | ||
| 128 | int auth_err; | ||
| 129 | |||
| 130 | int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *); | ||
| 131 | |||
| 132 | u32 supported_features; | ||
| 133 | u32 required_features; | ||
| 134 | |||
| 135 | struct ceph_messenger *msgr; /* messenger instance */ | ||
| 136 | struct ceph_mon_client monc; | ||
| 137 | struct ceph_osd_client osdc; | ||
| 138 | |||
| 139 | #ifdef CONFIG_DEBUG_FS | ||
| 140 | struct dentry *debugfs_dir; | ||
| 141 | struct dentry *debugfs_monmap; | ||
| 142 | struct dentry *debugfs_osdmap; | ||
| 143 | #endif | ||
| 144 | }; | ||
| 145 | |||
| 146 | |||
| 147 | |||
| 148 | /* | ||
| 149 | * snapshots | ||
| 150 | */ | ||
| 151 | |||
| 152 | /* | ||
| 153 | * A "snap context" is the set of existing snapshots when we | ||
| 154 | * write data. It is used by the OSD to guide its COW behavior. | ||
| 155 | * | ||
| 156 | * The ceph_snap_context is refcounted, and attached to each dirty | ||
| 157 | * page, indicating which context the dirty data belonged to when it was | ||
| 158 | * dirtied. | ||
| 159 | */ | ||
| 160 | struct ceph_snap_context { | ||
| 161 | atomic_t nref; | ||
| 162 | u64 seq; | ||
| 163 | int num_snaps; | ||
| 164 | u64 snaps[]; | ||
| 165 | }; | ||
| 166 | |||
| 167 | static inline struct ceph_snap_context * | ||
| 168 | ceph_get_snap_context(struct ceph_snap_context *sc) | ||
| 169 | { | ||
| 170 | /* | ||
| 171 | printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||
| 172 | atomic_read(&sc->nref)+1); | ||
| 173 | */ | ||
| 174 | if (sc) | ||
| 175 | atomic_inc(&sc->nref); | ||
| 176 | return sc; | ||
| 177 | } | ||
| 178 | |||
| 179 | static inline void ceph_put_snap_context(struct ceph_snap_context *sc) | ||
| 180 | { | ||
| 181 | if (!sc) | ||
| 182 | return; | ||
| 183 | /* | ||
| 184 | printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||
| 185 | atomic_read(&sc->nref)-1); | ||
| 186 | */ | ||
| 187 | if (atomic_dec_and_test(&sc->nref)) { | ||
| 188 | /*printk(" deleting snap_context %p\n", sc);*/ | ||
| 189 | kfree(sc); | ||
| 190 | } | ||
| 191 | } | ||
| 192 | |||
| 193 | /* | ||
| 194 | * calculate the number of pages a given length and offset map onto, | ||
| 195 | * if we align the data. | ||
| 196 | */ | ||
| 197 | static inline int calc_pages_for(u64 off, u64 len) | ||
| 198 | { | ||
| 199 | return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) - | ||
| 200 | (off >> PAGE_CACHE_SHIFT); | ||
| 201 | } | ||
| 202 | |||
| 203 | /* ceph_common.c */ | ||
| 204 | extern const char *ceph_msg_type_name(int type); | ||
| 205 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); | ||
| 206 | extern struct kmem_cache *ceph_inode_cachep; | ||
| 207 | extern struct kmem_cache *ceph_cap_cachep; | ||
| 208 | extern struct kmem_cache *ceph_dentry_cachep; | ||
| 209 | extern struct kmem_cache *ceph_file_cachep; | ||
| 210 | |||
| 211 | extern int ceph_parse_options(struct ceph_options **popt, char *options, | ||
| 212 | const char *dev_name, const char *dev_name_end, | ||
| 213 | int (*parse_extra_token)(char *c, void *private), | ||
| 214 | void *private); | ||
| 215 | extern void ceph_destroy_options(struct ceph_options *opt); | ||
| 216 | extern int ceph_compare_options(struct ceph_options *new_opt, | ||
| 217 | struct ceph_client *client); | ||
| 218 | extern struct ceph_client *ceph_create_client(struct ceph_options *opt, | ||
| 219 | void *private); | ||
| 220 | extern u64 ceph_client_id(struct ceph_client *client); | ||
| 221 | extern void ceph_destroy_client(struct ceph_client *client); | ||
| 222 | extern int __ceph_open_session(struct ceph_client *client, | ||
| 223 | unsigned long started); | ||
| 224 | extern int ceph_open_session(struct ceph_client *client); | ||
| 225 | |||
| 226 | /* pagevec.c */ | ||
| 227 | extern void ceph_release_page_vector(struct page **pages, int num_pages); | ||
| 228 | |||
| 229 | extern struct page **ceph_get_direct_page_vector(const char __user *data, | ||
| 230 | int num_pages, | ||
| 231 | loff_t off, size_t len); | ||
| 232 | extern void ceph_put_page_vector(struct page **pages, int num_pages); | ||
| 233 | extern void ceph_release_page_vector(struct page **pages, int num_pages); | ||
| 234 | extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); | ||
| 235 | extern int ceph_copy_user_to_page_vector(struct page **pages, | ||
| 236 | const char __user *data, | ||
| 237 | loff_t off, size_t len); | ||
| 238 | extern int ceph_copy_to_page_vector(struct page **pages, | ||
| 239 | const char *data, | ||
| 240 | loff_t off, size_t len); | ||
| 241 | extern int ceph_copy_from_page_vector(struct page **pages, | ||
| 242 | char *data, | ||
| 243 | loff_t off, size_t len); | ||
| 244 | extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data, | ||
| 245 | loff_t off, size_t len); | ||
| 246 | extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); | ||
| 247 | |||
| 248 | |||
| 249 | #endif /* _FS_CEPH_LIBCEPH_H */ | ||
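
calc_pages_for() above counts how many pages an (offset, length) extent touches. A worked example with 4 KiB pages (PAGE_CACHE_SHIFT == 12):

    /*
     * calc_pages_for(4000, 10000)
     *   = ((4000 + 10000 + 4095) >> 12) - (4000 >> 12)
     *   = 4 - 0 = 4    (bytes 4000..13999 touch pages 0..3)
     */
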
diff --git a/fs/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 4c5cb0880bba..4c5cb0880bba 100644 --- a/fs/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h | |||
diff --git a/fs/ceph/messenger.h b/include/linux/ceph/messenger.h index 76fbc957bc13..5956d62c3057 100644 --- a/fs/ceph/messenger.h +++ b/include/linux/ceph/messenger.h | |||
| @@ -65,6 +65,9 @@ struct ceph_messenger { | |||
| 65 | */ | 65 | */ |
| 66 | u32 global_seq; | 66 | u32 global_seq; |
| 67 | spinlock_t global_seq_lock; | 67 | spinlock_t global_seq_lock; |
| 68 | |||
| 69 | u32 supported_features; | ||
| 70 | u32 required_features; | ||
| 68 | }; | 71 | }; |
| 69 | 72 | ||
| 70 | /* | 73 | /* |
| @@ -82,6 +85,10 @@ struct ceph_msg { | |||
| 82 | struct ceph_pagelist *pagelist; /* instead of pages */ | 85 | struct ceph_pagelist *pagelist; /* instead of pages */ |
| 83 | struct list_head list_head; | 86 | struct list_head list_head; |
| 84 | struct kref kref; | 87 | struct kref kref; |
| 88 | struct bio *bio; /* instead of pages/pagelist */ | ||
| 89 | struct bio *bio_iter; /* bio iterator */ | ||
| 90 | int bio_seg; /* current bio segment */ | ||
| 91 | struct ceph_pagelist *trail; /* the trailing part of the data */ | ||
| 85 | bool front_is_vmalloc; | 92 | bool front_is_vmalloc; |
| 86 | bool more_to_follow; | 93 | bool more_to_follow; |
| 87 | bool needs_out_seq; | 94 | bool needs_out_seq; |
| @@ -205,7 +212,7 @@ struct ceph_connection { | |||
| 205 | }; | 212 | }; |
| 206 | 213 | ||
| 207 | 214 | ||
| 208 | extern const char *pr_addr(const struct sockaddr_storage *ss); | 215 | extern const char *ceph_pr_addr(const struct sockaddr_storage *ss); |
| 209 | extern int ceph_parse_ips(const char *c, const char *end, | 216 | extern int ceph_parse_ips(const char *c, const char *end, |
| 210 | struct ceph_entity_addr *addr, | 217 | struct ceph_entity_addr *addr, |
| 211 | int max_count, int *count); | 218 | int max_count, int *count); |
| @@ -216,7 +223,8 @@ extern void ceph_msgr_exit(void); | |||
| 216 | extern void ceph_msgr_flush(void); | 223 | extern void ceph_msgr_flush(void); |
| 217 | 224 | ||
| 218 | extern struct ceph_messenger *ceph_messenger_create( | 225 | extern struct ceph_messenger *ceph_messenger_create( |
| 219 | struct ceph_entity_addr *myaddr); | 226 | struct ceph_entity_addr *myaddr, |
| 227 | u32 features, u32 required); | ||
| 220 | extern void ceph_messenger_destroy(struct ceph_messenger *); | 228 | extern void ceph_messenger_destroy(struct ceph_messenger *); |
| 221 | 229 | ||
| 222 | extern void ceph_con_init(struct ceph_messenger *msgr, | 230 | extern void ceph_con_init(struct ceph_messenger *msgr, |
diff --git a/fs/ceph/mon_client.h b/include/linux/ceph/mon_client.h index 8e396f2c0963..545f85917780 100644 --- a/fs/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h | |||
| @@ -79,6 +79,7 @@ struct ceph_mon_client { | |||
| 79 | u64 last_tid; | 79 | u64 last_tid; |
| 80 | 80 | ||
| 81 | /* mds/osd map */ | 81 | /* mds/osd map */ |
| 82 | int want_mdsmap; | ||
| 82 | int want_next_osdmap; /* 1 = want, 2 = want+asked */ | 83 | int want_next_osdmap; /* 1 = want, 2 = want+asked */ |
| 83 | u32 have_osdmap, have_mdsmap; | 84 | u32 have_osdmap, have_mdsmap; |
| 84 | 85 | ||
diff --git a/fs/ceph/msgpool.h b/include/linux/ceph/msgpool.h index a362605f9368..a362605f9368 100644 --- a/fs/ceph/msgpool.h +++ b/include/linux/ceph/msgpool.h | |||
diff --git a/fs/ceph/msgr.h b/include/linux/ceph/msgr.h index 680d3d648cac..680d3d648cac 100644 --- a/fs/ceph/msgr.h +++ b/include/linux/ceph/msgr.h | |||
diff --git a/fs/ceph/osd_client.h b/include/linux/ceph/osd_client.h index ce776989ef6a..6c91fb032c39 100644 --- a/fs/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h | |||
| @@ -15,6 +15,7 @@ struct ceph_snap_context; | |||
| 15 | struct ceph_osd_request; | 15 | struct ceph_osd_request; |
| 16 | struct ceph_osd_client; | 16 | struct ceph_osd_client; |
| 17 | struct ceph_authorizer; | 17 | struct ceph_authorizer; |
| 18 | struct ceph_pagelist; | ||
| 18 | 19 | ||
| 19 | /* | 20 | /* |
| 20 | * completion callback for async writepages | 21 | * completion callback for async writepages |
| @@ -68,6 +69,7 @@ struct ceph_osd_request { | |||
| 68 | struct list_head r_unsafe_item; | 69 | struct list_head r_unsafe_item; |
| 69 | 70 | ||
| 70 | struct inode *r_inode; /* for use by callbacks */ | 71 | struct inode *r_inode; /* for use by callbacks */ |
| 72 | void *r_priv; /* ditto */ | ||
| 71 | 73 | ||
| 72 | char r_oid[40]; /* object name */ | 74 | char r_oid[40]; /* object name */ |
| 73 | int r_oid_len; | 75 | int r_oid_len; |
| @@ -80,6 +82,11 @@ struct ceph_osd_request { | |||
| 80 | struct page **r_pages; /* pages for data payload */ | 82 | struct page **r_pages; /* pages for data payload */ |
| 81 | int r_pages_from_pool; | 83 | int r_pages_from_pool; |
| 82 | int r_own_pages; /* if true, i own page list */ | 84 | int r_own_pages; /* if true, i own page list */ |
| 85 | #ifdef CONFIG_BLOCK | ||
| 86 | struct bio *r_bio; /* instead of pages */ | ||
| 87 | #endif | ||
| 88 | |||
| 89 | struct ceph_pagelist *r_trail; /* trailing part of the data */ | ||
| 83 | }; | 90 | }; |
| 84 | 91 | ||
| 85 | struct ceph_osd_client { | 92 | struct ceph_osd_client { |
| @@ -110,6 +117,42 @@ struct ceph_osd_client { | |||
| 110 | struct ceph_msgpool msgpool_op_reply; | 117 | struct ceph_msgpool msgpool_op_reply; |
| 111 | }; | 118 | }; |
| 112 | 119 | ||
| 120 | struct ceph_osd_req_op { | ||
| 121 | u16 op; /* CEPH_OSD_OP_* */ | ||
| 122 | u32 flags; /* CEPH_OSD_FLAG_* */ | ||
| 123 | union { | ||
| 124 | struct { | ||
| 125 | u64 offset, length; | ||
| 126 | u64 truncate_size; | ||
| 127 | u32 truncate_seq; | ||
| 128 | } extent; | ||
| 129 | struct { | ||
| 130 | const char *name; | ||
| 131 | u32 name_len; | ||
| 132 | const char *val; | ||
| 133 | u32 value_len; | ||
| 134 | __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ | ||
| 135 | __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ | ||
| 136 | } xattr; | ||
| 137 | struct { | ||
| 138 | const char *class_name; | ||
| 139 | __u8 class_len; | ||
| 140 | const char *method_name; | ||
| 141 | __u8 method_len; | ||
| 142 | __u8 argc; | ||
| 143 | const char *indata; | ||
| 144 | u32 indata_len; | ||
| 145 | } cls; | ||
| 146 | struct { | ||
| 147 | u64 cookie, count; | ||
| 148 | } pgls; | ||
| 149 | struct { | ||
| 150 | u64 snapid; | ||
| 151 | } snap; | ||
| 152 | }; | ||
| 153 | u32 payload_len; | ||
| 154 | }; | ||
| 155 | |||
| 113 | extern int ceph_osdc_init(struct ceph_osd_client *osdc, | 156 | extern int ceph_osdc_init(struct ceph_osd_client *osdc, |
| 114 | struct ceph_client *client); | 157 | struct ceph_client *client); |
| 115 | extern void ceph_osdc_stop(struct ceph_osd_client *osdc); | 158 | extern void ceph_osdc_stop(struct ceph_osd_client *osdc); |
| @@ -119,6 +162,30 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, | |||
| 119 | extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, | 162 | extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, |
| 120 | struct ceph_msg *msg); | 163 | struct ceph_msg *msg); |
| 121 | 164 | ||
| 165 | extern void ceph_calc_raw_layout(struct ceph_osd_client *osdc, | ||
| 166 | struct ceph_file_layout *layout, | ||
| 167 | u64 snapid, | ||
| 168 | u64 off, u64 *plen, u64 *bno, | ||
| 169 | struct ceph_osd_request *req, | ||
| 170 | struct ceph_osd_req_op *op); | ||
| 171 | |||
| 172 | extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | ||
| 173 | int flags, | ||
| 174 | struct ceph_snap_context *snapc, | ||
| 175 | struct ceph_osd_req_op *ops, | ||
| 176 | bool use_mempool, | ||
| 177 | gfp_t gfp_flags, | ||
| 178 | struct page **pages, | ||
| 179 | struct bio *bio); | ||
| 180 | |||
| 181 | extern void ceph_osdc_build_request(struct ceph_osd_request *req, | ||
| 182 | u64 off, u64 *plen, | ||
| 183 | struct ceph_osd_req_op *src_ops, | ||
| 184 | struct ceph_snap_context *snapc, | ||
| 185 | struct timespec *mtime, | ||
| 186 | const char *oid, | ||
| 187 | int oid_len); | ||
| 188 | |||
| 122 | extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, | 189 | extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, |
| 123 | struct ceph_file_layout *layout, | 190 | struct ceph_file_layout *layout, |
| 124 | struct ceph_vino vino, | 191 | struct ceph_vino vino, |
diff --git a/fs/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 970b547e510d..ba4c205cbb01 100644 --- a/fs/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | #include <linux/rbtree.h> | 4 | #include <linux/rbtree.h> |
| 5 | #include "types.h" | 5 | #include "types.h" |
| 6 | #include "ceph_fs.h" | 6 | #include "ceph_fs.h" |
| 7 | #include "crush/crush.h" | 7 | #include <linux/crush/crush.h> |
| 8 | 8 | ||
| 9 | /* | 9 | /* |
| 10 | * The osd map describes the current membership of the osd cluster and | 10 | * The osd map describes the current membership of the osd cluster and |
| @@ -125,4 +125,6 @@ extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
| 125 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, | 125 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, |
| 126 | struct ceph_pg pgid); | 126 | struct ceph_pg pgid); |
| 127 | 127 | ||
| 128 | extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); | ||
| 129 | |||
| 128 | #endif | 130 | #endif |
diff --git a/fs/ceph/pagelist.h b/include/linux/ceph/pagelist.h index e8a4187e1087..9660d6b0a35d 100644 --- a/fs/ceph/pagelist.h +++ b/include/linux/ceph/pagelist.h | |||
| @@ -8,6 +8,14 @@ struct ceph_pagelist { | |||
| 8 | void *mapped_tail; | 8 | void *mapped_tail; |
| 9 | size_t length; | 9 | size_t length; |
| 10 | size_t room; | 10 | size_t room; |
| 11 | struct list_head free_list; | ||
| 12 | size_t num_pages_free; | ||
| 13 | }; | ||
| 14 | |||
| 15 | struct ceph_pagelist_cursor { | ||
| 16 | struct ceph_pagelist *pl; /* pagelist, for error checking */ | ||
| 17 | struct list_head *page_lru; /* page in list */ | ||
| 18 | size_t room; /* room remaining to reset to */ | ||
| 11 | }; | 19 | }; |
| 12 | 20 | ||
| 13 | static inline void ceph_pagelist_init(struct ceph_pagelist *pl) | 21 | static inline void ceph_pagelist_init(struct ceph_pagelist *pl) |
| @@ -16,10 +24,23 @@ static inline void ceph_pagelist_init(struct ceph_pagelist *pl) | |||
| 16 | pl->mapped_tail = NULL; | 24 | pl->mapped_tail = NULL; |
| 17 | pl->length = 0; | 25 | pl->length = 0; |
| 18 | pl->room = 0; | 26 | pl->room = 0; |
| 27 | INIT_LIST_HEAD(&pl->free_list); | ||
| 28 | pl->num_pages_free = 0; | ||
| 19 | } | 29 | } |
| 30 | |||
| 20 | extern int ceph_pagelist_release(struct ceph_pagelist *pl); | 31 | extern int ceph_pagelist_release(struct ceph_pagelist *pl); |
| 21 | 32 | ||
| 22 | extern int ceph_pagelist_append(struct ceph_pagelist *pl, void *d, size_t l); | 33 | extern int ceph_pagelist_append(struct ceph_pagelist *pl, const void *d, size_t l); |
| 34 | |||
| 35 | extern int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space); | ||
| 36 | |||
| 37 | extern int ceph_pagelist_free_reserve(struct ceph_pagelist *pl); | ||
| 38 | |||
| 39 | extern void ceph_pagelist_set_cursor(struct ceph_pagelist *pl, | ||
| 40 | struct ceph_pagelist_cursor *c); | ||
| 41 | |||
| 42 | extern int ceph_pagelist_truncate(struct ceph_pagelist *pl, | ||
| 43 | struct ceph_pagelist_cursor *c); | ||
| 23 | 44 | ||
| 24 | static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v) | 45 | static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v) |
| 25 | { | 46 | { |
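
The pagelist additions split appending into a reserve step plus a cursor that can roll a failed append back. A sketch built only from the declarations above (exact error semantics assumed):

    static int pagelist_append_all_or_nothing(struct ceph_pagelist *pl,
                                              const void *data, size_t len)
    {
            struct ceph_pagelist_cursor c;
            int ret;

            ret = ceph_pagelist_reserve(pl, len);   /* pre-allocate pages */
            if (ret)
                    return ret;
            ceph_pagelist_set_cursor(pl, &c);       /* remember this point */
            ret = ceph_pagelist_append(pl, data, len);
            if (ret)
                    ceph_pagelist_truncate(pl, &c); /* undo partial append */
            return ret;
    }
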
diff --git a/fs/ceph/rados.h b/include/linux/ceph/rados.h index 6d5247f2e81b..6d5247f2e81b 100644 --- a/fs/ceph/rados.h +++ b/include/linux/ceph/rados.h | |||
diff --git a/fs/ceph/types.h b/include/linux/ceph/types.h index 28b35a005ec2..28b35a005ec2 100644 --- a/fs/ceph/types.h +++ b/include/linux/ceph/types.h | |||
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0c991023ee47..709dfb901d11 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
| @@ -75,7 +75,7 @@ struct cgroup_subsys_state { | |||
| 75 | 75 | ||
| 76 | unsigned long flags; | 76 | unsigned long flags; |
| 77 | /* ID for this css, if possible */ | 77 | /* ID for this css, if possible */ |
| 78 | struct css_id *id; | 78 | struct css_id __rcu *id; |
| 79 | }; | 79 | }; |
| 80 | 80 | ||
| 81 | /* bits in struct cgroup_subsys_state flags field */ | 81 | /* bits in struct cgroup_subsys_state flags field */ |
| @@ -205,7 +205,7 @@ struct cgroup { | |||
| 205 | struct list_head children; /* my children */ | 205 | struct list_head children; /* my children */ |
| 206 | 206 | ||
| 207 | struct cgroup *parent; /* my parent */ | 207 | struct cgroup *parent; /* my parent */ |
| 208 | struct dentry *dentry; /* cgroup fs entry, RCU protected */ | 208 | struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */ |
| 209 | 209 | ||
| 210 | /* Private pointers for each registered subsystem */ | 210 | /* Private pointers for each registered subsystem */ |
| 211 | struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; | 211 | struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; |
diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c1a62c56a660..320d6c94ff84 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h | |||
| @@ -16,7 +16,11 @@ | |||
| 16 | # define __release(x) __context__(x,-1) | 16 | # define __release(x) __context__(x,-1) |
| 17 | # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) | 17 | # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) |
| 18 | # define __percpu __attribute__((noderef, address_space(3))) | 18 | # define __percpu __attribute__((noderef, address_space(3))) |
| 19 | #ifdef CONFIG_SPARSE_RCU_POINTER | ||
| 20 | # define __rcu __attribute__((noderef, address_space(4))) | ||
| 21 | #else | ||
| 19 | # define __rcu | 22 | # define __rcu |
| 23 | #endif | ||
| 20 | extern void __chk_user_ptr(const volatile void __user *); | 24 | extern void __chk_user_ptr(const volatile void __user *); |
| 21 | extern void __chk_io_ptr(const volatile void __iomem *); | 25 | extern void __chk_io_ptr(const volatile void __iomem *); |
| 22 | #else | 26 | #else |
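
With CONFIG_SPARSE_RCU_POINTER enabled, the __rcu annotation above becomes a sparse address space, so a sparse build (make C=1 or C=2) flags RCU-protected pointers read without rcu_dereference() or written without rcu_assign_pointer(). A small sketch with hypothetical structs:

    #include <linux/rcupdate.h>

    struct item { int x; };
    struct holder { struct item __rcu *cur; };

    static int read_cur_x(struct holder *h)
    {
            int v;

            rcu_read_lock();
            v = rcu_dereference(h->cur)->x; /* assumes cur is non-NULL */
            rcu_read_unlock();
            return v;
    }
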
diff --git a/include/linux/cred.h b/include/linux/cred.h index 4d2c39573f36..4aaeab376446 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h | |||
| @@ -84,7 +84,7 @@ struct thread_group_cred { | |||
| 84 | atomic_t usage; | 84 | atomic_t usage; |
| 85 | pid_t tgid; /* thread group process ID */ | 85 | pid_t tgid; /* thread group process ID */ |
| 86 | spinlock_t lock; | 86 | spinlock_t lock; |
| 87 | struct key *session_keyring; /* keyring inherited over fork */ | 87 | struct key __rcu *session_keyring; /* keyring inherited over fork */ |
| 88 | struct key *process_keyring; /* keyring private to this process */ | 88 | struct key *process_keyring; /* keyring private to this process */ |
| 89 | struct rcu_head rcu; /* RCU deletion hook */ | 89 | struct rcu_head rcu; /* RCU deletion hook */ |
| 90 | }; | 90 | }; |
diff --git a/fs/ceph/crush/crush.h b/include/linux/crush/crush.h index 97e435b191f4..97e435b191f4 100644 --- a/fs/ceph/crush/crush.h +++ b/include/linux/crush/crush.h | |||
diff --git a/fs/ceph/crush/hash.h b/include/linux/crush/hash.h index 91e884230d5d..91e884230d5d 100644 --- a/fs/ceph/crush/hash.h +++ b/include/linux/crush/hash.h | |||
diff --git a/fs/ceph/crush/mapper.h b/include/linux/crush/mapper.h index c46b99c18bb0..c46b99c18bb0 100644 --- a/fs/ceph/crush/mapper.h +++ b/include/linux/crush/mapper.h | |||
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 29b3ce3f2a1d..2833452ea01c 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h | |||
| @@ -49,7 +49,6 @@ struct task_struct; | |||
| 49 | 49 | ||
| 50 | #ifdef CONFIG_LOCKDEP | 50 | #ifdef CONFIG_LOCKDEP |
| 51 | extern void debug_show_all_locks(void); | 51 | extern void debug_show_all_locks(void); |
| 52 | extern void __debug_show_held_locks(struct task_struct *task); | ||
| 53 | extern void debug_show_held_locks(struct task_struct *task); | 52 | extern void debug_show_held_locks(struct task_struct *task); |
| 54 | extern void debug_check_no_locks_freed(const void *from, unsigned long len); | 53 | extern void debug_check_no_locks_freed(const void *from, unsigned long len); |
| 55 | extern void debug_check_no_locks_held(struct task_struct *task); | 54 | extern void debug_check_no_locks_held(struct task_struct *task); |
| @@ -58,10 +57,6 @@ static inline void debug_show_all_locks(void) | |||
| 58 | { | 57 | { |
| 59 | } | 58 | } |
| 60 | 59 | ||
| 61 | static inline void __debug_show_held_locks(struct task_struct *task) | ||
| 62 | { | ||
| 63 | } | ||
| 64 | |||
| 65 | static inline void debug_show_held_locks(struct task_struct *task) | 60 | static inline void debug_show_held_locks(struct task_struct *task) |
| 66 | { | 61 | { |
| 67 | } | 62 | } |
diff --git a/include/linux/dmar.h b/include/linux/dmar.h index d7cecc90ed34..51651b76d40f 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h | |||
| @@ -106,6 +106,7 @@ struct irte { | |||
| 106 | __u64 high; | 106 | __u64 high; |
| 107 | }; | 107 | }; |
| 108 | }; | 108 | }; |
| 109 | |||
| 109 | #ifdef CONFIG_INTR_REMAP | 110 | #ifdef CONFIG_INTR_REMAP |
| 110 | extern int intr_remapping_enabled; | 111 | extern int intr_remapping_enabled; |
| 111 | extern int intr_remapping_supported(void); | 112 | extern int intr_remapping_supported(void); |
| @@ -119,11 +120,8 @@ extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count); | |||
| 119 | extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, | 120 | extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, |
| 120 | u16 sub_handle); | 121 | u16 sub_handle); |
| 121 | extern int map_irq_to_irte_handle(int irq, u16 *sub_handle); | 122 | extern int map_irq_to_irte_handle(int irq, u16 *sub_handle); |
| 122 | extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index); | ||
| 123 | extern int flush_irte(int irq); | ||
| 124 | extern int free_irte(int irq); | 123 | extern int free_irte(int irq); |
| 125 | 124 | ||
| 126 | extern int irq_remapped(int irq); | ||
| 127 | extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); | 125 | extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); |
| 128 | extern struct intel_iommu *map_ioapic_to_ir(int apic); | 126 | extern struct intel_iommu *map_ioapic_to_ir(int apic); |
| 129 | extern struct intel_iommu *map_hpet_to_ir(u8 id); | 127 | extern struct intel_iommu *map_hpet_to_ir(u8 id); |
| @@ -177,7 +175,6 @@ static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev) | |||
| 177 | return 0; | 175 | return 0; |
| 178 | } | 176 | } |
| 179 | 177 | ||
| 180 | #define irq_remapped(irq) (0) | ||
| 181 | #define enable_intr_remapping(mode) (-1) | 178 | #define enable_intr_remapping(mode) (-1) |
| 182 | #define disable_intr_remapping() (0) | 179 | #define disable_intr_remapping() (0) |
| 183 | #define reenable_intr_remapping(mode) (0) | 180 | #define reenable_intr_remapping(mode) (0) |
| @@ -187,8 +184,9 @@ static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev) | |||
| 187 | /* Can't use the common MSI interrupt functions | 184 | /* Can't use the common MSI interrupt functions |
| 188 | * since DMAR is not a pci device | 185 | * since DMAR is not a pci device |
| 189 | */ | 186 | */ |
| 190 | extern void dmar_msi_unmask(unsigned int irq); | 187 | struct irq_data; |
| 191 | extern void dmar_msi_mask(unsigned int irq); | 188 | extern void dmar_msi_unmask(struct irq_data *data); |
| 189 | extern void dmar_msi_mask(struct irq_data *data); | ||
| 192 | extern void dmar_msi_read(int irq, struct msi_msg *msg); | 190 | extern void dmar_msi_read(int irq, struct msi_msg *msg); |
| 193 | extern void dmar_msi_write(int irq, struct msi_msg *msg); | 191 | extern void dmar_msi_write(int irq, struct msi_msg *msg); |
| 194 | extern int dmar_set_interrupt(struct intel_iommu *iommu); | 192 | extern int dmar_set_interrupt(struct intel_iommu *iommu); |
diff --git a/include/linux/edac.h b/include/linux/edac.h index 7cf92e8a4196..36c66443bdfd 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #define _LINUX_EDAC_H_ | 13 | #define _LINUX_EDAC_H_ |
| 14 | 14 | ||
| 15 | #include <asm/atomic.h> | 15 | #include <asm/atomic.h> |
| 16 | #include <linux/sysdev.h> | ||
| 16 | 17 | ||
| 17 | #define EDAC_OPSTATE_INVAL -1 | 18 | #define EDAC_OPSTATE_INVAL -1 |
| 18 | #define EDAC_OPSTATE_POLL 0 | 19 | #define EDAC_OPSTATE_POLL 0 |
| @@ -22,9 +23,12 @@ | |||
| 22 | extern int edac_op_state; | 23 | extern int edac_op_state; |
| 23 | extern int edac_err_assert; | 24 | extern int edac_err_assert; |
| 24 | extern atomic_t edac_handlers; | 25 | extern atomic_t edac_handlers; |
| 26 | extern struct sysdev_class edac_class; | ||
| 25 | 27 | ||
| 26 | extern int edac_handler_set(void); | 28 | extern int edac_handler_set(void); |
| 27 | extern void edac_atomic_assert_error(void); | 29 | extern void edac_atomic_assert_error(void); |
| 30 | extern struct sysdev_class *edac_get_sysfs_class(void); | ||
| 31 | extern void edac_put_sysfs_class(void); | ||
| 28 | 32 | ||
| 29 | static inline void opstate_init(void) | 33 | static inline void opstate_init(void) |
| 30 | { | 34 | { |
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index f59ed297b661..133c0ba25e30 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h | |||
| @@ -31,7 +31,7 @@ struct embedded_fd_set { | |||
| 31 | 31 | ||
| 32 | struct fdtable { | 32 | struct fdtable { |
| 33 | unsigned int max_fds; | 33 | unsigned int max_fds; |
| 34 | struct file ** fd; /* current fd array */ | 34 | struct file __rcu **fd; /* current fd array */ |
| 35 | fd_set *close_on_exec; | 35 | fd_set *close_on_exec; |
| 36 | fd_set *open_fds; | 36 | fd_set *open_fds; |
| 37 | struct rcu_head rcu; | 37 | struct rcu_head rcu; |
| @@ -46,7 +46,7 @@ struct files_struct { | |||
| 46 | * read mostly part | 46 | * read mostly part |
| 47 | */ | 47 | */ |
| 48 | atomic_t count; | 48 | atomic_t count; |
| 49 | struct fdtable *fdt; | 49 | struct fdtable __rcu *fdt; |
| 50 | struct fdtable fdtab; | 50 | struct fdtable fdtab; |
| 51 | /* | 51 | /* |
| 52 | * written part on a separate cache line in SMP | 52 | * written part on a separate cache line in SMP |
| @@ -55,7 +55,7 @@ struct files_struct { | |||
| 55 | int next_fd; | 55 | int next_fd; |
| 56 | struct embedded_fd_set close_on_exec_init; | 56 | struct embedded_fd_set close_on_exec_init; |
| 57 | struct embedded_fd_set open_fds_init; | 57 | struct embedded_fd_set open_fds_init; |
| 58 | struct file * fd_array[NR_OPEN_DEFAULT]; | 58 | struct file __rcu * fd_array[NR_OPEN_DEFAULT]; |
| 59 | }; | 59 | }; |
| 60 | 60 | ||
| 61 | #define rcu_dereference_check_fdtable(files, fdtfd) \ | 61 | #define rcu_dereference_check_fdtable(files, fdtfd) \ |
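The __rcu annotations added throughout this series are sparse-only markers: they let `make C=1` verify that RCU-protected pointers are read through rcu_dereference() and written through rcu_assign_pointer(). A sketch of the reader side for the fd table above, simplified from fcheck_files() with the lockdep-aware accessor replaced by plain rcu_dereference():

```c
#include <linux/fdtable.h>
#include <linux/rcupdate.h>

static struct file *peek_file(struct files_struct *files, unsigned int fd)
{
	struct file *file = NULL;
	struct fdtable *fdt;

	rcu_read_lock();
	fdt = rcu_dereference(files->fdt);		/* fdt is __rcu */
	if (fd < fdt->max_fds)
		file = rcu_dereference(fdt->fd[fd]);	/* fd array too */
	rcu_read_unlock();
	/* only usable if a reference was taken first, see fget() */
	return file;
}
```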
diff --git a/include/linux/fs.h b/include/linux/fs.h index 63d069bd80b7..3168dcfb94f2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -1384,7 +1384,7 @@ struct super_block { | |||
| 1384 | * Saved mount options for lazy filesystems using | 1384 | * Saved mount options for lazy filesystems using |
| 1385 | * generic_show_options() | 1385 | * generic_show_options() |
| 1386 | */ | 1386 | */ |
| 1387 | char *s_options; | 1387 | char __rcu *s_options; |
| 1388 | }; | 1388 | }; |
| 1389 | 1389 | ||
| 1390 | extern struct timespec current_fs_time(struct super_block *sb); | 1390 | extern struct timespec current_fs_time(struct super_block *sb); |
diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 5f2f4c4d8fb0..af3f06b41dc1 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h | |||
| @@ -129,8 +129,8 @@ struct blk_scsi_cmd_filter { | |||
| 129 | struct disk_part_tbl { | 129 | struct disk_part_tbl { |
| 130 | struct rcu_head rcu_head; | 130 | struct rcu_head rcu_head; |
| 131 | int len; | 131 | int len; |
| 132 | struct hd_struct *last_lookup; | 132 | struct hd_struct __rcu *last_lookup; |
| 133 | struct hd_struct *part[]; | 133 | struct hd_struct __rcu *part[]; |
| 134 | }; | 134 | }; |
| 135 | 135 | ||
| 136 | struct gendisk { | 136 | struct gendisk { |
| @@ -149,7 +149,7 @@ struct gendisk { | |||
| 149 | * non-critical accesses use RCU. Always access through | 149 | * non-critical accesses use RCU. Always access through |
| 150 | * helpers. | 150 | * helpers. |
| 151 | */ | 151 | */ |
| 152 | struct disk_part_tbl *part_tbl; | 152 | struct disk_part_tbl __rcu *part_tbl; |
| 153 | struct hd_struct part0; | 153 | struct hd_struct part0; |
| 154 | 154 | ||
| 155 | const struct block_device_operations *fops; | 155 | const struct block_device_operations *fops; |
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index d5b387669dab..96c323ac44df 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h | |||
| @@ -64,6 +64,8 @@ | |||
| 64 | #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) | 64 | #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) |
| 65 | #define NMI_OFFSET (1UL << NMI_SHIFT) | 65 | #define NMI_OFFSET (1UL << NMI_SHIFT) |
| 66 | 66 | ||
| 67 | #define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) | ||
| 68 | |||
| 67 | #ifndef PREEMPT_ACTIVE | 69 | #ifndef PREEMPT_ACTIVE |
| 68 | #define PREEMPT_ACTIVE_BITS 1 | 70 | #define PREEMPT_ACTIVE_BITS 1 |
| 69 | #define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) | 71 | #define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) |
| @@ -82,10 +84,13 @@ | |||
| 82 | /* | 84 | /* |
| 83 | * Are we doing bottom half or hardware interrupt processing? | 85 | * Are we doing bottom half or hardware interrupt processing? |
| 84 | * Are we in a softirq context? Interrupt context? | 86 | * Are we in a softirq context? Interrupt context? |
| 87 | * in_softirq - Are we currently processing softirq or have bh disabled? | ||
| 88 | * in_serving_softirq - Are we currently processing softirq? | ||
| 85 | */ | 89 | */ |
| 86 | #define in_irq() (hardirq_count()) | 90 | #define in_irq() (hardirq_count()) |
| 87 | #define in_softirq() (softirq_count()) | 91 | #define in_softirq() (softirq_count()) |
| 88 | #define in_interrupt() (irq_count()) | 92 | #define in_interrupt() (irq_count()) |
| 93 | #define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | ||
| 89 | 94 | ||
| 90 | /* | 95 | /* |
| 91 | * Are we in NMI context? | 96 | * Are we in NMI context? |
| @@ -132,14 +137,16 @@ extern void synchronize_irq(unsigned int irq); | |||
| 132 | 137 | ||
| 133 | struct task_struct; | 138 | struct task_struct; |
| 134 | 139 | ||
| 135 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 140 | #if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING) |
| 136 | static inline void account_system_vtime(struct task_struct *tsk) | 141 | static inline void account_system_vtime(struct task_struct *tsk) |
| 137 | { | 142 | { |
| 138 | } | 143 | } |
| 144 | #else | ||
| 145 | extern void account_system_vtime(struct task_struct *tsk); | ||
| 139 | #endif | 146 | #endif |
| 140 | 147 | ||
| 141 | #if defined(CONFIG_NO_HZ) | 148 | #if defined(CONFIG_NO_HZ) |
| 142 | #if defined(CONFIG_TINY_RCU) | 149 | #if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) |
| 143 | extern void rcu_enter_nohz(void); | 150 | extern void rcu_enter_nohz(void); |
| 144 | extern void rcu_exit_nohz(void); | 151 | extern void rcu_exit_nohz(void); |
| 145 | 152 | ||
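With the new SOFTIRQ_DISABLE_OFFSET, local_bh_disable() and actual softirq execution bump the preempt count by different amounts, so the two predicates can finally be told apart. A small sketch of what each one reports; my_softirq_action is an assumed handler:

```c
#include <linux/interrupt.h>	/* local_bh_disable(), softirq_action */
#include <linux/hardirq.h>

static void bh_disabled_section(void)
{
	local_bh_disable();
	/* in_softirq() is non-zero here, in_serving_softirq() is zero */
	local_bh_enable();
}

static void my_softirq_action(struct softirq_action *a)
{
	/* both in_softirq() and in_serving_softirq() are non-zero here */
}
```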
diff --git a/include/linux/htirq.h b/include/linux/htirq.h index c96ea46737d0..70a1dbbf2093 100644 --- a/include/linux/htirq.h +++ b/include/linux/htirq.h | |||
| @@ -9,8 +9,9 @@ struct ht_irq_msg { | |||
| 9 | /* Helper functions.. */ | 9 | /* Helper functions.. */ |
| 10 | void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); | 10 | void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); |
| 11 | void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); | 11 | void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); |
| 12 | void mask_ht_irq(unsigned int irq); | 12 | struct irq_data; |
| 13 | void unmask_ht_irq(unsigned int irq); | 13 | void mask_ht_irq(struct irq_data *data); |
| 14 | void unmask_ht_irq(struct irq_data *data); | ||
| 14 | 15 | ||
| 15 | /* The arch hook for getting things started */ | 16 | /* The arch hook for getting things started */ |
| 16 | int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev); | 17 | int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev); |
diff --git a/include/linux/idr.h b/include/linux/idr.h index e968db71e33a..cdb715e58e3e 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h | |||
| @@ -50,14 +50,14 @@ | |||
| 50 | 50 | ||
| 51 | struct idr_layer { | 51 | struct idr_layer { |
| 52 | unsigned long bitmap; /* A zero bit means "space here" */ | 52 | unsigned long bitmap; /* A zero bit means "space here" */ |
| 53 | struct idr_layer *ary[1<<IDR_BITS]; | 53 | struct idr_layer __rcu *ary[1<<IDR_BITS]; |
| 54 | int count; /* When zero, we can release it */ | 54 | int count; /* When zero, we can release it */ |
| 55 | int layer; /* distance from leaf */ | 55 | int layer; /* distance from leaf */ |
| 56 | struct rcu_head rcu_head; | 56 | struct rcu_head rcu_head; |
| 57 | }; | 57 | }; |
| 58 | 58 | ||
| 59 | struct idr { | 59 | struct idr { |
| 60 | struct idr_layer *top; | 60 | struct idr_layer __rcu *top; |
| 61 | struct idr_layer *id_free; | 61 | struct idr_layer *id_free; |
| 62 | int layers; /* only valid without concurrent changes */ | 62 | int layers; /* only valid without concurrent changes */ |
| 63 | int id_free_cnt; | 63 | int id_free_cnt; |
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 1f43fa56f600..2fea6c8ef6ba 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
| @@ -82,11 +82,17 @@ extern struct group_info init_groups; | |||
| 82 | # define CAP_INIT_BSET CAP_FULL_SET | 82 | # define CAP_INIT_BSET CAP_FULL_SET |
| 83 | 83 | ||
| 84 | #ifdef CONFIG_TREE_PREEMPT_RCU | 84 | #ifdef CONFIG_TREE_PREEMPT_RCU |
| 85 | #define INIT_TASK_RCU_TREE_PREEMPT() \ | ||
| 86 | .rcu_blocked_node = NULL, | ||
| 87 | #else | ||
| 89 | #define INIT_TASK_RCU_TREE_PREEMPT() | ||
| 89 | #endif | ||
| 90 | #ifdef CONFIG_PREEMPT_RCU | ||
| 85 | #define INIT_TASK_RCU_PREEMPT(tsk) \ | 91 | #define INIT_TASK_RCU_PREEMPT(tsk) \ |
| 86 | .rcu_read_lock_nesting = 0, \ | 92 | .rcu_read_lock_nesting = 0, \ |
| 87 | .rcu_read_unlock_special = 0, \ | 93 | .rcu_read_unlock_special = 0, \ |
| 88 | .rcu_blocked_node = NULL, \ | 94 | .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ |
| 89 | .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), | 95 | INIT_TASK_RCU_TREE_PREEMPT() |
| 90 | #else | 96 | #else |
| 91 | #define INIT_TASK_RCU_PREEMPT(tsk) | 97 | #define INIT_TASK_RCU_PREEMPT(tsk) |
| 92 | #endif | 98 | #endif |
| @@ -137,8 +143,8 @@ extern struct cred init_cred; | |||
| 137 | .children = LIST_HEAD_INIT(tsk.children), \ | 143 | .children = LIST_HEAD_INIT(tsk.children), \ |
| 138 | .sibling = LIST_HEAD_INIT(tsk.sibling), \ | 144 | .sibling = LIST_HEAD_INIT(tsk.sibling), \ |
| 139 | .group_leader = &tsk, \ | 145 | .group_leader = &tsk, \ |
| 140 | .real_cred = &init_cred, \ | 146 | RCU_INIT_POINTER(.real_cred, &init_cred), \ |
| 141 | .cred = &init_cred, \ | 147 | RCU_INIT_POINTER(.cred, &init_cred), \ |
| 142 | .cred_guard_mutex = \ | 148 | .cred_guard_mutex = \ |
| 143 | __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ | 149 | __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ |
| 144 | .comm = "swapper", \ | 150 | .comm = "swapper", \ |
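RCU_INIT_POINTER() is the barrier-free cousin of rcu_assign_pointer(): it is legitimate when no reader can observe the pointer yet, as in the static init_task initializer above, and it keeps sparse happy about the __rcu address space. A minimal sketch with assumed my_* names:

```c
#include <linux/init.h>
#include <linux/rcupdate.h>

struct my_obj {
	int val;
};

static struct my_obj my_first = { .val = 1 };
static struct my_obj __rcu *my_head;

static void __init my_early_setup(void)
{
	/* no concurrent readers exist yet, so no memory barrier is needed */
	RCU_INIT_POINTER(my_head, &my_first);
}
```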
diff --git a/include/linux/input.h b/include/linux/input.h index 896a92227bc4..d6ae1761be97 100644 --- a/include/linux/input.h +++ b/include/linux/input.h | |||
| @@ -1196,7 +1196,7 @@ struct input_dev { | |||
| 1196 | int (*flush)(struct input_dev *dev, struct file *file); | 1196 | int (*flush)(struct input_dev *dev, struct file *file); |
| 1197 | int (*event)(struct input_dev *dev, unsigned int type, unsigned int code, int value); | 1197 | int (*event)(struct input_dev *dev, unsigned int type, unsigned int code, int value); |
| 1198 | 1198 | ||
| 1199 | struct input_handle *grab; | 1199 | struct input_handle __rcu *grab; |
| 1200 | 1200 | ||
| 1201 | spinlock_t event_lock; | 1201 | spinlock_t event_lock; |
| 1202 | struct mutex mutex; | 1202 | struct mutex mutex; |
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 531495db1708..414328577ced 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h | |||
| @@ -647,11 +647,8 @@ static inline void init_irq_proc(void) | |||
| 647 | struct seq_file; | 647 | struct seq_file; |
| 648 | int show_interrupts(struct seq_file *p, void *v); | 648 | int show_interrupts(struct seq_file *p, void *v); |
| 649 | 649 | ||
| 650 | struct irq_desc; | ||
| 651 | |||
| 652 | extern int early_irq_init(void); | 650 | extern int early_irq_init(void); |
| 653 | extern int arch_probe_nr_irqs(void); | 651 | extern int arch_probe_nr_irqs(void); |
| 654 | extern int arch_early_irq_init(void); | 652 | extern int arch_early_irq_init(void); |
| 655 | extern int arch_init_chip_data(struct irq_desc *desc, int node); | ||
| 656 | 653 | ||
| 657 | #endif | 654 | #endif |
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 64d529133031..3e70b21884a9 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h | |||
| @@ -53,7 +53,7 @@ struct io_context { | |||
| 53 | 53 | ||
| 54 | struct radix_tree_root radix_root; | 54 | struct radix_tree_root radix_root; |
| 55 | struct hlist_head cic_list; | 55 | struct hlist_head cic_list; |
| 56 | void *ioc_data; | 56 | void __rcu *ioc_data; |
| 57 | }; | 57 | }; |
| 58 | 58 | ||
| 59 | static inline struct io_context *ioc_task_link(struct io_context *ioc) | 59 | static inline struct io_context *ioc_task_link(struct io_context *ioc) |
diff --git a/include/linux/irq.h b/include/linux/irq.h index c03243ad84b4..e9639115dff1 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h | |||
| @@ -72,6 +72,10 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, | |||
| 72 | #define IRQ_ONESHOT 0x08000000 /* IRQ is not unmasked after hardirq */ | 72 | #define IRQ_ONESHOT 0x08000000 /* IRQ is not unmasked after hardirq */ |
| 73 | #define IRQ_NESTED_THREAD 0x10000000 /* IRQ is nested into another, no own handler thread */ | 73 | #define IRQ_NESTED_THREAD 0x10000000 /* IRQ is nested into another, no own handler thread */ |
| 74 | 74 | ||
| 75 | #define IRQF_MODIFY_MASK \ | ||
| 76 | (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ | ||
| 77 | IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL) | ||
| 78 | |||
| 75 | #ifdef CONFIG_IRQ_PER_CPU | 79 | #ifdef CONFIG_IRQ_PER_CPU |
| 76 | # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) | 80 | # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) |
| 77 | # define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) | 81 | # define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) |
| @@ -80,36 +84,77 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, | |||
| 80 | # define IRQ_NO_BALANCING_MASK IRQ_NO_BALANCING | 84 | # define IRQ_NO_BALANCING_MASK IRQ_NO_BALANCING |
| 81 | #endif | 85 | #endif |
| 82 | 86 | ||
| 83 | struct proc_dir_entry; | ||
| 84 | struct msi_desc; | 87 | struct msi_desc; |
| 85 | 88 | ||
| 86 | /** | 89 | /** |
| 90 | * struct irq_data - per irq and irq chip data passed down to chip functions | ||
| 91 | * @irq: interrupt number | ||
| 92 | * @node: node index useful for balancing | ||
| 93 | * @chip: low level interrupt hardware access | ||
| 94 | * @handler_data: per-IRQ data for the irq_chip methods | ||
| 95 | * @chip_data: platform-specific per-chip private data for the chip | ||
| 96 | * methods, to allow shared chip implementations | ||
| 97 | * @msi_desc: MSI descriptor | ||
| 98 | * @affinity: IRQ affinity on SMP | ||
| 99 | * | ||
| 100 | * The fields here need to overlay the ones in irq_desc until we | ||
| 101 | * have cleaned up the direct references and switched everything | ||
| 102 | * over to irq_data. | ||
| 103 | */ | ||
| 104 | struct irq_data { | ||
| 105 | unsigned int irq; | ||
| 106 | unsigned int node; | ||
| 107 | struct irq_chip *chip; | ||
| 108 | void *handler_data; | ||
| 109 | void *chip_data; | ||
| 110 | struct msi_desc *msi_desc; | ||
| 111 | #ifdef CONFIG_SMP | ||
| 112 | cpumask_var_t affinity; | ||
| 113 | #endif | ||
| 114 | }; | ||
| 115 | |||
| 116 | /** | ||
| 87 | * struct irq_chip - hardware interrupt chip descriptor | 117 | * struct irq_chip - hardware interrupt chip descriptor |
| 88 | * | 118 | * |
| 89 | * @name: name for /proc/interrupts | 119 | * @name: name for /proc/interrupts |
| 90 | * @startup: start up the interrupt (defaults to ->enable if NULL) | 120 | * @startup: deprecated, replaced by irq_startup |
| 91 | * @shutdown: shut down the interrupt (defaults to ->disable if NULL) | 121 | * @shutdown: deprecated, replaced by irq_shutdown |
| 92 | * @enable: enable the interrupt (defaults to chip->unmask if NULL) | 122 | * @enable: deprecated, replaced by irq_enable |
| 93 | * @disable: disable the interrupt | 123 | * @disable: deprecated, replaced by irq_disable |
| 94 | * @ack: start of a new interrupt | 124 | * @ack: deprecated, replaced by irq_ack |
| 95 | * @mask: mask an interrupt source | 125 | * @mask: deprecated, replaced by irq_mask |
| 96 | * @mask_ack: ack and mask an interrupt source | 126 | * @mask_ack: deprecated, replaced by irq_mask_ack |
| 97 | * @unmask: unmask an interrupt source | 127 | * @unmask: deprecated, replaced by irq_unmask |
| 98 | * @eoi: end of interrupt - chip level | 128 | * @eoi: deprecated, replaced by irq_eoi |
| 99 | * @end: end of interrupt - flow level | 129 | * @end: deprecated, will go away with __do_IRQ() |
| 100 | * @set_affinity: set the CPU affinity on SMP machines | 130 | * @set_affinity: deprecated, replaced by irq_set_affinity |
| 101 | * @retrigger: resend an IRQ to the CPU | 131 | * @retrigger: deprecated, replaced by irq_retrigger |
| 102 | * @set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ | 132 | * @set_type: deprecated, replaced by irq_set_type |
| 103 | * @set_wake: enable/disable power-management wake-on of an IRQ | 133 | * @set_wake: deprecated, replaced by irq_set_wake |
| 134 | * @bus_lock: deprecated, replaced by irq_bus_lock | ||
| 135 | * @bus_sync_unlock: deprecated, replaced by irq_bus_sync_unlock | ||
| 104 | * | 136 | * |
| 105 | * @bus_lock: function to lock access to slow bus (i2c) chips | 137 | * @irq_startup: start up the interrupt (defaults to ->enable if NULL) |
| 106 | * @bus_sync_unlock: function to sync and unlock slow bus (i2c) chips | 138 | * @irq_shutdown: shut down the interrupt (defaults to ->disable if NULL) |
| 139 | * @irq_enable: enable the interrupt (defaults to chip->unmask if NULL) | ||
| 140 | * @irq_disable: disable the interrupt | ||
| 141 | * @irq_ack: start of a new interrupt | ||
| 142 | * @irq_mask: mask an interrupt source | ||
| 143 | * @irq_mask_ack: ack and mask an interrupt source | ||
| 144 | * @irq_unmask: unmask an interrupt source | ||
| 145 | * @irq_eoi: end of interrupt | ||
| 146 | * @irq_set_affinity: set the CPU affinity on SMP machines | ||
| 147 | * @irq_retrigger: resend an IRQ to the CPU | ||
| 148 | * @irq_set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ | ||
| 149 | * @irq_set_wake: enable/disable power-management wake-on of an IRQ | ||
| 150 | * @irq_bus_lock: function to lock access to slow bus (i2c) chips | ||
| 151 | * @irq_bus_sync_unlock: function to sync and unlock slow bus (i2c) chips | ||
| 107 | * | 152 | * |
| 108 | * @release: release function solely used by UML | 153 | * @release: release function solely used by UML |
| 109 | * @typename: obsoleted by name, kept as migration helper | ||
| 110 | */ | 154 | */ |
| 111 | struct irq_chip { | 155 | struct irq_chip { |
| 112 | const char *name; | 156 | const char *name; |
| 157 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED | ||
| 113 | unsigned int (*startup)(unsigned int irq); | 158 | unsigned int (*startup)(unsigned int irq); |
| 114 | void (*shutdown)(unsigned int irq); | 159 | void (*shutdown)(unsigned int irq); |
| 115 | void (*enable)(unsigned int irq); | 160 | void (*enable)(unsigned int irq); |
| @@ -130,154 +175,66 @@ struct irq_chip { | |||
| 130 | 175 | ||
| 131 | void (*bus_lock)(unsigned int irq); | 176 | void (*bus_lock)(unsigned int irq); |
| 132 | void (*bus_sync_unlock)(unsigned int irq); | 177 | void (*bus_sync_unlock)(unsigned int irq); |
| 178 | #endif | ||
| 179 | unsigned int (*irq_startup)(struct irq_data *data); | ||
| 180 | void (*irq_shutdown)(struct irq_data *data); | ||
| 181 | void (*irq_enable)(struct irq_data *data); | ||
| 182 | void (*irq_disable)(struct irq_data *data); | ||
| 183 | |||
| 184 | void (*irq_ack)(struct irq_data *data); | ||
| 185 | void (*irq_mask)(struct irq_data *data); | ||
| 186 | void (*irq_mask_ack)(struct irq_data *data); | ||
| 187 | void (*irq_unmask)(struct irq_data *data); | ||
| 188 | void (*irq_eoi)(struct irq_data *data); | ||
| 189 | |||
| 190 | int (*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force); | ||
| 191 | int (*irq_retrigger)(struct irq_data *data); | ||
| 192 | int (*irq_set_type)(struct irq_data *data, unsigned int flow_type); | ||
| 193 | int (*irq_set_wake)(struct irq_data *data, unsigned int on); | ||
| 194 | |||
| 195 | void (*irq_bus_lock)(struct irq_data *data); | ||
| 196 | void (*irq_bus_sync_unlock)(struct irq_data *data); | ||
| 133 | 197 | ||
| 134 | /* Currently used only by UML, might disappear one day. */ | 198 | /* Currently used only by UML, might disappear one day. */ |
| 135 | #ifdef CONFIG_IRQ_RELEASE_METHOD | 199 | #ifdef CONFIG_IRQ_RELEASE_METHOD |
| 136 | void (*release)(unsigned int irq, void *dev_id); | 200 | void (*release)(unsigned int irq, void *dev_id); |
| 137 | #endif | 201 | #endif |
| 138 | /* | ||
| 139 | * For compatibility, ->typename is copied into ->name. | ||
| 140 | * Will disappear. | ||
| 141 | */ | ||
| 142 | const char *typename; | ||
| 143 | }; | 202 | }; |
| 144 | 203 | ||
| 145 | struct timer_rand_state; | 204 | /* This include will go away once we have isolated irq_desc usage to core code */
| 146 | struct irq_2_iommu; | 205 | #include <linux/irqdesc.h> |
| 147 | /** | ||
| 148 | * struct irq_desc - interrupt descriptor | ||
| 149 | * @irq: interrupt number for this descriptor | ||
| 150 | * @timer_rand_state: pointer to timer rand state struct | ||
| 151 | * @kstat_irqs: irq stats per cpu | ||
| 152 | * @irq_2_iommu: iommu with this irq | ||
| 153 | * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] | ||
| 154 | * @chip: low level interrupt hardware access | ||
| 155 | * @msi_desc: MSI descriptor | ||
| 156 | * @handler_data: per-IRQ data for the irq_chip methods | ||
| 157 | * @chip_data: platform-specific per-chip private data for the chip | ||
| 158 | * methods, to allow shared chip implementations | ||
| 159 | * @action: the irq action chain | ||
| 160 | * @status: status information | ||
| 161 | * @depth: disable-depth, for nested irq_disable() calls | ||
| 162 | * @wake_depth: enable depth, for multiple set_irq_wake() callers | ||
| 163 | * @irq_count: stats field to detect stalled irqs | ||
| 164 | * @last_unhandled: aging timer for unhandled count | ||
| 165 | * @irqs_unhandled: stats field for spurious unhandled interrupts | ||
| 166 | * @lock: locking for SMP | ||
| 167 | * @affinity: IRQ affinity on SMP | ||
| 168 | * @node: node index useful for balancing | ||
| 169 | * @pending_mask: pending rebalanced interrupts | ||
| 170 | * @threads_active: number of irqaction threads currently running | ||
| 171 | * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers | ||
| 172 | * @dir: /proc/irq/ procfs entry | ||
| 173 | * @name: flow handler name for /proc/interrupts output | ||
| 174 | */ | ||
| 175 | struct irq_desc { | ||
| 176 | unsigned int irq; | ||
| 177 | struct timer_rand_state *timer_rand_state; | ||
| 178 | unsigned int *kstat_irqs; | ||
| 179 | #ifdef CONFIG_INTR_REMAP | ||
| 180 | struct irq_2_iommu *irq_2_iommu; | ||
| 181 | #endif | ||
| 182 | irq_flow_handler_t handle_irq; | ||
| 183 | struct irq_chip *chip; | ||
| 184 | struct msi_desc *msi_desc; | ||
| 185 | void *handler_data; | ||
| 186 | void *chip_data; | ||
| 187 | struct irqaction *action; /* IRQ action list */ | ||
| 188 | unsigned int status; /* IRQ status */ | ||
| 189 | |||
| 190 | unsigned int depth; /* nested irq disables */ | ||
| 191 | unsigned int wake_depth; /* nested wake enables */ | ||
| 192 | unsigned int irq_count; /* For detecting broken IRQs */ | ||
| 193 | unsigned long last_unhandled; /* Aging timer for unhandled count */ | ||
| 194 | unsigned int irqs_unhandled; | ||
| 195 | raw_spinlock_t lock; | ||
| 196 | #ifdef CONFIG_SMP | ||
| 197 | cpumask_var_t affinity; | ||
| 198 | const struct cpumask *affinity_hint; | ||
| 199 | unsigned int node; | ||
| 200 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
| 201 | cpumask_var_t pending_mask; | ||
| 202 | #endif | ||
| 203 | #endif | ||
| 204 | atomic_t threads_active; | ||
| 205 | wait_queue_head_t wait_for_threads; | ||
| 206 | #ifdef CONFIG_PROC_FS | ||
| 207 | struct proc_dir_entry *dir; | ||
| 208 | #endif | ||
| 209 | const char *name; | ||
| 210 | } ____cacheline_internodealigned_in_smp; | ||
| 211 | 206 | ||
| 212 | extern void arch_init_copy_chip_data(struct irq_desc *old_desc, | 207 | /* |
| 213 | struct irq_desc *desc, int node); | 208 | * Pick up the arch-dependent methods: |
| 214 | extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); | 209 | */ |
| 210 | #include <asm/hw_irq.h> | ||
| 215 | 211 | ||
| 216 | #ifndef CONFIG_SPARSE_IRQ | 212 | #ifndef NR_IRQS_LEGACY |
| 217 | extern struct irq_desc irq_desc[NR_IRQS]; | 213 | # define NR_IRQS_LEGACY 0 |
| 218 | #endif | 214 | #endif |
| 219 | 215 | ||
| 220 | #ifdef CONFIG_NUMA_IRQ_DESC | 216 | #ifndef ARCH_IRQ_INIT_FLAGS |
| 221 | extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node); | 217 | # define ARCH_IRQ_INIT_FLAGS 0 |
| 222 | #else | ||
| 223 | static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) | ||
| 224 | { | ||
| 225 | return desc; | ||
| 226 | } | ||
| 227 | #endif | 218 | #endif |
| 228 | 219 | ||
| 229 | extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); | 220 | #define IRQ_DEFAULT_INIT_FLAGS (IRQ_DISABLED | ARCH_IRQ_INIT_FLAGS) |
| 230 | |||
| 231 | /* | ||
| 232 | * Pick up the arch-dependent methods: | ||
| 233 | */ | ||
| 234 | #include <asm/hw_irq.h> | ||
| 235 | 221 | ||
| 222 | struct irqaction; | ||
| 236 | extern int setup_irq(unsigned int irq, struct irqaction *new); | 223 | extern int setup_irq(unsigned int irq, struct irqaction *new); |
| 237 | extern void remove_irq(unsigned int irq, struct irqaction *act); | 224 | extern void remove_irq(unsigned int irq, struct irqaction *act); |
| 238 | 225 | ||
| 239 | #ifdef CONFIG_GENERIC_HARDIRQS | 226 | #ifdef CONFIG_GENERIC_HARDIRQS |
| 240 | 227 | ||
| 241 | #ifdef CONFIG_SMP | 228 | #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) |
| 242 | |||
| 243 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
| 244 | |||
| 245 | void move_native_irq(int irq); | 229 | void move_native_irq(int irq); |
| 246 | void move_masked_irq(int irq); | 230 | void move_masked_irq(int irq); |
| 247 | 231 | #else | |
| 248 | #else /* CONFIG_GENERIC_PENDING_IRQ */ | 232 | static inline void move_native_irq(int irq) { } |
| 249 | 233 | static inline void move_masked_irq(int irq) { } | |
| 250 | static inline void move_irq(int irq) | 234 | #endif |
| 251 | { | ||
| 252 | } | ||
| 253 | |||
| 254 | static inline void move_native_irq(int irq) | ||
| 255 | { | ||
| 256 | } | ||
| 257 | |||
| 258 | static inline void move_masked_irq(int irq) | ||
| 259 | { | ||
| 260 | } | ||
| 261 | |||
| 262 | #endif /* CONFIG_GENERIC_PENDING_IRQ */ | ||
| 263 | |||
| 264 | #else /* CONFIG_SMP */ | ||
| 265 | |||
| 266 | #define move_native_irq(x) | ||
| 267 | #define move_masked_irq(x) | ||
| 268 | |||
| 269 | #endif /* CONFIG_SMP */ | ||
| 270 | 235 | ||
| 271 | extern int no_irq_affinity; | 236 | extern int no_irq_affinity; |
| 272 | 237 | ||
| 273 | static inline int irq_balancing_disabled(unsigned int irq) | ||
| 274 | { | ||
| 275 | struct irq_desc *desc; | ||
| 276 | |||
| 277 | desc = irq_to_desc(irq); | ||
| 278 | return desc->status & IRQ_NO_BALANCING_MASK; | ||
| 279 | } | ||
| 280 | |||
| 281 | /* Handle irq action chains: */ | 238 | /* Handle irq action chains: */ |
| 282 | extern irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action); | 239 | extern irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action); |
| 283 | 240 | ||
| @@ -293,42 +250,10 @@ extern void handle_percpu_irq(unsigned int irq, struct irq_desc *desc); | |||
| 293 | extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc); | 250 | extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc); |
| 294 | extern void handle_nested_irq(unsigned int irq); | 251 | extern void handle_nested_irq(unsigned int irq); |
| 295 | 252 | ||
| 296 | /* | ||
| 297 | * Monolithic do_IRQ implementation. | ||
| 298 | */ | ||
| 299 | #ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ | ||
| 300 | extern unsigned int __do_IRQ(unsigned int irq); | ||
| 301 | #endif | ||
| 302 | |||
| 303 | /* | ||
| 304 | * Architectures call this to let the generic IRQ layer | ||
| 305 | * handle an interrupt. If the descriptor is attached to an | ||
| 306 | * irqchip-style controller then we call the ->handle_irq() handler, | ||
| 307 | * and it calls __do_IRQ() if it's attached to an irqtype-style controller. | ||
| 308 | */ | ||
| 309 | static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc) | ||
| 310 | { | ||
| 311 | #ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ | ||
| 312 | desc->handle_irq(irq, desc); | ||
| 313 | #else | ||
| 314 | if (likely(desc->handle_irq)) | ||
| 315 | desc->handle_irq(irq, desc); | ||
| 316 | else | ||
| 317 | __do_IRQ(irq); | ||
| 318 | #endif | ||
| 319 | } | ||
| 320 | |||
| 321 | static inline void generic_handle_irq(unsigned int irq) | ||
| 322 | { | ||
| 323 | generic_handle_irq_desc(irq, irq_to_desc(irq)); | ||
| 324 | } | ||
| 325 | |||
| 326 | /* Handling of unhandled and spurious interrupts: */ | 253 | /* Handling of unhandled and spurious interrupts: */ |
| 327 | extern void note_interrupt(unsigned int irq, struct irq_desc *desc, | 254 | extern void note_interrupt(unsigned int irq, struct irq_desc *desc, |
| 328 | irqreturn_t action_ret); | 255 | irqreturn_t action_ret); |
| 329 | 256 | ||
| 330 | /* Resending of interrupts :*/ | ||
| 331 | void check_irq_resend(struct irq_desc *desc, unsigned int irq); | ||
| 332 | 257 | ||
| 333 | /* Enable/disable irq debugging output: */ | 258 | /* Enable/disable irq debugging output: */ |
| 334 | extern int noirqdebug_setup(char *str); | 259 | extern int noirqdebug_setup(char *str); |
| @@ -351,16 +276,6 @@ extern void | |||
| 351 | __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, | 276 | __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, |
| 352 | const char *name); | 277 | const char *name); |
| 353 | 278 | ||
| 354 | /* caller has locked the irq_desc and both params are valid */ | ||
| 355 | static inline void __set_irq_handler_unlocked(int irq, | ||
| 356 | irq_flow_handler_t handler) | ||
| 357 | { | ||
| 358 | struct irq_desc *desc; | ||
| 359 | |||
| 360 | desc = irq_to_desc(irq); | ||
| 361 | desc->handle_irq = handler; | ||
| 362 | } | ||
| 363 | |||
| 364 | /* | 279 | /* |
| 365 | * Set a highlevel flow handler for a given IRQ: | 280 | * Set a highlevel flow handler for a given IRQ: |
| 366 | */ | 281 | */ |
| @@ -384,141 +299,121 @@ set_irq_chained_handler(unsigned int irq, | |||
| 384 | 299 | ||
| 385 | extern void set_irq_nested_thread(unsigned int irq, int nest); | 300 | extern void set_irq_nested_thread(unsigned int irq, int nest); |
| 386 | 301 | ||
| 387 | extern void set_irq_noprobe(unsigned int irq); | 302 | void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set); |
| 388 | extern void set_irq_probe(unsigned int irq); | 303 | |
| 304 | static inline void irq_set_status_flags(unsigned int irq, unsigned long set) | ||
| 305 | { | ||
| 306 | irq_modify_status(irq, 0, set); | ||
| 307 | } | ||
| 308 | |||
| 309 | static inline void irq_clear_status_flags(unsigned int irq, unsigned long clr) | ||
| 310 | { | ||
| 311 | irq_modify_status(irq, clr, 0); | ||
| 312 | } | ||
| 313 | |||
| 314 | static inline void set_irq_noprobe(unsigned int irq) | ||
| 315 | { | ||
| 316 | irq_modify_status(irq, 0, IRQ_NOPROBE); | ||
| 317 | } | ||
| 318 | |||
| 319 | static inline void set_irq_probe(unsigned int irq) | ||
| 320 | { | ||
| 321 | irq_modify_status(irq, IRQ_NOPROBE, 0); | ||
| 322 | } | ||
| 389 | 323 | ||
| 390 | /* Handle dynamic irq creation and destruction */ | 324 | /* Handle dynamic irq creation and destruction */ |
| 391 | extern unsigned int create_irq_nr(unsigned int irq_want, int node); | 325 | extern unsigned int create_irq_nr(unsigned int irq_want, int node); |
| 392 | extern int create_irq(void); | 326 | extern int create_irq(void); |
| 393 | extern void destroy_irq(unsigned int irq); | 327 | extern void destroy_irq(unsigned int irq); |
| 394 | 328 | ||
| 395 | /* Test to see if a driver has successfully requested an irq */ | 329 | /* |
| 396 | static inline int irq_has_action(unsigned int irq) | 330 | * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and |
| 331 | * irq_free_desc instead. | ||
| 332 | */ | ||
| 333 | extern void dynamic_irq_cleanup(unsigned int irq); | ||
| 334 | static inline void dynamic_irq_init(unsigned int irq) | ||
| 397 | { | 335 | { |
| 398 | struct irq_desc *desc = irq_to_desc(irq); | 336 | dynamic_irq_cleanup(irq); |
| 399 | return desc->action != NULL; | ||
| 400 | } | 337 | } |
| 401 | 338 | ||
| 402 | /* Dynamic irq helper functions */ | ||
| 403 | extern void dynamic_irq_init(unsigned int irq); | ||
| 404 | void dynamic_irq_init_keep_chip_data(unsigned int irq); | ||
| 405 | extern void dynamic_irq_cleanup(unsigned int irq); | ||
| 406 | void dynamic_irq_cleanup_keep_chip_data(unsigned int irq); | ||
| 407 | |||
| 408 | /* Set/get chip/data for an IRQ: */ | 339 | /* Set/get chip/data for an IRQ: */ |
| 409 | extern int set_irq_chip(unsigned int irq, struct irq_chip *chip); | 340 | extern int set_irq_chip(unsigned int irq, struct irq_chip *chip); |
| 410 | extern int set_irq_data(unsigned int irq, void *data); | 341 | extern int set_irq_data(unsigned int irq, void *data); |
| 411 | extern int set_irq_chip_data(unsigned int irq, void *data); | 342 | extern int set_irq_chip_data(unsigned int irq, void *data); |
| 412 | extern int set_irq_type(unsigned int irq, unsigned int type); | 343 | extern int set_irq_type(unsigned int irq, unsigned int type); |
| 413 | extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); | 344 | extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); |
| 345 | extern struct irq_data *irq_get_irq_data(unsigned int irq); | ||
| 414 | 346 | ||
| 415 | #define get_irq_chip(irq) (irq_to_desc(irq)->chip) | 347 | static inline struct irq_chip *get_irq_chip(unsigned int irq) |
| 416 | #define get_irq_chip_data(irq) (irq_to_desc(irq)->chip_data) | ||
| 417 | #define get_irq_data(irq) (irq_to_desc(irq)->handler_data) | ||
| 418 | #define get_irq_msi(irq) (irq_to_desc(irq)->msi_desc) | ||
| 419 | |||
| 420 | #define get_irq_desc_chip(desc) ((desc)->chip) | ||
| 421 | #define get_irq_desc_chip_data(desc) ((desc)->chip_data) | ||
| 422 | #define get_irq_desc_data(desc) ((desc)->handler_data) | ||
| 423 | #define get_irq_desc_msi(desc) ((desc)->msi_desc) | ||
| 424 | |||
| 425 | #endif /* CONFIG_GENERIC_HARDIRQS */ | ||
| 426 | |||
| 427 | #endif /* !CONFIG_S390 */ | ||
| 428 | |||
| 429 | #ifdef CONFIG_SMP | ||
| 430 | /** | ||
| 431 | * alloc_desc_masks - allocate cpumasks for irq_desc | ||
| 432 | * @desc: pointer to irq_desc struct | ||
| 433 | * @node: node which will be handling the cpumasks | ||
| 434 | * @boot: true if need bootmem | ||
| 435 | * | ||
| 436 | * Allocates affinity and pending_mask cpumask if required. | ||
| 437 | * Returns true if successful (or not required). | ||
| 438 | */ | ||
| 439 | static inline bool alloc_desc_masks(struct irq_desc *desc, int node, | ||
| 440 | bool boot) | ||
| 441 | { | 348 | { |
| 442 | gfp_t gfp = GFP_ATOMIC; | 349 | struct irq_data *d = irq_get_irq_data(irq); |
| 443 | 350 | return d ? d->chip : NULL; | |
| 444 | if (boot) | 351 | } |
| 445 | gfp = GFP_NOWAIT; | ||
| 446 | |||
| 447 | #ifdef CONFIG_CPUMASK_OFFSTACK | ||
| 448 | if (!alloc_cpumask_var_node(&desc->affinity, gfp, node)) | ||
| 449 | return false; | ||
| 450 | 352 | ||
| 451 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 353 | static inline struct irq_chip *irq_data_get_irq_chip(struct irq_data *d) |
| 452 | if (!alloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { | 354 | { |
| 453 | free_cpumask_var(desc->affinity); | 355 | return d->chip; |
| 454 | return false; | ||
| 455 | } | ||
| 456 | #endif | ||
| 457 | #endif | ||
| 458 | return true; | ||
| 459 | } | 356 | } |
| 460 | 357 | ||
| 461 | static inline void init_desc_masks(struct irq_desc *desc) | 358 | static inline void *get_irq_chip_data(unsigned int irq) |
| 462 | { | 359 | { |
| 463 | cpumask_setall(desc->affinity); | 360 | struct irq_data *d = irq_get_irq_data(irq); |
| 464 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 361 | return d ? d->chip_data : NULL; |
| 465 | cpumask_clear(desc->pending_mask); | ||
| 466 | #endif | ||
| 467 | } | 362 | } |
| 468 | 363 | ||
| 469 | /** | 364 | static inline void *irq_data_get_irq_chip_data(struct irq_data *d) |
| 470 | * init_copy_desc_masks - copy cpumasks for irq_desc | 365 | { |
| 471 | * @old_desc: pointer to old irq_desc struct | 366 | return d->chip_data; |
| 472 | * @new_desc: pointer to new irq_desc struct | 367 | } |
| 473 | * | ||
| 474 | * Insures affinity and pending_masks are copied to new irq_desc. | ||
| 475 | * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the | ||
| 476 | * irq_desc struct so the copy is redundant. | ||
| 477 | */ | ||
| 478 | 368 | ||
| 479 | static inline void init_copy_desc_masks(struct irq_desc *old_desc, | 369 | static inline void *get_irq_data(unsigned int irq) |
| 480 | struct irq_desc *new_desc) | ||
| 481 | { | 370 | { |
| 482 | #ifdef CONFIG_CPUMASK_OFFSTACK | 371 | struct irq_data *d = irq_get_irq_data(irq); |
| 483 | cpumask_copy(new_desc->affinity, old_desc->affinity); | 372 | return d ? d->handler_data : NULL; |
| 373 | } | ||
| 484 | 374 | ||
| 485 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 375 | static inline void *irq_data_get_irq_data(struct irq_data *d) |
| 486 | cpumask_copy(new_desc->pending_mask, old_desc->pending_mask); | 376 | { |
| 487 | #endif | 377 | return d->handler_data; |
| 488 | #endif | ||
| 489 | } | 378 | } |
| 490 | 379 | ||
| 491 | static inline void free_desc_masks(struct irq_desc *old_desc, | 380 | static inline struct msi_desc *get_irq_msi(unsigned int irq) |
| 492 | struct irq_desc *new_desc) | ||
| 493 | { | 381 | { |
| 494 | free_cpumask_var(old_desc->affinity); | 382 | struct irq_data *d = irq_get_irq_data(irq); |
| 383 | return d ? d->msi_desc : NULL; | ||
| 384 | } | ||
| 495 | 385 | ||
| 496 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 386 | static inline struct msi_desc *irq_data_get_msi(struct irq_data *d) |
| 497 | free_cpumask_var(old_desc->pending_mask); | 387 | { |
| 498 | #endif | 388 | return d->msi_desc; |
| 499 | } | 389 | } |
| 500 | 390 | ||
| 501 | #else /* !CONFIG_SMP */ | 391 | int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node); |
| 392 | void irq_free_descs(unsigned int irq, unsigned int cnt); | ||
| 393 | int irq_reserve_irqs(unsigned int from, unsigned int cnt); | ||
| 502 | 394 | ||
| 503 | static inline bool alloc_desc_masks(struct irq_desc *desc, int node, | 395 | static inline int irq_alloc_desc(int node) |
| 504 | bool boot) | ||
| 505 | { | 396 | { |
| 506 | return true; | 397 | return irq_alloc_descs(-1, 0, 1, node); |
| 507 | } | 398 | } |
| 508 | 399 | ||
| 509 | static inline void init_desc_masks(struct irq_desc *desc) | 400 | static inline int irq_alloc_desc_at(unsigned int at, int node) |
| 510 | { | 401 | { |
| 402 | return irq_alloc_descs(at, at, 1, node); | ||
| 511 | } | 403 | } |
| 512 | 404 | ||
| 513 | static inline void init_copy_desc_masks(struct irq_desc *old_desc, | 405 | static inline int irq_alloc_desc_from(unsigned int from, int node) |
| 514 | struct irq_desc *new_desc) | ||
| 515 | { | 406 | { |
| 407 | return irq_alloc_descs(-1, from, 1, node); | ||
| 516 | } | 408 | } |
| 517 | 409 | ||
| 518 | static inline void free_desc_masks(struct irq_desc *old_desc, | 410 | static inline void irq_free_desc(unsigned int irq) |
| 519 | struct irq_desc *new_desc) | ||
| 520 | { | 411 | { |
| 412 | irq_free_descs(irq, 1); | ||
| 521 | } | 413 | } |
| 522 | #endif /* CONFIG_SMP */ | 414 | |
| 415 | #endif /* CONFIG_GENERIC_HARDIRQS */ | ||
| 416 | |||
| 417 | #endif /* !CONFIG_S390 */ | ||
| 523 | 418 | ||
| 524 | #endif /* _LINUX_IRQ_H */ | 419 | #endif /* _LINUX_IRQ_H */ |
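The net effect of the irq.h rework: chip callbacks now receive a struct irq_data instead of a bare irq number, and the old unsigned-int methods only survive while CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED is unset, as a transition aid. A minimal converted chip might look like the sketch below; the my_* names and register offsets are illustrative assumptions, not part of this patch:

```c
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/bitops.h>

#define MY_ACK		0x00	/* assumed register layout */
#define MY_MASK_SET	0x04
#define MY_MASK_CLR	0x08

static void __iomem *my_regs;	/* assumed to be ioremap()ed elsewhere */

/* sketch assumes irq numbers 0..31 map to bits in one register */
static void my_irq_ack(struct irq_data *d)
{
	writel(BIT(d->irq), my_regs + MY_ACK);
}

static void my_irq_mask(struct irq_data *d)
{
	writel(BIT(d->irq), my_regs + MY_MASK_SET);
}

static void my_irq_unmask(struct irq_data *d)
{
	writel(BIT(d->irq), my_regs + MY_MASK_CLR);
}

static struct irq_chip my_chip = {
	.name		= "my-chip",
	.irq_ack	= my_irq_ack,
	.irq_mask	= my_irq_mask,
	.irq_unmask	= my_irq_unmask,
};
```

A converted driver finds everything it used to look up by hand in the argument: d->irq is the linux irq number and d->chip_data the private state. Descriptor allocation follows the same rework: irq_alloc_desc(node) hands out a free number, and set_irq_chip()/set_irq_chip_data() attach the chip before the flow handler is installed.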
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h new file mode 100644 index 000000000000..979c68cc7458 --- /dev/null +++ b/include/linux/irqdesc.h | |||
| @@ -0,0 +1,159 @@ | |||
| 1 | #ifndef _LINUX_IRQDESC_H | ||
| 2 | #define _LINUX_IRQDESC_H | ||
| 3 | |||
| 4 | /* | ||
| 5 | * Core internal functions to deal with irq descriptors | ||
| 6 | * | ||
| 7 | * This include will move to kernel/irq once we have cleaned up the tree. | ||
| 8 | * For now it's included from <linux/irq.h> | ||
| 9 | */ | ||
| 10 | |||
| 11 | struct proc_dir_entry; | ||
| 12 | struct timer_rand_state; | ||
| 13 | /** | ||
| 14 | * struct irq_desc - interrupt descriptor | ||
| 15 | * @irq_data: per irq and chip data passed down to chip functions | ||
| 16 | * @timer_rand_state: pointer to timer rand state struct | ||
| 17 | * @kstat_irqs: irq stats per cpu | ||
| 18 | * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] | ||
| 19 | * @action: the irq action chain | ||
| 20 | * @status: status information | ||
| 21 | * @depth: disable-depth, for nested irq_disable() calls | ||
| 22 | * @wake_depth: enable depth, for multiple set_irq_wake() callers | ||
| 23 | * @irq_count: stats field to detect stalled irqs | ||
| 24 | * @last_unhandled: aging timer for unhandled count | ||
| 25 | * @irqs_unhandled: stats field for spurious unhandled interrupts | ||
| 26 | * @lock: locking for SMP | ||
| 27 | * @pending_mask: pending rebalanced interrupts | ||
| 28 | * @threads_active: number of irqaction threads currently running | ||
| 29 | * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers | ||
| 30 | * @dir: /proc/irq/ procfs entry | ||
| 31 | * @name: flow handler name for /proc/interrupts output | ||
| 32 | */ | ||
| 33 | struct irq_desc { | ||
| 34 | |||
| 35 | #ifdef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED | ||
| 36 | struct irq_data irq_data; | ||
| 37 | #else | ||
| 38 | /* | ||
| 39 | * This union will go away once we have fixed the direct access | ||
| 40 | * to irq_desc all over the place. The direct fields are a 1:1 | ||
| 41 | * overlay of irq_data. | ||
| 42 | */ | ||
| 43 | union { | ||
| 44 | struct irq_data irq_data; | ||
| 45 | struct { | ||
| 46 | unsigned int irq; | ||
| 47 | unsigned int node; | ||
| 48 | struct irq_chip *chip; | ||
| 49 | void *handler_data; | ||
| 50 | void *chip_data; | ||
| 51 | struct msi_desc *msi_desc; | ||
| 52 | #ifdef CONFIG_SMP | ||
| 53 | cpumask_var_t affinity; | ||
| 54 | #endif | ||
| 55 | }; | ||
| 56 | }; | ||
| 57 | #endif | ||
| 58 | |||
| 59 | struct timer_rand_state *timer_rand_state; | ||
| 60 | unsigned int *kstat_irqs; | ||
| 61 | irq_flow_handler_t handle_irq; | ||
| 62 | struct irqaction *action; /* IRQ action list */ | ||
| 63 | unsigned int status; /* IRQ status */ | ||
| 64 | |||
| 65 | unsigned int depth; /* nested irq disables */ | ||
| 66 | unsigned int wake_depth; /* nested wake enables */ | ||
| 67 | unsigned int irq_count; /* For detecting broken IRQs */ | ||
| 68 | unsigned long last_unhandled; /* Aging timer for unhandled count */ | ||
| 69 | unsigned int irqs_unhandled; | ||
| 70 | raw_spinlock_t lock; | ||
| 71 | #ifdef CONFIG_SMP | ||
| 72 | const struct cpumask *affinity_hint; | ||
| 73 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
| 74 | cpumask_var_t pending_mask; | ||
| 75 | #endif | ||
| 76 | #endif | ||
| 77 | atomic_t threads_active; | ||
| 78 | wait_queue_head_t wait_for_threads; | ||
| 79 | #ifdef CONFIG_PROC_FS | ||
| 80 | struct proc_dir_entry *dir; | ||
| 81 | #endif | ||
| 82 | const char *name; | ||
| 83 | } ____cacheline_internodealigned_in_smp; | ||
| 84 | |||
| 85 | #ifndef CONFIG_SPARSE_IRQ | ||
| 86 | extern struct irq_desc irq_desc[NR_IRQS]; | ||
| 87 | #endif | ||
| 88 | |||
| 89 | /* Will be removed once the last users in power and sh are gone */ | ||
| 90 | extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); | ||
| 91 | static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) | ||
| 92 | { | ||
| 93 | return desc; | ||
| 94 | } | ||
| 95 | |||
| 96 | #ifdef CONFIG_GENERIC_HARDIRQS | ||
| 97 | |||
| 98 | #define get_irq_desc_chip(desc) ((desc)->irq_data.chip) | ||
| 99 | #define get_irq_desc_chip_data(desc) ((desc)->irq_data.chip_data) | ||
| 100 | #define get_irq_desc_data(desc) ((desc)->irq_data.handler_data) | ||
| 101 | #define get_irq_desc_msi(desc) ((desc)->irq_data.msi_desc) | ||
| 102 | |||
| 103 | /* | ||
| 104 | * Monolithic do_IRQ implementation. | ||
| 105 | */ | ||
| 106 | #ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ | ||
| 107 | extern unsigned int __do_IRQ(unsigned int irq); | ||
| 108 | #endif | ||
| 109 | |||
| 110 | /* | ||
| 111 | * Architectures call this to let the generic IRQ layer | ||
| 112 | * handle an interrupt. If the descriptor is attached to an | ||
| 113 | * irqchip-style controller then we call the ->handle_irq() handler, | ||
| 114 | * and it calls __do_IRQ() if it's attached to an irqtype-style controller. | ||
| 115 | */ | ||
| 116 | static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc) | ||
| 117 | { | ||
| 118 | #ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ | ||
| 119 | desc->handle_irq(irq, desc); | ||
| 120 | #else | ||
| 121 | if (likely(desc->handle_irq)) | ||
| 122 | desc->handle_irq(irq, desc); | ||
| 123 | else | ||
| 124 | __do_IRQ(irq); | ||
| 125 | #endif | ||
| 126 | } | ||
| 127 | |||
| 128 | static inline void generic_handle_irq(unsigned int irq) | ||
| 129 | { | ||
| 130 | generic_handle_irq_desc(irq, irq_to_desc(irq)); | ||
| 131 | } | ||
| 132 | |||
| 133 | /* Test to see if a driver has successfully requested an irq */ | ||
| 134 | static inline int irq_has_action(unsigned int irq) | ||
| 135 | { | ||
| 136 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 137 | return desc->action != NULL; | ||
| 138 | } | ||
| 139 | |||
| 140 | static inline int irq_balancing_disabled(unsigned int irq) | ||
| 141 | { | ||
| 142 | struct irq_desc *desc; | ||
| 143 | |||
| 144 | desc = irq_to_desc(irq); | ||
| 145 | return desc->status & IRQ_NO_BALANCING_MASK; | ||
| 146 | } | ||
| 147 | |||
| 148 | /* caller has locked the irq_desc and both params are valid */ | ||
| 149 | static inline void __set_irq_handler_unlocked(int irq, | ||
| 150 | irq_flow_handler_t handler) | ||
| 151 | { | ||
| 152 | struct irq_desc *desc; | ||
| 153 | |||
| 154 | desc = irq_to_desc(irq); | ||
| 155 | desc->handle_irq = handler; | ||
| 156 | } | ||
| 157 | #endif | ||
| 158 | |||
| 159 | #endif | ||
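irqdesc.h is carved out so that only core code (and the few remaining arch users) sees struct irq_desc directly. The entry path an architecture is expected to use is unchanged; a hedged sketch, where my_read_pending() stands in for the real hardware accessor:

```c
#include <linux/irq.h>
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <asm/irq_regs.h>

extern unsigned int my_read_pending(void);	/* assumed hw accessor */

asmlinkage void my_arch_do_IRQ(struct pt_regs *regs)
{
	struct pt_regs *old_regs = set_irq_regs(regs);
	unsigned int irq = my_read_pending();

	irq_enter();
	generic_handle_irq(irq);	/* dispatches desc->handle_irq() */
	irq_exit();

	set_irq_regs(old_regs);
}
```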
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 7bf89bc8cbca..05aa8c23483f 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | 25 | ||
| 26 | extern int nr_irqs; | 26 | extern int nr_irqs; |
| 27 | extern struct irq_desc *irq_to_desc(unsigned int irq); | 27 | extern struct irq_desc *irq_to_desc(unsigned int irq); |
| 28 | unsigned int irq_get_next_irq(unsigned int offset); | ||
| 28 | 29 | ||
| 29 | # define for_each_irq_desc(irq, desc) \ | 30 | # define for_each_irq_desc(irq, desc) \ |
| 30 | for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ | 31 | for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ |
| @@ -47,6 +48,10 @@ extern struct irq_desc *irq_to_desc(unsigned int irq); | |||
| 47 | #define irq_node(irq) 0 | 48 | #define irq_node(irq) 0 |
| 48 | #endif | 49 | #endif |
| 49 | 50 | ||
| 51 | # define for_each_active_irq(irq) \ | ||
| 52 | for (irq = irq_get_next_irq(0); irq < nr_irqs; \ | ||
| 53 | irq = irq_get_next_irq(irq + 1)) | ||
| 54 | |||
| 50 | #endif /* CONFIG_GENERIC_HARDIRQS */ | 55 | #endif /* CONFIG_GENERIC_HARDIRQS */ |
| 51 | 56 | ||
| 52 | #define for_each_irq_nr(irq) \ | 57 | #define for_each_irq_nr(irq) \ |
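for_each_active_irq() complements the sparse-irq work: it walks only the irq numbers that actually have a descriptor allocated, via the new irq_get_next_irq(), instead of probing every value up to nr_irqs. A trivial sketch:

```c
#include <linux/irqnr.h>
#include <linux/kernel.h>

static void dump_active_irqs(void)
{
	unsigned int irq;

	/* visits only allocated descriptors, skipping the holes */
	for_each_active_irq(irq)
		printk(KERN_DEBUG "irq %u has a descriptor\n", irq);
}
```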
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2b0a35e6bc69..1759ba5adce8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h | |||
| @@ -58,7 +58,18 @@ extern const char linux_proc_banner[]; | |||
| 58 | 58 | ||
| 59 | #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) | 59 | #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) |
| 60 | #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) | 60 | #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) |
| 61 | #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) | 61 | #define roundup(x, y) ( \ |
| 62 | { \ | ||
| 63 | typeof(y) __y = y; \ | ||
| 64 | (((x) + (__y - 1)) / __y) * __y; \ | ||
| 65 | } \ | ||
| 66 | ) | ||
| 67 | #define rounddown(x, y) ( \ | ||
| 68 | { \ | ||
| 69 | typeof(x) __x = (x); \ | ||
| 70 | __x - (__x % (y)); \ | ||
| 71 | } \ | ||
| 72 | ) | ||
| 62 | #define DIV_ROUND_CLOSEST(x, divisor)( \ | 73 | #define DIV_ROUND_CLOSEST(x, divisor)( \ |
| 63 | { \ | 74 | { \ |
| 64 | typeof(divisor) __divisor = divisor; \ | 75 | typeof(divisor) __divisor = divisor; \ |
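The roundup() rewrite is not cosmetic: by capturing y in a statement expression it evaluates the divisor once instead of three times, which matters when the argument has side effects or is expensive; the new rounddown() gets the same single-evaluation treatment for x. A quick sketch (my_chunk_size() is an assumed accessor):

```c
#include <linux/kernel.h>

extern unsigned int my_chunk_size(void);	/* assumed, possibly expensive */

static void round_demo(void)
{
	unsigned int up   = roundup(1000, 512);		/* -> 1024 */
	unsigned int down = rounddown(1000, 512);	/* -> 512  */

	/* my_chunk_size() now runs exactly once inside roundup() */
	unsigned int aligned = roundup(4096, my_chunk_size());

	(void)up; (void)down; (void)aligned;
}
```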
diff --git a/include/linux/key.h b/include/linux/key.h index cd50dfa1d4c2..3db0adce1fda 100644 --- a/include/linux/key.h +++ b/include/linux/key.h | |||
| @@ -178,8 +178,9 @@ struct key { | |||
| 178 | */ | 178 | */ |
| 179 | union { | 179 | union { |
| 180 | unsigned long value; | 180 | unsigned long value; |
| 181 | void __rcu *rcudata; | ||
| 181 | void *data; | 182 | void *data; |
| 182 | struct keyring_list *subscriptions; | 183 | struct keyring_list __rcu *subscriptions; |
| 183 | } payload; | 184 | } payload; |
| 184 | }; | 185 | }; |
| 185 | 186 | ||
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c13cc48697aa..ac740b26eb10 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
| @@ -205,7 +205,7 @@ struct kvm { | |||
| 205 | 205 | ||
| 206 | struct mutex irq_lock; | 206 | struct mutex irq_lock; |
| 207 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 207 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
| 208 | struct kvm_irq_routing_table *irq_routing; | 208 | struct kvm_irq_routing_table __rcu *irq_routing; |
| 209 | struct hlist_head mask_notifier_list; | 209 | struct hlist_head mask_notifier_list; |
| 210 | struct hlist_head irq_ack_notifier_list; | 210 | struct hlist_head irq_ack_notifier_list; |
| 211 | #endif | 211 | #endif |
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 06aed8305bf3..71c09b26c759 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h | |||
| @@ -32,6 +32,17 @@ extern int lock_stat; | |||
| 32 | #define MAX_LOCKDEP_SUBCLASSES 8UL | 32 | #define MAX_LOCKDEP_SUBCLASSES 8UL |
| 33 | 33 | ||
| 34 | /* | 34 | /* |
| 35 | * NR_LOCKDEP_CACHING_CLASSES ... Number of classes | ||
| 36 | * cached in each lockdep_map instance | ||
| 37 | * | ||
| 38 | * Currently the main class (subclass == 0) and the single-depth | ||
| 39 | * subclass are cached in lockdep_map. This optimization mainly | ||
| 40 | * targets rq->lock: double_rq_lock() acquires it at single depth | ||
| 41 | * under heavy contention. | ||
| 42 | */ | ||
| 43 | #define NR_LOCKDEP_CACHING_CLASSES 2 | ||
| 44 | |||
| 45 | /* | ||
| 35 | * Lock-classes are keyed via unique addresses, by embedding the | 46 | * Lock-classes are keyed via unique addresses, by embedding the |
| 36 | * lockclass-key into the kernel (or module) .data section. (For | 47 | * lockclass-key into the kernel (or module) .data section. (For |
| 37 | * static locks we use the lock address itself as the key.) | 48 | * static locks we use the lock address itself as the key.) |
| @@ -138,7 +149,7 @@ void clear_lock_stats(struct lock_class *class); | |||
| 138 | */ | 149 | */ |
| 139 | struct lockdep_map { | 150 | struct lockdep_map { |
| 140 | struct lock_class_key *key; | 151 | struct lock_class_key *key; |
| 141 | struct lock_class *class_cache; | 152 | struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES]; |
| 142 | const char *name; | 153 | const char *name; |
| 143 | #ifdef CONFIG_LOCK_STAT | 154 | #ifdef CONFIG_LOCK_STAT |
| 144 | int cpu; | 155 | int cpu; |
| @@ -424,14 +435,6 @@ do { \ | |||
| 424 | 435 | ||
| 425 | #endif /* CONFIG_LOCKDEP */ | 436 | #endif /* CONFIG_LOCKDEP */ |
| 426 | 437 | ||
| 427 | #ifdef CONFIG_GENERIC_HARDIRQS | ||
| 428 | extern void early_init_irq_lock_class(void); | ||
| 429 | #else | ||
| 430 | static inline void early_init_irq_lock_class(void) | ||
| 431 | { | ||
| 432 | } | ||
| 433 | #endif | ||
| 434 | |||
| 435 | #ifdef CONFIG_TRACE_IRQFLAGS | 438 | #ifdef CONFIG_TRACE_IRQFLAGS |
| 436 | extern void early_boot_irqs_off(void); | 439 | extern void early_boot_irqs_off(void); |
| 437 | extern void early_boot_irqs_on(void); | 440 | extern void early_boot_irqs_on(void); |
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ee7e258627f9..cb57d657ce4d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
| @@ -299,7 +299,7 @@ struct mm_struct { | |||
| 299 | * new_owner->mm == mm | 299 | * new_owner->mm == mm |
| 300 | * new_owner->alloc_lock is held | 300 | * new_owner->alloc_lock is held |
| 301 | */ | 301 | */ |
| 302 | struct task_struct *owner; | 302 | struct task_struct __rcu *owner; |
| 303 | #endif | 303 | #endif |
| 304 | 304 | ||
| 305 | #ifdef CONFIG_PROC_FS | 305 | #ifdef CONFIG_PROC_FS |
diff --git a/include/linux/msi.h b/include/linux/msi.h index 91b05c171854..05acced439a3 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h | |||
| @@ -10,12 +10,13 @@ struct msi_msg { | |||
| 10 | }; | 10 | }; |
| 11 | 11 | ||
| 12 | /* Helper functions */ | 12 | /* Helper functions */ |
| 13 | struct irq_desc; | 13 | struct irq_data; |
| 14 | extern void mask_msi_irq(unsigned int irq); | 14 | struct msi_desc; |
| 15 | extern void unmask_msi_irq(unsigned int irq); | 15 | extern void mask_msi_irq(struct irq_data *data); |
| 16 | extern void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); | 16 | extern void unmask_msi_irq(struct irq_data *data); |
| 17 | extern void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); | 17 | extern void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); |
| 18 | extern void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); | 18 | extern void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg); |
| 19 | extern void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); | ||
| 19 | extern void read_msi_msg(unsigned int irq, struct msi_msg *msg); | 20 | extern void read_msi_msg(unsigned int irq, struct msi_msg *msg); |
| 20 | extern void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); | 21 | extern void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); |
| 21 | extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); | 22 | extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); |
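Because mask_msi_irq()/unmask_msi_irq() now take struct irq_data, they match the new irq_chip callback signatures exactly and can be plugged in directly, which is what the arch conversions elsewhere in this merge do. An illustrative sketch (the chip itself is not from this hunk):

```c
#include <linux/irq.h>
#include <linux/msi.h>

static struct irq_chip my_msi_chip = {
	.name		= "PCI-MSI",
	/* the generic MSI helpers slot straight into the new callbacks */
	.irq_mask	= mask_msi_irq,
	.irq_unmask	= unmask_msi_irq,
};
```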
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 9ed534c991b9..70cd0603911c 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h | |||
| @@ -39,8 +39,9 @@ enum ctattr_type { | |||
| 39 | CTA_TUPLE_MASTER, | 39 | CTA_TUPLE_MASTER, |
| 40 | CTA_NAT_SEQ_ADJ_ORIG, | 40 | CTA_NAT_SEQ_ADJ_ORIG, |
| 41 | CTA_NAT_SEQ_ADJ_REPLY, | 41 | CTA_NAT_SEQ_ADJ_REPLY, |
| 42 | CTA_SECMARK, | 42 | CTA_SECMARK, /* obsolete */ |
| 43 | CTA_ZONE, | 43 | CTA_ZONE, |
| 44 | CTA_SECCTX, | ||
| 44 | __CTA_MAX | 45 | __CTA_MAX |
| 45 | }; | 46 | }; |
| 46 | #define CTA_MAX (__CTA_MAX - 1) | 47 | #define CTA_MAX (__CTA_MAX - 1) |
| @@ -172,4 +173,11 @@ enum ctattr_help { | |||
| 172 | }; | 173 | }; |
| 173 | #define CTA_HELP_MAX (__CTA_HELP_MAX - 1) | 174 | #define CTA_HELP_MAX (__CTA_HELP_MAX - 1) |
| 174 | 175 | ||
| 176 | enum ctattr_secctx { | ||
| 177 | CTA_SECCTX_UNSPEC, | ||
| 178 | CTA_SECCTX_NAME, | ||
| 179 | __CTA_SECCTX_MAX | ||
| 180 | }; | ||
| 181 | #define CTA_SECCTX_MAX (__CTA_SECCTX_MAX - 1) | ||
| 182 | |||
| 175 | #endif /* _IPCONNTRACK_NETLINK_H */ | 183 | #endif /* _IPCONNTRACK_NETLINK_H */ |
diff --git a/include/linux/netfilter/xt_SECMARK.h b/include/linux/netfilter/xt_SECMARK.h index 6fcd3448b186..989092bd6274 100644 --- a/include/linux/netfilter/xt_SECMARK.h +++ b/include/linux/netfilter/xt_SECMARK.h | |||
| @@ -11,18 +11,12 @@ | |||
| 11 | * packets are being marked for. | 11 | * packets are being marked for. |
| 12 | */ | 12 | */ |
| 13 | #define SECMARK_MODE_SEL 0x01 /* SELinux */ | 13 | #define SECMARK_MODE_SEL 0x01 /* SELinux */ |
| 14 | #define SECMARK_SELCTX_MAX 256 | 14 | #define SECMARK_SECCTX_MAX 256 |
| 15 | |||
| 16 | struct xt_secmark_target_selinux_info { | ||
| 17 | __u32 selsid; | ||
| 18 | char selctx[SECMARK_SELCTX_MAX]; | ||
| 19 | }; | ||
| 20 | 15 | ||
| 21 | struct xt_secmark_target_info { | 16 | struct xt_secmark_target_info { |
| 22 | __u8 mode; | 17 | __u8 mode; |
| 23 | union { | 18 | __u32 secid; |
| 24 | struct xt_secmark_target_selinux_info sel; | 19 | char secctx[SECMARK_SECCTX_MAX]; |
| 25 | } u; | ||
| 26 | }; | 20 | }; |
| 27 | 21 | ||
| 28 | #endif /*_XT_SECMARK_H_target */ | 22 | #endif /*_XT_SECMARK_H_target */ |
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 508f8cf6da37..d0edf7d823ae 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
| @@ -185,7 +185,7 @@ struct nfs_inode { | |||
| 185 | struct nfs4_cached_acl *nfs4_acl; | 185 | struct nfs4_cached_acl *nfs4_acl; |
| 186 | /* NFSv4 state */ | 186 | /* NFSv4 state */ |
| 187 | struct list_head open_states; | 187 | struct list_head open_states; |
| 188 | struct nfs_delegation *delegation; | 188 | struct nfs_delegation __rcu *delegation; |
| 189 | fmode_t delegation_state; | 189 | fmode_t delegation_state; |
| 190 | struct rw_semaphore rwsem; | 190 | struct rw_semaphore rwsem; |
| 191 | #endif /* CONFIG_NFS_V4*/ | 191 | #endif /* CONFIG_NFS_V4*/ |
diff --git a/include/linux/notifier.h b/include/linux/notifier.h index b2f1a4d83550..2026f9e1ceb8 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h | |||
| @@ -49,28 +49,28 @@ | |||
| 49 | 49 | ||
| 50 | struct notifier_block { | 50 | struct notifier_block { |
| 51 | int (*notifier_call)(struct notifier_block *, unsigned long, void *); | 51 | int (*notifier_call)(struct notifier_block *, unsigned long, void *); |
| 52 | struct notifier_block *next; | 52 | struct notifier_block __rcu *next; |
| 53 | int priority; | 53 | int priority; |
| 54 | }; | 54 | }; |
| 55 | 55 | ||
| 56 | struct atomic_notifier_head { | 56 | struct atomic_notifier_head { |
| 57 | spinlock_t lock; | 57 | spinlock_t lock; |
| 58 | struct notifier_block *head; | 58 | struct notifier_block __rcu *head; |
| 59 | }; | 59 | }; |
| 60 | 60 | ||
| 61 | struct blocking_notifier_head { | 61 | struct blocking_notifier_head { |
| 62 | struct rw_semaphore rwsem; | 62 | struct rw_semaphore rwsem; |
| 63 | struct notifier_block *head; | 63 | struct notifier_block __rcu *head; |
| 64 | }; | 64 | }; |
| 65 | 65 | ||
| 66 | struct raw_notifier_head { | 66 | struct raw_notifier_head { |
| 67 | struct notifier_block *head; | 67 | struct notifier_block __rcu *head; |
| 68 | }; | 68 | }; |
| 69 | 69 | ||
| 70 | struct srcu_notifier_head { | 70 | struct srcu_notifier_head { |
| 71 | struct mutex mutex; | 71 | struct mutex mutex; |
| 72 | struct srcu_struct srcu; | 72 | struct srcu_struct srcu; |
| 73 | struct notifier_block *head; | 73 | struct notifier_block __rcu *head; |
| 74 | }; | 74 | }; |
| 75 | 75 | ||
| 76 | #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ | 76 | #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ |
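All the notifier head flavours now carry __rcu chain pointers, since kernel/notifier.c walks them under rcu_read_lock(). The calling conventions are unchanged; a minimal sketch with illustrative names:

    #include <linux/notifier.h>
    #include <linux/init.h>

    static ATOMIC_NOTIFIER_HEAD(example_chain);    /* .head is now __rcu */

    static int example_event(struct notifier_block *nb,
                             unsigned long action, void *data)
    {
            /* called with rcu_read_lock() held by the chain walker */
            return NOTIFY_OK;
    }

    static struct notifier_block example_nb = {
            .notifier_call = example_event,
    };

    static int __init example_init(void)
    {
            atomic_notifier_chain_register(&example_chain, &example_nb);
            atomic_notifier_call_chain(&example_chain, 0, NULL);
            return 0;
    }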
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 570fddeb0388..2615c37c8fe5 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h | |||
| @@ -517,6 +517,7 @@ | |||
| 517 | #define PCI_DEVICE_ID_AMD_11H_NB_DRAM 0x1302 | 517 | #define PCI_DEVICE_ID_AMD_11H_NB_DRAM 0x1302 |
| 518 | #define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303 | 518 | #define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303 |
| 519 | #define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304 | 519 | #define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304 |
| 520 | #define PCI_DEVICE_ID_AMD_15H_NB_MISC 0x1603 | ||
| 520 | #define PCI_DEVICE_ID_AMD_LANCE 0x2000 | 521 | #define PCI_DEVICE_ID_AMD_LANCE 0x2000 |
| 521 | #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 | 522 | #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 |
| 522 | #define PCI_DEVICE_ID_AMD_SCSI 0x2020 | 523 | #define PCI_DEVICE_ID_AMD_SCSI 0x2020 |
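The new constant names the miscellaneous-control function of the family-15h northbridge. A sketch of how a driver's match table would pick it up (the table itself is illustrative, not from this patch):

    #include <linux/pci.h>
    #include <linux/module.h>

    static const struct pci_device_id amd_nb_ids[] = {
            { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) },
            { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
            { }     /* terminating entry */
    };
    MODULE_DEVICE_TABLE(pci, amd_nb_ids);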
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index ce2dc655cd1d..27ef6b190ea6 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h | |||
| @@ -139,6 +139,15 @@ | |||
| 139 | __aligned(PAGE_SIZE) | 139 | __aligned(PAGE_SIZE) |
| 140 | 140 | ||
| 141 | /* | 141 | /* |
| 142 | * Declaration/definition used for per-CPU variables that must be read mostly. | ||
| 143 | */ | ||
| 144 | #define DECLARE_PER_CPU_READ_MOSTLY(type, name) \ | ||
| 145 | DECLARE_PER_CPU_SECTION(type, name, "..readmostly") | ||
| 146 | |||
| 147 | #define DEFINE_PER_CPU_READ_MOSTLY(type, name) \ | ||
| 148 | DEFINE_PER_CPU_SECTION(type, name, "..readmostly") | ||
| 149 | |||
| 150 | /* | ||
| 142 | * Intermodule exports for per-CPU variables. sparse forgets about | 151 | * Intermodule exports for per-CPU variables. sparse forgets about |
| 143 | * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to | 152 | * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to |
| 144 | * noop if __CHECKER__. | 153 | * noop if __CHECKER__. |
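The new pair places per-CPU variables in a ..readmostly subsection, so rarely-written data does not share cache lines with frequently-written per-CPU state. A sketch, with cpu_llc_size as a hypothetical variable written once at CPU bringup and read on hot paths:

    #include <linux/percpu.h>

    static DEFINE_PER_CPU_READ_MOSTLY(unsigned int, cpu_llc_size);

    static unsigned int llc_size_this_cpu(void)
    {
            /* reads are the common case; writes happen once at bringup */
            return __this_cpu_read(cpu_llc_size);
    }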
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 634b8e674ac5..a39cbed9ee17 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h | |||
| @@ -47,6 +47,8 @@ static inline void *radix_tree_indirect_to_ptr(void *ptr) | |||
| 47 | { | 47 | { |
| 48 | return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); | 48 | return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); |
| 49 | } | 49 | } |
| 50 | #define radix_tree_indirect_to_ptr(ptr) \ | ||
| 51 | radix_tree_indirect_to_ptr((void __force *)(ptr)) | ||
| 50 | 52 | ||
| 51 | static inline int radix_tree_is_indirect_ptr(void *ptr) | 53 | static inline int radix_tree_is_indirect_ptr(void *ptr) |
| 52 | { | 54 | { |
| @@ -61,7 +63,7 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) | |||
| 61 | struct radix_tree_root { | 63 | struct radix_tree_root { |
| 62 | unsigned int height; | 64 | unsigned int height; |
| 63 | gfp_t gfp_mask; | 65 | gfp_t gfp_mask; |
| 64 | struct radix_tree_node *rnode; | 66 | struct radix_tree_node __rcu *rnode; |
| 65 | }; | 67 | }; |
| 66 | 68 | ||
| 67 | #define RADIX_TREE_INIT(mask) { \ | 69 | #define RADIX_TREE_INIT(mask) { \ |
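rnode is the root's RCU-published slot, hence the __rcu annotation; the macro wrapped around radix_tree_indirect_to_ptr() force-casts so existing callers keep passing plain pointers. Lookups were already RCU-safe; a reader sketch with illustrative names:

    #include <linux/radix-tree.h>
    #include <linux/rcupdate.h>

    static RADIX_TREE(example_tree, GFP_ATOMIC);   /* root->rnode is __rcu */

    static void *example_lookup(unsigned long index)
    {
            void *item;

            rcu_read_lock();
            /* radix_tree_lookup() performs the rcu_dereference() that the
             * new annotation lets sparse check. */
            item = radix_tree_lookup(&example_tree, index);
            rcu_read_unlock();
            return item; /* valid only if the item's lifetime is RCU-managed */
    }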
diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 4ec3b38ce9c5..f31ef61f1c65 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h | |||
| @@ -10,6 +10,21 @@ | |||
| 10 | #include <linux/rcupdate.h> | 10 | #include <linux/rcupdate.h> |
| 11 | 11 | ||
| 12 | /* | 12 | /* |
| 13 | * Why is there no list_empty_rcu()? Because list_empty() serves this | ||
| 14 | * purpose. The list_empty() function fetches the RCU-protected pointer | ||
| 15 | * and compares it to the address of the list head, but neither dereferences | ||
| 16 | * this pointer itself nor provides this pointer to the caller. Therefore, | ||
| 17 | * it is not necessary to use rcu_dereference(), so that list_empty() can | ||
| 18 | * be used anywhere you would want to use a list_empty_rcu(). | ||
| 19 | */ | ||
| 20 | |||
| 21 | /* | ||
| 22 | * return the ->next pointer of a list_head in an rcu safe | ||
| 23 | * way, we must not access it directly | ||
| 24 | */ | ||
| 25 | #define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) | ||
| 26 | |||
| 27 | /* | ||
| 13 | * Insert a new entry between two known consecutive entries. | 28 | * Insert a new entry between two known consecutive entries. |
| 14 | * | 29 | * |
| 15 | * This is only for internal list manipulation where we know | 30 | * This is only for internal list manipulation where we know |
| @@ -20,7 +35,7 @@ static inline void __list_add_rcu(struct list_head *new, | |||
| 20 | { | 35 | { |
| 21 | new->next = next; | 36 | new->next = next; |
| 22 | new->prev = prev; | 37 | new->prev = prev; |
| 23 | rcu_assign_pointer(prev->next, new); | 38 | rcu_assign_pointer(list_next_rcu(prev), new); |
| 24 | next->prev = new; | 39 | next->prev = new; |
| 25 | } | 40 | } |
| 26 | 41 | ||
| @@ -138,7 +153,7 @@ static inline void list_replace_rcu(struct list_head *old, | |||
| 138 | { | 153 | { |
| 139 | new->next = old->next; | 154 | new->next = old->next; |
| 140 | new->prev = old->prev; | 155 | new->prev = old->prev; |
| 141 | rcu_assign_pointer(new->prev->next, new); | 156 | rcu_assign_pointer(list_next_rcu(new->prev), new); |
| 142 | new->next->prev = new; | 157 | new->next->prev = new; |
| 143 | old->prev = LIST_POISON2; | 158 | old->prev = LIST_POISON2; |
| 144 | } | 159 | } |
| @@ -193,7 +208,7 @@ static inline void list_splice_init_rcu(struct list_head *list, | |||
| 193 | */ | 208 | */ |
| 194 | 209 | ||
| 195 | last->next = at; | 210 | last->next = at; |
| 196 | rcu_assign_pointer(head->next, first); | 211 | rcu_assign_pointer(list_next_rcu(head), first); |
| 197 | first->prev = head; | 212 | first->prev = head; |
| 198 | at->prev = last; | 213 | at->prev = last; |
| 199 | } | 214 | } |
| @@ -208,7 +223,9 @@ static inline void list_splice_init_rcu(struct list_head *list, | |||
| 208 | * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). | 223 | * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). |
| 209 | */ | 224 | */ |
| 210 | #define list_entry_rcu(ptr, type, member) \ | 225 | #define list_entry_rcu(ptr, type, member) \ |
| 211 | container_of(rcu_dereference_raw(ptr), type, member) | 226 | ({typeof (*ptr) __rcu *__ptr = (typeof (*ptr) __rcu __force *)ptr; \ |
| 227 | container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ | ||
| 228 | }) | ||
| 212 | 229 | ||
| 213 | /** | 230 | /** |
| 214 | * list_first_entry_rcu - get the first element from a list | 231 | * list_first_entry_rcu - get the first element from a list |
| @@ -225,9 +242,9 @@ static inline void list_splice_init_rcu(struct list_head *list, | |||
| 225 | list_entry_rcu((ptr)->next, type, member) | 242 | list_entry_rcu((ptr)->next, type, member) |
| 226 | 243 | ||
| 227 | #define __list_for_each_rcu(pos, head) \ | 244 | #define __list_for_each_rcu(pos, head) \ |
| 228 | for (pos = rcu_dereference_raw((head)->next); \ | 245 | for (pos = rcu_dereference_raw(list_next_rcu(head)); \ |
| 229 | pos != (head); \ | 246 | pos != (head); \ |
| 230 | pos = rcu_dereference_raw(pos->next)) | 247 | pos = rcu_dereference_raw(list_next_rcu((pos))) |
| 231 | 248 | ||
| 232 | /** | 249 | /** |
| 233 | * list_for_each_entry_rcu - iterate over rcu list of given type | 250 | * list_for_each_entry_rcu - iterate over rcu list of given type |
| @@ -257,9 +274,9 @@ static inline void list_splice_init_rcu(struct list_head *list, | |||
| 257 | * as long as the traversal is guarded by rcu_read_lock(). | 274 | * as long as the traversal is guarded by rcu_read_lock(). |
| 258 | */ | 275 | */ |
| 259 | #define list_for_each_continue_rcu(pos, head) \ | 276 | #define list_for_each_continue_rcu(pos, head) \ |
| 260 | for ((pos) = rcu_dereference_raw((pos)->next); \ | 277 | for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \ |
| 261 | prefetch((pos)->next), (pos) != (head); \ | 278 | prefetch((pos)->next), (pos) != (head); \ |
| 262 | (pos) = rcu_dereference_raw((pos)->next)) | 279 | (pos) = rcu_dereference_raw(list_next_rcu(pos))) |
| 263 | 280 | ||
| 264 | /** | 281 | /** |
| 265 | * list_for_each_entry_continue_rcu - continue iteration over list of given type | 282 | * list_for_each_entry_continue_rcu - continue iteration over list of given type |
| @@ -314,12 +331,19 @@ static inline void hlist_replace_rcu(struct hlist_node *old, | |||
| 314 | 331 | ||
| 315 | new->next = next; | 332 | new->next = next; |
| 316 | new->pprev = old->pprev; | 333 | new->pprev = old->pprev; |
| 317 | rcu_assign_pointer(*new->pprev, new); | 334 | rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new); |
| 318 | if (next) | 335 | if (next) |
| 319 | new->next->pprev = &new->next; | 336 | new->next->pprev = &new->next; |
| 320 | old->pprev = LIST_POISON2; | 337 | old->pprev = LIST_POISON2; |
| 321 | } | 338 | } |
| 322 | 339 | ||
| 340 | /* | ||
| 341 | * return the first or the next element in an RCU protected hlist | ||
| 342 | */ | ||
| 343 | #define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first))) | ||
| 344 | #define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next))) | ||
| 345 | #define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev))) | ||
| 346 | |||
| 323 | /** | 347 | /** |
| 324 | * hlist_add_head_rcu | 348 | * hlist_add_head_rcu |
| 325 | * @n: the element to add to the hash list. | 349 | * @n: the element to add to the hash list. |
| @@ -346,7 +370,7 @@ static inline void hlist_add_head_rcu(struct hlist_node *n, | |||
| 346 | 370 | ||
| 347 | n->next = first; | 371 | n->next = first; |
| 348 | n->pprev = &h->first; | 372 | n->pprev = &h->first; |
| 349 | rcu_assign_pointer(h->first, n); | 373 | rcu_assign_pointer(hlist_first_rcu(h), n); |
| 350 | if (first) | 374 | if (first) |
| 351 | first->pprev = &n->next; | 375 | first->pprev = &n->next; |
| 352 | } | 376 | } |
| @@ -374,7 +398,7 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, | |||
| 374 | { | 398 | { |
| 375 | n->pprev = next->pprev; | 399 | n->pprev = next->pprev; |
| 376 | n->next = next; | 400 | n->next = next; |
| 377 | rcu_assign_pointer(*(n->pprev), n); | 401 | rcu_assign_pointer(hlist_pprev_rcu(n), n); |
| 378 | next->pprev = &n->next; | 402 | next->pprev = &n->next; |
| 379 | } | 403 | } |
| 380 | 404 | ||
| @@ -401,15 +425,15 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, | |||
| 401 | { | 425 | { |
| 402 | n->next = prev->next; | 426 | n->next = prev->next; |
| 403 | n->pprev = &prev->next; | 427 | n->pprev = &prev->next; |
| 404 | rcu_assign_pointer(prev->next, n); | 428 | rcu_assign_pointer(hlist_next_rcu(prev), n); |
| 405 | if (n->next) | 429 | if (n->next) |
| 406 | n->next->pprev = &n->next; | 430 | n->next->pprev = &n->next; |
| 407 | } | 431 | } |
| 408 | 432 | ||
| 409 | #define __hlist_for_each_rcu(pos, head) \ | 433 | #define __hlist_for_each_rcu(pos, head) \ |
| 410 | for (pos = rcu_dereference((head)->first); \ | 434 | for (pos = rcu_dereference(hlist_first_rcu(head)); \ |
| 411 | pos && ({ prefetch(pos->next); 1; }); \ | 435 | pos && ({ prefetch(pos->next); 1; }); \ |
| 412 | pos = rcu_dereference(pos->next)) | 436 | pos = rcu_dereference(hlist_next_rcu(pos))) |
| 413 | 437 | ||
| 414 | /** | 438 | /** |
| 415 | * hlist_for_each_entry_rcu - iterate over rcu list of given type | 439 | * hlist_for_each_entry_rcu - iterate over rcu list of given type |
| @@ -422,11 +446,11 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, | |||
| 422 | * the _rcu list-mutation primitives such as hlist_add_head_rcu() | 446 | * the _rcu list-mutation primitives such as hlist_add_head_rcu() |
| 423 | * as long as the traversal is guarded by rcu_read_lock(). | 447 | * as long as the traversal is guarded by rcu_read_lock(). |
| 424 | */ | 448 | */ |
| 425 | #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ | 449 | #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ |
| 426 | for (pos = rcu_dereference_raw((head)->first); \ | 450 | for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ |
| 427 | pos && ({ prefetch(pos->next); 1; }) && \ | 451 | pos && ({ prefetch(pos->next); 1; }) && \ |
| 428 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ | 452 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ |
| 429 | pos = rcu_dereference_raw(pos->next)) | 453 | pos = rcu_dereference_raw(hlist_next_rcu(pos))) |
| 430 | 454 | ||
| 431 | /** | 455 | /** |
| 432 | * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type | 456 | * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type |
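The list_next_rcu()/hlist_*_rcu() casts above give every RCU-protected next pointer a single __rcu-annotated access point, so rcu_assign_pointer() and rcu_dereference_raw() type-check under sparse. The calling conventions are unchanged; a minimal writer/reader sketch with illustrative names:

    #include <linux/rculist.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    struct item {
            int key;
            struct list_head link;
    };

    static LIST_HEAD(items);
    static DEFINE_SPINLOCK(items_lock);

    static void item_add(struct item *it)          /* writer */
    {
            spin_lock(&items_lock);
            list_add_rcu(&it->link, &items);       /* publishes via list_next_rcu() */
            spin_unlock(&items_lock);
    }

    static bool item_present(int key)              /* lock-free reader */
    {
            struct item *it;
            bool found = false;

            rcu_read_lock();
            list_for_each_entry_rcu(it, &items, link) {
                    if (it->key == key) {
                            found = true;
                            break;
                    }
            }
            rcu_read_unlock();
            return found;
    }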
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index b70ffe53cb9f..2ae13714828b 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h | |||
| @@ -37,6 +37,12 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) | |||
| 37 | } | 37 | } |
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | #define hlist_nulls_first_rcu(head) \ | ||
| 41 | (*((struct hlist_nulls_node __rcu __force **)&(head)->first)) | ||
| 42 | |||
| 43 | #define hlist_nulls_next_rcu(node) \ | ||
| 44 | (*((struct hlist_nulls_node __rcu __force **)&(node)->next)) | ||
| 45 | |||
| 40 | /** | 46 | /** |
| 41 | * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization | 47 | * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization |
| 42 | * @n: the element to delete from the hash list. | 48 | * @n: the element to delete from the hash list. |
| @@ -88,7 +94,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, | |||
| 88 | 94 | ||
| 89 | n->next = first; | 95 | n->next = first; |
| 90 | n->pprev = &h->first; | 96 | n->pprev = &h->first; |
| 91 | rcu_assign_pointer(h->first, n); | 97 | rcu_assign_pointer(hlist_nulls_first_rcu(h), n); |
| 92 | if (!is_a_nulls(first)) | 98 | if (!is_a_nulls(first)) |
| 93 | first->pprev = &n->next; | 99 | first->pprev = &n->next; |
| 94 | } | 100 | } |
| @@ -100,11 +106,11 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, | |||
| 100 | * @member: the name of the hlist_nulls_node within the struct. | 106 | * @member: the name of the hlist_nulls_node within the struct. |
| 101 | * | 107 | * |
| 102 | */ | 108 | */ |
| 103 | #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ | 109 | #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ |
| 104 | for (pos = rcu_dereference_raw((head)->first); \ | 110 | for (pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ |
| 105 | (!is_a_nulls(pos)) && \ | 111 | (!is_a_nulls(pos)) && \ |
| 106 | ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ | 112 | ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ |
| 107 | pos = rcu_dereference_raw(pos->next)) | 113 | pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) |
| 108 | 114 | ||
| 109 | #endif | 115 | #endif |
| 110 | #endif | 116 | #endif |
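hlist_nulls_first_rcu()/hlist_nulls_next_rcu() play the same role for nulls lists. The canonical consumer is a SLAB_DESTROY_BY_RCU hash table, where the final nulls value tells the reader whether it ended the walk in the bucket it started in. A lookup sketch with illustrative names, assuming the caller holds rcu_read_lock():

    #include <linux/rculist_nulls.h>

    struct bucket_obj {
            int key;
            struct hlist_nulls_node node;
    };

    static struct bucket_obj *nulls_lookup(struct hlist_nulls_head *head,
                                           unsigned int slot, int key)
    {
            struct bucket_obj *obj;
            struct hlist_nulls_node *pos;

    begin:
            hlist_nulls_for_each_entry_rcu(obj, pos, head, node)
                    if (obj->key == key)
                            return obj;
            /* If the nulls marker does not match our bucket number, the
             * object we were traversing was freed and reused on another
             * chain while we walked it: restart from the head. */
            if (get_nulls_value(pos) != slot)
                    goto begin;
            return NULL;
    }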
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 83af1f8d8b74..03cda7bed985 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h | |||
| @@ -41,11 +41,15 @@ | |||
| 41 | #include <linux/lockdep.h> | 41 | #include <linux/lockdep.h> |
| 42 | #include <linux/completion.h> | 42 | #include <linux/completion.h> |
| 43 | #include <linux/debugobjects.h> | 43 | #include <linux/debugobjects.h> |
| 44 | #include <linux/compiler.h> | ||
| 44 | 45 | ||
| 45 | #ifdef CONFIG_RCU_TORTURE_TEST | 46 | #ifdef CONFIG_RCU_TORTURE_TEST |
| 46 | extern int rcutorture_runnable; /* for sysctl */ | 47 | extern int rcutorture_runnable; /* for sysctl */ |
| 47 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ | 48 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ |
| 48 | 49 | ||
| 50 | #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) | ||
| 51 | #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) | ||
| 52 | |||
| 49 | /** | 53 | /** |
| 50 | * struct rcu_head - callback structure for use with RCU | 54 | * struct rcu_head - callback structure for use with RCU |
| 51 | * @next: next update requests in a list | 55 | * @next: next update requests in a list |
| @@ -57,29 +61,94 @@ struct rcu_head { | |||
| 57 | }; | 61 | }; |
| 58 | 62 | ||
| 59 | /* Exported common interfaces */ | 63 | /* Exported common interfaces */ |
| 60 | extern void rcu_barrier(void); | 64 | extern void call_rcu_sched(struct rcu_head *head, |
| 65 | void (*func)(struct rcu_head *rcu)); | ||
| 66 | extern void synchronize_sched(void); | ||
| 61 | extern void rcu_barrier_bh(void); | 67 | extern void rcu_barrier_bh(void); |
| 62 | extern void rcu_barrier_sched(void); | 68 | extern void rcu_barrier_sched(void); |
| 63 | extern void synchronize_sched_expedited(void); | 69 | extern void synchronize_sched_expedited(void); |
| 64 | extern int sched_expedited_torture_stats(char *page); | 70 | extern int sched_expedited_torture_stats(char *page); |
| 65 | 71 | ||
| 72 | static inline void __rcu_read_lock_bh(void) | ||
| 73 | { | ||
| 74 | local_bh_disable(); | ||
| 75 | } | ||
| 76 | |||
| 77 | static inline void __rcu_read_unlock_bh(void) | ||
| 78 | { | ||
| 79 | local_bh_enable(); | ||
| 80 | } | ||
| 81 | |||
| 82 | #ifdef CONFIG_PREEMPT_RCU | ||
| 83 | |||
| 84 | extern void __rcu_read_lock(void); | ||
| 85 | extern void __rcu_read_unlock(void); | ||
| 86 | void synchronize_rcu(void); | ||
| 87 | |||
| 88 | /* | ||
| 89 | * Defined as a macro as it is a very low level header included from | ||
| 90 | * areas that don't even know about current. This gives the rcu_read_lock() | ||
| 91 | * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other | ||
| 92 | * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. | ||
| 93 | */ | ||
| 94 | #define rcu_preempt_depth() (current->rcu_read_lock_nesting) | ||
| 95 | |||
| 96 | #else /* #ifdef CONFIG_PREEMPT_RCU */ | ||
| 97 | |||
| 98 | static inline void __rcu_read_lock(void) | ||
| 99 | { | ||
| 100 | preempt_disable(); | ||
| 101 | } | ||
| 102 | |||
| 103 | static inline void __rcu_read_unlock(void) | ||
| 104 | { | ||
| 105 | preempt_enable(); | ||
| 106 | } | ||
| 107 | |||
| 108 | static inline void synchronize_rcu(void) | ||
| 109 | { | ||
| 110 | synchronize_sched(); | ||
| 111 | } | ||
| 112 | |||
| 113 | static inline int rcu_preempt_depth(void) | ||
| 114 | { | ||
| 115 | return 0; | ||
| 116 | } | ||
| 117 | |||
| 118 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | ||
| 119 | |||
| 66 | /* Internal to kernel */ | 120 | /* Internal to kernel */ |
| 67 | extern void rcu_init(void); | 121 | extern void rcu_init(void); |
| 122 | extern void rcu_sched_qs(int cpu); | ||
| 123 | extern void rcu_bh_qs(int cpu); | ||
| 124 | extern void rcu_check_callbacks(int cpu, int user); | ||
| 125 | struct notifier_block; | ||
| 126 | |||
| 127 | #ifdef CONFIG_NO_HZ | ||
| 128 | |||
| 129 | extern void rcu_enter_nohz(void); | ||
| 130 | extern void rcu_exit_nohz(void); | ||
| 131 | |||
| 132 | #else /* #ifdef CONFIG_NO_HZ */ | ||
| 133 | |||
| 134 | static inline void rcu_enter_nohz(void) | ||
| 135 | { | ||
| 136 | } | ||
| 137 | |||
| 138 | static inline void rcu_exit_nohz(void) | ||
| 139 | { | ||
| 140 | } | ||
| 141 | |||
| 142 | #endif /* #else #ifdef CONFIG_NO_HZ */ | ||
| 68 | 143 | ||
| 69 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) | 144 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) |
| 70 | #include <linux/rcutree.h> | 145 | #include <linux/rcutree.h> |
| 71 | #elif defined(CONFIG_TINY_RCU) | 146 | #elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) |
| 72 | #include <linux/rcutiny.h> | 147 | #include <linux/rcutiny.h> |
| 73 | #else | 148 | #else |
| 74 | #error "Unknown RCU implementation specified to kernel configuration" | 149 | #error "Unknown RCU implementation specified to kernel configuration" |
| 75 | #endif | 150 | #endif |
| 76 | 151 | ||
| 77 | #define RCU_HEAD_INIT { .next = NULL, .func = NULL } | ||
| 78 | #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT | ||
| 79 | #define INIT_RCU_HEAD(ptr) do { \ | ||
| 80 | (ptr)->next = NULL; (ptr)->func = NULL; \ | ||
| 81 | } while (0) | ||
| 82 | |||
| 83 | /* | 152 | /* |
| 84 | * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic | 153 | * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic |
| 85 | * initialization and destruction of rcu_head on the stack. rcu_head structures | 154 | * initialization and destruction of rcu_head on the stack. rcu_head structures |
| @@ -120,14 +189,15 @@ extern struct lockdep_map rcu_sched_lock_map; | |||
| 120 | extern int debug_lockdep_rcu_enabled(void); | 189 | extern int debug_lockdep_rcu_enabled(void); |
| 121 | 190 | ||
| 122 | /** | 191 | /** |
| 123 | * rcu_read_lock_held - might we be in RCU read-side critical section? | 192 | * rcu_read_lock_held() - might we be in RCU read-side critical section? |
| 124 | * | 193 | * |
| 125 | * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU | 194 | * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU |
| 126 | * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, | 195 | * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, |
| 127 | * this assumes we are in an RCU read-side critical section unless it can | 196 | * this assumes we are in an RCU read-side critical section unless it can |
| 128 | * prove otherwise. | 197 | * prove otherwise. This is useful for debug checks in functions that |
| 198 | * require that they be called within an RCU read-side critical section. | ||
| 129 | * | 199 | * |
| 130 | * Check debug_lockdep_rcu_enabled() to prevent false positives during boot | 200 | * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot |
| 131 | * and while lockdep is disabled. | 201 | * and while lockdep is disabled. |
| 132 | */ | 202 | */ |
| 133 | static inline int rcu_read_lock_held(void) | 203 | static inline int rcu_read_lock_held(void) |
| @@ -144,14 +214,16 @@ static inline int rcu_read_lock_held(void) | |||
| 144 | extern int rcu_read_lock_bh_held(void); | 214 | extern int rcu_read_lock_bh_held(void); |
| 145 | 215 | ||
| 146 | /** | 216 | /** |
| 147 | * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section? | 217 | * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? |
| 148 | * | 218 | * |
| 149 | * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an | 219 | * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an |
| 150 | * RCU-sched read-side critical section. In absence of | 220 | * RCU-sched read-side critical section. In absence of |
| 151 | * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side | 221 | * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side |
| 152 | * critical section unless it can prove otherwise. Note that disabling | 222 | * critical section unless it can prove otherwise. Note that disabling |
| 153 | * of preemption (including disabling irqs) counts as an RCU-sched | 223 | * of preemption (including disabling irqs) counts as an RCU-sched |
| 154 | * read-side critical section. | 224 | * read-side critical section. This is useful for debug checks in functions |
| 225 | * that require that they be called within an RCU-sched read-side | ||
| 226 | * critical section. | ||
| 155 | * | 227 | * |
| 156 | * Check debug_lockdep_rcu_enabled() to prevent false positives during boot | 228 | * Check debug_lockdep_rcu_enabled() to prevent false positives during boot |
| 157 | * and while lockdep is disabled. | 229 | * and while lockdep is disabled. |
| @@ -211,7 +283,11 @@ static inline int rcu_read_lock_sched_held(void) | |||
| 211 | 283 | ||
| 212 | extern int rcu_my_thread_group_empty(void); | 284 | extern int rcu_my_thread_group_empty(void); |
| 213 | 285 | ||
| 214 | #define __do_rcu_dereference_check(c) \ | 286 | /** |
| 287 | * rcu_lockdep_assert - emit lockdep splat if specified condition not met | ||
| 288 | * @c: condition to check | ||
| 289 | */ | ||
| 290 | #define rcu_lockdep_assert(c) \ | ||
| 215 | do { \ | 291 | do { \ |
| 216 | static bool __warned; \ | 292 | static bool __warned; \ |
| 217 | if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ | 293 | if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ |
| @@ -220,41 +296,163 @@ extern int rcu_my_thread_group_empty(void); | |||
| 220 | } \ | 296 | } \ |
| 221 | } while (0) | 297 | } while (0) |
| 222 | 298 | ||
| 299 | #else /* #ifdef CONFIG_PROVE_RCU */ | ||
| 300 | |||
| 301 | #define rcu_lockdep_assert(c) do { } while (0) | ||
| 302 | |||
| 303 | #endif /* #else #ifdef CONFIG_PROVE_RCU */ | ||
| 304 | |||
| 305 | /* | ||
| 306 | * Helper functions for rcu_dereference_check(), rcu_dereference_protected() | ||
| 307 | * and rcu_assign_pointer(). Some of these could be folded into their | ||
| 308 | * callers, but they are left separate in order to ease introduction of | ||
| 309 | * multiple flavors of pointers to match the multiple flavors of RCU | ||
| 310 | * (e.g., __rcu_bh, __rcu_sched, and __srcu), should this make sense in | ||
| 311 | * the future. | ||
| 312 | */ | ||
| 313 | |||
| 314 | #ifdef __CHECKER__ | ||
| 315 | #define rcu_dereference_sparse(p, space) \ | ||
| 316 | ((void)(((typeof(*p) space *)p) == p)) | ||
| 317 | #else /* #ifdef __CHECKER__ */ | ||
| 318 | #define rcu_dereference_sparse(p, space) | ||
| 319 | #endif /* #else #ifdef __CHECKER__ */ | ||
| 320 | |||
| 321 | #define __rcu_access_pointer(p, space) \ | ||
| 322 | ({ \ | ||
| 323 | typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ | ||
| 324 | rcu_dereference_sparse(p, space); \ | ||
| 325 | ((typeof(*p) __force __kernel *)(_________p1)); \ | ||
| 326 | }) | ||
| 327 | #define __rcu_dereference_check(p, c, space) \ | ||
| 328 | ({ \ | ||
| 329 | typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ | ||
| 330 | rcu_lockdep_assert(c); \ | ||
| 331 | rcu_dereference_sparse(p, space); \ | ||
| 332 | smp_read_barrier_depends(); \ | ||
| 333 | ((typeof(*p) __force __kernel *)(_________p1)); \ | ||
| 334 | }) | ||
| 335 | #define __rcu_dereference_protected(p, c, space) \ | ||
| 336 | ({ \ | ||
| 337 | rcu_lockdep_assert(c); \ | ||
| 338 | rcu_dereference_sparse(p, space); \ | ||
| 339 | ((typeof(*p) __force __kernel *)(p)); \ | ||
| 340 | }) | ||
| 341 | |||
| 342 | #define __rcu_dereference_index_check(p, c) \ | ||
| 343 | ({ \ | ||
| 344 | typeof(p) _________p1 = ACCESS_ONCE(p); \ | ||
| 345 | rcu_lockdep_assert(c); \ | ||
| 346 | smp_read_barrier_depends(); \ | ||
| 347 | (_________p1); \ | ||
| 348 | }) | ||
| 349 | #define __rcu_assign_pointer(p, v, space) \ | ||
| 350 | ({ \ | ||
| 351 | if (!__builtin_constant_p(v) || \ | ||
| 352 | ((v) != NULL)) \ | ||
| 353 | smp_wmb(); \ | ||
| 354 | (p) = (typeof(*v) __force space *)(v); \ | ||
| 355 | }) | ||
| 356 | |||
| 357 | |||
| 358 | /** | ||
| 359 | * rcu_access_pointer() - fetch RCU pointer with no dereferencing | ||
| 360 | * @p: The pointer to read | ||
| 361 | * | ||
| 362 | * Return the value of the specified RCU-protected pointer, but omit the | ||
| 363 | * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful | ||
| 364 | * when the value of this pointer is accessed, but the pointer is not | ||
| 365 | * dereferenced, for example, when testing an RCU-protected pointer against | ||
| 366 | * NULL. Although rcu_access_pointer() may also be used in cases where | ||
| 367 | * update-side locks prevent the value of the pointer from changing, you | ||
| 368 | * should instead use rcu_dereference_protected() for this use case. | ||
| 369 | */ | ||
| 370 | #define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) | ||
| 371 | |||
| 223 | /** | 372 | /** |
| 224 | * rcu_dereference_check - rcu_dereference with debug checking | 373 | * rcu_dereference_check() - rcu_dereference with debug checking |
| 225 | * @p: The pointer to read, prior to dereferencing | 374 | * @p: The pointer to read, prior to dereferencing |
| 226 | * @c: The conditions under which the dereference will take place | 375 | * @c: The conditions under which the dereference will take place |
| 227 | * | 376 | * |
| 228 | * Do an rcu_dereference(), but check that the conditions under which the | 377 | * Do an rcu_dereference(), but check that the conditions under which the |
| 229 | * dereference will take place are correct. Typically the conditions indicate | 378 | * dereference will take place are correct. Typically the conditions |
| 230 | * the various locking conditions that should be held at that point. The check | 379 | * indicate the various locking conditions that should be held at that |
| 231 | * should return true if the conditions are satisfied. | 380 | * point. The check should return true if the conditions are satisfied. |
| 381 | * An implicit check for being in an RCU read-side critical section | ||
| 382 | * (rcu_read_lock()) is included. | ||
| 232 | * | 383 | * |
| 233 | * For example: | 384 | * For example: |
| 234 | * | 385 | * |
| 235 | * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || | 386 | * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock)); |
| 236 | * lockdep_is_held(&foo->lock)); | ||
| 237 | * | 387 | * |
| 238 | * could be used to indicate to lockdep that foo->bar may only be dereferenced | 388 | * could be used to indicate to lockdep that foo->bar may only be dereferenced |
| 239 | * if either the RCU read lock is held, or that the lock required to replace | 389 | * if either rcu_read_lock() is held, or that the lock required to replace |
| 240 | * the bar struct at foo->bar is held. | 390 | * the bar struct at foo->bar is held. |
| 241 | * | 391 | * |
| 242 | * Note that the list of conditions may also include indications of when a lock | 392 | * Note that the list of conditions may also include indications of when a lock |
| 243 | * need not be held, for example during initialisation or destruction of the | 393 | * need not be held, for example during initialisation or destruction of the |
| 244 | * target struct: | 394 | * target struct: |
| 245 | * | 395 | * |
| 246 | * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || | 396 | * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) || |
| 247 | * lockdep_is_held(&foo->lock) || | ||
| 248 | * atomic_read(&foo->usage) == 0); | 397 | * atomic_read(&foo->usage) == 0); |
| 398 | * | ||
| 399 | * Inserts memory barriers on architectures that require them | ||
| 400 | * (currently only the Alpha), prevents the compiler from refetching | ||
| 401 | * (and from merging fetches), and, more importantly, documents exactly | ||
| 402 | * which pointers are protected by RCU and checks that the pointer is | ||
| 403 | * annotated as __rcu. | ||
| 249 | */ | 404 | */ |
| 250 | #define rcu_dereference_check(p, c) \ | 405 | #define rcu_dereference_check(p, c) \ |
| 251 | ({ \ | 406 | __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu) |
| 252 | __do_rcu_dereference_check(c); \ | 407 | |
| 253 | rcu_dereference_raw(p); \ | 408 | /** |
| 254 | }) | 409 | * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking |
| 410 | * @p: The pointer to read, prior to dereferencing | ||
| 411 | * @c: The conditions under which the dereference will take place | ||
| 412 | * | ||
| 413 | * This is the RCU-bh counterpart to rcu_dereference_check(). | ||
| 414 | */ | ||
| 415 | #define rcu_dereference_bh_check(p, c) \ | ||
| 416 | __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu) | ||
| 255 | 417 | ||
| 256 | /** | 418 | /** |
| 257 | * rcu_dereference_protected - fetch RCU pointer when updates prevented | 419 | * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking |
| 420 | * @p: The pointer to read, prior to dereferencing | ||
| 421 | * @c: The conditions under which the dereference will take place | ||
| 422 | * | ||
| 423 | * This is the RCU-sched counterpart to rcu_dereference_check(). | ||
| 424 | */ | ||
| 425 | #define rcu_dereference_sched_check(p, c) \ | ||
| 426 | __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \ | ||
| 427 | __rcu) | ||
| 428 | |||
| 429 | #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ | ||
| 430 | |||
| 431 | /** | ||
| 432 | * rcu_dereference_index_check() - rcu_dereference for indices with debug checking | ||
| 433 | * @p: The pointer to read, prior to dereferencing | ||
| 434 | * @c: The conditions under which the dereference will take place | ||
| 435 | * | ||
| 436 | * Similar to rcu_dereference_check(), but omits the sparse checking. | ||
| 437 | * This allows rcu_dereference_index_check() to be used on integers, | ||
| 438 | * which can then be used as array indices. Attempting to use | ||
| 439 | * rcu_dereference_check() on an integer will give compiler warnings | ||
| 440 | * because the sparse address-space mechanism relies on dereferencing | ||
| 441 | * the RCU-protected pointer. Dereferencing integers is not something | ||
| 442 | * that even gcc will put up with. | ||
| 443 | * | ||
| 444 | * Note that this function does not implicitly check for RCU read-side | ||
| 445 | * critical sections. If this function gains lots of uses, it might | ||
| 446 | * make sense to provide versions for each flavor of RCU, but it does | ||
| 447 | * not make sense as of early 2010. | ||
| 448 | */ | ||
| 449 | #define rcu_dereference_index_check(p, c) \ | ||
| 450 | __rcu_dereference_index_check((p), (c)) | ||
| 451 | |||
| 452 | /** | ||
| 453 | * rcu_dereference_protected() - fetch RCU pointer when updates prevented | ||
| 454 | * @p: The pointer to read, prior to dereferencing | ||
| 455 | * @c: The conditions under which the dereference will take place | ||
| 258 | * | 456 | * |
| 259 | * Return the value of the specified RCU-protected pointer, but omit | 457 | * Return the value of the specified RCU-protected pointer, but omit |
| 260 | * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This | 458 | * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This |
| @@ -263,35 +461,61 @@ extern int rcu_my_thread_group_empty(void); | |||
| 263 | * prevent the compiler from repeating this reference or combining it | 461 | * prevent the compiler from repeating this reference or combining it |
| 264 | * with other references, so it should not be used without protection | 462 | * with other references, so it should not be used without protection |
| 265 | * of appropriate locks. | 463 | * of appropriate locks. |
| 464 | * | ||
| 465 | * This function is only for update-side use. Using this function | ||
| 466 | * when protected only by rcu_read_lock() will result in infrequent | ||
| 467 | * but very ugly failures. | ||
| 266 | */ | 468 | */ |
| 267 | #define rcu_dereference_protected(p, c) \ | 469 | #define rcu_dereference_protected(p, c) \ |
| 268 | ({ \ | 470 | __rcu_dereference_protected((p), (c), __rcu) |
| 269 | __do_rcu_dereference_check(c); \ | ||
| 270 | (p); \ | ||
| 271 | }) | ||
| 272 | 471 | ||
| 273 | #else /* #ifdef CONFIG_PROVE_RCU */ | 472 | /** |
| 473 | * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented | ||
| 474 | * @p: The pointer to read, prior to dereferencing | ||
| 475 | * @c: The conditions under which the dereference will take place | ||
| 476 | * | ||
| 477 | * This is the RCU-bh counterpart to rcu_dereference_protected(). | ||
| 478 | */ | ||
| 479 | #define rcu_dereference_bh_protected(p, c) \ | ||
| 480 | __rcu_dereference_protected((p), (c), __rcu) | ||
| 274 | 481 | ||
| 275 | #define rcu_dereference_check(p, c) rcu_dereference_raw(p) | 482 | /** |
| 276 | #define rcu_dereference_protected(p, c) (p) | 483 | * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented |
| 484 | * @p: The pointer to read, prior to dereferencing | ||
| 485 | * @c: The conditions under which the dereference will take place | ||
| 486 | * | ||
| 487 | * This is the RCU-sched counterpart to rcu_dereference_protected(). | ||
| 488 | */ | ||
| 489 | #define rcu_dereference_sched_protected(p, c) \ | ||
| 490 | __rcu_dereference_protected((p), (c), __rcu) | ||
| 277 | 491 | ||
| 278 | #endif /* #else #ifdef CONFIG_PROVE_RCU */ | ||
| 279 | 492 | ||
| 280 | /** | 493 | /** |
| 281 | * rcu_access_pointer - fetch RCU pointer with no dereferencing | 494 | * rcu_dereference() - fetch RCU-protected pointer for dereferencing |
| 495 | * @p: The pointer to read, prior to dereferencing | ||
| 282 | * | 496 | * |
| 283 | * Return the value of the specified RCU-protected pointer, but omit the | 497 | * This is a simple wrapper around rcu_dereference_check(). |
| 284 | * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful | 498 | */ |
| 285 | * when the value of this pointer is accessed, but the pointer is not | 499 | #define rcu_dereference(p) rcu_dereference_check(p, 0) |
| 286 | * dereferenced, for example, when testing an RCU-protected pointer against | 500 | |
| 287 | * NULL. This may also be used in cases where update-side locks prevent | 501 | /** |
| 288 | * the value of the pointer from changing, but rcu_dereference_protected() | 502 | * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing |
| 289 | * is a lighter-weight primitive for this use case. | 503 | * @p: The pointer to read, prior to dereferencing |
| 504 | * | ||
| 505 | * Makes rcu_dereference_check() do the dirty work. | ||
| 506 | */ | ||
| 507 | #define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0) | ||
| 508 | |||
| 509 | /** | ||
| 510 | * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing | ||
| 511 | * @p: The pointer to read, prior to dereferencing | ||
| 512 | * | ||
| 513 | * Makes rcu_dereference_check() do the dirty work. | ||
| 290 | */ | 514 | */ |
| 291 | #define rcu_access_pointer(p) ACCESS_ONCE(p) | 515 | #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) |
| 292 | 516 | ||
| 293 | /** | 517 | /** |
| 294 | * rcu_read_lock - mark the beginning of an RCU read-side critical section. | 518 | * rcu_read_lock() - mark the beginning of an RCU read-side critical section |
| 295 | * | 519 | * |
| 296 | * When synchronize_rcu() is invoked on one CPU while other CPUs | 520 | * When synchronize_rcu() is invoked on one CPU while other CPUs |
| 297 | * are within RCU read-side critical sections, then the | 521 | * are within RCU read-side critical sections, then the |
| @@ -302,7 +526,7 @@ extern int rcu_my_thread_group_empty(void); | |||
| 302 | * until after all the other CPUs exit their critical sections. | 526 | * until after all the other CPUs exit their critical sections. |
| 303 | * | 527 | * |
| 304 | * Note, however, that RCU callbacks are permitted to run concurrently | 528 | * Note, however, that RCU callbacks are permitted to run concurrently |
| 305 | * with RCU read-side critical sections. One way that this can happen | 529 | * with new RCU read-side critical sections. One way that this can happen |
| 306 | * is via the following sequence of events: (1) CPU 0 enters an RCU | 530 | * is via the following sequence of events: (1) CPU 0 enters an RCU |
| 307 | * read-side critical section, (2) CPU 1 invokes call_rcu() to register | 531 | * read-side critical section, (2) CPU 1 invokes call_rcu() to register |
| 308 | * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, | 532 | * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, |
| @@ -317,7 +541,20 @@ extern int rcu_my_thread_group_empty(void); | |||
| 317 | * will be deferred until the outermost RCU read-side critical section | 541 | * will be deferred until the outermost RCU read-side critical section |
| 318 | * completes. | 542 | * completes. |
| 319 | * | 543 | * |
| 320 | * It is illegal to block while in an RCU read-side critical section. | 544 | * You can avoid reading and understanding the next paragraph by |
| 545 | * following this rule: don't put anything in an rcu_read_lock() RCU | ||
| 546 | * read-side critical section that would block in a !PREEMPT kernel. | ||
| 547 | * But if you want the full story, read on! | ||
| 548 | * | ||
| 549 | * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it | ||
| 550 | * is illegal to block while in an RCU read-side critical section. In | ||
| 551 | * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU) | ||
| 552 | * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may | ||
| 553 | * be preempted, but explicit blocking is illegal. Finally, in preemptible | ||
| 554 | * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds, | ||
| 555 | * RCU read-side critical sections may be preempted and they may also | ||
| 556 | * block, but only when acquiring spinlocks that are subject to priority | ||
| 557 | * inheritance. | ||
| 321 | */ | 558 | */ |
| 322 | static inline void rcu_read_lock(void) | 559 | static inline void rcu_read_lock(void) |
| 323 | { | 560 | { |
| @@ -337,7 +574,7 @@ static inline void rcu_read_lock(void) | |||
| 337 | */ | 574 | */ |
| 338 | 575 | ||
| 339 | /** | 576 | /** |
| 340 | * rcu_read_unlock - marks the end of an RCU read-side critical section. | 577 | * rcu_read_unlock() - marks the end of an RCU read-side critical section. |
| 341 | * | 578 | * |
| 342 | * See rcu_read_lock() for more information. | 579 | * See rcu_read_lock() for more information. |
| 343 | */ | 580 | */ |
| @@ -349,15 +586,16 @@ static inline void rcu_read_unlock(void) | |||
| 349 | } | 586 | } |
| 350 | 587 | ||
| 351 | /** | 588 | /** |
| 352 | * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section | 589 | * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section |
| 353 | * | 590 | * |
| 354 | * This is the equivalent of rcu_read_lock(), but to be used when updates | 591 | * This is the equivalent of rcu_read_lock(), but to be used when updates |
| 355 | * are being done using call_rcu_bh(). Since call_rcu_bh() callbacks | 592 | * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since |
| 356 | * consider completion of a softirq handler to be a quiescent state, | 593 | * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a |
| 357 | * a process in RCU read-side critical section must be protected by | 594 | * softirq handler to be a quiescent state, a process in RCU read-side |
| 358 | * disabling softirqs. Read-side critical sections in interrupt context | 595 | * critical section must be protected by disabling softirqs. Read-side |
| 359 | * can use just rcu_read_lock(). | 596 | * critical sections in interrupt context can use just rcu_read_lock(), |
| 360 | * | 597 | * though this should at least be commented to avoid confusing people |
| 598 | * reading the code. | ||
| 361 | */ | 599 | */ |
| 362 | static inline void rcu_read_lock_bh(void) | 600 | static inline void rcu_read_lock_bh(void) |
| 363 | { | 601 | { |
| @@ -379,13 +617,12 @@ static inline void rcu_read_unlock_bh(void) | |||
| 379 | } | 617 | } |
| 380 | 618 | ||
| 381 | /** | 619 | /** |
| 382 | * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section | 620 | * rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section |
| 383 | * | 621 | * |
| 384 | * Should be used with either | 622 | * This is the equivalent of rcu_read_lock(), but to be used when updates |
| 385 | * - synchronize_sched() | 623 | * are being done using call_rcu_sched() or synchronize_rcu_sched(). |
| 386 | * or | 624 | * Read-side critical sections can also be introduced by anything that |
| 387 | * - call_rcu_sched() and rcu_barrier_sched() | 625 | * disables preemption, including local_irq_disable() and friends. |
| 388 | * on the write-side to insure proper synchronization. | ||
| 389 | */ | 626 | */ |
| 390 | static inline void rcu_read_lock_sched(void) | 627 | static inline void rcu_read_lock_sched(void) |
| 391 | { | 628 | { |
| @@ -420,54 +657,14 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) | |||
| 420 | preempt_enable_notrace(); | 657 | preempt_enable_notrace(); |
| 421 | } | 658 | } |
| 422 | 659 | ||
| 423 | |||
| 424 | /** | 660 | /** |
| 425 | * rcu_dereference_raw - fetch an RCU-protected pointer | 661 | * rcu_assign_pointer() - assign to RCU-protected pointer |
| 662 | * @p: pointer to assign to | ||
| 663 | * @v: value to assign (publish) | ||
| 426 | * | 664 | * |
| 427 | * The caller must be within some flavor of RCU read-side critical | 665 | * Assigns the specified value to the specified RCU-protected |
| 428 | * section, or must be otherwise preventing the pointer from changing, | 666 | * pointer, ensuring that any concurrent RCU readers will see |
| 429 | * for example, by holding an appropriate lock. This pointer may later | 667 | * any prior initialization. Returns the value assigned. |
| 430 | * be safely dereferenced. It is the caller's responsibility to have | ||
| 431 | * done the right thing, as this primitive does no checking of any kind. | ||
| 432 | * | ||
| 433 | * Inserts memory barriers on architectures that require them | ||
| 434 | * (currently only the Alpha), and, more importantly, documents | ||
| 435 | * exactly which pointers are protected by RCU. | ||
| 436 | */ | ||
| 437 | #define rcu_dereference_raw(p) ({ \ | ||
| 438 | typeof(p) _________p1 = ACCESS_ONCE(p); \ | ||
| 439 | smp_read_barrier_depends(); \ | ||
| 440 | (_________p1); \ | ||
| 441 | }) | ||
| 442 | |||
| 443 | /** | ||
| 444 | * rcu_dereference - fetch an RCU-protected pointer, checking for RCU | ||
| 445 | * | ||
| 446 | * Makes rcu_dereference_check() do the dirty work. | ||
| 447 | */ | ||
| 448 | #define rcu_dereference(p) \ | ||
| 449 | rcu_dereference_check(p, rcu_read_lock_held()) | ||
| 450 | |||
| 451 | /** | ||
| 452 | * rcu_dereference_bh - fetch an RCU-protected pointer, checking for RCU-bh | ||
| 453 | * | ||
| 454 | * Makes rcu_dereference_check() do the dirty work. | ||
| 455 | */ | ||
| 456 | #define rcu_dereference_bh(p) \ | ||
| 457 | rcu_dereference_check(p, rcu_read_lock_bh_held() || irqs_disabled()) | ||
| 458 | |||
| 459 | /** | ||
| 460 | * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched | ||
| 461 | * | ||
| 462 | * Makes rcu_dereference_check() do the dirty work. | ||
| 463 | */ | ||
| 464 | #define rcu_dereference_sched(p) \ | ||
| 465 | rcu_dereference_check(p, rcu_read_lock_sched_held()) | ||
| 466 | |||
| 467 | /** | ||
| 468 | * rcu_assign_pointer - assign (publicize) a pointer to a newly | ||
| 469 | * initialized structure that will be dereferenced by RCU read-side | ||
| 470 | * critical sections. Returns the value assigned. | ||
| 471 | * | 668 | * |
| 472 | * Inserts memory barriers on architectures that require them | 669 | * Inserts memory barriers on architectures that require them |
| 473 | * (pretty much all of them other than x86), and also prevents | 670 | * (pretty much all of them other than x86), and also prevents |
| @@ -476,14 +673,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) | |||
| 476 | * call documents which pointers will be dereferenced by RCU read-side | 673 | * call documents which pointers will be dereferenced by RCU read-side |
| 477 | * code. | 674 | * code. |
| 478 | */ | 675 | */ |
| 479 | |||
| 480 | #define rcu_assign_pointer(p, v) \ | 676 | #define rcu_assign_pointer(p, v) \ |
| 481 | ({ \ | 677 | __rcu_assign_pointer((p), (v), __rcu) |
| 482 | if (!__builtin_constant_p(v) || \ | 678 | |
| 483 | ((v) != NULL)) \ | 679 | /** |
| 484 | smp_wmb(); \ | 680 | * RCU_INIT_POINTER() - initialize an RCU protected pointer |
| 485 | (p) = (v); \ | 681 | * |
| 486 | }) | 682 | * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep |
| 683 | * splats. | ||
| 684 | */ | ||
| 685 | #define RCU_INIT_POINTER(p, v) \ | ||
| 686 | p = (typeof(*v) __force __rcu *)(v) | ||
| 487 | 687 | ||
| 488 | /* Infrastructure to implement the synchronize_() primitives. */ | 688 | /* Infrastructure to implement the synchronize_() primitives. */ |
| 489 | 689 | ||
| @@ -494,26 +694,37 @@ struct rcu_synchronize { | |||
| 494 | 694 | ||
| 495 | extern void wakeme_after_rcu(struct rcu_head *head); | 695 | extern void wakeme_after_rcu(struct rcu_head *head); |
| 496 | 696 | ||
| 697 | #ifdef CONFIG_PREEMPT_RCU | ||
| 698 | |||
| 497 | /** | 699 | /** |
| 498 | * call_rcu - Queue an RCU callback for invocation after a grace period. | 700 | * call_rcu() - Queue an RCU callback for invocation after a grace period. |
| 499 | * @head: structure to be used for queueing the RCU updates. | 701 | * @head: structure to be used for queueing the RCU updates. |
| 500 | * @func: actual update function to be invoked after the grace period | 702 | * @func: actual callback function to be invoked after the grace period |
| 501 | * | 703 | * |
| 502 | * The update function will be invoked some time after a full grace | 704 | * The callback function will be invoked some time after a full grace |
| 503 | * period elapses, in other words after all currently executing RCU | 705 | * period elapses, in other words after all pre-existing RCU read-side |
| 504 | * read-side critical sections have completed. RCU read-side critical | 706 | * critical sections have completed. However, the callback function |
| 707 | * might well execute concurrently with RCU read-side critical sections | ||
| 708 | * that started after call_rcu() was invoked. RCU read-side critical | ||
| 505 | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), | 709 | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), |
| 506 | * and may be nested. | 710 | * and may be nested. |
| 507 | */ | 711 | */ |
| 508 | extern void call_rcu(struct rcu_head *head, | 712 | extern void call_rcu(struct rcu_head *head, |
| 509 | void (*func)(struct rcu_head *head)); | 713 | void (*func)(struct rcu_head *head)); |
| 510 | 714 | ||
| 715 | #else /* #ifdef CONFIG_PREEMPT_RCU */ | ||
| 716 | |||
| 717 | /* In classic RCU, call_rcu() is just call_rcu_sched(). */ | ||
| 718 | #define call_rcu call_rcu_sched | ||
| 719 | |||
| 720 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | ||
| 721 | |||
| 511 | /** | 722 | /** |
| 512 | * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. | 723 | * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. |
| 513 | * @head: structure to be used for queueing the RCU updates. | 724 | * @head: structure to be used for queueing the RCU updates. |
| 514 | * @func: actual update function to be invoked after the grace period | 725 | * @func: actual callback function to be invoked after the grace period |
| 515 | * | 726 | * |
| 516 | * The update function will be invoked some time after a full grace | 727 | * The callback function will be invoked some time after a full grace |
| 517 | * period elapses, in other words after all currently executing RCU | 728 | * period elapses, in other words after all currently executing RCU |
| 518 | * read-side critical sections have completed. call_rcu_bh() assumes | 729 | * read-side critical sections have completed. call_rcu_bh() assumes |
| 519 | * that the read-side critical sections end on completion of a softirq | 730 | * that the read-side critical sections end on completion of a softirq |
| @@ -566,37 +777,4 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head) | |||
| 566 | } | 777 | } |
| 567 | #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | 778 | #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ |
| 568 | 779 | ||
| 569 | #ifndef CONFIG_PROVE_RCU | ||
| 570 | #define __do_rcu_dereference_check(c) do { } while (0) | ||
| 571 | #endif /* #ifdef CONFIG_PROVE_RCU */ | ||
| 572 | |||
| 573 | #define __rcu_dereference_index_check(p, c) \ | ||
| 574 | ({ \ | ||
| 575 | typeof(p) _________p1 = ACCESS_ONCE(p); \ | ||
| 576 | __do_rcu_dereference_check(c); \ | ||
| 577 | smp_read_barrier_depends(); \ | ||
| 578 | (_________p1); \ | ||
| 579 | }) | ||
| 580 | |||
| 581 | /** | ||
| 582 | * rcu_dereference_index_check() - rcu_dereference for indices with debug checking | ||
| 583 | * @p: The pointer to read, prior to dereferencing | ||
| 584 | * @c: The conditions under which the dereference will take place | ||
| 585 | * | ||
| 586 | * Similar to rcu_dereference_check(), but omits the sparse checking. | ||
| 587 | * This allows rcu_dereference_index_check() to be used on integers, | ||
| 588 | * which can then be used as array indices. Attempting to use | ||
| 589 | * rcu_dereference_check() on an integer will give compiler warnings | ||
| 590 | * because the sparse address-space mechanism relies on dereferencing | ||
| 591 | * the RCU-protected pointer. Dereferencing integers is not something | ||
| 592 | * that even gcc will put up with. | ||
| 593 | * | ||
| 594 | * Note that this function does not implicitly check for RCU read-side | ||
| 595 | * critical sections. If this function gains lots of uses, it might | ||
| 596 | * make sense to provide versions for each flavor of RCU, but it does | ||
| 597 | * not make sense as of early 2010. | ||
| 598 | */ | ||
| 599 | #define rcu_dereference_index_check(p, c) \ | ||
| 600 | __rcu_dereference_index_check((p), (c)) | ||
| 601 | |||
| 602 | #endif /* __LINUX_RCUPDATE_H */ | 780 | #endif /* __LINUX_RCUPDATE_H */ |
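Taken together, the rcupdate.h rework gives each access pattern a dedicated, sparse-checkable primitive: rcu_assign_pointer() to publish, rcu_dereference() (now built on rcu_dereference_check()) to read, rcu_dereference_protected() for update-side reads under a lock, and rcu_access_pointer() for bare NULL tests. A sketch of the full publish/read/reclaim cycle, with all names (struct config, config_lock, and the helpers) illustrative:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct config {
            int threshold;
            struct rcu_head rcu;
    };

    static struct config __rcu *cur_config;       /* sparse-checked pointer */
    static DEFINE_SPINLOCK(config_lock);

    static int config_threshold(void)             /* reader */
    {
            struct config *c;
            int t = 0;

            rcu_read_lock();
            /* lockdep splats if this runs outside a read-side section */
            c = rcu_dereference(cur_config);
            if (c)
                    t = c->threshold;
            rcu_read_unlock();
            return t;
    }

    static void config_reclaim(struct rcu_head *head)
    {
            kfree(container_of(head, struct config, rcu));
    }

    static int config_update(int threshold)       /* updater */
    {
            struct config *newc, *oldc;

            newc = kmalloc(sizeof(*newc), GFP_KERNEL);
            if (!newc)
                    return -ENOMEM;
            newc->threshold = threshold;

            spin_lock(&config_lock);
            /* documents that config_lock, not rcu_read_lock(), protects us */
            oldc = rcu_dereference_protected(cur_config,
                                             lockdep_is_held(&config_lock));
            rcu_assign_pointer(cur_config, newc);
            spin_unlock(&config_lock);

            if (oldc)
                    call_rcu(&oldc->rcu, config_reclaim);
            return 0;
    }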
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index e2e893144a84..13877cb93a60 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h | |||
| @@ -27,103 +27,101 @@ | |||
| 27 | 27 | ||
| 28 | #include <linux/cache.h> | 28 | #include <linux/cache.h> |
| 29 | 29 | ||
| 30 | void rcu_sched_qs(int cpu); | 30 | #define rcu_init_sched() do { } while (0) |
| 31 | void rcu_bh_qs(int cpu); | ||
| 32 | static inline void rcu_note_context_switch(int cpu) | ||
| 33 | { | ||
| 34 | rcu_sched_qs(cpu); | ||
| 35 | } | ||
| 36 | 31 | ||
| 37 | #define __rcu_read_lock() preempt_disable() | 32 | #ifdef CONFIG_TINY_RCU |
| 38 | #define __rcu_read_unlock() preempt_enable() | ||
| 39 | #define __rcu_read_lock_bh() local_bh_disable() | ||
| 40 | #define __rcu_read_unlock_bh() local_bh_enable() | ||
| 41 | #define call_rcu_sched call_rcu | ||
| 42 | 33 | ||
| 43 | #define rcu_init_sched() do { } while (0) | 34 | static inline void synchronize_rcu_expedited(void) |
| 44 | extern void rcu_check_callbacks(int cpu, int user); | 35 | { |
| 36 | synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */ | ||
| 37 | } | ||
| 45 | 38 | ||
| 46 | static inline int rcu_needs_cpu(int cpu) | 39 | static inline void rcu_barrier(void) |
| 47 | { | 40 | { |
| 48 | return 0; | 41 | rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */ |
| 49 | } | 42 | } |
| 50 | 43 | ||
| 51 | /* | 44 | #else /* #ifdef CONFIG_TINY_RCU */ |
| 52 | * Return the number of grace periods. | 45 | |
| 53 | */ | 46 | void rcu_barrier(void); |
| 54 | static inline long rcu_batches_completed(void) | 47 | void synchronize_rcu_expedited(void); |
| 48 | |||
| 49 | #endif /* #else #ifdef CONFIG_TINY_RCU */ | ||
| 50 | |||
| 51 | static inline void synchronize_rcu_bh(void) | ||
| 55 | { | 52 | { |
| 56 | return 0; | 53 | synchronize_sched(); |
| 57 | } | 54 | } |
| 58 | 55 | ||
| 59 | /* | 56 | static inline void synchronize_rcu_bh_expedited(void) |
| 60 | * Return the number of bottom-half grace periods. | ||
| 61 | */ | ||
| 62 | static inline long rcu_batches_completed_bh(void) | ||
| 63 | { | 57 | { |
| 64 | return 0; | 58 | synchronize_sched(); |
| 65 | } | 59 | } |
| 66 | 60 | ||
| 67 | static inline void rcu_force_quiescent_state(void) | 61 | #ifdef CONFIG_TINY_RCU |
| 62 | |||
| 63 | static inline void rcu_preempt_note_context_switch(void) | ||
| 68 | { | 64 | { |
| 69 | } | 65 | } |
| 70 | 66 | ||
| 71 | static inline void rcu_bh_force_quiescent_state(void) | 67 | static inline void exit_rcu(void) |
| 72 | { | 68 | { |
| 73 | } | 69 | } |
| 74 | 70 | ||
| 75 | static inline void rcu_sched_force_quiescent_state(void) | 71 | static inline int rcu_needs_cpu(int cpu) |
| 76 | { | 72 | { |
| 73 | return 0; | ||
| 77 | } | 74 | } |
| 78 | 75 | ||
| 79 | extern void synchronize_sched(void); | 76 | #else /* #ifdef CONFIG_TINY_RCU */ |
| 77 | |||
| 78 | void rcu_preempt_note_context_switch(void); | ||
| 79 | extern void exit_rcu(void); | ||
| 80 | int rcu_preempt_needs_cpu(void); | ||
| 80 | 81 | ||
| 81 | static inline void synchronize_rcu(void) | 82 | static inline int rcu_needs_cpu(int cpu) |
| 82 | { | 83 | { |
| 83 | synchronize_sched(); | 84 | return rcu_preempt_needs_cpu(); |
| 84 | } | 85 | } |
| 85 | 86 | ||
| 86 | static inline void synchronize_rcu_bh(void) | 87 | #endif /* #else #ifdef CONFIG_TINY_RCU */ |
| 88 | |||
| 89 | static inline void rcu_note_context_switch(int cpu) | ||
| 87 | { | 90 | { |
| 88 | synchronize_sched(); | 91 | rcu_sched_qs(cpu); |
| 92 | rcu_preempt_note_context_switch(); | ||
| 89 | } | 93 | } |
| 90 | 94 | ||
| 91 | static inline void synchronize_rcu_expedited(void) | 95 | /* |
| 96 | * Return the number of grace periods. | ||
| 97 | */ | ||
| 98 | static inline long rcu_batches_completed(void) | ||
| 92 | { | 99 | { |
| 93 | synchronize_sched(); | 100 | return 0; |
| 94 | } | 101 | } |
| 95 | 102 | ||
| 96 | static inline void synchronize_rcu_bh_expedited(void) | 103 | /* |
| 104 | * Return the number of bottom-half grace periods. | ||
| 105 | */ | ||
| 106 | static inline long rcu_batches_completed_bh(void) | ||
| 97 | { | 107 | { |
| 98 | synchronize_sched(); | 108 | return 0; |
| 99 | } | 109 | } |
| 100 | 110 | ||
| 101 | struct notifier_block; | 111 | static inline void rcu_force_quiescent_state(void) |
| 102 | |||
| 103 | #ifdef CONFIG_NO_HZ | ||
| 104 | |||
| 105 | extern void rcu_enter_nohz(void); | ||
| 106 | extern void rcu_exit_nohz(void); | ||
| 107 | |||
| 108 | #else /* #ifdef CONFIG_NO_HZ */ | ||
| 109 | |||
| 110 | static inline void rcu_enter_nohz(void) | ||
| 111 | { | 112 | { |
| 112 | } | 113 | } |
| 113 | 114 | ||
| 114 | static inline void rcu_exit_nohz(void) | 115 | static inline void rcu_bh_force_quiescent_state(void) |
| 115 | { | 116 | { |
| 116 | } | 117 | } |
| 117 | 118 | ||
| 118 | #endif /* #else #ifdef CONFIG_NO_HZ */ | 119 | static inline void rcu_sched_force_quiescent_state(void) |
| 119 | |||
| 120 | static inline void exit_rcu(void) | ||
| 121 | { | 120 | { |
| 122 | } | 121 | } |
| 123 | 122 | ||
| 124 | static inline int rcu_preempt_depth(void) | 123 | static inline void rcu_cpu_stall_reset(void) |
| 125 | { | 124 | { |
| 126 | return 0; | ||
| 127 | } | 125 | } |
| 128 | 126 | ||
| 129 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 127 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c0ed1c056f29..95518e628794 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h | |||
| @@ -30,64 +30,23 @@ | |||
| 30 | #ifndef __LINUX_RCUTREE_H | 30 | #ifndef __LINUX_RCUTREE_H |
| 31 | #define __LINUX_RCUTREE_H | 31 | #define __LINUX_RCUTREE_H |
| 32 | 32 | ||
| 33 | struct notifier_block; | ||
| 34 | |||
| 35 | extern void rcu_sched_qs(int cpu); | ||
| 36 | extern void rcu_bh_qs(int cpu); | ||
| 37 | extern void rcu_note_context_switch(int cpu); | 33 | extern void rcu_note_context_switch(int cpu); |
| 38 | extern int rcu_needs_cpu(int cpu); | 34 | extern int rcu_needs_cpu(int cpu); |
| 35 | extern void rcu_cpu_stall_reset(void); | ||
| 39 | 36 | ||
| 40 | #ifdef CONFIG_TREE_PREEMPT_RCU | 37 | #ifdef CONFIG_TREE_PREEMPT_RCU |
| 41 | 38 | ||
| 42 | extern void __rcu_read_lock(void); | ||
| 43 | extern void __rcu_read_unlock(void); | ||
| 44 | extern void synchronize_rcu(void); | ||
| 45 | extern void exit_rcu(void); | 39 | extern void exit_rcu(void); |
| 46 | 40 | ||
| 47 | /* | ||
| 48 | * Defined as macro as it is a very low level header | ||
| 49 | * included from areas that don't even know about current | ||
| 50 | */ | ||
| 51 | #define rcu_preempt_depth() (current->rcu_read_lock_nesting) | ||
| 52 | |||
| 53 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 41 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
| 54 | 42 | ||
| 55 | static inline void __rcu_read_lock(void) | ||
| 56 | { | ||
| 57 | preempt_disable(); | ||
| 58 | } | ||
| 59 | |||
| 60 | static inline void __rcu_read_unlock(void) | ||
| 61 | { | ||
| 62 | preempt_enable(); | ||
| 63 | } | ||
| 64 | |||
| 65 | #define synchronize_rcu synchronize_sched | ||
| 66 | |||
| 67 | static inline void exit_rcu(void) | 43 | static inline void exit_rcu(void) |
| 68 | { | 44 | { |
| 69 | } | 45 | } |
| 70 | 46 | ||
| 71 | static inline int rcu_preempt_depth(void) | ||
| 72 | { | ||
| 73 | return 0; | ||
| 74 | } | ||
| 75 | |||
| 76 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ | 47 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ |
| 77 | 48 | ||
| 78 | static inline void __rcu_read_lock_bh(void) | ||
| 79 | { | ||
| 80 | local_bh_disable(); | ||
| 81 | } | ||
| 82 | static inline void __rcu_read_unlock_bh(void) | ||
| 83 | { | ||
| 84 | local_bh_enable(); | ||
| 85 | } | ||
| 86 | |||
| 87 | extern void call_rcu_sched(struct rcu_head *head, | ||
| 88 | void (*func)(struct rcu_head *rcu)); | ||
| 89 | extern void synchronize_rcu_bh(void); | 49 | extern void synchronize_rcu_bh(void); |
| 90 | extern void synchronize_sched(void); | ||
| 91 | extern void synchronize_rcu_expedited(void); | 50 | extern void synchronize_rcu_expedited(void); |
| 92 | 51 | ||
| 93 | static inline void synchronize_rcu_bh_expedited(void) | 52 | static inline void synchronize_rcu_bh_expedited(void) |
| @@ -95,7 +54,7 @@ static inline void synchronize_rcu_bh_expedited(void) | |||
| 95 | synchronize_sched_expedited(); | 54 | synchronize_sched_expedited(); |
| 96 | } | 55 | } |
| 97 | 56 | ||
| 98 | extern void rcu_check_callbacks(int cpu, int user); | 57 | extern void rcu_barrier(void); |
| 99 | 58 | ||
| 100 | extern long rcu_batches_completed(void); | 59 | extern long rcu_batches_completed(void); |
| 101 | extern long rcu_batches_completed_bh(void); | 60 | extern long rcu_batches_completed_bh(void); |
| @@ -104,18 +63,6 @@ extern void rcu_force_quiescent_state(void); | |||
| 104 | extern void rcu_bh_force_quiescent_state(void); | 63 | extern void rcu_bh_force_quiescent_state(void); |
| 105 | extern void rcu_sched_force_quiescent_state(void); | 64 | extern void rcu_sched_force_quiescent_state(void); |
| 106 | 65 | ||
| 107 | #ifdef CONFIG_NO_HZ | ||
| 108 | void rcu_enter_nohz(void); | ||
| 109 | void rcu_exit_nohz(void); | ||
| 110 | #else /* CONFIG_NO_HZ */ | ||
| 111 | static inline void rcu_enter_nohz(void) | ||
| 112 | { | ||
| 113 | } | ||
| 114 | static inline void rcu_exit_nohz(void) | ||
| 115 | { | ||
| 116 | } | ||
| 117 | #endif /* CONFIG_NO_HZ */ | ||
| 118 | |||
| 119 | /* A context switch is a grace period for RCU-sched and RCU-bh. */ | 66 | /* A context switch is a grace period for RCU-sched and RCU-bh. */ |
| 120 | static inline int rcu_blocking_is_gp(void) | 67 | static inline int rcu_blocking_is_gp(void) |
| 121 | { | 68 | { |
diff --git a/include/linux/sched.h b/include/linux/sched.h index eb3c1ceec06e..0383601a927c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
| @@ -875,6 +875,7 @@ enum sched_domain_level { | |||
| 875 | SD_LV_NONE = 0, | 875 | SD_LV_NONE = 0, |
| 876 | SD_LV_SIBLING, | 876 | SD_LV_SIBLING, |
| 877 | SD_LV_MC, | 877 | SD_LV_MC, |
| 878 | SD_LV_BOOK, | ||
| 878 | SD_LV_CPU, | 879 | SD_LV_CPU, |
| 879 | SD_LV_NODE, | 880 | SD_LV_NODE, |
| 880 | SD_LV_ALLNODES, | 881 | SD_LV_ALLNODES, |
| @@ -1209,11 +1210,13 @@ struct task_struct { | |||
| 1209 | unsigned int policy; | 1210 | unsigned int policy; |
| 1210 | cpumask_t cpus_allowed; | 1211 | cpumask_t cpus_allowed; |
| 1211 | 1212 | ||
| 1212 | #ifdef CONFIG_TREE_PREEMPT_RCU | 1213 | #ifdef CONFIG_PREEMPT_RCU |
| 1213 | int rcu_read_lock_nesting; | 1214 | int rcu_read_lock_nesting; |
| 1214 | char rcu_read_unlock_special; | 1215 | char rcu_read_unlock_special; |
| 1215 | struct rcu_node *rcu_blocked_node; | ||
| 1216 | struct list_head rcu_node_entry; | 1216 | struct list_head rcu_node_entry; |
| 1217 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ | ||
| 1218 | #ifdef CONFIG_TREE_PREEMPT_RCU | ||
| 1219 | struct rcu_node *rcu_blocked_node; | ||
| 1217 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 1220 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
| 1218 | 1221 | ||
| 1219 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 1222 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
| @@ -1295,9 +1298,9 @@ struct task_struct { | |||
| 1295 | struct list_head cpu_timers[3]; | 1298 | struct list_head cpu_timers[3]; |
| 1296 | 1299 | ||
| 1297 | /* process credentials */ | 1300 | /* process credentials */ |
| 1298 | const struct cred *real_cred; /* objective and real subjective task | 1301 | const struct cred __rcu *real_cred; /* objective and real subjective task |
| 1299 | * credentials (COW) */ | 1302 | * credentials (COW) */ |
| 1300 | const struct cred *cred; /* effective (overridable) subjective task | 1303 | const struct cred __rcu *cred; /* effective (overridable) subjective task |
| 1301 | * credentials (COW) */ | 1304 | * credentials (COW) */ |
| 1302 | struct mutex cred_guard_mutex; /* guard against foreign influences on | 1305 | struct mutex cred_guard_mutex; /* guard against foreign influences on |
| 1303 | * credential calculations | 1306 | * credential calculations |
| @@ -1425,7 +1428,7 @@ struct task_struct { | |||
| 1425 | #endif | 1428 | #endif |
| 1426 | #ifdef CONFIG_CGROUPS | 1429 | #ifdef CONFIG_CGROUPS |
| 1427 | /* Control Group info protected by css_set_lock */ | 1430 | /* Control Group info protected by css_set_lock */ |
| 1428 | struct css_set *cgroups; | 1431 | struct css_set __rcu *cgroups; |
| 1429 | /* cg_list protected by css_set_lock and tsk->alloc_lock */ | 1432 | /* cg_list protected by css_set_lock and tsk->alloc_lock */ |
| 1430 | struct list_head cg_list; | 1433 | struct list_head cg_list; |
| 1431 | #endif | 1434 | #endif |
| @@ -1688,8 +1691,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * | |||
| 1688 | /* | 1691 | /* |
| 1689 | * Per process flags | 1692 | * Per process flags |
| 1690 | */ | 1693 | */ |
| 1691 | #define PF_ALIGNWARN 0x00000001 /* Print alignment warning msgs */ | 1694 | #define PF_KSOFTIRQD 0x00000001 /* I am ksoftirqd */ |
| 1692 | /* Not implemented yet, only for 486*/ | ||
| 1693 | #define PF_STARTING 0x00000002 /* being created */ | 1695 | #define PF_STARTING 0x00000002 /* being created */ |
| 1694 | #define PF_EXITING 0x00000004 /* getting shut down */ | 1696 | #define PF_EXITING 0x00000004 /* getting shut down */ |
| 1695 | #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ | 1697 | #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ |
| @@ -1747,7 +1749,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * | |||
| 1747 | #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) | 1749 | #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) |
| 1748 | #define used_math() tsk_used_math(current) | 1750 | #define used_math() tsk_used_math(current) |
| 1749 | 1751 | ||
| 1750 | #ifdef CONFIG_TREE_PREEMPT_RCU | 1752 | #ifdef CONFIG_PREEMPT_RCU |
| 1751 | 1753 | ||
| 1752 | #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ | 1754 | #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ |
| 1753 | #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ | 1755 | #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ |
| @@ -1756,7 +1758,9 @@ static inline void rcu_copy_process(struct task_struct *p) | |||
| 1756 | { | 1758 | { |
| 1757 | p->rcu_read_lock_nesting = 0; | 1759 | p->rcu_read_lock_nesting = 0; |
| 1758 | p->rcu_read_unlock_special = 0; | 1760 | p->rcu_read_unlock_special = 0; |
| 1761 | #ifdef CONFIG_TREE_PREEMPT_RCU | ||
| 1759 | p->rcu_blocked_node = NULL; | 1762 | p->rcu_blocked_node = NULL; |
| 1763 | #endif | ||
| 1760 | INIT_LIST_HEAD(&p->rcu_node_entry); | 1764 | INIT_LIST_HEAD(&p->rcu_node_entry); |
| 1761 | } | 1765 | } |
| 1762 | 1766 | ||
| @@ -1833,6 +1837,19 @@ extern void sched_clock_idle_sleep_event(void); | |||
| 1833 | extern void sched_clock_idle_wakeup_event(u64 delta_ns); | 1837 | extern void sched_clock_idle_wakeup_event(u64 delta_ns); |
| 1834 | #endif | 1838 | #endif |
| 1835 | 1839 | ||
| 1840 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
| 1841 | /* | ||
| 1842 | * An interface for runtime opt-in to IRQ time accounting based on sched_clock. | ||
| 1843 | * The explicit opt-in avoids a performance penalty on systems with | ||
| 1844 | * slow sched_clocks. | ||
| 1845 | */ | ||
| 1846 | extern void enable_sched_clock_irqtime(void); | ||
| 1847 | extern void disable_sched_clock_irqtime(void); | ||
| 1848 | #else | ||
| 1849 | static inline void enable_sched_clock_irqtime(void) {} | ||
| 1850 | static inline void disable_sched_clock_irqtime(void) {} | ||
| 1851 | #endif | ||
| 1852 | |||
| 1836 | extern unsigned long long | 1853 | extern unsigned long long |
| 1837 | task_sched_runtime(struct task_struct *task); | 1854 | task_sched_runtime(struct task_struct *task); |
| 1838 | extern unsigned long long thread_group_sched_runtime(struct task_struct *task); | 1855 | extern unsigned long long thread_group_sched_runtime(struct task_struct *task); |
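A hedged sketch of how an architecture might use this opt-in, assuming its clock is cheap to read; the predicate shown is hypothetical, not taken from any in-tree arch:

```c
/* Hypothetical arch init code: opt in to IRQ time accounting only when
 * sched_clock() is backed by a fast, stable counter, since the
 * accounting reads the clock on every IRQ entry and exit.
 */
static int __init arch_init_irqtime(void)
{
	if (arch_has_fast_stable_clock())	/* hypothetical predicate */
		enable_sched_clock_irqtime();
	else
		disable_sched_clock_irqtime();
	return 0;
}
```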
| @@ -2374,9 +2391,9 @@ extern int __cond_resched_lock(spinlock_t *lock); | |||
| 2374 | 2391 | ||
| 2375 | extern int __cond_resched_softirq(void); | 2392 | extern int __cond_resched_softirq(void); |
| 2376 | 2393 | ||
| 2377 | #define cond_resched_softirq() ({ \ | 2394 | #define cond_resched_softirq() ({ \ |
| 2378 | __might_sleep(__FILE__, __LINE__, SOFTIRQ_OFFSET); \ | 2395 | __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ |
| 2379 | __cond_resched_softirq(); \ | 2396 | __cond_resched_softirq(); \ |
| 2380 | }) | 2397 | }) |
| 2381 | 2398 | ||
| 2382 | /* | 2399 | /* |
diff --git a/include/linux/security.h b/include/linux/security.h index a22219afff09..b8246a8df7d2 100644 --- a/include/linux/security.h +++ b/include/linux/security.h | |||
| @@ -74,7 +74,7 @@ extern int cap_file_mmap(struct file *file, unsigned long reqprot, | |||
| 74 | extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); | 74 | extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); |
| 75 | extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, | 75 | extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, |
| 76 | unsigned long arg4, unsigned long arg5); | 76 | unsigned long arg4, unsigned long arg5); |
| 77 | extern int cap_task_setscheduler(struct task_struct *p, int policy, struct sched_param *lp); | 77 | extern int cap_task_setscheduler(struct task_struct *p); |
| 78 | extern int cap_task_setioprio(struct task_struct *p, int ioprio); | 78 | extern int cap_task_setioprio(struct task_struct *p, int ioprio); |
| 79 | extern int cap_task_setnice(struct task_struct *p, int nice); | 79 | extern int cap_task_setnice(struct task_struct *p, int nice); |
| 80 | extern int cap_syslog(int type, bool from_file); | 80 | extern int cap_syslog(int type, bool from_file); |
| @@ -959,6 +959,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) | |||
| 959 | * Sets the new child socket's sid to the openreq sid. | 959 | * Sets the new child socket's sid to the openreq sid. |
| 960 | * @inet_conn_established: | 960 | * @inet_conn_established: |
| 961 | * Sets the connection's peersid to the secmark on skb. | 961 | * Sets the connection's peersid to the secmark on skb. |
| 962 | * @secmark_relabel_packet: | ||
| 963 | * Check if the process should be allowed to relabel packets to the given secid. | ||
| 964 | * @secmark_refcount_inc: | ||
| 965 | * Tells the LSM to increment the number of secmark labeling rules loaded. | ||
| 966 | * @secmark_refcount_dec: | ||
| 967 | * Tells the LSM to decrement the number of secmark labeling rules loaded. | ||
| 962 | * @req_classify_flow: | 968 | * @req_classify_flow: |
| 963 | * Sets the flow's sid to the openreq sid. | 969 | * Sets the flow's sid to the openreq sid. |
| 964 | * @tun_dev_create: | 970 | * @tun_dev_create: |
| @@ -1279,9 +1285,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) | |||
| 1279 | * Return 0 if permission is granted. | 1285 | * Return 0 if permission is granted. |
| 1280 | * | 1286 | * |
| 1281 | * @secid_to_secctx: | 1287 | * @secid_to_secctx: |
| 1282 | * Convert secid to security context. | 1288 | * Convert secid to security context. If secdata is NULL, the length of |
| 1289 | * the result will be returned in seclen, but no secdata will be returned. | ||
| 1290 | * Note that the length may change between the call that checks it and a | ||
| 1291 | * later call that actually allocates and returns the secdata. | ||
| 1283 | * @secid contains the security ID. | 1292 | * @secid contains the security ID. |
| 1284 | * @secdata contains the pointer that stores the converted security context. | 1293 | * @secdata contains the pointer that stores the converted security context. |
| 1294 | * @seclen contains the pointer that stores the length of the data. | ||
| 1285 | * @secctx_to_secid: | 1295 | * @secctx_to_secid: |
| 1286 | * Convert security context to secid. | 1296 | * Convert security context to secid. |
| 1287 | * @secid contains the pointer to the generated security ID. | 1297 | * @secid contains the pointer to the generated security ID. |
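The NULL-secdata length probe documented above implies a retry-tolerant caller. A sketch under that reading, assuming the security_secid_to_secctx() wrapper forwards a NULL secdata to the hook as the documentation describes; the surrounding function is hypothetical:

```c
/* Sketch: the two-call pattern the new kerneldoc implies. */
static int show_secctx(u32 secid)
{
	char *ctx;
	u32 len;
	int err;

	err = security_secid_to_secctx(secid, NULL, &len); /* probe length */
	if (err)
		return err;
	/* len is only a hint; it may differ by the time of the real call */
	err = security_secid_to_secctx(secid, &ctx, &len); /* allocate ctx */
	if (err)
		return err;
	pr_info("secctx=%.*s\n", len, ctx);
	security_release_secctx(ctx, len);
	return 0;
}
```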
| @@ -1501,8 +1511,7 @@ struct security_operations { | |||
| 1501 | int (*task_getioprio) (struct task_struct *p); | 1511 | int (*task_getioprio) (struct task_struct *p); |
| 1502 | int (*task_setrlimit) (struct task_struct *p, unsigned int resource, | 1512 | int (*task_setrlimit) (struct task_struct *p, unsigned int resource, |
| 1503 | struct rlimit *new_rlim); | 1513 | struct rlimit *new_rlim); |
| 1504 | int (*task_setscheduler) (struct task_struct *p, int policy, | 1514 | int (*task_setscheduler) (struct task_struct *p); |
| 1505 | struct sched_param *lp); | ||
| 1506 | int (*task_getscheduler) (struct task_struct *p); | 1515 | int (*task_getscheduler) (struct task_struct *p); |
| 1507 | int (*task_movememory) (struct task_struct *p); | 1516 | int (*task_movememory) (struct task_struct *p); |
| 1508 | int (*task_kill) (struct task_struct *p, | 1517 | int (*task_kill) (struct task_struct *p, |
| @@ -1594,6 +1603,9 @@ struct security_operations { | |||
| 1594 | struct request_sock *req); | 1603 | struct request_sock *req); |
| 1595 | void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req); | 1604 | void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req); |
| 1596 | void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb); | 1605 | void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb); |
| 1606 | int (*secmark_relabel_packet) (u32 secid); | ||
| 1607 | void (*secmark_refcount_inc) (void); | ||
| 1608 | void (*secmark_refcount_dec) (void); | ||
| 1597 | void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl); | 1609 | void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl); |
| 1598 | int (*tun_dev_create)(void); | 1610 | int (*tun_dev_create)(void); |
| 1599 | void (*tun_dev_post_create)(struct sock *sk); | 1611 | void (*tun_dev_post_create)(struct sock *sk); |
| @@ -1752,8 +1764,7 @@ int security_task_setioprio(struct task_struct *p, int ioprio); | |||
| 1752 | int security_task_getioprio(struct task_struct *p); | 1764 | int security_task_getioprio(struct task_struct *p); |
| 1753 | int security_task_setrlimit(struct task_struct *p, unsigned int resource, | 1765 | int security_task_setrlimit(struct task_struct *p, unsigned int resource, |
| 1754 | struct rlimit *new_rlim); | 1766 | struct rlimit *new_rlim); |
| 1755 | int security_task_setscheduler(struct task_struct *p, | 1767 | int security_task_setscheduler(struct task_struct *p); |
| 1756 | int policy, struct sched_param *lp); | ||
| 1757 | int security_task_getscheduler(struct task_struct *p); | 1768 | int security_task_getscheduler(struct task_struct *p); |
| 1758 | int security_task_movememory(struct task_struct *p); | 1769 | int security_task_movememory(struct task_struct *p); |
| 1759 | int security_task_kill(struct task_struct *p, struct siginfo *info, | 1770 | int security_task_kill(struct task_struct *p, struct siginfo *info, |
| @@ -2320,11 +2331,9 @@ static inline int security_task_setrlimit(struct task_struct *p, | |||
| 2320 | return 0; | 2331 | return 0; |
| 2321 | } | 2332 | } |
| 2322 | 2333 | ||
| 2323 | static inline int security_task_setscheduler(struct task_struct *p, | 2334 | static inline int security_task_setscheduler(struct task_struct *p) |
| 2324 | int policy, | ||
| 2325 | struct sched_param *lp) | ||
| 2326 | { | 2335 | { |
| 2327 | return cap_task_setscheduler(p, policy, lp); | 2336 | return cap_task_setscheduler(p); |
| 2328 | } | 2337 | } |
| 2329 | 2338 | ||
| 2330 | static inline int security_task_getscheduler(struct task_struct *p) | 2339 | static inline int security_task_getscheduler(struct task_struct *p) |
| @@ -2551,6 +2560,9 @@ void security_inet_csk_clone(struct sock *newsk, | |||
| 2551 | const struct request_sock *req); | 2560 | const struct request_sock *req); |
| 2552 | void security_inet_conn_established(struct sock *sk, | 2561 | void security_inet_conn_established(struct sock *sk, |
| 2553 | struct sk_buff *skb); | 2562 | struct sk_buff *skb); |
| 2563 | int security_secmark_relabel_packet(u32 secid); | ||
| 2564 | void security_secmark_refcount_inc(void); | ||
| 2565 | void security_secmark_refcount_dec(void); | ||
| 2554 | int security_tun_dev_create(void); | 2566 | int security_tun_dev_create(void); |
| 2555 | void security_tun_dev_post_create(struct sock *sk); | 2567 | void security_tun_dev_post_create(struct sock *sk); |
| 2556 | int security_tun_dev_attach(struct sock *sk); | 2568 | int security_tun_dev_attach(struct sock *sk); |
| @@ -2705,6 +2717,19 @@ static inline void security_inet_conn_established(struct sock *sk, | |||
| 2705 | { | 2717 | { |
| 2706 | } | 2718 | } |
| 2707 | 2719 | ||
| 2720 | static inline int security_secmark_relabel_packet(u32 secid) | ||
| 2721 | { | ||
| 2722 | return 0; | ||
| 2723 | } | ||
| 2724 | |||
| 2725 | static inline void security_secmark_refcount_inc(void) | ||
| 2726 | { | ||
| 2727 | } | ||
| 2728 | |||
| 2729 | static inline void security_secmark_refcount_dec(void) | ||
| 2730 | { | ||
| 2731 | } | ||
| 2732 | |||
| 2708 | static inline int security_tun_dev_create(void) | 2733 | static inline int security_tun_dev_create(void) |
| 2709 | { | 2734 | { |
| 2710 | return 0; | 2735 | return 0; |
diff --git a/include/linux/selinux.h b/include/linux/selinux.h index 82e0f26a1299..44f459612690 100644 --- a/include/linux/selinux.h +++ b/include/linux/selinux.h | |||
| @@ -21,74 +21,11 @@ struct kern_ipc_perm; | |||
| 21 | #ifdef CONFIG_SECURITY_SELINUX | 21 | #ifdef CONFIG_SECURITY_SELINUX |
| 22 | 22 | ||
| 23 | /** | 23 | /** |
| 24 | * selinux_string_to_sid - map a security context string to a security ID | ||
| 25 | * @str: the security context string to be mapped | ||
| 26 | * @sid: ID value returned via this. | ||
| 27 | * | ||
| 28 | * Returns 0 if successful, with the SID stored in sid. A value | ||
| 29 | * of zero for sid indicates no SID could be determined (but no error | ||
| 30 | * occurred). | ||
| 31 | */ | ||
| 32 | int selinux_string_to_sid(char *str, u32 *sid); | ||
| 33 | |||
| 34 | /** | ||
| 35 | * selinux_secmark_relabel_packet_permission - secmark permission check | ||
| 36 | * @sid: SECMARK ID value to be applied to network packet | ||
| 37 | * | ||
| 38 | * Returns 0 if the current task is allowed to set the SECMARK label of | ||
| 39 | * packets with the supplied security ID. Note that it is implicit that | ||
| 40 | * the packet is always being relabeled from the default unlabeled value, | ||
| 41 | * and that the access control decision is made in the AVC. | ||
| 42 | */ | ||
| 43 | int selinux_secmark_relabel_packet_permission(u32 sid); | ||
| 44 | |||
| 45 | /** | ||
| 46 | * selinux_secmark_refcount_inc - increments the secmark use counter | ||
| 47 | * | ||
| 48 | * SELinux keeps track of the current SECMARK targets in use so it knows | ||
| 49 | * when to apply SECMARK label access checks to network packets. This | ||
| 50 | * function increments this reference count to indicate that a new SECMARK | ||
| 51 | * target has been configured. | ||
| 52 | */ | ||
| 53 | void selinux_secmark_refcount_inc(void); | ||
| 54 | |||
| 55 | /** | ||
| 56 | * selinux_secmark_refcount_dec - decrements the secmark use counter | ||
| 57 | * | ||
| 58 | * SELinux keeps track of the current SECMARK targets in use so it knows | ||
| 59 | * when to apply SECMARK label access checks to network packets. This | ||
| 60 | * function decrements this reference count to indicate that one of the | ||
| 61 | * existing SECMARK targets has been removed/flushed. | ||
| 62 | */ | ||
| 63 | void selinux_secmark_refcount_dec(void); | ||
| 64 | |||
| 65 | /** | ||
| 66 | * selinux_is_enabled - is SELinux enabled? | 24 | * selinux_is_enabled - is SELinux enabled? |
| 67 | */ | 25 | */ |
| 68 | bool selinux_is_enabled(void); | 26 | bool selinux_is_enabled(void); |
| 69 | #else | 27 | #else |
| 70 | 28 | ||
| 71 | static inline int selinux_string_to_sid(const char *str, u32 *sid) | ||
| 72 | { | ||
| 73 | *sid = 0; | ||
| 74 | return 0; | ||
| 75 | } | ||
| 76 | |||
| 77 | static inline int selinux_secmark_relabel_packet_permission(u32 sid) | ||
| 78 | { | ||
| 79 | return 0; | ||
| 80 | } | ||
| 81 | |||
| 82 | static inline void selinux_secmark_refcount_inc(void) | ||
| 83 | { | ||
| 84 | return; | ||
| 85 | } | ||
| 86 | |||
| 87 | static inline void selinux_secmark_refcount_dec(void) | ||
| 88 | { | ||
| 89 | return; | ||
| 90 | } | ||
| 91 | |||
| 92 | static inline bool selinux_is_enabled(void) | 29 | static inline bool selinux_is_enabled(void) |
| 93 | { | 30 | { |
| 94 | return false; | 31 | return false; |
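These exported selinux_* helpers are folded into the LSM-agnostic hooks declared in security.h above (secmark_relabel_packet, secmark_refcount_inc/dec). A rough sketch of how a SECMARK-style netfilter target might use the replacements; only the security_* calls come from this patch, the surrounding functions are hypothetical:

```c
/* Sketch: configuring and tearing down one SECMARK rule via the new
 * generic hooks instead of the removed SELinux-only exports.
 */
static int secmark_rule_add(u32 secid)
{
	int err;

	err = security_secmark_relabel_packet(secid); /* permission check */
	if (err)
		return err;
	security_secmark_refcount_inc();	/* one more active rule */
	return 0;
}

static void secmark_rule_del(void)
{
	security_secmark_refcount_dec();	/* rule removed or flushed */
}
```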
diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4d5d2f546dbf..58971e891f48 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h | |||
| @@ -108,19 +108,43 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) | |||
| 108 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 108 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
| 109 | 109 | ||
| 110 | /** | 110 | /** |
| 111 | * srcu_dereference - fetch SRCU-protected pointer with checking | 111 | * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing |
| 112 | * @p: the pointer to fetch and protect for later dereferencing | ||
| 113 | * @sp: pointer to the srcu_struct, which is used to check that we | ||
| 114 | * really are in an SRCU read-side critical section. | ||
| 115 | * @c: condition to check for update-side use | ||
| 112 | * | 116 | * |
| 113 | * Makes rcu_dereference_check() do the dirty work. | 117 | * If PROVE_RCU is enabled, invoking this outside of an RCU read-side |
| 118 | * critical section will result in an RCU-lockdep splat, unless @c evaluates | ||
| 119 | * to 1. The @c argument will normally be a logical expression containing | ||
| 120 | * lockdep_is_held() calls. | ||
| 114 | */ | 121 | */ |
| 115 | #define srcu_dereference(p, sp) \ | 122 | #define srcu_dereference_check(p, sp, c) \ |
| 116 | rcu_dereference_check(p, srcu_read_lock_held(sp)) | 123 | __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu) |
| 124 | |||
| 125 | /** | ||
| 126 | * srcu_dereference - fetch SRCU-protected pointer for later dereferencing | ||
| 127 | * @p: the pointer to fetch and protect for later dereferencing | ||
| 128 | * @sp: pointer to the srcu_struct, which is used to check that we | ||
| 129 | * really are in an SRCU read-side critical section. | ||
| 130 | * | ||
| 131 | * Makes rcu_dereference_check() do the dirty work. If PROVE_RCU | ||
| 132 | * is enabled, invoking this outside of an RCU read-side critical | ||
| 133 | * section will result in an RCU-lockdep splat. | ||
| 134 | */ | ||
| 135 | #define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0) | ||
| 117 | 136 | ||
| 118 | /** | 137 | /** |
| 119 | * srcu_read_lock - register a new reader for an SRCU-protected structure. | 138 | * srcu_read_lock - register a new reader for an SRCU-protected structure. |
| 120 | * @sp: srcu_struct in which to register the new reader. | 139 | * @sp: srcu_struct in which to register the new reader. |
| 121 | * | 140 | * |
| 122 | * Enter an SRCU read-side critical section. Note that SRCU read-side | 141 | * Enter an SRCU read-side critical section. Note that SRCU read-side |
| 123 | * critical sections may be nested. | 142 | * critical sections may be nested. However, it is illegal to |
| 143 | * call anything that waits on an SRCU grace period for the same | ||
| 144 | * srcu_struct, whether directly or indirectly. Please note that | ||
| 145 | * one way to indirectly wait on an SRCU grace period is to acquire | ||
| 146 | * a mutex that is held elsewhere while calling synchronize_srcu() or | ||
| 147 | * synchronize_srcu_expedited(). | ||
| 124 | */ | 148 | */ |
| 125 | static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) | 149 | static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) |
| 126 | { | 150 | { |
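A usage sketch for the new srcu_dereference_check(); the srcu_struct, mutex, and struct names are hypothetical:

```c
struct foo { int val; };

static DEFINE_MUTEX(my_lock);
static struct srcu_struct my_srcu;
static struct foo __rcu *gp;

/* Reader side: the implicit srcu_read_lock_held(sp) check suffices. */
static int reader(void)
{
	int idx, v;

	idx = srcu_read_lock(&my_srcu);
	v = srcu_dereference(gp, &my_srcu)->val;
	srcu_read_unlock(&my_srcu, idx);
	return v;
}

/* Update side: not in a read-side section, so name the lock held
 * to suppress the RCU-lockdep splat under PROVE_RCU.
 */
static void updater_peek(void)
{
	struct foo *p;

	mutex_lock(&my_lock);
	p = srcu_dereference_check(gp, &my_srcu, lockdep_is_held(&my_lock));
	/* ... inspect or replace p ... */
	mutex_unlock(&my_lock);
}
```

Note the interaction with the expanded srcu_read_lock() comment above: if readers can take my_lock, calling synchronize_srcu() on my_srcu while holding it would be exactly the indirect grace-period wait the comment forbids.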
diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 671538d25bc1..8eee9dbbfe7a 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h | |||
| @@ -69,7 +69,7 @@ struct gss_cl_ctx { | |||
| 69 | enum rpc_gss_proc gc_proc; | 69 | enum rpc_gss_proc gc_proc; |
| 70 | u32 gc_seq; | 70 | u32 gc_seq; |
| 71 | spinlock_t gc_seq_lock; | 71 | spinlock_t gc_seq_lock; |
| 72 | struct gss_ctx *gc_gss_ctx; | 72 | struct gss_ctx __rcu *gc_gss_ctx; |
| 73 | struct xdr_netobj gc_wire_ctx; | 73 | struct xdr_netobj gc_wire_ctx; |
| 74 | u32 gc_win; | 74 | u32 gc_win; |
| 75 | unsigned long gc_expiry; | 75 | unsigned long gc_expiry; |
| @@ -80,7 +80,7 @@ struct gss_upcall_msg; | |||
| 80 | struct gss_cred { | 80 | struct gss_cred { |
| 81 | struct rpc_cred gc_base; | 81 | struct rpc_cred gc_base; |
| 82 | enum rpc_gss_svc gc_service; | 82 | enum rpc_gss_svc gc_service; |
| 83 | struct gss_cl_ctx *gc_ctx; | 83 | struct gss_cl_ctx __rcu *gc_ctx; |
| 84 | struct gss_upcall_msg *gc_upcall; | 84 | struct gss_upcall_msg *gc_upcall; |
| 85 | unsigned long gc_upcall_timestamp; | 85 | unsigned long gc_upcall_timestamp; |
| 86 | unsigned char gc_machine_cred : 1; | 86 | unsigned char gc_machine_cred : 1; |
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index a8cc4e13434c..c90696544176 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h | |||
| @@ -23,12 +23,12 @@ struct restart_block { | |||
| 23 | }; | 23 | }; |
| 24 | /* For futex_wait and futex_wait_requeue_pi */ | 24 | /* For futex_wait and futex_wait_requeue_pi */ |
| 25 | struct { | 25 | struct { |
| 26 | u32 *uaddr; | 26 | u32 __user *uaddr; |
| 27 | u32 val; | 27 | u32 val; |
| 28 | u32 flags; | 28 | u32 flags; |
| 29 | u32 bitset; | 29 | u32 bitset; |
| 30 | u64 time; | 30 | u64 time; |
| 31 | u32 *uaddr2; | 31 | u32 __user *uaddr2; |
| 32 | } futex; | 32 | } futex; |
| 33 | /* For nanosleep */ | 33 | /* For nanosleep */ |
| 34 | struct { | 34 | struct { |
diff --git a/include/linux/topology.h b/include/linux/topology.h index 64e084ff5e5c..b91a40e847d2 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h | |||
| @@ -201,6 +201,12 @@ int arch_update_cpu_topology(void); | |||
| 201 | .balance_interval = 64, \ | 201 | .balance_interval = 64, \ |
| 202 | } | 202 | } |
| 203 | 203 | ||
| 204 | #ifdef CONFIG_SCHED_BOOK | ||
| 205 | #ifndef SD_BOOK_INIT | ||
| 206 | #error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!! | ||
| 207 | #endif | ||
| 208 | #endif /* CONFIG_SCHED_BOOK */ | ||
| 209 | |||
| 204 | #ifdef CONFIG_NUMA | 210 | #ifdef CONFIG_NUMA |
| 205 | #ifndef SD_NODE_INIT | 211 | #ifndef SD_NODE_INIT |
| 206 | #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!! | 212 | #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!! |
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index ef6c24a529e1..a4dc5b027bd9 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h | |||
| @@ -51,7 +51,8 @@ static inline u32 task_cls_classid(struct task_struct *p) | |||
| 51 | return 0; | 51 | return 0; |
| 52 | 52 | ||
| 53 | rcu_read_lock(); | 53 | rcu_read_lock(); |
| 54 | id = rcu_dereference(net_cls_subsys_id); | 54 | id = rcu_dereference_index_check(net_cls_subsys_id, |
| 55 | rcu_read_lock_held()); | ||
| 55 | if (id >= 0) | 56 | if (id >= 0) |
| 56 | classid = container_of(task_subsys_state(p, id), | 57 | classid = container_of(task_subsys_state(p, id), |
| 57 | struct cgroup_cls_state, css)->classid; | 58 | struct cgroup_cls_state, css)->classid; |
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index e624dae54fa4..caf17db87dbc 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h | |||
| @@ -75,7 +75,7 @@ struct nf_conntrack_helper; | |||
| 75 | /* nf_conn feature for connections that have a helper */ | 75 | /* nf_conn feature for connections that have a helper */ |
| 76 | struct nf_conn_help { | 76 | struct nf_conn_help { |
| 77 | /* Helper. if any */ | 77 | /* Helper. if any */ |
| 78 | struct nf_conntrack_helper *helper; | 78 | struct nf_conntrack_helper __rcu *helper; |
| 79 | 79 | ||
| 80 | union nf_conntrack_help help; | 80 | union nf_conntrack_help help; |
| 81 | 81 | ||
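The __rcu annotations sprinkled through this merge (real_cred, cgroups, gc_gss_ctx, helper, ...) are sparse annotations: they mark pointers that must only be touched through the RCU accessors, assuming the sparse-based RCU pointer checking introduced alongside them is enabled. A minimal sketch with hypothetical names:

```c
/* Sketch: an __rcu pointer and the accessors sparse expects around it.
 * A plain load or store of "cur" would be flagged by the checker.
 */
struct conf { int val; };
static struct conf __rcu *cur;

static void set_conf(struct conf *newc)
{
	rcu_assign_pointer(cur, newc);	/* publish with ordering */
}

static int get_val(void)		/* assumes set_conf() already ran */
{
	int v;

	rcu_read_lock();
	v = rcu_dereference(cur)->val;	/* annotated, checked load */
	rcu_read_unlock();
	return v;
}
```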
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9208c92aeab5..f6334782a593 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h | |||
| @@ -362,6 +362,35 @@ TRACE_EVENT(sched_stat_runtime, | |||
| 362 | (unsigned long long)__entry->vruntime) | 362 | (unsigned long long)__entry->vruntime) |
| 363 | ); | 363 | ); |
| 364 | 364 | ||
| 365 | /* | ||
| 366 | * Tracepoint for showing priority inheritance modifying a task's | ||
| 367 | * priority. | ||
| 368 | */ | ||
| 369 | TRACE_EVENT(sched_pi_setprio, | ||
| 370 | |||
| 371 | TP_PROTO(struct task_struct *tsk, int newprio), | ||
| 372 | |||
| 373 | TP_ARGS(tsk, newprio), | ||
| 374 | |||
| 375 | TP_STRUCT__entry( | ||
| 376 | __array( char, comm, TASK_COMM_LEN ) | ||
| 377 | __field( pid_t, pid ) | ||
| 378 | __field( int, oldprio ) | ||
| 379 | __field( int, newprio ) | ||
| 380 | ), | ||
| 381 | |||
| 382 | TP_fast_assign( | ||
| 383 | memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); | ||
| 384 | __entry->pid = tsk->pid; | ||
| 385 | __entry->oldprio = tsk->prio; | ||
| 386 | __entry->newprio = newprio; | ||
| 387 | ), | ||
| 388 | |||
| 389 | TP_printk("comm=%s pid=%d oldprio=%d newprio=%d", | ||
| 390 | __entry->comm, __entry->pid, | ||
| 391 | __entry->oldprio, __entry->newprio) | ||
| 392 | ); | ||
| 393 | |||
| 365 | #endif /* _TRACE_SCHED_H */ | 394 | #endif /* _TRACE_SCHED_H */ |
| 366 | 395 | ||
| 367 | /* This part must be outside protection */ | 396 | /* This part must be outside protection */ |
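For reference, a hedged sketch of the call site such a tracepoint implies; in-tree the natural spot would be the priority-inheritance path (rt_mutex_setprio()), and the hook must fire before the priority is rewritten:

```c
/* trace_sched_pi_setprio() is the function TRACE_EVENT(sched_pi_setprio,
 * ...) generates. It must run while p->prio still holds the old value,
 * since TP_fast_assign reads oldprio from tsk->prio.
 */
trace_sched_pi_setprio(p, prio);
/* ... only afterwards: p->prio = prio; ... */
```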
diff --git a/init/Kconfig b/init/Kconfig index 1ef0b439908e..36890f0c8456 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
| @@ -339,6 +339,8 @@ config AUDIT_TREE | |||
| 339 | depends on AUDITSYSCALL | 339 | depends on AUDITSYSCALL |
| 340 | select FSNOTIFY | 340 | select FSNOTIFY |
| 341 | 341 | ||
| 342 | source "kernel/irq/Kconfig" | ||
| 343 | |||
| 342 | menu "RCU Subsystem" | 344 | menu "RCU Subsystem" |
| 343 | 345 | ||
| 344 | choice | 346 | choice |
| @@ -347,6 +349,7 @@ choice | |||
| 347 | 349 | ||
| 348 | config TREE_RCU | 350 | config TREE_RCU |
| 349 | bool "Tree-based hierarchical RCU" | 351 | bool "Tree-based hierarchical RCU" |
| 352 | depends on !PREEMPT && SMP | ||
| 350 | help | 353 | help |
| 351 | This option selects the RCU implementation that is | 354 | This option selects the RCU implementation that is |
| 352 | designed for very large SMP system with hundreds or | 355 | designed for very large SMP system with hundreds or |
| @@ -354,7 +357,7 @@ config TREE_RCU | |||
| 354 | smaller systems. | 357 | smaller systems. |
| 355 | 358 | ||
| 356 | config TREE_PREEMPT_RCU | 359 | config TREE_PREEMPT_RCU |
| 357 | bool "Preemptable tree-based hierarchical RCU" | 360 | bool "Preemptible tree-based hierarchical RCU" |
| 358 | depends on PREEMPT | 361 | depends on PREEMPT |
| 359 | help | 362 | help |
| 360 | This option selects the RCU implementation that is | 363 | This option selects the RCU implementation that is |
| @@ -372,8 +375,22 @@ config TINY_RCU | |||
| 372 | is not required. This option greatly reduces the | 375 | is not required. This option greatly reduces the |
| 373 | memory footprint of RCU. | 376 | memory footprint of RCU. |
| 374 | 377 | ||
| 378 | config TINY_PREEMPT_RCU | ||
| 379 | bool "Preemptible UP-only small-memory-footprint RCU" | ||
| 380 | depends on !SMP && PREEMPT | ||
| 381 | help | ||
| 382 | This option selects the RCU implementation that is designed | ||
| 383 | for real-time UP systems. This option greatly reduces the | ||
| 384 | memory footprint of RCU. | ||
| 385 | |||
| 375 | endchoice | 386 | endchoice |
| 376 | 387 | ||
| 388 | config PREEMPT_RCU | ||
| 389 | def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU ) | ||
| 390 | help | ||
| 391 | This option enables preemptible-RCU code that is common between | ||
| 392 | the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. | ||
| 393 | |||
| 377 | config RCU_TRACE | 394 | config RCU_TRACE |
| 378 | bool "Enable tracing for RCU" | 395 | bool "Enable tracing for RCU" |
| 379 | depends on TREE_RCU || TREE_PREEMPT_RCU | 396 | depends on TREE_RCU || TREE_PREEMPT_RCU |
| @@ -394,9 +411,12 @@ config RCU_FANOUT | |||
| 394 | help | 411 | help |
| 395 | This option controls the fanout of hierarchical implementations | 412 | This option controls the fanout of hierarchical implementations |
| 396 | of RCU, allowing RCU to work efficiently on machines with | 413 | of RCU, allowing RCU to work efficiently on machines with |
| 397 | large numbers of CPUs. This value must be at least the cube | 414 | large numbers of CPUs. This value must be at least the fourth |
| 398 | root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit | 415 | root of NR_CPUS, which allows NR_CPUS to be insanely large. |
| 399 | systems and up to 262,144 for 64-bit systems. | 416 | The default value of RCU_FANOUT should be used for production |
| 417 | systems, but if you are stress-testing the RCU implementation | ||
| 418 | itself, small RCU_FANOUT values allow you to test large-system | ||
| 419 | code paths on small(er) systems. | ||
| 400 | 420 | ||
| 401 | Select a specific number if testing RCU itself. | 421 | Select a specific number if testing RCU itself. |
| 402 | Take the default if unsure. | 422 | Take the default if unsure. |
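A worked instance of the corrected fourth-root rule, as a compile-time sketch with hypothetical values:

```c
/* A tree of fanout F and at most four levels covers F*F*F*F CPUs, so F
 * must be at least the fourth root of NR_CPUS. With NR_CPUS = 4096,
 * F = 8 is the smallest legal fanout (8^4 = 4096); the old cube-root
 * text would have demanded F = 16 (16^3 = 4096). The negative array
 * size forces a build error if the constraint is violated.
 */
#define MY_NR_CPUS 4096
#define MY_FANOUT  8
typedef char rcu_fanout_check[
	(MY_FANOUT * MY_FANOUT * MY_FANOUT * MY_FANOUT >= MY_NR_CPUS)
	? 1 : -1];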
diff --git a/init/main.c b/init/main.c index 94ab488039aa..9684c9670b48 100644 --- a/init/main.c +++ b/init/main.c | |||
| @@ -556,7 +556,6 @@ asmlinkage void __init start_kernel(void) | |||
| 556 | 556 | ||
| 557 | local_irq_disable(); | 557 | local_irq_disable(); |
| 558 | early_boot_irqs_off(); | 558 | early_boot_irqs_off(); |
| 559 | early_init_irq_lock_class(); | ||
| 560 | 559 | ||
| 561 | /* | 560 | /* |
| 562 | * Interrupts are still disabled. Do necessary setups, then | 561 | * Interrupts are still disabled. Do necessary setups, then |
diff --git a/kernel/Makefile b/kernel/Makefile index 4d9bf5f8531f..e2c9d52cfe9e 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
| @@ -87,6 +87,7 @@ obj-$(CONFIG_TREE_RCU) += rcutree.o | |||
| 87 | obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o | 87 | obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o |
| 88 | obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o | 88 | obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o |
| 89 | obj-$(CONFIG_TINY_RCU) += rcutiny.o | 89 | obj-$(CONFIG_TINY_RCU) += rcutiny.o |
| 90 | obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o | ||
| 90 | obj-$(CONFIG_RELAY) += relay.o | 91 | obj-$(CONFIG_RELAY) += relay.o |
| 91 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o | 92 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o |
| 92 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o | 93 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c9483d8f6140..291ba3d04bea 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
| @@ -138,7 +138,7 @@ struct css_id { | |||
| 138 | * is called after synchronize_rcu(). But for safe use, css_is_removed() | 138 | * is called after synchronize_rcu(). But for safe use, css_is_removed() |
| 139 | * css_tryget() should be used for avoiding race. | 139 | * css_tryget() should be used for avoiding race. |
| 140 | */ | 140 | */ |
| 141 | struct cgroup_subsys_state *css; | 141 | struct cgroup_subsys_state __rcu *css; |
| 142 | /* | 142 | /* |
| 143 | * ID of this css. | 143 | * ID of this css. |
| 144 | */ | 144 | */ |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index b23c0979bbe7..51b143e2a07a 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
| @@ -1397,7 +1397,7 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, | |||
| 1397 | if (tsk->flags & PF_THREAD_BOUND) | 1397 | if (tsk->flags & PF_THREAD_BOUND) |
| 1398 | return -EINVAL; | 1398 | return -EINVAL; |
| 1399 | 1399 | ||
| 1400 | ret = security_task_setscheduler(tsk, 0, NULL); | 1400 | ret = security_task_setscheduler(tsk); |
| 1401 | if (ret) | 1401 | if (ret) |
| 1402 | return ret; | 1402 | return ret; |
| 1403 | if (threadgroup) { | 1403 | if (threadgroup) { |
| @@ -1405,7 +1405,7 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, | |||
| 1405 | 1405 | ||
| 1406 | rcu_read_lock(); | 1406 | rcu_read_lock(); |
| 1407 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | 1407 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { |
| 1408 | ret = security_task_setscheduler(c, 0, NULL); | 1408 | ret = security_task_setscheduler(c); |
| 1409 | if (ret) { | 1409 | if (ret) { |
| 1410 | rcu_read_unlock(); | 1410 | rcu_read_unlock(); |
| 1411 | return ret; | 1411 | return ret; |
diff --git a/kernel/futex.c b/kernel/futex.c index 6a3a5fa1526d..a118bf160e0b 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
| @@ -91,6 +91,7 @@ struct futex_pi_state { | |||
| 91 | 91 | ||
| 92 | /** | 92 | /** |
| 93 | * struct futex_q - The hashed futex queue entry, one per waiting task | 93 | * struct futex_q - The hashed futex queue entry, one per waiting task |
| 94 | * @list: priority-sorted list of tasks waiting on this futex | ||
| 94 | * @task: the task waiting on the futex | 95 | * @task: the task waiting on the futex |
| 95 | * @lock_ptr: the hash bucket lock | 96 | * @lock_ptr: the hash bucket lock |
| 96 | * @key: the key the futex is hashed on | 97 | * @key: the key the futex is hashed on |
| @@ -104,7 +105,7 @@ struct futex_pi_state { | |||
| 104 | * | 105 | * |
| 105 | * A futex_q has a woken state, just like tasks have TASK_RUNNING. | 106 | * A futex_q has a woken state, just like tasks have TASK_RUNNING. |
| 106 | * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. | 107 | * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. |
| 107 | * The order of wakup is always to make the first condition true, then | 108 | * The order of wakeup is always to make the first condition true, then |
| 108 | * the second. | 109 | * the second. |
| 109 | * | 110 | * |
| 110 | * PI futexes are typically woken before they are removed from the hash list via | 111 | * PI futexes are typically woken before they are removed from the hash list via |
| @@ -295,7 +296,7 @@ void put_futex_key(int fshared, union futex_key *key) | |||
| 295 | * Slow path to fixup the fault we just took in the atomic write | 296 | * Slow path to fixup the fault we just took in the atomic write |
| 296 | * access to @uaddr. | 297 | * access to @uaddr. |
| 297 | * | 298 | * |
| 298 | * We have no generic implementation of a non destructive write to the | 299 | * We have no generic implementation of a non-destructive write to the |
| 299 | * user address. We know that we faulted in the atomic pagefault | 300 | * user address. We know that we faulted in the atomic pagefault |
| 300 | * disabled section so we can as well avoid the #PF overhead by | 301 | * disabled section so we can as well avoid the #PF overhead by |
| 301 | * calling get_user_pages() right away. | 302 | * calling get_user_pages() right away. |
| @@ -515,7 +516,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
| 515 | */ | 516 | */ |
| 516 | pi_state = this->pi_state; | 517 | pi_state = this->pi_state; |
| 517 | /* | 518 | /* |
| 518 | * Userspace might have messed up non PI and PI futexes | 519 | * Userspace might have messed up non-PI and PI futexes |
| 519 | */ | 520 | */ |
| 520 | if (unlikely(!pi_state)) | 521 | if (unlikely(!pi_state)) |
| 521 | return -EINVAL; | 522 | return -EINVAL; |
| @@ -736,8 +737,8 @@ static void wake_futex(struct futex_q *q) | |||
| 736 | 737 | ||
| 737 | /* | 738 | /* |
| 738 | * We set q->lock_ptr = NULL _before_ we wake up the task. If | 739 | * We set q->lock_ptr = NULL _before_ we wake up the task. If |
| 739 | * a non futex wake up happens on another CPU then the task | 740 | * a non-futex wake up happens on another CPU then the task |
| 740 | * might exit and p would dereference a non existing task | 741 | * might exit and p would dereference a non-existing task |
| 741 | * struct. Prevent this by holding a reference on p across the | 742 | * struct. Prevent this by holding a reference on p across the |
| 742 | * wake up. | 743 | * wake up. |
| 743 | */ | 744 | */ |
| @@ -1131,11 +1132,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, | |||
| 1131 | 1132 | ||
| 1132 | /** | 1133 | /** |
| 1133 | * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 | 1134 | * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 |
| 1134 | * uaddr1: source futex user address | 1135 | * @uaddr1: source futex user address |
| 1135 | * uaddr2: target futex user address | 1136 | * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED |
| 1136 | * nr_wake: number of waiters to wake (must be 1 for requeue_pi) | 1137 | * @uaddr2: target futex user address |
| 1137 | * nr_requeue: number of waiters to requeue (0-INT_MAX) | 1138 | * @nr_wake: number of waiters to wake (must be 1 for requeue_pi) |
| 1138 | * requeue_pi: if we are attempting to requeue from a non-pi futex to a | 1139 | * @nr_requeue: number of waiters to requeue (0-INT_MAX) |
| 1140 | * @cmpval: @uaddr1 expected value (or %NULL) | ||
| 1141 | * @requeue_pi: if we are attempting to requeue from a non-pi futex to a | ||
| 1139 | * pi futex (pi to pi requeue is not supported) | 1142 | * pi futex (pi to pi requeue is not supported) |
| 1140 | * | 1143 | * |
| 1141 | * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire | 1144 | * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire |
| @@ -1360,10 +1363,10 @@ out: | |||
| 1360 | 1363 | ||
| 1361 | /* The key must be already stored in q->key. */ | 1364 | /* The key must be already stored in q->key. */ |
| 1362 | static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) | 1365 | static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) |
| 1366 | __acquires(&hb->lock) | ||
| 1363 | { | 1367 | { |
| 1364 | struct futex_hash_bucket *hb; | 1368 | struct futex_hash_bucket *hb; |
| 1365 | 1369 | ||
| 1366 | get_futex_key_refs(&q->key); | ||
| 1367 | hb = hash_futex(&q->key); | 1370 | hb = hash_futex(&q->key); |
| 1368 | q->lock_ptr = &hb->lock; | 1371 | q->lock_ptr = &hb->lock; |
| 1369 | 1372 | ||
| @@ -1373,9 +1376,9 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) | |||
| 1373 | 1376 | ||
| 1374 | static inline void | 1377 | static inline void |
| 1375 | queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) | 1378 | queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) |
| 1379 | __releases(&hb->lock) | ||
| 1376 | { | 1380 | { |
| 1377 | spin_unlock(&hb->lock); | 1381 | spin_unlock(&hb->lock); |
| 1378 | drop_futex_key_refs(&q->key); | ||
| 1379 | } | 1382 | } |
| 1380 | 1383 | ||
| 1381 | /** | 1384 | /** |
| @@ -1391,6 +1394,7 @@ queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) | |||
| 1391 | * an example). | 1394 | * an example). |
| 1392 | */ | 1395 | */ |
| 1393 | static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) | 1396 | static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) |
| 1397 | __releases(&hb->lock) | ||
| 1394 | { | 1398 | { |
| 1395 | int prio; | 1399 | int prio; |
| 1396 | 1400 | ||
| @@ -1471,6 +1475,7 @@ retry: | |||
| 1471 | * and dropped here. | 1475 | * and dropped here. |
| 1472 | */ | 1476 | */ |
| 1473 | static void unqueue_me_pi(struct futex_q *q) | 1477 | static void unqueue_me_pi(struct futex_q *q) |
| 1478 | __releases(q->lock_ptr) | ||
| 1474 | { | 1479 | { |
| 1475 | WARN_ON(plist_node_empty(&q->list)); | 1480 | WARN_ON(plist_node_empty(&q->list)); |
| 1476 | plist_del(&q->list, &q->list.plist); | 1481 | plist_del(&q->list, &q->list.plist); |
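The __acquires()/__releases() markers added in these hunks are sparse context annotations: they tell the checker that a function returns holding (or having dropped) a lock it did not visibly balance in its own body, so the asymmetric locking in queue_lock()/queue_unlock()/queue_me()/unqueue_me_pi() stops tripping warnings. A minimal sketch of the pairing:

```c
/* Sketch: sparse context annotations for asymmetric lock usage.
 * Both expand to no-ops for the compiler; only sparse consumes them.
 */
static void my_lock_it(spinlock_t *lock)
	__acquires(lock)
{
	spin_lock(lock);	/* caller holds lock on return */
}

static void my_unlock_it(spinlock_t *lock)
	__releases(lock)
{
	spin_unlock(lock);	/* balances the acquire above */
}
```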
| @@ -1480,8 +1485,6 @@ static void unqueue_me_pi(struct futex_q *q) | |||
| 1480 | q->pi_state = NULL; | 1485 | q->pi_state = NULL; |
| 1481 | 1486 | ||
| 1482 | spin_unlock(q->lock_ptr); | 1487 | spin_unlock(q->lock_ptr); |
| 1483 | |||
| 1484 | drop_futex_key_refs(&q->key); | ||
| 1485 | } | 1488 | } |
| 1486 | 1489 | ||
| 1487 | /* | 1490 | /* |
| @@ -1812,7 +1815,10 @@ static int futex_wait(u32 __user *uaddr, int fshared, | |||
| 1812 | } | 1815 | } |
| 1813 | 1816 | ||
| 1814 | retry: | 1817 | retry: |
| 1815 | /* Prepare to wait on uaddr. */ | 1818 | /* |
| 1819 | * Prepare to wait on uaddr. On success, holds hb lock and increments | ||
| 1820 | * q.key refs. | ||
| 1821 | */ | ||
| 1816 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); | 1822 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); |
| 1817 | if (ret) | 1823 | if (ret) |
| 1818 | goto out; | 1824 | goto out; |
| @@ -1822,28 +1828,27 @@ retry: | |||
| 1822 | 1828 | ||
| 1823 | /* If we were woken (and unqueued), we succeeded, whatever. */ | 1829 | /* If we were woken (and unqueued), we succeeded, whatever. */ |
| 1824 | ret = 0; | 1830 | ret = 0; |
| 1831 | /* unqueue_me() drops q.key ref */ | ||
| 1825 | if (!unqueue_me(&q)) | 1832 | if (!unqueue_me(&q)) |
| 1826 | goto out_put_key; | 1833 | goto out; |
| 1827 | ret = -ETIMEDOUT; | 1834 | ret = -ETIMEDOUT; |
| 1828 | if (to && !to->task) | 1835 | if (to && !to->task) |
| 1829 | goto out_put_key; | 1836 | goto out; |
| 1830 | 1837 | ||
| 1831 | /* | 1838 | /* |
| 1832 | * We expect signal_pending(current), but we might be the | 1839 | * We expect signal_pending(current), but we might be the |
| 1833 | * victim of a spurious wakeup as well. | 1840 | * victim of a spurious wakeup as well. |
| 1834 | */ | 1841 | */ |
| 1835 | if (!signal_pending(current)) { | 1842 | if (!signal_pending(current)) |
| 1836 | put_futex_key(fshared, &q.key); | ||
| 1837 | goto retry; | 1843 | goto retry; |
| 1838 | } | ||
| 1839 | 1844 | ||
| 1840 | ret = -ERESTARTSYS; | 1845 | ret = -ERESTARTSYS; |
| 1841 | if (!abs_time) | 1846 | if (!abs_time) |
| 1842 | goto out_put_key; | 1847 | goto out; |
| 1843 | 1848 | ||
| 1844 | restart = ¤t_thread_info()->restart_block; | 1849 | restart = ¤t_thread_info()->restart_block; |
| 1845 | restart->fn = futex_wait_restart; | 1850 | restart->fn = futex_wait_restart; |
| 1846 | restart->futex.uaddr = (u32 *)uaddr; | 1851 | restart->futex.uaddr = uaddr; |
| 1847 | restart->futex.val = val; | 1852 | restart->futex.val = val; |
| 1848 | restart->futex.time = abs_time->tv64; | 1853 | restart->futex.time = abs_time->tv64; |
| 1849 | restart->futex.bitset = bitset; | 1854 | restart->futex.bitset = bitset; |
| @@ -1856,8 +1861,6 @@ retry: | |||
| 1856 | 1861 | ||
| 1857 | ret = -ERESTART_RESTARTBLOCK; | 1862 | ret = -ERESTART_RESTARTBLOCK; |
| 1858 | 1863 | ||
| 1859 | out_put_key: | ||
| 1860 | put_futex_key(fshared, &q.key); | ||
| 1861 | out: | 1864 | out: |
| 1862 | if (to) { | 1865 | if (to) { |
| 1863 | hrtimer_cancel(&to->timer); | 1866 | hrtimer_cancel(&to->timer); |
| @@ -1869,7 +1872,7 @@ out: | |||
| 1869 | 1872 | ||
| 1870 | static long futex_wait_restart(struct restart_block *restart) | 1873 | static long futex_wait_restart(struct restart_block *restart) |
| 1871 | { | 1874 | { |
| 1872 | u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; | 1875 | u32 __user *uaddr = restart->futex.uaddr; |
| 1873 | int fshared = 0; | 1876 | int fshared = 0; |
| 1874 | ktime_t t, *tp = NULL; | 1877 | ktime_t t, *tp = NULL; |
| 1875 | 1878 | ||
| @@ -2236,7 +2239,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
| 2236 | q.rt_waiter = &rt_waiter; | 2239 | q.rt_waiter = &rt_waiter; |
| 2237 | q.requeue_pi_key = &key2; | 2240 | q.requeue_pi_key = &key2; |
| 2238 | 2241 | ||
| 2239 | /* Prepare to wait on uaddr. */ | 2242 | /* |
| 2243 | * Prepare to wait on uaddr. On success, increments q.key (key1) ref | ||
| 2244 | * count. | ||
| 2245 | */ | ||
| 2240 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); | 2246 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); |
| 2241 | if (ret) | 2247 | if (ret) |
| 2242 | goto out_key2; | 2248 | goto out_key2; |
| @@ -2254,7 +2260,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
| 2254 | * In order for us to be here, we know our q.key == key2, and since | 2260 | * In order for us to be here, we know our q.key == key2, and since |
| 2255 | * we took the hb->lock above, we also know that futex_requeue() has | 2261 | * we took the hb->lock above, we also know that futex_requeue() has |
| 2256 | * completed and we no longer have to concern ourselves with a wakeup | 2262 | * completed and we no longer have to concern ourselves with a wakeup |
| 2257 | * race with the atomic proxy lock acquition by the requeue code. | 2263 | * race with the atomic proxy lock acquisition by the requeue code. The |
| 2264 | * futex_requeue dropped our key1 reference and incremented our key2 | ||
| 2265 | * reference count. | ||
| 2258 | */ | 2266 | */ |
| 2259 | 2267 | ||
| 2260 | /* Check if the requeue code acquired the second futex for us. */ | 2268 | /* Check if the requeue code acquired the second futex for us. */ |
| @@ -2458,7 +2466,7 @@ retry: | |||
| 2458 | */ | 2466 | */ |
| 2459 | static inline int fetch_robust_entry(struct robust_list __user **entry, | 2467 | static inline int fetch_robust_entry(struct robust_list __user **entry, |
| 2460 | struct robust_list __user * __user *head, | 2468 | struct robust_list __user * __user *head, |
| 2461 | int *pi) | 2469 | unsigned int *pi) |
| 2462 | { | 2470 | { |
| 2463 | unsigned long uentry; | 2471 | unsigned long uentry; |
| 2464 | 2472 | ||
| @@ -2647,7 +2655,7 @@ static int __init futex_init(void) | |||
| 2647 | * of the complex code paths. Also we want to prevent | 2655 | * of the complex code paths. Also we want to prevent |
| 2648 | * registration of robust lists in that case. NULL is | 2656 | * registration of robust lists in that case. NULL is |
| 2649 | * guaranteed to fault and we get -EFAULT on functional | 2657 | * guaranteed to fault and we get -EFAULT on functional |
| 2650 | * implementation, the non functional ones will return | 2658 | * implementation, the non-functional ones will return |
| 2651 | * -ENOSYS. | 2659 | * -ENOSYS. |
| 2652 | */ | 2660 | */ |
| 2653 | curval = cmpxchg_futex_value_locked(NULL, 0, 0); | 2661 | curval = cmpxchg_futex_value_locked(NULL, 0, 0); |
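The futex.c hunks above make three related cleanups: the out_put_key label disappears from the wait path because futex_wait_setup() now handles the key reference itself on the error paths, the cast in futex_wait_restart() goes away because the restart block's futex.uaddr field is declared u32 __user * elsewhere in this series, and the requeue-PI comments now spell out who owns which key reference. A rough sketch of the restart-block member these hunks rely on (field layout illustrative, not the exact thread_info.h definition):

    struct restart_block {
            long (*fn)(struct restart_block *);
            union {
                    /* ... other restartable syscalls ... */
                    struct {
                            u32 __user *uaddr;  /* typed, so no cast on reload */
                            u32 val;
                            u32 flags;
                            u32 bitset;
                            u64 time;           /* absolute timeout, tv64 above */
                    } futex;
            };
    };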
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index d49afb2395e5..06da4dfc339b 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c | |||
| @@ -19,7 +19,7 @@ | |||
| 19 | */ | 19 | */ |
| 20 | static inline int | 20 | static inline int |
| 21 | fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, | 21 | fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, |
| 22 | compat_uptr_t __user *head, int *pi) | 22 | compat_uptr_t __user *head, unsigned int *pi) |
| 23 | { | 23 | { |
| 24 | if (get_user(*uentry, head)) | 24 | if (get_user(*uentry, head)) |
| 25 | return -EFAULT; | 25 | return -EFAULT; |
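Both the native and compat fetch_robust_entry() now report the PI flag through an unsigned int *, which matches what is actually stored there: the flag is the low bit of the user-space list pointer, not a signed quantity. The rest of the compat helper, which the hunk does not show, is conventionally written along these lines (a sketch, not the verbatim file):

    static inline int
    fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
                       compat_uptr_t __user *head, unsigned int *pi)
    {
            if (get_user(*uentry, head))
                    return -EFAULT;

            /* Low bit of the user pointer marks a PI futex; mask it off
             * to recover the real robust_list pointer. */
            *entry = compat_ptr((*uentry) & ~1);
            *pi = (unsigned int)(*uentry) & 1;

            return 0;
    }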
diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 0c642d51aac2..53ead174da2f 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c | |||
| @@ -98,7 +98,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) | |||
| 98 | printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" | 98 | printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" |
| 99 | " disables this message.\n"); | 99 | " disables this message.\n"); |
| 100 | sched_show_task(t); | 100 | sched_show_task(t); |
| 101 | __debug_show_held_locks(t); | 101 | debug_show_held_locks(t); |
| 102 | 102 | ||
| 103 | touch_nmi_watchdog(); | 103 | touch_nmi_watchdog(); |
| 104 | 104 | ||
| @@ -111,7 +111,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) | |||
| 111 | * periodically exit the critical section and enter a new one. | 111 | * periodically exit the critical section and enter a new one. |
| 112 | * | 112 | * |
| 113 | * For preemptible RCU it is sufficient to call rcu_read_unlock in order | 113 | * For preemptible RCU it is sufficient to call rcu_read_unlock in order |
| 114 | * exit the grace period. For classic RCU, a reschedule is required. | 114 | * to exit the grace period. For classic RCU, a reschedule is required. |
| 115 | */ | 115 | */ |
| 116 | static void rcu_lock_break(struct task_struct *g, struct task_struct *t) | 116 | static void rcu_lock_break(struct task_struct *g, struct task_struct *t) |
| 117 | { | 117 | { |
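Besides the switch to the plain debug_show_held_locks() (the double-underscore variant is gone) and the grammar fix in the comment, nothing changes here. The comment describes rcu_lock_break(), whose body the hunk truncates; reconstructed from context it is essentially (a sketch):

    static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
    {
            /* Pin both tasks so they cannot vanish while we are outside
             * the RCU read-side critical section. */
            get_task_struct(g);
            get_task_struct(t);
            rcu_read_unlock();
            cond_resched();         /* classic RCU: a reschedule ends the grace period */
            rcu_read_lock();
            put_task_struct(t);
            put_task_struct(g);
    }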
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig new file mode 100644 index 000000000000..31d766bf5d2e --- /dev/null +++ b/kernel/irq/Kconfig | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | config HAVE_GENERIC_HARDIRQS | ||
| 2 | def_bool n | ||
| 3 | |||
| 4 | if HAVE_GENERIC_HARDIRQS | ||
| 5 | menu "IRQ subsystem" | ||
| 6 | # | ||
| 7 | # Interrupt subsystem related configuration options | ||
| 8 | # | ||
| 9 | config GENERIC_HARDIRQS | ||
| 10 | def_bool y | ||
| 11 | |||
| 12 | config GENERIC_HARDIRQS_NO__DO_IRQ | ||
| 13 | def_bool y | ||
| 14 | |||
| 15 | # Select this to disable the deprecated stuff | ||
| 16 | config GENERIC_HARDIRQS_NO_DEPRECATED | ||
| 17 | def_bool n | ||
| 18 | |||
| 19 | # Options selectable by the architecture code | ||
| 20 | config HAVE_SPARSE_IRQ | ||
| 21 | def_bool n | ||
| 22 | |||
| 23 | config GENERIC_IRQ_PROBE | ||
| 24 | def_bool n | ||
| 25 | |||
| 26 | config GENERIC_PENDING_IRQ | ||
| 27 | def_bool n | ||
| 28 | |||
| 29 | config AUTO_IRQ_AFFINITY | ||
| 30 | def_bool n | ||
| 31 | |||
| 32 | config IRQ_PER_CPU | ||
| 33 | def_bool n | ||
| 34 | |||
| 35 | config HARDIRQS_SW_RESEND | ||
| 36 | def_bool n | ||
| 37 | |||
| 38 | config SPARSE_IRQ | ||
| 39 | bool "Support sparse irq numbering" | ||
| 40 | depends on HAVE_SPARSE_IRQ | ||
| 41 | ---help--- | ||
| 42 | |||
| 43 | Sparse irq numbering is useful for distro kernels that want | ||
| 44 | to define a high CONFIG_NR_CPUS value but still want to have | ||
| 45 | low kernel memory footprint on smaller machines. | ||
| 46 | |||
| 47 | ( Sparse irqs can also be beneficial on NUMA boxes, as they spread | ||
| 48 | out the interrupt descriptors in a more NUMA-friendly way. ) | ||
| 49 | |||
| 50 | If you don't know what to do here, say N. | ||
| 51 | |||
| 52 | endmenu | ||
| 53 | endif | ||
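The new Kconfig file centralizes what used to be scattered per-architecture def_bool definitions: an architecture selects the HAVE_* switches and the generic code adapts. The most visible knob is SPARSE_IRQ, which picks between two irq_to_desc() implementations; both appear later in this diff, condensed here for contrast:

    #ifdef CONFIG_SPARSE_IRQ
    /* descriptors allocated on demand, looked up in a radix tree */
    struct irq_desc *irq_to_desc(unsigned int irq)
    {
            return radix_tree_lookup(&irq_desc_tree, irq);
    }
    #else
    /* descriptors live in a fixed NR_IRQS-sized static array */
    struct irq_desc *irq_to_desc(unsigned int irq)
    {
            return (irq < NR_IRQS) ? irq_desc + irq : NULL;
    }
    #endif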
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 7d047808419d..54329cd7b3ee 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile | |||
| @@ -1,7 +1,6 @@ | |||
| 1 | 1 | ||
| 2 | obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o | 2 | obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o |
| 3 | obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o | 3 | obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o |
| 4 | obj-$(CONFIG_PROC_FS) += proc.o | 4 | obj-$(CONFIG_PROC_FS) += proc.o |
| 5 | obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o | 5 | obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o |
| 6 | obj-$(CONFIG_NUMA_IRQ_DESC) += numa_migrate.o | ||
| 7 | obj-$(CONFIG_PM_SLEEP) += pm.o | 6 | obj-$(CONFIG_PM_SLEEP) += pm.o |
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index 2295a31ef110..505798f86c36 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c | |||
| @@ -57,9 +57,10 @@ unsigned long probe_irq_on(void) | |||
| 57 | * Some chips need to know about probing in | 57 | * Some chips need to know about probing in |
| 58 | * progress: | 58 | * progress: |
| 59 | */ | 59 | */ |
| 60 | if (desc->chip->set_type) | 60 | if (desc->irq_data.chip->irq_set_type) |
| 61 | desc->chip->set_type(i, IRQ_TYPE_PROBE); | 61 | desc->irq_data.chip->irq_set_type(&desc->irq_data, |
| 62 | desc->chip->startup(i); | 62 | IRQ_TYPE_PROBE); |
| 63 | desc->irq_data.chip->irq_startup(&desc->irq_data); | ||
| 63 | } | 64 | } |
| 64 | raw_spin_unlock_irq(&desc->lock); | 65 | raw_spin_unlock_irq(&desc->lock); |
| 65 | } | 66 | } |
| @@ -76,7 +77,7 @@ unsigned long probe_irq_on(void) | |||
| 76 | raw_spin_lock_irq(&desc->lock); | 77 | raw_spin_lock_irq(&desc->lock); |
| 77 | if (!desc->action && !(desc->status & IRQ_NOPROBE)) { | 78 | if (!desc->action && !(desc->status & IRQ_NOPROBE)) { |
| 78 | desc->status |= IRQ_AUTODETECT | IRQ_WAITING; | 79 | desc->status |= IRQ_AUTODETECT | IRQ_WAITING; |
| 79 | if (desc->chip->startup(i)) | 80 | if (desc->irq_data.chip->irq_startup(&desc->irq_data)) |
| 80 | desc->status |= IRQ_PENDING; | 81 | desc->status |= IRQ_PENDING; |
| 81 | } | 82 | } |
| 82 | raw_spin_unlock_irq(&desc->lock); | 83 | raw_spin_unlock_irq(&desc->lock); |
| @@ -98,7 +99,7 @@ unsigned long probe_irq_on(void) | |||
| 98 | /* It triggered already - consider it spurious. */ | 99 | /* It triggered already - consider it spurious. */ |
| 99 | if (!(status & IRQ_WAITING)) { | 100 | if (!(status & IRQ_WAITING)) { |
| 100 | desc->status = status & ~IRQ_AUTODETECT; | 101 | desc->status = status & ~IRQ_AUTODETECT; |
| 101 | desc->chip->shutdown(i); | 102 | desc->irq_data.chip->irq_shutdown(&desc->irq_data); |
| 102 | } else | 103 | } else |
| 103 | if (i < 32) | 104 | if (i < 32) |
| 104 | mask |= 1 << i; | 105 | mask |= 1 << i; |
| @@ -137,7 +138,7 @@ unsigned int probe_irq_mask(unsigned long val) | |||
| 137 | mask |= 1 << i; | 138 | mask |= 1 << i; |
| 138 | 139 | ||
| 139 | desc->status = status & ~IRQ_AUTODETECT; | 140 | desc->status = status & ~IRQ_AUTODETECT; |
| 140 | desc->chip->shutdown(i); | 141 | desc->irq_data.chip->irq_shutdown(&desc->irq_data); |
| 141 | } | 142 | } |
| 142 | raw_spin_unlock_irq(&desc->lock); | 143 | raw_spin_unlock_irq(&desc->lock); |
| 143 | } | 144 | } |
| @@ -181,7 +182,7 @@ int probe_irq_off(unsigned long val) | |||
| 181 | nr_of_irqs++; | 182 | nr_of_irqs++; |
| 182 | } | 183 | } |
| 183 | desc->status = status & ~IRQ_AUTODETECT; | 184 | desc->status = status & ~IRQ_AUTODETECT; |
| 184 | desc->chip->shutdown(i); | 185 | desc->irq_data.chip->irq_shutdown(&desc->irq_data); |
| 185 | } | 186 | } |
| 186 | raw_spin_unlock_irq(&desc->lock); | 187 | raw_spin_unlock_irq(&desc->lock); |
| 187 | } | 188 | } |
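Every call site in the autoprobe code moves from the irq-number based chip callbacks to the irq_data based replacements introduced by this series. The conversion is purely mechanical once the two signature families are lined up (as used throughout this diff):

    /* old style: callbacks keyed by a bare irq number */
    unsigned int    (*startup)(unsigned int irq);
    void            (*shutdown)(unsigned int irq);
    int             (*set_type)(unsigned int irq, unsigned int flow_type);

    /* new style: callbacks receive the descriptor's embedded irq_data,
     * which bundles irq number, chip, chip_data, handler_data, msi_desc */
    unsigned int    (*irq_startup)(struct irq_data *data);
    void            (*irq_shutdown)(struct irq_data *data);
    int             (*irq_set_type)(struct irq_data *data, unsigned int flow_type);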
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index b7091d5ca2f8..baa5c4acad83 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c | |||
| @@ -18,108 +18,6 @@ | |||
| 18 | 18 | ||
| 19 | #include "internals.h" | 19 | #include "internals.h" |
| 20 | 20 | ||
| 21 | static void dynamic_irq_init_x(unsigned int irq, bool keep_chip_data) | ||
| 22 | { | ||
| 23 | struct irq_desc *desc; | ||
| 24 | unsigned long flags; | ||
| 25 | |||
| 26 | desc = irq_to_desc(irq); | ||
| 27 | if (!desc) { | ||
| 28 | WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq); | ||
| 29 | return; | ||
| 30 | } | ||
| 31 | |||
| 32 | /* Ensure we don't have left over values from a previous use of this irq */ | ||
| 33 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
| 34 | desc->status = IRQ_DISABLED; | ||
| 35 | desc->chip = &no_irq_chip; | ||
| 36 | desc->handle_irq = handle_bad_irq; | ||
| 37 | desc->depth = 1; | ||
| 38 | desc->msi_desc = NULL; | ||
| 39 | desc->handler_data = NULL; | ||
| 40 | if (!keep_chip_data) | ||
| 41 | desc->chip_data = NULL; | ||
| 42 | desc->action = NULL; | ||
| 43 | desc->irq_count = 0; | ||
| 44 | desc->irqs_unhandled = 0; | ||
| 45 | #ifdef CONFIG_SMP | ||
| 46 | cpumask_setall(desc->affinity); | ||
| 47 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
| 48 | cpumask_clear(desc->pending_mask); | ||
| 49 | #endif | ||
| 50 | #endif | ||
| 51 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
| 52 | } | ||
| 53 | |||
| 54 | /** | ||
| 55 | * dynamic_irq_init - initialize a dynamically allocated irq | ||
| 56 | * @irq: irq number to initialize | ||
| 57 | */ | ||
| 58 | void dynamic_irq_init(unsigned int irq) | ||
| 59 | { | ||
| 60 | dynamic_irq_init_x(irq, false); | ||
| 61 | } | ||
| 62 | |||
| 63 | /** | ||
| 64 | * dynamic_irq_init_keep_chip_data - initialize a dynamically allocated irq | ||
| 65 | * @irq: irq number to initialize | ||
| 66 | * | ||
| 67 | * does not set irq_to_desc(irq)->chip_data to NULL | ||
| 68 | */ | ||
| 69 | void dynamic_irq_init_keep_chip_data(unsigned int irq) | ||
| 70 | { | ||
| 71 | dynamic_irq_init_x(irq, true); | ||
| 72 | } | ||
| 73 | |||
| 74 | static void dynamic_irq_cleanup_x(unsigned int irq, bool keep_chip_data) | ||
| 75 | { | ||
| 76 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 77 | unsigned long flags; | ||
| 78 | |||
| 79 | if (!desc) { | ||
| 80 | WARN(1, KERN_ERR "Trying to cleanup invalid IRQ%d\n", irq); | ||
| 81 | return; | ||
| 82 | } | ||
| 83 | |||
| 84 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
| 85 | if (desc->action) { | ||
| 86 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
| 87 | WARN(1, KERN_ERR "Destroying IRQ%d without calling free_irq\n", | ||
| 88 | irq); | ||
| 89 | return; | ||
| 90 | } | ||
| 91 | desc->msi_desc = NULL; | ||
| 92 | desc->handler_data = NULL; | ||
| 93 | if (!keep_chip_data) | ||
| 94 | desc->chip_data = NULL; | ||
| 95 | desc->handle_irq = handle_bad_irq; | ||
| 96 | desc->chip = &no_irq_chip; | ||
| 97 | desc->name = NULL; | ||
| 98 | clear_kstat_irqs(desc); | ||
| 99 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
| 100 | } | ||
| 101 | |||
| 102 | /** | ||
| 103 | * dynamic_irq_cleanup - cleanup a dynamically allocated irq | ||
| 104 | * @irq: irq number to initialize | ||
| 105 | */ | ||
| 106 | void dynamic_irq_cleanup(unsigned int irq) | ||
| 107 | { | ||
| 108 | dynamic_irq_cleanup_x(irq, false); | ||
| 109 | } | ||
| 110 | |||
| 111 | /** | ||
| 112 | * dynamic_irq_cleanup_keep_chip_data - cleanup a dynamically allocated irq | ||
| 113 | * @irq: irq number to initialize | ||
| 114 | * | ||
| 115 | * does not set irq_to_desc(irq)->chip_data to NULL | ||
| 116 | */ | ||
| 117 | void dynamic_irq_cleanup_keep_chip_data(unsigned int irq) | ||
| 118 | { | ||
| 119 | dynamic_irq_cleanup_x(irq, true); | ||
| 120 | } | ||
| 121 | |||
| 122 | |||
| 123 | /** | 21 | /** |
| 124 | * set_irq_chip - set the irq chip for an irq | 22 | * set_irq_chip - set the irq chip for an irq |
| 125 | * @irq: irq number | 23 | * @irq: irq number |
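The dynamic_irq_init/cleanup family does not die here, it only leaves chip.c: descriptor (re)initialization is consolidated around desc_set_defaults() in the new irqdesc.c added further down. Assuming that helper, the relocated cleanup plausibly reduces to something like this (a sketch, error handling trimmed):

    void dynamic_irq_cleanup(unsigned int irq)
    {
            struct irq_desc *desc = irq_to_desc(irq);
            unsigned long flags;

            raw_spin_lock_irqsave(&desc->lock, flags);
            desc_set_defaults(irq, desc, desc_node(desc));
            raw_spin_unlock_irqrestore(&desc->lock, flags);
    }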
| @@ -140,7 +38,7 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip) | |||
| 140 | 38 | ||
| 141 | raw_spin_lock_irqsave(&desc->lock, flags); | 39 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 142 | irq_chip_set_defaults(chip); | 40 | irq_chip_set_defaults(chip); |
| 143 | desc->chip = chip; | 41 | desc->irq_data.chip = chip; |
| 144 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 42 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| 145 | 43 | ||
| 146 | return 0; | 44 | return 0; |
| @@ -193,7 +91,7 @@ int set_irq_data(unsigned int irq, void *data) | |||
| 193 | } | 91 | } |
| 194 | 92 | ||
| 195 | raw_spin_lock_irqsave(&desc->lock, flags); | 93 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 196 | desc->handler_data = data; | 94 | desc->irq_data.handler_data = data; |
| 197 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 95 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| 198 | return 0; | 96 | return 0; |
| 199 | } | 97 | } |
| @@ -218,7 +116,7 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry) | |||
| 218 | } | 116 | } |
| 219 | 117 | ||
| 220 | raw_spin_lock_irqsave(&desc->lock, flags); | 118 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 221 | desc->msi_desc = entry; | 119 | desc->irq_data.msi_desc = entry; |
| 222 | if (entry) | 120 | if (entry) |
| 223 | entry->irq = irq; | 121 | entry->irq = irq; |
| 224 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 122 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| @@ -243,19 +141,27 @@ int set_irq_chip_data(unsigned int irq, void *data) | |||
| 243 | return -EINVAL; | 141 | return -EINVAL; |
| 244 | } | 142 | } |
| 245 | 143 | ||
| 246 | if (!desc->chip) { | 144 | if (!desc->irq_data.chip) { |
| 247 | printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq); | 145 | printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq); |
| 248 | return -EINVAL; | 146 | return -EINVAL; |
| 249 | } | 147 | } |
| 250 | 148 | ||
| 251 | raw_spin_lock_irqsave(&desc->lock, flags); | 149 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 252 | desc->chip_data = data; | 150 | desc->irq_data.chip_data = data; |
| 253 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 151 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| 254 | 152 | ||
| 255 | return 0; | 153 | return 0; |
| 256 | } | 154 | } |
| 257 | EXPORT_SYMBOL(set_irq_chip_data); | 155 | EXPORT_SYMBOL(set_irq_chip_data); |
| 258 | 156 | ||
| 157 | struct irq_data *irq_get_irq_data(unsigned int irq) | ||
| 158 | { | ||
| 159 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 160 | |||
| 161 | return desc ? &desc->irq_data : NULL; | ||
| 162 | } | ||
| 163 | EXPORT_SYMBOL_GPL(irq_get_irq_data); | ||
| 164 | |||
| 259 | /** | 165 | /** |
| 260 | * set_irq_nested_thread - Set/Reset the IRQ_NESTED_THREAD flag of an irq | 166 | * set_irq_nested_thread - Set/Reset the IRQ_NESTED_THREAD flag of an irq |
| 261 | * | 167 | * |
| @@ -287,93 +193,216 @@ EXPORT_SYMBOL_GPL(set_irq_nested_thread); | |||
| 287 | /* | 193 | /* |
| 288 | * default enable function | 194 | * default enable function |
| 289 | */ | 195 | */ |
| 290 | static void default_enable(unsigned int irq) | 196 | static void default_enable(struct irq_data *data) |
| 291 | { | 197 | { |
| 292 | struct irq_desc *desc = irq_to_desc(irq); | 198 | struct irq_desc *desc = irq_data_to_desc(data); |
| 293 | 199 | ||
| 294 | desc->chip->unmask(irq); | 200 | desc->irq_data.chip->irq_unmask(&desc->irq_data); |
| 295 | desc->status &= ~IRQ_MASKED; | 201 | desc->status &= ~IRQ_MASKED; |
| 296 | } | 202 | } |
| 297 | 203 | ||
| 298 | /* | 204 | /* |
| 299 | * default disable function | 205 | * default disable function |
| 300 | */ | 206 | */ |
| 301 | static void default_disable(unsigned int irq) | 207 | static void default_disable(struct irq_data *data) |
| 302 | { | 208 | { |
| 303 | } | 209 | } |
| 304 | 210 | ||
| 305 | /* | 211 | /* |
| 306 | * default startup function | 212 | * default startup function |
| 307 | */ | 213 | */ |
| 308 | static unsigned int default_startup(unsigned int irq) | 214 | static unsigned int default_startup(struct irq_data *data) |
| 309 | { | 215 | { |
| 310 | struct irq_desc *desc = irq_to_desc(irq); | 216 | struct irq_desc *desc = irq_data_to_desc(data); |
| 311 | 217 | ||
| 312 | desc->chip->enable(irq); | 218 | desc->irq_data.chip->irq_enable(data); |
| 313 | return 0; | 219 | return 0; |
| 314 | } | 220 | } |
| 315 | 221 | ||
| 316 | /* | 222 | /* |
| 317 | * default shutdown function | 223 | * default shutdown function |
| 318 | */ | 224 | */ |
| 319 | static void default_shutdown(unsigned int irq) | 225 | static void default_shutdown(struct irq_data *data) |
| 320 | { | 226 | { |
| 321 | struct irq_desc *desc = irq_to_desc(irq); | 227 | struct irq_desc *desc = irq_data_to_desc(data); |
| 322 | 228 | ||
| 323 | desc->chip->mask(irq); | 229 | desc->irq_data.chip->irq_mask(&desc->irq_data); |
| 324 | desc->status |= IRQ_MASKED; | 230 | desc->status |= IRQ_MASKED; |
| 325 | } | 231 | } |
| 326 | 232 | ||
| 233 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED | ||
| 234 | /* Temporary migration helpers */ | ||
| 235 | static void compat_irq_mask(struct irq_data *data) | ||
| 236 | { | ||
| 237 | data->chip->mask(data->irq); | ||
| 238 | } | ||
| 239 | |||
| 240 | static void compat_irq_unmask(struct irq_data *data) | ||
| 241 | { | ||
| 242 | data->chip->unmask(data->irq); | ||
| 243 | } | ||
| 244 | |||
| 245 | static void compat_irq_ack(struct irq_data *data) | ||
| 246 | { | ||
| 247 | data->chip->ack(data->irq); | ||
| 248 | } | ||
| 249 | |||
| 250 | static void compat_irq_mask_ack(struct irq_data *data) | ||
| 251 | { | ||
| 252 | data->chip->mask_ack(data->irq); | ||
| 253 | } | ||
| 254 | |||
| 255 | static void compat_irq_eoi(struct irq_data *data) | ||
| 256 | { | ||
| 257 | data->chip->eoi(data->irq); | ||
| 258 | } | ||
| 259 | |||
| 260 | static void compat_irq_enable(struct irq_data *data) | ||
| 261 | { | ||
| 262 | data->chip->enable(data->irq); | ||
| 263 | } | ||
| 264 | |||
| 265 | static void compat_irq_disable(struct irq_data *data) | ||
| 266 | { | ||
| 267 | data->chip->disable(data->irq); | ||
| 268 | } | ||
| 269 | |||
| 270 | static void compat_irq_shutdown(struct irq_data *data) | ||
| 271 | { | ||
| 272 | data->chip->shutdown(data->irq); | ||
| 273 | } | ||
| 274 | |||
| 275 | static unsigned int compat_irq_startup(struct irq_data *data) | ||
| 276 | { | ||
| 277 | return data->chip->startup(data->irq); | ||
| 278 | } | ||
| 279 | |||
| 280 | static int compat_irq_set_affinity(struct irq_data *data, | ||
| 281 | const struct cpumask *dest, bool force) | ||
| 282 | { | ||
| 283 | return data->chip->set_affinity(data->irq, dest); | ||
| 284 | } | ||
| 285 | |||
| 286 | static int compat_irq_set_type(struct irq_data *data, unsigned int type) | ||
| 287 | { | ||
| 288 | return data->chip->set_type(data->irq, type); | ||
| 289 | } | ||
| 290 | |||
| 291 | static int compat_irq_set_wake(struct irq_data *data, unsigned int on) | ||
| 292 | { | ||
| 293 | return data->chip->set_wake(data->irq, on); | ||
| 294 | } | ||
| 295 | |||
| 296 | static int compat_irq_retrigger(struct irq_data *data) | ||
| 297 | { | ||
| 298 | return data->chip->retrigger(data->irq); | ||
| 299 | } | ||
| 300 | |||
| 301 | static void compat_bus_lock(struct irq_data *data) | ||
| 302 | { | ||
| 303 | data->chip->bus_lock(data->irq); | ||
| 304 | } | ||
| 305 | |||
| 306 | static void compat_bus_sync_unlock(struct irq_data *data) | ||
| 307 | { | ||
| 308 | data->chip->bus_sync_unlock(data->irq); | ||
| 309 | } | ||
| 310 | #endif | ||
| 311 | |||
| 327 | /* | 312 | /* |
| 328 | * Fixup enable/disable function pointers | 313 | * Fixup enable/disable function pointers |
| 329 | */ | 314 | */ |
| 330 | void irq_chip_set_defaults(struct irq_chip *chip) | 315 | void irq_chip_set_defaults(struct irq_chip *chip) |
| 331 | { | 316 | { |
| 332 | if (!chip->enable) | 317 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED |
| 333 | chip->enable = default_enable; | ||
| 334 | if (!chip->disable) | ||
| 335 | chip->disable = default_disable; | ||
| 336 | if (!chip->startup) | ||
| 337 | chip->startup = default_startup; | ||
| 338 | /* | 318 | /* |
| 339 | * We use chip->disable, when the user provided its own. When | 319 | * Compat fixup functions need to be before we set the |
| 340 | * we have default_disable set for chip->disable, then we need | 320 | * defaults for enable/disable/startup/shutdown |
| 321 | */ | ||
| 322 | if (chip->enable) | ||
| 323 | chip->irq_enable = compat_irq_enable; | ||
| 324 | if (chip->disable) | ||
| 325 | chip->irq_disable = compat_irq_disable; | ||
| 326 | if (chip->shutdown) | ||
| 327 | chip->irq_shutdown = compat_irq_shutdown; | ||
| 328 | if (chip->startup) | ||
| 329 | chip->irq_startup = compat_irq_startup; | ||
| 330 | #endif | ||
| 331 | /* | ||
| 332 | * The real defaults | ||
| 333 | */ | ||
| 334 | if (!chip->irq_enable) | ||
| 335 | chip->irq_enable = default_enable; | ||
| 336 | if (!chip->irq_disable) | ||
| 337 | chip->irq_disable = default_disable; | ||
| 338 | if (!chip->irq_startup) | ||
| 339 | chip->irq_startup = default_startup; | ||
| 340 | /* | ||
| 341 | * We use chip->irq_disable, when the user provided its own. When | ||
| 342 | * we have default_disable set for chip->irq_disable, then we need | ||
| 341 | * to use default_shutdown, otherwise the irq line is not | 343 | * to use default_shutdown, otherwise the irq line is not |
| 342 | * disabled on free_irq(): | 344 | * disabled on free_irq(): |
| 343 | */ | 345 | */ |
| 344 | if (!chip->shutdown) | 346 | if (!chip->irq_shutdown) |
| 345 | chip->shutdown = chip->disable != default_disable ? | 347 | chip->irq_shutdown = chip->irq_disable != default_disable ? |
| 346 | chip->disable : default_shutdown; | 348 | chip->irq_disable : default_shutdown; |
| 347 | if (!chip->name) | 349 | |
| 348 | chip->name = chip->typename; | 350 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED |
| 349 | if (!chip->end) | 351 | if (!chip->end) |
| 350 | chip->end = dummy_irq_chip.end; | 352 | chip->end = dummy_irq_chip.end; |
| 353 | |||
| 354 | /* | ||
| 355 | * Now fix up the remaining compat handlers | ||
| 356 | */ | ||
| 357 | if (chip->bus_lock) | ||
| 358 | chip->irq_bus_lock = compat_bus_lock; | ||
| 359 | if (chip->bus_sync_unlock) | ||
| 360 | chip->irq_bus_sync_unlock = compat_bus_sync_unlock; | ||
| 361 | if (chip->mask) | ||
| 362 | chip->irq_mask = compat_irq_mask; | ||
| 363 | if (chip->unmask) | ||
| 364 | chip->irq_unmask = compat_irq_unmask; | ||
| 365 | if (chip->ack) | ||
| 366 | chip->irq_ack = compat_irq_ack; | ||
| 367 | if (chip->mask_ack) | ||
| 368 | chip->irq_mask_ack = compat_irq_mask_ack; | ||
| 369 | if (chip->eoi) | ||
| 370 | chip->irq_eoi = compat_irq_eoi; | ||
| 371 | if (chip->set_affinity) | ||
| 372 | chip->irq_set_affinity = compat_irq_set_affinity; | ||
| 373 | if (chip->set_type) | ||
| 374 | chip->irq_set_type = compat_irq_set_type; | ||
| 375 | if (chip->set_wake) | ||
| 376 | chip->irq_set_wake = compat_irq_set_wake; | ||
| 377 | if (chip->retrigger) | ||
| 378 | chip->irq_retrigger = compat_irq_retrigger; | ||
| 379 | #endif | ||
| 351 | } | 380 | } |
| 352 | 381 | ||
| 353 | static inline void mask_ack_irq(struct irq_desc *desc, int irq) | 382 | static inline void mask_ack_irq(struct irq_desc *desc) |
| 354 | { | 383 | { |
| 355 | if (desc->chip->mask_ack) | 384 | if (desc->irq_data.chip->irq_mask_ack) |
| 356 | desc->chip->mask_ack(irq); | 385 | desc->irq_data.chip->irq_mask_ack(&desc->irq_data); |
| 357 | else { | 386 | else { |
| 358 | desc->chip->mask(irq); | 387 | desc->irq_data.chip->irq_mask(&desc->irq_data); |
| 359 | if (desc->chip->ack) | 388 | if (desc->irq_data.chip->irq_ack) |
| 360 | desc->chip->ack(irq); | 389 | desc->irq_data.chip->irq_ack(&desc->irq_data); |
| 361 | } | 390 | } |
| 362 | desc->status |= IRQ_MASKED; | 391 | desc->status |= IRQ_MASKED; |
| 363 | } | 392 | } |
| 364 | 393 | ||
| 365 | static inline void mask_irq(struct irq_desc *desc, int irq) | 394 | static inline void mask_irq(struct irq_desc *desc) |
| 366 | { | 395 | { |
| 367 | if (desc->chip->mask) { | 396 | if (desc->irq_data.chip->irq_mask) { |
| 368 | desc->chip->mask(irq); | 397 | desc->irq_data.chip->irq_mask(&desc->irq_data); |
| 369 | desc->status |= IRQ_MASKED; | 398 | desc->status |= IRQ_MASKED; |
| 370 | } | 399 | } |
| 371 | } | 400 | } |
| 372 | 401 | ||
| 373 | static inline void unmask_irq(struct irq_desc *desc, int irq) | 402 | static inline void unmask_irq(struct irq_desc *desc) |
| 374 | { | 403 | { |
| 375 | if (desc->chip->unmask) { | 404 | if (desc->irq_data.chip->irq_unmask) { |
| 376 | desc->chip->unmask(irq); | 405 | desc->irq_data.chip->irq_unmask(&desc->irq_data); |
| 377 | desc->status &= ~IRQ_MASKED; | 406 | desc->status &= ~IRQ_MASKED; |
| 378 | } | 407 | } |
| 379 | } | 408 | } |
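The compat_* trampolines and the fixups in irq_chip_set_defaults() exist so unconverted chips keep working during the transition: each old-style pointer, if set, gets wrapped behind the matching irq_* method. For a driver, the conversion itself looks roughly like this (all names hypothetical):

    /* before: old-style callback, keyed by irq number */
    static void foo_mask(unsigned int irq)
    {
            writel(BIT(irq - foo_irq_base), foo_regs + FOO_MASK_SET);
    }

    static struct irq_chip foo_chip = {
            .name   = "foo",
            .mask   = foo_mask,     /* wrapped by compat_irq_mask() above */
    };

    /* after: takes irq_data, so per-instance state can come from
     * chip_data instead of file-scope globals */
    static void foo_irq_mask(struct irq_data *d)
    {
            struct foo_priv *p = d->chip_data;      /* hypothetical */

            writel(BIT(d->irq - p->irq_base), p->regs + FOO_MASK_SET);
    }

    static struct irq_chip foo_chip = {
            .name           = "foo",
            .irq_mask       = foo_irq_mask,
    };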
| @@ -476,7 +505,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) | |||
| 476 | irqreturn_t action_ret; | 505 | irqreturn_t action_ret; |
| 477 | 506 | ||
| 478 | raw_spin_lock(&desc->lock); | 507 | raw_spin_lock(&desc->lock); |
| 479 | mask_ack_irq(desc, irq); | 508 | mask_ack_irq(desc); |
| 480 | 509 | ||
| 481 | if (unlikely(desc->status & IRQ_INPROGRESS)) | 510 | if (unlikely(desc->status & IRQ_INPROGRESS)) |
| 482 | goto out_unlock; | 511 | goto out_unlock; |
| @@ -502,7 +531,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) | |||
| 502 | desc->status &= ~IRQ_INPROGRESS; | 531 | desc->status &= ~IRQ_INPROGRESS; |
| 503 | 532 | ||
| 504 | if (!(desc->status & (IRQ_DISABLED | IRQ_ONESHOT))) | 533 | if (!(desc->status & (IRQ_DISABLED | IRQ_ONESHOT))) |
| 505 | unmask_irq(desc, irq); | 534 | unmask_irq(desc); |
| 506 | out_unlock: | 535 | out_unlock: |
| 507 | raw_spin_unlock(&desc->lock); | 536 | raw_spin_unlock(&desc->lock); |
| 508 | } | 537 | } |
| @@ -539,7 +568,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) | |||
| 539 | action = desc->action; | 568 | action = desc->action; |
| 540 | if (unlikely(!action || (desc->status & IRQ_DISABLED))) { | 569 | if (unlikely(!action || (desc->status & IRQ_DISABLED))) { |
| 541 | desc->status |= IRQ_PENDING; | 570 | desc->status |= IRQ_PENDING; |
| 542 | mask_irq(desc, irq); | 571 | mask_irq(desc); |
| 543 | goto out; | 572 | goto out; |
| 544 | } | 573 | } |
| 545 | 574 | ||
| @@ -554,7 +583,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) | |||
| 554 | raw_spin_lock(&desc->lock); | 583 | raw_spin_lock(&desc->lock); |
| 555 | desc->status &= ~IRQ_INPROGRESS; | 584 | desc->status &= ~IRQ_INPROGRESS; |
| 556 | out: | 585 | out: |
| 557 | desc->chip->eoi(irq); | 586 | desc->irq_data.chip->irq_eoi(&desc->irq_data); |
| 558 | 587 | ||
| 559 | raw_spin_unlock(&desc->lock); | 588 | raw_spin_unlock(&desc->lock); |
| 560 | } | 589 | } |
| @@ -590,14 +619,13 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) | |||
| 590 | if (unlikely((desc->status & (IRQ_INPROGRESS | IRQ_DISABLED)) || | 619 | if (unlikely((desc->status & (IRQ_INPROGRESS | IRQ_DISABLED)) || |
| 591 | !desc->action)) { | 620 | !desc->action)) { |
| 592 | desc->status |= (IRQ_PENDING | IRQ_MASKED); | 621 | desc->status |= (IRQ_PENDING | IRQ_MASKED); |
| 593 | mask_ack_irq(desc, irq); | 622 | mask_ack_irq(desc); |
| 594 | goto out_unlock; | 623 | goto out_unlock; |
| 595 | } | 624 | } |
| 596 | kstat_incr_irqs_this_cpu(irq, desc); | 625 | kstat_incr_irqs_this_cpu(irq, desc); |
| 597 | 626 | ||
| 598 | /* Start handling the irq */ | 627 | /* Start handling the irq */ |
| 599 | if (desc->chip->ack) | 628 | desc->irq_data.chip->irq_ack(&desc->irq_data); |
| 600 | desc->chip->ack(irq); | ||
| 601 | 629 | ||
| 602 | /* Mark the IRQ currently in progress.*/ | 630 | /* Mark the IRQ currently in progress.*/ |
| 603 | desc->status |= IRQ_INPROGRESS; | 631 | desc->status |= IRQ_INPROGRESS; |
| @@ -607,7 +635,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) | |||
| 607 | irqreturn_t action_ret; | 635 | irqreturn_t action_ret; |
| 608 | 636 | ||
| 609 | if (unlikely(!action)) { | 637 | if (unlikely(!action)) { |
| 610 | mask_irq(desc, irq); | 638 | mask_irq(desc); |
| 611 | goto out_unlock; | 639 | goto out_unlock; |
| 612 | } | 640 | } |
| 613 | 641 | ||
| @@ -619,7 +647,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) | |||
| 619 | if (unlikely((desc->status & | 647 | if (unlikely((desc->status & |
| 620 | (IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) == | 648 | (IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) == |
| 621 | (IRQ_PENDING | IRQ_MASKED))) { | 649 | (IRQ_PENDING | IRQ_MASKED))) { |
| 622 | unmask_irq(desc, irq); | 650 | unmask_irq(desc); |
| 623 | } | 651 | } |
| 624 | 652 | ||
| 625 | desc->status &= ~IRQ_PENDING; | 653 | desc->status &= ~IRQ_PENDING; |
| @@ -650,15 +678,15 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) | |||
| 650 | 678 | ||
| 651 | kstat_incr_irqs_this_cpu(irq, desc); | 679 | kstat_incr_irqs_this_cpu(irq, desc); |
| 652 | 680 | ||
| 653 | if (desc->chip->ack) | 681 | if (desc->irq_data.chip->irq_ack) |
| 654 | desc->chip->ack(irq); | 682 | desc->irq_data.chip->irq_ack(&desc->irq_data); |
| 655 | 683 | ||
| 656 | action_ret = handle_IRQ_event(irq, desc->action); | 684 | action_ret = handle_IRQ_event(irq, desc->action); |
| 657 | if (!noirqdebug) | 685 | if (!noirqdebug) |
| 658 | note_interrupt(irq, desc, action_ret); | 686 | note_interrupt(irq, desc, action_ret); |
| 659 | 687 | ||
| 660 | if (desc->chip->eoi) | 688 | if (desc->irq_data.chip->irq_eoi) |
| 661 | desc->chip->eoi(irq); | 689 | desc->irq_data.chip->irq_eoi(&desc->irq_data); |
| 662 | } | 690 | } |
| 663 | 691 | ||
| 664 | void | 692 | void |
| @@ -676,7 +704,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, | |||
| 676 | 704 | ||
| 677 | if (!handle) | 705 | if (!handle) |
| 678 | handle = handle_bad_irq; | 706 | handle = handle_bad_irq; |
| 679 | else if (desc->chip == &no_irq_chip) { | 707 | else if (desc->irq_data.chip == &no_irq_chip) { |
| 680 | printk(KERN_WARNING "Trying to install %sinterrupt handler " | 708 | printk(KERN_WARNING "Trying to install %sinterrupt handler " |
| 681 | "for IRQ%d\n", is_chained ? "chained " : "", irq); | 709 | "for IRQ%d\n", is_chained ? "chained " : "", irq); |
| 682 | /* | 710 | /* |
| @@ -686,16 +714,16 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, | |||
| 686 | * prevent us to setup the interrupt at all. Switch it to | 714 | * prevent us to setup the interrupt at all. Switch it to |
| 687 | * dummy_irq_chip for easy transition. | 715 | * dummy_irq_chip for easy transition. |
| 688 | */ | 716 | */ |
| 689 | desc->chip = &dummy_irq_chip; | 717 | desc->irq_data.chip = &dummy_irq_chip; |
| 690 | } | 718 | } |
| 691 | 719 | ||
| 692 | chip_bus_lock(irq, desc); | 720 | chip_bus_lock(desc); |
| 693 | raw_spin_lock_irqsave(&desc->lock, flags); | 721 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 694 | 722 | ||
| 695 | /* Uninstall? */ | 723 | /* Uninstall? */ |
| 696 | if (handle == handle_bad_irq) { | 724 | if (handle == handle_bad_irq) { |
| 697 | if (desc->chip != &no_irq_chip) | 725 | if (desc->irq_data.chip != &no_irq_chip) |
| 698 | mask_ack_irq(desc, irq); | 726 | mask_ack_irq(desc); |
| 699 | desc->status |= IRQ_DISABLED; | 727 | desc->status |= IRQ_DISABLED; |
| 700 | desc->depth = 1; | 728 | desc->depth = 1; |
| 701 | } | 729 | } |
| @@ -706,10 +734,10 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, | |||
| 706 | desc->status &= ~IRQ_DISABLED; | 734 | desc->status &= ~IRQ_DISABLED; |
| 707 | desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE; | 735 | desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE; |
| 708 | desc->depth = 0; | 736 | desc->depth = 0; |
| 709 | desc->chip->startup(irq); | 737 | desc->irq_data.chip->irq_startup(&desc->irq_data); |
| 710 | } | 738 | } |
| 711 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 739 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| 712 | chip_bus_sync_unlock(irq, desc); | 740 | chip_bus_sync_unlock(desc); |
| 713 | } | 741 | } |
| 714 | EXPORT_SYMBOL_GPL(__set_irq_handler); | 742 | EXPORT_SYMBOL_GPL(__set_irq_handler); |
| 715 | 743 | ||
| @@ -729,32 +757,20 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, | |||
| 729 | __set_irq_handler(irq, handle, 0, name); | 757 | __set_irq_handler(irq, handle, 0, name); |
| 730 | } | 758 | } |
| 731 | 759 | ||
| 732 | void set_irq_noprobe(unsigned int irq) | 760 | void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) |
| 733 | { | 761 | { |
| 734 | struct irq_desc *desc = irq_to_desc(irq); | 762 | struct irq_desc *desc = irq_to_desc(irq); |
| 735 | unsigned long flags; | 763 | unsigned long flags; |
| 736 | 764 | ||
| 737 | if (!desc) { | 765 | if (!desc) |
| 738 | printk(KERN_ERR "Trying to mark IRQ%d non-probeable\n", irq); | ||
| 739 | return; | 766 | return; |
| 740 | } | ||
| 741 | |||
| 742 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
| 743 | desc->status |= IRQ_NOPROBE; | ||
| 744 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
| 745 | } | ||
| 746 | |||
| 747 | void set_irq_probe(unsigned int irq) | ||
| 748 | { | ||
| 749 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 750 | unsigned long flags; | ||
| 751 | 767 | ||
| 752 | if (!desc) { | 768 | /* Sanitize flags */ |
| 753 | printk(KERN_ERR "Trying to mark IRQ%d probeable\n", irq); | 769 | set &= IRQF_MODIFY_MASK; |
| 754 | return; | 770 | clr &= IRQF_MODIFY_MASK; |
| 755 | } | ||
| 756 | 771 | ||
| 757 | raw_spin_lock_irqsave(&desc->lock, flags); | 772 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 758 | desc->status &= ~IRQ_NOPROBE; | 773 | desc->status &= ~clr; |
| 774 | desc->status |= set; | ||
| 759 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 775 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| 760 | } | 776 | } |
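Two small API additions round off the chip.c changes: irq_get_irq_data() gives outside code a sanctioned way to reach the embedded irq_data, and irq_modify_status() replaces the hand-rolled set_irq_noprobe()/set_irq_probe() pair with one flag-masking primitive. The old entry points can then live on as trivial header wrappers, roughly (a sketch, assuming IRQ_NOPROBE is covered by IRQF_MODIFY_MASK):

    static inline void set_irq_noprobe(unsigned int irq)
    {
            irq_modify_status(irq, 0, IRQ_NOPROBE);
    }

    static inline void set_irq_probe(unsigned int irq)
    {
            irq_modify_status(irq, IRQ_NOPROBE, 0);
    }

    /* typical use of the new accessor */
    static struct irq_chip *get_chip(unsigned int irq)
    {
            struct irq_data *d = irq_get_irq_data(irq);

            return d ? d->chip : NULL;
    }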
diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c new file mode 100644 index 000000000000..20dc5474947e --- /dev/null +++ b/kernel/irq/dummychip.c | |||
| @@ -0,0 +1,68 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar | ||
| 3 | * Copyright (C) 2005-2006, Thomas Gleixner, Russell King | ||
| 4 | * | ||
| 5 | * This file contains the dummy interrupt chip implementation | ||
| 6 | */ | ||
| 7 | #include <linux/interrupt.h> | ||
| 8 | #include <linux/irq.h> | ||
| 9 | |||
| 10 | #include "internals.h" | ||
| 11 | |||
| 12 | /* | ||
| 13 | * What should we do if we get a hw irq event on an illegal vector? | ||
| 14 | * Each architecture has to answer this themself. | ||
| 15 | */ | ||
| 16 | static void ack_bad(struct irq_data *data) | ||
| 17 | { | ||
| 18 | struct irq_desc *desc = irq_data_to_desc(data); | ||
| 19 | |||
| 20 | print_irq_desc(data->irq, desc); | ||
| 21 | ack_bad_irq(data->irq); | ||
| 22 | } | ||
| 23 | |||
| 24 | /* | ||
| 25 | * NOP functions | ||
| 26 | */ | ||
| 27 | static void noop(struct irq_data *data) { } | ||
| 28 | |||
| 29 | static unsigned int noop_ret(struct irq_data *data) | ||
| 30 | { | ||
| 31 | return 0; | ||
| 32 | } | ||
| 33 | |||
| 34 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED | ||
| 35 | static void compat_noop(unsigned int irq) { } | ||
| 36 | #define END_INIT .end = compat_noop | ||
| 37 | #else | ||
| 38 | #define END_INIT | ||
| 39 | #endif | ||
| 40 | |||
| 41 | /* | ||
| 42 | * Generic no controller implementation | ||
| 43 | */ | ||
| 44 | struct irq_chip no_irq_chip = { | ||
| 45 | .name = "none", | ||
| 46 | .irq_startup = noop_ret, | ||
| 47 | .irq_shutdown = noop, | ||
| 48 | .irq_enable = noop, | ||
| 49 | .irq_disable = noop, | ||
| 50 | .irq_ack = ack_bad, | ||
| 51 | END_INIT | ||
| 52 | }; | ||
| 53 | |||
| 54 | /* | ||
| 55 | * Generic dummy implementation which can be used for | ||
| 56 | * real dumb interrupt sources | ||
| 57 | */ | ||
| 58 | struct irq_chip dummy_irq_chip = { | ||
| 59 | .name = "dummy", | ||
| 60 | .irq_startup = noop_ret, | ||
| 61 | .irq_shutdown = noop, | ||
| 62 | .irq_enable = noop, | ||
| 63 | .irq_disable = noop, | ||
| 64 | .irq_ack = noop, | ||
| 65 | .irq_mask = noop, | ||
| 66 | .irq_unmask = noop, | ||
| 67 | END_INIT | ||
| 68 | }; | ||
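Splitting the no-op chips into dummychip.c lets kernels that select GENERIC_HARDIRQS_NO_DEPRECATED drop the legacy .end member entirely; the END_INIT macro only expands to a compat_noop when the deprecated interface is still compiled in. dummy_irq_chip itself is meant for demultiplexed interrupt children that have no hardware of their own to ack or mask, along the lines of (hypothetical setup code):

    /* child interrupts behind a demux handler need a chip, but every
     * operation is a no-op — the parent chip does the real work */
    set_irq_chip_and_handler(child_irq, &dummy_irq_chip, handle_simple_irq);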
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 27e5c6911223..e2347eb63306 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
| @@ -11,24 +11,15 @@ | |||
| 11 | */ | 11 | */ |
| 12 | 12 | ||
| 13 | #include <linux/irq.h> | 13 | #include <linux/irq.h> |
| 14 | #include <linux/sched.h> | ||
| 15 | #include <linux/slab.h> | ||
| 16 | #include <linux/module.h> | ||
| 17 | #include <linux/random.h> | 14 | #include <linux/random.h> |
| 15 | #include <linux/sched.h> | ||
| 18 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
| 19 | #include <linux/kernel_stat.h> | 17 | #include <linux/kernel_stat.h> |
| 20 | #include <linux/rculist.h> | 18 | |
| 21 | #include <linux/hash.h> | ||
| 22 | #include <linux/radix-tree.h> | ||
| 23 | #include <trace/events/irq.h> | 19 | #include <trace/events/irq.h> |
| 24 | 20 | ||
| 25 | #include "internals.h" | 21 | #include "internals.h" |
| 26 | 22 | ||
| 27 | /* | ||
| 28 | * lockdep: we want to handle all irq_desc locks as a single lock-class: | ||
| 29 | */ | ||
| 30 | struct lock_class_key irq_desc_lock_class; | ||
| 31 | |||
| 32 | /** | 23 | /** |
| 33 | * handle_bad_irq - handle spurious and unhandled irqs | 24 | * handle_bad_irq - handle spurious and unhandled irqs |
| 34 | * @irq: the interrupt number | 25 | * @irq: the interrupt number |
| @@ -43,304 +34,6 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc) | |||
| 43 | ack_bad_irq(irq); | 34 | ack_bad_irq(irq); |
| 44 | } | 35 | } |
| 45 | 36 | ||
| 46 | #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) | ||
| 47 | static void __init init_irq_default_affinity(void) | ||
| 48 | { | ||
| 49 | alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); | ||
| 50 | cpumask_setall(irq_default_affinity); | ||
| 51 | } | ||
| 52 | #else | ||
| 53 | static void __init init_irq_default_affinity(void) | ||
| 54 | { | ||
| 55 | } | ||
| 56 | #endif | ||
| 57 | |||
| 58 | /* | ||
| 59 | * Linux has a controller-independent interrupt architecture. | ||
| 60 | * Every controller has a 'controller-template', that is used | ||
| 61 | * by the main code to do the right thing. Each driver-visible | ||
| 62 | * interrupt source is transparently wired to the appropriate | ||
| 63 | * controller. Thus drivers need not be aware of the | ||
| 64 | * interrupt-controller. | ||
| 65 | * | ||
| 66 | * The code is designed to be easily extended with new/different | ||
| 67 | * interrupt controllers, without having to do assembly magic or | ||
| 68 | * having to touch the generic code. | ||
| 69 | * | ||
| 70 | * Controller mappings for all interrupt sources: | ||
| 71 | */ | ||
| 72 | int nr_irqs = NR_IRQS; | ||
| 73 | EXPORT_SYMBOL_GPL(nr_irqs); | ||
| 74 | |||
| 75 | #ifdef CONFIG_SPARSE_IRQ | ||
| 76 | |||
| 77 | static struct irq_desc irq_desc_init = { | ||
| 78 | .irq = -1, | ||
| 79 | .status = IRQ_DISABLED, | ||
| 80 | .chip = &no_irq_chip, | ||
| 81 | .handle_irq = handle_bad_irq, | ||
| 82 | .depth = 1, | ||
| 83 | .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock), | ||
| 84 | }; | ||
| 85 | |||
| 86 | void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr) | ||
| 87 | { | ||
| 88 | void *ptr; | ||
| 89 | |||
| 90 | ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), | ||
| 91 | GFP_ATOMIC, node); | ||
| 92 | |||
| 93 | /* | ||
| 94 | * don't overwite if can not get new one | ||
| 95 | * init_copy_kstat_irqs() could still use old one | ||
| 96 | */ | ||
| 97 | if (ptr) { | ||
| 98 | printk(KERN_DEBUG " alloc kstat_irqs on node %d\n", node); | ||
| 99 | desc->kstat_irqs = ptr; | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | static void init_one_irq_desc(int irq, struct irq_desc *desc, int node) | ||
| 104 | { | ||
| 105 | memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); | ||
| 106 | |||
| 107 | raw_spin_lock_init(&desc->lock); | ||
| 108 | desc->irq = irq; | ||
| 109 | #ifdef CONFIG_SMP | ||
| 110 | desc->node = node; | ||
| 111 | #endif | ||
| 112 | lockdep_set_class(&desc->lock, &irq_desc_lock_class); | ||
| 113 | init_kstat_irqs(desc, node, nr_cpu_ids); | ||
| 114 | if (!desc->kstat_irqs) { | ||
| 115 | printk(KERN_ERR "can not alloc kstat_irqs\n"); | ||
| 116 | BUG_ON(1); | ||
| 117 | } | ||
| 118 | if (!alloc_desc_masks(desc, node, false)) { | ||
| 119 | printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); | ||
| 120 | BUG_ON(1); | ||
| 121 | } | ||
| 122 | init_desc_masks(desc); | ||
| 123 | arch_init_chip_data(desc, node); | ||
| 124 | } | ||
| 125 | |||
| 126 | /* | ||
| 127 | * Protect the sparse_irqs: | ||
| 128 | */ | ||
| 129 | DEFINE_RAW_SPINLOCK(sparse_irq_lock); | ||
| 130 | |||
| 131 | static RADIX_TREE(irq_desc_tree, GFP_ATOMIC); | ||
| 132 | |||
| 133 | static void set_irq_desc(unsigned int irq, struct irq_desc *desc) | ||
| 134 | { | ||
| 135 | radix_tree_insert(&irq_desc_tree, irq, desc); | ||
| 136 | } | ||
| 137 | |||
| 138 | struct irq_desc *irq_to_desc(unsigned int irq) | ||
| 139 | { | ||
| 140 | return radix_tree_lookup(&irq_desc_tree, irq); | ||
| 141 | } | ||
| 142 | |||
| 143 | void replace_irq_desc(unsigned int irq, struct irq_desc *desc) | ||
| 144 | { | ||
| 145 | void **ptr; | ||
| 146 | |||
| 147 | ptr = radix_tree_lookup_slot(&irq_desc_tree, irq); | ||
| 148 | if (ptr) | ||
| 149 | radix_tree_replace_slot(ptr, desc); | ||
| 150 | } | ||
| 151 | |||
| 152 | static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = { | ||
| 153 | [0 ... NR_IRQS_LEGACY-1] = { | ||
| 154 | .irq = -1, | ||
| 155 | .status = IRQ_DISABLED, | ||
| 156 | .chip = &no_irq_chip, | ||
| 157 | .handle_irq = handle_bad_irq, | ||
| 158 | .depth = 1, | ||
| 159 | .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock), | ||
| 160 | } | ||
| 161 | }; | ||
| 162 | |||
| 163 | static unsigned int *kstat_irqs_legacy; | ||
| 164 | |||
| 165 | int __init early_irq_init(void) | ||
| 166 | { | ||
| 167 | struct irq_desc *desc; | ||
| 168 | int legacy_count; | ||
| 169 | int node; | ||
| 170 | int i; | ||
| 171 | |||
| 172 | init_irq_default_affinity(); | ||
| 173 | |||
| 174 | /* initialize nr_irqs based on nr_cpu_ids */ | ||
| 175 | arch_probe_nr_irqs(); | ||
| 176 | printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs); | ||
| 177 | |||
| 178 | desc = irq_desc_legacy; | ||
| 179 | legacy_count = ARRAY_SIZE(irq_desc_legacy); | ||
| 180 | node = first_online_node; | ||
| 181 | |||
| 182 | /* allocate based on nr_cpu_ids */ | ||
| 183 | kstat_irqs_legacy = kzalloc_node(NR_IRQS_LEGACY * nr_cpu_ids * | ||
| 184 | sizeof(int), GFP_NOWAIT, node); | ||
| 185 | |||
| 186 | for (i = 0; i < legacy_count; i++) { | ||
| 187 | desc[i].irq = i; | ||
| 188 | #ifdef CONFIG_SMP | ||
| 189 | desc[i].node = node; | ||
| 190 | #endif | ||
| 191 | desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids; | ||
| 192 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); | ||
| 193 | alloc_desc_masks(&desc[i], node, true); | ||
| 194 | init_desc_masks(&desc[i]); | ||
| 195 | set_irq_desc(i, &desc[i]); | ||
| 196 | } | ||
| 197 | |||
| 198 | return arch_early_irq_init(); | ||
| 199 | } | ||
| 200 | |||
| 201 | struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node) | ||
| 202 | { | ||
| 203 | struct irq_desc *desc; | ||
| 204 | unsigned long flags; | ||
| 205 | |||
| 206 | if (irq >= nr_irqs) { | ||
| 207 | WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n", | ||
| 208 | irq, nr_irqs); | ||
| 209 | return NULL; | ||
| 210 | } | ||
| 211 | |||
| 212 | desc = irq_to_desc(irq); | ||
| 213 | if (desc) | ||
| 214 | return desc; | ||
| 215 | |||
| 216 | raw_spin_lock_irqsave(&sparse_irq_lock, flags); | ||
| 217 | |||
| 218 | /* We have to check it to avoid races with another CPU */ | ||
| 219 | desc = irq_to_desc(irq); | ||
| 220 | if (desc) | ||
| 221 | goto out_unlock; | ||
| 222 | |||
| 223 | desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); | ||
| 224 | |||
| 225 | printk(KERN_DEBUG " alloc irq_desc for %d on node %d\n", irq, node); | ||
| 226 | if (!desc) { | ||
| 227 | printk(KERN_ERR "can not alloc irq_desc\n"); | ||
| 228 | BUG_ON(1); | ||
| 229 | } | ||
| 230 | init_one_irq_desc(irq, desc, node); | ||
| 231 | |||
| 232 | set_irq_desc(irq, desc); | ||
| 233 | |||
| 234 | out_unlock: | ||
| 235 | raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | ||
| 236 | |||
| 237 | return desc; | ||
| 238 | } | ||
| 239 | |||
| 240 | #else /* !CONFIG_SPARSE_IRQ */ | ||
| 241 | |||
| 242 | struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { | ||
| 243 | [0 ... NR_IRQS-1] = { | ||
| 244 | .status = IRQ_DISABLED, | ||
| 245 | .chip = &no_irq_chip, | ||
| 246 | .handle_irq = handle_bad_irq, | ||
| 247 | .depth = 1, | ||
| 248 | .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock), | ||
| 249 | } | ||
| 250 | }; | ||
| 251 | |||
| 252 | static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS]; | ||
| 253 | int __init early_irq_init(void) | ||
| 254 | { | ||
| 255 | struct irq_desc *desc; | ||
| 256 | int count; | ||
| 257 | int i; | ||
| 258 | |||
| 259 | init_irq_default_affinity(); | ||
| 260 | |||
| 261 | printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS); | ||
| 262 | |||
| 263 | desc = irq_desc; | ||
| 264 | count = ARRAY_SIZE(irq_desc); | ||
| 265 | |||
| 266 | for (i = 0; i < count; i++) { | ||
| 267 | desc[i].irq = i; | ||
| 268 | alloc_desc_masks(&desc[i], 0, true); | ||
| 269 | init_desc_masks(&desc[i]); | ||
| 270 | desc[i].kstat_irqs = kstat_irqs_all[i]; | ||
| 271 | } | ||
| 272 | return arch_early_irq_init(); | ||
| 273 | } | ||
| 274 | |||
| 275 | struct irq_desc *irq_to_desc(unsigned int irq) | ||
| 276 | { | ||
| 277 | return (irq < NR_IRQS) ? irq_desc + irq : NULL; | ||
| 278 | } | ||
| 279 | |||
| 280 | struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node) | ||
| 281 | { | ||
| 282 | return irq_to_desc(irq); | ||
| 283 | } | ||
| 284 | #endif /* !CONFIG_SPARSE_IRQ */ | ||
| 285 | |||
| 286 | void clear_kstat_irqs(struct irq_desc *desc) | ||
| 287 | { | ||
| 288 | memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs))); | ||
| 289 | } | ||
| 290 | |||
| 291 | /* | ||
| 292 | * What should we do if we get a hw irq event on an illegal vector? | ||
| 293 | * Each architecture has to answer this themself. | ||
| 294 | */ | ||
| 295 | static void ack_bad(unsigned int irq) | ||
| 296 | { | ||
| 297 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 298 | |||
| 299 | print_irq_desc(irq, desc); | ||
| 300 | ack_bad_irq(irq); | ||
| 301 | } | ||
| 302 | |||
| 303 | /* | ||
| 304 | * NOP functions | ||
| 305 | */ | ||
| 306 | static void noop(unsigned int irq) | ||
| 307 | { | ||
| 308 | } | ||
| 309 | |||
| 310 | static unsigned int noop_ret(unsigned int irq) | ||
| 311 | { | ||
| 312 | return 0; | ||
| 313 | } | ||
| 314 | |||
| 315 | /* | ||
| 316 | * Generic no controller implementation | ||
| 317 | */ | ||
| 318 | struct irq_chip no_irq_chip = { | ||
| 319 | .name = "none", | ||
| 320 | .startup = noop_ret, | ||
| 321 | .shutdown = noop, | ||
| 322 | .enable = noop, | ||
| 323 | .disable = noop, | ||
| 324 | .ack = ack_bad, | ||
| 325 | .end = noop, | ||
| 326 | }; | ||
| 327 | |||
| 328 | /* | ||
| 329 | * Generic dummy implementation which can be used for | ||
| 330 | * real dumb interrupt sources | ||
| 331 | */ | ||
| 332 | struct irq_chip dummy_irq_chip = { | ||
| 333 | .name = "dummy", | ||
| 334 | .startup = noop_ret, | ||
| 335 | .shutdown = noop, | ||
| 336 | .enable = noop, | ||
| 337 | .disable = noop, | ||
| 338 | .ack = noop, | ||
| 339 | .mask = noop, | ||
| 340 | .unmask = noop, | ||
| 341 | .end = noop, | ||
| 342 | }; | ||
| 343 | |||
| 344 | /* | 37 | /* |
| 345 | * Special, empty irq handler: | 38 | * Special, empty irq handler: |
| 346 | */ | 39 | */ |
| @@ -457,20 +150,20 @@ unsigned int __do_IRQ(unsigned int irq) | |||
| 457 | /* | 150 | /* |
| 458 | * No locking required for CPU-local interrupts: | 151 | * No locking required for CPU-local interrupts: |
| 459 | */ | 152 | */ |
| 460 | if (desc->chip->ack) | 153 | if (desc->irq_data.chip->ack) |
| 461 | desc->chip->ack(irq); | 154 | desc->irq_data.chip->ack(irq); |
| 462 | if (likely(!(desc->status & IRQ_DISABLED))) { | 155 | if (likely(!(desc->status & IRQ_DISABLED))) { |
| 463 | action_ret = handle_IRQ_event(irq, desc->action); | 156 | action_ret = handle_IRQ_event(irq, desc->action); |
| 464 | if (!noirqdebug) | 157 | if (!noirqdebug) |
| 465 | note_interrupt(irq, desc, action_ret); | 158 | note_interrupt(irq, desc, action_ret); |
| 466 | } | 159 | } |
| 467 | desc->chip->end(irq); | 160 | desc->irq_data.chip->end(irq); |
| 468 | return 1; | 161 | return 1; |
| 469 | } | 162 | } |
| 470 | 163 | ||
| 471 | raw_spin_lock(&desc->lock); | 164 | raw_spin_lock(&desc->lock); |
| 472 | if (desc->chip->ack) | 165 | if (desc->irq_data.chip->ack) |
| 473 | desc->chip->ack(irq); | 166 | desc->irq_data.chip->ack(irq); |
| 474 | /* | 167 | /* |
| 475 | * REPLAY is when Linux resends an IRQ that was dropped earlier | 168 | * REPLAY is when Linux resends an IRQ that was dropped earlier |
| 476 | * WAITING is used by probe to mark irqs that are being tested | 169 | * WAITING is used by probe to mark irqs that are being tested |
| @@ -530,27 +223,9 @@ out: | |||
| 530 | * The ->end() handler has to deal with interrupts which got | 223 | * The ->end() handler has to deal with interrupts which got |
| 531 | * disabled while the handler was running. | 224 | * disabled while the handler was running. |
| 532 | */ | 225 | */ |
| 533 | desc->chip->end(irq); | 226 | desc->irq_data.chip->end(irq); |
| 534 | raw_spin_unlock(&desc->lock); | 227 | raw_spin_unlock(&desc->lock); |
| 535 | 228 | ||
| 536 | return 1; | 229 | return 1; |
| 537 | } | 230 | } |
| 538 | #endif | 231 | #endif |
| 539 | |||
| 540 | void early_init_irq_lock_class(void) | ||
| 541 | { | ||
| 542 | struct irq_desc *desc; | ||
| 543 | int i; | ||
| 544 | |||
| 545 | for_each_irq_desc(i, desc) { | ||
| 546 | lockdep_set_class(&desc->lock, &irq_desc_lock_class); | ||
| 547 | } | ||
| 548 | } | ||
| 549 | |||
| 550 | unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) | ||
| 551 | { | ||
| 552 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 553 | return desc ? desc->kstat_irqs[cpu] : 0; | ||
| 554 | } | ||
| 555 | EXPORT_SYMBOL(kstat_irqs_cpu); | ||
| 556 | |||
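After this split handle.c keeps only the flow-handler core; descriptor allocation, the lockdep class, and the kstat accounting all reappear in irqdesc.c below. Note that __do_IRQ() still dereferences the legacy ->ack/->end pointers — it is only built for not-yet-converted architectures, which the compat fixups keep populated. A converted architecture dispatches through the installed flow handler instead, essentially (matching the generic_handle_irq_desc() helper of this era):

    static inline void
    generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc)
    {
            desc->handle_irq(irq, desc);    /* e.g. handle_level_irq */
    }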
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index c63f3bc88f0b..4571ae7e085a 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h | |||
| @@ -1,9 +1,12 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * IRQ subsystem internal functions and variables: | 2 | * IRQ subsystem internal functions and variables: |
| 3 | */ | 3 | */ |
| 4 | #include <linux/irqdesc.h> | ||
| 4 | 5 | ||
| 5 | extern int noirqdebug; | 6 | extern int noirqdebug; |
| 6 | 7 | ||
| 8 | #define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data) | ||
| 9 | |||
| 7 | /* Set default functions for irq_chip structures: */ | 10 | /* Set default functions for irq_chip structures: */ |
| 8 | extern void irq_chip_set_defaults(struct irq_chip *chip); | 11 | extern void irq_chip_set_defaults(struct irq_chip *chip); |
| 9 | 12 | ||
| @@ -15,21 +18,19 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | |||
| 15 | extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp); | 18 | extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp); |
| 16 | extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume); | 19 | extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume); |
| 17 | 20 | ||
| 18 | extern struct lock_class_key irq_desc_lock_class; | ||
| 19 | extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); | 21 | extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); |
| 20 | extern void clear_kstat_irqs(struct irq_desc *desc); | ||
| 21 | extern raw_spinlock_t sparse_irq_lock; | ||
| 22 | 22 | ||
| 23 | #ifdef CONFIG_SPARSE_IRQ | 23 | /* Resending of interrupts :*/ |
| 24 | void replace_irq_desc(unsigned int irq, struct irq_desc *desc); | 24 | void check_irq_resend(struct irq_desc *desc, unsigned int irq); |
| 25 | #endif | ||
| 26 | 25 | ||
| 27 | #ifdef CONFIG_PROC_FS | 26 | #ifdef CONFIG_PROC_FS |
| 28 | extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); | 27 | extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); |
| 28 | extern void unregister_irq_proc(unsigned int irq, struct irq_desc *desc); | ||
| 29 | extern void register_handler_proc(unsigned int irq, struct irqaction *action); | 29 | extern void register_handler_proc(unsigned int irq, struct irqaction *action); |
| 30 | extern void unregister_handler_proc(unsigned int irq, struct irqaction *action); | 30 | extern void unregister_handler_proc(unsigned int irq, struct irqaction *action); |
| 31 | #else | 31 | #else |
| 32 | static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { } | 32 | static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { } |
| 33 | static inline void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) { } | ||
| 33 | static inline void register_handler_proc(unsigned int irq, | 34 | static inline void register_handler_proc(unsigned int irq, |
| 34 | struct irqaction *action) { } | 35 | struct irqaction *action) { } |
| 35 | static inline void unregister_handler_proc(unsigned int irq, | 36 | static inline void unregister_handler_proc(unsigned int irq, |
| @@ -40,17 +41,27 @@ extern int irq_select_affinity_usr(unsigned int irq); | |||
| 40 | 41 | ||
| 41 | extern void irq_set_thread_affinity(struct irq_desc *desc); | 42 | extern void irq_set_thread_affinity(struct irq_desc *desc); |
| 42 | 43 | ||
| 44 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED | ||
| 45 | static inline void irq_end(unsigned int irq, struct irq_desc *desc) | ||
| 46 | { | ||
| 47 | if (desc->irq_data.chip && desc->irq_data.chip->end) | ||
| 48 | desc->irq_data.chip->end(irq); | ||
| 49 | } | ||
| 50 | #else | ||
| 51 | static inline void irq_end(unsigned int irq, struct irq_desc *desc) { } | ||
| 52 | #endif | ||
| 53 | |||
| 43 | /* Inline functions for support of irq chips on slow busses */ | 54 | /* Inline functions for support of irq chips on slow busses */ |
| 44 | static inline void chip_bus_lock(unsigned int irq, struct irq_desc *desc) | 55 | static inline void chip_bus_lock(struct irq_desc *desc) |
| 45 | { | 56 | { |
| 46 | if (unlikely(desc->chip->bus_lock)) | 57 | if (unlikely(desc->irq_data.chip->irq_bus_lock)) |
| 47 | desc->chip->bus_lock(irq); | 58 | desc->irq_data.chip->irq_bus_lock(&desc->irq_data); |
| 48 | } | 59 | } |
| 49 | 60 | ||
| 50 | static inline void chip_bus_sync_unlock(unsigned int irq, struct irq_desc *desc) | 61 | static inline void chip_bus_sync_unlock(struct irq_desc *desc) |
| 51 | { | 62 | { |
| 52 | if (unlikely(desc->chip->bus_sync_unlock)) | 63 | if (unlikely(desc->irq_data.chip->irq_bus_sync_unlock)) |
| 53 | desc->chip->bus_sync_unlock(irq); | 64 | desc->irq_data.chip->irq_bus_sync_unlock(&desc->irq_data); |
| 54 | } | 65 | } |
| 55 | 66 | ||
| 56 | /* | 67 | /* |
| @@ -67,8 +78,8 @@ static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) | |||
| 67 | irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled); | 78 | irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled); |
| 68 | printk("->handle_irq(): %p, ", desc->handle_irq); | 79 | printk("->handle_irq(): %p, ", desc->handle_irq); |
| 69 | print_symbol("%s\n", (unsigned long)desc->handle_irq); | 80 | print_symbol("%s\n", (unsigned long)desc->handle_irq); |
| 70 | printk("->chip(): %p, ", desc->chip); | 81 | printk("->irq_data.chip(): %p, ", desc->irq_data.chip); |
| 71 | print_symbol("%s\n", (unsigned long)desc->chip); | 82 | print_symbol("%s\n", (unsigned long)desc->irq_data.chip); |
| 72 | printk("->action(): %p\n", desc->action); | 83 | printk("->action(): %p\n", desc->action); |
| 73 | if (desc->action) { | 84 | if (desc->action) { |
| 74 | printk("->action->handler(): %p, ", desc->action->handler); | 85 | printk("->action->handler(): %p, ", desc->action->handler); |
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c new file mode 100644 index 000000000000..9d917ff72675 --- /dev/null +++ b/kernel/irq/irqdesc.c | |||
| @@ -0,0 +1,395 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar | ||
| 3 | * Copyright (C) 2005-2006, Thomas Gleixner, Russell King | ||
| 4 | * | ||
| 5 | * This file contains the interrupt descriptor management code | ||
| 6 | * | ||
| 7 | * Detailed information is available in Documentation/DocBook/genericirq | ||
| 8 | * | ||
| 9 | */ | ||
| 10 | #include <linux/irq.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/module.h> | ||
| 13 | #include <linux/interrupt.h> | ||
| 14 | #include <linux/kernel_stat.h> | ||
| 15 | #include <linux/radix-tree.h> | ||
| 16 | #include <linux/bitmap.h> | ||
| 17 | |||
| 18 | #include "internals.h" | ||
| 19 | |||
| 20 | /* | ||
| 21 | * lockdep: we want to handle all irq_desc locks as a single lock-class: | ||
| 22 | */ | ||
| 23 | static struct lock_class_key irq_desc_lock_class; | ||
| 24 | |||
| 25 | #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) | ||
| 26 | static void __init init_irq_default_affinity(void) | ||
| 27 | { | ||
| 28 | alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); | ||
| 29 | cpumask_setall(irq_default_affinity); | ||
| 30 | } | ||
| 31 | #else | ||
| 32 | static void __init init_irq_default_affinity(void) | ||
| 33 | { | ||
| 34 | } | ||
| 35 | #endif | ||
| 36 | |||
| 37 | #ifdef CONFIG_SMP | ||
| 38 | static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) | ||
| 39 | { | ||
| 40 | if (!zalloc_cpumask_var_node(&desc->irq_data.affinity, gfp, node)) | ||
| 41 | return -ENOMEM; | ||
| 42 | |||
| 43 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
| 44 | if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { | ||
| 45 | free_cpumask_var(desc->irq_data.affinity); | ||
| 46 | return -ENOMEM; | ||
| 47 | } | ||
| 48 | #endif | ||
| 49 | return 0; | ||
| 50 | } | ||
| 51 | |||
| 52 | static void desc_smp_init(struct irq_desc *desc, int node) | ||
| 53 | { | ||
| 54 | desc->irq_data.node = node; | ||
| 55 | cpumask_copy(desc->irq_data.affinity, irq_default_affinity); | ||
| 56 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
| 57 | cpumask_clear(desc->pending_mask); | ||
| 58 | #endif | ||
| 59 | } | ||
| 60 | |||
| 61 | static inline int desc_node(struct irq_desc *desc) | ||
| 62 | { | ||
| 63 | return desc->irq_data.node; | ||
| 64 | } | ||
| 65 | |||
| 66 | #else | ||
| 67 | static inline int | ||
| 68 | alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) { return 0; } | ||
| 69 | static inline void desc_smp_init(struct irq_desc *desc, int node) { } | ||
| 70 | static inline int desc_node(struct irq_desc *desc) { return 0; } | ||
| 71 | #endif | ||
| 72 | |||
| 73 | static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) | ||
| 74 | { | ||
| 75 | desc->irq_data.irq = irq; | ||
| 76 | desc->irq_data.chip = &no_irq_chip; | ||
| 77 | desc->irq_data.chip_data = NULL; | ||
| 78 | desc->irq_data.handler_data = NULL; | ||
| 79 | desc->irq_data.msi_desc = NULL; | ||
| 80 | desc->status = IRQ_DEFAULT_INIT_FLAGS; | ||
| 81 | desc->handle_irq = handle_bad_irq; | ||
| 82 | desc->depth = 1; | ||
| 83 | desc->irq_count = 0; | ||
| 84 | desc->irqs_unhandled = 0; | ||
| 85 | desc->name = NULL; | ||
| 86 | memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs))); | ||
| 87 | desc_smp_init(desc, node); | ||
| 88 | } | ||
| 89 | |||
| 90 | int nr_irqs = NR_IRQS; | ||
| 91 | EXPORT_SYMBOL_GPL(nr_irqs); | ||
| 92 | |||
| 93 | static DEFINE_MUTEX(sparse_irq_lock); | ||
| 94 | static DECLARE_BITMAP(allocated_irqs, NR_IRQS); | ||
| 95 | |||
| 96 | #ifdef CONFIG_SPARSE_IRQ | ||
| 97 | |||
| 98 | static RADIX_TREE(irq_desc_tree, GFP_KERNEL); | ||
| 99 | |||
| 100 | static void irq_insert_desc(unsigned int irq, struct irq_desc *desc) | ||
| 101 | { | ||
| 102 | radix_tree_insert(&irq_desc_tree, irq, desc); | ||
| 103 | } | ||
| 104 | |||
| 105 | struct irq_desc *irq_to_desc(unsigned int irq) | ||
| 106 | { | ||
| 107 | return radix_tree_lookup(&irq_desc_tree, irq); | ||
| 108 | } | ||
| 109 | |||
| 110 | static void delete_irq_desc(unsigned int irq) | ||
| 111 | { | ||
| 112 | radix_tree_delete(&irq_desc_tree, irq); | ||
| 113 | } | ||
| 114 | |||
| 115 | #ifdef CONFIG_SMP | ||
| 116 | static void free_masks(struct irq_desc *desc) | ||
| 117 | { | ||
| 118 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
| 119 | free_cpumask_var(desc->pending_mask); | ||
| 120 | #endif | ||
| 121 | free_cpumask_var(desc->irq_data.affinity); | ||
| 122 | } | ||
| 123 | #else | ||
| 124 | static inline void free_masks(struct irq_desc *desc) { } | ||
| 125 | #endif | ||
| 126 | |||
| 127 | static struct irq_desc *alloc_desc(int irq, int node) | ||
| 128 | { | ||
| 129 | struct irq_desc *desc; | ||
| 130 | gfp_t gfp = GFP_KERNEL; | ||
| 131 | |||
| 132 | desc = kzalloc_node(sizeof(*desc), gfp, node); | ||
| 133 | if (!desc) | ||
| 134 | return NULL; | ||
| 135 | /* allocate based on nr_cpu_ids */ | ||
| 136 | desc->kstat_irqs = kzalloc_node(nr_cpu_ids * sizeof(*desc->kstat_irqs), | ||
| 137 | gfp, node); | ||
| 138 | if (!desc->kstat_irqs) | ||
| 139 | goto err_desc; | ||
| 140 | |||
| 141 | if (alloc_masks(desc, gfp, node)) | ||
| 142 | goto err_kstat; | ||
| 143 | |||
| 144 | raw_spin_lock_init(&desc->lock); | ||
| 145 | lockdep_set_class(&desc->lock, &irq_desc_lock_class); | ||
| 146 | |||
| 147 | desc_set_defaults(irq, desc, node); | ||
| 148 | |||
| 149 | return desc; | ||
| 150 | |||
| 151 | err_kstat: | ||
| 152 | kfree(desc->kstat_irqs); | ||
| 153 | err_desc: | ||
| 154 | kfree(desc); | ||
| 155 | return NULL; | ||
| 156 | } | ||
| 157 | |||
| 158 | static void free_desc(unsigned int irq) | ||
| 159 | { | ||
| 160 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 161 | |||
| 162 | unregister_irq_proc(irq, desc); | ||
| 163 | |||
| 164 | mutex_lock(&sparse_irq_lock); | ||
| 165 | delete_irq_desc(irq); | ||
| 166 | mutex_unlock(&sparse_irq_lock); | ||
| 167 | |||
| 168 | free_masks(desc); | ||
| 169 | kfree(desc->kstat_irqs); | ||
| 170 | kfree(desc); | ||
| 171 | } | ||
| 172 | |||
| 173 | static int alloc_descs(unsigned int start, unsigned int cnt, int node) | ||
| 174 | { | ||
| 175 | struct irq_desc *desc; | ||
| 176 | int i; | ||
| 177 | |||
| 178 | for (i = 0; i < cnt; i++) { | ||
| 179 | desc = alloc_desc(start + i, node); | ||
| 180 | if (!desc) | ||
| 181 | goto err; | ||
| 182 | mutex_lock(&sparse_irq_lock); | ||
| 183 | irq_insert_desc(start + i, desc); | ||
| 184 | mutex_unlock(&sparse_irq_lock); | ||
| 185 | } | ||
| 186 | return start; | ||
| 187 | |||
| 188 | err: | ||
| 189 | for (i--; i >= 0; i--) | ||
| 190 | free_desc(start + i); | ||
| 191 | |||
| 192 | mutex_lock(&sparse_irq_lock); | ||
| 193 | bitmap_clear(allocated_irqs, start, cnt); | ||
| 194 | mutex_unlock(&sparse_irq_lock); | ||
| 195 | return -ENOMEM; | ||
| 196 | } | ||
| 197 | |||
| 198 | struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node) | ||
| 199 | { | ||
| 200 | int res = irq_alloc_descs(irq, irq, 1, node); | ||
| 201 | |||
| 202 | if (res == -EEXIST || res == irq) | ||
| 203 | return irq_to_desc(irq); | ||
| 204 | return NULL; | ||
| 205 | } | ||
| 206 | |||
| 207 | int __init early_irq_init(void) | ||
| 208 | { | ||
| 209 | int i, initcnt, node = first_online_node; | ||
| 210 | struct irq_desc *desc; | ||
| 211 | |||
| 212 | init_irq_default_affinity(); | ||
| 213 | |||
| 214 | /* Let arch update nr_irqs and return the nr of preallocated irqs */ | ||
| 215 | initcnt = arch_probe_nr_irqs(); | ||
| 216 | printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d %d\n", NR_IRQS, nr_irqs, initcnt); | ||
| 217 | |||
| 218 | for (i = 0; i < initcnt; i++) { | ||
| 219 | desc = alloc_desc(i, node); | ||
| 220 | set_bit(i, allocated_irqs); | ||
| 221 | irq_insert_desc(i, desc); | ||
| 222 | } | ||
| 223 | return arch_early_irq_init(); | ||
| 224 | } | ||
| 225 | |||
| 226 | #else /* !CONFIG_SPARSE_IRQ */ | ||
| 227 | |||
| 228 | struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { | ||
| 229 | [0 ... NR_IRQS-1] = { | ||
| 230 | .status = IRQ_DEFAULT_INIT_FLAGS, | ||
| 231 | .handle_irq = handle_bad_irq, | ||
| 232 | .depth = 1, | ||
| 233 | .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock), | ||
| 234 | } | ||
| 235 | }; | ||
| 236 | |||
| 237 | static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS]; | ||
| 238 | int __init early_irq_init(void) | ||
| 239 | { | ||
| 240 | int count, i, node = first_online_node; | ||
| 241 | struct irq_desc *desc; | ||
| 242 | |||
| 243 | init_irq_default_affinity(); | ||
| 244 | |||
| 245 | printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS); | ||
| 246 | |||
| 247 | desc = irq_desc; | ||
| 248 | count = ARRAY_SIZE(irq_desc); | ||
| 249 | |||
| 250 | for (i = 0; i < count; i++) { | ||
| 251 | desc[i].irq_data.irq = i; | ||
| 252 | desc[i].irq_data.chip = &no_irq_chip; | ||
| 253 | desc[i].kstat_irqs = kstat_irqs_all[i]; | ||
| 254 | alloc_masks(desc + i, GFP_KERNEL, node); | ||
| 255 | desc_smp_init(desc + i, node); | ||
| 256 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); | ||
| 257 | } | ||
| 258 | return arch_early_irq_init(); | ||
| 259 | } | ||
| 260 | |||
| 261 | struct irq_desc *irq_to_desc(unsigned int irq) | ||
| 262 | { | ||
| 263 | return (irq < NR_IRQS) ? irq_desc + irq : NULL; | ||
| 264 | } | ||
| 265 | |||
| 266 | struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node) | ||
| 267 | { | ||
| 268 | return irq_to_desc(irq); | ||
| 269 | } | ||
| 270 | |||
| 271 | static void free_desc(unsigned int irq) | ||
| 272 | { | ||
| 273 | dynamic_irq_cleanup(irq); | ||
| 274 | } | ||
| 275 | |||
| 276 | static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) | ||
| 277 | { | ||
| 278 | return start; | ||
| 279 | } | ||
| 280 | #endif /* !CONFIG_SPARSE_IRQ */ | ||
| 281 | |||
| 282 | /* Dynamic interrupt handling */ | ||
| 283 | |||
| 284 | /** | ||
| 285 | * irq_free_descs - free irq descriptors | ||
| 286 | * @from: Start of descriptor range | ||
| 287 | * @cnt: Number of consecutive irqs to free | ||
| 288 | */ | ||
| 289 | void irq_free_descs(unsigned int from, unsigned int cnt) | ||
| 290 | { | ||
| 291 | int i; | ||
| 292 | |||
| 293 | if (from >= nr_irqs || (from + cnt) > nr_irqs) | ||
| 294 | return; | ||
| 295 | |||
| 296 | for (i = 0; i < cnt; i++) | ||
| 297 | free_desc(from + i); | ||
| 298 | |||
| 299 | mutex_lock(&sparse_irq_lock); | ||
| 300 | bitmap_clear(allocated_irqs, from, cnt); | ||
| 301 | mutex_unlock(&sparse_irq_lock); | ||
| 302 | } | ||
| 303 | |||
| 304 | /** | ||
| 305 | * irq_alloc_descs - allocate and initialize a range of irq descriptors | ||
| 306 | * @irq: Allocate for specific irq number if irq >= 0 | ||
| 307 | * @from: Start the search from this irq number | ||
| 308 | * @cnt: Number of consecutive irqs to allocate. | ||
| 309 | * @node: Preferred node on which the irq descriptor should be allocated | ||
| 310 | * | ||
| 311 | * Returns the first irq number or error code | ||
| 312 | */ | ||
| 313 | int __ref | ||
| 314 | irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) | ||
| 315 | { | ||
| 316 | int start, ret; | ||
| 317 | |||
| 318 | if (!cnt) | ||
| 319 | return -EINVAL; | ||
| 320 | |||
| 321 | mutex_lock(&sparse_irq_lock); | ||
| 322 | |||
| 323 | start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs, from, cnt, 0); | ||
| 324 | ret = -EEXIST; | ||
| 325 | if (irq >= 0 && start != irq) | ||
| 326 | goto err; | ||
| 327 | |||
| 328 | ret = -ENOMEM; | ||
| 329 | if (start >= nr_irqs) | ||
| 330 | goto err; | ||
| 331 | |||
| 332 | bitmap_set(allocated_irqs, start, cnt); | ||
| 333 | mutex_unlock(&sparse_irq_lock); | ||
| 334 | return alloc_descs(start, cnt, node); | ||
| 335 | |||
| 336 | err: | ||
| 337 | mutex_unlock(&sparse_irq_lock); | ||
| 338 | return ret; | ||
| 339 | } | ||
| 340 | |||
| 341 | /** | ||
| 342 | * irq_reserve_irqs - mark irqs allocated | ||
| 343 | * @from: mark from irq number | ||
| 344 | * @cnt: number of irqs to mark | ||
| 345 | * | ||
| 346 | * Returns 0 on success or an appropriate error code | ||
| 347 | */ | ||
| 348 | int irq_reserve_irqs(unsigned int from, unsigned int cnt) | ||
| 349 | { | ||
| 350 | unsigned int start; | ||
| 351 | int ret = 0; | ||
| 352 | |||
| 353 | if (!cnt || (from + cnt) > nr_irqs) | ||
| 354 | return -EINVAL; | ||
| 355 | |||
| 356 | mutex_lock(&sparse_irq_lock); | ||
| 357 | start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs, from, cnt, 0); | ||
| 358 | if (start == from) | ||
| 359 | bitmap_set(allocated_irqs, start, cnt); | ||
| 360 | else | ||
| 361 | ret = -EEXIST; | ||
| 362 | mutex_unlock(&sparse_irq_lock); | ||
| 363 | return ret; | ||
| 364 | } | ||
| 365 | |||
| 366 | /** | ||
| 367 | * irq_get_next_irq - get next allocated irq number | ||
| 368 | * @offset: where to start the search | ||
| 369 | * | ||
| 370 | * Returns next irq number after offset or nr_irqs if none is found. | ||
| 371 | */ | ||
| 372 | unsigned int irq_get_next_irq(unsigned int offset) | ||
| 373 | { | ||
| 374 | return find_next_bit(allocated_irqs, nr_irqs, offset); | ||
| 375 | } | ||
| 376 | |||
| 377 | /** | ||
| 378 | * dynamic_irq_cleanup - cleanup a dynamically allocated irq | ||
| 379 | * @irq: irq number to initialize | ||
| 380 | */ | ||
| 381 | void dynamic_irq_cleanup(unsigned int irq) | ||
| 382 | { | ||
| 383 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 384 | unsigned long flags; | ||
| 385 | |||
| 386 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
| 387 | desc_set_defaults(irq, desc, desc_node(desc)); | ||
| 388 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
| 389 | } | ||
| 390 | |||
| 391 | unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) | ||
| 392 | { | ||
| 393 | struct irq_desc *desc = irq_to_desc(irq); | ||
| 394 | return desc ? desc->kstat_irqs[cpu] : 0; | ||
| 395 | } | ||
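The new irqdesc.c centralizes descriptor management behind an allocated_irqs bitmap plus, under CONFIG_SPARSE_IRQ, a radix tree keyed by irq number. Going by the kerneldoc above, a caller needing a block of interrupts would pair the two entry points roughly like this (illustrative sketch; the surrounding driver code and the node variable are assumed):

	/* Ask for any 4 consecutive unused irq numbers; descriptors are
	 * allocated on the given NUMA node. irq = -1 means "no fixed
	 * number, search starting at 'from'". */
	int base = irq_alloc_descs(-1, 0, 4, node);

	if (base < 0)
		return base;	/* -EEXIST or -ENOMEM, per the code above */

	/* ... install chips and handlers for base .. base + 3 ... */

	irq_free_descs(base, 4);	/* frees descs, clears bitmap bits */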
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c3003e9d91a3..644e8d5fa367 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
| @@ -73,8 +73,8 @@ int irq_can_set_affinity(unsigned int irq) | |||
| 73 | { | 73 | { |
| 74 | struct irq_desc *desc = irq_to_desc(irq); | 74 | struct irq_desc *desc = irq_to_desc(irq); |
| 75 | 75 | ||
| 76 | if (CHECK_IRQ_PER_CPU(desc->status) || !desc->chip || | 76 | if (CHECK_IRQ_PER_CPU(desc->status) || !desc->irq_data.chip || |
| 77 | !desc->chip->set_affinity) | 77 | !desc->irq_data.chip->irq_set_affinity) |
| 78 | return 0; | 78 | return 0; |
| 79 | 79 | ||
| 80 | return 1; | 80 | return 1; |
| @@ -109,17 +109,18 @@ void irq_set_thread_affinity(struct irq_desc *desc) | |||
| 109 | int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) | 109 | int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) |
| 110 | { | 110 | { |
| 111 | struct irq_desc *desc = irq_to_desc(irq); | 111 | struct irq_desc *desc = irq_to_desc(irq); |
| 112 | struct irq_chip *chip = desc->irq_data.chip; | ||
| 112 | unsigned long flags; | 113 | unsigned long flags; |
| 113 | 114 | ||
| 114 | if (!desc->chip->set_affinity) | 115 | if (!chip->irq_set_affinity) |
| 115 | return -EINVAL; | 116 | return -EINVAL; |
| 116 | 117 | ||
| 117 | raw_spin_lock_irqsave(&desc->lock, flags); | 118 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 118 | 119 | ||
| 119 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 120 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
| 120 | if (desc->status & IRQ_MOVE_PCNTXT) { | 121 | if (desc->status & IRQ_MOVE_PCNTXT) { |
| 121 | if (!desc->chip->set_affinity(irq, cpumask)) { | 122 | if (!chip->irq_set_affinity(&desc->irq_data, cpumask, false)) { |
| 122 | cpumask_copy(desc->affinity, cpumask); | 123 | cpumask_copy(desc->irq_data.affinity, cpumask); |
| 123 | irq_set_thread_affinity(desc); | 124 | irq_set_thread_affinity(desc); |
| 124 | } | 125 | } |
| 125 | } | 126 | } |
| @@ -128,8 +129,8 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) | |||
| 128 | cpumask_copy(desc->pending_mask, cpumask); | 129 | cpumask_copy(desc->pending_mask, cpumask); |
| 129 | } | 130 | } |
| 130 | #else | 131 | #else |
| 131 | if (!desc->chip->set_affinity(irq, cpumask)) { | 132 | if (!chip->irq_set_affinity(&desc->irq_data, cpumask, false)) { |
| 132 | cpumask_copy(desc->affinity, cpumask); | 133 | cpumask_copy(desc->irq_data.affinity, cpumask); |
| 133 | irq_set_thread_affinity(desc); | 134 | irq_set_thread_affinity(desc); |
| 134 | } | 135 | } |
| 135 | #endif | 136 | #endif |
| @@ -168,16 +169,16 @@ static int setup_affinity(unsigned int irq, struct irq_desc *desc) | |||
| 168 | * one of the targets is online. | 169 | * one of the targets is online. |
| 169 | */ | 170 | */ |
| 170 | if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) { | 171 | if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) { |
| 171 | if (cpumask_any_and(desc->affinity, cpu_online_mask) | 172 | if (cpumask_any_and(desc->irq_data.affinity, cpu_online_mask) |
| 172 | < nr_cpu_ids) | 173 | < nr_cpu_ids) |
| 173 | goto set_affinity; | 174 | goto set_affinity; |
| 174 | else | 175 | else |
| 175 | desc->status &= ~IRQ_AFFINITY_SET; | 176 | desc->status &= ~IRQ_AFFINITY_SET; |
| 176 | } | 177 | } |
| 177 | 178 | ||
| 178 | cpumask_and(desc->affinity, cpu_online_mask, irq_default_affinity); | 179 | cpumask_and(desc->irq_data.affinity, cpu_online_mask, irq_default_affinity); |
| 179 | set_affinity: | 180 | set_affinity: |
| 180 | desc->chip->set_affinity(irq, desc->affinity); | 181 | desc->irq_data.chip->irq_set_affinity(&desc->irq_data, desc->irq_data.affinity, false); |
| 181 | 182 | ||
| 182 | return 0; | 183 | return 0; |
| 183 | } | 184 | } |
| @@ -223,7 +224,7 @@ void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend) | |||
| 223 | 224 | ||
| 224 | if (!desc->depth++) { | 225 | if (!desc->depth++) { |
| 225 | desc->status |= IRQ_DISABLED; | 226 | desc->status |= IRQ_DISABLED; |
| 226 | desc->chip->disable(irq); | 227 | desc->irq_data.chip->irq_disable(&desc->irq_data); |
| 227 | } | 228 | } |
| 228 | } | 229 | } |
| 229 | 230 | ||
| @@ -246,11 +247,11 @@ void disable_irq_nosync(unsigned int irq) | |||
| 246 | if (!desc) | 247 | if (!desc) |
| 247 | return; | 248 | return; |
| 248 | 249 | ||
| 249 | chip_bus_lock(irq, desc); | 250 | chip_bus_lock(desc); |
| 250 | raw_spin_lock_irqsave(&desc->lock, flags); | 251 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 251 | __disable_irq(desc, irq, false); | 252 | __disable_irq(desc, irq, false); |
| 252 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 253 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| 253 | chip_bus_sync_unlock(irq, desc); | 254 | chip_bus_sync_unlock(desc); |
| 254 | } | 255 | } |
| 255 | EXPORT_SYMBOL(disable_irq_nosync); | 256 | EXPORT_SYMBOL(disable_irq_nosync); |
| 256 | 257 | ||
| @@ -313,7 +314,7 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume) | |||
| 313 | * IRQ line is re-enabled. | 314 | * IRQ line is re-enabled. |
| 314 | * | 315 | * |
| 315 | * This function may be called from IRQ context only when | 316 | * This function may be called from IRQ context only when |
| 316 | * desc->chip->bus_lock and desc->chip->bus_sync_unlock are NULL ! | 317 | * desc->irq_data.chip->irq_bus_lock and desc->irq_data.chip->irq_bus_sync_unlock are NULL ! |
| 317 | */ | 318 | */ |
| 318 | void enable_irq(unsigned int irq) | 319 | void enable_irq(unsigned int irq) |
| 319 | { | 320 | { |
| @@ -323,11 +324,11 @@ void enable_irq(unsigned int irq) | |||
| 323 | if (!desc) | 324 | if (!desc) |
| 324 | return; | 325 | return; |
| 325 | 326 | ||
| 326 | chip_bus_lock(irq, desc); | 327 | chip_bus_lock(desc); |
| 327 | raw_spin_lock_irqsave(&desc->lock, flags); | 328 | raw_spin_lock_irqsave(&desc->lock, flags); |
| 328 | __enable_irq(desc, irq, false); | 329 | __enable_irq(desc, irq, false); |
| 329 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 330 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| 330 | chip_bus_sync_unlock(irq, desc); | 331 | chip_bus_sync_unlock(desc); |
| 331 | } | 332 | } |
| 332 | EXPORT_SYMBOL(enable_irq); | 333 | EXPORT_SYMBOL(enable_irq); |
| 333 | 334 | ||
| @@ -336,8 +337,8 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on) | |||
| 336 | struct irq_desc *desc = irq_to_desc(irq); | 337 | struct irq_desc *desc = irq_to_desc(irq); |
| 337 | int ret = -ENXIO; | 338 | int ret = -ENXIO; |
| 338 | 339 | ||
| 339 | if (desc->chip->set_wake) | 340 | if (desc->irq_data.chip->irq_set_wake) |
| 340 | ret = desc->chip->set_wake(irq, on); | 341 | ret = desc->irq_data.chip->irq_set_wake(&desc->irq_data, on); |
| 341 | 342 | ||
| 342 | return ret; | 343 | return ret; |
| 343 | } | 344 | } |
| @@ -429,12 +430,12 @@ void compat_irq_chip_set_default_handler(struct irq_desc *desc) | |||
| 429 | } | 430 | } |
| 430 | 431 | ||
| 431 | int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | 432 | int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, |
| 432 | unsigned long flags) | 433 | unsigned long flags) |
| 433 | { | 434 | { |
| 434 | int ret; | 435 | int ret; |
| 435 | struct irq_chip *chip = desc->chip; | 436 | struct irq_chip *chip = desc->irq_data.chip; |
| 436 | 437 | ||
| 437 | if (!chip || !chip->set_type) { | 438 | if (!chip || !chip->irq_set_type) { |
| 438 | /* | 439 | /* |
| 439 | * IRQF_TRIGGER_* but the PIC does not support multiple | 440 | * IRQF_TRIGGER_* but the PIC does not support multiple |
| 440 | * flow-types? | 441 | * flow-types? |
| @@ -445,11 +446,11 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | |||
| 445 | } | 446 | } |
| 446 | 447 | ||
| 447 | /* caller masked out all except trigger mode flags */ | 448 | /* caller masked out all except trigger mode flags */ |
| 448 | ret = chip->set_type(irq, flags); | 449 | ret = chip->irq_set_type(&desc->irq_data, flags); |
| 449 | 450 | ||
| 450 | if (ret) | 451 | if (ret) |
| 451 | pr_err("setting trigger mode %d for irq %u failed (%pF)\n", | 452 | pr_err("setting trigger mode %lu for irq %u failed (%pF)\n", |
| 452 | (int)flags, irq, chip->set_type); | 453 | flags, irq, chip->irq_set_type); |
| 453 | else { | 454 | else { |
| 454 | if (flags & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) | 455 | if (flags & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) |
| 455 | flags |= IRQ_LEVEL; | 456 | flags |= IRQ_LEVEL; |
| @@ -457,8 +458,8 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | |||
| 457 | desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK); | 458 | desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK); |
| 458 | desc->status |= flags; | 459 | desc->status |= flags; |
| 459 | 460 | ||
| 460 | if (chip != desc->chip) | 461 | if (chip != desc->irq_data.chip) |
| 461 | irq_chip_set_defaults(desc->chip); | 462 | irq_chip_set_defaults(desc->irq_data.chip); |
| 462 | } | 463 | } |
| 463 | 464 | ||
| 464 | return ret; | 465 | return ret; |
| @@ -507,7 +508,7 @@ static int irq_wait_for_interrupt(struct irqaction *action) | |||
| 507 | static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc) | 508 | static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc) |
| 508 | { | 509 | { |
| 509 | again: | 510 | again: |
| 510 | chip_bus_lock(irq, desc); | 511 | chip_bus_lock(desc); |
| 511 | raw_spin_lock_irq(&desc->lock); | 512 | raw_spin_lock_irq(&desc->lock); |
| 512 | 513 | ||
| 513 | /* | 514 | /* |
| @@ -521,17 +522,17 @@ again: | |||
| 521 | */ | 522 | */ |
| 522 | if (unlikely(desc->status & IRQ_INPROGRESS)) { | 523 | if (unlikely(desc->status & IRQ_INPROGRESS)) { |
| 523 | raw_spin_unlock_irq(&desc->lock); | 524 | raw_spin_unlock_irq(&desc->lock); |
| 524 | chip_bus_sync_unlock(irq, desc); | 525 | chip_bus_sync_unlock(desc); |
| 525 | cpu_relax(); | 526 | cpu_relax(); |
| 526 | goto again; | 527 | goto again; |
| 527 | } | 528 | } |
| 528 | 529 | ||
| 529 | if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) { | 530 | if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) { |
| 530 | desc->status &= ~IRQ_MASKED; | 531 | desc->status &= ~IRQ_MASKED; |
| 531 | desc->chip->unmask(irq); | 532 | desc->irq_data.chip->irq_unmask(&desc->irq_data); |
| 532 | } | 533 | } |
| 533 | raw_spin_unlock_irq(&desc->lock); | 534 | raw_spin_unlock_irq(&desc->lock); |
| 534 | chip_bus_sync_unlock(irq, desc); | 535 | chip_bus_sync_unlock(desc); |
| 535 | } | 536 | } |
| 536 | 537 | ||
| 537 | #ifdef CONFIG_SMP | 538 | #ifdef CONFIG_SMP |
| @@ -556,7 +557,7 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) | |||
| 556 | } | 557 | } |
| 557 | 558 | ||
| 558 | raw_spin_lock_irq(&desc->lock); | 559 | raw_spin_lock_irq(&desc->lock); |
| 559 | cpumask_copy(mask, desc->affinity); | 560 | cpumask_copy(mask, desc->irq_data.affinity); |
| 560 | raw_spin_unlock_irq(&desc->lock); | 561 | raw_spin_unlock_irq(&desc->lock); |
| 561 | 562 | ||
| 562 | set_cpus_allowed_ptr(current, mask); | 563 | set_cpus_allowed_ptr(current, mask); |
| @@ -657,7 +658,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
| 657 | if (!desc) | 658 | if (!desc) |
| 658 | return -EINVAL; | 659 | return -EINVAL; |
| 659 | 660 | ||
| 660 | if (desc->chip == &no_irq_chip) | 661 | if (desc->irq_data.chip == &no_irq_chip) |
| 661 | return -ENOSYS; | 662 | return -ENOSYS; |
| 662 | /* | 663 | /* |
| 663 | * Some drivers like serial.c use request_irq() heavily, | 664 | * Some drivers like serial.c use request_irq() heavily, |
| @@ -752,7 +753,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
| 752 | } | 753 | } |
| 753 | 754 | ||
| 754 | if (!shared) { | 755 | if (!shared) { |
| 755 | irq_chip_set_defaults(desc->chip); | 756 | irq_chip_set_defaults(desc->irq_data.chip); |
| 756 | 757 | ||
| 757 | init_waitqueue_head(&desc->wait_for_threads); | 758 | init_waitqueue_head(&desc->wait_for_threads); |
| 758 | 759 | ||
| @@ -779,7 +780,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
| 779 | if (!(desc->status & IRQ_NOAUTOEN)) { | 780 | if (!(desc->status & IRQ_NOAUTOEN)) { |
| 780 | desc->depth = 0; | 781 | desc->depth = 0; |
| 781 | desc->status &= ~IRQ_DISABLED; | 782 | desc->status &= ~IRQ_DISABLED; |
| 782 | desc->chip->startup(irq); | 783 | desc->irq_data.chip->irq_startup(&desc->irq_data); |
| 783 | } else | 784 | } else |
| 784 | /* Undo nested disables: */ | 785 | /* Undo nested disables: */ |
| 785 | desc->depth = 1; | 786 | desc->depth = 1; |
| @@ -912,17 +913,17 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) | |||
| 912 | 913 | ||
| 913 | /* Currently used only by UML, might disappear one day: */ | 914 | /* Currently used only by UML, might disappear one day: */ |
| 914 | #ifdef CONFIG_IRQ_RELEASE_METHOD | 915 | #ifdef CONFIG_IRQ_RELEASE_METHOD |
| 915 | if (desc->chip->release) | 916 | if (desc->irq_data.chip->release) |
| 916 | desc->chip->release(irq, dev_id); | 917 | desc->irq_data.chip->release(irq, dev_id); |
| 917 | #endif | 918 | #endif |
| 918 | 919 | ||
| 919 | /* If this was the last handler, shut down the IRQ line: */ | 920 | /* If this was the last handler, shut down the IRQ line: */ |
| 920 | if (!desc->action) { | 921 | if (!desc->action) { |
| 921 | desc->status |= IRQ_DISABLED; | 922 | desc->status |= IRQ_DISABLED; |
| 922 | if (desc->chip->shutdown) | 923 | if (desc->irq_data.chip->irq_shutdown) |
| 923 | desc->chip->shutdown(irq); | 924 | desc->irq_data.chip->irq_shutdown(&desc->irq_data); |
| 924 | else | 925 | else |
| 925 | desc->chip->disable(irq); | 926 | desc->irq_data.chip->irq_disable(&desc->irq_data); |
| 926 | } | 927 | } |
| 927 | 928 | ||
| 928 | #ifdef CONFIG_SMP | 929 | #ifdef CONFIG_SMP |
| @@ -997,9 +998,9 @@ void free_irq(unsigned int irq, void *dev_id) | |||
| 997 | if (!desc) | 998 | if (!desc) |
| 998 | return; | 999 | return; |
| 999 | 1000 | ||
| 1000 | chip_bus_lock(irq, desc); | 1001 | chip_bus_lock(desc); |
| 1001 | kfree(__free_irq(irq, dev_id)); | 1002 | kfree(__free_irq(irq, dev_id)); |
| 1002 | chip_bus_sync_unlock(irq, desc); | 1003 | chip_bus_sync_unlock(desc); |
| 1003 | } | 1004 | } |
| 1004 | EXPORT_SYMBOL(free_irq); | 1005 | EXPORT_SYMBOL(free_irq); |
| 1005 | 1006 | ||
| @@ -1086,9 +1087,9 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, | |||
| 1086 | action->name = devname; | 1087 | action->name = devname; |
| 1087 | action->dev_id = dev_id; | 1088 | action->dev_id = dev_id; |
| 1088 | 1089 | ||
| 1089 | chip_bus_lock(irq, desc); | 1090 | chip_bus_lock(desc); |
| 1090 | retval = __setup_irq(irq, desc, action); | 1091 | retval = __setup_irq(irq, desc, action); |
| 1091 | chip_bus_sync_unlock(irq, desc); | 1092 | chip_bus_sync_unlock(desc); |
| 1092 | 1093 | ||
| 1093 | if (retval) | 1094 | if (retval) |
| 1094 | kfree(action); | 1095 | kfree(action); |
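Throughout manage.c the chip is now fetched once from desc->irq_data and the irq_*-prefixed methods are called with &desc->irq_data. For chip implementations the visible change is the signature; as the hunks above show, a zero return from irq_set_affinity() makes the core copy the mask into irq_data.affinity and update threaded-handler affinity. A hedged sketch of a new-style method:

	/* Illustrative only; a real chip programs its routing hardware. */
	static int toy_irq_set_affinity(struct irq_data *d,
					const struct cpumask *mask, bool force)
	{
		/* route d->irq to the CPUs in *mask ... */
		return 0;	/* 0: the core updates d->affinity for us */
	}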
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 241962280836..1d2541940480 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | void move_masked_irq(int irq) | 7 | void move_masked_irq(int irq) |
| 8 | { | 8 | { |
| 9 | struct irq_desc *desc = irq_to_desc(irq); | 9 | struct irq_desc *desc = irq_to_desc(irq); |
| 10 | struct irq_chip *chip = desc->irq_data.chip; | ||
| 10 | 11 | ||
| 11 | if (likely(!(desc->status & IRQ_MOVE_PENDING))) | 12 | if (likely(!(desc->status & IRQ_MOVE_PENDING))) |
| 12 | return; | 13 | return; |
| @@ -24,7 +25,7 @@ void move_masked_irq(int irq) | |||
| 24 | if (unlikely(cpumask_empty(desc->pending_mask))) | 25 | if (unlikely(cpumask_empty(desc->pending_mask))) |
| 25 | return; | 26 | return; |
| 26 | 27 | ||
| 27 | if (!desc->chip->set_affinity) | 28 | if (!chip->irq_set_affinity) |
| 28 | return; | 29 | return; |
| 29 | 30 | ||
| 30 | assert_raw_spin_locked(&desc->lock); | 31 | assert_raw_spin_locked(&desc->lock); |
| @@ -43,8 +44,9 @@ void move_masked_irq(int irq) | |||
| 43 | */ | 44 | */ |
| 44 | if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) | 45 | if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) |
| 45 | < nr_cpu_ids)) | 46 | < nr_cpu_ids)) |
| 46 | if (!desc->chip->set_affinity(irq, desc->pending_mask)) { | 47 | if (!chip->irq_set_affinity(&desc->irq_data, |
| 47 | cpumask_copy(desc->affinity, desc->pending_mask); | 48 | desc->pending_mask, false)) { |
| 49 | cpumask_copy(desc->irq_data.affinity, desc->pending_mask); | ||
| 48 | irq_set_thread_affinity(desc); | 50 | irq_set_thread_affinity(desc); |
| 49 | } | 51 | } |
| 50 | 52 | ||
| @@ -61,8 +63,8 @@ void move_native_irq(int irq) | |||
| 61 | if (unlikely(desc->status & IRQ_DISABLED)) | 63 | if (unlikely(desc->status & IRQ_DISABLED)) |
| 62 | return; | 64 | return; |
| 63 | 65 | ||
| 64 | desc->chip->mask(irq); | 66 | desc->irq_data.chip->irq_mask(&desc->irq_data); |
| 65 | move_masked_irq(irq); | 67 | move_masked_irq(irq); |
| 66 | desc->chip->unmask(irq); | 68 | desc->irq_data.chip->irq_unmask(&desc->irq_data); |
| 67 | } | 69 | } |
| 68 | 70 | ||
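move_native_irq() keeps its ordering; only the callback spelling changes. The masked-move protocol, restated as a sketch:

	chip->irq_mask(&desc->irq_data);	/* quiesce the line */
	move_masked_irq(irq);			/* retarget while masked */
	chip->irq_unmask(&desc->irq_data);	/* resume on the new CPU */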
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c deleted file mode 100644 index 65d3845665ac..000000000000 --- a/kernel/irq/numa_migrate.c +++ /dev/null | |||
| @@ -1,120 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * NUMA irq-desc migration code | ||
| 3 | * | ||
| 4 | * Migrate IRQ data structures (irq_desc, chip_data, etc.) over to | ||
| 5 | * the new "home node" of the IRQ. | ||
| 6 | */ | ||
| 7 | |||
| 8 | #include <linux/irq.h> | ||
| 9 | #include <linux/slab.h> | ||
| 10 | #include <linux/module.h> | ||
| 11 | #include <linux/random.h> | ||
| 12 | #include <linux/interrupt.h> | ||
| 13 | #include <linux/kernel_stat.h> | ||
| 14 | |||
| 15 | #include "internals.h" | ||
| 16 | |||
| 17 | static void init_copy_kstat_irqs(struct irq_desc *old_desc, | ||
| 18 | struct irq_desc *desc, | ||
| 19 | int node, int nr) | ||
| 20 | { | ||
| 21 | init_kstat_irqs(desc, node, nr); | ||
| 22 | |||
| 23 | if (desc->kstat_irqs != old_desc->kstat_irqs) | ||
| 24 | memcpy(desc->kstat_irqs, old_desc->kstat_irqs, | ||
| 25 | nr * sizeof(*desc->kstat_irqs)); | ||
| 26 | } | ||
| 27 | |||
| 28 | static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) | ||
| 29 | { | ||
| 30 | if (old_desc->kstat_irqs == desc->kstat_irqs) | ||
| 31 | return; | ||
| 32 | |||
| 33 | kfree(old_desc->kstat_irqs); | ||
| 34 | old_desc->kstat_irqs = NULL; | ||
| 35 | } | ||
| 36 | |||
| 37 | static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, | ||
| 38 | struct irq_desc *desc, int node) | ||
| 39 | { | ||
| 40 | memcpy(desc, old_desc, sizeof(struct irq_desc)); | ||
| 41 | if (!alloc_desc_masks(desc, node, false)) { | ||
| 42 | printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " | ||
| 43 | "for migration.\n", irq); | ||
| 44 | return false; | ||
| 45 | } | ||
| 46 | raw_spin_lock_init(&desc->lock); | ||
| 47 | desc->node = node; | ||
| 48 | lockdep_set_class(&desc->lock, &irq_desc_lock_class); | ||
| 49 | init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids); | ||
| 50 | init_copy_desc_masks(old_desc, desc); | ||
| 51 | arch_init_copy_chip_data(old_desc, desc, node); | ||
| 52 | return true; | ||
| 53 | } | ||
| 54 | |||
| 55 | static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) | ||
| 56 | { | ||
| 57 | free_kstat_irqs(old_desc, desc); | ||
| 58 | free_desc_masks(old_desc, desc); | ||
| 59 | arch_free_chip_data(old_desc, desc); | ||
| 60 | } | ||
| 61 | |||
| 62 | static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, | ||
| 63 | int node) | ||
| 64 | { | ||
| 65 | struct irq_desc *desc; | ||
| 66 | unsigned int irq; | ||
| 67 | unsigned long flags; | ||
| 68 | |||
| 69 | irq = old_desc->irq; | ||
| 70 | |||
| 71 | raw_spin_lock_irqsave(&sparse_irq_lock, flags); | ||
| 72 | |||
| 73 | /* We have to check it to avoid races with another CPU */ | ||
| 74 | desc = irq_to_desc(irq); | ||
| 75 | |||
| 76 | if (desc && old_desc != desc) | ||
| 77 | goto out_unlock; | ||
| 78 | |||
| 79 | desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); | ||
| 80 | if (!desc) { | ||
| 81 | printk(KERN_ERR "irq %d: can not get new irq_desc " | ||
| 82 | "for migration.\n", irq); | ||
| 83 | /* still use old one */ | ||
| 84 | desc = old_desc; | ||
| 85 | goto out_unlock; | ||
| 86 | } | ||
| 87 | if (!init_copy_one_irq_desc(irq, old_desc, desc, node)) { | ||
| 88 | /* still use old one */ | ||
| 89 | kfree(desc); | ||
| 90 | desc = old_desc; | ||
| 91 | goto out_unlock; | ||
| 92 | } | ||
| 93 | |||
| 94 | replace_irq_desc(irq, desc); | ||
| 95 | raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | ||
| 96 | |||
| 97 | /* free the old one */ | ||
| 98 | free_one_irq_desc(old_desc, desc); | ||
| 99 | kfree(old_desc); | ||
| 100 | |||
| 101 | return desc; | ||
| 102 | |||
| 103 | out_unlock: | ||
| 104 | raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); | ||
| 105 | |||
| 106 | return desc; | ||
| 107 | } | ||
| 108 | |||
| 109 | struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) | ||
| 110 | { | ||
| 111 | /* static ones, or those whose target node is -1, are not moved */ | ||
| 112 | if (desc->irq < NR_IRQS_LEGACY || node == -1) | ||
| 113 | return desc; | ||
| 114 | |||
| 115 | if (desc->node != node) | ||
| 116 | desc = __real_move_irq_desc(desc, node); | ||
| 117 | |||
| 118 | return desc; | ||
| 119 | } | ||
| 120 | |||
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 09a2ee540bd2..01b1d3a88983 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
| @@ -21,7 +21,7 @@ static struct proc_dir_entry *root_irq_dir; | |||
| 21 | static int irq_affinity_proc_show(struct seq_file *m, void *v) | 21 | static int irq_affinity_proc_show(struct seq_file *m, void *v) |
| 22 | { | 22 | { |
| 23 | struct irq_desc *desc = irq_to_desc((long)m->private); | 23 | struct irq_desc *desc = irq_to_desc((long)m->private); |
| 24 | const struct cpumask *mask = desc->affinity; | 24 | const struct cpumask *mask = desc->irq_data.affinity; |
| 25 | 25 | ||
| 26 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 26 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
| 27 | if (desc->status & IRQ_MOVE_PENDING) | 27 | if (desc->status & IRQ_MOVE_PENDING) |
| @@ -65,7 +65,7 @@ static ssize_t irq_affinity_proc_write(struct file *file, | |||
| 65 | cpumask_var_t new_value; | 65 | cpumask_var_t new_value; |
| 66 | int err; | 66 | int err; |
| 67 | 67 | ||
| 68 | if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity || | 68 | if (!irq_to_desc(irq)->irq_data.chip->irq_set_affinity || no_irq_affinity || |
| 69 | irq_balancing_disabled(irq)) | 69 | irq_balancing_disabled(irq)) |
| 70 | return -EIO; | 70 | return -EIO; |
| 71 | 71 | ||
| @@ -185,7 +185,7 @@ static int irq_node_proc_show(struct seq_file *m, void *v) | |||
| 185 | { | 185 | { |
| 186 | struct irq_desc *desc = irq_to_desc((long) m->private); | 186 | struct irq_desc *desc = irq_to_desc((long) m->private); |
| 187 | 187 | ||
| 188 | seq_printf(m, "%d\n", desc->node); | 188 | seq_printf(m, "%d\n", desc->irq_data.node); |
| 189 | return 0; | 189 | return 0; |
| 190 | } | 190 | } |
| 191 | 191 | ||
| @@ -269,7 +269,7 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) | |||
| 269 | { | 269 | { |
| 270 | char name [MAX_NAMELEN]; | 270 | char name [MAX_NAMELEN]; |
| 271 | 271 | ||
| 272 | if (!root_irq_dir || (desc->chip == &no_irq_chip) || desc->dir) | 272 | if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip) || desc->dir) |
| 273 | return; | 273 | return; |
| 274 | 274 | ||
| 275 | memset(name, 0, MAX_NAMELEN); | 275 | memset(name, 0, MAX_NAMELEN); |
| @@ -297,6 +297,24 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) | |||
| 297 | &irq_spurious_proc_fops, (void *)(long)irq); | 297 | &irq_spurious_proc_fops, (void *)(long)irq); |
| 298 | } | 298 | } |
| 299 | 299 | ||
| 300 | void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) | ||
| 301 | { | ||
| 302 | char name [MAX_NAMELEN]; | ||
| 303 | |||
| 304 | if (!root_irq_dir || !desc->dir) | ||
| 305 | return; | ||
| 306 | #ifdef CONFIG_SMP | ||
| 307 | remove_proc_entry("smp_affinity", desc->dir); | ||
| 308 | remove_proc_entry("affinity_hint", desc->dir); | ||
| 309 | remove_proc_entry("node", desc->dir); | ||
| 310 | #endif | ||
| 311 | remove_proc_entry("spurious", desc->dir); | ||
| 312 | |||
| 313 | memset(name, 0, MAX_NAMELEN); | ||
| 314 | sprintf(name, "%u", irq); | ||
| 315 | remove_proc_entry(name, root_irq_dir); | ||
| 316 | } | ||
| 317 | |||
| 300 | #undef MAX_NAMELEN | 318 | #undef MAX_NAMELEN |
| 301 | 319 | ||
| 302 | void unregister_handler_proc(unsigned int irq, struct irqaction *action) | 320 | void unregister_handler_proc(unsigned int irq, struct irqaction *action) |
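The new unregister_irq_proc() mirrors register_irq_proc(), tearing down the per-irq /proc tree before free_desc() releases the descriptor. The entries it removes correspond to this layout (the first three exist only with CONFIG_SMP):

	/proc/irq/<N>/smp_affinity
	/proc/irq/<N>/affinity_hint
	/proc/irq/<N>/node
	/proc/irq/<N>/spurious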
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index 090c3763f3a2..891115a929aa 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c | |||
| @@ -60,7 +60,7 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq) | |||
| 60 | /* | 60 | /* |
| 61 | * Make sure the interrupt is enabled, before resending it: | 61 | * Make sure the interrupt is enabled, before resending it: |
| 62 | */ | 62 | */ |
| 63 | desc->chip->enable(irq); | 63 | desc->irq_data.chip->irq_enable(&desc->irq_data); |
| 64 | 64 | ||
| 65 | /* | 65 | /* |
| 66 | * We do not resend level type interrupts. Level type | 66 | * We do not resend level type interrupts. Level type |
| @@ -70,7 +70,8 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq) | |||
| 70 | if ((status & (IRQ_LEVEL | IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { | 70 | if ((status & (IRQ_LEVEL | IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { |
| 71 | desc->status = (status & ~IRQ_PENDING) | IRQ_REPLAY; | 71 | desc->status = (status & ~IRQ_PENDING) | IRQ_REPLAY; |
| 72 | 72 | ||
| 73 | if (!desc->chip->retrigger || !desc->chip->retrigger(irq)) { | 73 | if (!desc->irq_data.chip->irq_retrigger || |
| 74 | !desc->irq_data.chip->irq_retrigger(&desc->irq_data)) { | ||
| 74 | #ifdef CONFIG_HARDIRQS_SW_RESEND | 75 | #ifdef CONFIG_HARDIRQS_SW_RESEND |
| 75 | /* Set it pending and activate the softirq: */ | 76 | /* Set it pending and activate the softirq: */ |
| 76 | set_bit(irq, irqs_resend); | 77 | set_bit(irq, irqs_resend); |
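check_irq_resend() keeps its two-tier strategy: prefer a hardware retrigger when the chip offers one, otherwise fall back to the software resend softirq. The decision, as a sketch:

	if (chip->irq_retrigger && chip->irq_retrigger(&desc->irq_data))
		return;		/* hardware replays the interrupt */
	/* otherwise, with CONFIG_HARDIRQS_SW_RESEND: set the irq in
	 * irqs_resend and raise the resend softirq */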
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 89fb90ae534f..3089d3b9d5f3 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
| @@ -14,6 +14,8 @@ | |||
| 14 | #include <linux/moduleparam.h> | 14 | #include <linux/moduleparam.h> |
| 15 | #include <linux/timer.h> | 15 | #include <linux/timer.h> |
| 16 | 16 | ||
| 17 | #include "internals.h" | ||
| 18 | |||
| 17 | static int irqfixup __read_mostly; | 19 | static int irqfixup __read_mostly; |
| 18 | 20 | ||
| 19 | #define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10) | 21 | #define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10) |
| @@ -78,8 +80,8 @@ static int try_one_irq(int irq, struct irq_desc *desc) | |||
| 78 | * If we did actual work for the real IRQ line we must let the | 80 | * If we did actual work for the real IRQ line we must let the |
| 79 | * IRQ controller clean up too | 81 | * IRQ controller clean up too |
| 80 | */ | 82 | */ |
| 81 | if (work && desc->chip && desc->chip->end) | 83 | if (work) |
| 82 | desc->chip->end(irq); | 84 | irq_end(irq, desc); |
| 83 | raw_spin_unlock(&desc->lock); | 85 | raw_spin_unlock(&desc->lock); |
| 84 | 86 | ||
| 85 | return ok; | 87 | return ok; |
| @@ -254,7 +256,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, | |||
| 254 | printk(KERN_EMERG "Disabling IRQ #%d\n", irq); | 256 | printk(KERN_EMERG "Disabling IRQ #%d\n", irq); |
| 255 | desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED; | 257 | desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED; |
| 256 | desc->depth++; | 258 | desc->depth++; |
| 257 | desc->chip->disable(irq); | 259 | desc->irq_data.chip->irq_disable(&desc->irq_data); |
| 258 | 260 | ||
| 259 | mod_timer(&poll_spurious_irq_timer, | 261 | mod_timer(&poll_spurious_irq_timer, |
| 260 | jiffies + POLL_SPURIOUS_IRQ_INTERVAL); | 262 | jiffies + POLL_SPURIOUS_IRQ_INTERVAL); |
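try_one_irq() now reaches the legacy ->end() callback through the irq_end() shim from internals.h instead of poking desc->chip directly, and note_interrupt()'s disable path changes only in spelling. In substance that path is: mark the line disabled and spurious, bump ->depth so a later enable_irq() can undo it, call the chip's irq_disable(), and rearm the HZ/10 poll timer so the line is still serviced by polling:

	desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED;
	desc->depth++;				/* paired with enable_irq() */
	desc->irq_data.chip->irq_disable(&desc->irq_data);
	mod_timer(&poll_spurious_irq_timer,
		  jiffies + POLL_SPURIOUS_IRQ_INTERVAL);	/* HZ/10 */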
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index f2852a510232..42ba65dff7d9 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
| @@ -639,6 +639,16 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) | |||
| 639 | } | 639 | } |
| 640 | #endif | 640 | #endif |
| 641 | 641 | ||
| 642 | if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { | ||
| 643 | debug_locks_off(); | ||
| 644 | printk(KERN_ERR | ||
| 645 | "BUG: looking up invalid subclass: %u\n", subclass); | ||
| 646 | printk(KERN_ERR | ||
| 647 | "turning off the locking correctness validator.\n"); | ||
| 648 | dump_stack(); | ||
| 649 | return NULL; | ||
| 650 | } | ||
| 651 | |||
| 642 | /* | 652 | /* |
| 643 | * Static locks do not have their class-keys yet - for them the key | 653 | * Static locks do not have their class-keys yet - for them the key |
| 644 | * is the lock object itself: | 654 | * is the lock object itself: |
| @@ -774,7 +784,9 @@ out_unlock_set: | |||
| 774 | raw_local_irq_restore(flags); | 784 | raw_local_irq_restore(flags); |
| 775 | 785 | ||
| 776 | if (!subclass || force) | 786 | if (!subclass || force) |
| 777 | lock->class_cache = class; | 787 | lock->class_cache[0] = class; |
| 788 | else if (subclass < NR_LOCKDEP_CACHING_CLASSES) | ||
| 789 | lock->class_cache[subclass] = class; | ||
| 778 | 790 | ||
| 779 | if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) | 791 | if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) |
| 780 | return NULL; | 792 | return NULL; |
| @@ -2679,7 +2691,11 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, | |||
| 2679 | void lockdep_init_map(struct lockdep_map *lock, const char *name, | 2691 | void lockdep_init_map(struct lockdep_map *lock, const char *name, |
| 2680 | struct lock_class_key *key, int subclass) | 2692 | struct lock_class_key *key, int subclass) |
| 2681 | { | 2693 | { |
| 2682 | lock->class_cache = NULL; | 2694 | int i; |
| 2695 | |||
| 2696 | for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) | ||
| 2697 | lock->class_cache[i] = NULL; | ||
| 2698 | |||
| 2683 | #ifdef CONFIG_LOCK_STAT | 2699 | #ifdef CONFIG_LOCK_STAT |
| 2684 | lock->cpu = raw_smp_processor_id(); | 2700 | lock->cpu = raw_smp_processor_id(); |
| 2685 | #endif | 2701 | #endif |
| @@ -2739,21 +2755,13 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 2739 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | 2755 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
| 2740 | return 0; | 2756 | return 0; |
| 2741 | 2757 | ||
| 2742 | if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { | ||
| 2743 | debug_locks_off(); | ||
| 2744 | printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n"); | ||
| 2745 | printk("turning off the locking correctness validator.\n"); | ||
| 2746 | dump_stack(); | ||
| 2747 | return 0; | ||
| 2748 | } | ||
| 2749 | |||
| 2750 | if (lock->key == &__lockdep_no_validate__) | 2758 | if (lock->key == &__lockdep_no_validate__) |
| 2751 | check = 1; | 2759 | check = 1; |
| 2752 | 2760 | ||
| 2753 | if (!subclass) | 2761 | if (subclass < NR_LOCKDEP_CACHING_CLASSES) |
| 2754 | class = lock->class_cache; | 2762 | class = lock->class_cache[subclass]; |
| 2755 | /* | 2763 | /* |
| 2756 | * Not cached yet or subclass? | 2764 | * Not cached? |
| 2757 | */ | 2765 | */ |
| 2758 | if (unlikely(!class)) { | 2766 | if (unlikely(!class)) { |
| 2759 | class = register_lock_class(lock, subclass, 0); | 2767 | class = register_lock_class(lock, subclass, 0); |
| @@ -2918,7 +2926,7 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) | |||
| 2918 | return 1; | 2926 | return 1; |
| 2919 | 2927 | ||
| 2920 | if (hlock->references) { | 2928 | if (hlock->references) { |
| 2921 | struct lock_class *class = lock->class_cache; | 2929 | struct lock_class *class = lock->class_cache[0]; |
| 2922 | 2930 | ||
| 2923 | if (!class) | 2931 | if (!class) |
| 2924 | class = look_up_lock_class(lock, 0); | 2932 | class = look_up_lock_class(lock, 0); |
| @@ -3559,7 +3567,12 @@ void lockdep_reset_lock(struct lockdep_map *lock) | |||
| 3559 | if (list_empty(head)) | 3567 | if (list_empty(head)) |
| 3560 | continue; | 3568 | continue; |
| 3561 | list_for_each_entry_safe(class, next, head, hash_entry) { | 3569 | list_for_each_entry_safe(class, next, head, hash_entry) { |
| 3562 | if (unlikely(class == lock->class_cache)) { | 3570 | int match = 0; |
| 3571 | |||
| 3572 | for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++) | ||
| 3573 | match |= class == lock->class_cache[j]; | ||
| 3574 | |||
| 3575 | if (unlikely(match)) { | ||
| 3563 | if (debug_locks_off_graph_unlock()) | 3576 | if (debug_locks_off_graph_unlock()) |
| 3564 | WARN_ON(1); | 3577 | WARN_ON(1); |
| 3565 | goto out_restore; | 3578 | goto out_restore; |
| @@ -3775,7 +3788,7 @@ EXPORT_SYMBOL_GPL(debug_show_all_locks); | |||
| 3775 | * Careful: only use this function if you are sure that | 3788 | * Careful: only use this function if you are sure that |
| 3776 | * the task cannot run in parallel! | 3789 | * the task cannot run in parallel! |
| 3777 | */ | 3790 | */ |
| 3778 | void __debug_show_held_locks(struct task_struct *task) | 3791 | void debug_show_held_locks(struct task_struct *task) |
| 3779 | { | 3792 | { |
| 3780 | if (unlikely(!debug_locks)) { | 3793 | if (unlikely(!debug_locks)) { |
| 3781 | printk("INFO: lockdep is turned off.\n"); | 3794 | printk("INFO: lockdep is turned off.\n"); |
| @@ -3783,12 +3796,6 @@ void __debug_show_held_locks(struct task_struct *task) | |||
| 3783 | } | 3796 | } |
| 3784 | lockdep_print_held_locks(task); | 3797 | lockdep_print_held_locks(task); |
| 3785 | } | 3798 | } |
| 3786 | EXPORT_SYMBOL_GPL(__debug_show_held_locks); | ||
| 3787 | |||
| 3788 | void debug_show_held_locks(struct task_struct *task) | ||
| 3789 | { | ||
| 3790 | __debug_show_held_locks(task); | ||
| 3791 | } | ||
| 3792 | EXPORT_SYMBOL_GPL(debug_show_held_locks); | 3799 | EXPORT_SYMBOL_GPL(debug_show_held_locks); |
| 3793 | 3800 | ||
| 3794 | void lockdep_sys_exit(void) | 3801 | void lockdep_sys_exit(void) |
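The lockdep change widens the single class_cache pointer into a small per-subclass array (NR_LOCKDEP_CACHING_CLASSES entries), so nested subclasses also hit a cache instead of the hash table, and look_up_lock_class() now rejects out-of-range subclasses up front. A toy version of the fast path, with stand-in types and sizes:

	#include <stddef.h>

	struct lock_class;		/* opaque in this sketch */
	#define TOY_CACHING_CLASSES 2	/* stand-in for NR_LOCKDEP_CACHING_CLASSES */

	struct toy_lockdep_map {
		struct lock_class *class_cache[TOY_CACHING_CLASSES];
	};

	static struct lock_class *toy_cached_class(struct toy_lockdep_map *lock,
						   unsigned int subclass)
	{
		if (subclass < TOY_CACHING_CLASSES)
			return lock->class_cache[subclass];	/* fast path */
		return NULL;	/* miss: caller falls back to the hash lookup */
	}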
diff --git a/kernel/pid.c b/kernel/pid.c index d55c6fb8d087..39b65b69584f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
| @@ -401,7 +401,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type) | |||
| 401 | struct task_struct *result = NULL; | 401 | struct task_struct *result = NULL; |
| 402 | if (pid) { | 402 | if (pid) { |
| 403 | struct hlist_node *first; | 403 | struct hlist_node *first; |
| 404 | first = rcu_dereference_check(pid->tasks[type].first, | 404 | first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), |
| 405 | rcu_read_lock_held() || | 405 | rcu_read_lock_held() || |
| 406 | lockdep_tasklist_lock_is_held()); | 406 | lockdep_tasklist_lock_is_held()); |
| 407 | if (first) | 407 | if (first) |
| @@ -416,6 +416,7 @@ EXPORT_SYMBOL(pid_task); | |||
| 416 | */ | 416 | */ |
| 417 | struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) | 417 | struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) |
| 418 | { | 418 | { |
| 419 | rcu_lockdep_assert(rcu_read_lock_held()); | ||
| 419 | return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); | 420 | return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); |
| 420 | } | 421 | } |
| 421 | 422 | ||
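find_task_by_pid_ns() now asserts via rcu_lockdep_assert() that the caller holds rcu_read_lock(), instead of relying on it silently. The expected calling pattern, sketched (the surrounding variables are assumed):

	rcu_read_lock();
	task = find_task_by_pid_ns(nr, ns);
	if (task)
		get_task_struct(task);	/* pin it before leaving the section */
	rcu_read_unlock();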
diff --git a/kernel/printk.c b/kernel/printk.c index 8fe465ac008a..2531017795f6 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
| @@ -85,7 +85,7 @@ EXPORT_SYMBOL(oops_in_progress); | |||
| 85 | * provides serialisation for access to the entire console | 85 | * provides serialisation for access to the entire console |
| 86 | * driver system. | 86 | * driver system. |
| 87 | */ | 87 | */ |
| 88 | static DECLARE_MUTEX(console_sem); | 88 | static DEFINE_SEMAPHORE(console_sem); |
| 89 | struct console *console_drivers; | 89 | struct console *console_drivers; |
| 90 | EXPORT_SYMBOL_GPL(console_drivers); | 90 | EXPORT_SYMBOL_GPL(console_drivers); |
| 91 | 91 | ||
| @@ -556,7 +556,7 @@ static void zap_locks(void) | |||
| 556 | /* If a crash is occurring, make sure we can't deadlock */ | 556 | /* If a crash is occurring, make sure we can't deadlock */ |
| 557 | spin_lock_init(&logbuf_lock); | 557 | spin_lock_init(&logbuf_lock); |
| 558 | /* And make sure that we print immediately */ | 558 | /* And make sure that we print immediately */ |
| 559 | init_MUTEX(&console_sem); | 559 | sema_init(&console_sem, 1); |
| 560 | } | 560 | } |
| 561 | 561 | ||
| 562 | #if defined(CONFIG_PRINTK_TIME) | 562 | #if defined(CONFIG_PRINTK_TIME) |
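The printk hunks are part of retiring the old DECLARE_MUTEX/init_MUTEX API: a semaphore used as a binary lock is now declared and reinitialized explicitly. Side by side, as a sketch:

	static DEFINE_SEMAPHORE(console_sem);	/* was: DECLARE_MUTEX(console_sem) */

	/* ... and in zap_locks(): */
	sema_init(&console_sem, 1);		/* was: init_MUTEX(&console_sem) */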
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 4d169835fb36..a23a57a976d1 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
| @@ -73,12 +73,14 @@ int debug_lockdep_rcu_enabled(void) | |||
| 73 | EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); | 73 | EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); |
| 74 | 74 | ||
| 75 | /** | 75 | /** |
| 76 | * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? | 76 | * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? |
| 77 | * | 77 | * |
| 78 | * Check for bottom half being disabled, which covers both the | 78 | * Check for bottom half being disabled, which covers both the |
| 79 | * CONFIG_PROVE_RCU and not cases. Note that if someone uses | 79 | * CONFIG_PROVE_RCU and not cases. Note that if someone uses |
| 80 | * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) | 80 | * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) |
| 81 | * will show the situation. | 81 | * will show the situation. This is useful for debug checks in functions |
| 82 | * that require that they be called within an RCU read-side critical | ||
| 83 | * section. | ||
| 82 | * | 84 | * |
| 83 | * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. | 85 | * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. |
| 84 | */ | 86 | */ |
| @@ -86,7 +88,7 @@ int rcu_read_lock_bh_held(void) | |||
| 86 | { | 88 | { |
| 87 | if (!debug_lockdep_rcu_enabled()) | 89 | if (!debug_lockdep_rcu_enabled()) |
| 88 | return 1; | 90 | return 1; |
| 89 | return in_softirq(); | 91 | return in_softirq() || irqs_disabled(); |
| 90 | } | 92 | } |
| 91 | EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | 93 | EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); |
| 92 | 94 | ||
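rcu_read_lock_bh_held() now also reports true when interrupts are disabled, since disabled irqs exclude BH just as local_bh_disable() does. Per the updated kerneldoc it is meant for debug checks, e.g. (sketch):

	/* In a function that must run inside an RCU-bh reader: */
	WARN_ON_ONCE(!rcu_read_lock_bh_held());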
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 196ec02f8be0..d806735342ac 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c | |||
| @@ -59,6 +59,14 @@ int rcu_scheduler_active __read_mostly; | |||
| 59 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); | 59 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); |
| 60 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 60 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
| 61 | 61 | ||
| 62 | /* Forward declarations for rcutiny_plugin.h. */ | ||
| 63 | static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); | ||
| 64 | static void __call_rcu(struct rcu_head *head, | ||
| 65 | void (*func)(struct rcu_head *rcu), | ||
| 66 | struct rcu_ctrlblk *rcp); | ||
| 67 | |||
| 68 | #include "rcutiny_plugin.h" | ||
| 69 | |||
| 62 | #ifdef CONFIG_NO_HZ | 70 | #ifdef CONFIG_NO_HZ |
| 63 | 71 | ||
| 64 | static long rcu_dynticks_nesting = 1; | 72 | static long rcu_dynticks_nesting = 1; |
| @@ -140,6 +148,7 @@ void rcu_check_callbacks(int cpu, int user) | |||
| 140 | rcu_sched_qs(cpu); | 148 | rcu_sched_qs(cpu); |
| 141 | else if (!in_softirq()) | 149 | else if (!in_softirq()) |
| 142 | rcu_bh_qs(cpu); | 150 | rcu_bh_qs(cpu); |
| 151 | rcu_preempt_check_callbacks(); | ||
| 143 | } | 152 | } |
| 144 | 153 | ||
| 145 | /* | 154 | /* |
| @@ -162,6 +171,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
| 162 | *rcp->donetail = NULL; | 171 | *rcp->donetail = NULL; |
| 163 | if (rcp->curtail == rcp->donetail) | 172 | if (rcp->curtail == rcp->donetail) |
| 164 | rcp->curtail = &rcp->rcucblist; | 173 | rcp->curtail = &rcp->rcucblist; |
| 174 | rcu_preempt_remove_callbacks(rcp); | ||
| 165 | rcp->donetail = &rcp->rcucblist; | 175 | rcp->donetail = &rcp->rcucblist; |
| 166 | local_irq_restore(flags); | 176 | local_irq_restore(flags); |
| 167 | 177 | ||
| @@ -182,6 +192,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) | |||
| 182 | { | 192 | { |
| 183 | __rcu_process_callbacks(&rcu_sched_ctrlblk); | 193 | __rcu_process_callbacks(&rcu_sched_ctrlblk); |
| 184 | __rcu_process_callbacks(&rcu_bh_ctrlblk); | 194 | __rcu_process_callbacks(&rcu_bh_ctrlblk); |
| 195 | rcu_preempt_process_callbacks(); | ||
| 185 | } | 196 | } |
| 186 | 197 | ||
| 187 | /* | 198 | /* |
| @@ -223,15 +234,15 @@ static void __call_rcu(struct rcu_head *head, | |||
| 223 | } | 234 | } |
| 224 | 235 | ||
| 225 | /* | 236 | /* |
| 226 | * Post an RCU callback to be invoked after the end of an RCU grace | 237 | * Post an RCU callback to be invoked after the end of an RCU-sched grace |
| 227 | * period. But since we have but one CPU, that would be after any | 238 | * period. But since we have but one CPU, that would be after any |
| 228 | * quiescent state. | 239 | * quiescent state. |
| 229 | */ | 240 | */ |
| 230 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | 241 | void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) |
| 231 | { | 242 | { |
| 232 | __call_rcu(head, func, &rcu_sched_ctrlblk); | 243 | __call_rcu(head, func, &rcu_sched_ctrlblk); |
| 233 | } | 244 | } |
| 234 | EXPORT_SYMBOL_GPL(call_rcu); | 245 | EXPORT_SYMBOL_GPL(call_rcu_sched); |
| 235 | 246 | ||
| 236 | /* | 247 | /* |
| 237 | * Post an RCU bottom-half callback to be invoked after any subsequent | 248 | * Post an RCU bottom-half callback to be invoked after any subsequent |
| @@ -243,20 +254,6 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
| 243 | } | 254 | } |
| 244 | EXPORT_SYMBOL_GPL(call_rcu_bh); | 255 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
| 245 | 256 | ||
| 246 | void rcu_barrier(void) | ||
| 247 | { | ||
| 248 | struct rcu_synchronize rcu; | ||
| 249 | |||
| 250 | init_rcu_head_on_stack(&rcu.head); | ||
| 251 | init_completion(&rcu.completion); | ||
| 252 | /* Will wake me after RCU finished. */ | ||
| 253 | call_rcu(&rcu.head, wakeme_after_rcu); | ||
| 254 | /* Wait for it. */ | ||
| 255 | wait_for_completion(&rcu.completion); | ||
| 256 | destroy_rcu_head_on_stack(&rcu.head); | ||
| 257 | } | ||
| 258 | EXPORT_SYMBOL_GPL(rcu_barrier); | ||
| 259 | |||
| 260 | void rcu_barrier_bh(void) | 257 | void rcu_barrier_bh(void) |
| 261 | { | 258 | { |
| 262 | struct rcu_synchronize rcu; | 259 | struct rcu_synchronize rcu; |
| @@ -289,5 +286,3 @@ void __init rcu_init(void) | |||
| 289 | { | 286 | { |
| 290 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | 287 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
| 291 | } | 288 | } |
| 292 | |||
| 293 | #include "rcutiny_plugin.h" | ||
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index d223a92bc742..6ceca4f745ff 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Read-Copy Update mechanism for mutual exclusion (tree-based version) | 2 | * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition |
| 3 | * Internal non-public definitions that provide either classic | 3 | * Internal non-public definitions that provide either classic |
| 4 | * or preemptable semantics. | 4 | * or preemptible semantics. |
| 5 | * | 5 | * |
| 6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
| 7 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
| @@ -17,11 +17,587 @@ | |||
| 17 | * along with this program; if not, write to the Free Software | 17 | * along with this program; if not, write to the Free Software |
| 18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 19 | * | 19 | * |
| 20 | * Copyright IBM Corporation, 2009 | 20 | * Copyright (c) 2010 Linaro |
| 21 | * | 21 | * |
| 22 | * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 22 | * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
| 23 | */ | 23 | */ |
| 24 | 24 | ||
| 25 | #ifdef CONFIG_TINY_PREEMPT_RCU | ||
| 26 | |||
| 27 | #include <linux/delay.h> | ||
| 28 | |||
| 29 | /* Global control variables for preemptible RCU. */ | ||
| 30 | struct rcu_preempt_ctrlblk { | ||
| 31 | struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */ | ||
| 32 | struct rcu_head **nexttail; | ||
| 33 | /* Tasks blocked in a preemptible RCU */ | ||
| 34 | /* read-side critical section while a */ | ||
| 35 | /* preemptible-RCU grace period is in */ | ||
| 36 | /* progress must wait for a later grace */ | ||
| 37 | /* period. This pointer points to the */ | ||
| 38 | /* ->next pointer of the last task that */ | ||
| 39 | /* must wait for a later grace period, or */ | ||
| 40 | /* to &->rcb.rcucblist if there is no */ | ||
| 41 | /* such task. */ | ||
| 42 | struct list_head blkd_tasks; | ||
| 43 | /* Tasks blocked in RCU read-side critical */ | ||
| 44 | /* section. Tasks are placed at the head */ | ||
| 45 | /* of this list and age towards the tail. */ | ||
| 46 | struct list_head *gp_tasks; | ||
| 47 | /* Pointer to the first task blocking the */ | ||
| 48 | /* current grace period, or NULL if there */ | ||
| 49 | /* is no such task. */ | ||
| 50 | struct list_head *exp_tasks; | ||
| 51 | /* Pointer to first task blocking the */ | ||
| 52 | /* current expedited grace period, or NULL */ | ||
| 53 | /* if there is no such task. If there */ | ||
| 54 | /* is no current expedited grace period, */ | ||
| 55 | /* then there cannot be any such task. */ | ||
| 56 | u8 gpnum; /* Current grace period. */ | ||
| 57 | u8 gpcpu; /* Last grace period blocked by the CPU. */ | ||
| 58 | u8 completed; /* Last grace period completed. */ | ||
| 59 | /* If all three are equal, RCU is idle. */ | ||
| 60 | }; | ||
| 61 | |||
| 62 | static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { | ||
| 63 | .rcb.donetail = &rcu_preempt_ctrlblk.rcb.rcucblist, | ||
| 64 | .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, | ||
| 65 | .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, | ||
| 66 | .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), | ||
| 67 | }; | ||
| 68 | |||
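
The three tail pointers above carve a single linked list of callbacks into segments: done (ready to invoke), current grace period, and next grace period. A minimal user-space sketch of the same idiom, with illustrative names that are not from the patch:

```c
#include <stddef.h>

struct cb {
	struct cb *next;
};

struct cblist {
	struct cb *head;	/* all queued callbacks, oldest first */
	struct cb **donetail;	/* end of segment ready to invoke */
	struct cb **curtail;	/* end of segment waiting on current GP */
	struct cb **nexttail;	/* end of segment waiting on next GP */
};

static void cblist_init(struct cblist *l)
{
	l->head = NULL;		/* empty list: every tail points at head */
	l->donetail = &l->head;
	l->curtail = &l->head;
	l->nexttail = &l->head;
}

static void cblist_enqueue(struct cblist *l, struct cb *c)
{
	c->next = NULL;
	*l->nexttail = c;	/* append to the next-GP segment */
	l->nexttail = &c->next;
}

/* End of a grace period: promote segments, as rcu_preempt_cpu_qs() does. */
static void cblist_advance(struct cblist *l)
{
	l->donetail = l->curtail;
	l->curtail = l->nexttail;
}
```
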
| 69 | static int rcu_preempted_readers_exp(void); | ||
| 70 | static void rcu_report_exp_done(void); | ||
| 71 | |||
| 72 | /* | ||
| 73 | * Return true if the CPU has not yet responded to the current grace period. | ||
| 74 | */ | ||
| 75 | static int rcu_cpu_blocking_cur_gp(void) | ||
| 76 | { | ||
| 77 | return rcu_preempt_ctrlblk.gpcpu != rcu_preempt_ctrlblk.gpnum; | ||
| 78 | } | ||
| 79 | |||
| 80 | /* | ||
| 81 | * Check for a running RCU reader. Because there is only one CPU, | ||
| 82 | * there can be but one running RCU reader at a time. ;-) | ||
| 83 | */ | ||
| 84 | static int rcu_preempt_running_reader(void) | ||
| 85 | { | ||
| 86 | return current->rcu_read_lock_nesting; | ||
| 87 | } | ||
| 88 | |||
| 89 | /* | ||
| 90 | * Check for preempted RCU readers blocking any grace period. | ||
| 91 | * If the caller needs a reliable answer, it must disable hard irqs. | ||
| 92 | */ | ||
| 93 | static int rcu_preempt_blocked_readers_any(void) | ||
| 94 | { | ||
| 95 | return !list_empty(&rcu_preempt_ctrlblk.blkd_tasks); | ||
| 96 | } | ||
| 97 | |||
| 98 | /* | ||
| 99 | * Check for preempted RCU readers blocking the current grace period. | ||
| 100 | * If the caller needs a reliable answer, it must disable hard irqs. | ||
| 101 | */ | ||
| 102 | static int rcu_preempt_blocked_readers_cgp(void) | ||
| 103 | { | ||
| 104 | return rcu_preempt_ctrlblk.gp_tasks != NULL; | ||
| 105 | } | ||
| 106 | |||
| 107 | /* | ||
| 108 | * Return true if another preemptible-RCU grace period is needed. | ||
| 109 | */ | ||
| 110 | static int rcu_preempt_needs_another_gp(void) | ||
| 111 | { | ||
| 112 | return *rcu_preempt_ctrlblk.rcb.curtail != NULL; | ||
| 113 | } | ||
| 114 | |||
| 115 | /* | ||
| 116 | * Return true if a preemptible-RCU grace period is in progress. | ||
| 117 | * The caller must disable hardirqs. | ||
| 118 | */ | ||
| 119 | static int rcu_preempt_gp_in_progress(void) | ||
| 120 | { | ||
| 121 | return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum; | ||
| 122 | } | ||
| 123 | |||
| 124 | /* | ||
| 125 | * Record a preemptible-RCU quiescent state for the specified CPU. Note | ||
| 126 | * that this just means that the task currently running on the CPU is | ||
| 127 | * in a quiescent state. There might be any number of tasks blocked | ||
| 128 | * while in an RCU read-side critical section. | ||
| 129 | * | ||
| 130 | * Unlike the other rcu_*_qs() functions, callers to this function | ||
| 131 | * must disable irqs in order to protect the assignment to | ||
| 132 | * ->rcu_read_unlock_special. | ||
| 133 | * | ||
| 134 | * Because this is a single-CPU implementation, the only way a grace | ||
| 135 | * period can end is if the CPU is in a quiescent state. The reason is | ||
| 136 | * that a blocked preemptible-RCU reader can exit its critical section | ||
| 137 | * only if the CPU is running it at the time. Therefore, when the | ||
| 138 | * last task blocking the current grace period exits its RCU read-side | ||
| 139 | * critical section, neither the CPU nor blocked tasks will be stopping | ||
| 140 | * the current grace period. (In contrast, SMP implementations | ||
| 141 | * might have CPUs running in RCU read-side critical sections that | ||
| 142 | * block later grace periods -- but this is not possible given only | ||
| 143 | * one CPU.) | ||
| 144 | */ | ||
| 145 | static void rcu_preempt_cpu_qs(void) | ||
| 146 | { | ||
| 147 | /* Record both CPU and task as having responded to current GP. */ | ||
| 148 | rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; | ||
| 149 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | ||
| 150 | |||
| 151 | /* | ||
| 152 | * If there is no GP, or if blocked readers are still blocking GP, | ||
| 153 | * then there is nothing more to do. | ||
| 154 | */ | ||
| 155 | if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) | ||
| 156 | return; | ||
| 157 | |||
| 158 | /* Advance callbacks. */ | ||
| 159 | rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum; | ||
| 160 | rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.rcb.curtail; | ||
| 161 | rcu_preempt_ctrlblk.rcb.curtail = rcu_preempt_ctrlblk.nexttail; | ||
| 162 | |||
| 163 | /* If there are no blocked readers, next GP is done instantly. */ | ||
| 164 | if (!rcu_preempt_blocked_readers_any()) | ||
| 165 | rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail; | ||
| 166 | |||
| 167 | /* If there are done callbacks, make RCU_SOFTIRQ process them. */ | ||
| 168 | if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) | ||
| 169 | raise_softirq(RCU_SOFTIRQ); | ||
| 170 | } | ||
| 171 | |||
| 172 | /* | ||
| 173 | * Start a new RCU grace period if warranted. Hard irqs must be disabled. | ||
| 174 | */ | ||
| 175 | static void rcu_preempt_start_gp(void) | ||
| 176 | { | ||
| 177 | if (!rcu_preempt_gp_in_progress() && rcu_preempt_needs_another_gp()) { | ||
| 178 | |||
| 179 | /* Official start of GP. */ | ||
| 180 | rcu_preempt_ctrlblk.gpnum++; | ||
| 181 | |||
| 182 | /* Any blocked RCU readers block new GP. */ | ||
| 183 | if (rcu_preempt_blocked_readers_any()) | ||
| 184 | rcu_preempt_ctrlblk.gp_tasks = | ||
| 185 | rcu_preempt_ctrlblk.blkd_tasks.next; | ||
| 186 | |||
| 187 | /* If there is no running reader, CPU is done with GP. */ | ||
| 188 | if (!rcu_preempt_running_reader()) | ||
| 189 | rcu_preempt_cpu_qs(); | ||
| 190 | } | ||
| 191 | } | ||
| 192 | |||
| 193 | /* | ||
| 194 | * We have entered the scheduler, and the current task might soon be | ||
| 195 | * context-switched away from. If this task is in an RCU read-side | ||
| 196 | * critical section, we will no longer be able to rely on the CPU to | ||
| 197 | * record that fact, so we enqueue the task on the blkd_tasks list. | ||
| 198 | * If the task started after the current grace period began, as recorded | ||
| 199 | * by ->gpcpu, we enqueue at the beginning of the list. Otherwise, | ||
| 200 | * we enqueue it before the element referenced by ->gp_tasks (or at | ||
| 201 | * the tail if ->gp_tasks is NULL) and point ->gp_tasks at the new element. | ||
| 202 | * The task will dequeue itself when it exits the outermost enclosing | ||
| 203 | * RCU read-side critical section. Therefore, the current grace period | ||
| 204 | * cannot be permitted to complete until the ->gp_tasks pointer becomes | ||
| 205 | * NULL. | ||
| 206 | * | ||
| 207 | * Caller must disable preemption. | ||
| 208 | */ | ||
| 209 | void rcu_preempt_note_context_switch(void) | ||
| 210 | { | ||
| 211 | struct task_struct *t = current; | ||
| 212 | unsigned long flags; | ||
| 213 | |||
| 214 | local_irq_save(flags); /* must exclude scheduler_tick(). */ | ||
| 215 | if (rcu_preempt_running_reader() && | ||
| 216 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { | ||
| 217 | |||
| 218 | /* Possibly blocking in an RCU read-side critical section. */ | ||
| 219 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; | ||
| 220 | |||
| 221 | /* | ||
| 222 | * If this CPU has already checked in, then this task | ||
| 223 | * will hold up the next grace period rather than the | ||
| 224 | * current grace period. Queue the task accordingly. | ||
| 225 | * If the task is queued for the current grace period | ||
| 226 | * (i.e., this CPU has not yet passed through a quiescent | ||
| 227 | * state for the current grace period), then as long | ||
| 228 | * as that task remains queued, the current grace period | ||
| 229 | * cannot end. | ||
| 230 | */ | ||
| 231 | list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks); | ||
| 232 | if (rcu_cpu_blocking_cur_gp()) | ||
| 233 | rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry; | ||
| 234 | } | ||
| 235 | |||
| 236 | /* | ||
| 237 | * Either we were not in an RCU read-side critical section to | ||
| 238 | * begin with, or we have now recorded that critical section | ||
| 239 | * globally. Either way, we can now note a quiescent state | ||
| 240 | * for this CPU. Again, if we were in an RCU read-side critical | ||
| 241 | * section, and if that critical section was blocking the current | ||
| 242 | * grace period, then the fact that the task has been enqueued | ||
| 243 | * means that the current grace period continues to be blocked. | ||
| 244 | */ | ||
| 245 | rcu_preempt_cpu_qs(); | ||
| 246 | local_irq_restore(flags); | ||
| 247 | } | ||
| 248 | |||
| 249 | /* | ||
| 250 | * Tiny-preemptible RCU implementation for rcu_read_lock(). | ||
| 251 | * Just increment ->rcu_read_lock_nesting; shared state will be updated | ||
| 252 | * if we block. | ||
| 253 | */ | ||
| 254 | void __rcu_read_lock(void) | ||
| 255 | { | ||
| 256 | current->rcu_read_lock_nesting++; | ||
| 257 | barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */ | ||
| 258 | } | ||
| 259 | EXPORT_SYMBOL_GPL(__rcu_read_lock); | ||
| 260 | |||
| 261 | /* | ||
| 262 | * Handle special cases during rcu_read_unlock(), such as needing to | ||
| 263 | * notify RCU core processing or task having blocked during the RCU | ||
| 264 | * read-side critical section. | ||
| 265 | */ | ||
| 266 | static void rcu_read_unlock_special(struct task_struct *t) | ||
| 267 | { | ||
| 268 | int empty; | ||
| 269 | int empty_exp; | ||
| 270 | unsigned long flags; | ||
| 271 | struct list_head *np; | ||
| 272 | int special; | ||
| 273 | |||
| 274 | /* | ||
| 275 | * NMI handlers cannot block and cannot safely manipulate state. | ||
| 276 | * They therefore cannot possibly be special, so just leave. | ||
| 277 | */ | ||
| 278 | if (in_nmi()) | ||
| 279 | return; | ||
| 280 | |||
| 281 | local_irq_save(flags); | ||
| 282 | |||
| 283 | /* | ||
| 284 | * If RCU core is waiting for this CPU to exit critical section, | ||
| 285 | * let it know that we have done so. | ||
| 286 | */ | ||
| 287 | special = t->rcu_read_unlock_special; | ||
| 288 | if (special & RCU_READ_UNLOCK_NEED_QS) | ||
| 289 | rcu_preempt_cpu_qs(); | ||
| 290 | |||
| 291 | /* Hardware IRQ handlers cannot block. */ | ||
| 292 | if (in_irq()) { | ||
| 293 | local_irq_restore(flags); | ||
| 294 | return; | ||
| 295 | } | ||
| 296 | |||
| 297 | /* Clean up if blocked during RCU read-side critical section. */ | ||
| 298 | if (special & RCU_READ_UNLOCK_BLOCKED) { | ||
| 299 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; | ||
| 300 | |||
| 301 | /* | ||
| 302 | * Remove this task from the ->blkd_tasks list and adjust | ||
| 303 | * any pointers that might have been referencing it. | ||
| 304 | */ | ||
| 305 | empty = !rcu_preempt_blocked_readers_cgp(); | ||
| 306 | empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; | ||
| 307 | np = t->rcu_node_entry.next; | ||
| 308 | if (np == &rcu_preempt_ctrlblk.blkd_tasks) | ||
| 309 | np = NULL; | ||
| 310 | list_del(&t->rcu_node_entry); | ||
| 311 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) | ||
| 312 | rcu_preempt_ctrlblk.gp_tasks = np; | ||
| 313 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) | ||
| 314 | rcu_preempt_ctrlblk.exp_tasks = np; | ||
| 315 | INIT_LIST_HEAD(&t->rcu_node_entry); | ||
| 316 | |||
| 317 | /* | ||
| 318 | * If this was the last task on the current list, and if | ||
| 319 | * we aren't waiting on the CPU, report the quiescent state | ||
| 320 | * and start a new grace period if needed. | ||
| 321 | */ | ||
| 322 | if (!empty && !rcu_preempt_blocked_readers_cgp()) { | ||
| 323 | rcu_preempt_cpu_qs(); | ||
| 324 | rcu_preempt_start_gp(); | ||
| 325 | } | ||
| 326 | |||
| 327 | /* | ||
| 328 | * If this was the last task on the expedited lists, | ||
| 329 | * then we need to wake up the waiting task. | ||
| 330 | */ | ||
| 331 | if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) | ||
| 332 | rcu_report_exp_done(); | ||
| 333 | } | ||
| 334 | local_irq_restore(flags); | ||
| 335 | } | ||
| 336 | |||
| 337 | /* | ||
| 338 | * Tiny-preemptible RCU implementation for rcu_read_unlock(). | ||
| 339 | * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost | ||
| 340 | * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then | ||
| 341 | * invoke rcu_read_unlock_special() to clean up after a context switch | ||
| 342 | * in an RCU read-side critical section and other special cases. | ||
| 343 | */ | ||
| 344 | void __rcu_read_unlock(void) | ||
| 345 | { | ||
| 346 | struct task_struct *t = current; | ||
| 347 | |||
| 348 | barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */ | ||
| 349 | --t->rcu_read_lock_nesting; | ||
| 350 | barrier(); /* decrement before load of ->rcu_read_unlock_special */ | ||
| 351 | if (t->rcu_read_lock_nesting == 0 && | ||
| 352 | unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | ||
| 353 | rcu_read_unlock_special(t); | ||
| 354 | #ifdef CONFIG_PROVE_LOCKING | ||
| 355 | WARN_ON_ONCE(t->rcu_read_lock_nesting < 0); | ||
| 356 | #endif /* #ifdef CONFIG_PROVE_LOCKING */ | ||
| 357 | } | ||
| 358 | EXPORT_SYMBOL_GPL(__rcu_read_unlock); | ||
| 359 | |||
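
These two functions back the rcu_read_lock()/rcu_read_unlock() API for TINY_PREEMPT_RCU. A typical reader, which may nest, would look roughly like this (struct foo and gbl_foo are invented for illustration):

```c
#include <linux/rcupdate.h>

struct foo {
	int a;
};

static struct foo __rcu *gbl_foo;	/* published via rcu_assign_pointer() */

static int reader(void)
{
	struct foo *p;
	int val = -1;

	rcu_read_lock();		/* bumps ->rcu_read_lock_nesting */
	p = rcu_dereference(gbl_foo);	/* usable until the matching unlock */
	if (p)
		val = p->a;
	rcu_read_unlock();		/* may enter rcu_read_unlock_special() */
	return val;
}
```
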
| 360 | /* | ||
| 361 | * Check for a quiescent state from the current CPU. When a task blocks, | ||
| 362 | * the task is recorded in the rcu_preempt_ctrlblk structure, which is | ||
| 363 | * checked elsewhere. This is called from the scheduling-clock interrupt. | ||
| 364 | * | ||
| 365 | * Caller must disable hard irqs. | ||
| 366 | */ | ||
| 367 | static void rcu_preempt_check_callbacks(void) | ||
| 368 | { | ||
| 369 | struct task_struct *t = current; | ||
| 370 | |||
| 371 | if (rcu_preempt_gp_in_progress() && | ||
| 372 | (!rcu_preempt_running_reader() || | ||
| 373 | !rcu_cpu_blocking_cur_gp())) | ||
| 374 | rcu_preempt_cpu_qs(); | ||
| 375 | if (&rcu_preempt_ctrlblk.rcb.rcucblist != | ||
| 376 | rcu_preempt_ctrlblk.rcb.donetail) | ||
| 377 | raise_softirq(RCU_SOFTIRQ); | ||
| 378 | if (rcu_preempt_gp_in_progress() && | ||
| 379 | rcu_cpu_blocking_cur_gp() && | ||
| 380 | rcu_preempt_running_reader()) | ||
| 381 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; | ||
| 382 | } | ||
| 383 | |||
| 384 | /* | ||
| 385 | * TINY_PREEMPT_RCU has an extra callback-list tail pointer to | ||
| 386 | * update, so this is invoked from __rcu_process_callbacks() to | ||
| 387 | * handle that case. Of course, it is invoked for all flavors of | ||
| 388 | * RCU, but RCU callbacks can appear only on one of the lists, and | ||
| 389 | * neither ->nexttail nor ->donetail can possibly be NULL, so there | ||
| 390 | * is no need for an explicit check. | ||
| 391 | */ | ||
| 392 | static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) | ||
| 393 | { | ||
| 394 | if (rcu_preempt_ctrlblk.nexttail == rcp->donetail) | ||
| 395 | rcu_preempt_ctrlblk.nexttail = &rcp->rcucblist; | ||
| 396 | } | ||
| 397 | |||
| 398 | /* | ||
| 399 | * Process callbacks for preemptible RCU. | ||
| 400 | */ | ||
| 401 | static void rcu_preempt_process_callbacks(void) | ||
| 402 | { | ||
| 403 | __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); | ||
| 404 | } | ||
| 405 | |||
| 406 | /* | ||
| 407 | * Queue a preemptible-RCU callback for invocation after a grace period. | ||
| 408 | */ | ||
| 409 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | ||
| 410 | { | ||
| 411 | unsigned long flags; | ||
| 412 | |||
| 413 | debug_rcu_head_queue(head); | ||
| 414 | head->func = func; | ||
| 415 | head->next = NULL; | ||
| 416 | |||
| 417 | local_irq_save(flags); | ||
| 418 | *rcu_preempt_ctrlblk.nexttail = head; | ||
| 419 | rcu_preempt_ctrlblk.nexttail = &head->next; | ||
| 420 | rcu_preempt_start_gp(); /* checks to see if GP needed. */ | ||
| 421 | local_irq_restore(flags); | ||
| 422 | } | ||
| 423 | EXPORT_SYMBOL_GPL(call_rcu); | ||
| 424 | |||
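
The usual pairing for call_rcu() is an updater that unlinks an element and lets the callback free it one grace period later. A hedged sketch; the structure, lock, and helper names here are illustrative, not from the patch:

```c
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
	struct rcu_head rcu;
	int a;
};

static struct foo __rcu *gbl_foo;
static DEFINE_SPINLOCK(foo_lock);

static void foo_reclaim(struct rcu_head *rp)
{
	kfree(container_of(rp, struct foo, rcu));
}

static void foo_replace(struct foo *newp)
{
	struct foo *oldp;

	spin_lock(&foo_lock);
	oldp = rcu_dereference_protected(gbl_foo,
					 lockdep_is_held(&foo_lock));
	rcu_assign_pointer(gbl_foo, newp);
	spin_unlock(&foo_lock);
	if (oldp)
		call_rcu(&oldp->rcu, foo_reclaim);	/* freed after a GP */
}
```
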
| 425 | void rcu_barrier(void) | ||
| 426 | { | ||
| 427 | struct rcu_synchronize rcu; | ||
| 428 | |||
| 429 | init_rcu_head_on_stack(&rcu.head); | ||
| 430 | init_completion(&rcu.completion); | ||
| 431 | /* Will wake me after RCU finished. */ | ||
| 432 | call_rcu(&rcu.head, wakeme_after_rcu); | ||
| 433 | /* Wait for it. */ | ||
| 434 | wait_for_completion(&rcu.completion); | ||
| 435 | destroy_rcu_head_on_stack(&rcu.head); | ||
| 436 | } | ||
| 437 | EXPORT_SYMBOL_GPL(rcu_barrier); | ||
| 438 | |||
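
Because callbacks on the single list are invoked in order, waiting for this one freshly posted callback also waits for everything queued before it, which is exactly the rcu_barrier() guarantee. Its canonical caller is module unload, roughly as in this sketch (the helper name is hypothetical):

```c
static void __exit foo_exit(void)
{
	/* First stop every path that could post new callbacks ... */
	foo_unregister_hooks();	/* hypothetical teardown helper */
	/* ... then wait until all already-queued callbacks have run,
	 * so none can fire after the module text is unloaded. */
	rcu_barrier();
}
```
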
| 439 | /* | ||
| 440 | * synchronize_rcu - wait until a grace period has elapsed. | ||
| 441 | * | ||
| 442 | * Control will return to the caller some time after a full grace | ||
| 443 | * period has elapsed, in other words after all currently executing RCU | ||
| 444 | * read-side critical sections have completed. RCU read-side critical | ||
| 445 | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), | ||
| 446 | * and may be nested. | ||
| 447 | */ | ||
| 448 | void synchronize_rcu(void) | ||
| 449 | { | ||
| 450 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
| 451 | if (!rcu_scheduler_active) | ||
| 452 | return; | ||
| 453 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||
| 454 | |||
| 455 | WARN_ON_ONCE(rcu_preempt_running_reader()); | ||
| 456 | if (!rcu_preempt_blocked_readers_any()) | ||
| 457 | return; | ||
| 458 | |||
| 459 | /* Once we get past the fastpath checks, same code as rcu_barrier(). */ | ||
| 460 | rcu_barrier(); | ||
| 461 | } | ||
| 462 | EXPORT_SYMBOL_GPL(synchronize_rcu); | ||
| 463 | |||
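
synchronize_rcu() supports the classic synchronous update pattern: remove, wait, free. A sketch assuming an RCU-protected list whose updates are serialized by a spinlock (all names illustrative):

```c
#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
	struct list_head list;
	int a;
};

static LIST_HEAD(foo_list);
static DEFINE_SPINLOCK(foo_lock);

static void foo_del(struct foo *fp)
{
	spin_lock(&foo_lock);
	list_del_rcu(&fp->list);	/* readers may still be traversing fp */
	spin_unlock(&foo_lock);
	synchronize_rcu();		/* wait out all pre-existing readers */
	kfree(fp);			/* no reader can reference fp now */
}
```
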
| 464 | static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); | ||
| 465 | static unsigned long sync_rcu_preempt_exp_count; | ||
| 466 | static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); | ||
| 467 | |||
| 468 | /* | ||
| 469 | * Return non-zero if there are any tasks in RCU read-side critical | ||
| 470 | * sections blocking the current preemptible-RCU expedited grace period. | ||
| 471 | * If there is no preemptible-RCU expedited grace period currently in | ||
| 472 | * progress, returns zero unconditionally. | ||
| 473 | */ | ||
| 474 | static int rcu_preempted_readers_exp(void) | ||
| 475 | { | ||
| 476 | return rcu_preempt_ctrlblk.exp_tasks != NULL; | ||
| 477 | } | ||
| 478 | |||
| 479 | /* | ||
| 480 | * Report the exit from RCU read-side critical section for the last task | ||
| 481 | * that queued itself during or before the current expedited preemptible-RCU | ||
| 482 | * grace period. | ||
| 483 | */ | ||
| 484 | static void rcu_report_exp_done(void) | ||
| 485 | { | ||
| 486 | wake_up(&sync_rcu_preempt_exp_wq); | ||
| 487 | } | ||
| 488 | |||
| 489 | /* | ||
| 490 | * Wait for an rcu-preempt grace period, but expedite it. The basic idea | ||
| 491 | * is to rely on the fact that there is but one CPU, and that it is | ||
| 492 | * illegal for a task to invoke synchronize_rcu_expedited() while in a | ||
| 493 | * preemptible-RCU read-side critical section. Therefore, any such | ||
| 494 | * critical sections must correspond to blocked tasks, which must therefore | ||
| 495 | * be on the ->blkd_tasks list. So just record the current head of the | ||
| 496 | * list in the ->exp_tasks pointer, and wait for all tasks including and | ||
| 497 | * after the task pointed to by ->exp_tasks to drain. | ||
| 498 | */ | ||
| 499 | void synchronize_rcu_expedited(void) | ||
| 500 | { | ||
| 501 | unsigned long flags; | ||
| 502 | struct rcu_preempt_ctrlblk *rpcp = &rcu_preempt_ctrlblk; | ||
| 503 | unsigned long snap; | ||
| 504 | |||
| 505 | barrier(); /* ensure prior action seen before grace period. */ | ||
| 506 | |||
| 507 | WARN_ON_ONCE(rcu_preempt_running_reader()); | ||
| 508 | |||
| 509 | /* | ||
| 510 | * Acquire lock so that there is only one preemptible RCU grace | ||
| 511 | * period in flight. Of course, if someone does the expedited | ||
| 512 | * grace period for us while we are acquiring the lock, just leave. | ||
| 513 | */ | ||
| 514 | snap = sync_rcu_preempt_exp_count + 1; | ||
| 515 | mutex_lock(&sync_rcu_preempt_exp_mutex); | ||
| 516 | if (ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count)) | ||
| 517 | goto unlock_mb_ret; /* Others did our work for us. */ | ||
| 518 | |||
| 519 | local_irq_save(flags); | ||
| 520 | |||
| 521 | /* | ||
| 522 | * All RCU readers have to already be on blkd_tasks because | ||
| 523 | * we cannot legally be executing in an RCU read-side critical | ||
| 524 | * section. | ||
| 525 | */ | ||
| 526 | |||
| 527 | /* Snapshot current head of ->blkd_tasks list. */ | ||
| 528 | rpcp->exp_tasks = rpcp->blkd_tasks.next; | ||
| 529 | if (rpcp->exp_tasks == &rpcp->blkd_tasks) | ||
| 530 | rpcp->exp_tasks = NULL; | ||
| 531 | local_irq_restore(flags); | ||
| 532 | |||
| 533 | /* Wait for tail of ->blkd_tasks list to drain. */ | ||
| 534 | if (rcu_preempted_readers_exp()) | ||
| 535 | wait_event(sync_rcu_preempt_exp_wq, | ||
| 536 | !rcu_preempted_readers_exp()); | ||
| 537 | |||
| 538 | /* Clean up and exit. */ | ||
| 539 | barrier(); /* ensure expedited GP seen before counter increment. */ | ||
| 540 | sync_rcu_preempt_exp_count++; | ||
| 541 | unlock_mb_ret: | ||
| 542 | mutex_unlock(&sync_rcu_preempt_exp_mutex); | ||
| 543 | barrier(); /* ensure subsequent action seen after grace period. */ | ||
| 544 | } | ||
| 545 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | ||
| 546 | |||
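
The snapshot-then-recheck logic above stays correct even when sync_rcu_preempt_exp_count wraps, which a plain less-than comparison does not. A stand-alone demonstration with contrived values:

```c
#include <limits.h>
#include <stdio.h>

#define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))

int main(void)
{
	unsigned long count = ULONG_MAX - 1;	/* GP counter about to wrap */
	unsigned long snap = count + 1;		/* the GP we must see complete */

	count += 2;	/* two grace periods elapse; count wraps to 0 */

	printf("plain snap < count: %d (misses the wrap)\n", snap < count);
	printf("ULONG_CMP_LT(snap, count): %d (correct)\n",
	       ULONG_CMP_LT(snap, count));
	return 0;
}
```
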
| 547 | /* | ||
| 548 | * Does preemptible RCU need the CPU to stay out of dynticks mode? | ||
| 549 | */ | ||
| 550 | int rcu_preempt_needs_cpu(void) | ||
| 551 | { | ||
| 552 | if (!rcu_preempt_running_reader()) | ||
| 553 | rcu_preempt_cpu_qs(); | ||
| 554 | return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; | ||
| 555 | } | ||
| 556 | |||
| 557 | /* | ||
| 558 | * Check for a task exiting while in a preemptible-RCU read-side | ||
| 559 | * critical section, clean up if so. No need to issue warnings, | ||
| 560 | * as debug_check_no_locks_held() already does this if lockdep | ||
| 561 | * is enabled. | ||
| 562 | */ | ||
| 563 | void exit_rcu(void) | ||
| 564 | { | ||
| 565 | struct task_struct *t = current; | ||
| 566 | |||
| 567 | if (t->rcu_read_lock_nesting == 0) | ||
| 568 | return; | ||
| 569 | t->rcu_read_lock_nesting = 1; | ||
| 570 | rcu_read_unlock(); | ||
| 571 | } | ||
| 572 | |||
| 573 | #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ | ||
| 574 | |||
| 575 | /* | ||
| 576 | * Because preemptible RCU does not exist, it never has any callbacks | ||
| 577 | * to check. | ||
| 578 | */ | ||
| 579 | static void rcu_preempt_check_callbacks(void) | ||
| 580 | { | ||
| 581 | } | ||
| 582 | |||
| 583 | /* | ||
| 584 | * Because preemptible RCU does not exist, it never has any callbacks | ||
| 585 | * to remove. | ||
| 586 | */ | ||
| 587 | static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) | ||
| 588 | { | ||
| 589 | } | ||
| 590 | |||
| 591 | /* | ||
| 592 | * Because preemptible RCU does not exist, it never has any callbacks | ||
| 593 | * to process. | ||
| 594 | */ | ||
| 595 | static void rcu_preempt_process_callbacks(void) | ||
| 596 | { | ||
| 597 | } | ||
| 598 | |||
| 599 | #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ | ||
| 600 | |||
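
The empty functions above follow the standard kernel idiom for optional features: the #else branch supplies do-nothing stubs so that call sites in rcutiny.c need no #ifdefs of their own. The same pattern in miniature (CONFIG_MY_FEATURE and the function names are invented):

```c
#ifdef CONFIG_MY_FEATURE

static void my_feature_hook(void)
{
	/* real work when the feature is configured in */
}

#else /* #ifdef CONFIG_MY_FEATURE */

static void my_feature_hook(void)
{
	/* do-nothing stub; the compiler optimizes the call away */
}

#endif /* #else #ifdef CONFIG_MY_FEATURE */

/* Call sites stay unconditional: */
static void common_path(void)
{
	my_feature_hook();
}
```
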
| 25 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 601 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| 26 | 602 | ||
| 27 | #include <linux/kernel_stat.h> | 603 | #include <linux/kernel_stat.h> |
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 2e2726d790b9..9d8e8fb2515f 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
| @@ -120,7 +120,7 @@ struct rcu_torture { | |||
| 120 | }; | 120 | }; |
| 121 | 121 | ||
| 122 | static LIST_HEAD(rcu_torture_freelist); | 122 | static LIST_HEAD(rcu_torture_freelist); |
| 123 | static struct rcu_torture *rcu_torture_current; | 123 | static struct rcu_torture __rcu *rcu_torture_current; |
| 124 | static long rcu_torture_current_version; | 124 | static long rcu_torture_current_version; |
| 125 | static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; | 125 | static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; |
| 126 | static DEFINE_SPINLOCK(rcu_torture_lock); | 126 | static DEFINE_SPINLOCK(rcu_torture_lock); |
| @@ -153,8 +153,10 @@ int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; | |||
| 153 | #define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */ | 153 | #define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */ |
| 154 | #define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */ | 154 | #define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */ |
| 155 | static int fullstop = FULLSTOP_RMMOD; | 155 | static int fullstop = FULLSTOP_RMMOD; |
| 156 | DEFINE_MUTEX(fullstop_mutex); /* Protect fullstop transitions and spawning */ | 156 | /* |
| 157 | /* of kthreads. */ | 157 | * Protect fullstop transitions and spawning of kthreads. |
| 158 | */ | ||
| 159 | static DEFINE_MUTEX(fullstop_mutex); | ||
| 158 | 160 | ||
| 159 | /* | 161 | /* |
| 160 | * Detect and respond to a system shutdown. | 162 | * Detect and respond to a system shutdown. |
| @@ -303,6 +305,10 @@ static void rcu_read_delay(struct rcu_random_state *rrsp) | |||
| 303 | mdelay(longdelay_ms); | 305 | mdelay(longdelay_ms); |
| 304 | if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) | 306 | if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) |
| 305 | udelay(shortdelay_us); | 307 | udelay(shortdelay_us); |
| 308 | #ifdef CONFIG_PREEMPT | ||
| 309 | if (!preempt_count() && !(rcu_random(rrsp) % (nrealreaders * 20000))) | ||
| 310 | preempt_schedule(); /* No QS if preempt_disable() in effect */ | ||
| 311 | #endif | ||
| 306 | } | 312 | } |
| 307 | 313 | ||
| 308 | static void rcu_torture_read_unlock(int idx) __releases(RCU) | 314 | static void rcu_torture_read_unlock(int idx) __releases(RCU) |
| @@ -536,6 +542,8 @@ static void srcu_read_delay(struct rcu_random_state *rrsp) | |||
| 536 | delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick); | 542 | delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick); |
| 537 | if (!delay) | 543 | if (!delay) |
| 538 | schedule_timeout_interruptible(longdelay); | 544 | schedule_timeout_interruptible(longdelay); |
| 545 | else | ||
| 546 | rcu_read_delay(rrsp); | ||
| 539 | } | 547 | } |
| 540 | 548 | ||
| 541 | static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) | 549 | static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) |
| @@ -731,7 +739,8 @@ rcu_torture_writer(void *arg) | |||
| 731 | continue; | 739 | continue; |
| 732 | rp->rtort_pipe_count = 0; | 740 | rp->rtort_pipe_count = 0; |
| 733 | udelay(rcu_random(&rand) & 0x3ff); | 741 | udelay(rcu_random(&rand) & 0x3ff); |
| 734 | old_rp = rcu_torture_current; | 742 | old_rp = rcu_dereference_check(rcu_torture_current, |
| 743 | current == writer_task); | ||
| 735 | rp->rtort_mbtest = 1; | 744 | rp->rtort_mbtest = 1; |
| 736 | rcu_assign_pointer(rcu_torture_current, rp); | 745 | rcu_assign_pointer(rcu_torture_current, rp); |
| 737 | smp_wmb(); /* Mods to old_rp must follow rcu_assign_pointer() */ | 746 | smp_wmb(); /* Mods to old_rp must follow rcu_assign_pointer() */ |
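
The rcutorture hunks above adopt the __rcu sparse annotation: an annotated pointer must be read through an rcu_dereference*() variant and published through rcu_assign_pointer(). A sketch of the writer-side pattern (names are illustrative; the real code uses rcu_torture_current and checks current == writer_task instead of a lock):

```c
#include <linux/mutex.h>
#include <linux/rcupdate.h>

struct item {
	int payload;
};

static struct item __rcu *cur;
static DEFINE_MUTEX(update_lock);

static struct item *publish(struct item *newp)
{
	struct item *oldp;

	mutex_lock(&update_lock);
	/* Writer-side read: the update lock, not rcu_read_lock(),
	 * makes this access legal, and sparse can verify it. */
	oldp = rcu_dereference_protected(cur,
					 lockdep_is_held(&update_lock));
	rcu_assign_pointer(cur, newp);	/* orders init of *newp first */
	mutex_unlock(&update_lock);
	return oldp;	/* caller must wait a GP before reusing it */
}
```
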
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d5bc43976c5a..ccdc04c47981 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
| @@ -143,6 +143,11 @@ module_param(blimit, int, 0); | |||
| 143 | module_param(qhimark, int, 0); | 143 | module_param(qhimark, int, 0); |
| 144 | module_param(qlowmark, int, 0); | 144 | module_param(qlowmark, int, 0); |
| 145 | 145 | ||
| 146 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR | ||
| 147 | int rcu_cpu_stall_suppress __read_mostly = RCU_CPU_STALL_SUPPRESS_INIT; | ||
| 148 | module_param(rcu_cpu_stall_suppress, int, 0644); | ||
| 149 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | ||
| 150 | |||
| 146 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed); | 151 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed); |
| 147 | static int rcu_pending(int cpu); | 152 | static int rcu_pending(int cpu); |
| 148 | 153 | ||
| @@ -450,7 +455,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | |||
| 450 | 455 | ||
| 451 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR | 456 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR |
| 452 | 457 | ||
| 453 | int rcu_cpu_stall_panicking __read_mostly; | 458 | int rcu_cpu_stall_suppress __read_mostly; |
| 454 | 459 | ||
| 455 | static void record_gp_stall_check_time(struct rcu_state *rsp) | 460 | static void record_gp_stall_check_time(struct rcu_state *rsp) |
| 456 | { | 461 | { |
| @@ -482,8 +487,11 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
| 482 | rcu_print_task_stall(rnp); | 487 | rcu_print_task_stall(rnp); |
| 483 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 488 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 484 | 489 | ||
| 485 | /* OK, time to rat on our buddy... */ | 490 | /* |
| 486 | 491 | * OK, time to rat on our buddy... | |
| 492 | * See Documentation/RCU/stallwarn.txt for info on how to debug | ||
| 493 | * RCU CPU stall warnings. | ||
| 494 | */ | ||
| 487 | printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", | 495 | printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", |
| 488 | rsp->name); | 496 | rsp->name); |
| 489 | rcu_for_each_leaf_node(rsp, rnp) { | 497 | rcu_for_each_leaf_node(rsp, rnp) { |
| @@ -512,6 +520,11 @@ static void print_cpu_stall(struct rcu_state *rsp) | |||
| 512 | unsigned long flags; | 520 | unsigned long flags; |
| 513 | struct rcu_node *rnp = rcu_get_root(rsp); | 521 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 514 | 522 | ||
| 523 | /* | ||
| 524 | * OK, time to rat on ourselves... | ||
| 525 | * See Documentation/RCU/stallwarn.txt for info on how to debug | ||
| 526 | * RCU CPU stall warnings. | ||
| 527 | */ | ||
| 515 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", | 528 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", |
| 516 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); | 529 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); |
| 517 | trigger_all_cpu_backtrace(); | 530 | trigger_all_cpu_backtrace(); |
| @@ -530,11 +543,11 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 530 | long delta; | 543 | long delta; |
| 531 | struct rcu_node *rnp; | 544 | struct rcu_node *rnp; |
| 532 | 545 | ||
| 533 | if (rcu_cpu_stall_panicking) | 546 | if (rcu_cpu_stall_suppress) |
| 534 | return; | 547 | return; |
| 535 | delta = jiffies - rsp->jiffies_stall; | 548 | delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall); |
| 536 | rnp = rdp->mynode; | 549 | rnp = rdp->mynode; |
| 537 | if ((rnp->qsmask & rdp->grpmask) && delta >= 0) { | 550 | if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && delta >= 0) { |
| 538 | 551 | ||
| 539 | /* We haven't checked in, so go dump stack. */ | 552 | /* We haven't checked in, so go dump stack. */ |
| 540 | print_cpu_stall(rsp); | 553 | print_cpu_stall(rsp); |
| @@ -548,10 +561,26 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 548 | 561 | ||
| 549 | static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) | 562 | static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) |
| 550 | { | 563 | { |
| 551 | rcu_cpu_stall_panicking = 1; | 564 | rcu_cpu_stall_suppress = 1; |
| 552 | return NOTIFY_DONE; | 565 | return NOTIFY_DONE; |
| 553 | } | 566 | } |
| 554 | 567 | ||
| 568 | /** | ||
| 569 | * rcu_cpu_stall_reset - prevent further stall warnings in current grace period | ||
| 570 | * | ||
| 571 | * Set the stall-warning timeout way off into the future, thus preventing | ||
| 572 | * any RCU CPU stall-warning messages from appearing in the current set of | ||
| 573 | * RCU grace periods. | ||
| 574 | * | ||
| 575 | * The caller must disable hard irqs. | ||
| 576 | */ | ||
| 577 | void rcu_cpu_stall_reset(void) | ||
| 578 | { | ||
| 579 | rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; | ||
| 580 | rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; | ||
| 581 | rcu_preempt_stall_reset(); | ||
| 582 | } | ||
| 583 | |||
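
The reason jiffies + ULONG_MAX / 2 suppresses warnings: check_cpu_stall() forms a signed delta against jiffies_stall and only complains once it reaches zero, so pushing the deadline half the counter range into the future keeps the delta negative. A stand-alone illustration with made-up jiffies values:

```c
#include <limits.h>
#include <stdio.h>

int main(void)
{
	unsigned long jiffies = 123456;		/* "now" */
	unsigned long jiffies_stall = jiffies + ULONG_MAX / 2;
	/* Even a million ticks later, the signed delta is still negative: */
	long delta = (long)((jiffies + 1000000) - jiffies_stall);

	printf("delta = %ld => %s\n", delta,
	       delta >= 0 ? "would warn" : "suppressed");
	return 0;
}
```
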
| 555 | static struct notifier_block rcu_panic_block = { | 584 | static struct notifier_block rcu_panic_block = { |
| 556 | .notifier_call = rcu_panic, | 585 | .notifier_call = rcu_panic, |
| 557 | }; | 586 | }; |
| @@ -571,6 +600,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 571 | { | 600 | { |
| 572 | } | 601 | } |
| 573 | 602 | ||
| 603 | void rcu_cpu_stall_reset(void) | ||
| 604 | { | ||
| 605 | } | ||
| 606 | |||
| 574 | static void __init check_cpu_stall_init(void) | 607 | static void __init check_cpu_stall_init(void) |
| 575 | { | 608 | { |
| 576 | } | 609 | } |
| @@ -712,7 +745,7 @@ static void | |||
| 712 | rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | 745 | rcu_start_gp(struct rcu_state *rsp, unsigned long flags) |
| 713 | __releases(rcu_get_root(rsp)->lock) | 746 | __releases(rcu_get_root(rsp)->lock) |
| 714 | { | 747 | { |
| 715 | struct rcu_data *rdp = rsp->rda[smp_processor_id()]; | 748 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
| 716 | struct rcu_node *rnp = rcu_get_root(rsp); | 749 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 717 | 750 | ||
| 718 | if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { | 751 | if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { |
| @@ -960,7 +993,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 960 | static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) | 993 | static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) |
| 961 | { | 994 | { |
| 962 | int i; | 995 | int i; |
| 963 | struct rcu_data *rdp = rsp->rda[smp_processor_id()]; | 996 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
| 964 | 997 | ||
| 965 | if (rdp->nxtlist == NULL) | 998 | if (rdp->nxtlist == NULL) |
| 966 | return; /* irqs disabled, so comparison is stable. */ | 999 | return; /* irqs disabled, so comparison is stable. */ |
| @@ -971,6 +1004,7 @@ static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) | |||
| 971 | for (i = 0; i < RCU_NEXT_SIZE; i++) | 1004 | for (i = 0; i < RCU_NEXT_SIZE; i++) |
| 972 | rdp->nxttail[i] = &rdp->nxtlist; | 1005 | rdp->nxttail[i] = &rdp->nxtlist; |
| 973 | rsp->orphan_qlen += rdp->qlen; | 1006 | rsp->orphan_qlen += rdp->qlen; |
| 1007 | rdp->n_cbs_orphaned += rdp->qlen; | ||
| 974 | rdp->qlen = 0; | 1008 | rdp->qlen = 0; |
| 975 | raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ | 1009 | raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ |
| 976 | } | 1010 | } |
| @@ -984,7 +1018,7 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | |||
| 984 | struct rcu_data *rdp; | 1018 | struct rcu_data *rdp; |
| 985 | 1019 | ||
| 986 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 1020 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
| 987 | rdp = rsp->rda[smp_processor_id()]; | 1021 | rdp = this_cpu_ptr(rsp->rda); |
| 988 | if (rsp->orphan_cbs_list == NULL) { | 1022 | if (rsp->orphan_cbs_list == NULL) { |
| 989 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 1023 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
| 990 | return; | 1024 | return; |
| @@ -992,6 +1026,7 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | |||
| 992 | *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list; | 1026 | *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list; |
| 993 | rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail; | 1027 | rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail; |
| 994 | rdp->qlen += rsp->orphan_qlen; | 1028 | rdp->qlen += rsp->orphan_qlen; |
| 1029 | rdp->n_cbs_adopted += rsp->orphan_qlen; | ||
| 995 | rsp->orphan_cbs_list = NULL; | 1030 | rsp->orphan_cbs_list = NULL; |
| 996 | rsp->orphan_cbs_tail = &rsp->orphan_cbs_list; | 1031 | rsp->orphan_cbs_tail = &rsp->orphan_cbs_list; |
| 997 | rsp->orphan_qlen = 0; | 1032 | rsp->orphan_qlen = 0; |
| @@ -1007,7 +1042,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
| 1007 | unsigned long flags; | 1042 | unsigned long flags; |
| 1008 | unsigned long mask; | 1043 | unsigned long mask; |
| 1009 | int need_report = 0; | 1044 | int need_report = 0; |
| 1010 | struct rcu_data *rdp = rsp->rda[cpu]; | 1045 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
| 1011 | struct rcu_node *rnp; | 1046 | struct rcu_node *rnp; |
| 1012 | 1047 | ||
| 1013 | /* Exclude any attempts to start a new grace period. */ | 1048 | /* Exclude any attempts to start a new grace period. */ |
| @@ -1123,6 +1158,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1123 | 1158 | ||
| 1124 | /* Update count, and requeue any remaining callbacks. */ | 1159 | /* Update count, and requeue any remaining callbacks. */ |
| 1125 | rdp->qlen -= count; | 1160 | rdp->qlen -= count; |
| 1161 | rdp->n_cbs_invoked += count; | ||
| 1126 | if (list != NULL) { | 1162 | if (list != NULL) { |
| 1127 | *tail = rdp->nxtlist; | 1163 | *tail = rdp->nxtlist; |
| 1128 | rdp->nxtlist = list; | 1164 | rdp->nxtlist = list; |
| @@ -1226,7 +1262,8 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) | |||
| 1226 | cpu = rnp->grplo; | 1262 | cpu = rnp->grplo; |
| 1227 | bit = 1; | 1263 | bit = 1; |
| 1228 | for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { | 1264 | for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { |
| 1229 | if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) | 1265 | if ((rnp->qsmask & bit) != 0 && |
| 1266 | f(per_cpu_ptr(rsp->rda, cpu))) | ||
| 1230 | mask |= bit; | 1267 | mask |= bit; |
| 1231 | } | 1268 | } |
| 1232 | if (mask != 0) { | 1269 | if (mask != 0) { |
| @@ -1402,7 +1439,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
| 1402 | * a quiescent state betweentimes. | 1439 | * a quiescent state betweentimes. |
| 1403 | */ | 1440 | */ |
| 1404 | local_irq_save(flags); | 1441 | local_irq_save(flags); |
| 1405 | rdp = rsp->rda[smp_processor_id()]; | 1442 | rdp = this_cpu_ptr(rsp->rda); |
| 1406 | rcu_process_gp_end(rsp, rdp); | 1443 | rcu_process_gp_end(rsp, rdp); |
| 1407 | check_for_new_grace_period(rsp, rdp); | 1444 | check_for_new_grace_period(rsp, rdp); |
| 1408 | 1445 | ||
| @@ -1701,7 +1738,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
| 1701 | { | 1738 | { |
| 1702 | unsigned long flags; | 1739 | unsigned long flags; |
| 1703 | int i; | 1740 | int i; |
| 1704 | struct rcu_data *rdp = rsp->rda[cpu]; | 1741 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
| 1705 | struct rcu_node *rnp = rcu_get_root(rsp); | 1742 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 1706 | 1743 | ||
| 1707 | /* Set up local state, ensuring consistent view of global state. */ | 1744 | /* Set up local state, ensuring consistent view of global state. */ |
| @@ -1729,7 +1766,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) | |||
| 1729 | { | 1766 | { |
| 1730 | unsigned long flags; | 1767 | unsigned long flags; |
| 1731 | unsigned long mask; | 1768 | unsigned long mask; |
| 1732 | struct rcu_data *rdp = rsp->rda[cpu]; | 1769 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
| 1733 | struct rcu_node *rnp = rcu_get_root(rsp); | 1770 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 1734 | 1771 | ||
| 1735 | /* Set up local state, ensuring consistent view of global state. */ | 1772 | /* Set up local state, ensuring consistent view of global state. */ |
| @@ -1865,7 +1902,8 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) | |||
| 1865 | /* | 1902 | /* |
| 1866 | * Helper function for rcu_init() that initializes one rcu_state structure. | 1903 | * Helper function for rcu_init() that initializes one rcu_state structure. |
| 1867 | */ | 1904 | */ |
| 1868 | static void __init rcu_init_one(struct rcu_state *rsp) | 1905 | static void __init rcu_init_one(struct rcu_state *rsp, |
| 1906 | struct rcu_data __percpu *rda) | ||
| 1869 | { | 1907 | { |
| 1870 | static char *buf[] = { "rcu_node_level_0", | 1908 | static char *buf[] = { "rcu_node_level_0", |
| 1871 | "rcu_node_level_1", | 1909 | "rcu_node_level_1", |
| @@ -1918,37 +1956,23 @@ static void __init rcu_init_one(struct rcu_state *rsp) | |||
| 1918 | } | 1956 | } |
| 1919 | } | 1957 | } |
| 1920 | 1958 | ||
| 1959 | rsp->rda = rda; | ||
| 1921 | rnp = rsp->level[NUM_RCU_LVLS - 1]; | 1960 | rnp = rsp->level[NUM_RCU_LVLS - 1]; |
| 1922 | for_each_possible_cpu(i) { | 1961 | for_each_possible_cpu(i) { |
| 1923 | while (i > rnp->grphi) | 1962 | while (i > rnp->grphi) |
| 1924 | rnp++; | 1963 | rnp++; |
| 1925 | rsp->rda[i]->mynode = rnp; | 1964 | per_cpu_ptr(rsp->rda, i)->mynode = rnp; |
| 1926 | rcu_boot_init_percpu_data(i, rsp); | 1965 | rcu_boot_init_percpu_data(i, rsp); |
| 1927 | } | 1966 | } |
| 1928 | } | 1967 | } |
| 1929 | 1968 | ||
| 1930 | /* | ||
| 1931 | * Helper macro for __rcu_init() and __rcu_init_preempt(). To be used | ||
| 1932 | * nowhere else! Assigns leaf node pointers into each CPU's rcu_data | ||
| 1933 | * structure. | ||
| 1934 | */ | ||
| 1935 | #define RCU_INIT_FLAVOR(rsp, rcu_data) \ | ||
| 1936 | do { \ | ||
| 1937 | int i; \ | ||
| 1938 | \ | ||
| 1939 | for_each_possible_cpu(i) { \ | ||
| 1940 | (rsp)->rda[i] = &per_cpu(rcu_data, i); \ | ||
| 1941 | } \ | ||
| 1942 | rcu_init_one(rsp); \ | ||
| 1943 | } while (0) | ||
| 1944 | |||
| 1945 | void __init rcu_init(void) | 1969 | void __init rcu_init(void) |
| 1946 | { | 1970 | { |
| 1947 | int cpu; | 1971 | int cpu; |
| 1948 | 1972 | ||
| 1949 | rcu_bootup_announce(); | 1973 | rcu_bootup_announce(); |
| 1950 | RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); | 1974 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); |
| 1951 | RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); | 1975 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); |
| 1952 | __rcu_init_preempt(); | 1976 | __rcu_init_preempt(); |
| 1953 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | 1977 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
| 1954 | 1978 | ||
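
The recurring change in this file replaces the NR_CPUS-sized pointer array rsp->rda[cpu] with a genuine __percpu pointer accessed via this_cpu_ptr() and per_cpu_ptr(). A minimal sketch of the new access pattern (structure and names invented for illustration):

```c
#include <linux/percpu.h>

struct my_data {
	long qlen;
};

/* Static percpu storage, playing the role of rcu_sched_data: */
static DEFINE_PER_CPU(struct my_data, my_data);

/* The state structure now carries one __percpu pointer: */
struct my_state {
	struct my_data __percpu *rda;
};

static void count_on_this_cpu(struct my_state *sp)
{
	/* Caller has preemption disabled, as the RCU core paths do. */
	this_cpu_ptr(sp->rda)->qlen++;
}

static long peek_cpu(struct my_state *sp, int cpu)
{
	return per_cpu_ptr(sp->rda, cpu)->qlen;
}

/* Initialization mirrors rcu_init_one(rsp, rda): sp->rda = &my_data; */
```
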
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 14c040b18ed0..91d4170c5c13 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
| @@ -202,6 +202,9 @@ struct rcu_data { | |||
| 202 | long qlen; /* # of queued callbacks */ | 202 | long qlen; /* # of queued callbacks */ |
| 203 | long qlen_last_fqs_check; | 203 | long qlen_last_fqs_check; |
| 204 | /* qlen at last check for QS forcing */ | 204 | /* qlen at last check for QS forcing */ |
| 205 | unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ | ||
| 206 | unsigned long n_cbs_orphaned; /* RCU cbs sent to orphanage. */ | ||
| 207 | unsigned long n_cbs_adopted; /* RCU cbs adopted from orphanage. */ | ||
| 205 | unsigned long n_force_qs_snap; | 208 | unsigned long n_force_qs_snap; |
| 206 | /* did other CPU force QS recently? */ | 209 | /* did other CPU force QS recently? */ |
| 207 | long blimit; /* Upper limit on a processed batch */ | 210 | long blimit; /* Upper limit on a processed batch */ |
| @@ -254,19 +257,23 @@ struct rcu_data { | |||
| 254 | #define RCU_STALL_DELAY_DELTA 0 | 257 | #define RCU_STALL_DELAY_DELTA 0 |
| 255 | #endif | 258 | #endif |
| 256 | 259 | ||
| 257 | #define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ + RCU_STALL_DELAY_DELTA) | 260 | #define RCU_SECONDS_TILL_STALL_CHECK (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \ |
| 261 | RCU_STALL_DELAY_DELTA) | ||
| 258 | /* for rsp->jiffies_stall */ | 262 | /* for rsp->jiffies_stall */ |
| 259 | #define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ + RCU_STALL_DELAY_DELTA) | 263 | #define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30) |
| 260 | /* for rsp->jiffies_stall */ | 264 | /* for rsp->jiffies_stall */ |
| 261 | #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ | 265 | #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ |
| 262 | /* to take at least one */ | 266 | /* to take at least one */ |
| 263 | /* scheduling clock irq */ | 267 | /* scheduling clock irq */ |
| 264 | /* before ratting on them. */ | 268 | /* before ratting on them. */ |
| 265 | 269 | ||
| 266 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | 270 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR_RUNNABLE |
| 271 | #define RCU_CPU_STALL_SUPPRESS_INIT 0 | ||
| 272 | #else | ||
| 273 | #define RCU_CPU_STALL_SUPPRESS_INIT 1 | ||
| 274 | #endif | ||
| 267 | 275 | ||
| 268 | #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) | 276 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
| 269 | #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) | ||
| 270 | 277 | ||
| 271 | /* | 278 | /* |
| 272 | * RCU global state, including node hierarchy. This hierarchy is | 279 | * RCU global state, including node hierarchy. This hierarchy is |
| @@ -283,7 +290,7 @@ struct rcu_state { | |||
| 283 | struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ | 290 | struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ |
| 284 | u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ | 291 | u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ |
| 285 | u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ | 292 | u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ |
| 286 | struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */ | 293 | struct rcu_data __percpu *rda; /* pointer to percpu rcu_data. */ |
| 287 | 294 | ||
| 288 | /* The following fields are guarded by the root rcu_node's lock. */ | 295 | /* The following fields are guarded by the root rcu_node's lock. */ |
| 289 | 296 | ||
| @@ -365,6 +372,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, | |||
| 365 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR | 372 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR |
| 366 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); | 373 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); |
| 367 | static void rcu_print_task_stall(struct rcu_node *rnp); | 374 | static void rcu_print_task_stall(struct rcu_node *rnp); |
| 375 | static void rcu_preempt_stall_reset(void); | ||
| 368 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | 376 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
| 369 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); | 377 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); |
| 370 | #ifdef CONFIG_HOTPLUG_CPU | 378 | #ifdef CONFIG_HOTPLUG_CPU |
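
The stall deadline is now derived from CONFIG_RCU_CPU_STALL_TIMEOUT instead of being hard-coded at ten seconds. Plugging illustrative values into the new macros shows what the two deadlines work out to:

```c
#include <stdio.h>

#define HZ				1000	/* illustrative */
#define CONFIG_RCU_CPU_STALL_TIMEOUT	10	/* illustrative */
#define RCU_STALL_DELAY_DELTA		0

#define RCU_SECONDS_TILL_STALL_CHECK	(CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \
					 RCU_STALL_DELAY_DELTA)
#define RCU_SECONDS_TILL_STALL_RECHECK	(3 * RCU_SECONDS_TILL_STALL_CHECK + 30)

int main(void)
{
	printf("first warning after %d jiffies (%d s)\n",
	       RCU_SECONDS_TILL_STALL_CHECK,
	       RCU_SECONDS_TILL_STALL_CHECK / HZ);	/* 10000, 10 */
	printf("re-warn after %d jiffies (~%d s)\n",
	       RCU_SECONDS_TILL_STALL_RECHECK,
	       RCU_SECONDS_TILL_STALL_RECHECK / HZ);	/* 30030, 30 */
	return 0;
}
```
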
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 0e4f420245d9..71a4147473f9 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
| @@ -57,7 +57,7 @@ static void __init rcu_bootup_announce_oddness(void) | |||
| 57 | printk(KERN_INFO | 57 | printk(KERN_INFO |
| 58 | "\tRCU-based detection of stalled CPUs is disabled.\n"); | 58 | "\tRCU-based detection of stalled CPUs is disabled.\n"); |
| 59 | #endif | 59 | #endif |
| 60 | #ifndef CONFIG_RCU_CPU_STALL_VERBOSE | 60 | #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) |
| 61 | printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); | 61 | printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); |
| 62 | #endif | 62 | #endif |
| 63 | #if NUM_RCU_LVL_4 != 0 | 63 | #if NUM_RCU_LVL_4 != 0 |
| @@ -154,7 +154,7 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
| 154 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { | 154 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { |
| 155 | 155 | ||
| 156 | /* Possibly blocking in an RCU read-side critical section. */ | 156 | /* Possibly blocking in an RCU read-side critical section. */ |
| 157 | rdp = rcu_preempt_state.rda[cpu]; | 157 | rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); |
| 158 | rnp = rdp->mynode; | 158 | rnp = rdp->mynode; |
| 159 | raw_spin_lock_irqsave(&rnp->lock, flags); | 159 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 160 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; | 160 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; |
| @@ -201,7 +201,7 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
| 201 | */ | 201 | */ |
| 202 | void __rcu_read_lock(void) | 202 | void __rcu_read_lock(void) |
| 203 | { | 203 | { |
| 204 | ACCESS_ONCE(current->rcu_read_lock_nesting)++; | 204 | current->rcu_read_lock_nesting++; |
| 205 | barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ | 205 | barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ |
| 206 | } | 206 | } |
| 207 | EXPORT_SYMBOL_GPL(__rcu_read_lock); | 207 | EXPORT_SYMBOL_GPL(__rcu_read_lock); |
| @@ -344,7 +344,9 @@ void __rcu_read_unlock(void) | |||
| 344 | struct task_struct *t = current; | 344 | struct task_struct *t = current; |
| 345 | 345 | ||
| 346 | barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ | 346 | barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ |
| 347 | if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 && | 347 | --t->rcu_read_lock_nesting; |
| 348 | barrier(); /* decrement before load of ->rcu_read_unlock_special */ | ||
| 349 | if (t->rcu_read_lock_nesting == 0 && | ||
| 348 | unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | 350 | unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) |
| 349 | rcu_read_unlock_special(t); | 351 | rcu_read_unlock_special(t); |
| 350 | #ifdef CONFIG_PROVE_LOCKING | 352 | #ifdef CONFIG_PROVE_LOCKING |
| @@ -417,6 +419,16 @@ static void rcu_print_task_stall(struct rcu_node *rnp) | |||
| 417 | } | 419 | } |
| 418 | } | 420 | } |
| 419 | 421 | ||
| 422 | /* | ||
| 423 | * Suppress preemptible RCU's CPU stall warnings by pushing the | ||
| 424 | * time of the next stall-warning message comfortably far into the | ||
| 425 | * future. | ||
| 426 | */ | ||
| 427 | static void rcu_preempt_stall_reset(void) | ||
| 428 | { | ||
| 429 | rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; | ||
| 430 | } | ||
| 431 | |||
| 420 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | 432 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
| 421 | 433 | ||
| 422 | /* | 434 | /* |
| @@ -546,9 +558,11 @@ EXPORT_SYMBOL_GPL(call_rcu); | |||
| 546 | * | 558 | * |
| 547 | * Control will return to the caller some time after a full grace | 559 | * Control will return to the caller some time after a full grace |
| 548 | * period has elapsed, in other words after all currently executing RCU | 560 | * period has elapsed, in other words after all currently executing RCU |
| 549 | * read-side critical sections have completed. RCU read-side critical | 561 | * read-side critical sections have completed. Note, however, that |
| 550 | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), | 562 | * upon return from synchronize_rcu(), the caller might well be executing |
| 551 | * and may be nested. | 563 | * concurrently with new RCU read-side critical sections that began while |
| 564 | * synchronize_rcu() was waiting. RCU read-side critical sections are | ||
| 565 | * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. | ||
| 552 | */ | 566 | */ |
| 553 | void synchronize_rcu(void) | 567 | void synchronize_rcu(void) |
| 554 | { | 568 | { |
| @@ -771,7 +785,7 @@ static void rcu_preempt_send_cbs_to_orphanage(void) | |||
| 771 | */ | 785 | */ |
| 772 | static void __init __rcu_init_preempt(void) | 786 | static void __init __rcu_init_preempt(void) |
| 773 | { | 787 | { |
| 774 | RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data); | 788 | rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); |
| 775 | } | 789 | } |
| 776 | 790 | ||
| 777 | /* | 791 | /* |
| @@ -865,6 +879,14 @@ static void rcu_print_task_stall(struct rcu_node *rnp) | |||
| 865 | { | 879 | { |
| 866 | } | 880 | } |
| 867 | 881 | ||
| 882 | /* | ||
| 883 | * Because preemptible RCU does not exist, there is no need to suppress | ||
| 884 | * its CPU stall warnings. | ||
| 885 | */ | ||
| 886 | static void rcu_preempt_stall_reset(void) | ||
| 887 | { | ||
| 888 | } | ||
| 889 | |||
| 868 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | 890 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
| 869 | 891 | ||
| 870 | /* | 892 | /* |
| @@ -919,15 +941,6 @@ static void rcu_preempt_process_callbacks(void) | |||
| 919 | } | 941 | } |
| 920 | 942 | ||
| 921 | /* | 943 | /* |
| 922 | * In classic RCU, call_rcu() is just call_rcu_sched(). | ||
| 923 | */ | ||
| 924 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | ||
| 925 | { | ||
| 926 | call_rcu_sched(head, func); | ||
| 927 | } | ||
| 928 | EXPORT_SYMBOL_GPL(call_rcu); | ||
| 929 | |||
| 930 | /* | ||
| 931 | * Wait for an rcu-preempt grace period, but make it happen quickly. | 944 | * Wait for an rcu-preempt grace period, but make it happen quickly. |
| 932 | * But because preemptible RCU does not exist, map to rcu-sched. | 945 | * But because preemptible RCU does not exist, map to rcu-sched. |
| 933 | */ | 946 | */ |
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 36c95b45738e..d15430b9d122 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
| @@ -64,7 +64,9 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | |||
| 64 | rdp->dynticks_fqs); | 64 | rdp->dynticks_fqs); |
| 65 | #endif /* #ifdef CONFIG_NO_HZ */ | 65 | #endif /* #ifdef CONFIG_NO_HZ */ |
| 66 | seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); | 66 | seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); |
| 67 | seq_printf(m, " ql=%ld b=%ld\n", rdp->qlen, rdp->blimit); | 67 | seq_printf(m, " ql=%ld b=%ld", rdp->qlen, rdp->blimit); |
| 68 | seq_printf(m, " ci=%lu co=%lu ca=%lu\n", | ||
| 69 | rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); | ||
| 68 | } | 70 | } |
| 69 | 71 | ||
| 70 | #define PRINT_RCU_DATA(name, func, m) \ | 72 | #define PRINT_RCU_DATA(name, func, m) \ |
| @@ -119,7 +121,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
| 119 | rdp->dynticks_fqs); | 121 | rdp->dynticks_fqs); |
| 120 | #endif /* #ifdef CONFIG_NO_HZ */ | 122 | #endif /* #ifdef CONFIG_NO_HZ */ |
| 121 | seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); | 123 | seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); |
| 122 | seq_printf(m, ",%ld,%ld\n", rdp->qlen, rdp->blimit); | 124 | seq_printf(m, ",%ld,%ld", rdp->qlen, rdp->blimit); |
| 125 | seq_printf(m, ",%lu,%lu,%lu\n", | ||
| 126 | rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); | ||
| 123 | } | 127 | } |
| 124 | 128 | ||
| 125 | static int show_rcudata_csv(struct seq_file *m, void *unused) | 129 | static int show_rcudata_csv(struct seq_file *m, void *unused) |
| @@ -128,7 +132,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) | |||
| 128 | #ifdef CONFIG_NO_HZ | 132 | #ifdef CONFIG_NO_HZ |
| 129 | seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); | 133 | seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); |
| 130 | #endif /* #ifdef CONFIG_NO_HZ */ | 134 | #endif /* #ifdef CONFIG_NO_HZ */ |
| 131 | seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\"\n"); | 135 | seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n"); |
| 132 | #ifdef CONFIG_TREE_PREEMPT_RCU | 136 | #ifdef CONFIG_TREE_PREEMPT_RCU |
| 133 | seq_puts(m, "\"rcu_preempt:\"\n"); | 137 | seq_puts(m, "\"rcu_preempt:\"\n"); |
| 134 | PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); | 138 | PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); |
| @@ -262,7 +266,7 @@ static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) | |||
| 262 | struct rcu_data *rdp; | 266 | struct rcu_data *rdp; |
| 263 | 267 | ||
| 264 | for_each_possible_cpu(cpu) { | 268 | for_each_possible_cpu(cpu) { |
| 265 | rdp = rsp->rda[cpu]; | 269 | rdp = per_cpu_ptr(rsp->rda, cpu); |
| 266 | if (rdp->beenonline) | 270 | if (rdp->beenonline) |
| 267 | print_one_rcu_pending(m, rdp); | 271 | print_one_rcu_pending(m, rdp); |
| 268 | } | 272 | } |
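The last hunk swaps direct array indexing for per_cpu_ptr(), the visible side of rcu_state's rda field becoming a genuine percpu allocation. A minimal sketch of the idiom, with invented field names standing in for the kernel's real definitions:

```c
#include <linux/percpu.h>

struct rcu_data_sketch {
	long qlen;			/* callback queue length */
};

struct rcu_state_sketch {
	/* was: struct rcu_data *rda[NR_CPUS]; now a percpu pointer */
	struct rcu_data_sketch __percpu *rda;
};

/* Readers index the percpu area instead of a pointer array: */
static long qlen_of(struct rcu_state_sketch *rsp, int cpu)
{
	return per_cpu_ptr(rsp->rda, cpu)->qlen;
}
```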
diff --git a/kernel/sched.c b/kernel/sched.c index c0d2067f3e0d..d42992bccdfa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -426,9 +426,7 @@ struct root_domain { | |||
| 426 | */ | 426 | */ |
| 427 | cpumask_var_t rto_mask; | 427 | cpumask_var_t rto_mask; |
| 428 | atomic_t rto_count; | 428 | atomic_t rto_count; |
| 429 | #ifdef CONFIG_SMP | ||
| 430 | struct cpupri cpupri; | 429 | struct cpupri cpupri; |
| 431 | #endif | ||
| 432 | }; | 430 | }; |
| 433 | 431 | ||
| 434 | /* | 432 | /* |
| @@ -437,7 +435,7 @@ struct root_domain { | |||
| 437 | */ | 435 | */ |
| 438 | static struct root_domain def_root_domain; | 436 | static struct root_domain def_root_domain; |
| 439 | 437 | ||
| 440 | #endif | 438 | #endif /* CONFIG_SMP */ |
| 441 | 439 | ||
| 442 | /* | 440 | /* |
| 443 | * This is the main, per-CPU runqueue data structure. | 441 | * This is the main, per-CPU runqueue data structure. |
| @@ -488,11 +486,12 @@ struct rq { | |||
| 488 | */ | 486 | */ |
| 489 | unsigned long nr_uninterruptible; | 487 | unsigned long nr_uninterruptible; |
| 490 | 488 | ||
| 491 | struct task_struct *curr, *idle; | 489 | struct task_struct *curr, *idle, *stop; |
| 492 | unsigned long next_balance; | 490 | unsigned long next_balance; |
| 493 | struct mm_struct *prev_mm; | 491 | struct mm_struct *prev_mm; |
| 494 | 492 | ||
| 495 | u64 clock; | 493 | u64 clock; |
| 494 | u64 clock_task; | ||
| 496 | 495 | ||
| 497 | atomic_t nr_iowait; | 496 | atomic_t nr_iowait; |
| 498 | 497 | ||
| @@ -520,6 +519,10 @@ struct rq { | |||
| 520 | u64 avg_idle; | 519 | u64 avg_idle; |
| 521 | #endif | 520 | #endif |
| 522 | 521 | ||
| 522 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
| 523 | u64 prev_irq_time; | ||
| 524 | #endif | ||
| 525 | |||
| 523 | /* calc_load related fields */ | 526 | /* calc_load related fields */ |
| 524 | unsigned long calc_load_update; | 527 | unsigned long calc_load_update; |
| 525 | long calc_load_active; | 528 | long calc_load_active; |
| @@ -643,10 +646,22 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
| 643 | 646 | ||
| 644 | #endif /* CONFIG_CGROUP_SCHED */ | 647 | #endif /* CONFIG_CGROUP_SCHED */ |
| 645 | 648 | ||
| 649 | static u64 irq_time_cpu(int cpu); | ||
| 650 | static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time); | ||
| 651 | |||
| 646 | inline void update_rq_clock(struct rq *rq) | 652 | inline void update_rq_clock(struct rq *rq) |
| 647 | { | 653 | { |
| 648 | if (!rq->skip_clock_update) | 654 | if (!rq->skip_clock_update) { |
| 649 | rq->clock = sched_clock_cpu(cpu_of(rq)); | 655 | int cpu = cpu_of(rq); |
| 656 | u64 irq_time; | ||
| 657 | |||
| 658 | rq->clock = sched_clock_cpu(cpu); | ||
| 659 | irq_time = irq_time_cpu(cpu); | ||
| 660 | if (rq->clock - irq_time > rq->clock_task) | ||
| 661 | rq->clock_task = rq->clock - irq_time; | ||
| 662 | |||
| 663 | sched_irq_time_avg_update(rq, irq_time); | ||
| 664 | } | ||
| 650 | } | 665 | } |
| 651 | 666 | ||
| 652 | /* | 667 | /* |
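update_rq_clock() now maintains a second clock, rq->clock_task, which advances like rq->clock minus the per-cpu irq time reported by irq_time_cpu(); later hunks switch task-runtime accounting in sched_fair.c and sched_rt.c over to it. A standalone sketch of the invariant, assuming the simplified struct below:

```c
#include <stdint.h>

struct rq_sketch {
	uint64_t clock;		/* raw per-cpu clock */
	uint64_t clock_task;	/* clock minus accumulated irq time */
};

static void update_clocks(struct rq_sketch *rq, uint64_t now,
			  uint64_t total_irq_time)
{
	rq->clock = now;
	/*
	 * Only ever move clock_task forward: a racy irq_time snapshot
	 * (see the lockless-read comment in sched.c) must not make
	 * task time appear to run backwards.
	 */
	if (rq->clock - total_irq_time > rq->clock_task)
		rq->clock_task = rq->clock - total_irq_time;
}
```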
| @@ -723,7 +738,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
| 723 | size_t cnt, loff_t *ppos) | 738 | size_t cnt, loff_t *ppos) |
| 724 | { | 739 | { |
| 725 | char buf[64]; | 740 | char buf[64]; |
| 726 | char *cmp = buf; | 741 | char *cmp; |
| 727 | int neg = 0; | 742 | int neg = 0; |
| 728 | int i; | 743 | int i; |
| 729 | 744 | ||
| @@ -734,6 +749,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
| 734 | return -EFAULT; | 749 | return -EFAULT; |
| 735 | 750 | ||
| 736 | buf[cnt] = 0; | 751 | buf[cnt] = 0; |
| 752 | cmp = strstrip(buf); | ||
| 737 | 753 | ||
| 738 | if (strncmp(buf, "NO_", 3) == 0) { | 754 | if (strncmp(buf, "NO_", 3) == 0) { |
| 739 | neg = 1; | 755 | neg = 1; |
| @@ -741,9 +757,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
| 741 | } | 757 | } |
| 742 | 758 | ||
| 743 | for (i = 0; sched_feat_names[i]; i++) { | 759 | for (i = 0; sched_feat_names[i]; i++) { |
| 744 | int len = strlen(sched_feat_names[i]); | 760 | if (strcmp(cmp, sched_feat_names[i]) == 0) { |
| 745 | |||
| 746 | if (strncmp(cmp, sched_feat_names[i], len) == 0) { | ||
| 747 | if (neg) | 761 | if (neg) |
| 748 | sysctl_sched_features &= ~(1UL << i); | 762 | sysctl_sched_features &= ~(1UL << i); |
| 749 | else | 763 | else |
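This rewrite of sched_feat_write() fixes two parsing problems at once: strstrip() drops the trailing newline that `echo` appends, and strcmp() requires an exact feature name where the old length-limited strncmp() accepted any input that merely began with one. A userspace demonstration of the difference (strcspn() standing in for the kernel's strstrip()):

```c
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *feat = "OWNER_SPIN";
	char buf[] = "OWNER_SPINX\n";	/* bogus feature name */

	/* old behaviour: prefix comparison, spuriously matches */
	printf("strncmp: %s\n",
	       strncmp(buf, feat, strlen(feat)) ? "no match" : "match");

	buf[strcspn(buf, "\n")] = '\0';	/* crude stand-in for strstrip() */

	/* new behaviour: exact comparison, correctly rejects */
	printf("strcmp:  %s\n", strcmp(buf, feat) ? "no match" : "match");
	return 0;
}
```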
| @@ -1840,7 +1854,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | |||
| 1840 | 1854 | ||
| 1841 | static const struct sched_class rt_sched_class; | 1855 | static const struct sched_class rt_sched_class; |
| 1842 | 1856 | ||
| 1843 | #define sched_class_highest (&rt_sched_class) | 1857 | #define sched_class_highest (&stop_sched_class) |
| 1844 | #define for_each_class(class) \ | 1858 | #define for_each_class(class) \ |
| 1845 | for (class = sched_class_highest; class; class = class->next) | 1859 | for (class = sched_class_highest; class; class = class->next) |
| 1846 | 1860 | ||
| @@ -1858,12 +1872,6 @@ static void dec_nr_running(struct rq *rq) | |||
| 1858 | 1872 | ||
| 1859 | static void set_load_weight(struct task_struct *p) | 1873 | static void set_load_weight(struct task_struct *p) |
| 1860 | { | 1874 | { |
| 1861 | if (task_has_rt_policy(p)) { | ||
| 1862 | p->se.load.weight = 0; | ||
| 1863 | p->se.load.inv_weight = WMULT_CONST; | ||
| 1864 | return; | ||
| 1865 | } | ||
| 1866 | |||
| 1867 | /* | 1875 | /* |
| 1868 | * SCHED_IDLE tasks get minimal weight: | 1876 | * SCHED_IDLE tasks get minimal weight: |
| 1869 | */ | 1877 | */ |
| @@ -1917,13 +1925,132 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags) | |||
| 1917 | dec_nr_running(rq); | 1925 | dec_nr_running(rq); |
| 1918 | } | 1926 | } |
| 1919 | 1927 | ||
| 1928 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
| 1929 | |||
| 1930 | /* | ||
| 1931 | * There are no locks covering percpu hardirq/softirq time. | ||
| 1932 | * They are only modified in account_system_vtime, on the corresponding | ||
| 1933 | * CPU with interrupts disabled, so writes are safe. | ||
| 1934 | * They are read and saved off onto struct rq in update_rq_clock(). | ||
| 1935 | * This means another CPU may read this CPU's irq time and race with | ||
| 1936 | * irq/account_system_vtime on this CPU. We would then get either the | ||
| 1937 | * old or the new value (or a semi-updated value on 32 bit), with the | ||
| 1938 | * side effect of accounting a slice of irq time to the wrong task when | ||
| 1939 | * an irq is in progress while we read rq->clock. That is a worthy | ||
| 1940 | * compromise in place of taking locks on each irq in account_system_time. | ||
| 1941 | */ | ||
| 1942 | static DEFINE_PER_CPU(u64, cpu_hardirq_time); | ||
| 1943 | static DEFINE_PER_CPU(u64, cpu_softirq_time); | ||
| 1944 | |||
| 1945 | static DEFINE_PER_CPU(u64, irq_start_time); | ||
| 1946 | static int sched_clock_irqtime; | ||
| 1947 | |||
| 1948 | void enable_sched_clock_irqtime(void) | ||
| 1949 | { | ||
| 1950 | sched_clock_irqtime = 1; | ||
| 1951 | } | ||
| 1952 | |||
| 1953 | void disable_sched_clock_irqtime(void) | ||
| 1954 | { | ||
| 1955 | sched_clock_irqtime = 0; | ||
| 1956 | } | ||
| 1957 | |||
| 1958 | static u64 irq_time_cpu(int cpu) | ||
| 1959 | { | ||
| 1960 | if (!sched_clock_irqtime) | ||
| 1961 | return 0; | ||
| 1962 | |||
| 1963 | return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); | ||
| 1964 | } | ||
| 1965 | |||
| 1966 | void account_system_vtime(struct task_struct *curr) | ||
| 1967 | { | ||
| 1968 | unsigned long flags; | ||
| 1969 | int cpu; | ||
| 1970 | u64 now, delta; | ||
| 1971 | |||
| 1972 | if (!sched_clock_irqtime) | ||
| 1973 | return; | ||
| 1974 | |||
| 1975 | local_irq_save(flags); | ||
| 1976 | |||
| 1977 | cpu = smp_processor_id(); | ||
| 1978 | now = sched_clock_cpu(cpu); | ||
| 1979 | delta = now - per_cpu(irq_start_time, cpu); | ||
| 1980 | per_cpu(irq_start_time, cpu) = now; | ||
| 1981 | /* | ||
| 1982 | * We do not account for softirq time from ksoftirqd here. | ||
| 1983 | * We want to continue accounting softirq time to ksoftirqd thread | ||
| 1984 | * in that case, so as not to confuse scheduler with a special task | ||
| 1985 | * that do not consume any time, but still wants to run. | ||
| 1986 | */ | ||
| 1987 | if (hardirq_count()) | ||
| 1988 | per_cpu(cpu_hardirq_time, cpu) += delta; | ||
| 1989 | else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) | ||
| 1990 | per_cpu(cpu_softirq_time, cpu) += delta; | ||
| 1991 | |||
| 1992 | local_irq_restore(flags); | ||
| 1993 | } | ||
| 1994 | EXPORT_SYMBOL_GPL(account_system_vtime); | ||
| 1995 | |||
| 1996 | static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) | ||
| 1997 | { | ||
| 1998 | if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) { | ||
| 1999 | u64 delta_irq = curr_irq_time - rq->prev_irq_time; | ||
| 2000 | rq->prev_irq_time = curr_irq_time; | ||
| 2001 | sched_rt_avg_update(rq, delta_irq); | ||
| 2002 | } | ||
| 2003 | } | ||
| 2004 | |||
| 2005 | #else | ||
| 2006 | |||
| 2007 | static u64 irq_time_cpu(int cpu) | ||
| 2008 | { | ||
| 2009 | return 0; | ||
| 2010 | } | ||
| 2011 | |||
| 2012 | static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { } | ||
| 2013 | |||
| 2014 | #endif | ||
| 2015 | |||
| 1920 | #include "sched_idletask.c" | 2016 | #include "sched_idletask.c" |
| 1921 | #include "sched_fair.c" | 2017 | #include "sched_fair.c" |
| 1922 | #include "sched_rt.c" | 2018 | #include "sched_rt.c" |
| 2019 | #include "sched_stoptask.c" | ||
| 1923 | #ifdef CONFIG_SCHED_DEBUG | 2020 | #ifdef CONFIG_SCHED_DEBUG |
| 1924 | # include "sched_debug.c" | 2021 | # include "sched_debug.c" |
| 1925 | #endif | 2022 | #endif |
| 1926 | 2023 | ||
| 2024 | void sched_set_stop_task(int cpu, struct task_struct *stop) | ||
| 2025 | { | ||
| 2026 | struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; | ||
| 2027 | struct task_struct *old_stop = cpu_rq(cpu)->stop; | ||
| 2028 | |||
| 2029 | if (stop) { | ||
| 2030 | /* | ||
| 2031 | * Make it appear like a SCHED_FIFO task; it's something | ||
| 2032 | * userspace knows about and won't get confused about. | ||
| 2033 | * | ||
| 2034 | * Also, it will make PI more or less work without too | ||
| 2035 | * much confusion -- but then, stop work should not | ||
| 2036 | * rely on PI working anyway. | ||
| 2037 | */ | ||
| 2038 | sched_setscheduler_nocheck(stop, SCHED_FIFO, ¶m); | ||
| 2039 | |||
| 2040 | stop->sched_class = &stop_sched_class; | ||
| 2041 | } | ||
| 2042 | |||
| 2043 | cpu_rq(cpu)->stop = stop; | ||
| 2044 | |||
| 2045 | if (old_stop) { | ||
| 2046 | /* | ||
| 2047 | * Reset it back to a normal scheduling class so that | ||
| 2048 | * it can die in pieces. | ||
| 2049 | */ | ||
| 2050 | old_stop->sched_class = &rt_sched_class; | ||
| 2051 | } | ||
| 2052 | } | ||
| 2053 | |||
| 1927 | /* | 2054 | /* |
| 1928 | * __normal_prio - return the priority that is based on the static prio | 2055 | * __normal_prio - return the priority that is based on the static prio |
| 1929 | */ | 2056 | */ |
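sched_set_stop_task() is the hand-over point by which the owner of the per-cpu stop thread, the cpu_stop machinery, promotes its kthread into the new class and demotes it again on teardown. A hypothetical caller; the hook names and the stopper argument are invented for illustration:

```c
#include <linux/sched.h>

/*
 * Hypothetical hotplug hooks; 'stopper' stands in for the real
 * per-cpu cpu_stop kthread.
 */
static void stopper_cpu_online(int cpu, struct task_struct *stopper)
{
	sched_set_stop_task(cpu, stopper);	/* adopt the stop class */
}

static void stopper_cpu_offline(int cpu)
{
	sched_set_stop_task(cpu, NULL);		/* old thread falls back to RT */
}
```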
| @@ -2003,6 +2130,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
| 2003 | if (p->sched_class != &fair_sched_class) | 2130 | if (p->sched_class != &fair_sched_class) |
| 2004 | return 0; | 2131 | return 0; |
| 2005 | 2132 | ||
| 2133 | if (unlikely(p->policy == SCHED_IDLE)) | ||
| 2134 | return 0; | ||
| 2135 | |||
| 2006 | /* | 2136 | /* |
| 2007 | * Buddy candidates are cache hot: | 2137 | * Buddy candidates are cache hot: |
| 2008 | */ | 2138 | */ |
| @@ -2852,14 +2982,14 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
| 2852 | */ | 2982 | */ |
| 2853 | arch_start_context_switch(prev); | 2983 | arch_start_context_switch(prev); |
| 2854 | 2984 | ||
| 2855 | if (likely(!mm)) { | 2985 | if (!mm) { |
| 2856 | next->active_mm = oldmm; | 2986 | next->active_mm = oldmm; |
| 2857 | atomic_inc(&oldmm->mm_count); | 2987 | atomic_inc(&oldmm->mm_count); |
| 2858 | enter_lazy_tlb(oldmm, next); | 2988 | enter_lazy_tlb(oldmm, next); |
| 2859 | } else | 2989 | } else |
| 2860 | switch_mm(oldmm, mm, next); | 2990 | switch_mm(oldmm, mm, next); |
| 2861 | 2991 | ||
| 2862 | if (likely(!prev->mm)) { | 2992 | if (!prev->mm) { |
| 2863 | prev->active_mm = NULL; | 2993 | prev->active_mm = NULL; |
| 2864 | rq->prev_mm = oldmm; | 2994 | rq->prev_mm = oldmm; |
| 2865 | } | 2995 | } |
| @@ -3248,7 +3378,7 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) | |||
| 3248 | 3378 | ||
| 3249 | if (task_current(rq, p)) { | 3379 | if (task_current(rq, p)) { |
| 3250 | update_rq_clock(rq); | 3380 | update_rq_clock(rq); |
| 3251 | ns = rq->clock - p->se.exec_start; | 3381 | ns = rq->clock_task - p->se.exec_start; |
| 3252 | if ((s64)ns < 0) | 3382 | if ((s64)ns < 0) |
| 3253 | ns = 0; | 3383 | ns = 0; |
| 3254 | } | 3384 | } |
| @@ -3397,7 +3527,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
| 3397 | tmp = cputime_to_cputime64(cputime); | 3527 | tmp = cputime_to_cputime64(cputime); |
| 3398 | if (hardirq_count() - hardirq_offset) | 3528 | if (hardirq_count() - hardirq_offset) |
| 3399 | cpustat->irq = cputime64_add(cpustat->irq, tmp); | 3529 | cpustat->irq = cputime64_add(cpustat->irq, tmp); |
| 3400 | else if (softirq_count()) | 3530 | else if (in_serving_softirq()) |
| 3401 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); | 3531 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); |
| 3402 | else | 3532 | else |
| 3403 | cpustat->system = cputime64_add(cpustat->system, tmp); | 3533 | cpustat->system = cputime64_add(cpustat->system, tmp); |
| @@ -3723,17 +3853,13 @@ pick_next_task(struct rq *rq) | |||
| 3723 | return p; | 3853 | return p; |
| 3724 | } | 3854 | } |
| 3725 | 3855 | ||
| 3726 | class = sched_class_highest; | 3856 | for_each_class(class) { |
| 3727 | for ( ; ; ) { | ||
| 3728 | p = class->pick_next_task(rq); | 3857 | p = class->pick_next_task(rq); |
| 3729 | if (p) | 3858 | if (p) |
| 3730 | return p; | 3859 | return p; |
| 3731 | /* | ||
| 3732 | * Will never be NULL as the idle class always | ||
| 3733 | * returns a non-NULL p: | ||
| 3734 | */ | ||
| 3735 | class = class->next; | ||
| 3736 | } | 3860 | } |
| 3861 | |||
| 3862 | BUG(); /* the idle class will always have a runnable task */ | ||
| 3737 | } | 3863 | } |
| 3738 | 3864 | ||
| 3739 | /* | 3865 | /* |
| @@ -4358,6 +4484,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
| 4358 | 4484 | ||
| 4359 | rq = task_rq_lock(p, &flags); | 4485 | rq = task_rq_lock(p, &flags); |
| 4360 | 4486 | ||
| 4487 | trace_sched_pi_setprio(p, prio); | ||
| 4361 | oldprio = p->prio; | 4488 | oldprio = p->prio; |
| 4362 | prev_class = p->sched_class; | 4489 | prev_class = p->sched_class; |
| 4363 | on_rq = p->se.on_rq; | 4490 | on_rq = p->se.on_rq; |
| @@ -4645,7 +4772,7 @@ recheck: | |||
| 4645 | } | 4772 | } |
| 4646 | 4773 | ||
| 4647 | if (user) { | 4774 | if (user) { |
| 4648 | retval = security_task_setscheduler(p, policy, param); | 4775 | retval = security_task_setscheduler(p); |
| 4649 | if (retval) | 4776 | if (retval) |
| 4650 | return retval; | 4777 | return retval; |
| 4651 | } | 4778 | } |
| @@ -4661,6 +4788,15 @@ recheck: | |||
| 4661 | */ | 4788 | */ |
| 4662 | rq = __task_rq_lock(p); | 4789 | rq = __task_rq_lock(p); |
| 4663 | 4790 | ||
| 4791 | /* | ||
| 4792 | * Changing the policy of the stop threads is a very bad idea. | ||
| 4793 | */ | ||
| 4794 | if (p == rq->stop) { | ||
| 4795 | __task_rq_unlock(rq); | ||
| 4796 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
| 4797 | return -EINVAL; | ||
| 4798 | } | ||
| 4799 | |||
| 4664 | #ifdef CONFIG_RT_GROUP_SCHED | 4800 | #ifdef CONFIG_RT_GROUP_SCHED |
| 4665 | if (user) { | 4801 | if (user) { |
| 4666 | /* | 4802 | /* |
| @@ -4887,13 +5023,13 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
| 4887 | if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) | 5023 | if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) |
| 4888 | goto out_unlock; | 5024 | goto out_unlock; |
| 4889 | 5025 | ||
| 4890 | retval = security_task_setscheduler(p, 0, NULL); | 5026 | retval = security_task_setscheduler(p); |
| 4891 | if (retval) | 5027 | if (retval) |
| 4892 | goto out_unlock; | 5028 | goto out_unlock; |
| 4893 | 5029 | ||
| 4894 | cpuset_cpus_allowed(p, cpus_allowed); | 5030 | cpuset_cpus_allowed(p, cpus_allowed); |
| 4895 | cpumask_and(new_mask, in_mask, cpus_allowed); | 5031 | cpumask_and(new_mask, in_mask, cpus_allowed); |
| 4896 | again: | 5032 | again: |
| 4897 | retval = set_cpus_allowed_ptr(p, new_mask); | 5033 | retval = set_cpus_allowed_ptr(p, new_mask); |
| 4898 | 5034 | ||
| 4899 | if (!retval) { | 5035 | if (!retval) { |
| @@ -5337,7 +5473,19 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
| 5337 | idle->se.exec_start = sched_clock(); | 5473 | idle->se.exec_start = sched_clock(); |
| 5338 | 5474 | ||
| 5339 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); | 5475 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); |
| 5476 | /* | ||
| 5477 | * We have a chicken-and-egg problem here: even though we are | ||
| 5478 | * holding rq->lock, the task's cpu is not yet set to this cpu, so | ||
| 5479 | * the lockdep check in task_group() will fail. | ||
| 5480 | * | ||
| 5481 | * This is a similar case to sched_fork(); alternatively we could | ||
| 5482 | * use task_rq_lock() here and obtain the other rq->lock. | ||
| 5483 | * | ||
| 5484 | * Silence PROVE_RCU. | ||
| 5485 | */ | ||
| 5486 | rcu_read_lock(); | ||
| 5340 | __set_task_cpu(idle, cpu); | 5487 | __set_task_cpu(idle, cpu); |
| 5488 | rcu_read_unlock(); | ||
| 5341 | 5489 | ||
| 5342 | rq->curr = rq->idle = idle; | 5490 | rq->curr = rq->idle = idle; |
| 5343 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) | 5491 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) |
| @@ -6514,6 +6662,7 @@ struct s_data { | |||
| 6514 | cpumask_var_t nodemask; | 6662 | cpumask_var_t nodemask; |
| 6515 | cpumask_var_t this_sibling_map; | 6663 | cpumask_var_t this_sibling_map; |
| 6516 | cpumask_var_t this_core_map; | 6664 | cpumask_var_t this_core_map; |
| 6665 | cpumask_var_t this_book_map; | ||
| 6517 | cpumask_var_t send_covered; | 6666 | cpumask_var_t send_covered; |
| 6518 | cpumask_var_t tmpmask; | 6667 | cpumask_var_t tmpmask; |
| 6519 | struct sched_group **sched_group_nodes; | 6668 | struct sched_group **sched_group_nodes; |
| @@ -6525,6 +6674,7 @@ enum s_alloc { | |||
| 6525 | sa_rootdomain, | 6674 | sa_rootdomain, |
| 6526 | sa_tmpmask, | 6675 | sa_tmpmask, |
| 6527 | sa_send_covered, | 6676 | sa_send_covered, |
| 6677 | sa_this_book_map, | ||
| 6528 | sa_this_core_map, | 6678 | sa_this_core_map, |
| 6529 | sa_this_sibling_map, | 6679 | sa_this_sibling_map, |
| 6530 | sa_nodemask, | 6680 | sa_nodemask, |
| @@ -6560,31 +6710,48 @@ cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, | |||
| 6560 | #ifdef CONFIG_SCHED_MC | 6710 | #ifdef CONFIG_SCHED_MC |
| 6561 | static DEFINE_PER_CPU(struct static_sched_domain, core_domains); | 6711 | static DEFINE_PER_CPU(struct static_sched_domain, core_domains); |
| 6562 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_core); | 6712 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_core); |
| 6563 | #endif /* CONFIG_SCHED_MC */ | ||
| 6564 | 6713 | ||
| 6565 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) | ||
| 6566 | static int | 6714 | static int |
| 6567 | cpu_to_core_group(int cpu, const struct cpumask *cpu_map, | 6715 | cpu_to_core_group(int cpu, const struct cpumask *cpu_map, |
| 6568 | struct sched_group **sg, struct cpumask *mask) | 6716 | struct sched_group **sg, struct cpumask *mask) |
| 6569 | { | 6717 | { |
| 6570 | int group; | 6718 | int group; |
| 6571 | 6719 | #ifdef CONFIG_SCHED_SMT | |
| 6572 | cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); | 6720 | cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); |
| 6573 | group = cpumask_first(mask); | 6721 | group = cpumask_first(mask); |
| 6722 | #else | ||
| 6723 | group = cpu; | ||
| 6724 | #endif | ||
| 6574 | if (sg) | 6725 | if (sg) |
| 6575 | *sg = &per_cpu(sched_group_core, group).sg; | 6726 | *sg = &per_cpu(sched_group_core, group).sg; |
| 6576 | return group; | 6727 | return group; |
| 6577 | } | 6728 | } |
| 6578 | #elif defined(CONFIG_SCHED_MC) | 6729 | #endif /* CONFIG_SCHED_MC */ |
| 6730 | |||
| 6731 | /* | ||
| 6732 | * book sched-domains: | ||
| 6733 | */ | ||
| 6734 | #ifdef CONFIG_SCHED_BOOK | ||
| 6735 | static DEFINE_PER_CPU(struct static_sched_domain, book_domains); | ||
| 6736 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_book); | ||
| 6737 | |||
| 6579 | static int | 6738 | static int |
| 6580 | cpu_to_core_group(int cpu, const struct cpumask *cpu_map, | 6739 | cpu_to_book_group(int cpu, const struct cpumask *cpu_map, |
| 6581 | struct sched_group **sg, struct cpumask *unused) | 6740 | struct sched_group **sg, struct cpumask *mask) |
| 6582 | { | 6741 | { |
| 6742 | int group = cpu; | ||
| 6743 | #ifdef CONFIG_SCHED_MC | ||
| 6744 | cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); | ||
| 6745 | group = cpumask_first(mask); | ||
| 6746 | #elif defined(CONFIG_SCHED_SMT) | ||
| 6747 | cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); | ||
| 6748 | group = cpumask_first(mask); | ||
| 6749 | #endif | ||
| 6583 | if (sg) | 6750 | if (sg) |
| 6584 | *sg = &per_cpu(sched_group_core, cpu).sg; | 6751 | *sg = &per_cpu(sched_group_book, group).sg; |
| 6585 | return cpu; | 6752 | return group; |
| 6586 | } | 6753 | } |
| 6587 | #endif | 6754 | #endif /* CONFIG_SCHED_BOOK */ |
| 6588 | 6755 | ||
| 6589 | static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); | 6756 | static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); |
| 6590 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); | 6757 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); |
| @@ -6594,7 +6761,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, | |||
| 6594 | struct sched_group **sg, struct cpumask *mask) | 6761 | struct sched_group **sg, struct cpumask *mask) |
| 6595 | { | 6762 | { |
| 6596 | int group; | 6763 | int group; |
| 6597 | #ifdef CONFIG_SCHED_MC | 6764 | #ifdef CONFIG_SCHED_BOOK |
| 6765 | cpumask_and(mask, cpu_book_mask(cpu), cpu_map); | ||
| 6766 | group = cpumask_first(mask); | ||
| 6767 | #elif defined(CONFIG_SCHED_MC) | ||
| 6598 | cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); | 6768 | cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); |
| 6599 | group = cpumask_first(mask); | 6769 | group = cpumask_first(mask); |
| 6600 | #elif defined(CONFIG_SCHED_SMT) | 6770 | #elif defined(CONFIG_SCHED_SMT) |
| @@ -6855,6 +7025,9 @@ SD_INIT_FUNC(CPU) | |||
| 6855 | #ifdef CONFIG_SCHED_MC | 7025 | #ifdef CONFIG_SCHED_MC |
| 6856 | SD_INIT_FUNC(MC) | 7026 | SD_INIT_FUNC(MC) |
| 6857 | #endif | 7027 | #endif |
| 7028 | #ifdef CONFIG_SCHED_BOOK | ||
| 7029 | SD_INIT_FUNC(BOOK) | ||
| 7030 | #endif | ||
| 6858 | 7031 | ||
| 6859 | static int default_relax_domain_level = -1; | 7032 | static int default_relax_domain_level = -1; |
| 6860 | 7033 | ||
| @@ -6904,6 +7077,8 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what, | |||
| 6904 | free_cpumask_var(d->tmpmask); /* fall through */ | 7077 | free_cpumask_var(d->tmpmask); /* fall through */ |
| 6905 | case sa_send_covered: | 7078 | case sa_send_covered: |
| 6906 | free_cpumask_var(d->send_covered); /* fall through */ | 7079 | free_cpumask_var(d->send_covered); /* fall through */ |
| 7080 | case sa_this_book_map: | ||
| 7081 | free_cpumask_var(d->this_book_map); /* fall through */ | ||
| 6907 | case sa_this_core_map: | 7082 | case sa_this_core_map: |
| 6908 | free_cpumask_var(d->this_core_map); /* fall through */ | 7083 | free_cpumask_var(d->this_core_map); /* fall through */ |
| 6909 | case sa_this_sibling_map: | 7084 | case sa_this_sibling_map: |
| @@ -6950,8 +7125,10 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, | |||
| 6950 | return sa_nodemask; | 7125 | return sa_nodemask; |
| 6951 | if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL)) | 7126 | if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL)) |
| 6952 | return sa_this_sibling_map; | 7127 | return sa_this_sibling_map; |
| 6953 | if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) | 7128 | if (!alloc_cpumask_var(&d->this_book_map, GFP_KERNEL)) |
| 6954 | return sa_this_core_map; | 7129 | return sa_this_core_map; |
| 7130 | if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) | ||
| 7131 | return sa_this_book_map; | ||
| 6955 | if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL)) | 7132 | if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL)) |
| 6956 | return sa_send_covered; | 7133 | return sa_send_covered; |
| 6957 | d->rd = alloc_rootdomain(); | 7134 | d->rd = alloc_rootdomain(); |
| @@ -7009,6 +7186,23 @@ static struct sched_domain *__build_cpu_sched_domain(struct s_data *d, | |||
| 7009 | return sd; | 7186 | return sd; |
| 7010 | } | 7187 | } |
| 7011 | 7188 | ||
| 7189 | static struct sched_domain *__build_book_sched_domain(struct s_data *d, | ||
| 7190 | const struct cpumask *cpu_map, struct sched_domain_attr *attr, | ||
| 7191 | struct sched_domain *parent, int i) | ||
| 7192 | { | ||
| 7193 | struct sched_domain *sd = parent; | ||
| 7194 | #ifdef CONFIG_SCHED_BOOK | ||
| 7195 | sd = &per_cpu(book_domains, i).sd; | ||
| 7196 | SD_INIT(sd, BOOK); | ||
| 7197 | set_domain_attribute(sd, attr); | ||
| 7198 | cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i)); | ||
| 7199 | sd->parent = parent; | ||
| 7200 | parent->child = sd; | ||
| 7201 | cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask); | ||
| 7202 | #endif | ||
| 7203 | return sd; | ||
| 7204 | } | ||
| 7205 | |||
| 7012 | static struct sched_domain *__build_mc_sched_domain(struct s_data *d, | 7206 | static struct sched_domain *__build_mc_sched_domain(struct s_data *d, |
| 7013 | const struct cpumask *cpu_map, struct sched_domain_attr *attr, | 7207 | const struct cpumask *cpu_map, struct sched_domain_attr *attr, |
| 7014 | struct sched_domain *parent, int i) | 7208 | struct sched_domain *parent, int i) |
| @@ -7066,6 +7260,15 @@ static void build_sched_groups(struct s_data *d, enum sched_domain_level l, | |||
| 7066 | d->send_covered, d->tmpmask); | 7260 | d->send_covered, d->tmpmask); |
| 7067 | break; | 7261 | break; |
| 7068 | #endif | 7262 | #endif |
| 7263 | #ifdef CONFIG_SCHED_BOOK | ||
| 7264 | case SD_LV_BOOK: /* set up book groups */ | ||
| 7265 | cpumask_and(d->this_book_map, cpu_map, cpu_book_mask(cpu)); | ||
| 7266 | if (cpu == cpumask_first(d->this_book_map)) | ||
| 7267 | init_sched_build_groups(d->this_book_map, cpu_map, | ||
| 7268 | &cpu_to_book_group, | ||
| 7269 | d->send_covered, d->tmpmask); | ||
| 7270 | break; | ||
| 7271 | #endif | ||
| 7069 | case SD_LV_CPU: /* set up physical groups */ | 7272 | case SD_LV_CPU: /* set up physical groups */ |
| 7070 | cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map); | 7273 | cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map); |
| 7071 | if (!cpumask_empty(d->nodemask)) | 7274 | if (!cpumask_empty(d->nodemask)) |
| @@ -7113,12 +7316,14 @@ static int __build_sched_domains(const struct cpumask *cpu_map, | |||
| 7113 | 7316 | ||
| 7114 | sd = __build_numa_sched_domains(&d, cpu_map, attr, i); | 7317 | sd = __build_numa_sched_domains(&d, cpu_map, attr, i); |
| 7115 | sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i); | 7318 | sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i); |
| 7319 | sd = __build_book_sched_domain(&d, cpu_map, attr, sd, i); | ||
| 7116 | sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i); | 7320 | sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i); |
| 7117 | sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i); | 7321 | sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i); |
| 7118 | } | 7322 | } |
| 7119 | 7323 | ||
| 7120 | for_each_cpu(i, cpu_map) { | 7324 | for_each_cpu(i, cpu_map) { |
| 7121 | build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i); | 7325 | build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i); |
| 7326 | build_sched_groups(&d, SD_LV_BOOK, cpu_map, i); | ||
| 7122 | build_sched_groups(&d, SD_LV_MC, cpu_map, i); | 7327 | build_sched_groups(&d, SD_LV_MC, cpu_map, i); |
| 7123 | } | 7328 | } |
| 7124 | 7329 | ||
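These hunks slot the new BOOK level, added for s390 machines whose books group several multi-core chips, between the MC and physical levels. A toy userspace rendering of the resulting parent chain, purely illustrative:

```c
#include <stdio.h>

int main(void)
{
	/* Innermost to outermost, as chained by the sd->parent links
	 * set up by the __build_*_sched_domain() calls above; levels
	 * whose span matches their child's are later degenerated. */
	const char *level[] = {
		"SD_LV_SIBLING  (SMT siblings)",
		"SD_LV_MC       (cores on one chip)",
		"SD_LV_BOOK     (chips in one book)",
		"SD_LV_CPU      (physical package/node)",
		"NUMA levels    (nodes, all nodes)",
	};
	for (unsigned int i = 0; i < sizeof(level) / sizeof(level[0]); i++)
		printf("%*s-> %s\n", 2 * (int)i, "", level[i]);
	return 0;
}
```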
| @@ -7149,6 +7354,12 @@ static int __build_sched_domains(const struct cpumask *cpu_map, | |||
| 7149 | init_sched_groups_power(i, sd); | 7354 | init_sched_groups_power(i, sd); |
| 7150 | } | 7355 | } |
| 7151 | #endif | 7356 | #endif |
| 7357 | #ifdef CONFIG_SCHED_BOOK | ||
| 7358 | for_each_cpu(i, cpu_map) { | ||
| 7359 | sd = &per_cpu(book_domains, i).sd; | ||
| 7360 | init_sched_groups_power(i, sd); | ||
| 7361 | } | ||
| 7362 | #endif | ||
| 7152 | 7363 | ||
| 7153 | for_each_cpu(i, cpu_map) { | 7364 | for_each_cpu(i, cpu_map) { |
| 7154 | sd = &per_cpu(phys_domains, i).sd; | 7365 | sd = &per_cpu(phys_domains, i).sd; |
| @@ -7174,6 +7385,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map, | |||
| 7174 | sd = &per_cpu(cpu_domains, i).sd; | 7385 | sd = &per_cpu(cpu_domains, i).sd; |
| 7175 | #elif defined(CONFIG_SCHED_MC) | 7386 | #elif defined(CONFIG_SCHED_MC) |
| 7176 | sd = &per_cpu(core_domains, i).sd; | 7387 | sd = &per_cpu(core_domains, i).sd; |
| 7388 | #elif defined(CONFIG_SCHED_BOOK) | ||
| 7389 | sd = &per_cpu(book_domains, i).sd; | ||
| 7177 | #else | 7390 | #else |
| 7178 | sd = &per_cpu(phys_domains, i).sd; | 7391 | sd = &per_cpu(phys_domains, i).sd; |
| 7179 | #endif | 7392 | #endif |
| @@ -8078,9 +8291,9 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
| 8078 | 8291 | ||
| 8079 | return 1; | 8292 | return 1; |
| 8080 | 8293 | ||
| 8081 | err_free_rq: | 8294 | err_free_rq: |
| 8082 | kfree(cfs_rq); | 8295 | kfree(cfs_rq); |
| 8083 | err: | 8296 | err: |
| 8084 | return 0; | 8297 | return 0; |
| 8085 | } | 8298 | } |
| 8086 | 8299 | ||
| @@ -8168,9 +8381,9 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
| 8168 | 8381 | ||
| 8169 | return 1; | 8382 | return 1; |
| 8170 | 8383 | ||
| 8171 | err_free_rq: | 8384 | err_free_rq: |
| 8172 | kfree(rt_rq); | 8385 | kfree(rt_rq); |
| 8173 | err: | 8386 | err: |
| 8174 | return 0; | 8387 | return 0; |
| 8175 | } | 8388 | } |
| 8176 | 8389 | ||
| @@ -8528,7 +8741,7 @@ static int tg_set_bandwidth(struct task_group *tg, | |||
| 8528 | raw_spin_unlock(&rt_rq->rt_runtime_lock); | 8741 | raw_spin_unlock(&rt_rq->rt_runtime_lock); |
| 8529 | } | 8742 | } |
| 8530 | raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); | 8743 | raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); |
| 8531 | unlock: | 8744 | unlock: |
| 8532 | read_unlock(&tasklist_lock); | 8745 | read_unlock(&tasklist_lock); |
| 8533 | mutex_unlock(&rt_constraints_mutex); | 8746 | mutex_unlock(&rt_constraints_mutex); |
| 8534 | 8747 | ||
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index db3f674ca49d..933f3d1b62ea 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
| @@ -25,7 +25,7 @@ | |||
| 25 | 25 | ||
| 26 | /* | 26 | /* |
| 27 | * Targeted preemption latency for CPU-bound tasks: | 27 | * Targeted preemption latency for CPU-bound tasks: |
| 28 | * (default: 5ms * (1 + ilog(ncpus)), units: nanoseconds) | 28 | * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds) |
| 29 | * | 29 | * |
| 30 | * NOTE: this latency value is not the same as the concept of | 30 | * NOTE: this latency value is not the same as the concept of |
| 31 | * 'timeslice length' - timeslices in CFS are of variable length | 31 | * 'timeslice length' - timeslices in CFS are of variable length |
| @@ -52,7 +52,7 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling | |||
| 52 | 52 | ||
| 53 | /* | 53 | /* |
| 54 | * Minimal preemption granularity for CPU-bound tasks: | 54 | * Minimal preemption granularity for CPU-bound tasks: |
| 55 | * (default: 2 msec * (1 + ilog(ncpus)), units: nanoseconds) | 55 | * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) |
| 56 | */ | 56 | */ |
| 57 | unsigned int sysctl_sched_min_granularity = 750000ULL; | 57 | unsigned int sysctl_sched_min_granularity = 750000ULL; |
| 58 | unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; | 58 | unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; |
| @@ -519,7 +519,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, | |||
| 519 | static void update_curr(struct cfs_rq *cfs_rq) | 519 | static void update_curr(struct cfs_rq *cfs_rq) |
| 520 | { | 520 | { |
| 521 | struct sched_entity *curr = cfs_rq->curr; | 521 | struct sched_entity *curr = cfs_rq->curr; |
| 522 | u64 now = rq_of(cfs_rq)->clock; | 522 | u64 now = rq_of(cfs_rq)->clock_task; |
| 523 | unsigned long delta_exec; | 523 | unsigned long delta_exec; |
| 524 | 524 | ||
| 525 | if (unlikely(!curr)) | 525 | if (unlikely(!curr)) |
| @@ -602,7 +602,7 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 602 | /* | 602 | /* |
| 603 | * We are starting a new run period: | 603 | * We are starting a new run period: |
| 604 | */ | 604 | */ |
| 605 | se->exec_start = rq_of(cfs_rq)->clock; | 605 | se->exec_start = rq_of(cfs_rq)->clock_task; |
| 606 | } | 606 | } |
| 607 | 607 | ||
| 608 | /************************************************** | 608 | /************************************************** |
| @@ -1764,6 +1764,10 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, | |||
| 1764 | set_task_cpu(p, this_cpu); | 1764 | set_task_cpu(p, this_cpu); |
| 1765 | activate_task(this_rq, p, 0); | 1765 | activate_task(this_rq, p, 0); |
| 1766 | check_preempt_curr(this_rq, p, 0); | 1766 | check_preempt_curr(this_rq, p, 0); |
| 1767 | |||
| 1768 | /* re-arm NEWIDLE balancing when moving tasks */ | ||
| 1769 | src_rq->avg_idle = this_rq->avg_idle = 2*sysctl_sched_migration_cost; | ||
| 1770 | this_rq->idle_stamp = 0; | ||
| 1767 | } | 1771 | } |
| 1768 | 1772 | ||
| 1769 | /* | 1773 | /* |
| @@ -1798,7 +1802,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
| 1798 | * 2) too many balance attempts have failed. | 1802 | * 2) too many balance attempts have failed. |
| 1799 | */ | 1803 | */ |
| 1800 | 1804 | ||
| 1801 | tsk_cache_hot = task_hot(p, rq->clock, sd); | 1805 | tsk_cache_hot = task_hot(p, rq->clock_task, sd); |
| 1802 | if (!tsk_cache_hot || | 1806 | if (!tsk_cache_hot || |
| 1803 | sd->nr_balance_failed > sd->cache_nice_tries) { | 1807 | sd->nr_balance_failed > sd->cache_nice_tries) { |
| 1804 | #ifdef CONFIG_SCHEDSTATS | 1808 | #ifdef CONFIG_SCHEDSTATS |
| @@ -2030,12 +2034,14 @@ struct sd_lb_stats { | |||
| 2030 | unsigned long this_load; | 2034 | unsigned long this_load; |
| 2031 | unsigned long this_load_per_task; | 2035 | unsigned long this_load_per_task; |
| 2032 | unsigned long this_nr_running; | 2036 | unsigned long this_nr_running; |
| 2037 | unsigned long this_has_capacity; | ||
| 2033 | 2038 | ||
| 2034 | /* Statistics of the busiest group */ | 2039 | /* Statistics of the busiest group */ |
| 2035 | unsigned long max_load; | 2040 | unsigned long max_load; |
| 2036 | unsigned long busiest_load_per_task; | 2041 | unsigned long busiest_load_per_task; |
| 2037 | unsigned long busiest_nr_running; | 2042 | unsigned long busiest_nr_running; |
| 2038 | unsigned long busiest_group_capacity; | 2043 | unsigned long busiest_group_capacity; |
| 2044 | unsigned long busiest_has_capacity; | ||
| 2039 | 2045 | ||
| 2040 | int group_imb; /* Is there imbalance in this sd */ | 2046 | int group_imb; /* Is there imbalance in this sd */ |
| 2041 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | 2047 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) |
| @@ -2058,6 +2064,7 @@ struct sg_lb_stats { | |||
| 2058 | unsigned long sum_weighted_load; /* Weighted load of group's tasks */ | 2064 | unsigned long sum_weighted_load; /* Weighted load of group's tasks */ |
| 2059 | unsigned long group_capacity; | 2065 | unsigned long group_capacity; |
| 2060 | int group_imb; /* Is there an imbalance in the group ? */ | 2066 | int group_imb; /* Is there an imbalance in the group ? */ |
| 2067 | int group_has_capacity; /* Is there extra capacity in the group? */ | ||
| 2061 | }; | 2068 | }; |
| 2062 | 2069 | ||
| 2063 | /** | 2070 | /** |
| @@ -2268,7 +2275,13 @@ unsigned long scale_rt_power(int cpu) | |||
| 2268 | u64 total, available; | 2275 | u64 total, available; |
| 2269 | 2276 | ||
| 2270 | total = sched_avg_period() + (rq->clock - rq->age_stamp); | 2277 | total = sched_avg_period() + (rq->clock - rq->age_stamp); |
| 2271 | available = total - rq->rt_avg; | 2278 | |
| 2279 | if (unlikely(total < rq->rt_avg)) { | ||
| 2280 | /* Ensures that power won't end up being negative */ | ||
| 2281 | available = 0; | ||
| 2282 | } else { | ||
| 2283 | available = total - rq->rt_avg; | ||
| 2284 | } | ||
| 2272 | 2285 | ||
| 2273 | if (unlikely((s64)total < SCHED_LOAD_SCALE)) | 2286 | if (unlikely((s64)total < SCHED_LOAD_SCALE)) |
| 2274 | total = SCHED_LOAD_SCALE; | 2287 | total = SCHED_LOAD_SCALE; |
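scale_rt_power() yields the fraction of the averaging window not consumed by RT activity (and, via NONIRQ_POWER below, irq activity), scaled to SCHED_LOAD_SCALE; the new branch keeps the unsigned subtraction from wrapping when rt_avg transiently exceeds the window. A standalone sketch of the arithmetic, assuming SCHED_LOAD_SCALE is 1024:

```c
#include <stdint.h>
#include <stdio.h>

#define SCHED_LOAD_SHIFT 10
#define SCHED_LOAD_SCALE (1UL << SCHED_LOAD_SHIFT)	/* 1024 */

static unsigned long scale_rt_power_sketch(uint64_t total, uint64_t rt_avg)
{
	/* the new guard: without it, total - rt_avg would wrap to a
	 * huge unsigned value and report absurdly high power */
	uint64_t available = (total < rt_avg) ? 0 : total - rt_avg;

	if (total < SCHED_LOAD_SCALE)
		total = SCHED_LOAD_SCALE;
	total >>= SCHED_LOAD_SHIFT;

	return (unsigned long)(available / total);
}

int main(void)
{
	/* 25% of the window eaten by RT/irq time -> power ~768/1024 */
	printf("%lu\n", scale_rt_power_sketch(4000000, 1000000));
	/* rt_avg overshoot is clamped to zero power, not garbage */
	printf("%lu\n", scale_rt_power_sketch(1000000, 1500000));
	return 0;
}
```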
| @@ -2378,7 +2391,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
| 2378 | int local_group, const struct cpumask *cpus, | 2391 | int local_group, const struct cpumask *cpus, |
| 2379 | int *balance, struct sg_lb_stats *sgs) | 2392 | int *balance, struct sg_lb_stats *sgs) |
| 2380 | { | 2393 | { |
| 2381 | unsigned long load, max_cpu_load, min_cpu_load; | 2394 | unsigned long load, max_cpu_load, min_cpu_load, max_nr_running; |
| 2382 | int i; | 2395 | int i; |
| 2383 | unsigned int balance_cpu = -1, first_idle_cpu = 0; | 2396 | unsigned int balance_cpu = -1, first_idle_cpu = 0; |
| 2384 | unsigned long avg_load_per_task = 0; | 2397 | unsigned long avg_load_per_task = 0; |
| @@ -2389,6 +2402,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
| 2389 | /* Tally up the load of all CPUs in the group */ | 2402 | /* Tally up the load of all CPUs in the group */ |
| 2390 | max_cpu_load = 0; | 2403 | max_cpu_load = 0; |
| 2391 | min_cpu_load = ~0UL; | 2404 | min_cpu_load = ~0UL; |
| 2405 | max_nr_running = 0; | ||
| 2392 | 2406 | ||
| 2393 | for_each_cpu_and(i, sched_group_cpus(group), cpus) { | 2407 | for_each_cpu_and(i, sched_group_cpus(group), cpus) { |
| 2394 | struct rq *rq = cpu_rq(i); | 2408 | struct rq *rq = cpu_rq(i); |
| @@ -2406,8 +2420,10 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
| 2406 | load = target_load(i, load_idx); | 2420 | load = target_load(i, load_idx); |
| 2407 | } else { | 2421 | } else { |
| 2408 | load = source_load(i, load_idx); | 2422 | load = source_load(i, load_idx); |
| 2409 | if (load > max_cpu_load) | 2423 | if (load > max_cpu_load) { |
| 2410 | max_cpu_load = load; | 2424 | max_cpu_load = load; |
| 2425 | max_nr_running = rq->nr_running; | ||
| 2426 | } | ||
| 2411 | if (min_cpu_load > load) | 2427 | if (min_cpu_load > load) |
| 2412 | min_cpu_load = load; | 2428 | min_cpu_load = load; |
| 2413 | } | 2429 | } |
| @@ -2447,13 +2463,15 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
| 2447 | if (sgs->sum_nr_running) | 2463 | if (sgs->sum_nr_running) |
| 2448 | avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; | 2464 | avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; |
| 2449 | 2465 | ||
| 2450 | if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task) | 2466 | if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task && max_nr_running > 1) |
| 2451 | sgs->group_imb = 1; | 2467 | sgs->group_imb = 1; |
| 2452 | 2468 | ||
| 2453 | sgs->group_capacity = | 2469 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); |
| 2454 | DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); | ||
| 2455 | if (!sgs->group_capacity) | 2470 | if (!sgs->group_capacity) |
| 2456 | sgs->group_capacity = fix_small_capacity(sd, group); | 2471 | sgs->group_capacity = fix_small_capacity(sd, group); |
| 2472 | |||
| 2473 | if (sgs->group_capacity > sgs->sum_nr_running) | ||
| 2474 | sgs->group_has_capacity = 1; | ||
| 2457 | } | 2475 | } |
| 2458 | 2476 | ||
| 2459 | /** | 2477 | /** |
| @@ -2542,9 +2560,14 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, | |||
| 2542 | /* | 2560 | /* |
| 2543 | * In case the child domain prefers tasks go to siblings | 2561 | * In case the child domain prefers tasks go to siblings |
| 2544 | * first, lower the sg capacity to one so that we'll try | 2562 | * first, lower the sg capacity to one so that we'll try |
| 2545 | * and move all the excess tasks away. | 2563 | * and move all the excess tasks away. We lower the capacity |
| 2564 | * of a group only if the local group has the capacity to fit | ||
| 2565 | * these excess tasks, i.e. nr_running < group_capacity. The | ||
| 2566 | * extra check prevents the case where you always pull from the | ||
| 2567 | * heaviest group when it is already under-utilized (possible when | ||
| 2568 | * a single large-weight task outweighs the rest of the system). | ||
| 2546 | */ | 2569 | */ |
| 2547 | if (prefer_sibling) | 2570 | if (prefer_sibling && !local_group && sds->this_has_capacity) |
| 2548 | sgs.group_capacity = min(sgs.group_capacity, 1UL); | 2571 | sgs.group_capacity = min(sgs.group_capacity, 1UL); |
| 2549 | 2572 | ||
| 2550 | if (local_group) { | 2573 | if (local_group) { |
| @@ -2552,12 +2575,14 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, | |||
| 2552 | sds->this = sg; | 2575 | sds->this = sg; |
| 2553 | sds->this_nr_running = sgs.sum_nr_running; | 2576 | sds->this_nr_running = sgs.sum_nr_running; |
| 2554 | sds->this_load_per_task = sgs.sum_weighted_load; | 2577 | sds->this_load_per_task = sgs.sum_weighted_load; |
| 2578 | sds->this_has_capacity = sgs.group_has_capacity; | ||
| 2555 | } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) { | 2579 | } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) { |
| 2556 | sds->max_load = sgs.avg_load; | 2580 | sds->max_load = sgs.avg_load; |
| 2557 | sds->busiest = sg; | 2581 | sds->busiest = sg; |
| 2558 | sds->busiest_nr_running = sgs.sum_nr_running; | 2582 | sds->busiest_nr_running = sgs.sum_nr_running; |
| 2559 | sds->busiest_group_capacity = sgs.group_capacity; | 2583 | sds->busiest_group_capacity = sgs.group_capacity; |
| 2560 | sds->busiest_load_per_task = sgs.sum_weighted_load; | 2584 | sds->busiest_load_per_task = sgs.sum_weighted_load; |
| 2585 | sds->busiest_has_capacity = sgs.group_has_capacity; | ||
| 2561 | sds->group_imb = sgs.group_imb; | 2586 | sds->group_imb = sgs.group_imb; |
| 2562 | } | 2587 | } |
| 2563 | 2588 | ||
| @@ -2754,6 +2779,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, | |||
| 2754 | return fix_small_imbalance(sds, this_cpu, imbalance); | 2779 | return fix_small_imbalance(sds, this_cpu, imbalance); |
| 2755 | 2780 | ||
| 2756 | } | 2781 | } |
| 2782 | |||
| 2757 | /******* find_busiest_group() helpers end here *********************/ | 2783 | /******* find_busiest_group() helpers end here *********************/ |
| 2758 | 2784 | ||
| 2759 | /** | 2785 | /** |
| @@ -2805,6 +2831,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 2805 | * 4) This group is busier than the average busyness at this | 2831 | * 4) This group is busier than the average busyness at this |
| 2806 | * sched_domain. | 2832 | * sched_domain. |
| 2807 | * 5) The imbalance is within the specified limit. | 2833 | * 5) The imbalance is within the specified limit. |
| 2834 | * | ||
| 2835 | * Note: when doing newidle balance, if the local group has excess | ||
| 2836 | * capacity (i.e. nr_running < group_capacity) and the busiest group | ||
| 2837 | * does not have any capacity, we force a load balance to pull tasks | ||
| 2838 | * to the local group. In this case, we skip past checks 3, 4 and 5. | ||
| 2808 | */ | 2839 | */ |
| 2809 | if (!(*balance)) | 2840 | if (!(*balance)) |
| 2810 | goto ret; | 2841 | goto ret; |
| @@ -2816,6 +2847,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 2816 | if (!sds.busiest || sds.busiest_nr_running == 0) | 2847 | if (!sds.busiest || sds.busiest_nr_running == 0) |
| 2817 | goto out_balanced; | 2848 | goto out_balanced; |
| 2818 | 2849 | ||
| 2850 | /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */ | ||
| 2851 | if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity && | ||
| 2852 | !sds.busiest_has_capacity) | ||
| 2853 | goto force_balance; | ||
| 2854 | |||
| 2819 | if (sds.this_load >= sds.max_load) | 2855 | if (sds.this_load >= sds.max_load) |
| 2820 | goto out_balanced; | 2856 | goto out_balanced; |
| 2821 | 2857 | ||
| @@ -2827,6 +2863,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 2827 | if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) | 2863 | if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) |
| 2828 | goto out_balanced; | 2864 | goto out_balanced; |
| 2829 | 2865 | ||
| 2866 | force_balance: | ||
| 2830 | /* Looks like there is an imbalance. Compute it */ | 2867 | /* Looks like there is an imbalance. Compute it */ |
| 2831 | calculate_imbalance(&sds, this_cpu, imbalance); | 2868 | calculate_imbalance(&sds, this_cpu, imbalance); |
| 2832 | return sds.busiest; | 2869 | return sds.busiest; |
| @@ -3031,7 +3068,14 @@ redo: | |||
| 3031 | 3068 | ||
| 3032 | if (!ld_moved) { | 3069 | if (!ld_moved) { |
| 3033 | schedstat_inc(sd, lb_failed[idle]); | 3070 | schedstat_inc(sd, lb_failed[idle]); |
| 3034 | sd->nr_balance_failed++; | 3071 | /* |
| 3072 | * Increment the failure counter only on periodic balance. | ||
| 3073 | * We do not want newidle balance, which can be very | ||
| 3074 | * frequent, to pollute the failure counter and cause | ||
| 3075 | * excessive cache_hot migrations and active balances. | ||
| 3076 | */ | ||
| 3077 | if (idle != CPU_NEWLY_IDLE) | ||
| 3078 | sd->nr_balance_failed++; | ||
| 3035 | 3079 | ||
| 3036 | if (need_active_balance(sd, sd_idle, idle, cpu_of(busiest), | 3080 | if (need_active_balance(sd, sd_idle, idle, cpu_of(busiest), |
| 3037 | this_cpu)) { | 3081 | this_cpu)) { |
| @@ -3153,10 +3197,8 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
| 3153 | interval = msecs_to_jiffies(sd->balance_interval); | 3197 | interval = msecs_to_jiffies(sd->balance_interval); |
| 3154 | if (time_after(next_balance, sd->last_balance + interval)) | 3198 | if (time_after(next_balance, sd->last_balance + interval)) |
| 3155 | next_balance = sd->last_balance + interval; | 3199 | next_balance = sd->last_balance + interval; |
| 3156 | if (pulled_task) { | 3200 | if (pulled_task) |
| 3157 | this_rq->idle_stamp = 0; | ||
| 3158 | break; | 3201 | break; |
| 3159 | } | ||
| 3160 | } | 3202 | } |
| 3161 | 3203 | ||
| 3162 | raw_spin_lock(&this_rq->lock); | 3204 | raw_spin_lock(&this_rq->lock); |
| @@ -3751,8 +3793,11 @@ static void task_fork_fair(struct task_struct *p) | |||
| 3751 | 3793 | ||
| 3752 | update_rq_clock(rq); | 3794 | update_rq_clock(rq); |
| 3753 | 3795 | ||
| 3754 | if (unlikely(task_cpu(p) != this_cpu)) | 3796 | if (unlikely(task_cpu(p) != this_cpu)) { |
| 3797 | rcu_read_lock(); | ||
| 3755 | __set_task_cpu(p, this_cpu); | 3798 | __set_task_cpu(p, this_cpu); |
| 3799 | rcu_read_unlock(); | ||
| 3800 | } | ||
| 3756 | 3801 | ||
| 3757 | update_curr(cfs_rq); | 3802 | update_curr(cfs_rq); |
| 3758 | 3803 | ||
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 83c66e8ad3ee..185f920ec1a2 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
| @@ -61,3 +61,8 @@ SCHED_FEAT(ASYM_EFF_LOAD, 1) | |||
| 61 | * release the lock. Decreases scheduling overhead. | 61 | * release the lock. Decreases scheduling overhead. |
| 62 | */ | 62 | */ |
| 63 | SCHED_FEAT(OWNER_SPIN, 1) | 63 | SCHED_FEAT(OWNER_SPIN, 1) |
| 64 | |||
| 65 | /* | ||
| 66 | * Decrement CPU power based on irq activity | ||
| 67 | */ | ||
| 68 | SCHED_FEAT(NONIRQ_POWER, 1) | ||
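sched_irq_time_avg_update() in kernel/sched.c consults this feature: when set, per-cpu irq-time deltas are folded into rq->rt_avg, so scale_rt_power() discounts a CPU's power by its interrupt load. Like any sched feature it can be flipped at runtime through debugfs; a small userspace sketch, assuming debugfs is mounted at its usual path:

```c
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/sched_features", "w");

	if (!f) {
		perror("sched_features");
		return 1;
	}
	/* the NO_ prefix parsed by sched_feat_write() clears the bit */
	fputs("NO_NONIRQ_POWER", f);
	fclose(f);
	return 0;
}
```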
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index d10c80ebb67a..bea7d79f7e9c 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
| @@ -609,7 +609,7 @@ static void update_curr_rt(struct rq *rq) | |||
| 609 | if (!task_has_rt_policy(curr)) | 609 | if (!task_has_rt_policy(curr)) |
| 610 | return; | 610 | return; |
| 611 | 611 | ||
| 612 | delta_exec = rq->clock - curr->se.exec_start; | 612 | delta_exec = rq->clock_task - curr->se.exec_start; |
| 613 | if (unlikely((s64)delta_exec < 0)) | 613 | if (unlikely((s64)delta_exec < 0)) |
| 614 | delta_exec = 0; | 614 | delta_exec = 0; |
| 615 | 615 | ||
| @@ -618,7 +618,7 @@ static void update_curr_rt(struct rq *rq) | |||
| 618 | curr->se.sum_exec_runtime += delta_exec; | 618 | curr->se.sum_exec_runtime += delta_exec; |
| 619 | account_group_exec_runtime(curr, delta_exec); | 619 | account_group_exec_runtime(curr, delta_exec); |
| 620 | 620 | ||
| 621 | curr->se.exec_start = rq->clock; | 621 | curr->se.exec_start = rq->clock_task; |
| 622 | cpuacct_charge(curr, delta_exec); | 622 | cpuacct_charge(curr, delta_exec); |
| 623 | 623 | ||
| 624 | sched_rt_avg_update(rq, delta_exec); | 624 | sched_rt_avg_update(rq, delta_exec); |
| @@ -960,18 +960,19 @@ select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags) | |||
| 960 | * runqueue. Otherwise simply start this RT task | 960 | * runqueue. Otherwise simply start this RT task |
| 961 | * on its current runqueue. | 961 | * on its current runqueue. |
| 962 | * | 962 | * |
| 963 | * We want to avoid overloading runqueues. Even if | 963 | * We want to avoid overloading runqueues. If the woken |
| 964 | * the RT task is of higher priority than the current RT task. | 964 | * task is of higher priority, it will stay on this CPU |
| 965 | * RT tasks behave differently than other tasks. If | 965 | * and the lower prio task should be moved to another CPU. |
| 966 | * one gets preempted, we try to push it off to another queue. | 966 | * Even though this will probably make the lower prio task |
| 967 | * So trying to keep a preempting RT task on the same | 967 | * lose its cache, we do not want to bounce a higher prio task |
| 968 | * cache hot CPU will force the running RT task to | 968 | * around just because it gave up its CPU, perhaps for a |
| 969 | * a cold CPU. So we waste all the cache for the lower | 969 | * lock? |
| 970 | * RT task in hopes of saving some of a RT task | 970 | * |
| 971 | * that is just being woken and probably will have | 971 | * For equal prio tasks, we just let the scheduler sort it out. |
| 972 | * cold cache anyway. | ||
| 973 | */ | 972 | */ |
| 974 | if (unlikely(rt_task(rq->curr)) && | 973 | if (unlikely(rt_task(rq->curr)) && |
| 974 | (rq->curr->rt.nr_cpus_allowed < 2 || | ||
| 975 | rq->curr->prio < p->prio) && | ||
| 975 | (p->rt.nr_cpus_allowed > 1)) { | 976 | (p->rt.nr_cpus_allowed > 1)) { |
| 976 | int cpu = find_lowest_rq(p); | 977 | int cpu = find_lowest_rq(p); |
| 977 | 978 | ||
| @@ -1074,7 +1075,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq) | |||
| 1074 | } while (rt_rq); | 1075 | } while (rt_rq); |
| 1075 | 1076 | ||
| 1076 | p = rt_task_of(rt_se); | 1077 | p = rt_task_of(rt_se); |
| 1077 | p->se.exec_start = rq->clock; | 1078 | p->se.exec_start = rq->clock_task; |
| 1078 | 1079 | ||
| 1079 | return p; | 1080 | return p; |
| 1080 | } | 1081 | } |
| @@ -1139,7 +1140,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) | |||
| 1139 | for_each_leaf_rt_rq(rt_rq, rq) { | 1140 | for_each_leaf_rt_rq(rt_rq, rq) { |
| 1140 | array = &rt_rq->active; | 1141 | array = &rt_rq->active; |
| 1141 | idx = sched_find_first_bit(array->bitmap); | 1142 | idx = sched_find_first_bit(array->bitmap); |
| 1142 | next_idx: | 1143 | next_idx: |
| 1143 | if (idx >= MAX_RT_PRIO) | 1144 | if (idx >= MAX_RT_PRIO) |
| 1144 | continue; | 1145 | continue; |
| 1145 | if (next && next->prio < idx) | 1146 | if (next && next->prio < idx) |
| @@ -1315,7 +1316,7 @@ static int push_rt_task(struct rq *rq) | |||
| 1315 | if (!next_task) | 1316 | if (!next_task) |
| 1316 | return 0; | 1317 | return 0; |
| 1317 | 1318 | ||
| 1318 | retry: | 1319 | retry: |
| 1319 | if (unlikely(next_task == rq->curr)) { | 1320 | if (unlikely(next_task == rq->curr)) { |
| 1320 | WARN_ON(1); | 1321 | WARN_ON(1); |
| 1321 | return 0; | 1322 | return 0; |
| @@ -1463,7 +1464,7 @@ static int pull_rt_task(struct rq *this_rq) | |||
| 1463 | * but possible) | 1464 | * but possible) |
| 1464 | */ | 1465 | */ |
| 1465 | } | 1466 | } |
| 1466 | skip: | 1467 | skip: |
| 1467 | double_unlock_balance(this_rq, src_rq); | 1468 | double_unlock_balance(this_rq, src_rq); |
| 1468 | } | 1469 | } |
| 1469 | 1470 | ||
| @@ -1491,7 +1492,10 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) | |||
| 1491 | if (!task_running(rq, p) && | 1492 | if (!task_running(rq, p) && |
| 1492 | !test_tsk_need_resched(rq->curr) && | 1493 | !test_tsk_need_resched(rq->curr) && |
| 1493 | has_pushable_tasks(rq) && | 1494 | has_pushable_tasks(rq) && |
| 1494 | p->rt.nr_cpus_allowed > 1) | 1495 | p->rt.nr_cpus_allowed > 1 && |
| 1496 | rt_task(rq->curr) && | ||
| 1497 | (rq->curr->rt.nr_cpus_allowed < 2 || | ||
| 1498 | rq->curr->prio < p->prio)) | ||
| 1495 | push_rt_tasks(rq); | 1499 | push_rt_tasks(rq); |
| 1496 | } | 1500 | } |
| 1497 | 1501 | ||
| @@ -1709,7 +1713,7 @@ static void set_curr_task_rt(struct rq *rq) | |||
| 1709 | { | 1713 | { |
| 1710 | struct task_struct *p = rq->curr; | 1714 | struct task_struct *p = rq->curr; |
| 1711 | 1715 | ||
| 1712 | p->se.exec_start = rq->clock; | 1716 | p->se.exec_start = rq->clock_task; |
| 1713 | 1717 | ||
| 1714 | /* The running task is never eligible for pushing */ | 1718 | /* The running task is never eligible for pushing */ |
| 1715 | dequeue_pushable_task(rq, p); | 1719 | dequeue_pushable_task(rq, p); |
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c new file mode 100644 index 000000000000..45bddc0c1048 --- /dev/null +++ b/kernel/sched_stoptask.c | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | /* | ||
| 2 | * stop-task scheduling class. | ||
| 3 | * | ||
| 4 | * The stop task is the highest-priority task in the system; it preempts | ||
| 5 | * everything and will be preempted by nothing. | ||
| 6 | * | ||
| 7 | * See kernel/stop_machine.c | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifdef CONFIG_SMP | ||
| 11 | static int | ||
| 12 | select_task_rq_stop(struct rq *rq, struct task_struct *p, | ||
| 13 | int sd_flag, int flags) | ||
| 14 | { | ||
| 15 | return task_cpu(p); /* stop tasks never migrate */ | ||
| 16 | } | ||
| 17 | #endif /* CONFIG_SMP */ | ||
| 18 | |||
| 19 | static void | ||
| 20 | check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags) | ||
| 21 | { | ||
| 22 | resched_task(rq->curr); /* we preempt everything */ | ||
| 23 | } | ||
| 24 | |||
| 25 | static struct task_struct *pick_next_task_stop(struct rq *rq) | ||
| 26 | { | ||
| 27 | struct task_struct *stop = rq->stop; | ||
| 28 | |||
| 29 | if (stop && stop->state == TASK_RUNNING) | ||
| 30 | return stop; | ||
| 31 | |||
| 32 | return NULL; | ||
| 33 | } | ||
| 34 | |||
| 35 | static void | ||
| 36 | enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags) | ||
| 37 | { | ||
| 38 | } | ||
| 39 | |||
| 40 | static void | ||
| 41 | dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags) | ||
| 42 | { | ||
| 43 | } | ||
| 44 | |||
| 45 | static void yield_task_stop(struct rq *rq) | ||
| 46 | { | ||
| 47 | BUG(); /* the stop task should never yield; it's pointless. */ | ||
| 48 | } | ||
| 49 | |||
| 50 | static void put_prev_task_stop(struct rq *rq, struct task_struct *prev) | ||
| 51 | { | ||
| 52 | } | ||
| 53 | |||
| 54 | static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued) | ||
| 55 | { | ||
| 56 | } | ||
| 57 | |||
| 58 | static void set_curr_task_stop(struct rq *rq) | ||
| 59 | { | ||
| 60 | } | ||
| 61 | |||
| 62 | static void switched_to_stop(struct rq *rq, struct task_struct *p, | ||
| 63 | int running) | ||
| 64 | { | ||
| 65 | BUG(); /* it's impossible to change to this class */ | ||
| 66 | } | ||
| 67 | |||
| 68 | static void prio_changed_stop(struct rq *rq, struct task_struct *p, | ||
| 69 | int oldprio, int running) | ||
| 70 | { | ||
| 71 | BUG(); /* how!? what priority? */ | ||
| 72 | } | ||
| 73 | |||
| 74 | static unsigned int | ||
| 75 | get_rr_interval_stop(struct rq *rq, struct task_struct *task) | ||
| 76 | { | ||
| 77 | return 0; | ||
| 78 | } | ||
| 79 | |||
| 80 | /* | ||
| 81 | * Simple, special scheduling class for the per-CPU stop tasks: | ||
| 82 | */ | ||
| 83 | static const struct sched_class stop_sched_class = { | ||
| 84 | .next = &rt_sched_class, | ||
| 85 | |||
| 86 | .enqueue_task = enqueue_task_stop, | ||
| 87 | .dequeue_task = dequeue_task_stop, | ||
| 88 | .yield_task = yield_task_stop, | ||
| 89 | |||
| 90 | .check_preempt_curr = check_preempt_curr_stop, | ||
| 91 | |||
| 92 | .pick_next_task = pick_next_task_stop, | ||
| 93 | .put_prev_task = put_prev_task_stop, | ||
| 94 | |||
| 95 | #ifdef CONFIG_SMP | ||
| 96 | .select_task_rq = select_task_rq_stop, | ||
| 97 | #endif | ||
| 98 | |||
| 99 | .set_curr_task = set_curr_task_stop, | ||
| 100 | .task_tick = task_tick_stop, | ||
| 101 | |||
| 102 | .get_rr_interval = get_rr_interval_stop, | ||
| 103 | |||
| 104 | .prio_changed = prio_changed_stop, | ||
| 105 | .switched_to = switched_to_stop, | ||
| 106 | |||
| 107 | /* no .task_new for stop tasks */ | ||
| 108 | }; | ||
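Note on the class chain: `.next = &rt_sched_class` is what gives the stop class its precedence — the core scheduler walks the singly linked class list from the highest class down and takes the first task offered. A minimal sketch of that consumer loop (the real one lives in kernel/sched.c behind pick_next_task()/for_each_class(); this is only its shape, not the exact body):

	static struct task_struct *pick_next_task(struct rq *rq)
	{
		const struct sched_class *class;
		struct task_struct *p;

		for (class = &stop_sched_class; class; class = class->next) {
			p = class->pick_next_task(rq);
			if (p)
				return p; /* stop beats rt beats fair beats idle */
		}

		BUG(); /* unreachable: the idle class always returns a task */
	}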
diff --git a/kernel/softirq.c b/kernel/softirq.c index 07b4f1b1a73a..fc978889b194 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -77,11 +77,21 @@ void wakeup_softirqd(void) | |||
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | /* | 79 | /* |
| 80 | * preempt_count and SOFTIRQ_OFFSET usage: | ||
| 81 | * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving | ||
| 82 | * softirq processing. | ||
| 83 | * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET) | ||
| 84 | * on local_bh_disable or local_bh_enable. | ||
| 85 | * This lets us distinguish between whether we are currently processing | ||
| 86 | * softirq and whether we just have bh disabled. | ||
| 87 | */ | ||
| 88 | |||
| 89 | /* | ||
| 80 | * This one is for softirq.c-internal use, | 90 | * This one is for softirq.c-internal use, |
| 81 | * where hardirqs are disabled legitimately: | 91 | * where hardirqs are disabled legitimately: |
| 82 | */ | 92 | */ |
| 83 | #ifdef CONFIG_TRACE_IRQFLAGS | 93 | #ifdef CONFIG_TRACE_IRQFLAGS |
| 84 | static void __local_bh_disable(unsigned long ip) | 94 | static void __local_bh_disable(unsigned long ip, unsigned int cnt) |
| 85 | { | 95 | { |
| 86 | unsigned long flags; | 96 | unsigned long flags; |
| 87 | 97 | ||
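Note on the offsets: because local_bh_disable() now adds SOFTIRQ_DISABLE_OFFSET (two units) while actual softirq execution adds a single SOFTIRQ_OFFSET, the low bit of the softirq field in preempt_count is set exactly while a softirq is being serviced. A small sketch of the resulting predicates (in_serving_softirq() is the real accessor this enables; bh_disabled_only() is an invented helper for illustration):

	#define SOFTIRQ_DISABLE_OFFSET	(2 * SOFTIRQ_OFFSET)

	/* true only while __do_softirq() is running */
	#define in_serving_softirq()	(softirq_count() & SOFTIRQ_OFFSET)

	/* hypothetical helper, not in the tree: bh off but no softirq */
	#define bh_disabled_only() \
		(softirq_count() && !in_serving_softirq())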
| @@ -95,32 +105,43 @@ static void __local_bh_disable(unsigned long ip) | |||
| 95 | * We must manually increment preempt_count here and manually | 105 | * We must manually increment preempt_count here and manually |
| 96 | * call the trace_preempt_off later. | 106 | * call the trace_preempt_off later. |
| 97 | */ | 107 | */ |
| 98 | preempt_count() += SOFTIRQ_OFFSET; | 108 | preempt_count() += cnt; |
| 99 | /* | 109 | /* |
| 100 | * Were softirqs turned off above: | 110 | * Were softirqs turned off above: |
| 101 | */ | 111 | */ |
| 102 | if (softirq_count() == SOFTIRQ_OFFSET) | 112 | if (softirq_count() == cnt) |
| 103 | trace_softirqs_off(ip); | 113 | trace_softirqs_off(ip); |
| 104 | raw_local_irq_restore(flags); | 114 | raw_local_irq_restore(flags); |
| 105 | 115 | ||
| 106 | if (preempt_count() == SOFTIRQ_OFFSET) | 116 | if (preempt_count() == cnt) |
| 107 | trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); | 117 | trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); |
| 108 | } | 118 | } |
| 109 | #else /* !CONFIG_TRACE_IRQFLAGS */ | 119 | #else /* !CONFIG_TRACE_IRQFLAGS */ |
| 110 | static inline void __local_bh_disable(unsigned long ip) | 120 | static inline void __local_bh_disable(unsigned long ip, unsigned int cnt) |
| 111 | { | 121 | { |
| 112 | add_preempt_count(SOFTIRQ_OFFSET); | 122 | add_preempt_count(cnt); |
| 113 | barrier(); | 123 | barrier(); |
| 114 | } | 124 | } |
| 115 | #endif /* CONFIG_TRACE_IRQFLAGS */ | 125 | #endif /* CONFIG_TRACE_IRQFLAGS */ |
| 116 | 126 | ||
| 117 | void local_bh_disable(void) | 127 | void local_bh_disable(void) |
| 118 | { | 128 | { |
| 119 | __local_bh_disable((unsigned long)__builtin_return_address(0)); | 129 | __local_bh_disable((unsigned long)__builtin_return_address(0), |
| 130 | SOFTIRQ_DISABLE_OFFSET); | ||
| 120 | } | 131 | } |
| 121 | 132 | ||
| 122 | EXPORT_SYMBOL(local_bh_disable); | 133 | EXPORT_SYMBOL(local_bh_disable); |
| 123 | 134 | ||
| 135 | static void __local_bh_enable(unsigned int cnt) | ||
| 136 | { | ||
| 137 | WARN_ON_ONCE(in_irq()); | ||
| 138 | WARN_ON_ONCE(!irqs_disabled()); | ||
| 139 | |||
| 140 | if (softirq_count() == cnt) | ||
| 141 | trace_softirqs_on((unsigned long)__builtin_return_address(0)); | ||
| 142 | sub_preempt_count(cnt); | ||
| 143 | } | ||
| 144 | |||
| 124 | /* | 145 | /* |
| 125 | * Special-case - softirqs can safely be enabled in | 146 | * Special-case - softirqs can safely be enabled in |
| 126 | * cond_resched_softirq(), or by __do_softirq(), | 147 | * cond_resched_softirq(), or by __do_softirq(), |
| @@ -128,12 +149,7 @@ EXPORT_SYMBOL(local_bh_disable); | |||
| 128 | */ | 149 | */ |
| 129 | void _local_bh_enable(void) | 150 | void _local_bh_enable(void) |
| 130 | { | 151 | { |
| 131 | WARN_ON_ONCE(in_irq()); | 152 | __local_bh_enable(SOFTIRQ_DISABLE_OFFSET); |
| 132 | WARN_ON_ONCE(!irqs_disabled()); | ||
| 133 | |||
| 134 | if (softirq_count() == SOFTIRQ_OFFSET) | ||
| 135 | trace_softirqs_on((unsigned long)__builtin_return_address(0)); | ||
| 136 | sub_preempt_count(SOFTIRQ_OFFSET); | ||
| 137 | } | 153 | } |
| 138 | 154 | ||
| 139 | EXPORT_SYMBOL(_local_bh_enable); | 155 | EXPORT_SYMBOL(_local_bh_enable); |
| @@ -147,13 +163,13 @@ static inline void _local_bh_enable_ip(unsigned long ip) | |||
| 147 | /* | 163 | /* |
| 148 | * Are softirqs going to be turned on now: | 164 | * Are softirqs going to be turned on now: |
| 149 | */ | 165 | */ |
| 150 | if (softirq_count() == SOFTIRQ_OFFSET) | 166 | if (softirq_count() == SOFTIRQ_DISABLE_OFFSET) |
| 151 | trace_softirqs_on(ip); | 167 | trace_softirqs_on(ip); |
| 152 | /* | 168 | /* |
| 153 | * Keep preemption disabled until we are done with | 169 | * Keep preemption disabled until we are done with |
| 154 | * softirq processing: | 170 | * softirq processing: |
| 155 | */ | 171 | */ |
| 156 | sub_preempt_count(SOFTIRQ_OFFSET - 1); | 172 | sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1); |
| 157 | 173 | ||
| 158 | if (unlikely(!in_interrupt() && local_softirq_pending())) | 174 | if (unlikely(!in_interrupt() && local_softirq_pending())) |
| 159 | do_softirq(); | 175 | do_softirq(); |
| @@ -198,7 +214,8 @@ asmlinkage void __do_softirq(void) | |||
| 198 | pending = local_softirq_pending(); | 214 | pending = local_softirq_pending(); |
| 199 | account_system_vtime(current); | 215 | account_system_vtime(current); |
| 200 | 216 | ||
| 201 | __local_bh_disable((unsigned long)__builtin_return_address(0)); | 217 | __local_bh_disable((unsigned long)__builtin_return_address(0), |
| 218 | SOFTIRQ_OFFSET); | ||
| 202 | lockdep_softirq_enter(); | 219 | lockdep_softirq_enter(); |
| 203 | 220 | ||
| 204 | cpu = smp_processor_id(); | 221 | cpu = smp_processor_id(); |
| @@ -245,7 +262,7 @@ restart: | |||
| 245 | lockdep_softirq_exit(); | 262 | lockdep_softirq_exit(); |
| 246 | 263 | ||
| 247 | account_system_vtime(current); | 264 | account_system_vtime(current); |
| 248 | _local_bh_enable(); | 265 | __local_bh_enable(SOFTIRQ_OFFSET); |
| 249 | } | 266 | } |
| 250 | 267 | ||
| 251 | #ifndef __ARCH_HAS_DO_SOFTIRQ | 268 | #ifndef __ARCH_HAS_DO_SOFTIRQ |
| @@ -279,10 +296,16 @@ void irq_enter(void) | |||
| 279 | 296 | ||
| 280 | rcu_irq_enter(); | 297 | rcu_irq_enter(); |
| 281 | if (idle_cpu(cpu) && !in_interrupt()) { | 298 | if (idle_cpu(cpu) && !in_interrupt()) { |
| 282 | __irq_enter(); | 299 | /* |
| 300 | * Prevent raise_softirq from needlessly waking up ksoftirqd | ||
| 301 | * here, as softirq will be serviced on return from interrupt. | ||
| 302 | */ | ||
| 303 | local_bh_disable(); | ||
| 283 | tick_check_idle(cpu); | 304 | tick_check_idle(cpu); |
| 284 | } else | 305 | _local_bh_enable(); |
| 285 | __irq_enter(); | 306 | } |
| 307 | |||
| 308 | __irq_enter(); | ||
| 286 | } | 309 | } |
| 287 | 310 | ||
| 288 | #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED | 311 | #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED |
| @@ -696,6 +719,7 @@ static int run_ksoftirqd(void * __bind_cpu) | |||
| 696 | { | 719 | { |
| 697 | set_current_state(TASK_INTERRUPTIBLE); | 720 | set_current_state(TASK_INTERRUPTIBLE); |
| 698 | 721 | ||
| 722 | current->flags |= PF_KSOFTIRQD; | ||
| 699 | while (!kthread_should_stop()) { | 723 | while (!kthread_should_stop()) { |
| 700 | preempt_disable(); | 724 | preempt_disable(); |
| 701 | if (!local_softirq_pending()) { | 725 | if (!local_softirq_pending()) { |
| @@ -886,17 +910,14 @@ int __init __weak early_irq_init(void) | |||
| 886 | return 0; | 910 | return 0; |
| 887 | } | 911 | } |
| 888 | 912 | ||
| 913 | #ifdef CONFIG_GENERIC_HARDIRQS | ||
| 889 | int __init __weak arch_probe_nr_irqs(void) | 914 | int __init __weak arch_probe_nr_irqs(void) |
| 890 | { | 915 | { |
| 891 | return 0; | 916 | return NR_IRQS_LEGACY; |
| 892 | } | 917 | } |
| 893 | 918 | ||
| 894 | int __init __weak arch_early_irq_init(void) | 919 | int __init __weak arch_early_irq_init(void) |
| 895 | { | 920 | { |
| 896 | return 0; | 921 | return 0; |
| 897 | } | 922 | } |
| 898 | 923 | #endif | |
| 899 | int __weak arch_init_chip_data(struct irq_desc *desc, int node) | ||
| 900 | { | ||
| 901 | return 0; | ||
| 902 | } | ||
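Note: with CONFIG_GENERIC_HARDIRQS the weak default for arch_probe_nr_irqs() now reserves the legacy range instead of returning zero. To the best of my recollection the constant is the classic ISA vector count:

	/* include/linux/irq.h (contemporary tree, quoted from memory) */
	#define NR_IRQS_LEGACY	16	/* the 16 i8259/ISA interrupt lines */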
diff --git a/kernel/srcu.c b/kernel/srcu.c index 2980da3fd509..c71e07500536 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c | |||
| @@ -46,11 +46,9 @@ static int init_srcu_struct_fields(struct srcu_struct *sp) | |||
| 46 | int __init_srcu_struct(struct srcu_struct *sp, const char *name, | 46 | int __init_srcu_struct(struct srcu_struct *sp, const char *name, |
| 47 | struct lock_class_key *key) | 47 | struct lock_class_key *key) |
| 48 | { | 48 | { |
| 49 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
| 50 | /* Don't re-initialize a lock while it is held. */ | 49 | /* Don't re-initialize a lock while it is held. */ |
| 51 | debug_check_no_locks_freed((void *)sp, sizeof(*sp)); | 50 | debug_check_no_locks_freed((void *)sp, sizeof(*sp)); |
| 52 | lockdep_init_map(&sp->dep_map, name, key, 0); | 51 | lockdep_init_map(&sp->dep_map, name, key, 0); |
| 53 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||
| 54 | return init_srcu_struct_fields(sp); | 52 | return init_srcu_struct_fields(sp); |
| 55 | } | 53 | } |
| 56 | EXPORT_SYMBOL_GPL(__init_srcu_struct); | 54 | EXPORT_SYMBOL_GPL(__init_srcu_struct); |
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 4372ccb25127..090c28812ce1 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
| @@ -287,11 +287,12 @@ repeat: | |||
| 287 | goto repeat; | 287 | goto repeat; |
| 288 | } | 288 | } |
| 289 | 289 | ||
| 290 | extern void sched_set_stop_task(int cpu, struct task_struct *stop); | ||
| 291 | |||
| 290 | /* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */ | 292 | /* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */ |
| 291 | static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | 293 | static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, |
| 292 | unsigned long action, void *hcpu) | 294 | unsigned long action, void *hcpu) |
| 293 | { | 295 | { |
| 294 | struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; | ||
| 295 | unsigned int cpu = (unsigned long)hcpu; | 296 | unsigned int cpu = (unsigned long)hcpu; |
| 296 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); | 297 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); |
| 297 | struct task_struct *p; | 298 | struct task_struct *p; |
| @@ -304,13 +305,13 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | |||
| 304 | cpu); | 305 | cpu); |
| 305 | if (IS_ERR(p)) | 306 | if (IS_ERR(p)) |
| 306 | return NOTIFY_BAD; | 307 | return NOTIFY_BAD; |
| 307 | sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); | ||
| 308 | get_task_struct(p); | 308 | get_task_struct(p); |
| 309 | kthread_bind(p, cpu); | ||
| 310 | sched_set_stop_task(cpu, p); | ||
| 309 | stopper->thread = p; | 311 | stopper->thread = p; |
| 310 | break; | 312 | break; |
| 311 | 313 | ||
| 312 | case CPU_ONLINE: | 314 | case CPU_ONLINE: |
| 313 | kthread_bind(stopper->thread, cpu); | ||
| 314 | /* strictly unnecessary, as first user will wake it */ | 315 | /* strictly unnecessary, as first user will wake it */ |
| 315 | wake_up_process(stopper->thread); | 316 | wake_up_process(stopper->thread); |
| 316 | /* mark enabled */ | 317 | /* mark enabled */ |
| @@ -325,6 +326,7 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | |||
| 325 | { | 326 | { |
| 326 | struct cpu_stop_work *work; | 327 | struct cpu_stop_work *work; |
| 327 | 328 | ||
| 329 | sched_set_stop_task(cpu, NULL); | ||
| 328 | /* kill the stopper */ | 330 | /* kill the stopper */ |
| 329 | kthread_stop(stopper->thread); | 331 | kthread_stop(stopper->thread); |
| 330 | /* drain remaining works */ | 332 | /* drain remaining works */ |
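Note: the notifier no longer promotes the stopper thread to SCHED_FIFO itself; sched_set_stop_task(), declared extern above and defined in kernel/sched.c (outside this diff), hands the kthread to the new stop class. A hedged reconstruction of what that helper does — details may differ from the real body:

	void sched_set_stop_task(int cpu, struct task_struct *stop)
	{
		struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
		struct task_struct *old_stop = cpu_rq(cpu)->stop;

		if (stop) {
			/* give it sane RT state before switching class */
			sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
			stop->sched_class = &stop_sched_class;
		}

		cpu_rq(cpu)->stop = stop;

		if (old_stop) {
			/* demote the outgoing stop task back to plain RT */
			old_stop->sched_class = &rt_sched_class;
		}
	}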
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index c63116863a80..d2321891538f 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
| @@ -149,10 +149,18 @@ static void ntp_update_offset(long offset) | |||
| 149 | time_reftime = get_seconds(); | 149 | time_reftime = get_seconds(); |
| 150 | 150 | ||
| 151 | offset64 = offset; | 151 | offset64 = offset; |
| 152 | freq_adj = (offset64 * secs) << | 152 | freq_adj = ntp_update_offset_fll(offset64, secs); |
| 153 | (NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant)); | ||
| 154 | 153 | ||
| 155 | freq_adj += ntp_update_offset_fll(offset64, secs); | 154 | /* |
| 155 | * Clamp update interval to reduce PLL gain with low | ||
| 156 | * sampling rate (e.g. intermittent network connection) | ||
| 157 | * to avoid instability. | ||
| 158 | */ | ||
| 159 | if (unlikely(secs > 1 << (SHIFT_PLL + 1 + time_constant))) | ||
| 160 | secs = 1 << (SHIFT_PLL + 1 + time_constant); | ||
| 161 | |||
| 162 | freq_adj += (offset64 * secs) << | ||
| 163 | (NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant)); | ||
| 156 | 164 | ||
| 157 | freq_adj = min(freq_adj + time_freq, MAXFREQ_SCALED); | 165 | freq_adj = min(freq_adj + time_freq, MAXFREQ_SCALED); |
| 158 | 166 | ||
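Note on the clamp: the PLL contribution grows with the sampling interval, so an unbounded `secs` after a long offline stretch would over-correct. Worked numbers, assuming SHIFT_PLL is 4 as (I believe) defined earlier in this file: with time_constant = 2 the limit is 1 << (4 + 1 + 2) = 128, so samples arriving further than 128 seconds apart contribute as if they were exactly 128 seconds apart. As a standalone sketch:

	/* illustration only; SHIFT_PLL == 4 is an assumption from memory */
	static long clamp_pll_interval(long secs, int time_constant)
	{
		long limit = 1L << (SHIFT_PLL + 1 + time_constant);

		return secs > limit ? limit : secs;	/* e.g. caps at 128 s */
	}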
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 65fb077ea79c..ebd80d50c474 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
| @@ -1638,8 +1638,8 @@ ftrace_failures_open(struct inode *inode, struct file *file) | |||
| 1638 | 1638 | ||
| 1639 | ret = ftrace_avail_open(inode, file); | 1639 | ret = ftrace_avail_open(inode, file); |
| 1640 | if (!ret) { | 1640 | if (!ret) { |
| 1641 | m = (struct seq_file *)file->private_data; | 1641 | m = file->private_data; |
| 1642 | iter = (struct ftrace_iterator *)m->private; | 1642 | iter = m->private; |
| 1643 | iter->flags = FTRACE_ITER_FAILURES; | 1643 | iter->flags = FTRACE_ITER_FAILURES; |
| 1644 | } | 1644 | } |
| 1645 | 1645 | ||
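Note: both removed casts were redundant — `private_data` is a `void *`, and C converts `void *` to any object pointer type implicitly, so the assignments are identical with or without the cast:

	/* standalone illustration: no cast needed from void * in C */
	static struct seq_file *seq_of(struct file *file)
	{
		return file->private_data;	/* implicit conversion */
	}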
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9ec59f541156..001bcd2ccf4a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -2196,7 +2196,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp) | |||
| 2196 | 2196 | ||
| 2197 | static int tracing_release(struct inode *inode, struct file *file) | 2197 | static int tracing_release(struct inode *inode, struct file *file) |
| 2198 | { | 2198 | { |
| 2199 | struct seq_file *m = (struct seq_file *)file->private_data; | 2199 | struct seq_file *m = file->private_data; |
| 2200 | struct trace_iterator *iter; | 2200 | struct trace_iterator *iter; |
| 2201 | int cpu; | 2201 | int cpu; |
| 2202 | 2202 | ||
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d39b3c5454a5..9021f8c0c0c3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
| @@ -343,6 +343,10 @@ void trace_function(struct trace_array *tr, | |||
| 343 | unsigned long ip, | 343 | unsigned long ip, |
| 344 | unsigned long parent_ip, | 344 | unsigned long parent_ip, |
| 345 | unsigned long flags, int pc); | 345 | unsigned long flags, int pc); |
| 346 | void trace_graph_function(struct trace_array *tr, | ||
| 347 | unsigned long ip, | ||
| 348 | unsigned long parent_ip, | ||
| 349 | unsigned long flags, int pc); | ||
| 346 | void trace_default_header(struct seq_file *m); | 350 | void trace_default_header(struct seq_file *m); |
| 347 | void print_trace_header(struct seq_file *m, struct trace_iterator *iter); | 351 | void print_trace_header(struct seq_file *m, struct trace_iterator *iter); |
| 348 | int trace_empty(struct trace_iterator *iter); | 352 | int trace_empty(struct trace_iterator *iter); |
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index ef49e9370b25..76b05980225c 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
| @@ -262,6 +262,34 @@ int trace_graph_thresh_entry(struct ftrace_graph_ent *trace) | |||
| 262 | return trace_graph_entry(trace); | 262 | return trace_graph_entry(trace); |
| 263 | } | 263 | } |
| 264 | 264 | ||
| 265 | static void | ||
| 266 | __trace_graph_function(struct trace_array *tr, | ||
| 267 | unsigned long ip, unsigned long flags, int pc) | ||
| 268 | { | ||
| 269 | u64 time = trace_clock_local(); | ||
| 270 | struct ftrace_graph_ent ent = { | ||
| 271 | .func = ip, | ||
| 272 | .depth = 0, | ||
| 273 | }; | ||
| 274 | struct ftrace_graph_ret ret = { | ||
| 275 | .func = ip, | ||
| 276 | .depth = 0, | ||
| 277 | .calltime = time, | ||
| 278 | .rettime = time, | ||
| 279 | }; | ||
| 280 | |||
| 281 | __trace_graph_entry(tr, &ent, flags, pc); | ||
| 282 | __trace_graph_return(tr, &ret, flags, pc); | ||
| 283 | } | ||
| 284 | |||
| 285 | void | ||
| 286 | trace_graph_function(struct trace_array *tr, | ||
| 287 | unsigned long ip, unsigned long parent_ip, | ||
| 288 | unsigned long flags, int pc) | ||
| 289 | { | ||
| 290 | __trace_graph_function(tr, ip, flags, pc); | ||
| 291 | } | ||
| 292 | |||
| 265 | void __trace_graph_return(struct trace_array *tr, | 293 | void __trace_graph_return(struct trace_array *tr, |
| 266 | struct ftrace_graph_ret *trace, | 294 | struct ftrace_graph_ret *trace, |
| 267 | unsigned long flags, | 295 | unsigned long flags, |
| @@ -888,12 +916,20 @@ check_irq_entry(struct trace_iterator *iter, u32 flags, | |||
| 888 | unsigned long addr, int depth) | 916 | unsigned long addr, int depth) |
| 889 | { | 917 | { |
| 890 | int cpu = iter->cpu; | 918 | int cpu = iter->cpu; |
| 919 | int *depth_irq; | ||
| 891 | struct fgraph_data *data = iter->private; | 920 | struct fgraph_data *data = iter->private; |
| 892 | int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); | ||
| 893 | 921 | ||
| 894 | if (flags & TRACE_GRAPH_PRINT_IRQS) | 922 | /* |
| 923 | * If we are either displaying irqs, or we got called as | ||
| 924 | * a graph event and private data does not exist, | ||
| 925 | * then we bypass the irq check. | ||
| 926 | */ | ||
| 927 | if ((flags & TRACE_GRAPH_PRINT_IRQS) || | ||
| 928 | (!data)) | ||
| 895 | return 0; | 929 | return 0; |
| 896 | 930 | ||
| 931 | depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); | ||
| 932 | |||
| 897 | /* | 933 | /* |
| 898 | * We are inside the irq code | 934 | * We are inside the irq code |
| 899 | */ | 935 | */ |
| @@ -926,12 +962,20 @@ static int | |||
| 926 | check_irq_return(struct trace_iterator *iter, u32 flags, int depth) | 962 | check_irq_return(struct trace_iterator *iter, u32 flags, int depth) |
| 927 | { | 963 | { |
| 928 | int cpu = iter->cpu; | 964 | int cpu = iter->cpu; |
| 965 | int *depth_irq; | ||
| 929 | struct fgraph_data *data = iter->private; | 966 | struct fgraph_data *data = iter->private; |
| 930 | int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); | ||
| 931 | 967 | ||
| 932 | if (flags & TRACE_GRAPH_PRINT_IRQS) | 968 | /* |
| 969 | * If we are either displaying irqs, or we got called as | ||
| 970 | * a graph event and private data does not exist, | ||
| 971 | * then we bypass the irq check. | ||
| 972 | */ | ||
| 973 | if ((flags & TRACE_GRAPH_PRINT_IRQS) || | ||
| 974 | (!data)) | ||
| 933 | return 0; | 975 | return 0; |
| 934 | 976 | ||
| 977 | depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); | ||
| 978 | |||
| 935 | /* | 979 | /* |
| 936 | * We are not inside the irq code. | 980 | * We are not inside the irq code. |
| 937 | */ | 981 | */ |
| @@ -1163,7 +1207,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
| 1163 | 1207 | ||
| 1164 | 1208 | ||
| 1165 | enum print_line_t | 1209 | enum print_line_t |
| 1166 | print_graph_function_flags(struct trace_iterator *iter, u32 flags) | 1210 | __print_graph_function_flags(struct trace_iterator *iter, u32 flags) |
| 1167 | { | 1211 | { |
| 1168 | struct ftrace_graph_ent_entry *field; | 1212 | struct ftrace_graph_ent_entry *field; |
| 1169 | struct fgraph_data *data = iter->private; | 1213 | struct fgraph_data *data = iter->private; |
| @@ -1226,7 +1270,18 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) | |||
| 1226 | static enum print_line_t | 1270 | static enum print_line_t |
| 1227 | print_graph_function(struct trace_iterator *iter) | 1271 | print_graph_function(struct trace_iterator *iter) |
| 1228 | { | 1272 | { |
| 1229 | return print_graph_function_flags(iter, tracer_flags.val); | 1273 | return __print_graph_function_flags(iter, tracer_flags.val); |
| 1274 | } | ||
| 1275 | |||
| 1276 | enum print_line_t print_graph_function_flags(struct trace_iterator *iter, | ||
| 1277 | u32 flags) | ||
| 1278 | { | ||
| 1279 | if (trace_flags & TRACE_ITER_LATENCY_FMT) | ||
| 1280 | flags |= TRACE_GRAPH_PRINT_DURATION; | ||
| 1281 | else | ||
| 1282 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
| 1283 | |||
| 1284 | return __print_graph_function_flags(iter, flags); | ||
| 1230 | } | 1285 | } |
| 1231 | 1286 | ||
| 1232 | static enum print_line_t | 1287 | static enum print_line_t |
| @@ -1258,7 +1313,7 @@ static void print_lat_header(struct seq_file *s, u32 flags) | |||
| 1258 | seq_printf(s, "#%.*s|||| / \n", size, spaces); | 1313 | seq_printf(s, "#%.*s|||| / \n", size, spaces); |
| 1259 | } | 1314 | } |
| 1260 | 1315 | ||
| 1261 | void print_graph_headers_flags(struct seq_file *s, u32 flags) | 1316 | static void __print_graph_headers_flags(struct seq_file *s, u32 flags) |
| 1262 | { | 1317 | { |
| 1263 | int lat = trace_flags & TRACE_ITER_LATENCY_FMT; | 1318 | int lat = trace_flags & TRACE_ITER_LATENCY_FMT; |
| 1264 | 1319 | ||
| @@ -1299,6 +1354,23 @@ void print_graph_headers(struct seq_file *s) | |||
| 1299 | print_graph_headers_flags(s, tracer_flags.val); | 1354 | print_graph_headers_flags(s, tracer_flags.val); |
| 1300 | } | 1355 | } |
| 1301 | 1356 | ||
| 1357 | void print_graph_headers_flags(struct seq_file *s, u32 flags) | ||
| 1358 | { | ||
| 1359 | struct trace_iterator *iter = s->private; | ||
| 1360 | |||
| 1361 | if (trace_flags & TRACE_ITER_LATENCY_FMT) { | ||
| 1362 | /* print nothing if the buffers are empty */ | ||
| 1363 | if (trace_empty(iter)) | ||
| 1364 | return; | ||
| 1365 | |||
| 1366 | print_trace_header(s, iter); | ||
| 1367 | flags |= TRACE_GRAPH_PRINT_DURATION; | ||
| 1368 | } else | ||
| 1369 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
| 1370 | |||
| 1371 | __print_graph_headers_flags(s, flags); | ||
| 1372 | } | ||
| 1373 | |||
| 1302 | void graph_trace_open(struct trace_iterator *iter) | 1374 | void graph_trace_open(struct trace_iterator *iter) |
| 1303 | { | 1375 | { |
| 1304 | /* pid and depth on the last trace processed */ | 1376 | /* pid and depth on the last trace processed */ |
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 73a6b0601f2e..5cf8c602b880 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c | |||
| @@ -87,14 +87,22 @@ static __cacheline_aligned_in_smp unsigned long max_sequence; | |||
| 87 | 87 | ||
| 88 | #ifdef CONFIG_FUNCTION_TRACER | 88 | #ifdef CONFIG_FUNCTION_TRACER |
| 89 | /* | 89 | /* |
| 90 | * irqsoff uses its own tracer function to keep the overhead down: | 90 | * Prologue for the preempt and irqs off function tracers. |
| 91 | * | ||
| 92 | * Returns 1 if it is OK to continue, and data->disabled is | ||
| 93 | * incremented. | ||
| 94 | * 0 if the trace is to be ignored, and data->disabled | ||
| 95 | * is kept the same. | ||
| 96 | * | ||
| 97 | * Note, this function is also used outside this ifdef but | ||
| 98 | * inside the #ifdef of the function graph tracer below. | ||
| 99 | * This is OK, since the function graph tracer is | ||
| 100 | * dependent on the function tracer. | ||
| 91 | */ | 101 | */ |
| 92 | static void | 102 | static int func_prolog_dec(struct trace_array *tr, |
| 93 | irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) | 103 | struct trace_array_cpu **data, |
| 104 | unsigned long *flags) | ||
| 94 | { | 105 | { |
| 95 | struct trace_array *tr = irqsoff_trace; | ||
| 96 | struct trace_array_cpu *data; | ||
| 97 | unsigned long flags; | ||
| 98 | long disabled; | 106 | long disabled; |
| 99 | int cpu; | 107 | int cpu; |
| 100 | 108 | ||
| @@ -106,18 +114,38 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) | |||
| 106 | */ | 114 | */ |
| 107 | cpu = raw_smp_processor_id(); | 115 | cpu = raw_smp_processor_id(); |
| 108 | if (likely(!per_cpu(tracing_cpu, cpu))) | 116 | if (likely(!per_cpu(tracing_cpu, cpu))) |
| 109 | return; | 117 | return 0; |
| 110 | 118 | ||
| 111 | local_save_flags(flags); | 119 | local_save_flags(*flags); |
| 112 | /* slight chance to get a false positive on tracing_cpu */ | 120 | /* slight chance to get a false positive on tracing_cpu */ |
| 113 | if (!irqs_disabled_flags(flags)) | 121 | if (!irqs_disabled_flags(*flags)) |
| 114 | return; | 122 | return 0; |
| 115 | 123 | ||
| 116 | data = tr->data[cpu]; | 124 | *data = tr->data[cpu]; |
| 117 | disabled = atomic_inc_return(&data->disabled); | 125 | disabled = atomic_inc_return(&(*data)->disabled); |
| 118 | 126 | ||
| 119 | if (likely(disabled == 1)) | 127 | if (likely(disabled == 1)) |
| 120 | trace_function(tr, ip, parent_ip, flags, preempt_count()); | 128 | return 1; |
| 129 | |||
| 130 | atomic_dec(&(*data)->disabled); | ||
| 131 | |||
| 132 | return 0; | ||
| 133 | } | ||
| 134 | |||
| 135 | /* | ||
| 136 | * irqsoff uses its own tracer function to keep the overhead down: | ||
| 137 | */ | ||
| 138 | static void | ||
| 139 | irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) | ||
| 140 | { | ||
| 141 | struct trace_array *tr = irqsoff_trace; | ||
| 142 | struct trace_array_cpu *data; | ||
| 143 | unsigned long flags; | ||
| 144 | |||
| 145 | if (!func_prolog_dec(tr, &data, &flags)) | ||
| 146 | return; | ||
| 147 | |||
| 148 | trace_function(tr, ip, parent_ip, flags, preempt_count()); | ||
| 121 | 149 | ||
| 122 | atomic_dec(&data->disabled); | 150 | atomic_dec(&data->disabled); |
| 123 | } | 151 | } |
| @@ -155,30 +183,16 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) | |||
| 155 | struct trace_array *tr = irqsoff_trace; | 183 | struct trace_array *tr = irqsoff_trace; |
| 156 | struct trace_array_cpu *data; | 184 | struct trace_array_cpu *data; |
| 157 | unsigned long flags; | 185 | unsigned long flags; |
| 158 | long disabled; | ||
| 159 | int ret; | 186 | int ret; |
| 160 | int cpu; | ||
| 161 | int pc; | 187 | int pc; |
| 162 | 188 | ||
| 163 | cpu = raw_smp_processor_id(); | 189 | if (!func_prolog_dec(tr, &data, &flags)) |
| 164 | if (likely(!per_cpu(tracing_cpu, cpu))) | ||
| 165 | return 0; | 190 | return 0; |
| 166 | 191 | ||
| 167 | local_save_flags(flags); | 192 | pc = preempt_count(); |
| 168 | /* slight chance to get a false positive on tracing_cpu */ | 193 | ret = __trace_graph_entry(tr, trace, flags, pc); |
| 169 | if (!irqs_disabled_flags(flags)) | ||
| 170 | return 0; | ||
| 171 | |||
| 172 | data = tr->data[cpu]; | ||
| 173 | disabled = atomic_inc_return(&data->disabled); | ||
| 174 | |||
| 175 | if (likely(disabled == 1)) { | ||
| 176 | pc = preempt_count(); | ||
| 177 | ret = __trace_graph_entry(tr, trace, flags, pc); | ||
| 178 | } else | ||
| 179 | ret = 0; | ||
| 180 | |||
| 181 | atomic_dec(&data->disabled); | 194 | atomic_dec(&data->disabled); |
| 195 | |||
| 182 | return ret; | 196 | return ret; |
| 183 | } | 197 | } |
| 184 | 198 | ||
| @@ -187,27 +201,13 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace) | |||
| 187 | struct trace_array *tr = irqsoff_trace; | 201 | struct trace_array *tr = irqsoff_trace; |
| 188 | struct trace_array_cpu *data; | 202 | struct trace_array_cpu *data; |
| 189 | unsigned long flags; | 203 | unsigned long flags; |
| 190 | long disabled; | ||
| 191 | int cpu; | ||
| 192 | int pc; | 204 | int pc; |
| 193 | 205 | ||
| 194 | cpu = raw_smp_processor_id(); | 206 | if (!func_prolog_dec(tr, &data, &flags)) |
| 195 | if (likely(!per_cpu(tracing_cpu, cpu))) | ||
| 196 | return; | 207 | return; |
| 197 | 208 | ||
| 198 | local_save_flags(flags); | 209 | pc = preempt_count(); |
| 199 | /* slight chance to get a false positive on tracing_cpu */ | 210 | __trace_graph_return(tr, trace, flags, pc); |
| 200 | if (!irqs_disabled_flags(flags)) | ||
| 201 | return; | ||
| 202 | |||
| 203 | data = tr->data[cpu]; | ||
| 204 | disabled = atomic_inc_return(&data->disabled); | ||
| 205 | |||
| 206 | if (likely(disabled == 1)) { | ||
| 207 | pc = preempt_count(); | ||
| 208 | __trace_graph_return(tr, trace, flags, pc); | ||
| 209 | } | ||
| 210 | |||
| 211 | atomic_dec(&data->disabled); | 211 | atomic_dec(&data->disabled); |
| 212 | } | 212 | } |
| 213 | 213 | ||
| @@ -229,75 +229,33 @@ static void irqsoff_trace_close(struct trace_iterator *iter) | |||
| 229 | 229 | ||
| 230 | static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) | 230 | static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) |
| 231 | { | 231 | { |
| 232 | u32 flags = GRAPH_TRACER_FLAGS; | ||
| 233 | |||
| 234 | if (trace_flags & TRACE_ITER_LATENCY_FMT) | ||
| 235 | flags |= TRACE_GRAPH_PRINT_DURATION; | ||
| 236 | else | ||
| 237 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
| 238 | |||
| 239 | /* | 232 | /* |
| 240 | * In graph mode call the graph tracer output function, | 233 | * In graph mode call the graph tracer output function, |
| 241 | * otherwise go with the TRACE_FN event handler | 234 | * otherwise go with the TRACE_FN event handler |
| 242 | */ | 235 | */ |
| 243 | if (is_graph()) | 236 | if (is_graph()) |
| 244 | return print_graph_function_flags(iter, flags); | 237 | return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS); |
| 245 | 238 | ||
| 246 | return TRACE_TYPE_UNHANDLED; | 239 | return TRACE_TYPE_UNHANDLED; |
| 247 | } | 240 | } |
| 248 | 241 | ||
| 249 | static void irqsoff_print_header(struct seq_file *s) | 242 | static void irqsoff_print_header(struct seq_file *s) |
| 250 | { | 243 | { |
| 251 | if (is_graph()) { | 244 | if (is_graph()) |
| 252 | struct trace_iterator *iter = s->private; | 245 | print_graph_headers_flags(s, GRAPH_TRACER_FLAGS); |
| 253 | u32 flags = GRAPH_TRACER_FLAGS; | 246 | else |
| 254 | |||
| 255 | if (trace_flags & TRACE_ITER_LATENCY_FMT) { | ||
| 256 | /* print nothing if the buffers are empty */ | ||
| 257 | if (trace_empty(iter)) | ||
| 258 | return; | ||
| 259 | |||
| 260 | print_trace_header(s, iter); | ||
| 261 | flags |= TRACE_GRAPH_PRINT_DURATION; | ||
| 262 | } else | ||
| 263 | flags |= TRACE_GRAPH_PRINT_ABS_TIME; | ||
| 264 | |||
| 265 | print_graph_headers_flags(s, flags); | ||
| 266 | } else | ||
| 267 | trace_default_header(s); | 247 | trace_default_header(s); |
| 268 | } | 248 | } |
| 269 | 249 | ||
| 270 | static void | 250 | static void |
| 271 | trace_graph_function(struct trace_array *tr, | ||
| 272 | unsigned long ip, unsigned long flags, int pc) | ||
| 273 | { | ||
| 274 | u64 time = trace_clock_local(); | ||
| 275 | struct ftrace_graph_ent ent = { | ||
| 276 | .func = ip, | ||
| 277 | .depth = 0, | ||
| 278 | }; | ||
| 279 | struct ftrace_graph_ret ret = { | ||
| 280 | .func = ip, | ||
| 281 | .depth = 0, | ||
| 282 | .calltime = time, | ||
| 283 | .rettime = time, | ||
| 284 | }; | ||
| 285 | |||
| 286 | __trace_graph_entry(tr, &ent, flags, pc); | ||
| 287 | __trace_graph_return(tr, &ret, flags, pc); | ||
| 288 | } | ||
| 289 | |||
| 290 | static void | ||
| 291 | __trace_function(struct trace_array *tr, | 251 | __trace_function(struct trace_array *tr, |
| 292 | unsigned long ip, unsigned long parent_ip, | 252 | unsigned long ip, unsigned long parent_ip, |
| 293 | unsigned long flags, int pc) | 253 | unsigned long flags, int pc) |
| 294 | { | 254 | { |
| 295 | if (!is_graph()) | 255 | if (is_graph()) |
| 256 | trace_graph_function(tr, ip, parent_ip, flags, pc); | ||
| 257 | else | ||
| 296 | trace_function(tr, ip, parent_ip, flags, pc); | 258 | trace_function(tr, ip, parent_ip, flags, pc); |
| 297 | else { | ||
| 298 | trace_graph_function(tr, parent_ip, flags, pc); | ||
| 299 | trace_graph_function(tr, ip, flags, pc); | ||
| 300 | } | ||
| 301 | } | 259 | } |
| 302 | 260 | ||
| 303 | #else | 261 | #else |
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 4086eae6e81b..7319559ed59f 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c | |||
| @@ -31,48 +31,98 @@ static int wakeup_rt; | |||
| 31 | static arch_spinlock_t wakeup_lock = | 31 | static arch_spinlock_t wakeup_lock = |
| 32 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; | 32 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; |
| 33 | 33 | ||
| 34 | static void wakeup_reset(struct trace_array *tr); | ||
| 34 | static void __wakeup_reset(struct trace_array *tr); | 35 | static void __wakeup_reset(struct trace_array *tr); |
| 36 | static int wakeup_graph_entry(struct ftrace_graph_ent *trace); | ||
| 37 | static void wakeup_graph_return(struct ftrace_graph_ret *trace); | ||
| 35 | 38 | ||
| 36 | static int save_lat_flag; | 39 | static int save_lat_flag; |
| 37 | 40 | ||
| 41 | #define TRACE_DISPLAY_GRAPH 1 | ||
| 42 | |||
| 43 | static struct tracer_opt trace_opts[] = { | ||
| 44 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
| 45 | /* display latency trace as call graph */ | ||
| 46 | { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) }, | ||
| 47 | #endif | ||
| 48 | { } /* Empty entry */ | ||
| 49 | }; | ||
| 50 | |||
| 51 | static struct tracer_flags tracer_flags = { | ||
| 52 | .val = 0, | ||
| 53 | .opts = trace_opts, | ||
| 54 | }; | ||
| 55 | |||
| 56 | #define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH) | ||
| 57 | |||
| 38 | #ifdef CONFIG_FUNCTION_TRACER | 58 | #ifdef CONFIG_FUNCTION_TRACER |
| 59 | |||
| 39 | /* | 60 | /* |
| 40 | * irqsoff uses its own tracer function to keep the overhead down: | 61 | * Prologue for the wakeup function tracers. |
| 62 | * | ||
| 63 | * Returns 1 if it is OK to continue, and preemption | ||
| 64 | * is disabled and data->disabled is incremented. | ||
| 65 | * 0 if the trace is to be ignored, and preemption | ||
| 66 | * is not disabled and data->disabled is | ||
| 67 | * kept the same. | ||
| 68 | * | ||
| 69 | * Note, this function is also used outside this ifdef but | ||
| 70 | * inside the #ifdef of the function graph tracer below. | ||
| 71 | * This is OK, since the function graph tracer is | ||
| 72 | * dependent on the function tracer. | ||
| 41 | */ | 73 | */ |
| 42 | static void | 74 | static int |
| 43 | wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) | 75 | func_prolog_preempt_disable(struct trace_array *tr, |
| 76 | struct trace_array_cpu **data, | ||
| 77 | int *pc) | ||
| 44 | { | 78 | { |
| 45 | struct trace_array *tr = wakeup_trace; | ||
| 46 | struct trace_array_cpu *data; | ||
| 47 | unsigned long flags; | ||
| 48 | long disabled; | 79 | long disabled; |
| 49 | int cpu; | 80 | int cpu; |
| 50 | int pc; | ||
| 51 | 81 | ||
| 52 | if (likely(!wakeup_task)) | 82 | if (likely(!wakeup_task)) |
| 53 | return; | 83 | return 0; |
| 54 | 84 | ||
| 55 | pc = preempt_count(); | 85 | *pc = preempt_count(); |
| 56 | preempt_disable_notrace(); | 86 | preempt_disable_notrace(); |
| 57 | 87 | ||
| 58 | cpu = raw_smp_processor_id(); | 88 | cpu = raw_smp_processor_id(); |
| 59 | if (cpu != wakeup_current_cpu) | 89 | if (cpu != wakeup_current_cpu) |
| 60 | goto out_enable; | 90 | goto out_enable; |
| 61 | 91 | ||
| 62 | data = tr->data[cpu]; | 92 | *data = tr->data[cpu]; |
| 63 | disabled = atomic_inc_return(&data->disabled); | 93 | disabled = atomic_inc_return(&(*data)->disabled); |
| 64 | if (unlikely(disabled != 1)) | 94 | if (unlikely(disabled != 1)) |
| 65 | goto out; | 95 | goto out; |
| 66 | 96 | ||
| 67 | local_irq_save(flags); | 97 | return 1; |
| 68 | 98 | ||
| 69 | trace_function(tr, ip, parent_ip, flags, pc); | 99 | out: |
| 100 | atomic_dec(&(*data)->disabled); | ||
| 101 | |||
| 102 | out_enable: | ||
| 103 | preempt_enable_notrace(); | ||
| 104 | return 0; | ||
| 105 | } | ||
| 70 | 106 | ||
| 107 | /* | ||
| 108 | * wakeup uses its own tracer function to keep the overhead down: | ||
| 109 | */ | ||
| 110 | static void | ||
| 111 | wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) | ||
| 112 | { | ||
| 113 | struct trace_array *tr = wakeup_trace; | ||
| 114 | struct trace_array_cpu *data; | ||
| 115 | unsigned long flags; | ||
| 116 | int pc; | ||
| 117 | |||
| 118 | if (!func_prolog_preempt_disable(tr, &data, &pc)) | ||
| 119 | return; | ||
| 120 | |||
| 121 | local_irq_save(flags); | ||
| 122 | trace_function(tr, ip, parent_ip, flags, pc); | ||
| 71 | local_irq_restore(flags); | 123 | local_irq_restore(flags); |
| 72 | 124 | ||
| 73 | out: | ||
| 74 | atomic_dec(&data->disabled); | 125 | atomic_dec(&data->disabled); |
| 75 | out_enable: | ||
| 76 | preempt_enable_notrace(); | 126 | preempt_enable_notrace(); |
| 77 | } | 127 | } |
| 78 | 128 | ||
| @@ -82,6 +132,156 @@ static struct ftrace_ops trace_ops __read_mostly = | |||
| 82 | }; | 132 | }; |
| 83 | #endif /* CONFIG_FUNCTION_TRACER */ | 133 | #endif /* CONFIG_FUNCTION_TRACER */ |
| 84 | 134 | ||
| 135 | static int start_func_tracer(int graph) | ||
| 136 | { | ||
| 137 | int ret; | ||
| 138 | |||
| 139 | if (!graph) | ||
| 140 | ret = register_ftrace_function(&trace_ops); | ||
| 141 | else | ||
| 142 | ret = register_ftrace_graph(&wakeup_graph_return, | ||
| 143 | &wakeup_graph_entry); | ||
| 144 | |||
| 145 | if (!ret && tracing_is_enabled()) | ||
| 146 | tracer_enabled = 1; | ||
| 147 | else | ||
| 148 | tracer_enabled = 0; | ||
| 149 | |||
| 150 | return ret; | ||
| 151 | } | ||
| 152 | |||
| 153 | static void stop_func_tracer(int graph) | ||
| 154 | { | ||
| 155 | tracer_enabled = 0; | ||
| 156 | |||
| 157 | if (!graph) | ||
| 158 | unregister_ftrace_function(&trace_ops); | ||
| 159 | else | ||
| 160 | unregister_ftrace_graph(); | ||
| 161 | } | ||
| 162 | |||
| 163 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
| 164 | static int wakeup_set_flag(u32 old_flags, u32 bit, int set) | ||
| 165 | { | ||
| 166 | |||
| 167 | if (!(bit & TRACE_DISPLAY_GRAPH)) | ||
| 168 | return -EINVAL; | ||
| 169 | |||
| 170 | if (!(is_graph() ^ set)) | ||
| 171 | return 0; | ||
| 172 | |||
| 173 | stop_func_tracer(!set); | ||
| 174 | |||
| 175 | wakeup_reset(wakeup_trace); | ||
| 176 | tracing_max_latency = 0; | ||
| 177 | |||
| 178 | return start_func_tracer(set); | ||
| 179 | } | ||
| 180 | |||
| 181 | static int wakeup_graph_entry(struct ftrace_graph_ent *trace) | ||
| 182 | { | ||
| 183 | struct trace_array *tr = wakeup_trace; | ||
| 184 | struct trace_array_cpu *data; | ||
| 185 | unsigned long flags; | ||
| 186 | int pc, ret = 0; | ||
| 187 | |||
| 188 | if (!func_prolog_preempt_disable(tr, &data, &pc)) | ||
| 189 | return 0; | ||
| 190 | |||
| 191 | local_save_flags(flags); | ||
| 192 | ret = __trace_graph_entry(tr, trace, flags, pc); | ||
| 193 | atomic_dec(&data->disabled); | ||
| 194 | preempt_enable_notrace(); | ||
| 195 | |||
| 196 | return ret; | ||
| 197 | } | ||
| 198 | |||
| 199 | static void wakeup_graph_return(struct ftrace_graph_ret *trace) | ||
| 200 | { | ||
| 201 | struct trace_array *tr = wakeup_trace; | ||
| 202 | struct trace_array_cpu *data; | ||
| 203 | unsigned long flags; | ||
| 204 | int pc; | ||
| 205 | |||
| 206 | if (!func_prolog_preempt_disable(tr, &data, &pc)) | ||
| 207 | return; | ||
| 208 | |||
| 209 | local_save_flags(flags); | ||
| 210 | __trace_graph_return(tr, trace, flags, pc); | ||
| 211 | atomic_dec(&data->disabled); | ||
| 212 | |||
| 213 | preempt_enable_notrace(); | ||
| 214 | return; | ||
| 215 | } | ||
| 216 | |||
| 217 | static void wakeup_trace_open(struct trace_iterator *iter) | ||
| 218 | { | ||
| 219 | if (is_graph()) | ||
| 220 | graph_trace_open(iter); | ||
| 221 | } | ||
| 222 | |||
| 223 | static void wakeup_trace_close(struct trace_iterator *iter) | ||
| 224 | { | ||
| 225 | if (iter->private) | ||
| 226 | graph_trace_close(iter); | ||
| 227 | } | ||
| 228 | |||
| 229 | #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC) | ||
| 230 | |||
| 231 | static enum print_line_t wakeup_print_line(struct trace_iterator *iter) | ||
| 232 | { | ||
| 233 | /* | ||
| 234 | * In graph mode call the graph tracer output function, | ||
| 235 | * otherwise go with the TRACE_FN event handler | ||
| 236 | */ | ||
| 237 | if (is_graph()) | ||
| 238 | return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS); | ||
| 239 | |||
| 240 | return TRACE_TYPE_UNHANDLED; | ||
| 241 | } | ||
| 242 | |||
| 243 | static void wakeup_print_header(struct seq_file *s) | ||
| 244 | { | ||
| 245 | if (is_graph()) | ||
| 246 | print_graph_headers_flags(s, GRAPH_TRACER_FLAGS); | ||
| 247 | else | ||
| 248 | trace_default_header(s); | ||
| 249 | } | ||
| 250 | |||
| 251 | static void | ||
| 252 | __trace_function(struct trace_array *tr, | ||
| 253 | unsigned long ip, unsigned long parent_ip, | ||
| 254 | unsigned long flags, int pc) | ||
| 255 | { | ||
| 256 | if (is_graph()) | ||
| 257 | trace_graph_function(tr, ip, parent_ip, flags, pc); | ||
| 258 | else | ||
| 259 | trace_function(tr, ip, parent_ip, flags, pc); | ||
| 260 | } | ||
| 261 | #else | ||
| 262 | #define __trace_function trace_function | ||
| 263 | |||
| 264 | static int wakeup_set_flag(u32 old_flags, u32 bit, int set) | ||
| 265 | { | ||
| 266 | return -EINVAL; | ||
| 267 | } | ||
| 268 | |||
| 269 | static int wakeup_graph_entry(struct ftrace_graph_ent *trace) | ||
| 270 | { | ||
| 271 | return -1; | ||
| 272 | } | ||
| 273 | |||
| 274 | static enum print_line_t wakeup_print_line(struct trace_iterator *iter) | ||
| 275 | { | ||
| 276 | return TRACE_TYPE_UNHANDLED; | ||
| 277 | } | ||
| 278 | |||
| 279 | static void wakeup_graph_return(struct ftrace_graph_ret *trace) { } | ||
| 280 | static void wakeup_print_header(struct seq_file *s) { } | ||
| 281 | static void wakeup_trace_open(struct trace_iterator *iter) { } | ||
| 282 | static void wakeup_trace_close(struct trace_iterator *iter) { } | ||
| 283 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | ||
| 284 | |||
| 85 | /* | 285 | /* |
| 86 | * Should this new latency be reported/recorded? | 286 | * Should this new latency be reported/recorded? |
| 87 | */ | 287 | */ |
| @@ -152,7 +352,7 @@ probe_wakeup_sched_switch(void *ignore, | |||
| 152 | /* The task we are waiting for is waking up */ | 352 | /* The task we are waiting for is waking up */ |
| 153 | data = wakeup_trace->data[wakeup_cpu]; | 353 | data = wakeup_trace->data[wakeup_cpu]; |
| 154 | 354 | ||
| 155 | trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); | 355 | __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); |
| 156 | tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); | 356 | tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); |
| 157 | 357 | ||
| 158 | T0 = data->preempt_timestamp; | 358 | T0 = data->preempt_timestamp; |
| @@ -252,7 +452,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success) | |||
| 252 | * is not called by an assembly function (where as schedule is) | 452 | * is not called by an assembly function (where as schedule is) |
| 253 | * it should be safe to use it here. | 453 | * it should be safe to use it here. |
| 254 | */ | 454 | */ |
| 255 | trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); | 455 | __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); |
| 256 | 456 | ||
| 257 | out_locked: | 457 | out_locked: |
| 258 | arch_spin_unlock(&wakeup_lock); | 458 | arch_spin_unlock(&wakeup_lock); |
| @@ -303,12 +503,8 @@ static void start_wakeup_tracer(struct trace_array *tr) | |||
| 303 | */ | 503 | */ |
| 304 | smp_wmb(); | 504 | smp_wmb(); |
| 305 | 505 | ||
| 306 | register_ftrace_function(&trace_ops); | 506 | if (start_func_tracer(is_graph())) |
| 307 | 507 | printk(KERN_ERR "failed to start wakeup tracer\n"); | |
| 308 | if (tracing_is_enabled()) | ||
| 309 | tracer_enabled = 1; | ||
| 310 | else | ||
| 311 | tracer_enabled = 0; | ||
| 312 | 508 | ||
| 313 | return; | 509 | return; |
| 314 | fail_deprobe_wake_new: | 510 | fail_deprobe_wake_new: |
| @@ -320,7 +516,7 @@ fail_deprobe: | |||
| 320 | static void stop_wakeup_tracer(struct trace_array *tr) | 516 | static void stop_wakeup_tracer(struct trace_array *tr) |
| 321 | { | 517 | { |
| 322 | tracer_enabled = 0; | 518 | tracer_enabled = 0; |
| 323 | unregister_ftrace_function(&trace_ops); | 519 | stop_func_tracer(is_graph()); |
| 324 | unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL); | 520 | unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL); |
| 325 | unregister_trace_sched_wakeup_new(probe_wakeup, NULL); | 521 | unregister_trace_sched_wakeup_new(probe_wakeup, NULL); |
| 326 | unregister_trace_sched_wakeup(probe_wakeup, NULL); | 522 | unregister_trace_sched_wakeup(probe_wakeup, NULL); |
| @@ -379,9 +575,15 @@ static struct tracer wakeup_tracer __read_mostly = | |||
| 379 | .start = wakeup_tracer_start, | 575 | .start = wakeup_tracer_start, |
| 380 | .stop = wakeup_tracer_stop, | 576 | .stop = wakeup_tracer_stop, |
| 381 | .print_max = 1, | 577 | .print_max = 1, |
| 578 | .print_header = wakeup_print_header, | ||
| 579 | .print_line = wakeup_print_line, | ||
| 580 | .flags = &tracer_flags, | ||
| 581 | .set_flag = wakeup_set_flag, | ||
| 382 | #ifdef CONFIG_FTRACE_SELFTEST | 582 | #ifdef CONFIG_FTRACE_SELFTEST |
| 383 | .selftest = trace_selftest_startup_wakeup, | 583 | .selftest = trace_selftest_startup_wakeup, |
| 384 | #endif | 584 | #endif |
| 585 | .open = wakeup_trace_open, | ||
| 586 | .close = wakeup_trace_close, | ||
| 385 | .use_max_tr = 1, | 587 | .use_max_tr = 1, |
| 386 | }; | 588 | }; |
| 387 | 589 | ||
| @@ -394,9 +596,15 @@ static struct tracer wakeup_rt_tracer __read_mostly = | |||
| 394 | .stop = wakeup_tracer_stop, | 596 | .stop = wakeup_tracer_stop, |
| 395 | .wait_pipe = poll_wait_pipe, | 597 | .wait_pipe = poll_wait_pipe, |
| 396 | .print_max = 1, | 598 | .print_max = 1, |
| 599 | .print_header = wakeup_print_header, | ||
| 600 | .print_line = wakeup_print_line, | ||
| 601 | .flags = &tracer_flags, | ||
| 602 | .set_flag = wakeup_set_flag, | ||
| 397 | #ifdef CONFIG_FTRACE_SELFTEST | 603 | #ifdef CONFIG_FTRACE_SELFTEST |
| 398 | .selftest = trace_selftest_startup_wakeup, | 604 | .selftest = trace_selftest_startup_wakeup, |
| 399 | #endif | 605 | #endif |
| 606 | .open = wakeup_trace_open, | ||
| 607 | .close = wakeup_trace_close, | ||
| 400 | .use_max_tr = 1, | 608 | .use_max_tr = 1, |
| 401 | }; | 609 | }; |
| 402 | 610 | ||
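Note: with `.flags` and `.set_flag` wired up, the new display-graph option is toggled through the normal tracing interface (e.g. `echo 1 > options/display-graph` under /sys/kernel/debug/tracing after selecting the wakeup tracer), and wakeup_set_flag() swaps the ftrace registration accordingly. The tracer_opt pattern generalizes: each option is one bit in tracer_flags.val, matched in the set_flag callback. A hypothetical second option shows the shape ("display-foo" and TRACE_DISPLAY_FOO are invented names):

	#define TRACE_DISPLAY_FOO	2

	static struct tracer_opt example_opts[] = {
		/* display latency trace as call graph */
		{ TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
		/* invented option, illustration only */
		{ TRACER_OPT(display-foo, TRACE_DISPLAY_FOO) },
		{ } /* empty entry terminates the list */
	};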
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index dc8e16824b51..bafba687a6d8 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
| @@ -196,7 +196,7 @@ static struct perf_event_attr wd_hw_attr = { | |||
| 196 | }; | 196 | }; |
| 197 | 197 | ||
| 198 | /* Callback function for perf event subsystem */ | 198 | /* Callback function for perf event subsystem */ |
| 199 | void watchdog_overflow_callback(struct perf_event *event, int nmi, | 199 | static void watchdog_overflow_callback(struct perf_event *event, int nmi, |
| 200 | struct perf_sample_data *data, | 200 | struct perf_sample_data *data, |
| 201 | struct pt_regs *regs) | 201 | struct pt_regs *regs) |
| 202 | { | 202 | { |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e85d549b6eac..21ac83070a80 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
| @@ -540,6 +540,23 @@ config PROVE_RCU_REPEATEDLY | |||
| 540 | disabling, allowing multiple RCU-lockdep warnings to be printed | 540 | disabling, allowing multiple RCU-lockdep warnings to be printed |
| 541 | on a single reboot. | 541 | on a single reboot. |
| 542 | 542 | ||
| 543 | Say Y to allow multiple RCU-lockdep warnings per boot. | ||
| 544 | |||
| 545 | Say N if you are unsure. | ||
| 546 | |||
| 547 | config SPARSE_RCU_POINTER | ||
| 548 | bool "RCU debugging: sparse-based checks for pointer usage" | ||
| 549 | default n | ||
| 550 | help | ||
| 551 | This feature enables the __rcu sparse annotation for | ||
| 552 | RCU-protected pointers. This annotation will cause sparse | ||
| 553 | to flag any non-RCU used of annotated pointers. This can be | ||
| 554 | helpful when debugging RCU usage. Please note that this feature | ||
| 555 | is not intended to enforce code cleanliness; it is instead merely | ||
| 556 | a debugging aid. | ||
| 557 | |||
| 558 | Say Y to make sparse flag questionable use of RCU-protected pointers. | ||
| 559 | |||
| 543 | Say N if you are unsure. | 560 | Say N if you are unsure. |
| 544 | 561 | ||
| 545 | config LOCKDEP | 562 | config LOCKDEP |
| @@ -832,6 +849,30 @@ config RCU_CPU_STALL_DETECTOR | |||
| 832 | 849 | ||
| 833 | Say Y if you are unsure. | 850 | Say Y if you are unsure. |
| 834 | 851 | ||
| 852 | config RCU_CPU_STALL_TIMEOUT | ||
| 853 | int "RCU CPU stall timeout in seconds" | ||
| 854 | depends on RCU_CPU_STALL_DETECTOR | ||
| 855 | range 3 300 | ||
| 856 | default 60 | ||
| 857 | help | ||
| 858 | If a given RCU grace period extends more than the specified | ||
| 859 | number of seconds, a CPU stall warning is printed. If the | ||
| 860 | RCU grace period persists, additional CPU stall warnings are | ||
| 861 | printed at more widely spaced intervals. | ||
| 862 | |||
| 863 | config RCU_CPU_STALL_DETECTOR_RUNNABLE | ||
| 864 | bool "RCU CPU stall checking starts automatically at boot" | ||
| 865 | depends on RCU_CPU_STALL_DETECTOR | ||
| 866 | default y | ||
| 867 | help | ||
| 868 | If set, start checking for RCU CPU stalls immediately on | ||
| 869 | boot. Otherwise, RCU CPU stall checking must be manually | ||
| 870 | enabled. | ||
| 871 | |||
| 872 | Say Y if you are unsure. | ||
| 873 | |||
| 874 | Say N if you wish to suppress RCU CPU stall checking during boot. | ||
| 875 | |||
| 835 | config RCU_CPU_STALL_VERBOSE | 876 | config RCU_CPU_STALL_VERBOSE |
| 836 | bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR" | 877 | bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR" |
| 837 | depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU | 878 | depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU |
diff --git a/lib/radix-tree.c b/lib/radix-tree.c index efd16fa80b1c..6f412ab4c24f 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c | |||
| @@ -49,7 +49,7 @@ struct radix_tree_node { | |||
| 49 | unsigned int height; /* Height from the bottom */ | 49 | unsigned int height; /* Height from the bottom */ |
| 50 | unsigned int count; | 50 | unsigned int count; |
| 51 | struct rcu_head rcu_head; | 51 | struct rcu_head rcu_head; |
| 52 | void *slots[RADIX_TREE_MAP_SIZE]; | 52 | void __rcu *slots[RADIX_TREE_MAP_SIZE]; |
| 53 | unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; | 53 | unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; |
| 54 | }; | 54 | }; |
| 55 | 55 | ||
diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 34e3082632d8..7c06ee51a29a 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c | |||
| @@ -70,7 +70,7 @@ static unsigned long io_tlb_nslabs; | |||
| 70 | */ | 70 | */ |
| 71 | static unsigned long io_tlb_overflow = 32*1024; | 71 | static unsigned long io_tlb_overflow = 32*1024; |
| 72 | 72 | ||
| 73 | void *io_tlb_overflow_buffer; | 73 | static void *io_tlb_overflow_buffer; |
| 74 | 74 | ||
| 75 | /* | 75 | /* |
| 76 | * This is a free list describing the number of free entries available from | 76 | * This is a free list describing the number of free entries available from |
| @@ -147,16 +147,16 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) | |||
| 147 | * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE | 147 | * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE |
| 148 | * between io_tlb_start and io_tlb_end. | 148 | * between io_tlb_start and io_tlb_end. |
| 149 | */ | 149 | */ |
| 150 | io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); | 150 | io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); |
| 151 | for (i = 0; i < io_tlb_nslabs; i++) | 151 | for (i = 0; i < io_tlb_nslabs; i++) |
| 152 | io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); | 152 | io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); |
| 153 | io_tlb_index = 0; | 153 | io_tlb_index = 0; |
| 154 | io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t)); | 154 | io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); |
| 155 | 155 | ||
| 156 | /* | 156 | /* |
| 157 | * Get the overflow emergency buffer | 157 | * Get the overflow emergency buffer |
| 158 | */ | 158 | */ |
| 159 | io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); | 159 | io_tlb_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow)); |
| 160 | if (!io_tlb_overflow_buffer) | 160 | if (!io_tlb_overflow_buffer) |
| 161 | panic("Cannot allocate SWIOTLB overflow buffer!\n"); | 161 | panic("Cannot allocate SWIOTLB overflow buffer!\n"); |
| 162 | if (verbose) | 162 | if (verbose) |
| @@ -182,7 +182,7 @@ swiotlb_init_with_default_size(size_t default_size, int verbose) | |||
| 182 | /* | 182 | /* |
| 183 | * Get IO TLB memory from the low pages | 183 | * Get IO TLB memory from the low pages |
| 184 | */ | 184 | */ |
| 185 | io_tlb_start = alloc_bootmem_low_pages(bytes); | 185 | io_tlb_start = alloc_bootmem_low_pages(PAGE_ALIGN(bytes)); |
| 186 | if (!io_tlb_start) | 186 | if (!io_tlb_start) |
| 187 | panic("Cannot allocate SWIOTLB buffer"); | 187 | panic("Cannot allocate SWIOTLB buffer"); |
| 188 | 188 | ||
| @@ -308,13 +308,13 @@ void __init swiotlb_free(void) | |||
| 308 | get_order(io_tlb_nslabs << IO_TLB_SHIFT)); | 308 | get_order(io_tlb_nslabs << IO_TLB_SHIFT)); |
| 309 | } else { | 309 | } else { |
| 310 | free_bootmem_late(__pa(io_tlb_overflow_buffer), | 310 | free_bootmem_late(__pa(io_tlb_overflow_buffer), |
| 311 | io_tlb_overflow); | 311 | PAGE_ALIGN(io_tlb_overflow)); |
| 312 | free_bootmem_late(__pa(io_tlb_orig_addr), | 312 | free_bootmem_late(__pa(io_tlb_orig_addr), |
| 313 | io_tlb_nslabs * sizeof(phys_addr_t)); | 313 | PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); |
| 314 | free_bootmem_late(__pa(io_tlb_list), | 314 | free_bootmem_late(__pa(io_tlb_list), |
| 315 | io_tlb_nslabs * sizeof(int)); | 315 | PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); |
| 316 | free_bootmem_late(__pa(io_tlb_start), | 316 | free_bootmem_late(__pa(io_tlb_start), |
| 317 | io_tlb_nslabs << IO_TLB_SHIFT); | 317 | PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); |
| 318 | } | 318 | } |
| 319 | } | 319 | } |
| 320 | 320 | ||
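A note on the pairing these hunks enforce: alloc_bootmem_pages() and alloc_bootmem_low_pages() hand out whole pages, so the free_bootmem_late() calls in swiotlb_free() must cover the same PAGE_ALIGN'ed extent, not just the byte count that was originally requested. A minimal userspace sketch of the arithmetic, assuming a 4 KiB page (the macro mirrors the kernel's PAGE_ALIGN):

    #include <stdio.h>

    #define PAGE_SIZE     4096UL
    #define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    int main(void)
    {
            unsigned long io_tlb_nslabs = 1000;  /* hypothetical slab count */
            unsigned long list_bytes = io_tlb_nslabs * sizeof(int);

            /* The allocator actually reserves PAGE_ALIGN(list_bytes) bytes,
             * so the later free must name the same extent. */
            printf("requested %lu, reserved/freed %lu\n",
                   list_bytes, PAGE_ALIGN(list_bytes));
            return 0;
    }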
diff --git a/mm/memory.c b/mm/memory.c index 0e18b4d649ec..98b58fecedef 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
| @@ -3185,7 +3185,7 @@ static inline int handle_pte_fault(struct mm_struct *mm, | |||
| 3185 | * with threads. | 3185 | * with threads. |
| 3186 | */ | 3186 | */ |
| 3187 | if (flags & FAULT_FLAG_WRITE) | 3187 | if (flags & FAULT_FLAG_WRITE) |
| 3188 | flush_tlb_page(vma, address); | 3188 | flush_tlb_fix_spurious_fault(vma, address); |
| 3189 | } | 3189 | } |
| 3190 | unlock: | 3190 | unlock: |
| 3191 | pte_unmap_unlock(pte, ptl); | 3191 | pte_unmap_unlock(pte, ptl); |
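The rename above is the whole point of this hunk: a spurious write fault needs at most a local-CPU TLB fixup, so an architecture may override flush_tlb_fix_spurious_fault() with something cheaper than the cross-CPU flush_tlb_page(). The generic fallback is assumed (from memory of asm-generic/pgtable.h, not quoted from this series) to be a plain alias:

    #ifndef flush_tlb_fix_spurious_fault
    #define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
    #endif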
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6b8889da69a6..d8087f0db507 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
| @@ -517,6 +517,15 @@ static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); | |||
| 517 | static void purge_fragmented_blocks_allcpus(void); | 517 | static void purge_fragmented_blocks_allcpus(void); |
| 518 | 518 | ||
| 519 | /* | 519 | /* |
| 520 | * Called before a call to iounmap() if the caller wants the vm_area_structs | ||
| 521 | * freed immediately. | ||
| 522 | */ | ||
| 523 | void set_iounmap_nonlazy(void) | ||
| 524 | { | ||
| 525 | atomic_set(&vmap_lazy_nr, lazy_max_pages()+1); | ||
| 526 | } | ||
| 527 | |||
| 528 | /* | ||
| 520 | * Purges all lazily-freed vmap areas. | 529 | * Purges all lazily-freed vmap areas. |
| 521 | * | 530 | * |
| 522 | * If sync is 0 then don't purge if there is already a purge in progress. | 531 | * If sync is 0 then don't purge if there is already a purge in progress. |
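set_iounmap_nonlazy() works by pushing vmap_lazy_nr past the lazy_max_pages() threshold, so the very next iounmap() purges its vmap area immediately instead of batching it. A hedged sketch of the intended call pattern (the consumer this was added for is the x86 kdump read path; variable names here are illustrative):

    void *vaddr = ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE);
    if (!vaddr)
            return -ENOMEM;
    memcpy(buf, vaddr + offset, csize);   /* copy out of the old kernel */
    set_iounmap_nonlazy();                /* purge on the next iounmap() */
    iounmap(vaddr);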
diff --git a/net/Kconfig b/net/Kconfig index e926884c1675..55fd82e9ffd9 100644 --- a/net/Kconfig +++ b/net/Kconfig | |||
| @@ -293,6 +293,7 @@ source "net/wimax/Kconfig" | |||
| 293 | source "net/rfkill/Kconfig" | 293 | source "net/rfkill/Kconfig" |
| 294 | source "net/9p/Kconfig" | 294 | source "net/9p/Kconfig" |
| 295 | source "net/caif/Kconfig" | 295 | source "net/caif/Kconfig" |
| 296 | source "net/ceph/Kconfig" | ||
| 296 | 297 | ||
| 297 | 298 | ||
| 298 | endif # if NET | 299 | endif # if NET |
diff --git a/net/Makefile b/net/Makefile index ea60fbce9b1b..6b7bfd7f1416 100644 --- a/net/Makefile +++ b/net/Makefile | |||
| @@ -68,3 +68,4 @@ obj-$(CONFIG_SYSCTL) += sysctl_net.o | |||
| 68 | endif | 68 | endif |
| 69 | obj-$(CONFIG_WIMAX) += wimax/ | 69 | obj-$(CONFIG_WIMAX) += wimax/ |
| 70 | obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ | 70 | obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ |
| 71 | obj-$(CONFIG_CEPH_LIB) += ceph/ | ||
diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig new file mode 100644 index 000000000000..ad424049b0cf --- /dev/null +++ b/net/ceph/Kconfig | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | config CEPH_LIB | ||
| 2 | tristate "Ceph core library (EXPERIMENTAL)" | ||
| 3 | depends on INET && EXPERIMENTAL | ||
| 4 | select LIBCRC32C | ||
| 5 | select CRYPTO_AES | ||
| 6 | select CRYPTO | ||
| 7 | default n | ||
| 8 | help | ||
| 9 | Choose Y or M here to include libceph, which provides the | ||
| 10 | common functionality to both the Ceph filesystem and | ||
| 11 | the rados block device (rbd). | ||
| 12 | |||
| 13 | More information at http://ceph.newdream.net/. | ||
| 14 | |||
| 15 | If unsure, say N. | ||
| 16 | |||
| 17 | config CEPH_LIB_PRETTYDEBUG | ||
| 18 | bool "Include file:line in ceph debug output" | ||
| 19 | depends on CEPH_LIB | ||
| 20 | default n | ||
| 21 | help | ||
| 22 | If you say Y here, debug output will include a filename and | ||
| 23 | line to aid debugging. This increases kernel size and slows | ||
| 24 | execution slightly when debug call sites are enabled (e.g., | ||
| 25 | via CONFIG_DYNAMIC_DEBUG). | ||
| 26 | |||
| 27 | If unsure, say N. | ||
| 28 | |||
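Concretely, CEPH_LIB_PRETTYDEBUG changes libceph's dout() macro so every debug line carries its origin. The shape below is an assumption about linux/ceph/ceph_debug.h rather than a quote from it:

    #ifdef CONFIG_CEPH_LIB_PRETTYDEBUG
    # define dout(fmt, ...) \
            pr_debug("%s:%d : " fmt, __FILE__, __LINE__, ##__VA_ARGS__)
    #else
    # define dout(fmt, ...) pr_debug(fmt, ##__VA_ARGS__)
    #endif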
diff --git a/net/ceph/Makefile b/net/ceph/Makefile new file mode 100644 index 000000000000..aab1cabb8035 --- /dev/null +++ b/net/ceph/Makefile | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | # | ||
| 2 | # Makefile for the Ceph core library (libceph). | ||
| 3 | # | ||
| 4 | |||
| 5 | ifneq ($(KERNELRELEASE),) | ||
| 6 | |||
| 7 | obj-$(CONFIG_CEPH_LIB) += libceph.o | ||
| 8 | |||
| 9 | libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ | ||
| 10 | mon_client.o \ | ||
| 11 | osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ | ||
| 12 | debugfs.o \ | ||
| 13 | auth.o auth_none.o \ | ||
| 14 | crypto.o armor.o \ | ||
| 15 | auth_x.o \ | ||
| 16 | ceph_fs.o ceph_strings.o ceph_hash.o \ | ||
| 17 | pagevec.o | ||
| 18 | |||
| 19 | else | ||
| 20 | # Otherwise we were called directly from the command | ||
| 21 | # line; invoke the kernel build system. | ||
| 22 | |||
| 23 | KERNELDIR ?= /lib/modules/$(shell uname -r)/build | ||
| 24 | PWD := $(shell pwd) | ||
| 25 | |||
| 26 | default: all | ||
| 27 | |||
| 28 | all: | ||
| 29 | $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules | ||
| 30 | |||
| 31 | modules_install: | ||
| 32 | $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install | ||
| 33 | |||
| 34 | clean: | ||
| 35 | $(MAKE) -C $(KERNELDIR) M=$(PWD) clean | ||
| 36 | |||
| 37 | endif | ||
diff --git a/fs/ceph/armor.c b/net/ceph/armor.c index eb2a666b0be7..eb2a666b0be7 100644 --- a/fs/ceph/armor.c +++ b/net/ceph/armor.c | |||
diff --git a/fs/ceph/auth.c b/net/ceph/auth.c index 6d2e30600627..549c1f43e1d5 100644 --- a/fs/ceph/auth.c +++ b/net/ceph/auth.c | |||
| @@ -1,16 +1,16 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
| 4 | #include <linux/err.h> | 4 | #include <linux/err.h> |
| 5 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
| 6 | 6 | ||
| 7 | #include "types.h" | 7 | #include <linux/ceph/types.h> |
| 8 | #include <linux/ceph/decode.h> | ||
| 9 | #include <linux/ceph/libceph.h> | ||
| 10 | #include <linux/ceph/messenger.h> | ||
| 8 | #include "auth_none.h" | 11 | #include "auth_none.h" |
| 9 | #include "auth_x.h" | 12 | #include "auth_x.h" |
| 10 | #include "decode.h" | ||
| 11 | #include "super.h" | ||
| 12 | 13 | ||
| 13 | #include "messenger.h" | ||
| 14 | 14 | ||
| 15 | /* | 15 | /* |
| 16 | * get protocol handler | 16 | * get protocol handler |
diff --git a/fs/ceph/auth_none.c b/net/ceph/auth_none.c index ad1dc21286c7..214c2bb43d62 100644 --- a/fs/ceph/auth_none.c +++ b/net/ceph/auth_none.c | |||
| @@ -1,14 +1,15 @@ | |||
| 1 | 1 | ||
| 2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
| 3 | 3 | ||
| 4 | #include <linux/err.h> | 4 | #include <linux/err.h> |
| 5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
| 6 | #include <linux/random.h> | 6 | #include <linux/random.h> |
| 7 | #include <linux/slab.h> | 7 | #include <linux/slab.h> |
| 8 | 8 | ||
| 9 | #include <linux/ceph/decode.h> | ||
| 10 | #include <linux/ceph/auth.h> | ||
| 11 | |||
| 9 | #include "auth_none.h" | 12 | #include "auth_none.h" |
| 10 | #include "auth.h" | ||
| 11 | #include "decode.h" | ||
| 12 | 13 | ||
| 13 | static void reset(struct ceph_auth_client *ac) | 14 | static void reset(struct ceph_auth_client *ac) |
| 14 | { | 15 | { |
diff --git a/fs/ceph/auth_none.h b/net/ceph/auth_none.h index 8164df1a08be..ed7d088b1bc9 100644 --- a/fs/ceph/auth_none.h +++ b/net/ceph/auth_none.h | |||
| @@ -2,8 +2,7 @@ | |||
| 2 | #define _FS_CEPH_AUTH_NONE_H | 2 | #define _FS_CEPH_AUTH_NONE_H |
| 3 | 3 | ||
| 4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
| 5 | 5 | #include <linux/ceph/auth.h> | |
| 6 | #include "auth.h" | ||
| 7 | 6 | ||
| 8 | /* | 7 | /* |
| 9 | * null security mode. | 8 | * null security mode. |
diff --git a/fs/ceph/auth_x.c b/net/ceph/auth_x.c index a2d002cbdec2..7fd5dfcf6e18 100644 --- a/fs/ceph/auth_x.c +++ b/net/ceph/auth_x.c | |||
| @@ -1,16 +1,17 @@ | |||
| 1 | 1 | ||
| 2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
| 3 | 3 | ||
| 4 | #include <linux/err.h> | 4 | #include <linux/err.h> |
| 5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
| 6 | #include <linux/random.h> | 6 | #include <linux/random.h> |
| 7 | #include <linux/slab.h> | 7 | #include <linux/slab.h> |
| 8 | 8 | ||
| 9 | #include <linux/ceph/decode.h> | ||
| 10 | #include <linux/ceph/auth.h> | ||
| 11 | |||
| 12 | #include "crypto.h" | ||
| 9 | #include "auth_x.h" | 13 | #include "auth_x.h" |
| 10 | #include "auth_x_protocol.h" | 14 | #include "auth_x_protocol.h" |
| 11 | #include "crypto.h" | ||
| 12 | #include "auth.h" | ||
| 13 | #include "decode.h" | ||
| 14 | 15 | ||
| 15 | #define TEMP_TICKET_BUF_LEN 256 | 16 | #define TEMP_TICKET_BUF_LEN 256 |
| 16 | 17 | ||
diff --git a/fs/ceph/auth_x.h b/net/ceph/auth_x.h index ff6f8180e681..e02da7a5c5a1 100644 --- a/fs/ceph/auth_x.h +++ b/net/ceph/auth_x.h | |||
| @@ -3,8 +3,9 @@ | |||
| 3 | 3 | ||
| 4 | #include <linux/rbtree.h> | 4 | #include <linux/rbtree.h> |
| 5 | 5 | ||
| 6 | #include <linux/ceph/auth.h> | ||
| 7 | |||
| 6 | #include "crypto.h" | 8 | #include "crypto.h" |
| 7 | #include "auth.h" | ||
| 8 | #include "auth_x_protocol.h" | 9 | #include "auth_x_protocol.h" |
| 9 | 10 | ||
| 10 | /* | 11 | /* |
diff --git a/fs/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h index 671d30576c4f..671d30576c4f 100644 --- a/fs/ceph/auth_x_protocol.h +++ b/net/ceph/auth_x_protocol.h | |||
diff --git a/fs/ceph/buffer.c b/net/ceph/buffer.c index cd39f17021de..53d8abfa25d5 100644 --- a/fs/ceph/buffer.c +++ b/net/ceph/buffer.c | |||
| @@ -1,10 +1,11 @@ | |||
| 1 | 1 | ||
| 2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
| 3 | 3 | ||
| 4 | #include <linux/module.h> | ||
| 4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
| 5 | 6 | ||
| 6 | #include "buffer.h" | 7 | #include <linux/ceph/buffer.h> |
| 7 | #include "decode.h" | 8 | #include <linux/ceph/decode.h> |
| 8 | 9 | ||
| 9 | struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) | 10 | struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) |
| 10 | { | 11 | { |
| @@ -32,6 +33,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) | |||
| 32 | dout("buffer_new %p\n", b); | 33 | dout("buffer_new %p\n", b); |
| 33 | return b; | 34 | return b; |
| 34 | } | 35 | } |
| 36 | EXPORT_SYMBOL(ceph_buffer_new); | ||
| 35 | 37 | ||
| 36 | void ceph_buffer_release(struct kref *kref) | 38 | void ceph_buffer_release(struct kref *kref) |
| 37 | { | 39 | { |
| @@ -46,6 +48,7 @@ void ceph_buffer_release(struct kref *kref) | |||
| 46 | } | 48 | } |
| 47 | kfree(b); | 49 | kfree(b); |
| 48 | } | 50 | } |
| 51 | EXPORT_SYMBOL(ceph_buffer_release); | ||
| 49 | 52 | ||
| 50 | int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) | 53 | int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) |
| 51 | { | 54 | { |
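ceph_buffer is a kref-counted byte buffer, and the two EXPORT_SYMBOLs above are what let modular users (rbd, fs/ceph) manage its lifetime. A sketch of the usual pairing; the vec.iov_base field and the ceph_buffer_put() wrapper are assumed from linux/ceph/buffer.h:

    struct ceph_buffer *b = ceph_buffer_new(len, GFP_NOFS);
    if (!b)
            return -ENOMEM;
    memcpy(b->vec.iov_base, src, len);   /* fill the buffer */
    ceph_buffer_put(b);                  /* kref_put -> ceph_buffer_release */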
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c new file mode 100644 index 000000000000..f3e4a13fea0c --- /dev/null +++ b/net/ceph/ceph_common.c | |||
| @@ -0,0 +1,529 @@ | |||
| 1 | |||
| 2 | #include <linux/ceph/ceph_debug.h> | ||
| 3 | #include <linux/backing-dev.h> | ||
| 4 | #include <linux/ctype.h> | ||
| 5 | #include <linux/fs.h> | ||
| 6 | #include <linux/inet.h> | ||
| 7 | #include <linux/in6.h> | ||
| 8 | #include <linux/module.h> | ||
| 9 | #include <linux/mount.h> | ||
| 10 | #include <linux/parser.h> | ||
| 11 | #include <linux/sched.h> | ||
| 12 | #include <linux/seq_file.h> | ||
| 13 | #include <linux/slab.h> | ||
| 14 | #include <linux/statfs.h> | ||
| 15 | #include <linux/string.h> | ||
| 16 | |||
| 17 | |||
| 18 | #include <linux/ceph/libceph.h> | ||
| 19 | #include <linux/ceph/debugfs.h> | ||
| 20 | #include <linux/ceph/decode.h> | ||
| 21 | #include <linux/ceph/mon_client.h> | ||
| 22 | #include <linux/ceph/auth.h> | ||
| 23 | |||
| 24 | |||
| 25 | |||
| 26 | /* | ||
| 27 | * find filename portion of a path (/foo/bar/baz -> baz) | ||
| 28 | */ | ||
| 29 | const char *ceph_file_part(const char *s, int len) | ||
| 30 | { | ||
| 31 | const char *e = s + len; | ||
| 32 | |||
| 33 | while (e != s && *(e-1) != '/') | ||
| 34 | e--; | ||
| 35 | return e; | ||
| 36 | } | ||
| 37 | EXPORT_SYMBOL(ceph_file_part); | ||
| 38 | |||
| 39 | const char *ceph_msg_type_name(int type) | ||
| 40 | { | ||
| 41 | switch (type) { | ||
| 42 | case CEPH_MSG_SHUTDOWN: return "shutdown"; | ||
| 43 | case CEPH_MSG_PING: return "ping"; | ||
| 44 | case CEPH_MSG_AUTH: return "auth"; | ||
| 45 | case CEPH_MSG_AUTH_REPLY: return "auth_reply"; | ||
| 46 | case CEPH_MSG_MON_MAP: return "mon_map"; | ||
| 47 | case CEPH_MSG_MON_GET_MAP: return "mon_get_map"; | ||
| 48 | case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe"; | ||
| 49 | case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack"; | ||
| 50 | case CEPH_MSG_STATFS: return "statfs"; | ||
| 51 | case CEPH_MSG_STATFS_REPLY: return "statfs_reply"; | ||
| 52 | case CEPH_MSG_MDS_MAP: return "mds_map"; | ||
| 53 | case CEPH_MSG_CLIENT_SESSION: return "client_session"; | ||
| 54 | case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; | ||
| 55 | case CEPH_MSG_CLIENT_REQUEST: return "client_request"; | ||
| 56 | case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward"; | ||
| 57 | case CEPH_MSG_CLIENT_REPLY: return "client_reply"; | ||
| 58 | case CEPH_MSG_CLIENT_CAPS: return "client_caps"; | ||
| 59 | case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release"; | ||
| 60 | case CEPH_MSG_CLIENT_SNAP: return "client_snap"; | ||
| 61 | case CEPH_MSG_CLIENT_LEASE: return "client_lease"; | ||
| 62 | case CEPH_MSG_OSD_MAP: return "osd_map"; | ||
| 63 | case CEPH_MSG_OSD_OP: return "osd_op"; | ||
| 64 | case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; | ||
| 65 | default: return "unknown"; | ||
| 66 | } | ||
| 67 | } | ||
| 68 | EXPORT_SYMBOL(ceph_msg_type_name); | ||
| 69 | |||
| 70 | /* | ||
| 71 | * Initially learn our fsid, or verify an fsid matches. | ||
| 72 | */ | ||
| 73 | int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | ||
| 74 | { | ||
| 75 | if (client->have_fsid) { | ||
| 76 | if (ceph_fsid_compare(&client->fsid, fsid)) { | ||
| 77 | pr_err("bad fsid, had %pU got %pU", | ||
| 78 | &client->fsid, fsid); | ||
| 79 | return -1; | ||
| 80 | } | ||
| 81 | } else { | ||
| 82 | pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid); | ||
| 83 | memcpy(&client->fsid, fsid, sizeof(*fsid)); | ||
| 84 | ceph_debugfs_client_init(client); | ||
| 85 | client->have_fsid = true; | ||
| 86 | } | ||
| 87 | return 0; | ||
| 88 | } | ||
| 89 | EXPORT_SYMBOL(ceph_check_fsid); | ||
| 90 | |||
| 91 | static int strcmp_null(const char *s1, const char *s2) | ||
| 92 | { | ||
| 93 | if (!s1 && !s2) | ||
| 94 | return 0; | ||
| 95 | if (s1 && !s2) | ||
| 96 | return -1; | ||
| 97 | if (!s1 && s2) | ||
| 98 | return 1; | ||
| 99 | return strcmp(s1, s2); | ||
| 100 | } | ||
| 101 | |||
| 102 | int ceph_compare_options(struct ceph_options *new_opt, | ||
| 103 | struct ceph_client *client) | ||
| 104 | { | ||
| 105 | struct ceph_options *opt1 = new_opt; | ||
| 106 | struct ceph_options *opt2 = client->options; | ||
| 107 | int ofs = offsetof(struct ceph_options, mon_addr); | ||
| 108 | int i; | ||
| 109 | int ret; | ||
| 110 | |||
| 111 | ret = memcmp(opt1, opt2, ofs); | ||
| 112 | if (ret) | ||
| 113 | return ret; | ||
| 114 | |||
| 115 | ret = strcmp_null(opt1->name, opt2->name); | ||
| 116 | if (ret) | ||
| 117 | return ret; | ||
| 118 | |||
| 119 | ret = strcmp_null(opt1->secret, opt2->secret); | ||
| 120 | if (ret) | ||
| 121 | return ret; | ||
| 122 | |||
| 123 | /* any matching mon ip implies a match */ | ||
| 124 | for (i = 0; i < opt1->num_mon; i++) { | ||
| 125 | if (ceph_monmap_contains(client->monc.monmap, | ||
| 126 | &opt1->mon_addr[i])) | ||
| 127 | return 0; | ||
| 128 | } | ||
| 129 | return -1; | ||
| 130 | } | ||
| 131 | EXPORT_SYMBOL(ceph_compare_options); | ||
| 132 | |||
| 133 | |||
| 134 | static int parse_fsid(const char *str, struct ceph_fsid *fsid) | ||
| 135 | { | ||
| 136 | int i = 0; | ||
| 137 | char tmp[3]; | ||
| 138 | int err = -EINVAL; | ||
| 139 | int d; | ||
| 140 | |||
| 141 | dout("parse_fsid '%s'\n", str); | ||
| 142 | tmp[2] = 0; | ||
| 143 | while (*str && i < 16) { | ||
| 144 | if (ispunct(*str)) { | ||
| 145 | str++; | ||
| 146 | continue; | ||
| 147 | } | ||
| 148 | if (!isxdigit(str[0]) || !isxdigit(str[1])) | ||
| 149 | break; | ||
| 150 | tmp[0] = str[0]; | ||
| 151 | tmp[1] = str[1]; | ||
| 152 | if (sscanf(tmp, "%x", &d) < 1) | ||
| 153 | break; | ||
| 154 | fsid->fsid[i] = d & 0xff; | ||
| 155 | i++; | ||
| 156 | str += 2; | ||
| 157 | } | ||
| 158 | |||
| 159 | if (i == 16) | ||
| 160 | err = 0; | ||
| 161 | dout("parse_fsid ret %d got fsid %pU", err, fsid); | ||
| 162 | return err; | ||
| 163 | } | ||
| 164 | |||
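The fsid syntax parse_fsid() accepts, restated: pairs of hex digits with arbitrary punctuation allowed between them, exactly 16 bytes total, so dashed and bare UUID spellings parse identically. The same loop as a standalone userspace check (the sample UUID is made up):

    #include <ctype.h>
    #include <stdio.h>

    static int parse_fsid(const char *str, unsigned char fsid[16])
    {
            char tmp[3] = { 0 };
            int i = 0, d;

            while (*str && i < 16) {
                    if (ispunct((unsigned char)*str)) {
                            str++;
                            continue;
                    }
                    if (!isxdigit((unsigned char)str[0]) ||
                        !isxdigit((unsigned char)str[1]))
                            break;
                    tmp[0] = str[0];
                    tmp[1] = str[1];
                    if (sscanf(tmp, "%x", &d) < 1)
                            break;
                    fsid[i++] = d & 0xff;
                    str += 2;
            }
            return i == 16 ? 0 : -1;
    }

    int main(void)
    {
            unsigned char fsid[16];

            /* dashed and bare forms both succeed (print 0) */
            printf("%d\n", parse_fsid("8edf29aa-c832-47b6-9d6e-4f1d0c7bfd41", fsid));
            printf("%d\n", parse_fsid("8edf29aac83247b69d6e4f1d0c7bfd41", fsid));
            return 0;
    }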
| 165 | /* | ||
| 166 | * ceph options | ||
| 167 | */ | ||
| 168 | enum { | ||
| 169 | Opt_osdtimeout, | ||
| 170 | Opt_osdkeepalivetimeout, | ||
| 171 | Opt_mount_timeout, | ||
| 172 | Opt_osd_idle_ttl, | ||
| 173 | Opt_last_int, | ||
| 174 | /* int args above */ | ||
| 175 | Opt_fsid, | ||
| 176 | Opt_name, | ||
| 177 | Opt_secret, | ||
| 178 | Opt_ip, | ||
| 179 | Opt_last_string, | ||
| 180 | /* string args above */ | ||
| 181 | Opt_noshare, | ||
| 182 | Opt_nocrc, | ||
| 183 | }; | ||
| 184 | |||
| 185 | static match_table_t opt_tokens = { | ||
| 186 | {Opt_osdtimeout, "osdtimeout=%d"}, | ||
| 187 | {Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, | ||
| 188 | {Opt_mount_timeout, "mount_timeout=%d"}, | ||
| 189 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, | ||
| 190 | /* int args above */ | ||
| 191 | {Opt_fsid, "fsid=%s"}, | ||
| 192 | {Opt_name, "name=%s"}, | ||
| 193 | {Opt_secret, "secret=%s"}, | ||
| 194 | {Opt_ip, "ip=%s"}, | ||
| 195 | /* string args above */ | ||
| 196 | {Opt_noshare, "noshare"}, | ||
| 197 | {Opt_nocrc, "nocrc"}, | ||
| 198 | {-1, NULL} | ||
| 199 | }; | ||
| 200 | |||
| 201 | void ceph_destroy_options(struct ceph_options *opt) | ||
| 202 | { | ||
| 203 | dout("destroy_options %p\n", opt); | ||
| 204 | kfree(opt->name); | ||
| 205 | kfree(opt->secret); | ||
| 206 | kfree(opt); | ||
| 207 | } | ||
| 208 | EXPORT_SYMBOL(ceph_destroy_options); | ||
| 209 | |||
| 210 | int ceph_parse_options(struct ceph_options **popt, char *options, | ||
| 211 | const char *dev_name, const char *dev_name_end, | ||
| 212 | int (*parse_extra_token)(char *c, void *private), | ||
| 213 | void *private) | ||
| 214 | { | ||
| 215 | struct ceph_options *opt; | ||
| 216 | const char *c; | ||
| 217 | int err = -ENOMEM; | ||
| 218 | substring_t argstr[MAX_OPT_ARGS]; | ||
| 219 | |||
| 220 | opt = kzalloc(sizeof(*opt), GFP_KERNEL); | ||
| 221 | if (!opt) | ||
| 222 | return err; | ||
| 223 | opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), | ||
| 224 | GFP_KERNEL); | ||
| 225 | if (!opt->mon_addr) | ||
| 226 | goto out; | ||
| 227 | |||
| 228 | dout("parse_options %p options '%s' dev_name '%s'\n", opt, options, | ||
| 229 | dev_name); | ||
| 230 | |||
| 231 | /* start with defaults */ | ||
| 232 | opt->flags = CEPH_OPT_DEFAULT; | ||
| 233 | opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; | ||
| 234 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; | ||
| 235 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ | ||
| 236 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ | ||
| 237 | |||
| 238 | /* get mon ip(s) */ | ||
| 239 | /* ip1[:port1][,ip2[:port2]...] */ | ||
| 240 | err = ceph_parse_ips(dev_name, dev_name_end, opt->mon_addr, | ||
| 241 | CEPH_MAX_MON, &opt->num_mon); | ||
| 242 | if (err < 0) | ||
| 243 | goto out; | ||
| 244 | |||
| 245 | /* parse mount options */ | ||
| 246 | while ((c = strsep(&options, ",")) != NULL) { | ||
| 247 | int token, intval, ret; | ||
| 248 | if (!*c) | ||
| 249 | continue; | ||
| 250 | err = -EINVAL; | ||
| 251 | token = match_token((char *)c, opt_tokens, argstr); | ||
| 252 | if (token < 0 && parse_extra_token) { | ||
| 253 | /* extra? */ | ||
| 254 | err = parse_extra_token((char *)c, private); | ||
| 255 | if (err < 0) { | ||
| 256 | pr_err("bad option at '%s'\n", c); | ||
| 257 | goto out; | ||
| 258 | } | ||
| 259 | continue; | ||
| 260 | } | ||
| 261 | if (token < Opt_last_int) { | ||
| 262 | ret = match_int(&argstr[0], &intval); | ||
| 263 | if (ret < 0) { | ||
| 264 | pr_err("bad mount option arg (not int) " | ||
| 265 | "at '%s'\n", c); | ||
| 266 | continue; | ||
| 267 | } | ||
| 268 | dout("got int token %d val %d\n", token, intval); | ||
| 269 | } else if (token > Opt_last_int && token < Opt_last_string) { | ||
| 270 | dout("got string token %d val %s\n", token, | ||
| 271 | argstr[0].from); | ||
| 272 | } else { | ||
| 273 | dout("got token %d\n", token); | ||
| 274 | } | ||
| 275 | switch (token) { | ||
| 276 | case Opt_ip: | ||
| 277 | err = ceph_parse_ips(argstr[0].from, | ||
| 278 | argstr[0].to, | ||
| 279 | &opt->my_addr, | ||
| 280 | 1, NULL); | ||
| 281 | if (err < 0) | ||
| 282 | goto out; | ||
| 283 | opt->flags |= CEPH_OPT_MYIP; | ||
| 284 | break; | ||
| 285 | |||
| 286 | case Opt_fsid: | ||
| 287 | err = parse_fsid(argstr[0].from, &opt->fsid); | ||
| 288 | if (err == 0) | ||
| 289 | opt->flags |= CEPH_OPT_FSID; | ||
| 290 | break; | ||
| 291 | case Opt_name: | ||
| 292 | opt->name = kstrndup(argstr[0].from, | ||
| 293 | argstr[0].to-argstr[0].from, | ||
| 294 | GFP_KERNEL); | ||
| 295 | break; | ||
| 296 | case Opt_secret: | ||
| 297 | opt->secret = kstrndup(argstr[0].from, | ||
| 298 | argstr[0].to-argstr[0].from, | ||
| 299 | GFP_KERNEL); | ||
| 300 | break; | ||
| 301 | |||
| 302 | /* misc */ | ||
| 303 | case Opt_osdtimeout: | ||
| 304 | opt->osd_timeout = intval; | ||
| 305 | break; | ||
| 306 | case Opt_osdkeepalivetimeout: | ||
| 307 | opt->osd_keepalive_timeout = intval; | ||
| 308 | break; | ||
| 309 | case Opt_osd_idle_ttl: | ||
| 310 | opt->osd_idle_ttl = intval; | ||
| 311 | break; | ||
| 312 | case Opt_mount_timeout: | ||
| 313 | opt->mount_timeout = intval; | ||
| 314 | break; | ||
| 315 | |||
| 316 | case Opt_noshare: | ||
| 317 | opt->flags |= CEPH_OPT_NOSHARE; | ||
| 318 | break; | ||
| 319 | |||
| 320 | case Opt_nocrc: | ||
| 321 | opt->flags |= CEPH_OPT_NOCRC; | ||
| 322 | break; | ||
| 323 | |||
| 324 | default: | ||
| 325 | BUG_ON(token); | ||
| 326 | } | ||
| 327 | } | ||
| 328 | |||
| 329 | /* success */ | ||
| 330 | *popt = opt; | ||
| 331 | return 0; | ||
| 332 | |||
| 333 | out: | ||
| 334 | ceph_destroy_options(opt); | ||
| 335 | return err; | ||
| 336 | } | ||
| 337 | EXPORT_SYMBOL(ceph_parse_options); | ||
| 338 | |||
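Calling convention for the parser, as a hedged sketch: dev_name..dev_name_end carries the monitor list (ip1[:port1][,ip2[:port2]...]), options is the comma-separated token string, and parse_extra_token lets the caller claim tokens libceph does not recognize. The hook and cookie names below are illustrative:

    struct ceph_options *opt;
    int err;

    err = ceph_parse_options(&opt, options, dev_name, dev_name_end,
                             parse_fs_token, fs_private);
    if (err < 0)
            return err;
    /* opt is now owned by the caller until it is handed to
     * ceph_create_client(); on an early exit, call ceph_destroy_options(). */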
| 339 | u64 ceph_client_id(struct ceph_client *client) | ||
| 340 | { | ||
| 341 | return client->monc.auth->global_id; | ||
| 342 | } | ||
| 343 | EXPORT_SYMBOL(ceph_client_id); | ||
| 344 | |||
| 345 | /* | ||
| 346 | * create a fresh client instance | ||
| 347 | */ | ||
| 348 | struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private) | ||
| 349 | { | ||
| 350 | struct ceph_client *client; | ||
| 351 | int err = -ENOMEM; | ||
| 352 | |||
| 353 | client = kzalloc(sizeof(*client), GFP_KERNEL); | ||
| 354 | if (client == NULL) | ||
| 355 | return ERR_PTR(-ENOMEM); | ||
| 356 | |||
| 357 | client->private = private; | ||
| 358 | client->options = opt; | ||
| 359 | |||
| 360 | mutex_init(&client->mount_mutex); | ||
| 361 | init_waitqueue_head(&client->auth_wq); | ||
| 362 | client->auth_err = 0; | ||
| 363 | |||
| 364 | client->extra_mon_dispatch = NULL; | ||
| 365 | client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT; | ||
| 366 | client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT; | ||
| 367 | |||
| 368 | client->msgr = NULL; | ||
| 369 | |||
| 370 | /* subsystems */ | ||
| 371 | err = ceph_monc_init(&client->monc, client); | ||
| 372 | if (err < 0) | ||
| 373 | goto fail; | ||
| 374 | err = ceph_osdc_init(&client->osdc, client); | ||
| 375 | if (err < 0) | ||
| 376 | goto fail_monc; | ||
| 377 | |||
| 378 | return client; | ||
| 379 | |||
| 380 | fail_monc: | ||
| 381 | ceph_monc_stop(&client->monc); | ||
| 382 | fail: | ||
| 383 | kfree(client); | ||
| 384 | return ERR_PTR(err); | ||
| 385 | } | ||
| 386 | EXPORT_SYMBOL(ceph_create_client); | ||
| 387 | |||
| 388 | void ceph_destroy_client(struct ceph_client *client) | ||
| 389 | { | ||
| 390 | dout("destroy_client %p\n", client); | ||
| 391 | |||
| 392 | /* unmount */ | ||
| 393 | ceph_osdc_stop(&client->osdc); | ||
| 394 | |||
| 395 | /* | ||
| 396 | * make sure mds and osd connections close out before destroying | ||
| 397 | * the auth module, which is needed to free those connections' | ||
| 398 | * ceph_authorizers. | ||
| 399 | */ | ||
| 400 | ceph_msgr_flush(); | ||
| 401 | |||
| 402 | ceph_monc_stop(&client->monc); | ||
| 403 | |||
| 404 | ceph_debugfs_client_cleanup(client); | ||
| 405 | |||
| 406 | if (client->msgr) | ||
| 407 | ceph_messenger_destroy(client->msgr); | ||
| 408 | |||
| 409 | ceph_destroy_options(client->options); | ||
| 410 | |||
| 411 | kfree(client); | ||
| 412 | dout("destroy_client %p done\n", client); | ||
| 413 | } | ||
| 414 | EXPORT_SYMBOL(ceph_destroy_client); | ||
| 415 | |||
| 416 | /* | ||
| 417 | * true if we have both the mon and osd maps (and have thus joined the cluster) | ||
| 418 | */ | ||
| 419 | static int have_mon_and_osd_map(struct ceph_client *client) | ||
| 420 | { | ||
| 421 | return client->monc.monmap && client->monc.monmap->epoch && | ||
| 422 | client->osdc.osdmap && client->osdc.osdmap->epoch; | ||
| 423 | } | ||
| 424 | |||
| 425 | /* | ||
| 426 | * open session: join the ceph cluster and wait for the mon and osd maps. | ||
| 427 | */ | ||
| 428 | int __ceph_open_session(struct ceph_client *client, unsigned long started) | ||
| 429 | { | ||
| 430 | struct ceph_entity_addr *myaddr = NULL; | ||
| 431 | int err; | ||
| 432 | unsigned long timeout = client->options->mount_timeout * HZ; | ||
| 433 | |||
| 434 | /* initialize the messenger */ | ||
| 435 | if (client->msgr == NULL) { | ||
| 436 | if (ceph_test_opt(client, MYIP)) | ||
| 437 | myaddr = &client->options->my_addr; | ||
| 438 | client->msgr = ceph_messenger_create(myaddr, | ||
| 439 | client->supported_features, | ||
| 440 | client->required_features); | ||
| 441 | if (IS_ERR(client->msgr)) { | ||
| 442 | err = PTR_ERR(client->msgr); client->msgr = NULL; | ||
| 443 | return err; | ||
| 444 | } | ||
| 445 | client->msgr->nocrc = ceph_test_opt(client, NOCRC); | ||
| 446 | } | ||
| 447 | |||
| 448 | /* open session, and wait for mon and osd maps */ | ||
| 449 | err = ceph_monc_open_session(&client->monc); | ||
| 450 | if (err < 0) | ||
| 451 | return err; | ||
| 452 | |||
| 453 | while (!have_mon_and_osd_map(client)) { | ||
| 454 | err = -EIO; | ||
| 455 | if (timeout && time_after_eq(jiffies, started + timeout)) | ||
| 456 | return err; | ||
| 457 | |||
| 458 | /* wait */ | ||
| 459 | dout("mount waiting for mon_map\n"); | ||
| 460 | err = wait_event_interruptible_timeout(client->auth_wq, | ||
| 461 | have_mon_and_osd_map(client) || (client->auth_err < 0), | ||
| 462 | timeout); | ||
| 463 | if (err == -EINTR || err == -ERESTARTSYS) | ||
| 464 | return err; | ||
| 465 | if (client->auth_err < 0) | ||
| 466 | return client->auth_err; | ||
| 467 | } | ||
| 468 | |||
| 469 | return 0; | ||
| 470 | } | ||
| 471 | EXPORT_SYMBOL(__ceph_open_session); | ||
| 472 | |||
| 473 | |||
| 474 | int ceph_open_session(struct ceph_client *client) | ||
| 475 | { | ||
| 476 | int ret; | ||
| 477 | unsigned long started = jiffies; /* note the start time */ | ||
| 478 | |||
| 479 | dout("open_session start\n"); | ||
| 480 | mutex_lock(&client->mount_mutex); | ||
| 481 | |||
| 482 | ret = __ceph_open_session(client, started); | ||
| 483 | |||
| 484 | mutex_unlock(&client->mount_mutex); | ||
| 485 | return ret; | ||
| 486 | } | ||
| 487 | EXPORT_SYMBOL(ceph_open_session); | ||
| 488 | |||
| 489 | |||
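Putting the exported pieces together, the consumer flow this file expects looks roughly like the following (a sketch; ownership of opt transfers to the client on successful creation, per ceph_create_client()/ceph_destroy_client() above):

    struct ceph_client *client;
    int err;

    client = ceph_create_client(opt, private_data);
    if (IS_ERR(client))
            return PTR_ERR(client);      /* opt still owned by the caller */

    err = ceph_open_session(client);     /* join cluster, wait for maps */
    if (err < 0) {
            ceph_destroy_client(client); /* also destroys opt now */
            return err;
    }
    /* ... use client->osdc and client->monc ... */
    ceph_destroy_client(client);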
| 490 | static int __init init_ceph_lib(void) | ||
| 491 | { | ||
| 492 | int ret = 0; | ||
| 493 | |||
| 494 | ret = ceph_debugfs_init(); | ||
| 495 | if (ret < 0) | ||
| 496 | goto out; | ||
| 497 | |||
| 498 | ret = ceph_msgr_init(); | ||
| 499 | if (ret < 0) | ||
| 500 | goto out_debugfs; | ||
| 501 | |||
| 502 | pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n", | ||
| 503 | CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL, | ||
| 504 | CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, | ||
| 505 | CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); | ||
| 506 | |||
| 507 | return 0; | ||
| 508 | |||
| 509 | out_debugfs: | ||
| 510 | ceph_debugfs_cleanup(); | ||
| 511 | out: | ||
| 512 | return ret; | ||
| 513 | } | ||
| 514 | |||
| 515 | static void __exit exit_ceph_lib(void) | ||
| 516 | { | ||
| 517 | dout("exit_ceph_lib\n"); | ||
| 518 | ceph_msgr_exit(); | ||
| 519 | ceph_debugfs_cleanup(); | ||
| 520 | } | ||
| 521 | |||
| 522 | module_init(init_ceph_lib); | ||
| 523 | module_exit(exit_ceph_lib); | ||
| 524 | |||
| 525 | MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); | ||
| 526 | MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); | ||
| 527 | MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); | ||
| 528 | MODULE_DESCRIPTION("Ceph core library for Linux"); | ||
| 529 | MODULE_LICENSE("GPL"); | ||
diff --git a/fs/ceph/ceph_fs.c b/net/ceph/ceph_fs.c index 3ac6cc7c1156..a3a3a31d3c37 100644 --- a/fs/ceph/ceph_fs.c +++ b/net/ceph/ceph_fs.c | |||
| @@ -1,7 +1,8 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Some non-inline ceph helpers | 2 | * Some non-inline ceph helpers |
| 3 | */ | 3 | */ |
| 4 | #include "types.h" | 4 | #include <linux/module.h> |
| 5 | #include <linux/ceph/types.h> | ||
| 5 | 6 | ||
| 6 | /* | 7 | /* |
| 7 | * return true if @layout appears to be valid | 8 | * return true if @layout appears to be valid |
| @@ -52,6 +53,7 @@ int ceph_flags_to_mode(int flags) | |||
| 52 | 53 | ||
| 53 | return mode; | 54 | return mode; |
| 54 | } | 55 | } |
| 56 | EXPORT_SYMBOL(ceph_flags_to_mode); | ||
| 55 | 57 | ||
| 56 | int ceph_caps_for_mode(int mode) | 58 | int ceph_caps_for_mode(int mode) |
| 57 | { | 59 | { |
| @@ -70,3 +72,4 @@ int ceph_caps_for_mode(int mode) | |||
| 70 | 72 | ||
| 71 | return caps; | 73 | return caps; |
| 72 | } | 74 | } |
| 75 | EXPORT_SYMBOL(ceph_caps_for_mode); | ||
diff --git a/fs/ceph/ceph_hash.c b/net/ceph/ceph_hash.c index bd570015d147..815ef8826796 100644 --- a/fs/ceph/ceph_hash.c +++ b/net/ceph/ceph_hash.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | 1 | ||
| 2 | #include "types.h" | 2 | #include <linux/ceph/types.h> |
| 3 | 3 | ||
| 4 | /* | 4 | /* |
| 5 | * Robert Jenkins' hash function. | 5 | * Robert Jenkins' hash function. |
diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c new file mode 100644 index 000000000000..3fbda04de29c --- /dev/null +++ b/net/ceph/ceph_strings.c | |||
| @@ -0,0 +1,84 @@ | |||
| 1 | /* | ||
| 2 | * Ceph string constants | ||
| 3 | */ | ||
| 4 | #include <linux/module.h> | ||
| 5 | #include <linux/ceph/types.h> | ||
| 6 | |||
| 7 | const char *ceph_entity_type_name(int type) | ||
| 8 | { | ||
| 9 | switch (type) { | ||
| 10 | case CEPH_ENTITY_TYPE_MDS: return "mds"; | ||
| 11 | case CEPH_ENTITY_TYPE_OSD: return "osd"; | ||
| 12 | case CEPH_ENTITY_TYPE_MON: return "mon"; | ||
| 13 | case CEPH_ENTITY_TYPE_CLIENT: return "client"; | ||
| 14 | case CEPH_ENTITY_TYPE_AUTH: return "auth"; | ||
| 15 | default: return "unknown"; | ||
| 16 | } | ||
| 17 | } | ||
| 18 | |||
| 19 | const char *ceph_osd_op_name(int op) | ||
| 20 | { | ||
| 21 | switch (op) { | ||
| 22 | case CEPH_OSD_OP_READ: return "read"; | ||
| 23 | case CEPH_OSD_OP_STAT: return "stat"; | ||
| 24 | |||
| 25 | case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; | ||
| 26 | |||
| 27 | case CEPH_OSD_OP_WRITE: return "write"; | ||
| 28 | case CEPH_OSD_OP_DELETE: return "delete"; | ||
| 29 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; | ||
| 30 | case CEPH_OSD_OP_ZERO: return "zero"; | ||
| 31 | case CEPH_OSD_OP_WRITEFULL: return "writefull"; | ||
| 32 | case CEPH_OSD_OP_ROLLBACK: return "rollback"; | ||
| 33 | |||
| 34 | case CEPH_OSD_OP_APPEND: return "append"; | ||
| 35 | case CEPH_OSD_OP_STARTSYNC: return "startsync"; | ||
| 36 | case CEPH_OSD_OP_SETTRUNC: return "settrunc"; | ||
| 37 | case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc"; | ||
| 38 | |||
| 39 | case CEPH_OSD_OP_TMAPUP: return "tmapup"; | ||
| 40 | case CEPH_OSD_OP_TMAPGET: return "tmapget"; | ||
| 41 | case CEPH_OSD_OP_TMAPPUT: return "tmapput"; | ||
| 42 | |||
| 43 | case CEPH_OSD_OP_GETXATTR: return "getxattr"; | ||
| 44 | case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; | ||
| 45 | case CEPH_OSD_OP_SETXATTR: return "setxattr"; | ||
| 46 | case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; | ||
| 47 | case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; | ||
| 48 | case CEPH_OSD_OP_RMXATTR: return "rmxattr"; | ||
| 49 | case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; | ||
| 50 | |||
| 51 | case CEPH_OSD_OP_PULL: return "pull"; | ||
| 52 | case CEPH_OSD_OP_PUSH: return "push"; | ||
| 53 | case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; | ||
| 54 | case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; | ||
| 55 | case CEPH_OSD_OP_SCRUB: return "scrub"; | ||
| 56 | |||
| 57 | case CEPH_OSD_OP_WRLOCK: return "wrlock"; | ||
| 58 | case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; | ||
| 59 | case CEPH_OSD_OP_RDLOCK: return "rdlock"; | ||
| 60 | case CEPH_OSD_OP_RDUNLOCK: return "rdunlock"; | ||
| 61 | case CEPH_OSD_OP_UPLOCK: return "uplock"; | ||
| 62 | case CEPH_OSD_OP_DNLOCK: return "dnlock"; | ||
| 63 | |||
| 64 | case CEPH_OSD_OP_CALL: return "call"; | ||
| 65 | |||
| 66 | case CEPH_OSD_OP_PGLS: return "pgls"; | ||
| 67 | } | ||
| 68 | return "???"; | ||
| 69 | } | ||
| 70 | |||
| 71 | |||
| 72 | const char *ceph_pool_op_name(int op) | ||
| 73 | { | ||
| 74 | switch (op) { | ||
| 75 | case POOL_OP_CREATE: return "create"; | ||
| 76 | case POOL_OP_DELETE: return "delete"; | ||
| 77 | case POOL_OP_AUID_CHANGE: return "auid change"; | ||
| 78 | case POOL_OP_CREATE_SNAP: return "create snap"; | ||
| 79 | case POOL_OP_DELETE_SNAP: return "delete snap"; | ||
| 80 | case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; | ||
| 81 | case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; | ||
| 82 | } | ||
| 83 | return "???"; | ||
| 84 | } | ||
diff --git a/fs/ceph/crush/crush.c b/net/ceph/crush/crush.c index fabd302e5779..d6ebb13a18a4 100644 --- a/fs/ceph/crush/crush.c +++ b/net/ceph/crush/crush.c | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | # define BUG_ON(x) assert(!(x)) | 8 | # define BUG_ON(x) assert(!(x)) |
| 9 | #endif | 9 | #endif |
| 10 | 10 | ||
| 11 | #include "crush.h" | 11 | #include <linux/crush/crush.h> |
| 12 | 12 | ||
| 13 | const char *crush_bucket_alg_name(int alg) | 13 | const char *crush_bucket_alg_name(int alg) |
| 14 | { | 14 | { |
diff --git a/fs/ceph/crush/hash.c b/net/ceph/crush/hash.c index 5873aed694bf..5bb63e37a8a1 100644 --- a/fs/ceph/crush/hash.c +++ b/net/ceph/crush/hash.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | 1 | ||
| 2 | #include <linux/types.h> | 2 | #include <linux/types.h> |
| 3 | #include "hash.h" | 3 | #include <linux/crush/hash.h> |
| 4 | 4 | ||
| 5 | /* | 5 | /* |
| 6 | * Robert Jenkins' function for mixing 32-bit values | 6 | * Robert Jenkins' function for mixing 32-bit values |
diff --git a/fs/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index a4eec133258e..42599e31dcad 100644 --- a/fs/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c | |||
| @@ -18,8 +18,8 @@ | |||
| 18 | # define kfree(x) free(x) | 18 | # define kfree(x) free(x) |
| 19 | #endif | 19 | #endif |
| 20 | 20 | ||
| 21 | #include "crush.h" | 21 | #include <linux/crush/crush.h> |
| 22 | #include "hash.h" | 22 | #include <linux/crush/hash.h> |
| 23 | 23 | ||
| 24 | /* | 24 | /* |
| 25 | * Implement the core CRUSH mapping algorithm. | 25 | * Implement the core CRUSH mapping algorithm. |
diff --git a/fs/ceph/crypto.c b/net/ceph/crypto.c index a3e627f63293..7b505b0c983f 100644 --- a/fs/ceph/crypto.c +++ b/net/ceph/crypto.c | |||
| @@ -1,13 +1,13 @@ | |||
| 1 | 1 | ||
| 2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
| 3 | 3 | ||
| 4 | #include <linux/err.h> | 4 | #include <linux/err.h> |
| 5 | #include <linux/scatterlist.h> | 5 | #include <linux/scatterlist.h> |
| 6 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
| 7 | #include <crypto/hash.h> | 7 | #include <crypto/hash.h> |
| 8 | 8 | ||
| 9 | #include <linux/ceph/decode.h> | ||
| 9 | #include "crypto.h" | 10 | #include "crypto.h" |
| 10 | #include "decode.h" | ||
| 11 | 11 | ||
| 12 | int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) | 12 | int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) |
| 13 | { | 13 | { |
diff --git a/fs/ceph/crypto.h b/net/ceph/crypto.h index bdf38607323c..f9eccace592b 100644 --- a/fs/ceph/crypto.h +++ b/net/ceph/crypto.h | |||
| @@ -1,8 +1,8 @@ | |||
| 1 | #ifndef _FS_CEPH_CRYPTO_H | 1 | #ifndef _FS_CEPH_CRYPTO_H |
| 2 | #define _FS_CEPH_CRYPTO_H | 2 | #define _FS_CEPH_CRYPTO_H |
| 3 | 3 | ||
| 4 | #include "types.h" | 4 | #include <linux/ceph/types.h> |
| 5 | #include "buffer.h" | 5 | #include <linux/ceph/buffer.h> |
| 6 | 6 | ||
| 7 | /* | 7 | /* |
| 8 | * cryptographic secret | 8 | * cryptographic secret |
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c new file mode 100644 index 000000000000..27d4ea315d12 --- /dev/null +++ b/net/ceph/debugfs.c | |||
| @@ -0,0 +1,267 @@ | |||
| 1 | #include <linux/ceph/ceph_debug.h> | ||
| 2 | |||
| 3 | #include <linux/device.h> | ||
| 4 | #include <linux/slab.h> | ||
| 5 | #include <linux/module.h> | ||
| 6 | #include <linux/ctype.h> | ||
| 7 | #include <linux/debugfs.h> | ||
| 8 | #include <linux/seq_file.h> | ||
| 9 | |||
| 10 | #include <linux/ceph/libceph.h> | ||
| 11 | #include <linux/ceph/mon_client.h> | ||
| 12 | #include <linux/ceph/auth.h> | ||
| 13 | #include <linux/ceph/debugfs.h> | ||
| 14 | |||
| 15 | #ifdef CONFIG_DEBUG_FS | ||
| 16 | |||
| 17 | /* | ||
| 18 | * Implement the /sys/kernel/debug/ceph hierarchy | ||
| 19 | * | ||
| 20 | * /sys/kernel/debug/ceph/client* - an instance of the ceph client | ||
| 21 | * .../osdmap - current osdmap | ||
| 22 | * .../monmap - current monmap | ||
| 23 | * .../osdc - active osd requests | ||
| 24 | * .../monc - mon client state | ||
| 25 | * .../dentry_lru - dump contents of dentry lru | ||
| 26 | * .../caps - expose cap (reservation) stats | ||
| 27 | * .../bdi - symlink to ../../bdi/something | ||
| 28 | */ | ||
| 29 | |||
| 30 | static struct dentry *ceph_debugfs_dir; | ||
| 31 | |||
| 32 | static int monmap_show(struct seq_file *s, void *p) | ||
| 33 | { | ||
| 34 | int i; | ||
| 35 | struct ceph_client *client = s->private; | ||
| 36 | |||
| 37 | if (client->monc.monmap == NULL) | ||
| 38 | return 0; | ||
| 39 | |||
| 40 | seq_printf(s, "epoch %d\n", client->monc.monmap->epoch); | ||
| 41 | for (i = 0; i < client->monc.monmap->num_mon; i++) { | ||
| 42 | struct ceph_entity_inst *inst = | ||
| 43 | &client->monc.monmap->mon_inst[i]; | ||
| 44 | |||
| 45 | seq_printf(s, "\t%s%lld\t%s\n", | ||
| 46 | ENTITY_NAME(inst->name), | ||
| 47 | ceph_pr_addr(&inst->addr.in_addr)); | ||
| 48 | } | ||
| 49 | return 0; | ||
| 50 | } | ||
| 51 | |||
| 52 | static int osdmap_show(struct seq_file *s, void *p) | ||
| 53 | { | ||
| 54 | int i; | ||
| 55 | struct ceph_client *client = s->private; | ||
| 56 | struct rb_node *n; | ||
| 57 | |||
| 58 | if (client->osdc.osdmap == NULL) | ||
| 59 | return 0; | ||
| 60 | seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch); | ||
| 61 | seq_printf(s, "flags%s%s\n", | ||
| 62 | (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ? | ||
| 63 | " NEARFULL" : "", | ||
| 64 | (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ? | ||
| 65 | " FULL" : ""); | ||
| 66 | for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { | ||
| 67 | struct ceph_pg_pool_info *pool = | ||
| 68 | rb_entry(n, struct ceph_pg_pool_info, node); | ||
| 69 | seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", | ||
| 70 | pool->id, pool->v.pg_num, pool->pg_num_mask, | ||
| 71 | pool->v.lpg_num, pool->lpg_num_mask); | ||
| 72 | } | ||
| 73 | for (i = 0; i < client->osdc.osdmap->max_osd; i++) { | ||
| 74 | struct ceph_entity_addr *addr = | ||
| 75 | &client->osdc.osdmap->osd_addr[i]; | ||
| 76 | int state = client->osdc.osdmap->osd_state[i]; | ||
| 77 | char sb[64]; | ||
| 78 | |||
| 79 | seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n", | ||
| 80 | i, ceph_pr_addr(&addr->in_addr), | ||
| 81 | ((client->osdc.osdmap->osd_weight[i]*100) >> 16), | ||
| 82 | ceph_osdmap_state_str(sb, sizeof(sb), state)); | ||
| 83 | } | ||
| 84 | return 0; | ||
| 85 | } | ||
| 86 | |||
| 87 | static int monc_show(struct seq_file *s, void *p) | ||
| 88 | { | ||
| 89 | struct ceph_client *client = s->private; | ||
| 90 | struct ceph_mon_generic_request *req; | ||
| 91 | struct ceph_mon_client *monc = &client->monc; | ||
| 92 | struct rb_node *rp; | ||
| 93 | |||
| 94 | mutex_lock(&monc->mutex); | ||
| 95 | |||
| 96 | if (monc->have_mdsmap) | ||
| 97 | seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap); | ||
| 98 | if (monc->have_osdmap) | ||
| 99 | seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap); | ||
| 100 | if (monc->want_next_osdmap) | ||
| 101 | seq_printf(s, "want next osdmap\n"); | ||
| 102 | |||
| 103 | for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { | ||
| 104 | __u16 op; | ||
| 105 | req = rb_entry(rp, struct ceph_mon_generic_request, node); | ||
| 106 | op = le16_to_cpu(req->request->hdr.type); | ||
| 107 | if (op == CEPH_MSG_STATFS) | ||
| 108 | seq_printf(s, "%lld statfs\n", req->tid); | ||
| 109 | else | ||
| 110 | seq_printf(s, "%lld unknown\n", req->tid); | ||
| 111 | } | ||
| 112 | |||
| 113 | mutex_unlock(&monc->mutex); | ||
| 114 | return 0; | ||
| 115 | } | ||
| 116 | |||
| 117 | static int osdc_show(struct seq_file *s, void *pp) | ||
| 118 | { | ||
| 119 | struct ceph_client *client = s->private; | ||
| 120 | struct ceph_osd_client *osdc = &client->osdc; | ||
| 121 | struct rb_node *p; | ||
| 122 | |||
| 123 | mutex_lock(&osdc->request_mutex); | ||
| 124 | for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { | ||
| 125 | struct ceph_osd_request *req; | ||
| 126 | struct ceph_osd_request_head *head; | ||
| 127 | struct ceph_osd_op *op; | ||
| 128 | int num_ops; | ||
| 129 | int opcode, olen; | ||
| 130 | int i; | ||
| 131 | |||
| 132 | req = rb_entry(p, struct ceph_osd_request, r_node); | ||
| 133 | |||
| 134 | seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, | ||
| 135 | req->r_osd ? req->r_osd->o_osd : -1, | ||
| 136 | le32_to_cpu(req->r_pgid.pool), | ||
| 137 | le16_to_cpu(req->r_pgid.ps)); | ||
| 138 | |||
| 139 | head = req->r_request->front.iov_base; | ||
| 140 | op = (void *)(head + 1); | ||
| 141 | |||
| 142 | num_ops = le16_to_cpu(head->num_ops); | ||
| 143 | olen = le32_to_cpu(head->object_len); | ||
| 144 | seq_printf(s, "%.*s", olen, | ||
| 145 | (const char *)(head->ops + num_ops)); | ||
| 146 | |||
| 147 | if (req->r_reassert_version.epoch) | ||
| 148 | seq_printf(s, "\t%u'%llu", | ||
| 149 | (unsigned)le32_to_cpu(req->r_reassert_version.epoch), | ||
| 150 | le64_to_cpu(req->r_reassert_version.version)); | ||
| 151 | else | ||
| 152 | seq_printf(s, "\t"); | ||
| 153 | |||
| 154 | for (i = 0; i < num_ops; i++) { | ||
| 155 | opcode = le16_to_cpu(op->op); | ||
| 156 | seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); | ||
| 157 | op++; | ||
| 158 | } | ||
| 159 | |||
| 160 | seq_printf(s, "\n"); | ||
| 161 | } | ||
| 162 | mutex_unlock(&osdc->request_mutex); | ||
| 163 | return 0; | ||
| 164 | } | ||
| 165 | |||
| 166 | CEPH_DEFINE_SHOW_FUNC(monmap_show) | ||
| 167 | CEPH_DEFINE_SHOW_FUNC(osdmap_show) | ||
| 168 | CEPH_DEFINE_SHOW_FUNC(monc_show) | ||
| 169 | CEPH_DEFINE_SHOW_FUNC(osdc_show) | ||
| 170 | |||
| 171 | int ceph_debugfs_init(void) | ||
| 172 | { | ||
| 173 | ceph_debugfs_dir = debugfs_create_dir("ceph", NULL); | ||
| 174 | if (!ceph_debugfs_dir) | ||
| 175 | return -ENOMEM; | ||
| 176 | return 0; | ||
| 177 | } | ||
| 178 | |||
| 179 | void ceph_debugfs_cleanup(void) | ||
| 180 | { | ||
| 181 | debugfs_remove(ceph_debugfs_dir); | ||
| 182 | } | ||
| 183 | |||
| 184 | int ceph_debugfs_client_init(struct ceph_client *client) | ||
| 185 | { | ||
| 186 | int ret = -ENOMEM; | ||
| 187 | char name[80]; | ||
| 188 | |||
| 189 | snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, | ||
| 190 | client->monc.auth->global_id); | ||
| 191 | |||
| 192 | client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); | ||
| 193 | if (!client->debugfs_dir) | ||
| 194 | goto out; | ||
| 195 | |||
| 196 | client->monc.debugfs_file = debugfs_create_file("monc", | ||
| 197 | 0600, | ||
| 198 | client->debugfs_dir, | ||
| 199 | client, | ||
| 200 | &monc_show_fops); | ||
| 201 | if (!client->monc.debugfs_file) | ||
| 202 | goto out; | ||
| 203 | |||
| 204 | client->osdc.debugfs_file = debugfs_create_file("osdc", | ||
| 205 | 0600, | ||
| 206 | client->debugfs_dir, | ||
| 207 | client, | ||
| 208 | &osdc_show_fops); | ||
| 209 | if (!client->osdc.debugfs_file) | ||
| 210 | goto out; | ||
| 211 | |||
| 212 | client->debugfs_monmap = debugfs_create_file("monmap", | ||
| 213 | 0600, | ||
| 214 | client->debugfs_dir, | ||
| 215 | client, | ||
| 216 | &monmap_show_fops); | ||
| 217 | if (!client->debugfs_monmap) | ||
| 218 | goto out; | ||
| 219 | |||
| 220 | client->debugfs_osdmap = debugfs_create_file("osdmap", | ||
| 221 | 0600, | ||
| 222 | client->debugfs_dir, | ||
| 223 | client, | ||
| 224 | &osdmap_show_fops); | ||
| 225 | if (!client->debugfs_osdmap) | ||
| 226 | goto out; | ||
| 227 | |||
| 228 | return 0; | ||
| 229 | |||
| 230 | out: | ||
| 231 | ceph_debugfs_client_cleanup(client); | ||
| 232 | return ret; | ||
| 233 | } | ||
| 234 | |||
| 235 | void ceph_debugfs_client_cleanup(struct ceph_client *client) | ||
| 236 | { | ||
| 237 | debugfs_remove(client->debugfs_osdmap); | ||
| 238 | debugfs_remove(client->debugfs_monmap); | ||
| 239 | debugfs_remove(client->osdc.debugfs_file); | ||
| 240 | debugfs_remove(client->monc.debugfs_file); | ||
| 241 | debugfs_remove(client->debugfs_dir); | ||
| 242 | } | ||
| 243 | |||
| 244 | #else /* CONFIG_DEBUG_FS */ | ||
| 245 | |||
| 246 | int ceph_debugfs_init(void) | ||
| 247 | { | ||
| 248 | return 0; | ||
| 249 | } | ||
| 250 | |||
| 251 | void ceph_debugfs_cleanup(void) | ||
| 252 | { | ||
| 253 | } | ||
| 254 | |||
| 255 | int ceph_debugfs_client_init(struct ceph_client *client) | ||
| 256 | { | ||
| 257 | return 0; | ||
| 258 | } | ||
| 259 | |||
| 260 | void ceph_debugfs_client_cleanup(struct ceph_client *client) | ||
| 261 | { | ||
| 262 | } | ||
| 263 | |||
| 264 | #endif /* CONFIG_DEBUG_FS */ | ||
| 265 | |||
| 266 | EXPORT_SYMBOL(ceph_debugfs_init); | ||
| 267 | EXPORT_SYMBOL(ceph_debugfs_cleanup); | ||
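The four CEPH_DEFINE_SHOW_FUNC() lines above generate the *_fops handed to debugfs_create_file(); the macro itself lives in linux/ceph/debugfs.h, and is assumed here to be the usual seq_file single_open boilerplate:

    #define CEPH_DEFINE_SHOW_FUNC(name)                                   \
    static int name##_open(struct inode *inode, struct file *file)       \
    {                                                                     \
            return single_open(file, name, inode->i_private);             \
    }                                                                     \
    static const struct file_operations name##_fops = {                  \
            .open    = name##_open,                                       \
            .read    = seq_read,                                          \
            .llseek  = seq_lseek,                                         \
            .release = single_release,                                    \
    };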
diff --git a/fs/ceph/messenger.c b/net/ceph/messenger.c index 2502d76fcec1..0e8157ee5d43 100644 --- a/fs/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/crc32c.h> | 3 | #include <linux/crc32c.h> |
| 4 | #include <linux/ctype.h> | 4 | #include <linux/ctype.h> |
| @@ -9,12 +9,14 @@ | |||
| 9 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
| 10 | #include <linux/socket.h> | 10 | #include <linux/socket.h> |
| 11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
| 12 | #include <linux/bio.h> | ||
| 13 | #include <linux/blkdev.h> | ||
| 12 | #include <net/tcp.h> | 14 | #include <net/tcp.h> |
| 13 | 15 | ||
| 14 | #include "super.h" | 16 | #include <linux/ceph/libceph.h> |
| 15 | #include "messenger.h" | 17 | #include <linux/ceph/messenger.h> |
| 16 | #include "decode.h" | 18 | #include <linux/ceph/decode.h> |
| 17 | #include "pagelist.h" | 19 | #include <linux/ceph/pagelist.h> |
| 18 | 20 | ||
| 19 | /* | 21 | /* |
| 20 | * Ceph uses the messenger to exchange ceph_msg messages with other | 22 | * Ceph uses the messenger to exchange ceph_msg messages with other |
| @@ -48,7 +50,7 @@ static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN]; | |||
| 48 | static DEFINE_SPINLOCK(addr_str_lock); | 50 | static DEFINE_SPINLOCK(addr_str_lock); |
| 49 | static int last_addr_str; | 51 | static int last_addr_str; |
| 50 | 52 | ||
| 51 | const char *pr_addr(const struct sockaddr_storage *ss) | 53 | const char *ceph_pr_addr(const struct sockaddr_storage *ss) |
| 52 | { | 54 | { |
| 53 | int i; | 55 | int i; |
| 54 | char *s; | 56 | char *s; |
| @@ -79,6 +81,7 @@ const char *pr_addr(const struct sockaddr_storage *ss) | |||
| 79 | 81 | ||
| 80 | return s; | 82 | return s; |
| 81 | } | 83 | } |
| 84 | EXPORT_SYMBOL(ceph_pr_addr); | ||
| 82 | 85 | ||
| 83 | static void encode_my_addr(struct ceph_messenger *msgr) | 86 | static void encode_my_addr(struct ceph_messenger *msgr) |
| 84 | { | 87 | { |
| @@ -91,7 +94,7 @@ static void encode_my_addr(struct ceph_messenger *msgr) | |||
| 91 | */ | 94 | */ |
| 92 | struct workqueue_struct *ceph_msgr_wq; | 95 | struct workqueue_struct *ceph_msgr_wq; |
| 93 | 96 | ||
| 94 | int __init ceph_msgr_init(void) | 97 | int ceph_msgr_init(void) |
| 95 | { | 98 | { |
| 96 | ceph_msgr_wq = create_workqueue("ceph-msgr"); | 99 | ceph_msgr_wq = create_workqueue("ceph-msgr"); |
| 97 | if (IS_ERR(ceph_msgr_wq)) { | 100 | if (IS_ERR(ceph_msgr_wq)) { |
| @@ -102,16 +105,19 @@ int __init ceph_msgr_init(void) | |||
| 102 | } | 105 | } |
| 103 | return 0; | 106 | return 0; |
| 104 | } | 107 | } |
| 108 | EXPORT_SYMBOL(ceph_msgr_init); | ||
| 105 | 109 | ||
| 106 | void ceph_msgr_exit(void) | 110 | void ceph_msgr_exit(void) |
| 107 | { | 111 | { |
| 108 | destroy_workqueue(ceph_msgr_wq); | 112 | destroy_workqueue(ceph_msgr_wq); |
| 109 | } | 113 | } |
| 114 | EXPORT_SYMBOL(ceph_msgr_exit); | ||
| 110 | 115 | ||
| 111 | void ceph_msgr_flush(void) | 116 | void ceph_msgr_flush(void) |
| 112 | { | 117 | { |
| 113 | flush_workqueue(ceph_msgr_wq); | 118 | flush_workqueue(ceph_msgr_wq); |
| 114 | } | 119 | } |
| 120 | EXPORT_SYMBOL(ceph_msgr_flush); | ||
| 115 | 121 | ||
| 116 | 122 | ||
| 117 | /* | 123 | /* |
| @@ -221,19 +227,19 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) | |||
| 221 | 227 | ||
| 222 | set_sock_callbacks(sock, con); | 228 | set_sock_callbacks(sock, con); |
| 223 | 229 | ||
| 224 | dout("connect %s\n", pr_addr(&con->peer_addr.in_addr)); | 230 | dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); |
| 225 | 231 | ||
| 226 | ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), | 232 | ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), |
| 227 | O_NONBLOCK); | 233 | O_NONBLOCK); |
| 228 | if (ret == -EINPROGRESS) { | 234 | if (ret == -EINPROGRESS) { |
| 229 | dout("connect %s EINPROGRESS sk_state = %u\n", | 235 | dout("connect %s EINPROGRESS sk_state = %u\n", |
| 230 | pr_addr(&con->peer_addr.in_addr), | 236 | ceph_pr_addr(&con->peer_addr.in_addr), |
| 231 | sock->sk->sk_state); | 237 | sock->sk->sk_state); |
| 232 | ret = 0; | 238 | ret = 0; |
| 233 | } | 239 | } |
| 234 | if (ret < 0) { | 240 | if (ret < 0) { |
| 235 | pr_err("connect %s error %d\n", | 241 | pr_err("connect %s error %d\n", |
| 236 | pr_addr(&con->peer_addr.in_addr), ret); | 242 | ceph_pr_addr(&con->peer_addr.in_addr), ret); |
| 237 | sock_release(sock); | 243 | sock_release(sock); |
| 238 | con->sock = NULL; | 244 | con->sock = NULL; |
| 239 | con->error_msg = "connect error"; | 245 | con->error_msg = "connect error"; |
| @@ -334,7 +340,8 @@ static void reset_connection(struct ceph_connection *con) | |||
| 334 | */ | 340 | */ |
| 335 | void ceph_con_close(struct ceph_connection *con) | 341 | void ceph_con_close(struct ceph_connection *con) |
| 336 | { | 342 | { |
| 337 | dout("con_close %p peer %s\n", con, pr_addr(&con->peer_addr.in_addr)); | 343 | dout("con_close %p peer %s\n", con, |
| 344 | ceph_pr_addr(&con->peer_addr.in_addr)); | ||
| 338 | set_bit(CLOSED, &con->state); /* in case there's queued work */ | 345 | set_bit(CLOSED, &con->state); /* in case there's queued work */ |
| 339 | clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ | 346 | clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ |
| 340 | clear_bit(LOSSYTX, &con->state); /* so we retry next connect */ | 347 | clear_bit(LOSSYTX, &con->state); /* so we retry next connect */ |
| @@ -347,19 +354,21 @@ void ceph_con_close(struct ceph_connection *con) | |||
| 347 | mutex_unlock(&con->mutex); | 354 | mutex_unlock(&con->mutex); |
| 348 | queue_con(con); | 355 | queue_con(con); |
| 349 | } | 356 | } |
| 357 | EXPORT_SYMBOL(ceph_con_close); | ||
| 350 | 358 | ||
| 351 | /* | 359 | /* |
| 352 | * Reopen a closed connection, with a new peer address. | 360 | * Reopen a closed connection, with a new peer address. |
| 353 | */ | 361 | */ |
| 354 | void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) | 362 | void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) |
| 355 | { | 363 | { |
| 356 | dout("con_open %p %s\n", con, pr_addr(&addr->in_addr)); | 364 | dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); |
| 357 | set_bit(OPENING, &con->state); | 365 | set_bit(OPENING, &con->state); |
| 358 | clear_bit(CLOSED, &con->state); | 366 | clear_bit(CLOSED, &con->state); |
| 359 | memcpy(&con->peer_addr, addr, sizeof(*addr)); | 367 | memcpy(&con->peer_addr, addr, sizeof(*addr)); |
| 360 | con->delay = 0; /* reset backoff memory */ | 368 | con->delay = 0; /* reset backoff memory */ |
| 361 | queue_con(con); | 369 | queue_con(con); |
| 362 | } | 370 | } |
| 371 | EXPORT_SYMBOL(ceph_con_open); | ||
| 363 | 372 | ||
| 364 | /* | 373 | /* |
| 365 | * return true if this connection ever successfully opened | 374 | * return true if this connection ever successfully opened |
| @@ -406,6 +415,7 @@ void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con) | |||
| 406 | INIT_LIST_HEAD(&con->out_sent); | 415 | INIT_LIST_HEAD(&con->out_sent); |
| 407 | INIT_DELAYED_WORK(&con->work, con_work); | 416 | INIT_DELAYED_WORK(&con->work, con_work); |
| 408 | } | 417 | } |
| 418 | EXPORT_SYMBOL(ceph_con_init); | ||
| 409 | 419 | ||
| 410 | 420 | ||
| 411 | /* | 421 | /* |
| @@ -529,8 +539,11 @@ static void prepare_write_message(struct ceph_connection *con) | |||
| 529 | if (le32_to_cpu(m->hdr.data_len) > 0) { | 539 | if (le32_to_cpu(m->hdr.data_len) > 0) { |
| 530 | /* initialize page iterator */ | 540 | /* initialize page iterator */ |
| 531 | con->out_msg_pos.page = 0; | 541 | con->out_msg_pos.page = 0; |
| 532 | con->out_msg_pos.page_pos = | 542 | if (m->pages) |
| 533 | le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK; | 543 | con->out_msg_pos.page_pos = |
| 544 | le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK; | ||
| 545 | else | ||
| 546 | con->out_msg_pos.page_pos = 0; | ||
| 534 | con->out_msg_pos.data_pos = 0; | 547 | con->out_msg_pos.data_pos = 0; |
| 535 | con->out_msg_pos.did_page_crc = 0; | 548 | con->out_msg_pos.did_page_crc = 0; |
| 536 | con->out_more = 1; /* data + footer will follow */ | 549 | con->out_more = 1; /* data + footer will follow */ |
| @@ -647,7 +660,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, | |||
| 647 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, | 660 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, |
| 648 | con->connect_seq, global_seq, proto); | 661 | con->connect_seq, global_seq, proto); |
| 649 | 662 | ||
| 650 | con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED); | 663 | con->out_connect.features = cpu_to_le64(msgr->supported_features); |
| 651 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); | 664 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); |
| 652 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); | 665 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); |
| 653 | con->out_connect.global_seq = cpu_to_le32(global_seq); | 666 | con->out_connect.global_seq = cpu_to_le32(global_seq); |
| @@ -712,6 +725,31 @@ out: | |||
| 712 | return ret; /* done! */ | 725 | return ret; /* done! */ |
| 713 | } | 726 | } |
| 714 | 727 | ||
| 728 | #ifdef CONFIG_BLOCK | ||
| 729 | static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) | ||
| 730 | { | ||
| 731 | if (!bio) { | ||
| 732 | *iter = NULL; | ||
| 733 | *seg = 0; | ||
| 734 | return; | ||
| 735 | } | ||
| 736 | *iter = bio; | ||
| 737 | *seg = bio->bi_idx; | ||
| 738 | } | ||
| 739 | |||
| 740 | static void iter_bio_next(struct bio **bio_iter, int *seg) | ||
| 741 | { | ||
| 742 | if (*bio_iter == NULL) | ||
| 743 | return; | ||
| 744 | |||
| 745 | BUG_ON(*seg >= (*bio_iter)->bi_vcnt); | ||
| 746 | |||
| 747 | (*seg)++; | ||
| 748 | if (*seg == (*bio_iter)->bi_vcnt) | ||
| 749 | init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); | ||
| 750 | } | ||
| 751 | #endif | ||
| 752 | |||
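Spelled out, the two helpers walk a chained bio starting at its current index; the consuming loop in write_partial_msg_pages() below does exactly this, one segment per pass (2.6.36-era bio fields bi_idx/bi_vcnt/bi_next):

    struct bio *iter;
    int seg;

    init_bio_iter(msg->bio, &iter, &seg);      /* start at bio->bi_idx */
    while (iter) {
            struct bio_vec *bv = bio_iovec_idx(iter, seg);

            /* consume bv->bv_len bytes at bv->bv_page + bv->bv_offset */
            iter_bio_next(&iter, &seg);        /* hops to bi_next at the end */
    }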
| 715 | /* | 753 | /* |
| 716 | * Write as much message data payload as we can. If we finish, queue | 754 | * Write as much message data payload as we can. If we finish, queue |
| 717 | * up the footer. | 755 | * up the footer. |
| @@ -726,21 +764,46 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
| 726 | size_t len; | 764 | size_t len; |
| 727 | int crc = con->msgr->nocrc; | 765 | int crc = con->msgr->nocrc; |
| 728 | int ret; | 766 | int ret; |
| 767 | int total_max_write; | ||
| 768 | int in_trail = 0; | ||
| 769 | size_t trail_len = (msg->trail ? msg->trail->length : 0); | ||
| 729 | 770 | ||
| 730 | dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", | 771 | dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", |
| 731 | con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages, | 772 | con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages, |
| 732 | con->out_msg_pos.page_pos); | 773 | con->out_msg_pos.page_pos); |
| 733 | 774 | ||
| 734 | while (con->out_msg_pos.page < con->out_msg->nr_pages) { | 775 | #ifdef CONFIG_BLOCK |
| 776 | if (msg->bio && !msg->bio_iter) | ||
| 777 | init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg); | ||
| 778 | #endif | ||
| 779 | |||
| 780 | while (data_len > con->out_msg_pos.data_pos) { | ||
| 735 | struct page *page = NULL; | 781 | struct page *page = NULL; |
| 736 | void *kaddr = NULL; | 782 | void *kaddr = NULL; |
| 783 | int max_write = PAGE_SIZE; | ||
| 784 | int page_shift = 0; | ||
| 785 | |||
| 786 | total_max_write = data_len - trail_len - | ||
| 787 | con->out_msg_pos.data_pos; | ||
| 737 | 788 | ||
| 738 | /* | 789 | /* |
| 739 | * if we are calculating the data crc (the default), we need | 790 | * if we are calculating the data crc (the default), we need |
| 740 | * to map the page. if our pages[] has been revoked, use the | 791 | * to map the page. if our pages[] has been revoked, use the |
| 741 | * zero page. | 792 | * zero page. |
| 742 | */ | 793 | */ |
| 743 | if (msg->pages) { | 794 | |
| 795 | /* have we reached the trail part of the data? */ | ||
| 796 | if (con->out_msg_pos.data_pos >= data_len - trail_len) { | ||
| 797 | in_trail = 1; | ||
| 798 | |||
| 799 | total_max_write = data_len - con->out_msg_pos.data_pos; | ||
| 800 | |||
| 801 | page = list_first_entry(&msg->trail->head, | ||
| 802 | struct page, lru); | ||
| 803 | if (crc) | ||
| 804 | kaddr = kmap(page); | ||
| 805 | max_write = PAGE_SIZE; | ||
| 806 | } else if (msg->pages) { | ||
| 744 | page = msg->pages[con->out_msg_pos.page]; | 807 | page = msg->pages[con->out_msg_pos.page]; |
| 745 | if (crc) | 808 | if (crc) |
| 746 | kaddr = kmap(page); | 809 | kaddr = kmap(page); |
| @@ -749,13 +812,25 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
| 749 | struct page, lru); | 812 | struct page, lru); |
| 750 | if (crc) | 813 | if (crc) |
| 751 | kaddr = kmap(page); | 814 | kaddr = kmap(page); |
| 815 | #ifdef CONFIG_BLOCK | ||
| 816 | } else if (msg->bio) { | ||
| 817 | struct bio_vec *bv; | ||
| 818 | |||
| 819 | bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg); | ||
| 820 | page = bv->bv_page; | ||
| 821 | page_shift = bv->bv_offset; | ||
| 822 | if (crc) | ||
| 823 | kaddr = kmap(page) + page_shift; | ||
| 824 | max_write = bv->bv_len; | ||
| 825 | #endif | ||
| 752 | } else { | 826 | } else { |
| 753 | page = con->msgr->zero_page; | 827 | page = con->msgr->zero_page; |
| 754 | if (crc) | 828 | if (crc) |
| 755 | kaddr = page_address(con->msgr->zero_page); | 829 | kaddr = page_address(con->msgr->zero_page); |
| 756 | } | 830 | } |
| 757 | len = min((int)(PAGE_SIZE - con->out_msg_pos.page_pos), | 831 | len = min_t(int, max_write - con->out_msg_pos.page_pos, |
| 758 | (int)(data_len - con->out_msg_pos.data_pos)); | 832 | total_max_write); |
| 833 | |||
| 759 | if (crc && !con->out_msg_pos.did_page_crc) { | 834 | if (crc && !con->out_msg_pos.did_page_crc) { |
| 760 | void *base = kaddr + con->out_msg_pos.page_pos; | 835 | void *base = kaddr + con->out_msg_pos.page_pos; |
| 761 | u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); | 836 | u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); |
| @@ -765,13 +840,14 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
| 765 | cpu_to_le32(crc32c(tmpcrc, base, len)); | 840 | cpu_to_le32(crc32c(tmpcrc, base, len)); |
| 766 | con->out_msg_pos.did_page_crc = 1; | 841 | con->out_msg_pos.did_page_crc = 1; |
| 767 | } | 842 | } |
| 768 | |||
| 769 | ret = kernel_sendpage(con->sock, page, | 843 | ret = kernel_sendpage(con->sock, page, |
| 770 | con->out_msg_pos.page_pos, len, | 844 | con->out_msg_pos.page_pos + page_shift, |
| 845 | len, | ||
| 771 | MSG_DONTWAIT | MSG_NOSIGNAL | | 846 | MSG_DONTWAIT | MSG_NOSIGNAL | |
| 772 | MSG_MORE); | 847 | MSG_MORE); |
| 773 | 848 | ||
| 774 | if (crc && (msg->pages || msg->pagelist)) | 849 | if (crc && |
| 850 | (msg->pages || msg->pagelist || msg->bio || in_trail)) | ||
| 775 | kunmap(page); | 851 | kunmap(page); |
| 776 | 852 | ||
| 777 | if (ret <= 0) | 853 | if (ret <= 0) |
| @@ -783,9 +859,16 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
| 783 | con->out_msg_pos.page_pos = 0; | 859 | con->out_msg_pos.page_pos = 0; |
| 784 | con->out_msg_pos.page++; | 860 | con->out_msg_pos.page++; |
| 785 | con->out_msg_pos.did_page_crc = 0; | 861 | con->out_msg_pos.did_page_crc = 0; |
| 786 | if (msg->pagelist) | 862 | if (in_trail) |
| 863 | list_move_tail(&page->lru, | ||
| 864 | &msg->trail->head); | ||
| 865 | else if (msg->pagelist) | ||
| 787 | list_move_tail(&page->lru, | 866 | list_move_tail(&page->lru, |
| 788 | &msg->pagelist->head); | 867 | &msg->pagelist->head); |
| 868 | #ifdef CONFIG_BLOCK | ||
| 869 | else if (msg->bio) | ||
| 870 | iter_bio_next(&msg->bio_iter, &msg->bio_seg); | ||
| 871 | #endif | ||
| 789 | } | 872 | } |
| 790 | } | 873 | } |
| 791 | 874 | ||
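
Across these write_partial_msg_pages() hunks, the per-iteration send length becomes min_t(int, max_write - page_pos, total_max_write): max_write is PAGE_SIZE for page, pagelist, and trail sources but bv_len for a bio segment, while total_max_write keeps the main payload from running into the trail. A small sketch of that computation with made-up numbers:

#include <stdio.h>

/* one iteration's send length, as in the rewritten loop */
static int chunk_len(int max_write, int page_pos, int total_max_write)
{
        int room = max_write - page_pos;

        return room < total_max_write ? room : total_max_write;
}

int main(void)
{
        /* a 1024-byte bio segment with 200 bytes already sent, lots left */
        printf("%d\n", chunk_len(1024, 200, 4096));     /* -> 824 */
        /* a full page source, but only 512 non-trail bytes remain */
        printf("%d\n", chunk_len(4096, 0, 512));        /* -> 512 */
        return 0;
}
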
| @@ -938,7 +1021,7 @@ static int verify_hello(struct ceph_connection *con) | |||
| 938 | { | 1021 | { |
| 939 | if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { | 1022 | if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { |
| 940 | pr_err("connect to %s got bad banner\n", | 1023 | pr_err("connect to %s got bad banner\n", |
| 941 | pr_addr(&con->peer_addr.in_addr)); | 1024 | ceph_pr_addr(&con->peer_addr.in_addr)); |
| 942 | con->error_msg = "protocol error, bad banner"; | 1025 | con->error_msg = "protocol error, bad banner"; |
| 943 | return -1; | 1026 | return -1; |
| 944 | } | 1027 | } |
| @@ -1041,7 +1124,7 @@ int ceph_parse_ips(const char *c, const char *end, | |||
| 1041 | 1124 | ||
| 1042 | addr_set_port(ss, port); | 1125 | addr_set_port(ss, port); |
| 1043 | 1126 | ||
| 1044 | dout("parse_ips got %s\n", pr_addr(ss)); | 1127 | dout("parse_ips got %s\n", ceph_pr_addr(ss)); |
| 1045 | 1128 | ||
| 1046 | if (p == end) | 1129 | if (p == end) |
| 1047 | break; | 1130 | break; |
| @@ -1061,6 +1144,7 @@ bad: | |||
| 1061 | pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); | 1144 | pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); |
| 1062 | return -EINVAL; | 1145 | return -EINVAL; |
| 1063 | } | 1146 | } |
| 1147 | EXPORT_SYMBOL(ceph_parse_ips); | ||
| 1064 | 1148 | ||
| 1065 | static int process_banner(struct ceph_connection *con) | 1149 | static int process_banner(struct ceph_connection *con) |
| 1066 | { | 1150 | { |
| @@ -1082,9 +1166,9 @@ static int process_banner(struct ceph_connection *con) | |||
| 1082 | !(addr_is_blank(&con->actual_peer_addr.in_addr) && | 1166 | !(addr_is_blank(&con->actual_peer_addr.in_addr) && |
| 1083 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { | 1167 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { |
| 1084 | pr_warning("wrong peer, want %s/%d, got %s/%d\n", | 1168 | pr_warning("wrong peer, want %s/%d, got %s/%d\n", |
| 1085 | pr_addr(&con->peer_addr.in_addr), | 1169 | ceph_pr_addr(&con->peer_addr.in_addr), |
| 1086 | (int)le32_to_cpu(con->peer_addr.nonce), | 1170 | (int)le32_to_cpu(con->peer_addr.nonce), |
| 1087 | pr_addr(&con->actual_peer_addr.in_addr), | 1171 | ceph_pr_addr(&con->actual_peer_addr.in_addr), |
| 1088 | (int)le32_to_cpu(con->actual_peer_addr.nonce)); | 1172 | (int)le32_to_cpu(con->actual_peer_addr.nonce)); |
| 1089 | con->error_msg = "wrong peer at address"; | 1173 | con->error_msg = "wrong peer at address"; |
| 1090 | return -1; | 1174 | return -1; |
| @@ -1102,7 +1186,7 @@ static int process_banner(struct ceph_connection *con) | |||
| 1102 | addr_set_port(&con->msgr->inst.addr.in_addr, port); | 1186 | addr_set_port(&con->msgr->inst.addr.in_addr, port); |
| 1103 | encode_my_addr(con->msgr); | 1187 | encode_my_addr(con->msgr); |
| 1104 | dout("process_banner learned my addr is %s\n", | 1188 | dout("process_banner learned my addr is %s\n", |
| 1105 | pr_addr(&con->msgr->inst.addr.in_addr)); | 1189 | ceph_pr_addr(&con->msgr->inst.addr.in_addr)); |
| 1106 | } | 1190 | } |
| 1107 | 1191 | ||
| 1108 | set_bit(NEGOTIATING, &con->state); | 1192 | set_bit(NEGOTIATING, &con->state); |
| @@ -1123,8 +1207,8 @@ static void fail_protocol(struct ceph_connection *con) | |||
| 1123 | 1207 | ||
| 1124 | static int process_connect(struct ceph_connection *con) | 1208 | static int process_connect(struct ceph_connection *con) |
| 1125 | { | 1209 | { |
| 1126 | u64 sup_feat = CEPH_FEATURE_SUPPORTED; | 1210 | u64 sup_feat = con->msgr->supported_features; |
| 1127 | u64 req_feat = CEPH_FEATURE_REQUIRED; | 1211 | u64 req_feat = con->msgr->required_features; |
| 1128 | u64 server_feat = le64_to_cpu(con->in_reply.features); | 1212 | u64 server_feat = le64_to_cpu(con->in_reply.features); |
| 1129 | 1213 | ||
| 1130 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); | 1214 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); |
| @@ -1134,7 +1218,7 @@ static int process_connect(struct ceph_connection *con) | |||
| 1134 | pr_err("%s%lld %s feature set mismatch," | 1218 | pr_err("%s%lld %s feature set mismatch," |
| 1135 | " my %llx < server's %llx, missing %llx\n", | 1219 | " my %llx < server's %llx, missing %llx\n", |
| 1136 | ENTITY_NAME(con->peer_name), | 1220 | ENTITY_NAME(con->peer_name), |
| 1137 | pr_addr(&con->peer_addr.in_addr), | 1221 | ceph_pr_addr(&con->peer_addr.in_addr), |
| 1138 | sup_feat, server_feat, server_feat & ~sup_feat); | 1222 | sup_feat, server_feat, server_feat & ~sup_feat); |
| 1139 | con->error_msg = "missing required protocol features"; | 1223 | con->error_msg = "missing required protocol features"; |
| 1140 | fail_protocol(con); | 1224 | fail_protocol(con); |
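
With the messenger now carrying supported_features/required_features, the mismatch report computes the bits the local side lacks as server_feat & ~sup_feat. The bit arithmetic in isolation, with hypothetical masks:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t sup_feat    = 0x0b;    /* hypothetical: we have bits 0, 1, 3 */
        uint64_t server_feat = 0x0f;    /* server offers bits 0-3 */
        uint64_t missing     = server_feat & ~sup_feat;

        if (missing)    /* -> 0x4: the one bit the server has that we lack */
                printf("missing %llx\n", (unsigned long long)missing);
        return 0;
}
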
| @@ -1144,7 +1228,7 @@ static int process_connect(struct ceph_connection *con) | |||
| 1144 | pr_err("%s%lld %s protocol version mismatch," | 1228 | pr_err("%s%lld %s protocol version mismatch," |
| 1145 | " my %d != server's %d\n", | 1229 | " my %d != server's %d\n", |
| 1146 | ENTITY_NAME(con->peer_name), | 1230 | ENTITY_NAME(con->peer_name), |
| 1147 | pr_addr(&con->peer_addr.in_addr), | 1231 | ceph_pr_addr(&con->peer_addr.in_addr), |
| 1148 | le32_to_cpu(con->out_connect.protocol_version), | 1232 | le32_to_cpu(con->out_connect.protocol_version), |
| 1149 | le32_to_cpu(con->in_reply.protocol_version)); | 1233 | le32_to_cpu(con->in_reply.protocol_version)); |
| 1150 | con->error_msg = "protocol version mismatch"; | 1234 | con->error_msg = "protocol version mismatch"; |
| @@ -1178,7 +1262,7 @@ static int process_connect(struct ceph_connection *con) | |||
| 1178 | le32_to_cpu(con->in_connect.connect_seq)); | 1262 | le32_to_cpu(con->in_connect.connect_seq)); |
| 1179 | pr_err("%s%lld %s connection reset\n", | 1263 | pr_err("%s%lld %s connection reset\n", |
| 1180 | ENTITY_NAME(con->peer_name), | 1264 | ENTITY_NAME(con->peer_name), |
| 1181 | pr_addr(&con->peer_addr.in_addr)); | 1265 | ceph_pr_addr(&con->peer_addr.in_addr)); |
| 1182 | reset_connection(con); | 1266 | reset_connection(con); |
| 1183 | prepare_write_connect(con->msgr, con, 0); | 1267 | prepare_write_connect(con->msgr, con, 0); |
| 1184 | prepare_read_connect(con); | 1268 | prepare_read_connect(con); |
| @@ -1223,7 +1307,7 @@ static int process_connect(struct ceph_connection *con) | |||
| 1223 | pr_err("%s%lld %s protocol feature mismatch," | 1307 | pr_err("%s%lld %s protocol feature mismatch," |
| 1224 | " my required %llx > server's %llx, need %llx\n", | 1308 | " my required %llx > server's %llx, need %llx\n", |
| 1225 | ENTITY_NAME(con->peer_name), | 1309 | ENTITY_NAME(con->peer_name), |
| 1226 | pr_addr(&con->peer_addr.in_addr), | 1310 | ceph_pr_addr(&con->peer_addr.in_addr), |
| 1227 | req_feat, server_feat, req_feat & ~server_feat); | 1311 | req_feat, server_feat, req_feat & ~server_feat); |
| 1228 | con->error_msg = "missing required protocol features"; | 1312 | con->error_msg = "missing required protocol features"; |
| 1229 | fail_protocol(con); | 1313 | fail_protocol(con); |
| @@ -1305,8 +1389,7 @@ static int read_partial_message_section(struct ceph_connection *con, | |||
| 1305 | struct kvec *section, | 1389 | struct kvec *section, |
| 1306 | unsigned int sec_len, u32 *crc) | 1390 | unsigned int sec_len, u32 *crc) |
| 1307 | { | 1391 | { |
| 1308 | int left; | 1392 | int ret, left; |
| 1309 | int ret; | ||
| 1310 | 1393 | ||
| 1311 | BUG_ON(!section); | 1394 | BUG_ON(!section); |
| 1312 | 1395 | ||
| @@ -1329,13 +1412,83 @@ static int read_partial_message_section(struct ceph_connection *con, | |||
| 1329 | static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | 1412 | static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, |
| 1330 | struct ceph_msg_header *hdr, | 1413 | struct ceph_msg_header *hdr, |
| 1331 | int *skip); | 1414 | int *skip); |
| 1415 | |||
| 1416 | |||
| 1417 | static int read_partial_message_pages(struct ceph_connection *con, | ||
| 1418 | struct page **pages, | ||
| 1419 | unsigned data_len, int datacrc) | ||
| 1420 | { | ||
| 1421 | void *p; | ||
| 1422 | int ret; | ||
| 1423 | int left; | ||
| 1424 | |||
| 1425 | left = min((int)(data_len - con->in_msg_pos.data_pos), | ||
| 1426 | (int)(PAGE_SIZE - con->in_msg_pos.page_pos)); | ||
| 1427 | /* (page) data */ | ||
| 1428 | BUG_ON(pages == NULL); | ||
| 1429 | p = kmap(pages[con->in_msg_pos.page]); | ||
| 1430 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, | ||
| 1431 | left); | ||
| 1432 | if (ret > 0 && datacrc) | ||
| 1433 | con->in_data_crc = | ||
| 1434 | crc32c(con->in_data_crc, | ||
| 1435 | p + con->in_msg_pos.page_pos, ret); | ||
| 1436 | kunmap(pages[con->in_msg_pos.page]); | ||
| 1437 | if (ret <= 0) | ||
| 1438 | return ret; | ||
| 1439 | con->in_msg_pos.data_pos += ret; | ||
| 1440 | con->in_msg_pos.page_pos += ret; | ||
| 1441 | if (con->in_msg_pos.page_pos == PAGE_SIZE) { | ||
| 1442 | con->in_msg_pos.page_pos = 0; | ||
| 1443 | con->in_msg_pos.page++; | ||
| 1444 | } | ||
| 1445 | |||
| 1446 | return ret; | ||
| 1447 | } | ||
| 1448 | |||
| 1449 | #ifdef CONFIG_BLOCK | ||
| 1450 | static int read_partial_message_bio(struct ceph_connection *con, | ||
| 1451 | struct bio **bio_iter, int *bio_seg, | ||
| 1452 | unsigned data_len, int datacrc) | ||
| 1453 | { | ||
| 1454 | struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg); | ||
| 1455 | void *p; | ||
| 1456 | int ret, left; | ||
| 1457 | |||
| 1458 | if (IS_ERR(bv)) | ||
| 1459 | return PTR_ERR(bv); | ||
| 1460 | |||
| 1461 | left = min((int)(data_len - con->in_msg_pos.data_pos), | ||
| 1462 | (int)(bv->bv_len - con->in_msg_pos.page_pos)); | ||
| 1463 | |||
| 1464 | p = kmap(bv->bv_page) + bv->bv_offset; | ||
| 1465 | |||
| 1466 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, | ||
| 1467 | left); | ||
| 1468 | if (ret > 0 && datacrc) | ||
| 1469 | con->in_data_crc = | ||
| 1470 | crc32c(con->in_data_crc, | ||
| 1471 | p + con->in_msg_pos.page_pos, ret); | ||
| 1472 | kunmap(bv->bv_page); | ||
| 1473 | if (ret <= 0) | ||
| 1474 | return ret; | ||
| 1475 | con->in_msg_pos.data_pos += ret; | ||
| 1476 | con->in_msg_pos.page_pos += ret; | ||
| 1477 | if (con->in_msg_pos.page_pos == bv->bv_len) { | ||
| 1478 | con->in_msg_pos.page_pos = 0; | ||
| 1479 | iter_bio_next(bio_iter, bio_seg); | ||
| 1480 | } | ||
| 1481 | |||
| 1482 | return ret; | ||
| 1483 | } | ||
| 1484 | #endif | ||
| 1485 | |||
| 1332 | /* | 1486 | /* |
| 1333 | * read (part of) a message. | 1487 | * read (part of) a message. |
| 1334 | */ | 1488 | */ |
| 1335 | static int read_partial_message(struct ceph_connection *con) | 1489 | static int read_partial_message(struct ceph_connection *con) |
| 1336 | { | 1490 | { |
| 1337 | struct ceph_msg *m = con->in_msg; | 1491 | struct ceph_msg *m = con->in_msg; |
| 1338 | void *p; | ||
| 1339 | int ret; | 1492 | int ret; |
| 1340 | int to, left; | 1493 | int to, left; |
| 1341 | unsigned front_len, middle_len, data_len, data_off; | 1494 | unsigned front_len, middle_len, data_len, data_off; |
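
read_partial_message_pages() and read_partial_message_bio() above share one bookkeeping pattern: receive at most the smaller of "data left in the message" and "room left in the current page or segment", advance data_pos and page_pos by whatever actually arrived, and roll over to the next page when the current one fills. A standalone model with a fake receive that always delivers what was asked:

#include <stdio.h>

#define PAGE_SZ 4096

struct pos { int data_pos, page_pos, page; };

/* stand-in for ceph_tcp_recvmsg(): pretend we always get 'want' bytes */
static int fake_recv(int want) { return want; }

static void recv_step(struct pos *p, int data_len)
{
        int left = data_len - p->data_pos;
        int room = PAGE_SZ - p->page_pos;
        int ret  = fake_recv(left < room ? left : room);

        p->data_pos += ret;
        p->page_pos += ret;
        if (p->page_pos == PAGE_SZ) {   /* page full: move to the next one */
                p->page_pos = 0;
                p->page++;
        }
}

int main(void)
{
        struct pos p = { 0, 904, 0 };   /* e.g. data_off landed 904 into page 0 */
        int data_len = 6000;

        while (p.data_pos < data_len)
                recv_step(&p, data_len);
        printf("pages used: %d, final offset %d\n", p.page + 1, p.page_pos);
        return 0;
}
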
| @@ -1381,7 +1534,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1381 | if ((s64)seq - (s64)con->in_seq < 1) { | 1534 | if ((s64)seq - (s64)con->in_seq < 1) { |
| 1382 | pr_info("skipping %s%lld %s seq %lld, expected %lld\n", | 1535 | pr_info("skipping %s%lld %s seq %lld, expected %lld\n", |
| 1383 | ENTITY_NAME(con->peer_name), | 1536 | ENTITY_NAME(con->peer_name), |
| 1384 | pr_addr(&con->peer_addr.in_addr), | 1537 | ceph_pr_addr(&con->peer_addr.in_addr), |
| 1385 | seq, con->in_seq + 1); | 1538 | seq, con->in_seq + 1); |
| 1386 | con->in_base_pos = -front_len - middle_len - data_len - | 1539 | con->in_base_pos = -front_len - middle_len - data_len - |
| 1387 | sizeof(m->footer); | 1540 | sizeof(m->footer); |
| @@ -1422,7 +1575,10 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1422 | m->middle->vec.iov_len = 0; | 1575 | m->middle->vec.iov_len = 0; |
| 1423 | 1576 | ||
| 1424 | con->in_msg_pos.page = 0; | 1577 | con->in_msg_pos.page = 0; |
| 1425 | con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; | 1578 | if (m->pages) |
| 1579 | con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; | ||
| 1580 | else | ||
| 1581 | con->in_msg_pos.page_pos = 0; | ||
| 1426 | con->in_msg_pos.data_pos = 0; | 1582 | con->in_msg_pos.data_pos = 0; |
| 1427 | } | 1583 | } |
| 1428 | 1584 | ||
| @@ -1440,27 +1596,29 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1440 | if (ret <= 0) | 1596 | if (ret <= 0) |
| 1441 | return ret; | 1597 | return ret; |
| 1442 | } | 1598 | } |
| 1599 | #ifdef CONFIG_BLOCK | ||
| 1600 | if (m->bio && !m->bio_iter) | ||
| 1601 | init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg); | ||
| 1602 | #endif | ||
| 1443 | 1603 | ||
| 1444 | /* (page) data */ | 1604 | /* (page) data */ |
| 1445 | while (con->in_msg_pos.data_pos < data_len) { | 1605 | while (con->in_msg_pos.data_pos < data_len) { |
| 1446 | left = min((int)(data_len - con->in_msg_pos.data_pos), | 1606 | if (m->pages) { |
| 1447 | (int)(PAGE_SIZE - con->in_msg_pos.page_pos)); | 1607 | ret = read_partial_message_pages(con, m->pages, |
| 1448 | BUG_ON(m->pages == NULL); | 1608 | data_len, datacrc); |
| 1449 | p = kmap(m->pages[con->in_msg_pos.page]); | 1609 | if (ret <= 0) |
| 1450 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, | 1610 | return ret; |
| 1451 | left); | 1611 | #ifdef CONFIG_BLOCK |
| 1452 | if (ret > 0 && datacrc) | 1612 | } else if (m->bio) { |
| 1453 | con->in_data_crc = | 1613 | |
| 1454 | crc32c(con->in_data_crc, | 1614 | ret = read_partial_message_bio(con, |
| 1455 | p + con->in_msg_pos.page_pos, ret); | 1615 | &m->bio_iter, &m->bio_seg, |
| 1456 | kunmap(m->pages[con->in_msg_pos.page]); | 1616 | data_len, datacrc); |
| 1457 | if (ret <= 0) | 1617 | if (ret <= 0) |
| 1458 | return ret; | 1618 | return ret; |
| 1459 | con->in_msg_pos.data_pos += ret; | 1619 | #endif |
| 1460 | con->in_msg_pos.page_pos += ret; | 1620 | } else { |
| 1461 | if (con->in_msg_pos.page_pos == PAGE_SIZE) { | 1621 | BUG_ON(1); |
| 1462 | con->in_msg_pos.page_pos = 0; | ||
| 1463 | con->in_msg_pos.page++; | ||
| 1464 | } | 1622 | } |
| 1465 | } | 1623 | } |
| 1466 | 1624 | ||
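
The receive loop itself now just picks a helper per backing store and treats a message with data but neither pages nor a bio as a hard bug. The shape of that dispatch, with stand-in types and functions:

#include <stdio.h>
#include <assert.h>

struct msg { int have_pages, have_bio; };

static int recv_pages(void) { puts("pages path"); return 1; }
static int recv_bio(void)   { puts("bio path");   return 1; }

static int recv_data(const struct msg *m)
{
        if (m->have_pages)
                return recv_pages();
        if (m->have_bio)   /* kernel compiles this arm only with CONFIG_BLOCK */
                return recv_bio();
        assert(0);         /* mirrors BUG_ON(1): no data source at all */
        return -1;         /* not reached */
}

int main(void)
{
        struct msg m = { 1, 0 };

        recv_data(&m);
        return 0;
}
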
| @@ -1874,9 +2032,9 @@ out: | |||
| 1874 | static void ceph_fault(struct ceph_connection *con) | 2032 | static void ceph_fault(struct ceph_connection *con) |
| 1875 | { | 2033 | { |
| 1876 | pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), | 2034 | pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), |
| 1877 | pr_addr(&con->peer_addr.in_addr), con->error_msg); | 2035 | ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); |
| 1878 | dout("fault %p state %lu to peer %s\n", | 2036 | dout("fault %p state %lu to peer %s\n", |
| 1879 | con, con->state, pr_addr(&con->peer_addr.in_addr)); | 2037 | con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); |
| 1880 | 2038 | ||
| 1881 | if (test_bit(LOSSYTX, &con->state)) { | 2039 | if (test_bit(LOSSYTX, &con->state)) { |
| 1882 | dout("fault on LOSSYTX channel\n"); | 2040 | dout("fault on LOSSYTX channel\n"); |
| @@ -1936,7 +2094,9 @@ out: | |||
| 1936 | /* | 2094 | /* |
| 1937 | * create a new messenger instance | 2095 | * create a new messenger instance |
| 1938 | */ | 2096 | */ |
| 1939 | struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) | 2097 | struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, |
| 2098 | u32 supported_features, | ||
| 2099 | u32 required_features) | ||
| 1940 | { | 2100 | { |
| 1941 | struct ceph_messenger *msgr; | 2101 | struct ceph_messenger *msgr; |
| 1942 | 2102 | ||
| @@ -1944,6 +2104,9 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) | |||
| 1944 | if (msgr == NULL) | 2104 | if (msgr == NULL) |
| 1945 | return ERR_PTR(-ENOMEM); | 2105 | return ERR_PTR(-ENOMEM); |
| 1946 | 2106 | ||
| 2107 | msgr->supported_features = supported_features; | ||
| 2108 | msgr->required_features = required_features; | ||
| 2109 | |||
| 1947 | spin_lock_init(&msgr->global_seq_lock); | 2110 | spin_lock_init(&msgr->global_seq_lock); |
| 1948 | 2111 | ||
| 1949 | /* the zero page is needed if a request is "canceled" while the message | 2112 | /* the zero page is needed if a request is "canceled" while the message |
| @@ -1966,6 +2129,7 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) | |||
| 1966 | dout("messenger_create %p\n", msgr); | 2129 | dout("messenger_create %p\n", msgr); |
| 1967 | return msgr; | 2130 | return msgr; |
| 1968 | } | 2131 | } |
| 2132 | EXPORT_SYMBOL(ceph_messenger_create); | ||
| 1969 | 2133 | ||
| 1970 | void ceph_messenger_destroy(struct ceph_messenger *msgr) | 2134 | void ceph_messenger_destroy(struct ceph_messenger *msgr) |
| 1971 | { | 2135 | { |
| @@ -1975,6 +2139,7 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr) | |||
| 1975 | kfree(msgr); | 2139 | kfree(msgr); |
| 1976 | dout("destroyed messenger %p\n", msgr); | 2140 | dout("destroyed messenger %p\n", msgr); |
| 1977 | } | 2141 | } |
| 2142 | EXPORT_SYMBOL(ceph_messenger_destroy); | ||
| 1978 | 2143 | ||
| 1979 | /* | 2144 | /* |
| 1980 | * Queue up an outgoing message on the given connection. | 2145 | * Queue up an outgoing message on the given connection. |
| @@ -2011,6 +2176,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) | |||
| 2011 | if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) | 2176 | if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) |
| 2012 | queue_con(con); | 2177 | queue_con(con); |
| 2013 | } | 2178 | } |
| 2179 | EXPORT_SYMBOL(ceph_con_send); | ||
| 2014 | 2180 | ||
| 2015 | /* | 2181 | /* |
| 2016 | * Revoke a message that was previously queued for send | 2182 | * Revoke a message that was previously queued for send |
| @@ -2076,6 +2242,7 @@ void ceph_con_keepalive(struct ceph_connection *con) | |||
| 2076 | test_and_set_bit(WRITE_PENDING, &con->state) == 0) | 2242 | test_and_set_bit(WRITE_PENDING, &con->state) == 0) |
| 2077 | queue_con(con); | 2243 | queue_con(con); |
| 2078 | } | 2244 | } |
| 2245 | EXPORT_SYMBOL(ceph_con_keepalive); | ||
| 2079 | 2246 | ||
| 2080 | 2247 | ||
| 2081 | /* | 2248 | /* |
| @@ -2136,6 +2303,10 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) | |||
| 2136 | m->nr_pages = 0; | 2303 | m->nr_pages = 0; |
| 2137 | m->pages = NULL; | 2304 | m->pages = NULL; |
| 2138 | m->pagelist = NULL; | 2305 | m->pagelist = NULL; |
| 2306 | m->bio = NULL; | ||
| 2307 | m->bio_iter = NULL; | ||
| 2308 | m->bio_seg = 0; | ||
| 2309 | m->trail = NULL; | ||
| 2139 | 2310 | ||
| 2140 | dout("ceph_msg_new %p front %d\n", m, front_len); | 2311 | dout("ceph_msg_new %p front %d\n", m, front_len); |
| 2141 | return m; | 2312 | return m; |
| @@ -2146,6 +2317,7 @@ out: | |||
| 2146 | pr_err("msg_new can't create type %d front %d\n", type, front_len); | 2317 | pr_err("msg_new can't create type %d front %d\n", type, front_len); |
| 2147 | return NULL; | 2318 | return NULL; |
| 2148 | } | 2319 | } |
| 2320 | EXPORT_SYMBOL(ceph_msg_new); | ||
| 2149 | 2321 | ||
| 2150 | /* | 2322 | /* |
| 2151 | * Allocate "middle" portion of a message, if it is needed and wasn't | 2323 | * Allocate "middle" portion of a message, if it is needed and wasn't |
| @@ -2250,11 +2422,14 @@ void ceph_msg_last_put(struct kref *kref) | |||
| 2250 | m->pagelist = NULL; | 2422 | m->pagelist = NULL; |
| 2251 | } | 2423 | } |
| 2252 | 2424 | ||
| 2425 | m->trail = NULL; | ||
| 2426 | |||
| 2253 | if (m->pool) | 2427 | if (m->pool) |
| 2254 | ceph_msgpool_put(m->pool, m); | 2428 | ceph_msgpool_put(m->pool, m); |
| 2255 | else | 2429 | else |
| 2256 | ceph_msg_kfree(m); | 2430 | ceph_msg_kfree(m); |
| 2257 | } | 2431 | } |
| 2432 | EXPORT_SYMBOL(ceph_msg_last_put); | ||
| 2258 | 2433 | ||
| 2259 | void ceph_msg_dump(struct ceph_msg *msg) | 2434 | void ceph_msg_dump(struct ceph_msg *msg) |
| 2260 | { | 2435 | { |
| @@ -2275,3 +2450,4 @@ void ceph_msg_dump(struct ceph_msg *msg) | |||
| 2275 | DUMP_PREFIX_OFFSET, 16, 1, | 2450 | DUMP_PREFIX_OFFSET, 16, 1, |
| 2276 | &msg->footer, sizeof(msg->footer), true); | 2451 | &msg->footer, sizeof(msg->footer), true); |
| 2277 | } | 2452 | } |
| 2453 | EXPORT_SYMBOL(ceph_msg_dump); | ||
diff --git a/fs/ceph/mon_client.c b/net/ceph/mon_client.c index b2a5a3e4a671..8a079399174a 100644 --- a/fs/ceph/mon_client.c +++ b/net/ceph/mon_client.c | |||
| @@ -1,14 +1,16 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/module.h> | ||
| 3 | #include <linux/types.h> | 4 | #include <linux/types.h> |
| 4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
| 5 | #include <linux/random.h> | 6 | #include <linux/random.h> |
| 6 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
| 7 | 8 | ||
| 8 | #include "mon_client.h" | 9 | #include <linux/ceph/mon_client.h> |
| 9 | #include "super.h" | 10 | #include <linux/ceph/libceph.h> |
| 10 | #include "auth.h" | 11 | #include <linux/ceph/decode.h> |
| 11 | #include "decode.h" | 12 | |
| 13 | #include <linux/ceph/auth.h> | ||
| 12 | 14 | ||
| 13 | /* | 15 | /* |
| 14 | * Interact with Ceph monitor cluster. Handle requests for new map | 16 | * Interact with Ceph monitor cluster. Handle requests for new map |
| @@ -74,7 +76,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end) | |||
| 74 | m->num_mon); | 76 | m->num_mon); |
| 75 | for (i = 0; i < m->num_mon; i++) | 77 | for (i = 0; i < m->num_mon; i++) |
| 76 | dout("monmap_decode mon%d is %s\n", i, | 78 | dout("monmap_decode mon%d is %s\n", i, |
| 77 | pr_addr(&m->mon_inst[i].addr.in_addr)); | 79 | ceph_pr_addr(&m->mon_inst[i].addr.in_addr)); |
| 78 | return m; | 80 | return m; |
| 79 | 81 | ||
| 80 | bad: | 82 | bad: |
| @@ -191,30 +193,33 @@ static void __send_subscribe(struct ceph_mon_client *monc) | |||
| 191 | struct ceph_msg *msg = monc->m_subscribe; | 193 | struct ceph_msg *msg = monc->m_subscribe; |
| 192 | struct ceph_mon_subscribe_item *i; | 194 | struct ceph_mon_subscribe_item *i; |
| 193 | void *p, *end; | 195 | void *p, *end; |
| 196 | int num; | ||
| 194 | 197 | ||
| 195 | p = msg->front.iov_base; | 198 | p = msg->front.iov_base; |
| 196 | end = p + msg->front_max; | 199 | end = p + msg->front_max; |
| 197 | 200 | ||
| 198 | dout("__send_subscribe to 'mdsmap' %u+\n", | 201 | num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap; |
| 199 | (unsigned)monc->have_mdsmap); | 202 | ceph_encode_32(&p, num); |
| 203 | |||
| 200 | if (monc->want_next_osdmap) { | 204 | if (monc->want_next_osdmap) { |
| 201 | dout("__send_subscribe to 'osdmap' %u\n", | 205 | dout("__send_subscribe to 'osdmap' %u\n", |
| 202 | (unsigned)monc->have_osdmap); | 206 | (unsigned)monc->have_osdmap); |
| 203 | ceph_encode_32(&p, 3); | ||
| 204 | ceph_encode_string(&p, end, "osdmap", 6); | 207 | ceph_encode_string(&p, end, "osdmap", 6); |
| 205 | i = p; | 208 | i = p; |
| 206 | i->have = cpu_to_le64(monc->have_osdmap); | 209 | i->have = cpu_to_le64(monc->have_osdmap); |
| 207 | i->onetime = 1; | 210 | i->onetime = 1; |
| 208 | p += sizeof(*i); | 211 | p += sizeof(*i); |
| 209 | monc->want_next_osdmap = 2; /* requested */ | 212 | monc->want_next_osdmap = 2; /* requested */ |
| 210 | } else { | ||
| 211 | ceph_encode_32(&p, 2); | ||
| 212 | } | 213 | } |
| 213 | ceph_encode_string(&p, end, "mdsmap", 6); | 214 | if (monc->want_mdsmap) { |
| 214 | i = p; | 215 | dout("__send_subscribe to 'mdsmap' %u+\n", |
| 215 | i->have = cpu_to_le64(monc->have_mdsmap); | 216 | (unsigned)monc->have_mdsmap); |
| 216 | i->onetime = 0; | 217 | ceph_encode_string(&p, end, "mdsmap", 6); |
| 217 | p += sizeof(*i); | 218 | i = p; |
| 219 | i->have = cpu_to_le64(monc->have_mdsmap); | ||
| 220 | i->onetime = 0; | ||
| 221 | p += sizeof(*i); | ||
| 222 | } | ||
| 218 | ceph_encode_string(&p, end, "monmap", 6); | 223 | ceph_encode_string(&p, end, "monmap", 6); |
| 219 | i = p; | 224 | i = p; |
| 220 | i->have = 0; | 225 | i->have = 0; |
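
__send_subscribe() now encodes the exact item count up front as 1 + !!want_next_osdmap + !!want_mdsmap; the double negation folds any nonzero want state (want_next_osdmap is set to 2 for "requested") into a single counted item, and monmap is always included. The idiom in isolation:

#include <stdio.h>

int main(void)
{
        int want_next_osdmap = 2;       /* 2 == "requested": still one item */
        int want_mdsmap = 0;

        /* monmap is always subscribed, hence the leading 1 */
        int num = 1 + !!want_next_osdmap + !!want_mdsmap;

        printf("num = %d\n", num);      /* -> 2 */
        return 0;
}
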
| @@ -243,7 +248,8 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc, | |||
| 243 | mutex_lock(&monc->mutex); | 248 | mutex_lock(&monc->mutex); |
| 244 | if (monc->hunting) { | 249 | if (monc->hunting) { |
| 245 | pr_info("mon%d %s session established\n", | 250 | pr_info("mon%d %s session established\n", |
| 246 | monc->cur_mon, pr_addr(&monc->con->peer_addr.in_addr)); | 251 | monc->cur_mon, |
| 252 | ceph_pr_addr(&monc->con->peer_addr.in_addr)); | ||
| 247 | monc->hunting = false; | 253 | monc->hunting = false; |
| 248 | } | 254 | } |
| 249 | dout("handle_subscribe_ack after %d seconds\n", seconds); | 255 | dout("handle_subscribe_ack after %d seconds\n", seconds); |
| @@ -266,6 +272,7 @@ int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got) | |||
| 266 | mutex_unlock(&monc->mutex); | 272 | mutex_unlock(&monc->mutex); |
| 267 | return 0; | 273 | return 0; |
| 268 | } | 274 | } |
| 275 | EXPORT_SYMBOL(ceph_monc_got_mdsmap); | ||
| 269 | 276 | ||
| 270 | int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got) | 277 | int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got) |
| 271 | { | 278 | { |
| @@ -310,6 +317,7 @@ int ceph_monc_open_session(struct ceph_mon_client *monc) | |||
| 310 | mutex_unlock(&monc->mutex); | 317 | mutex_unlock(&monc->mutex); |
| 311 | return 0; | 318 | return 0; |
| 312 | } | 319 | } |
| 320 | EXPORT_SYMBOL(ceph_monc_open_session); | ||
| 313 | 321 | ||
| 314 | /* | 322 | /* |
| 315 | * The monitor responds with mount ack indicate mount success. The | 323 | * The monitor responds with mount ack indicate mount success. The |
| @@ -540,6 +548,7 @@ out: | |||
| 540 | kref_put(&req->kref, release_generic_request); | 548 | kref_put(&req->kref, release_generic_request); |
| 541 | return err; | 549 | return err; |
| 542 | } | 550 | } |
| 551 | EXPORT_SYMBOL(ceph_monc_do_statfs); | ||
| 543 | 552 | ||
| 544 | /* | 553 | /* |
| 545 | * pool ops | 554 | * pool ops |
| @@ -651,6 +660,7 @@ int ceph_monc_create_snapid(struct ceph_mon_client *monc, | |||
| 651 | pool, 0, (char *)snapid, sizeof(*snapid)); | 660 | pool, 0, (char *)snapid, sizeof(*snapid)); |
| 652 | 661 | ||
| 653 | } | 662 | } |
| 663 | EXPORT_SYMBOL(ceph_monc_create_snapid); | ||
| 654 | 664 | ||
| 655 | int ceph_monc_delete_snapid(struct ceph_mon_client *monc, | 665 | int ceph_monc_delete_snapid(struct ceph_mon_client *monc, |
| 656 | u32 pool, u64 snapid) | 666 | u32 pool, u64 snapid) |
| @@ -708,9 +718,9 @@ static void delayed_work(struct work_struct *work) | |||
| 708 | */ | 718 | */ |
| 709 | static int build_initial_monmap(struct ceph_mon_client *monc) | 719 | static int build_initial_monmap(struct ceph_mon_client *monc) |
| 710 | { | 720 | { |
| 711 | struct ceph_mount_args *args = monc->client->mount_args; | 721 | struct ceph_options *opt = monc->client->options; |
| 712 | struct ceph_entity_addr *mon_addr = args->mon_addr; | 722 | struct ceph_entity_addr *mon_addr = opt->mon_addr; |
| 713 | int num_mon = args->num_mon; | 723 | int num_mon = opt->num_mon; |
| 714 | int i; | 724 | int i; |
| 715 | 725 | ||
| 716 | /* build initial monmap */ | 726 | /* build initial monmap */ |
| @@ -728,11 +738,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc) | |||
| 728 | } | 738 | } |
| 729 | monc->monmap->num_mon = num_mon; | 739 | monc->monmap->num_mon = num_mon; |
| 730 | monc->have_fsid = false; | 740 | monc->have_fsid = false; |
| 731 | |||
| 732 | /* release addr memory */ | ||
| 733 | kfree(args->mon_addr); | ||
| 734 | args->mon_addr = NULL; | ||
| 735 | args->num_mon = 0; | ||
| 736 | return 0; | 741 | return 0; |
| 737 | } | 742 | } |
| 738 | 743 | ||
| @@ -753,8 +758,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||
| 753 | monc->con = NULL; | 758 | monc->con = NULL; |
| 754 | 759 | ||
| 755 | /* authentication */ | 760 | /* authentication */ |
| 756 | monc->auth = ceph_auth_init(cl->mount_args->name, | 761 | monc->auth = ceph_auth_init(cl->options->name, |
| 757 | cl->mount_args->secret); | 762 | cl->options->secret); |
| 758 | if (IS_ERR(monc->auth)) | 763 | if (IS_ERR(monc->auth)) |
| 759 | return PTR_ERR(monc->auth); | 764 | return PTR_ERR(monc->auth); |
| 760 | monc->auth->want_keys = | 765 | monc->auth->want_keys = |
| @@ -808,6 +813,7 @@ out_monmap: | |||
| 808 | out: | 813 | out: |
| 809 | return err; | 814 | return err; |
| 810 | } | 815 | } |
| 816 | EXPORT_SYMBOL(ceph_monc_init); | ||
| 811 | 817 | ||
| 812 | void ceph_monc_stop(struct ceph_mon_client *monc) | 818 | void ceph_monc_stop(struct ceph_mon_client *monc) |
| 813 | { | 819 | { |
| @@ -832,6 +838,7 @@ void ceph_monc_stop(struct ceph_mon_client *monc) | |||
| 832 | 838 | ||
| 833 | kfree(monc->monmap); | 839 | kfree(monc->monmap); |
| 834 | } | 840 | } |
| 841 | EXPORT_SYMBOL(ceph_monc_stop); | ||
| 835 | 842 | ||
| 836 | static void handle_auth_reply(struct ceph_mon_client *monc, | 843 | static void handle_auth_reply(struct ceph_mon_client *monc, |
| 837 | struct ceph_msg *msg) | 844 | struct ceph_msg *msg) |
| @@ -889,6 +896,7 @@ int ceph_monc_validate_auth(struct ceph_mon_client *monc) | |||
| 889 | mutex_unlock(&monc->mutex); | 896 | mutex_unlock(&monc->mutex); |
| 890 | return ret; | 897 | return ret; |
| 891 | } | 898 | } |
| 899 | EXPORT_SYMBOL(ceph_monc_validate_auth); | ||
| 892 | 900 | ||
| 893 | /* | 901 | /* |
| 894 | * handle incoming message | 902 | * handle incoming message |
| @@ -922,15 +930,16 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | |||
| 922 | ceph_monc_handle_map(monc, msg); | 930 | ceph_monc_handle_map(monc, msg); |
| 923 | break; | 931 | break; |
| 924 | 932 | ||
| 925 | case CEPH_MSG_MDS_MAP: | ||
| 926 | ceph_mdsc_handle_map(&monc->client->mdsc, msg); | ||
| 927 | break; | ||
| 928 | |||
| 929 | case CEPH_MSG_OSD_MAP: | 933 | case CEPH_MSG_OSD_MAP: |
| 930 | ceph_osdc_handle_map(&monc->client->osdc, msg); | 934 | ceph_osdc_handle_map(&monc->client->osdc, msg); |
| 931 | break; | 935 | break; |
| 932 | 936 | ||
| 933 | default: | 937 | default: |
| 938 | /* can the chained handler handle it? */ | ||
| 939 | if (monc->client->extra_mon_dispatch && | ||
| 940 | monc->client->extra_mon_dispatch(monc->client, msg) == 0) | ||
| 941 | break; | ||
| 942 | |||
| 934 | pr_err("received unknown message type %d %s\n", type, | 943 | pr_err("received unknown message type %d %s\n", type, |
| 935 | ceph_msg_type_name(type)); | 944 | ceph_msg_type_name(type)); |
| 936 | } | 945 | } |
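
MDS map handling leaves the generic monitor client here: unknown types are first offered to an optional extra_mon_dispatch hook supplied by the fs layer, and only logged as errors if no chained handler claims them. A sketch of that chain, with hypothetical names and message-type values:

#include <stdio.h>

struct client {
        /* returns 0 if the hook consumed the message, like extra_mon_dispatch */
        int (*extra_dispatch)(struct client *, int type);
};

static int fs_dispatch(struct client *c, int type)
{
        (void)c;
        if (type == 21) {               /* pretend 21 is the MDS map type */
                puts("fs layer handled mdsmap");
                return 0;
        }
        return -1;                      /* not ours */
}

static void dispatch(struct client *c, int type)
{
        if (c->extra_dispatch && c->extra_dispatch(c, type) == 0)
                return;                 /* chained handler consumed it */
        printf("unknown message type %d\n", type);
}

int main(void)
{
        struct client c = { fs_dispatch };

        dispatch(&c, 21);
        dispatch(&c, 99);
        return 0;
}
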
| @@ -994,7 +1003,7 @@ static void mon_fault(struct ceph_connection *con) | |||
| 994 | if (monc->con && !monc->hunting) | 1003 | if (monc->con && !monc->hunting) |
| 995 | pr_info("mon%d %s session lost, " | 1004 | pr_info("mon%d %s session lost, " |
| 996 | "hunting for new mon\n", monc->cur_mon, | 1005 | "hunting for new mon\n", monc->cur_mon, |
| 997 | pr_addr(&monc->con->peer_addr.in_addr)); | 1006 | ceph_pr_addr(&monc->con->peer_addr.in_addr)); |
| 998 | 1007 | ||
| 999 | __close_session(monc); | 1008 | __close_session(monc); |
| 1000 | if (!monc->hunting) { | 1009 | if (!monc->hunting) { |
diff --git a/fs/ceph/msgpool.c b/net/ceph/msgpool.c index dd65a6438131..d5f2d97ac05c 100644 --- a/fs/ceph/msgpool.c +++ b/net/ceph/msgpool.c | |||
| @@ -1,11 +1,11 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/err.h> | 3 | #include <linux/err.h> |
| 4 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
| 5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
| 6 | #include <linux/vmalloc.h> | 6 | #include <linux/vmalloc.h> |
| 7 | 7 | ||
| 8 | #include "msgpool.h" | 8 | #include <linux/ceph/msgpool.h> |
| 9 | 9 | ||
| 10 | static void *alloc_fn(gfp_t gfp_mask, void *arg) | 10 | static void *alloc_fn(gfp_t gfp_mask, void *arg) |
| 11 | { | 11 | { |
diff --git a/fs/ceph/osd_client.c b/net/ceph/osd_client.c index 3b5571b8ce22..79391994b3ed 100644 --- a/fs/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
| @@ -1,17 +1,22 @@ | |||
| 1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
| 2 | 2 | ||
| 3 | #include <linux/module.h> | ||
| 3 | #include <linux/err.h> | 4 | #include <linux/err.h> |
| 4 | #include <linux/highmem.h> | 5 | #include <linux/highmem.h> |
| 5 | #include <linux/mm.h> | 6 | #include <linux/mm.h> |
| 6 | #include <linux/pagemap.h> | 7 | #include <linux/pagemap.h> |
| 7 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
| 8 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
| 10 | #ifdef CONFIG_BLOCK | ||
| 11 | #include <linux/bio.h> | ||
| 12 | #endif | ||
| 9 | 13 | ||
| 10 | #include "super.h" | 14 | #include <linux/ceph/libceph.h> |
| 11 | #include "osd_client.h" | 15 | #include <linux/ceph/osd_client.h> |
| 12 | #include "messenger.h" | 16 | #include <linux/ceph/messenger.h> |
| 13 | #include "decode.h" | 17 | #include <linux/ceph/decode.h> |
| 14 | #include "auth.h" | 18 | #include <linux/ceph/auth.h> |
| 19 | #include <linux/ceph/pagelist.h> | ||
| 15 | 20 | ||
| 16 | #define OSD_OP_FRONT_LEN 4096 | 21 | #define OSD_OP_FRONT_LEN 4096 |
| 17 | #define OSD_OPREPLY_FRONT_LEN 512 | 22 | #define OSD_OPREPLY_FRONT_LEN 512 |
| @@ -22,6 +27,59 @@ static int __kick_requests(struct ceph_osd_client *osdc, | |||
| 22 | 27 | ||
| 23 | static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd); | 28 | static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd); |
| 24 | 29 | ||
| 30 | static int op_needs_trail(int op) | ||
| 31 | { | ||
| 32 | switch (op) { | ||
| 33 | case CEPH_OSD_OP_GETXATTR: | ||
| 34 | case CEPH_OSD_OP_SETXATTR: | ||
| 35 | case CEPH_OSD_OP_CMPXATTR: | ||
| 36 | case CEPH_OSD_OP_CALL: | ||
| 37 | return 1; | ||
| 38 | default: | ||
| 39 | return 0; | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | static int op_has_extent(int op) | ||
| 44 | { | ||
| 45 | return (op == CEPH_OSD_OP_READ || | ||
| 46 | op == CEPH_OSD_OP_WRITE); | ||
| 47 | } | ||
| 48 | |||
| 49 | void ceph_calc_raw_layout(struct ceph_osd_client *osdc, | ||
| 50 | struct ceph_file_layout *layout, | ||
| 51 | u64 snapid, | ||
| 52 | u64 off, u64 *plen, u64 *bno, | ||
| 53 | struct ceph_osd_request *req, | ||
| 54 | struct ceph_osd_req_op *op) | ||
| 55 | { | ||
| 56 | struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; | ||
| 57 | u64 orig_len = *plen; | ||
| 58 | u64 objoff, objlen; /* extent in object */ | ||
| 59 | |||
| 60 | reqhead->snapid = cpu_to_le64(snapid); | ||
| 61 | |||
| 62 | /* object extent? */ | ||
| 63 | ceph_calc_file_object_mapping(layout, off, plen, bno, | ||
| 64 | &objoff, &objlen); | ||
| 65 | if (*plen < orig_len) | ||
| 66 | dout(" skipping last %llu, final file extent %llu~%llu\n", | ||
| 67 | orig_len - *plen, off, *plen); | ||
| 68 | |||
| 69 | if (op_has_extent(op->op)) { | ||
| 70 | op->extent.offset = objoff; | ||
| 71 | op->extent.length = objlen; | ||
| 72 | } | ||
| 73 | req->r_num_pages = calc_pages_for(off, *plen); | ||
| 74 | if (op->op == CEPH_OSD_OP_WRITE) | ||
| 75 | op->payload_len = *plen; | ||
| 76 | |||
| 77 | dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", | ||
| 78 | *bno, objoff, objlen, req->r_num_pages); | ||
| 79 | |||
| 80 | } | ||
| 81 | EXPORT_SYMBOL(ceph_calc_raw_layout); | ||
| 82 | |||
| 25 | /* | 83 | /* |
| 26 | * Implement client access to distributed object storage cluster. | 84 | * Implement client access to distributed object storage cluster. |
| 27 | * | 85 | * |
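
ceph_calc_raw_layout() above maps a file extent onto one object via ceph_calc_file_object_mapping(), clamping *plen when the extent would spill past the object boundary (the "skipping last ..." case). A deliberately simplified stand-in that ignores striping and assumes one object of obj_size bytes per block number:

#include <stdio.h>

/*
 * Simplified model of ceph_calc_file_object_mapping(); the real function
 * also handles stripe_unit/stripe_count interleaving.
 */
static void map_extent(unsigned long long off, unsigned long long *plen,
                       unsigned long long obj_size,
                       unsigned long long *bno,
                       unsigned long long *objoff, unsigned long long *objlen)
{
        *bno = off / obj_size;
        *objoff = off % obj_size;
        *objlen = obj_size - *objoff;
        if (*objlen < *plen)
                *plen = *objlen;        /* clamp: rest goes to the next object */
        else
                *objlen = *plen;
}

int main(void)
{
        unsigned long long len = 10000, bno, objoff, objlen;

        map_extent(4190000, &len, 4194304, &bno, &objoff, &objlen);
        printf("bno=%llu objoff=%llu objlen=%llu len=%llu\n",
               bno, objoff, objlen, len);   /* extent clipped at object end */
        return 0;
}
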
| @@ -48,34 +106,19 @@ static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd); | |||
| 48 | * fill osd op in request message. | 106 | * fill osd op in request message. |
| 49 | */ | 107 | */ |
| 50 | static void calc_layout(struct ceph_osd_client *osdc, | 108 | static void calc_layout(struct ceph_osd_client *osdc, |
| 51 | struct ceph_vino vino, struct ceph_file_layout *layout, | 109 | struct ceph_vino vino, |
| 110 | struct ceph_file_layout *layout, | ||
| 52 | u64 off, u64 *plen, | 111 | u64 off, u64 *plen, |
| 53 | struct ceph_osd_request *req) | 112 | struct ceph_osd_request *req, |
| 113 | struct ceph_osd_req_op *op) | ||
| 54 | { | 114 | { |
| 55 | struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; | ||
| 56 | struct ceph_osd_op *op = (void *)(reqhead + 1); | ||
| 57 | u64 orig_len = *plen; | ||
| 58 | u64 objoff, objlen; /* extent in object */ | ||
| 59 | u64 bno; | 115 | u64 bno; |
| 60 | 116 | ||
| 61 | reqhead->snapid = cpu_to_le64(vino.snap); | 117 | ceph_calc_raw_layout(osdc, layout, vino.snap, off, |
| 62 | 118 | plen, &bno, req, op); | |
| 63 | /* object extent? */ | ||
| 64 | ceph_calc_file_object_mapping(layout, off, plen, &bno, | ||
| 65 | &objoff, &objlen); | ||
| 66 | if (*plen < orig_len) | ||
| 67 | dout(" skipping last %llu, final file extent %llu~%llu\n", | ||
| 68 | orig_len - *plen, off, *plen); | ||
| 69 | 119 | ||
| 70 | sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno); | 120 | sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno); |
| 71 | req->r_oid_len = strlen(req->r_oid); | 121 | req->r_oid_len = strlen(req->r_oid); |
| 72 | |||
| 73 | op->extent.offset = cpu_to_le64(objoff); | ||
| 74 | op->extent.length = cpu_to_le64(objlen); | ||
| 75 | req->r_num_pages = calc_pages_for(off, *plen); | ||
| 76 | |||
| 77 | dout("calc_layout %s (%d) %llu~%llu (%d pages)\n", | ||
| 78 | req->r_oid, req->r_oid_len, objoff, objlen, req->r_num_pages); | ||
| 79 | } | 122 | } |
| 80 | 123 | ||
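
calc_layout() now keeps only the oid naming and delegates the rest: object names are the inode number in hex, a dot, and the block number zero-padded to eight hex digits. The same formatting in isolation, with hypothetical values:

#include <stdio.h>

int main(void)
{
        unsigned long long ino = 0x10000003ac5ULL;  /* hypothetical inode */
        unsigned long long bno = 7;                 /* object/block number */
        char oid[64];

        /* same scheme as the sprintf kept in calc_layout() */
        snprintf(oid, sizeof(oid), "%llx.%08llx", ino, bno);
        printf("%s\n", oid);    /* -> 10000003ac5.00000007 */
        return 0;
}
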
| 81 | /* | 124 | /* |
| @@ -101,56 +144,66 @@ void ceph_osdc_release_request(struct kref *kref) | |||
| 101 | if (req->r_own_pages) | 144 | if (req->r_own_pages) |
| 102 | ceph_release_page_vector(req->r_pages, | 145 | ceph_release_page_vector(req->r_pages, |
| 103 | req->r_num_pages); | 146 | req->r_num_pages); |
| 147 | #ifdef CONFIG_BLOCK | ||
| 148 | if (req->r_bio) | ||
| 149 | bio_put(req->r_bio); | ||
| 150 | #endif | ||
| 104 | ceph_put_snap_context(req->r_snapc); | 151 | ceph_put_snap_context(req->r_snapc); |
| 152 | if (req->r_trail) { | ||
| 153 | ceph_pagelist_release(req->r_trail); | ||
| 154 | kfree(req->r_trail); | ||
| 155 | } | ||
| 105 | if (req->r_mempool) | 156 | if (req->r_mempool) |
| 106 | mempool_free(req, req->r_osdc->req_mempool); | 157 | mempool_free(req, req->r_osdc->req_mempool); |
| 107 | else | 158 | else |
| 108 | kfree(req); | 159 | kfree(req); |
| 109 | } | 160 | } |
| 161 | EXPORT_SYMBOL(ceph_osdc_release_request); | ||
| 110 | 162 | ||
| 111 | /* | 163 | static int get_num_ops(struct ceph_osd_req_op *ops, int *needs_trail) |
| 112 | * build new request AND message, calculate layout, and adjust file | 164 | { |
| 113 | * extent as needed. | 165 | int i = 0; |
| 114 | * | 166 | |
| 115 | * if the file was recently truncated, we include information about its | 167 | if (needs_trail) |
| 116 | * old and new size so that the object can be updated appropriately. (we | 168 | *needs_trail = 0; |
| 117 | * avoid synchronously deleting truncated objects because it's slow.) | 169 | while (ops[i].op) { |
| 118 | * | 170 | if (needs_trail && op_needs_trail(ops[i].op)) |
| 119 | * if @do_sync, include a 'startsync' command so that the osd will flush | 171 | *needs_trail = 1; |
| 120 | * data quickly. | 172 | i++; |
| 121 | */ | 173 | } |
| 122 | struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | 174 | |
| 123 | struct ceph_file_layout *layout, | 175 | return i; |
| 124 | struct ceph_vino vino, | 176 | } |
| 125 | u64 off, u64 *plen, | 177 | |
| 126 | int opcode, int flags, | 178 | struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, |
| 179 | int flags, | ||
| 127 | struct ceph_snap_context *snapc, | 180 | struct ceph_snap_context *snapc, |
| 128 | int do_sync, | 181 | struct ceph_osd_req_op *ops, |
| 129 | u32 truncate_seq, | 182 | bool use_mempool, |
| 130 | u64 truncate_size, | 183 | gfp_t gfp_flags, |
| 131 | struct timespec *mtime, | 184 | struct page **pages, |
| 132 | bool use_mempool, int num_reply) | 185 | struct bio *bio) |
| 133 | { | 186 | { |
| 134 | struct ceph_osd_request *req; | 187 | struct ceph_osd_request *req; |
| 135 | struct ceph_msg *msg; | 188 | struct ceph_msg *msg; |
| 136 | struct ceph_osd_request_head *head; | 189 | int needs_trail; |
| 137 | struct ceph_osd_op *op; | 190 | int num_op = get_num_ops(ops, &needs_trail); |
| 138 | void *p; | 191 | size_t msg_size = sizeof(struct ceph_osd_request_head); |
| 139 | int num_op = 1 + do_sync; | 192 | |
| 140 | size_t msg_size = sizeof(*head) + num_op*sizeof(*op); | 193 | msg_size += num_op*sizeof(struct ceph_osd_op); |
| 141 | int i; | ||
| 142 | 194 | ||
| 143 | if (use_mempool) { | 195 | if (use_mempool) { |
| 144 | req = mempool_alloc(osdc->req_mempool, GFP_NOFS); | 196 | req = mempool_alloc(osdc->req_mempool, gfp_flags); |
| 145 | memset(req, 0, sizeof(*req)); | 197 | memset(req, 0, sizeof(*req)); |
| 146 | } else { | 198 | } else { |
| 147 | req = kzalloc(sizeof(*req), GFP_NOFS); | 199 | req = kzalloc(sizeof(*req), gfp_flags); |
| 148 | } | 200 | } |
| 149 | if (req == NULL) | 201 | if (req == NULL) |
| 150 | return NULL; | 202 | return NULL; |
| 151 | 203 | ||
| 152 | req->r_osdc = osdc; | 204 | req->r_osdc = osdc; |
| 153 | req->r_mempool = use_mempool; | 205 | req->r_mempool = use_mempool; |
| 206 | |||
| 154 | kref_init(&req->r_kref); | 207 | kref_init(&req->r_kref); |
| 155 | init_completion(&req->r_completion); | 208 | init_completion(&req->r_completion); |
| 156 | init_completion(&req->r_safe_completion); | 209 | init_completion(&req->r_safe_completion); |
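
Op arrays passed to ceph_osdc_alloc_request() are terminated by an entry whose op is 0; get_num_ops() counts the entries and, on the side, flags whether any of them needs the trailing pagelist so the trail can be allocated up front. A sketch of that scan (the needs-trail rule below is a toy placeholder, not the real per-opcode table):

#include <stdio.h>

struct req_op { int op; };

/* toy placeholder for op_needs_trail(); the real rule is per-opcode */
static int needs_trail(int op) { return op >= 4; }

static int count_ops(const struct req_op *ops, int *needs)
{
        int i = 0;

        if (needs)
                *needs = 0;
        while (ops[i].op) {     /* an entry with op == 0 ends the array */
                if (needs && needs_trail(ops[i].op))
                        *needs = 1;
                i++;
        }
        return i;
}

int main(void)
{
        struct req_op ops[] = { { 1 }, { 5 }, { 0 } };
        int needs;

        printf("num=%d needs_trail=%d\n", count_ops(ops, &needs), needs);
        return 0;
}
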
| @@ -164,13 +217,22 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
| 164 | msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); | 217 | msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); |
| 165 | else | 218 | else |
| 166 | msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, | 219 | msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, |
| 167 | OSD_OPREPLY_FRONT_LEN, GFP_NOFS); | 220 | OSD_OPREPLY_FRONT_LEN, gfp_flags); |
| 168 | if (!msg) { | 221 | if (!msg) { |
| 169 | ceph_osdc_put_request(req); | 222 | ceph_osdc_put_request(req); |
| 170 | return NULL; | 223 | return NULL; |
| 171 | } | 224 | } |
| 172 | req->r_reply = msg; | 225 | req->r_reply = msg; |
| 173 | 226 | ||
| 227 | /* allocate space for the trailing data */ | ||
| 228 | if (needs_trail) { | ||
| 229 | req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags); | ||
| 230 | if (!req->r_trail) { | ||
| 231 | ceph_osdc_put_request(req); | ||
| 232 | return NULL; | ||
| 233 | } | ||
| 234 | ceph_pagelist_init(req->r_trail); | ||
| 235 | } | ||
| 174 | /* create request message; allow space for oid */ | 236 | /* create request message; allow space for oid */ |
| 175 | msg_size += 40; | 237 | msg_size += 40; |
| 176 | if (snapc) | 238 | if (snapc) |
| @@ -178,18 +240,115 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
| 178 | if (use_mempool) | 240 | if (use_mempool) |
| 179 | msg = ceph_msgpool_get(&osdc->msgpool_op, 0); | 241 | msg = ceph_msgpool_get(&osdc->msgpool_op, 0); |
| 180 | else | 242 | else |
| 181 | msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, GFP_NOFS); | 243 | msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags); |
| 182 | if (!msg) { | 244 | if (!msg) { |
| 183 | ceph_osdc_put_request(req); | 245 | ceph_osdc_put_request(req); |
| 184 | return NULL; | 246 | return NULL; |
| 185 | } | 247 | } |
| 248 | |||
| 186 | msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); | 249 | msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); |
| 187 | memset(msg->front.iov_base, 0, msg->front.iov_len); | 250 | memset(msg->front.iov_base, 0, msg->front.iov_len); |
| 251 | |||
| 252 | req->r_request = msg; | ||
| 253 | req->r_pages = pages; | ||
| 254 | #ifdef CONFIG_BLOCK | ||
| 255 | if (bio) { | ||
| 256 | req->r_bio = bio; | ||
| 257 | bio_get(req->r_bio); | ||
| 258 | } | ||
| 259 | #endif | ||
| 260 | |||
| 261 | return req; | ||
| 262 | } | ||
| 263 | EXPORT_SYMBOL(ceph_osdc_alloc_request); | ||
| 264 | |||
| 265 | static void osd_req_encode_op(struct ceph_osd_request *req, | ||
| 266 | struct ceph_osd_op *dst, | ||
| 267 | struct ceph_osd_req_op *src) | ||
| 268 | { | ||
| 269 | dst->op = cpu_to_le16(src->op); | ||
| 270 | |||
| 271 | switch (dst->op) { | ||
| 272 | case CEPH_OSD_OP_READ: | ||
| 273 | case CEPH_OSD_OP_WRITE: | ||
| 274 | dst->extent.offset = | ||
| 275 | cpu_to_le64(src->extent.offset); | ||
| 276 | dst->extent.length = | ||
| 277 | cpu_to_le64(src->extent.length); | ||
| 278 | dst->extent.truncate_size = | ||
| 279 | cpu_to_le64(src->extent.truncate_size); | ||
| 280 | dst->extent.truncate_seq = | ||
| 281 | cpu_to_le32(src->extent.truncate_seq); | ||
| 282 | break; | ||
| 283 | |||
| 284 | case CEPH_OSD_OP_GETXATTR: | ||
| 285 | case CEPH_OSD_OP_SETXATTR: | ||
| 286 | case CEPH_OSD_OP_CMPXATTR: | ||
| 287 | BUG_ON(!req->r_trail); | ||
| 288 | |||
| 289 | dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); | ||
| 290 | dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); | ||
| 291 | dst->xattr.cmp_op = src->xattr.cmp_op; | ||
| 292 | dst->xattr.cmp_mode = src->xattr.cmp_mode; | ||
| 293 | ceph_pagelist_append(req->r_trail, src->xattr.name, | ||
| 294 | src->xattr.name_len); | ||
| 295 | ceph_pagelist_append(req->r_trail, src->xattr.val, | ||
| 296 | src->xattr.value_len); | ||
| 297 | break; | ||
| 298 | case CEPH_OSD_OP_CALL: | ||
| 299 | BUG_ON(!req->r_trail); | ||
| 300 | |||
| 301 | dst->cls.class_len = src->cls.class_len; | ||
| 302 | dst->cls.method_len = src->cls.method_len; | ||
| 303 | dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); | ||
| 304 | |||
| 305 | ceph_pagelist_append(req->r_trail, src->cls.class_name, | ||
| 306 | src->cls.class_len); | ||
| 307 | ceph_pagelist_append(req->r_trail, src->cls.method_name, | ||
| 308 | src->cls.method_len); | ||
| 309 | ceph_pagelist_append(req->r_trail, src->cls.indata, | ||
| 310 | src->cls.indata_len); | ||
| 311 | break; | ||
| 312 | case CEPH_OSD_OP_ROLLBACK: | ||
| 313 | dst->snap.snapid = cpu_to_le64(src->snap.snapid); | ||
| 314 | break; | ||
| 315 | case CEPH_OSD_OP_STARTSYNC: | ||
| 316 | break; | ||
| 317 | default: | ||
| 318 | pr_err("unrecognized osd opcode %d\n", dst->op); | ||
| 319 | WARN_ON(1); | ||
| 320 | break; | ||
| 321 | } | ||
| 322 | dst->payload_len = cpu_to_le32(src->payload_len); | ||
| 323 | } | ||
| 324 | |||
| 325 | /* | ||
| 326 | * build new request AND message | ||
| 327 | * | ||
| 328 | */ | ||
| 329 | void ceph_osdc_build_request(struct ceph_osd_request *req, | ||
| 330 | u64 off, u64 *plen, | ||
| 331 | struct ceph_osd_req_op *src_ops, | ||
| 332 | struct ceph_snap_context *snapc, | ||
| 333 | struct timespec *mtime, | ||
| 334 | const char *oid, | ||
| 335 | int oid_len) | ||
| 336 | { | ||
| 337 | struct ceph_msg *msg = req->r_request; | ||
| 338 | struct ceph_osd_request_head *head; | ||
| 339 | struct ceph_osd_req_op *src_op; | ||
| 340 | struct ceph_osd_op *op; | ||
| 341 | void *p; | ||
| 342 | int num_op = get_num_ops(src_ops, NULL); | ||
| 343 | size_t msg_size = sizeof(*head) + num_op*sizeof(*op); | ||
| 344 | int flags = req->r_flags; | ||
| 345 | u64 data_len = 0; | ||
| 346 | int i; | ||
| 347 | |||
| 188 | head = msg->front.iov_base; | 348 | head = msg->front.iov_base; |
| 189 | op = (void *)(head + 1); | 349 | op = (void *)(head + 1); |
| 190 | p = (void *)(op + num_op); | 350 | p = (void *)(op + num_op); |
| 191 | 351 | ||
| 192 | req->r_request = msg; | ||
| 193 | req->r_snapc = ceph_get_snap_context(snapc); | 352 | req->r_snapc = ceph_get_snap_context(snapc); |
| 194 | 353 | ||
| 195 | head->client_inc = cpu_to_le32(1); /* always, for now. */ | 354 | head->client_inc = cpu_to_le32(1); /* always, for now. */ |
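
osd_req_encode_op() above turns each host-order op into its little-endian wire form field by field. A userspace equivalent of the extent case, using glibc's htole64()/htole32() where the kernel uses cpu_to_le64()/cpu_to_le32() (the struct layout here is illustrative, not the on-wire ceph_osd_op):

#include <stdio.h>
#include <stdint.h>
#include <endian.h>     /* htole64/htole32 on glibc */

struct wire_extent {
        uint64_t offset;
        uint64_t length;
        uint64_t truncate_size;
        uint32_t truncate_seq;
} __attribute__((packed));

int main(void)
{
        struct wire_extent dst;

        /* host-order values from the request, converted field by field */
        dst.offset        = htole64(4190000);
        dst.length        = htole64(4304);
        dst.truncate_size = htole64(0);
        dst.truncate_seq  = htole32(1);

        printf("first wire byte of offset: %02x\n",
               ((unsigned char *)&dst.offset)[0]);  /* LSB first on the wire */
        return 0;
}
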
| @@ -197,29 +356,23 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
| 197 | if (flags & CEPH_OSD_FLAG_WRITE) | 356 | if (flags & CEPH_OSD_FLAG_WRITE) |
| 198 | ceph_encode_timespec(&head->mtime, mtime); | 357 | ceph_encode_timespec(&head->mtime, mtime); |
| 199 | head->num_ops = cpu_to_le16(num_op); | 358 | head->num_ops = cpu_to_le16(num_op); |
| 200 | op->op = cpu_to_le16(opcode); | ||
| 201 | 359 | ||
| 202 | /* calculate max write size */ | ||
| 203 | calc_layout(osdc, vino, layout, off, plen, req); | ||
| 204 | req->r_file_layout = *layout; /* keep a copy */ | ||
| 205 | |||
| 206 | if (flags & CEPH_OSD_FLAG_WRITE) { | ||
| 207 | req->r_request->hdr.data_off = cpu_to_le16(off); | ||
| 208 | req->r_request->hdr.data_len = cpu_to_le32(*plen); | ||
| 209 | op->payload_len = cpu_to_le32(*plen); | ||
| 210 | } | ||
| 211 | op->extent.truncate_size = cpu_to_le64(truncate_size); | ||
| 212 | op->extent.truncate_seq = cpu_to_le32(truncate_seq); | ||
| 213 | 360 | ||
| 214 | /* fill in oid */ | 361 | /* fill in oid */ |
| 215 | head->object_len = cpu_to_le32(req->r_oid_len); | 362 | head->object_len = cpu_to_le32(oid_len); |
| 216 | memcpy(p, req->r_oid, req->r_oid_len); | 363 | memcpy(p, oid, oid_len); |
| 217 | p += req->r_oid_len; | 364 | p += oid_len; |
| 218 | 365 | ||
| 219 | if (do_sync) { | 366 | src_op = src_ops; |
| 367 | while (src_op->op) { | ||
| 368 | osd_req_encode_op(req, op, src_op); | ||
| 369 | src_op++; | ||
| 220 | op++; | 370 | op++; |
| 221 | op->op = cpu_to_le16(CEPH_OSD_OP_STARTSYNC); | ||
| 222 | } | 371 | } |
| 372 | |||
| 373 | if (req->r_trail) | ||
| 374 | data_len += req->r_trail->length; | ||
| 375 | |||
| 223 | if (snapc) { | 376 | if (snapc) { |
| 224 | head->snap_seq = cpu_to_le64(snapc->seq); | 377 | head->snap_seq = cpu_to_le64(snapc->seq); |
| 225 | head->num_snaps = cpu_to_le32(snapc->num_snaps); | 378 | head->num_snaps = cpu_to_le32(snapc->num_snaps); |
| @@ -229,12 +382,79 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
| 229 | } | 382 | } |
| 230 | } | 383 | } |
| 231 | 384 | ||
| 385 | if (flags & CEPH_OSD_FLAG_WRITE) { | ||
| 386 | req->r_request->hdr.data_off = cpu_to_le16(off); | ||
| 387 | req->r_request->hdr.data_len = cpu_to_le32(*plen + data_len); | ||
| 388 | } else if (data_len) { | ||
| 389 | req->r_request->hdr.data_off = 0; | ||
| 390 | req->r_request->hdr.data_len = cpu_to_le32(data_len); | ||
| 391 | } | ||
| 392 | |||
| 232 | BUG_ON(p > msg->front.iov_base + msg->front.iov_len); | 393 | BUG_ON(p > msg->front.iov_base + msg->front.iov_len); |
| 233 | msg_size = p - msg->front.iov_base; | 394 | msg_size = p - msg->front.iov_base; |
| 234 | msg->front.iov_len = msg_size; | 395 | msg->front.iov_len = msg_size; |
| 235 | msg->hdr.front_len = cpu_to_le32(msg_size); | 396 | msg->hdr.front_len = cpu_to_le32(msg_size); |
| 397 | return; | ||
| 398 | } | ||
| 399 | EXPORT_SYMBOL(ceph_osdc_build_request); | ||
| 400 | |||
| 401 | /* | ||
| 402 | * build new request AND message, calculate layout, and adjust file | ||
| 403 | * extent as needed. | ||
| 404 | * | ||
| 405 | * if the file was recently truncated, we include information about its | ||
| 406 | * old and new size so that the object can be updated appropriately. (we | ||
| 407 | * avoid synchronously deleting truncated objects because it's slow.) | ||
| 408 | * | ||
| 409 | * if @do_sync, include a 'startsync' command so that the osd will flush | ||
| 410 | * data quickly. | ||
| 411 | */ | ||
| 412 | struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | ||
| 413 | struct ceph_file_layout *layout, | ||
| 414 | struct ceph_vino vino, | ||
| 415 | u64 off, u64 *plen, | ||
| 416 | int opcode, int flags, | ||
| 417 | struct ceph_snap_context *snapc, | ||
| 418 | int do_sync, | ||
| 419 | u32 truncate_seq, | ||
| 420 | u64 truncate_size, | ||
| 421 | struct timespec *mtime, | ||
| 422 | bool use_mempool, int num_reply) | ||
| 423 | { | ||
| 424 | struct ceph_osd_req_op ops[3]; | ||
| 425 | struct ceph_osd_request *req; | ||
| 426 | |||
| 427 | ops[0].op = opcode; | ||
| 428 | ops[0].extent.truncate_seq = truncate_seq; | ||
| 429 | ops[0].extent.truncate_size = truncate_size; | ||
| 430 | ops[0].payload_len = 0; | ||
| 431 | |||
| 432 | if (do_sync) { | ||
| 433 | ops[1].op = CEPH_OSD_OP_STARTSYNC; | ||
| 434 | ops[1].payload_len = 0; | ||
| 435 | ops[2].op = 0; | ||
| 436 | } else | ||
| 437 | ops[1].op = 0; | ||
| 438 | |||
| 439 | req = ceph_osdc_alloc_request(osdc, flags, | ||
| 440 | snapc, ops, | ||
| 441 | use_mempool, | ||
| 442 | GFP_NOFS, NULL, NULL); | ||
| 443 | if (IS_ERR(req)) | ||
| 444 | return req; | ||
| 445 | |||
| 446 | /* calculate max write size */ | ||
| 447 | calc_layout(osdc, vino, layout, off, plen, req, ops); | ||
| 448 | req->r_file_layout = *layout; /* keep a copy */ | ||
| 449 | |||
| 450 | ceph_osdc_build_request(req, off, plen, ops, | ||
| 451 | snapc, | ||
| 452 | mtime, | ||
| 453 | req->r_oid, req->r_oid_len); | ||
| 454 | |||
| 236 | return req; | 455 | return req; |
| 237 | } | 456 | } |
| 457 | EXPORT_SYMBOL(ceph_osdc_new_request); | ||
| 238 | 458 | ||
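The refactor above splits request setup into ceph_osdc_alloc_request() plus ceph_osdc_build_request(), keeping ceph_osdc_new_request() as the convenience wrapper. A minimal read-side caller sketch against the signature shown above (osdc, layout, vino and the truncate values are assumed to be in scope):

	struct ceph_osd_request *req;
	u64 len = PAGE_SIZE;	/* calc_layout() may shorten this */

	req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
				    NULL,		/* snapc: reads need none */
				    0,			/* do_sync */
				    truncate_seq, truncate_size,
				    NULL,		/* mtime: writes only */
				    false, 1);		/* use_mempool, num_reply */
	if (IS_ERR(req))
		return PTR_ERR(req);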
| 239 | /* | 459 | /* |
| 240 | * We keep osd requests in an rbtree, sorted by ->r_tid. | 460 | * We keep osd requests in an rbtree, sorted by ->r_tid. |
| @@ -389,7 +609,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc, | |||
| 389 | dout("__move_osd_to_lru %p\n", osd); | 609 | dout("__move_osd_to_lru %p\n", osd); |
| 390 | BUG_ON(!list_empty(&osd->o_osd_lru)); | 610 | BUG_ON(!list_empty(&osd->o_osd_lru)); |
| 391 | list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); | 611 | list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); |
| 392 | osd->lru_ttl = jiffies + osdc->client->mount_args->osd_idle_ttl * HZ; | 612 | osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ; |
| 393 | } | 613 | } |
| 394 | 614 | ||
| 395 | static void __remove_osd_from_lru(struct ceph_osd *osd) | 615 | static void __remove_osd_from_lru(struct ceph_osd *osd) |
| @@ -483,7 +703,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o) | |||
| 483 | static void __schedule_osd_timeout(struct ceph_osd_client *osdc) | 703 | static void __schedule_osd_timeout(struct ceph_osd_client *osdc) |
| 484 | { | 704 | { |
| 485 | schedule_delayed_work(&osdc->timeout_work, | 705 | schedule_delayed_work(&osdc->timeout_work, |
| 486 | osdc->client->mount_args->osd_keepalive_timeout * HZ); | 706 | osdc->client->options->osd_keepalive_timeout * HZ); |
| 487 | } | 707 | } |
| 488 | 708 | ||
| 489 | static void __cancel_osd_timeout(struct ceph_osd_client *osdc) | 709 | static void __cancel_osd_timeout(struct ceph_osd_client *osdc) |
| @@ -684,9 +904,9 @@ static void handle_timeout(struct work_struct *work) | |||
| 684 | container_of(work, struct ceph_osd_client, timeout_work.work); | 904 | container_of(work, struct ceph_osd_client, timeout_work.work); |
| 685 | struct ceph_osd_request *req, *last_req = NULL; | 905 | struct ceph_osd_request *req, *last_req = NULL; |
| 686 | struct ceph_osd *osd; | 906 | struct ceph_osd *osd; |
| 687 | unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ; | 907 | unsigned long timeout = osdc->client->options->osd_timeout * HZ; |
| 688 | unsigned long keepalive = | 908 | unsigned long keepalive = |
| 689 | osdc->client->mount_args->osd_keepalive_timeout * HZ; | 909 | osdc->client->options->osd_keepalive_timeout * HZ; |
| 690 | unsigned long last_stamp = 0; | 910 | unsigned long last_stamp = 0; |
| 691 | struct rb_node *p; | 911 | struct rb_node *p; |
| 692 | struct list_head slow_osds; | 912 | struct list_head slow_osds; |
| @@ -773,7 +993,7 @@ static void handle_osds_timeout(struct work_struct *work) | |||
| 773 | container_of(work, struct ceph_osd_client, | 993 | container_of(work, struct ceph_osd_client, |
| 774 | osds_timeout_work.work); | 994 | osds_timeout_work.work); |
| 775 | unsigned long delay = | 995 | unsigned long delay = |
| 776 | osdc->client->mount_args->osd_idle_ttl * HZ >> 2; | 996 | osdc->client->options->osd_idle_ttl * HZ >> 2; |
| 777 | 997 | ||
| 778 | dout("osds timeout\n"); | 998 | dout("osds timeout\n"); |
| 779 | down_read(&osdc->map_sem); | 999 | down_read(&osdc->map_sem); |
| @@ -1104,6 +1324,10 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, | |||
| 1104 | 1324 | ||
| 1105 | req->r_request->pages = req->r_pages; | 1325 | req->r_request->pages = req->r_pages; |
| 1106 | req->r_request->nr_pages = req->r_num_pages; | 1326 | req->r_request->nr_pages = req->r_num_pages; |
| 1327 | #ifdef CONFIG_BLOCK | ||
| 1328 | req->r_request->bio = req->r_bio; | ||
| 1329 | #endif | ||
| 1330 | req->r_request->trail = req->r_trail; | ||
| 1107 | 1331 | ||
| 1108 | register_request(osdc, req); | 1332 | register_request(osdc, req); |
| 1109 | 1333 | ||
| @@ -1131,6 +1355,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, | |||
| 1131 | up_read(&osdc->map_sem); | 1355 | up_read(&osdc->map_sem); |
| 1132 | return rc; | 1356 | return rc; |
| 1133 | } | 1357 | } |
| 1358 | EXPORT_SYMBOL(ceph_osdc_start_request); | ||
| 1134 | 1359 | ||
| 1135 | /* | 1360 | /* |
| 1136 | * wait for a request to complete | 1361 | * wait for a request to complete |
| @@ -1153,6 +1378,7 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc, | |||
| 1153 | dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result); | 1378 | dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result); |
| 1154 | return req->r_result; | 1379 | return req->r_result; |
| 1155 | } | 1380 | } |
| 1381 | EXPORT_SYMBOL(ceph_osdc_wait_request); | ||
| 1156 | 1382 | ||
| 1157 | /* | 1383 | /* |
| 1158 | * sync - wait for all in-flight requests to flush. avoid starvation. | 1384 | * sync - wait for all in-flight requests to flush. avoid starvation. |
| @@ -1186,6 +1412,7 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc) | |||
| 1186 | mutex_unlock(&osdc->request_mutex); | 1412 | mutex_unlock(&osdc->request_mutex); |
| 1187 | dout("sync done (thru tid %llu)\n", last_tid); | 1413 | dout("sync done (thru tid %llu)\n", last_tid); |
| 1188 | } | 1414 | } |
| 1415 | EXPORT_SYMBOL(ceph_osdc_sync); | ||
| 1189 | 1416 | ||
| 1190 | /* | 1417 | /* |
| 1191 | * init, shutdown | 1418 | * init, shutdown |
| @@ -1211,7 +1438,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) | |||
| 1211 | INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); | 1438 | INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); |
| 1212 | 1439 | ||
| 1213 | schedule_delayed_work(&osdc->osds_timeout_work, | 1440 | schedule_delayed_work(&osdc->osds_timeout_work, |
| 1214 | round_jiffies_relative(osdc->client->mount_args->osd_idle_ttl * HZ)); | 1441 | round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ)); |
| 1215 | 1442 | ||
| 1216 | err = -ENOMEM; | 1443 | err = -ENOMEM; |
| 1217 | osdc->req_mempool = mempool_create_kmalloc_pool(10, | 1444 | osdc->req_mempool = mempool_create_kmalloc_pool(10, |
| @@ -1237,6 +1464,7 @@ out_mempool: | |||
| 1237 | out: | 1464 | out: |
| 1238 | return err; | 1465 | return err; |
| 1239 | } | 1466 | } |
| 1467 | EXPORT_SYMBOL(ceph_osdc_init); | ||
| 1240 | 1468 | ||
| 1241 | void ceph_osdc_stop(struct ceph_osd_client *osdc) | 1469 | void ceph_osdc_stop(struct ceph_osd_client *osdc) |
| 1242 | { | 1470 | { |
| @@ -1251,6 +1479,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) | |||
| 1251 | ceph_msgpool_destroy(&osdc->msgpool_op); | 1479 | ceph_msgpool_destroy(&osdc->msgpool_op); |
| 1252 | ceph_msgpool_destroy(&osdc->msgpool_op_reply); | 1480 | ceph_msgpool_destroy(&osdc->msgpool_op_reply); |
| 1253 | } | 1481 | } |
| 1482 | EXPORT_SYMBOL(ceph_osdc_stop); | ||
| 1254 | 1483 | ||
| 1255 | /* | 1484 | /* |
| 1256 | * Read some contiguous pages. If we cross a stripe boundary, shorten | 1485 | * Read some contiguous pages. If we cross a stripe boundary, shorten |
| @@ -1288,6 +1517,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||
| 1288 | dout("readpages result %d\n", rc); | 1517 | dout("readpages result %d\n", rc); |
| 1289 | return rc; | 1518 | return rc; |
| 1290 | } | 1519 | } |
| 1520 | EXPORT_SYMBOL(ceph_osdc_readpages); | ||
| 1291 | 1521 | ||
| 1292 | /* | 1522 | /* |
| 1293 | * do a synchronous write on N pages | 1523 | * do a synchronous write on N pages |
| @@ -1330,6 +1560,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
| 1330 | dout("writepages result %d\n", rc); | 1560 | dout("writepages result %d\n", rc); |
| 1331 | return rc; | 1561 | return rc; |
| 1332 | } | 1562 | } |
| 1563 | EXPORT_SYMBOL(ceph_osdc_writepages); | ||
| 1333 | 1564 | ||
| 1334 | /* | 1565 | /* |
| 1335 | * handle incoming message | 1566 | * handle incoming message |
| @@ -1420,6 +1651,9 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
| 1420 | } | 1651 | } |
| 1421 | m->pages = req->r_pages; | 1652 | m->pages = req->r_pages; |
| 1422 | m->nr_pages = req->r_num_pages; | 1653 | m->nr_pages = req->r_num_pages; |
| 1654 | #ifdef CONFIG_BLOCK | ||
| 1655 | m->bio = req->r_bio; | ||
| 1656 | #endif | ||
| 1423 | } | 1657 | } |
| 1424 | *skip = 0; | 1658 | *skip = 0; |
| 1425 | req->r_con_filling_msg = ceph_con_get(con); | 1659 | req->r_con_filling_msg = ceph_con_get(con); |
diff --git a/fs/ceph/osdmap.c b/net/ceph/osdmap.c index e31f118f1392..d73f3f6efa36 100644 --- a/fs/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
| @@ -1,14 +1,15 @@ | |||
| 1 | 1 | ||
| 2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
| 3 | 3 | ||
| 4 | #include <linux/module.h> | ||
| 4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
| 5 | #include <asm/div64.h> | 6 | #include <asm/div64.h> |
| 6 | 7 | ||
| 7 | #include "super.h" | 8 | #include <linux/ceph/libceph.h> |
| 8 | #include "osdmap.h" | 9 | #include <linux/ceph/osdmap.h> |
| 9 | #include "crush/hash.h" | 10 | #include <linux/ceph/decode.h> |
| 10 | #include "crush/mapper.h" | 11 | #include <linux/crush/hash.h> |
| 11 | #include "decode.h" | 12 | #include <linux/crush/mapper.h> |
| 12 | 13 | ||
| 13 | char *ceph_osdmap_state_str(char *str, int len, int state) | 14 | char *ceph_osdmap_state_str(char *str, int len, int state) |
| 14 | { | 15 | { |
| @@ -417,6 +418,20 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) | |||
| 417 | return NULL; | 418 | return NULL; |
| 418 | } | 419 | } |
| 419 | 420 | ||
| 421 | int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name) | ||
| 422 | { | ||
| 423 | struct rb_node *rbp; | ||
| 424 | |||
| 425 | for (rbp = rb_first(&map->pg_pools); rbp; rbp = rb_next(rbp)) { | ||
| 426 | struct ceph_pg_pool_info *pi = | ||
| 427 | rb_entry(rbp, struct ceph_pg_pool_info, node); | ||
| 428 | if (pi->name && strcmp(pi->name, name) == 0) | ||
| 429 | return pi->id; | ||
| 430 | } | ||
| 431 | return -ENOENT; | ||
| 432 | } | ||
| 433 | EXPORT_SYMBOL(ceph_pg_poolid_by_name); | ||
| 434 | |||
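The new ceph_pg_poolid_by_name() export lets libceph users resolve a pool by name instead of walking the pg_pools rbtree themselves. A hedged sketch, assuming an osdmap pointer is available:

	int poolid = ceph_pg_poolid_by_name(osdc->osdmap, "rbd");

	if (poolid < 0)
		return poolid;	/* -ENOENT when no pool has that name */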
| 420 | static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) | 435 | static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) |
| 421 | { | 436 | { |
| 422 | rb_erase(&pi->node, root); | 437 | rb_erase(&pi->node, root); |
| @@ -966,6 +981,7 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout, | |||
| 966 | 981 | ||
| 967 | dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); | 982 | dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); |
| 968 | } | 983 | } |
| 984 | EXPORT_SYMBOL(ceph_calc_file_object_mapping); | ||
| 969 | 985 | ||
| 970 | /* | 986 | /* |
| 971 | * calculate an object layout (i.e. pgid) from an oid, | 987 | * calculate an object layout (i.e. pgid) from an oid, |
| @@ -1011,6 +1027,7 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, | |||
| 1011 | ol->ol_stripe_unit = fl->fl_object_stripe_unit; | 1027 | ol->ol_stripe_unit = fl->fl_object_stripe_unit; |
| 1012 | return 0; | 1028 | return 0; |
| 1013 | } | 1029 | } |
| 1030 | EXPORT_SYMBOL(ceph_calc_object_layout); | ||
| 1014 | 1031 | ||
| 1015 | /* | 1032 | /* |
| 1016 | * Calculate raw osd vector for the given pgid. Return pointer to osd | 1033 | * Calculate raw osd vector for the given pgid. Return pointer to osd |
| @@ -1108,3 +1125,4 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) | |||
| 1108 | return osds[i]; | 1125 | return osds[i]; |
| 1109 | return -1; | 1126 | return -1; |
| 1110 | } | 1127 | } |
| 1128 | EXPORT_SYMBOL(ceph_calc_pg_primary); | ||
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c new file mode 100644 index 000000000000..13cb409a7bba --- /dev/null +++ b/net/ceph/pagelist.c | |||
| @@ -0,0 +1,154 @@ | |||
| 1 | |||
| 2 | #include <linux/module.h> | ||
| 3 | #include <linux/gfp.h> | ||
| 4 | #include <linux/pagemap.h> | ||
| 5 | #include <linux/highmem.h> | ||
| 6 | #include <linux/ceph/pagelist.h> | ||
| 7 | |||
| 8 | static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) | ||
| 9 | { | ||
| 10 | if (pl->mapped_tail) { | ||
| 11 | struct page *page = list_entry(pl->head.prev, struct page, lru); | ||
| 12 | kunmap(page); | ||
| 13 | pl->mapped_tail = NULL; | ||
| 14 | } | ||
| 15 | } | ||
| 16 | |||
| 17 | int ceph_pagelist_release(struct ceph_pagelist *pl) | ||
| 18 | { | ||
| 19 | ceph_pagelist_unmap_tail(pl); | ||
| 20 | while (!list_empty(&pl->head)) { | ||
| 21 | struct page *page = list_first_entry(&pl->head, struct page, | ||
| 22 | lru); | ||
| 23 | list_del(&page->lru); | ||
| 24 | __free_page(page); | ||
| 25 | } | ||
| 26 | ceph_pagelist_free_reserve(pl); | ||
| 27 | return 0; | ||
| 28 | } | ||
| 29 | EXPORT_SYMBOL(ceph_pagelist_release); | ||
| 30 | |||
| 31 | static int ceph_pagelist_addpage(struct ceph_pagelist *pl) | ||
| 32 | { | ||
| 33 | struct page *page; | ||
| 34 | |||
| 35 | if (!pl->num_pages_free) { | ||
| 36 | page = __page_cache_alloc(GFP_NOFS); | ||
| 37 | } else { | ||
| 38 | page = list_first_entry(&pl->free_list, struct page, lru); | ||
| 39 | list_del(&page->lru); | ||
| 40 | --pl->num_pages_free; | ||
| 41 | } | ||
| 42 | if (!page) | ||
| 43 | return -ENOMEM; | ||
| 44 | pl->room += PAGE_SIZE; | ||
| 45 | ceph_pagelist_unmap_tail(pl); | ||
| 46 | list_add_tail(&page->lru, &pl->head); | ||
| 47 | pl->mapped_tail = kmap(page); | ||
| 48 | return 0; | ||
| 49 | } | ||
| 50 | |||
| 51 | int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len) | ||
| 52 | { | ||
| 53 | while (pl->room < len) { | ||
| 54 | size_t bit = pl->room; | ||
| 55 | int ret; | ||
| 56 | |||
| 57 | memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), | ||
| 58 | buf, bit); | ||
| 59 | pl->length += bit; | ||
| 60 | pl->room -= bit; | ||
| 61 | buf += bit; | ||
| 62 | len -= bit; | ||
| 63 | ret = ceph_pagelist_addpage(pl); | ||
| 64 | if (ret) | ||
| 65 | return ret; | ||
| 66 | } | ||
| 67 | |||
| 68 | memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len); | ||
| 69 | pl->length += len; | ||
| 70 | pl->room -= len; | ||
| 71 | return 0; | ||
| 72 | } | ||
| 73 | EXPORT_SYMBOL(ceph_pagelist_append); | ||
| 74 | |||
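ceph_pagelist_append() copies into the kmapped tail page, allocating a fresh page each time the room runs out. A minimal sketch, assuming the usual ceph_pagelist_init() initializer from the pagelist header:

	struct ceph_pagelist pl;
	__le32 val = cpu_to_le32(42);

	ceph_pagelist_init(&pl);		/* assumed initializer */
	if (ceph_pagelist_append(&pl, &val, sizeof(val)))
		return -ENOMEM;	/* the first append allocates the tail page */
	/* ... attach &pl as a message trail, and eventually ... */
	ceph_pagelist_release(&pl);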
| 75 | /** | ||
| 76 | * Allocate enough pages for a pagelist to append the given amount | ||
| 77 | * of data without allocating. | ||
| 78 | * Returns: 0 on success, -ENOMEM on error. | ||
| 79 | */ | ||
| 80 | int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space) | ||
| 81 | { | ||
| 82 | if (space <= pl->room) | ||
| 83 | return 0; | ||
| 84 | space -= pl->room; | ||
| 85 | space = (space + PAGE_SIZE - 1) >> PAGE_SHIFT; /* conv to num pages */ | ||
| 86 | |||
| 87 | while (space > pl->num_pages_free) { | ||
| 88 | struct page *page = __page_cache_alloc(GFP_NOFS); | ||
| 89 | if (!page) | ||
| 90 | return -ENOMEM; | ||
| 91 | list_add_tail(&page->lru, &pl->free_list); | ||
| 92 | ++pl->num_pages_free; | ||
| 93 | } | ||
| 94 | return 0; | ||
| 95 | } | ||
| 96 | EXPORT_SYMBOL(ceph_pagelist_reserve); | ||
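Reservation only fills the free list; pages migrate to the pagelist head later, inside ceph_pagelist_addpage(). The intended pattern, sketched with hypothetical lengths, is to reserve once so the subsequent appends need no allocation:

	if (ceph_pagelist_reserve(pl, hdr_len + body_len))
		return -ENOMEM;
	/* these appends now draw pages from the reserve */
	ceph_pagelist_append(pl, hdr, hdr_len);
	ceph_pagelist_append(pl, body, body_len);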
| 97 | |||
| 98 | /** | ||
| 99 | * Free any pages that have been preallocated. | ||
| 100 | */ | ||
| 101 | int ceph_pagelist_free_reserve(struct ceph_pagelist *pl) | ||
| 102 | { | ||
| 103 | while (!list_empty(&pl->free_list)) { | ||
| 104 | struct page *page = list_first_entry(&pl->free_list, | ||
| 105 | struct page, lru); | ||
| 106 | list_del(&page->lru); | ||
| 107 | __free_page(page); | ||
| 108 | --pl->num_pages_free; | ||
| 109 | } | ||
| 110 | BUG_ON(pl->num_pages_free); | ||
| 111 | return 0; | ||
| 112 | } | ||
| 113 | EXPORT_SYMBOL(ceph_pagelist_free_reserve); | ||
| 114 | |||
| 115 | /** | ||
| 116 | * Create a truncation point. | ||
| 117 | */ | ||
| 118 | void ceph_pagelist_set_cursor(struct ceph_pagelist *pl, | ||
| 119 | struct ceph_pagelist_cursor *c) | ||
| 120 | { | ||
| 121 | c->pl = pl; | ||
| 122 | c->page_lru = pl->head.prev; | ||
| 123 | c->room = pl->room; | ||
| 124 | } | ||
| 125 | EXPORT_SYMBOL(ceph_pagelist_set_cursor); | ||
| 126 | |||
| 127 | /** | ||
| 128 | * Truncate a pagelist to the given point. Move extra pages to reserve. | ||
| 129 | * This won't sleep. | ||
| 130 | * Returns: 0 on success, | ||
| 131 | * -EINVAL if the pagelist doesn't match the trunc point pagelist | ||
| 132 | */ | ||
| 133 | int ceph_pagelist_truncate(struct ceph_pagelist *pl, | ||
| 134 | struct ceph_pagelist_cursor *c) | ||
| 135 | { | ||
| 136 | struct page *page; | ||
| 137 | |||
| 138 | if (pl != c->pl) | ||
| 139 | return -EINVAL; | ||
| 140 | ceph_pagelist_unmap_tail(pl); | ||
| 141 | while (pl->head.prev != c->page_lru) { | ||
| 142 | page = list_entry(pl->head.prev, struct page, lru); | ||
| 143 | list_del(&page->lru); /* remove from pagelist */ | ||
| 144 | list_add_tail(&page->lru, &pl->free_list); /* add to reserve */ | ||
| 145 | ++pl->num_pages_free; | ||
| 146 | } | ||
| 147 | pl->room = c->room; | ||
| 148 | if (!list_empty(&pl->head)) { | ||
| 149 | page = list_entry(pl->head.prev, struct page, lru); | ||
| 150 | pl->mapped_tail = kmap(page); | ||
| 151 | } | ||
| 152 | return 0; | ||
| 153 | } | ||
| 154 | EXPORT_SYMBOL(ceph_pagelist_truncate); | ||
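Together, set_cursor and truncate form a mark-and-rollback pair: remember the tail before a multi-part encode, and on failure hand the extra pages back to the reserve instead of freeing them. A sketch, where encode_payload() is a hypothetical caller-side encoder:

	struct ceph_pagelist_cursor c;

	ceph_pagelist_set_cursor(pl, &c);	/* mark the current end */
	if (encode_payload(pl)) {		/* hypothetical encoder */
		ceph_pagelist_truncate(pl, &c);	/* roll back; pages stay reserved */
		return -EAGAIN;
	}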
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c new file mode 100644 index 000000000000..54caf0687155 --- /dev/null +++ b/net/ceph/pagevec.c | |||
| @@ -0,0 +1,223 @@ | |||
| 1 | #include <linux/ceph/ceph_debug.h> | ||
| 2 | |||
| 3 | #include <linux/module.h> | ||
| 4 | #include <linux/sched.h> | ||
| 5 | #include <linux/slab.h> | ||
| 6 | #include <linux/file.h> | ||
| 7 | #include <linux/namei.h> | ||
| 8 | #include <linux/writeback.h> | ||
| 9 | |||
| 10 | #include <linux/ceph/libceph.h> | ||
| 11 | |||
| 12 | /* | ||
| 13 | * build a vector of user pages | ||
| 14 | */ | ||
| 15 | struct page **ceph_get_direct_page_vector(const char __user *data, | ||
| 16 | int num_pages, | ||
| 17 | loff_t off, size_t len) | ||
| 18 | { | ||
| 19 | struct page **pages; | ||
| 20 | int rc; | ||
| 21 | |||
| 22 | pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); | ||
| 23 | if (!pages) | ||
| 24 | return ERR_PTR(-ENOMEM); | ||
| 25 | |||
| 26 | down_read(¤t->mm->mmap_sem); | ||
| 27 | rc = get_user_pages(current, current->mm, (unsigned long)data, | ||
| 28 | num_pages, 0, 0, pages, NULL); | ||
| 29 | up_read(¤t->mm->mmap_sem); | ||
| 30 | if (rc < 0) | ||
| 31 | goto fail; | ||
| 32 | return pages; | ||
| 33 | |||
| 34 | fail: | ||
| 35 | kfree(pages); | ||
| 36 | return ERR_PTR(rc); | ||
| 37 | } | ||
| 38 | EXPORT_SYMBOL(ceph_get_direct_page_vector); | ||
| 39 | |||
| 40 | void ceph_put_page_vector(struct page **pages, int num_pages) | ||
| 41 | { | ||
| 42 | int i; | ||
| 43 | |||
| 44 | for (i = 0; i < num_pages; i++) | ||
| 45 | put_page(pages[i]); | ||
| 46 | kfree(pages); | ||
| 47 | } | ||
| 48 | EXPORT_SYMBOL(ceph_put_page_vector); | ||
| 49 | |||
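These two helpers bracket direct I/O on user memory: get_user_pages() pins the pages, put_page() drops each reference. A usage sketch; calc_pages_for() is assumed to be the usual libceph page-count helper rather than something defined in this patch:

	struct page **pages;
	int num_pages = calc_pages_for(off, len);	/* assumed helper */

	pages = ceph_get_direct_page_vector(data, num_pages, off, len);
	if (IS_ERR(pages))
		return PTR_ERR(pages);
	/* ... perform I/O against the pinned pages ... */
	ceph_put_page_vector(pages, num_pages);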
| 50 | void ceph_release_page_vector(struct page **pages, int num_pages) | ||
| 51 | { | ||
| 52 | int i; | ||
| 53 | |||
| 54 | for (i = 0; i < num_pages; i++) | ||
| 55 | __free_pages(pages[i], 0); | ||
| 56 | kfree(pages); | ||
| 57 | } | ||
| 58 | EXPORT_SYMBOL(ceph_release_page_vector); | ||
| 59 | |||
| 60 | /* | ||
| 61 | * allocate a vector of new pages | ||
| 62 | */ | ||
| 63 | struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) | ||
| 64 | { | ||
| 65 | struct page **pages; | ||
| 66 | int i; | ||
| 67 | |||
| 68 | pages = kmalloc(sizeof(*pages) * num_pages, flags); | ||
| 69 | if (!pages) | ||
| 70 | return ERR_PTR(-ENOMEM); | ||
| 71 | for (i = 0; i < num_pages; i++) { | ||
| 72 | pages[i] = __page_cache_alloc(flags); | ||
| 73 | if (pages[i] == NULL) { | ||
| 74 | ceph_release_page_vector(pages, i); | ||
| 75 | return ERR_PTR(-ENOMEM); | ||
| 76 | } | ||
| 77 | } | ||
| 78 | return pages; | ||
| 79 | } | ||
| 80 | EXPORT_SYMBOL(ceph_alloc_page_vector); | ||
| 81 | |||
| 82 | /* | ||
| 83 | * copy user data into a page vector | ||
| 84 | */ | ||
| 85 | int ceph_copy_user_to_page_vector(struct page **pages, | ||
| 86 | const char __user *data, | ||
| 87 | loff_t off, size_t len) | ||
| 88 | { | ||
| 89 | int i = 0; | ||
| 90 | int po = off & ~PAGE_CACHE_MASK; | ||
| 91 | int left = len; | ||
| 92 | int l, bad; | ||
| 93 | |||
| 94 | while (left > 0) { | ||
| 95 | l = min_t(int, PAGE_CACHE_SIZE-po, left); | ||
| 96 | bad = copy_from_user(page_address(pages[i]) + po, data, l); | ||
| 97 | if (bad == l) | ||
| 98 | return -EFAULT; | ||
| 99 | data += l - bad; | ||
| 100 | left -= l - bad; | ||
| 101 | po += l - bad; | ||
| 102 | if (po == PAGE_CACHE_SIZE) { | ||
| 103 | po = 0; | ||
| 104 | i++; | ||
| 105 | } | ||
| 106 | } | ||
| 107 | return len; | ||
| 108 | } | ||
| 109 | EXPORT_SYMBOL(ceph_copy_user_to_page_vector); | ||
| 110 | |||
| 111 | int ceph_copy_to_page_vector(struct page **pages, | ||
| 112 | const char *data, | ||
| 113 | loff_t off, size_t len) | ||
| 114 | { | ||
| 115 | int i = 0; | ||
| 116 | size_t po = off & ~PAGE_CACHE_MASK; | ||
| 117 | size_t left = len; | ||
| 118 | size_t l; | ||
| 119 | |||
| 120 | while (left > 0) { | ||
| 121 | l = min_t(size_t, PAGE_CACHE_SIZE-po, left); | ||
| 122 | memcpy(page_address(pages[i]) + po, data, l); | ||
| 123 | data += l; | ||
| 124 | left -= l; | ||
| 125 | po += l; | ||
| 126 | if (po == PAGE_CACHE_SIZE) { | ||
| 127 | po = 0; | ||
| 128 | i++; | ||
| 129 | } | ||
| 130 | } | ||
| 131 | return len; | ||
| 132 | } | ||
| 133 | EXPORT_SYMBOL(ceph_copy_to_page_vector); | ||
| 134 | |||
| 135 | int ceph_copy_from_page_vector(struct page **pages, | ||
| 136 | char *data, | ||
| 137 | loff_t off, size_t len) | ||
| 138 | { | ||
| 139 | int i = 0; | ||
| 140 | size_t po = off & ~PAGE_CACHE_MASK; | ||
| 141 | size_t left = len; | ||
| 142 | size_t l; | ||
| 143 | |||
| 144 | while (left > 0) { | ||
| 145 | l = min_t(size_t, PAGE_CACHE_SIZE-po, left); | ||
| 146 | memcpy(data, page_address(pages[i]) + po, l); | ||
| 147 | data += l; | ||
| 148 | left -= l; | ||
| 149 | po += l; | ||
| 150 | if (po == PAGE_CACHE_SIZE) { | ||
| 151 | po = 0; | ||
| 152 | i++; | ||
| 153 | } | ||
| 154 | } | ||
| 155 | return len; | ||
| 156 | } | ||
| 157 | EXPORT_SYMBOL(ceph_copy_from_page_vector); | ||
| 158 | |||
| 159 | /* | ||
| 160 | * copy user data from a page vector into a user pointer | ||
| 161 | */ | ||
| 162 | int ceph_copy_page_vector_to_user(struct page **pages, | ||
| 163 | char __user *data, | ||
| 164 | loff_t off, size_t len) | ||
| 165 | { | ||
| 166 | int i = 0; | ||
| 167 | int po = off & ~PAGE_CACHE_MASK; | ||
| 168 | int left = len; | ||
| 169 | int l, bad; | ||
| 170 | |||
| 171 | while (left > 0) { | ||
| 172 | l = min_t(int, left, PAGE_CACHE_SIZE-po); | ||
| 173 | bad = copy_to_user(data, page_address(pages[i]) + po, l); | ||
| 174 | if (bad == l) | ||
| 175 | return -EFAULT; | ||
| 176 | data += l - bad; | ||
| 177 | left -= l - bad; | ||
| 178 | if (po) { | ||
| 179 | po += l - bad; | ||
| 180 | if (po == PAGE_CACHE_SIZE) | ||
| 181 | po = 0; | ||
| 182 | } | ||
| 183 | i++; | ||
| 184 | } | ||
| 185 | return len; | ||
| 186 | } | ||
| 187 | EXPORT_SYMBOL(ceph_copy_page_vector_to_user); | ||
| 188 | |||
| 189 | /* | ||
| 190 | * Zero an extent within a page vector. Offset is relative to the | ||
| 191 | * start of the first page. | ||
| 192 | */ | ||
| 193 | void ceph_zero_page_vector_range(int off, int len, struct page **pages) | ||
| 194 | { | ||
| 195 | int i = off >> PAGE_CACHE_SHIFT; | ||
| 196 | |||
| 197 | off &= ~PAGE_CACHE_MASK; | ||
| 198 | |||
| 199 | dout("zero_page_vector_page %u~%u\n", off, len); | ||
| 200 | |||
| 201 | /* leading partial page? */ | ||
| 202 | if (off) { | ||
| 203 | int end = min((int)PAGE_CACHE_SIZE, off + len); | ||
| 204 | dout("zeroing %d %p head from %d\n", i, pages[i], | ||
| 205 | (int)off); | ||
| 206 | zero_user_segment(pages[i], off, end); | ||
| 207 | len -= (end - off); | ||
| 208 | i++; | ||
| 209 | } | ||
| 210 | while (len >= PAGE_CACHE_SIZE) { | ||
| 211 | dout("zeroing %d %p len=%d\n", i, pages[i], len); | ||
| 212 | zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); | ||
| 213 | len -= PAGE_CACHE_SIZE; | ||
| 214 | i++; | ||
| 215 | } | ||
| 216 | /* trailing partial page? */ | ||
| 217 | if (len) { | ||
| 218 | dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len); | ||
| 219 | zero_user_segment(pages[i], 0, len); | ||
| 220 | } | ||
| 221 | } | ||
| 222 | EXPORT_SYMBOL(ceph_zero_page_vector_range); | ||
| 223 | |||
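The zeroing helper works in three phases: a leading partial page, any number of whole pages, then a trailing partial page. For instance, with 4 KB pages, zeroing 9000 bytes starting at offset 100 clears bytes 100-4095 of pages[0], all of pages[1], and the first 908 bytes of pages[2]:

	/* off and len are relative to the start of pages[0] */
	ceph_zero_page_vector_range(100, 9000, pages);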
diff --git a/net/core/sock.c b/net/core/sock.c index ef30e9d286e7..7d99e13148e6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
| @@ -1078,8 +1078,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) | |||
| 1078 | #ifdef CONFIG_CGROUPS | 1078 | #ifdef CONFIG_CGROUPS |
| 1079 | void sock_update_classid(struct sock *sk) | 1079 | void sock_update_classid(struct sock *sk) |
| 1080 | { | 1080 | { |
| 1081 | u32 classid = task_cls_classid(current); | 1081 | u32 classid; |
| 1082 | 1082 | ||
| 1083 | rcu_read_lock(); /* doing current task, which cannot vanish. */ | ||
| 1084 | classid = task_cls_classid(current); | ||
| 1085 | rcu_read_unlock(); | ||
| 1083 | if (classid && classid != sk->sk_classid) | 1086 | if (classid && classid != sk->sk_classid) |
| 1084 | sk->sk_classid = classid; | 1087 | sk->sk_classid = classid; |
| 1085 | } | 1088 | } |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 244f7cb08d68..37f8adb68c79 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include <linux/proc_fs.h> | 11 | #include <linux/proc_fs.h> |
| 12 | #include <linux/seq_file.h> | 12 | #include <linux/seq_file.h> |
| 13 | #include <linux/percpu.h> | 13 | #include <linux/percpu.h> |
| 14 | #include <linux/security.h> | ||
| 14 | #include <net/net_namespace.h> | 15 | #include <net/net_namespace.h> |
| 15 | 16 | ||
| 16 | #include <linux/netfilter.h> | 17 | #include <linux/netfilter.h> |
| @@ -87,6 +88,29 @@ static void ct_seq_stop(struct seq_file *s, void *v) | |||
| 87 | rcu_read_unlock(); | 88 | rcu_read_unlock(); |
| 88 | } | 89 | } |
| 89 | 90 | ||
| 91 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | ||
| 92 | static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) | ||
| 93 | { | ||
| 94 | int ret; | ||
| 95 | u32 len; | ||
| 96 | char *secctx; | ||
| 97 | |||
| 98 | ret = security_secid_to_secctx(ct->secmark, &secctx, &len); | ||
| 99 | if (ret) | ||
| 100 | return ret; | ||
| 101 | |||
| 102 | ret = seq_printf(s, "secctx=%s ", secctx); | ||
| 103 | |||
| 104 | security_release_secctx(secctx, len); | ||
| 105 | return ret; | ||
| 106 | } | ||
| 107 | #else | ||
| 108 | static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) | ||
| 109 | { | ||
| 110 | return 0; | ||
| 111 | } | ||
| 112 | #endif | ||
| 113 | |||
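The converted dumpers all follow the same lookup/format/release sequence; whatever buffer the LSM returns must be handed back. The generic shape of the pattern, independent of conntrack:

	char *ctx;
	u32 len;

	if (security_secid_to_secctx(secid, &ctx, &len))
		return;				/* no context available */
	pr_info("context: %s\n", ctx);
	security_release_secctx(ctx, len);	/* always pair with the lookup */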
| 90 | static int ct_seq_show(struct seq_file *s, void *v) | 114 | static int ct_seq_show(struct seq_file *s, void *v) |
| 91 | { | 115 | { |
| 92 | struct nf_conntrack_tuple_hash *hash = v; | 116 | struct nf_conntrack_tuple_hash *hash = v; |
| @@ -148,10 +172,8 @@ static int ct_seq_show(struct seq_file *s, void *v) | |||
| 148 | goto release; | 172 | goto release; |
| 149 | #endif | 173 | #endif |
| 150 | 174 | ||
| 151 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | 175 | if (ct_show_secctx(s, ct)) |
| 152 | if (seq_printf(s, "secmark=%u ", ct->secmark)) | ||
| 153 | goto release; | 176 | goto release; |
| 154 | #endif | ||
| 155 | 177 | ||
| 156 | if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) | 178 | if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) |
| 157 | goto release; | 179 | goto release; |
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 8c8632d9b93c..957c9241fb0c 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
| @@ -38,7 +38,7 @@ static DEFINE_SPINLOCK(nf_nat_lock); | |||
| 38 | static struct nf_conntrack_l3proto *l3proto __read_mostly; | 38 | static struct nf_conntrack_l3proto *l3proto __read_mostly; |
| 39 | 39 | ||
| 40 | #define MAX_IP_NAT_PROTO 256 | 40 | #define MAX_IP_NAT_PROTO 256 |
| 41 | static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] | 41 | static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO] |
| 42 | __read_mostly; | 42 | __read_mostly; |
| 43 | 43 | ||
| 44 | static inline const struct nf_nat_protocol * | 44 | static inline const struct nf_nat_protocol * |
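The __rcu annotations added across the netfilter pointer tables in this merge are sparse markers: such pointers must be read through rcu_dereference() and published with rcu_assign_pointer(). A minimal sketch of the access pattern the annotation enforces:

	static const struct nf_nat_protocol __rcu *tbl[MAX_IP_NAT_PROTO];
	const struct nf_nat_protocol *p;

	/* reader side */
	rcu_read_lock();
	p = rcu_dereference(tbl[protonum]);
	/* ... use p ... */
	rcu_read_unlock();

	/* writer side, under nf_nat_lock */
	rcu_assign_pointer(tbl[protonum], newp);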
diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 78b505d33bfb..fdaec7daff1d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c | |||
| @@ -27,7 +27,7 @@ | |||
| 27 | 27 | ||
| 28 | static DEFINE_MUTEX(afinfo_mutex); | 28 | static DEFINE_MUTEX(afinfo_mutex); |
| 29 | 29 | ||
| 30 | const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; | 30 | const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; |
| 31 | EXPORT_SYMBOL(nf_afinfo); | 31 | EXPORT_SYMBOL(nf_afinfo); |
| 32 | 32 | ||
| 33 | int nf_register_afinfo(const struct nf_afinfo *afinfo) | 33 | int nf_register_afinfo(const struct nf_afinfo *afinfo) |
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index cdcc7649476b..5702de35e2bb 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c | |||
| @@ -26,10 +26,10 @@ | |||
| 26 | 26 | ||
| 27 | static DEFINE_MUTEX(nf_ct_ecache_mutex); | 27 | static DEFINE_MUTEX(nf_ct_ecache_mutex); |
| 28 | 28 | ||
| 29 | struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly; | 29 | struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb __read_mostly; |
| 30 | EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); | 30 | EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); |
| 31 | 31 | ||
| 32 | struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly; | 32 | struct nf_exp_event_notifier __rcu *nf_expect_event_cb __read_mostly; |
| 33 | EXPORT_SYMBOL_GPL(nf_expect_event_cb); | 33 | EXPORT_SYMBOL_GPL(nf_expect_event_cb); |
| 34 | 34 | ||
| 35 | /* deliver cached events and clear cache entry - must be called with locally | 35 | /* deliver cached events and clear cache entry - must be called with locally |
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 8d9e4c949b96..bd82450c193f 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c | |||
| @@ -16,7 +16,7 @@ | |||
| 16 | #include <linux/skbuff.h> | 16 | #include <linux/skbuff.h> |
| 17 | #include <net/netfilter/nf_conntrack_extend.h> | 17 | #include <net/netfilter/nf_conntrack_extend.h> |
| 18 | 18 | ||
| 19 | static struct nf_ct_ext_type *nf_ct_ext_types[NF_CT_EXT_NUM]; | 19 | static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM]; |
| 20 | static DEFINE_MUTEX(nf_ct_ext_type_mutex); | 20 | static DEFINE_MUTEX(nf_ct_ext_type_mutex); |
| 21 | 21 | ||
| 22 | void __nf_ct_ext_destroy(struct nf_conn *ct) | 22 | void __nf_ct_ext_destroy(struct nf_conn *ct) |
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 5bae1cd15eea..146476c6441a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/rculist_nulls.h> | 22 | #include <linux/rculist_nulls.h> |
| 23 | #include <linux/types.h> | 23 | #include <linux/types.h> |
| 24 | #include <linux/timer.h> | 24 | #include <linux/timer.h> |
| 25 | #include <linux/security.h> | ||
| 25 | #include <linux/skbuff.h> | 26 | #include <linux/skbuff.h> |
| 26 | #include <linux/errno.h> | 27 | #include <linux/errno.h> |
| 27 | #include <linux/netlink.h> | 28 | #include <linux/netlink.h> |
| @@ -245,16 +246,31 @@ nla_put_failure: | |||
| 245 | 246 | ||
| 246 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | 247 | #ifdef CONFIG_NF_CONNTRACK_SECMARK |
| 247 | static inline int | 248 | static inline int |
| 248 | ctnetlink_dump_secmark(struct sk_buff *skb, const struct nf_conn *ct) | 249 | ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct) |
| 249 | { | 250 | { |
| 250 | NLA_PUT_BE32(skb, CTA_SECMARK, htonl(ct->secmark)); | 251 | struct nlattr *nest_secctx; |
| 251 | return 0; | 252 | int len, ret; |
| 253 | char *secctx; | ||
| 254 | |||
| 255 | ret = security_secid_to_secctx(ct->secmark, &secctx, &len); | ||
| 256 | if (ret) | ||
| 257 | return ret; | ||
| 258 | |||
| 259 | ret = -1; | ||
| 260 | nest_secctx = nla_nest_start(skb, CTA_SECCTX | NLA_F_NESTED); | ||
| 261 | if (!nest_secctx) | ||
| 262 | goto nla_put_failure; | ||
| 263 | |||
| 264 | NLA_PUT_STRING(skb, CTA_SECCTX_NAME, secctx); | ||
| 265 | nla_nest_end(skb, nest_secctx); | ||
| 252 | 266 | ||
| 267 | ret = 0; | ||
| 253 | nla_put_failure: | 268 | nla_put_failure: |
| 254 | return -1; | 269 | security_release_secctx(secctx, len); |
| 270 | return ret; | ||
| 255 | } | 271 | } |
| 256 | #else | 272 | #else |
| 257 | #define ctnetlink_dump_secmark(a, b) (0) | 273 | #define ctnetlink_dump_secctx(a, b) (0) |
| 258 | #endif | 274 | #endif |
| 259 | 275 | ||
| 260 | #define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple) | 276 | #define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple) |
| @@ -391,7 +407,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, | |||
| 391 | ctnetlink_dump_protoinfo(skb, ct) < 0 || | 407 | ctnetlink_dump_protoinfo(skb, ct) < 0 || |
| 392 | ctnetlink_dump_helpinfo(skb, ct) < 0 || | 408 | ctnetlink_dump_helpinfo(skb, ct) < 0 || |
| 393 | ctnetlink_dump_mark(skb, ct) < 0 || | 409 | ctnetlink_dump_mark(skb, ct) < 0 || |
| 394 | ctnetlink_dump_secmark(skb, ct) < 0 || | 410 | ctnetlink_dump_secctx(skb, ct) < 0 || |
| 395 | ctnetlink_dump_id(skb, ct) < 0 || | 411 | ctnetlink_dump_id(skb, ct) < 0 || |
| 396 | ctnetlink_dump_use(skb, ct) < 0 || | 412 | ctnetlink_dump_use(skb, ct) < 0 || |
| 397 | ctnetlink_dump_master(skb, ct) < 0 || | 413 | ctnetlink_dump_master(skb, ct) < 0 || |
| @@ -437,6 +453,17 @@ ctnetlink_counters_size(const struct nf_conn *ct) | |||
| 437 | ; | 453 | ; |
| 438 | } | 454 | } |
| 439 | 455 | ||
| 456 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | ||
| 457 | static int ctnetlink_nlmsg_secctx_size(const struct nf_conn *ct) | ||
| 458 | { | ||
| 459 | int len; | ||
| 460 | |||
| 461 | security_secid_to_secctx(ct->secmark, NULL, &len); | ||
| 462 | |||
| 463 | return sizeof(char) * len; | ||
| 464 | } | ||
| 465 | #endif | ||
| 466 | |||
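Note the query-only convention used just above: passing a NULL buffer to security_secid_to_secctx() asks the LSM for the length alone. The size budget then pairs an empty nest header with the string payload, roughly:

	u32 len;

	security_secid_to_secctx(ct->secmark, NULL, &len);	/* length query */
	size += nla_total_size(0)		/* CTA_SECCTX nest header */
	      + nla_total_size(len);		/* CTA_SECCTX_NAME string */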
| 440 | static inline size_t | 467 | static inline size_t |
| 441 | ctnetlink_nlmsg_size(const struct nf_conn *ct) | 468 | ctnetlink_nlmsg_size(const struct nf_conn *ct) |
| 442 | { | 469 | { |
| @@ -453,7 +480,8 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) | |||
| 453 | + nla_total_size(0) /* CTA_HELP */ | 480 | + nla_total_size(0) /* CTA_HELP */ |
| 454 | + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ | 481 | + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ |
| 455 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | 482 | #ifdef CONFIG_NF_CONNTRACK_SECMARK |
| 456 | + nla_total_size(sizeof(u_int32_t)) /* CTA_SECMARK */ | 483 | + nla_total_size(0) /* CTA_SECCTX */ |
| 484 | + nla_total_size(ctnetlink_nlmsg_secctx_size(ct)) /* CTA_SECCTX_NAME */ | ||
| 457 | #endif | 485 | #endif |
| 458 | #ifdef CONFIG_NF_NAT_NEEDED | 486 | #ifdef CONFIG_NF_NAT_NEEDED |
| 459 | + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ | 487 | + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ |
| @@ -556,7 +584,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) | |||
| 556 | 584 | ||
| 557 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | 585 | #ifdef CONFIG_NF_CONNTRACK_SECMARK |
| 558 | if ((events & (1 << IPCT_SECMARK) || ct->secmark) | 586 | if ((events & (1 << IPCT_SECMARK) || ct->secmark) |
| 559 | && ctnetlink_dump_secmark(skb, ct) < 0) | 587 | && ctnetlink_dump_secctx(skb, ct) < 0) |
| 560 | goto nla_put_failure; | 588 | goto nla_put_failure; |
| 561 | #endif | 589 | #endif |
| 562 | 590 | ||
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 5886ba1d52a0..ed6d92958023 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c | |||
| @@ -28,8 +28,8 @@ | |||
| 28 | #include <net/netfilter/nf_conntrack_l4proto.h> | 28 | #include <net/netfilter/nf_conntrack_l4proto.h> |
| 29 | #include <net/netfilter/nf_conntrack_core.h> | 29 | #include <net/netfilter/nf_conntrack_core.h> |
| 30 | 30 | ||
| 31 | static struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly; | 31 | static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly; |
| 32 | struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly; | 32 | struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX] __read_mostly; |
| 33 | EXPORT_SYMBOL_GPL(nf_ct_l3protos); | 33 | EXPORT_SYMBOL_GPL(nf_ct_l3protos); |
| 34 | 34 | ||
| 35 | static DEFINE_MUTEX(nf_ct_proto_mutex); | 35 | static DEFINE_MUTEX(nf_ct_proto_mutex); |
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index eb973fcd67ab..0fb65705b44b 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <linux/seq_file.h> | 15 | #include <linux/seq_file.h> |
| 16 | #include <linux/percpu.h> | 16 | #include <linux/percpu.h> |
| 17 | #include <linux/netdevice.h> | 17 | #include <linux/netdevice.h> |
| 18 | #include <linux/security.h> | ||
| 18 | #include <net/net_namespace.h> | 19 | #include <net/net_namespace.h> |
| 19 | #ifdef CONFIG_SYSCTL | 20 | #ifdef CONFIG_SYSCTL |
| 20 | #include <linux/sysctl.h> | 21 | #include <linux/sysctl.h> |
| @@ -108,6 +109,29 @@ static void ct_seq_stop(struct seq_file *s, void *v) | |||
| 108 | rcu_read_unlock(); | 109 | rcu_read_unlock(); |
| 109 | } | 110 | } |
| 110 | 111 | ||
| 112 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | ||
| 113 | static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) | ||
| 114 | { | ||
| 115 | int ret; | ||
| 116 | u32 len; | ||
| 117 | char *secctx; | ||
| 118 | |||
| 119 | ret = security_secid_to_secctx(ct->secmark, &secctx, &len); | ||
| 120 | if (ret) | ||
| 121 | return ret; | ||
| 122 | |||
| 123 | ret = seq_printf(s, "secctx=%s ", secctx); | ||
| 124 | |||
| 125 | security_release_secctx(secctx, len); | ||
| 126 | return ret; | ||
| 127 | } | ||
| 128 | #else | ||
| 129 | static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) | ||
| 130 | { | ||
| 131 | return 0; | ||
| 132 | } | ||
| 133 | #endif | ||
| 134 | |||
| 111 | /* return 0 on success, 1 in case of error */ | 135 | /* return 0 on success, 1 in case of error */ |
| 112 | static int ct_seq_show(struct seq_file *s, void *v) | 136 | static int ct_seq_show(struct seq_file *s, void *v) |
| 113 | { | 137 | { |
| @@ -168,10 +192,8 @@ static int ct_seq_show(struct seq_file *s, void *v) | |||
| 168 | goto release; | 192 | goto release; |
| 169 | #endif | 193 | #endif |
| 170 | 194 | ||
| 171 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | 195 | if (ct_show_secctx(s, ct)) |
| 172 | if (seq_printf(s, "secmark=%u ", ct->secmark)) | ||
| 173 | goto release; | 196 | goto release; |
| 174 | #endif | ||
| 175 | 197 | ||
| 176 | #ifdef CONFIG_NF_CONNTRACK_ZONES | 198 | #ifdef CONFIG_NF_CONNTRACK_ZONES |
| 177 | if (seq_printf(s, "zone=%u ", nf_ct_zone(ct))) | 199 | if (seq_printf(s, "zone=%u ", nf_ct_zone(ct))) |
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 7df37fd786bc..b07393eab88e 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c | |||
| @@ -16,7 +16,7 @@ | |||
| 16 | #define NF_LOG_PREFIXLEN 128 | 16 | #define NF_LOG_PREFIXLEN 128 |
| 17 | #define NFLOGGER_NAME_LEN 64 | 17 | #define NFLOGGER_NAME_LEN 64 |
| 18 | 18 | ||
| 19 | static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; | 19 | static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; |
| 20 | static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; | 20 | static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; |
| 21 | static DEFINE_MUTEX(nf_log_mutex); | 21 | static DEFINE_MUTEX(nf_log_mutex); |
| 22 | 22 | ||
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 78b3cf9c519c..74aebed5bd28 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c | |||
| @@ -18,7 +18,7 @@ | |||
| 18 | * long term mutex. The handler must provide an outfn() to accept packets | 18 | * long term mutex. The handler must provide an outfn() to accept packets |
| 19 | * for queueing and must reinject all packets it receives, no matter what. | 19 | * for queueing and must reinject all packets it receives, no matter what. |
| 20 | */ | 20 | */ |
| 21 | static const struct nf_queue_handler *queue_handler[NFPROTO_NUMPROTO] __read_mostly; | 21 | static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly; |
| 22 | 22 | ||
| 23 | static DEFINE_MUTEX(queue_handler_mutex); | 23 | static DEFINE_MUTEX(queue_handler_mutex); |
| 24 | 24 | ||
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 0cb6053f02fd..782e51986a6f 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c | |||
| @@ -9,7 +9,6 @@ | |||
| 9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
| 10 | #include <linux/gfp.h> | 10 | #include <linux/gfp.h> |
| 11 | #include <linux/skbuff.h> | 11 | #include <linux/skbuff.h> |
| 12 | #include <linux/selinux.h> | ||
| 13 | #include <linux/netfilter_ipv4/ip_tables.h> | 12 | #include <linux/netfilter_ipv4/ip_tables.h> |
| 14 | #include <linux/netfilter_ipv6/ip6_tables.h> | 13 | #include <linux/netfilter_ipv6/ip6_tables.h> |
| 15 | #include <linux/netfilter/x_tables.h> | 14 | #include <linux/netfilter/x_tables.h> |
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 23b2d6c486b5..9faf5e050b79 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c | |||
| @@ -14,8 +14,8 @@ | |||
| 14 | */ | 14 | */ |
| 15 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 15 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| 16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
| 17 | #include <linux/security.h> | ||
| 17 | #include <linux/skbuff.h> | 18 | #include <linux/skbuff.h> |
| 18 | #include <linux/selinux.h> | ||
| 19 | #include <linux/netfilter/x_tables.h> | 19 | #include <linux/netfilter/x_tables.h> |
| 20 | #include <linux/netfilter/xt_SECMARK.h> | 20 | #include <linux/netfilter/xt_SECMARK.h> |
| 21 | 21 | ||
| @@ -39,9 +39,8 @@ secmark_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
| 39 | 39 | ||
| 40 | switch (mode) { | 40 | switch (mode) { |
| 41 | case SECMARK_MODE_SEL: | 41 | case SECMARK_MODE_SEL: |
| 42 | secmark = info->u.sel.selsid; | 42 | secmark = info->secid; |
| 43 | break; | 43 | break; |
| 44 | |||
| 45 | default: | 44 | default: |
| 46 | BUG(); | 45 | BUG(); |
| 47 | } | 46 | } |
| @@ -50,33 +49,33 @@ secmark_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
| 50 | return XT_CONTINUE; | 49 | return XT_CONTINUE; |
| 51 | } | 50 | } |
| 52 | 51 | ||
| 53 | static int checkentry_selinux(struct xt_secmark_target_info *info) | 52 | static int checkentry_lsm(struct xt_secmark_target_info *info) |
| 54 | { | 53 | { |
| 55 | int err; | 54 | int err; |
| 56 | struct xt_secmark_target_selinux_info *sel = &info->u.sel; | ||
| 57 | 55 | ||
| 58 | sel->selctx[SECMARK_SELCTX_MAX - 1] = '\0'; | 56 | info->secctx[SECMARK_SECCTX_MAX - 1] = '\0'; |
| 57 | info->secid = 0; | ||
| 59 | 58 | ||
| 60 | err = selinux_string_to_sid(sel->selctx, &sel->selsid); | 59 | err = security_secctx_to_secid(info->secctx, strlen(info->secctx), |
| 60 | &info->secid); | ||
| 61 | if (err) { | 61 | if (err) { |
| 62 | if (err == -EINVAL) | 62 | if (err == -EINVAL) |
| 63 | pr_info("invalid SELinux context \'%s\'\n", | 63 | pr_info("invalid security context \'%s\'\n", info->secctx); |
| 64 | sel->selctx); | ||
| 65 | return err; | 64 | return err; |
| 66 | } | 65 | } |
| 67 | 66 | ||
| 68 | if (!sel->selsid) { | 67 | if (!info->secid) { |
| 69 | pr_info("unable to map SELinux context \'%s\'\n", sel->selctx); | 68 | pr_info("unable to map security context \'%s\'\n", info->secctx); |
| 70 | return -ENOENT; | 69 | return -ENOENT; |
| 71 | } | 70 | } |
| 72 | 71 | ||
| 73 | err = selinux_secmark_relabel_packet_permission(sel->selsid); | 72 | err = security_secmark_relabel_packet(info->secid); |
| 74 | if (err) { | 73 | if (err) { |
| 75 | pr_info("unable to obtain relabeling permission\n"); | 74 | pr_info("unable to obtain relabeling permission\n"); |
| 76 | return err; | 75 | return err; |
| 77 | } | 76 | } |
| 78 | 77 | ||
| 79 | selinux_secmark_refcount_inc(); | 78 | security_secmark_refcount_inc(); |
| 80 | return 0; | 79 | return 0; |
| 81 | } | 80 | } |
| 82 | 81 | ||
| @@ -100,16 +99,16 @@ static int secmark_tg_check(const struct xt_tgchk_param *par) | |||
| 100 | 99 | ||
| 101 | switch (info->mode) { | 100 | switch (info->mode) { |
| 102 | case SECMARK_MODE_SEL: | 101 | case SECMARK_MODE_SEL: |
| 103 | err = checkentry_selinux(info); | ||
| 104 | if (err <= 0) | ||
| 105 | return err; | ||
| 106 | break; | 102 | break; |
| 107 | |||
| 108 | default: | 103 | default: |
| 109 | pr_info("invalid mode: %hu\n", info->mode); | 104 | pr_info("invalid mode: %hu\n", info->mode); |
| 110 | return -EINVAL; | 105 | return -EINVAL; |
| 111 | } | 106 | } |
| 112 | 107 | ||
| 108 | err = checkentry_lsm(info); | ||
| 109 | if (err) | ||
| 110 | return err; | ||
| 111 | |||
| 113 | if (!mode) | 112 | if (!mode) |
| 114 | mode = info->mode; | 113 | mode = info->mode; |
| 115 | return 0; | 114 | return 0; |
| @@ -119,7 +118,7 @@ static void secmark_tg_destroy(const struct xt_tgdtor_param *par) | |||
| 119 | { | 118 | { |
| 120 | switch (mode) { | 119 | switch (mode) { |
| 121 | case SECMARK_MODE_SEL: | 120 | case SECMARK_MODE_SEL: |
| 122 | selinux_secmark_refcount_dec(); | 121 | security_secmark_refcount_dec(); |
| 123 | } | 122 | } |
| 124 | } | 123 | } |
| 125 | 124 | ||
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 78ef2c5e130b..37dff78e9cb1 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c | |||
| @@ -123,7 +123,7 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, | |||
| 123 | * calls by looking at the number of nested bh disable calls because | 123 | * calls by looking at the number of nested bh disable calls because |
| 124 | * softirqs always disables bh. | 124 | * softirqs always disables bh. |
| 125 | */ | 125 | */ |
| 126 | if (softirq_count() != SOFTIRQ_OFFSET) { | 126 | if (in_serving_softirq()) { |
| 127 | /* If there is an sk_classid we'll use that. */ | 127 | /* If there is an sk_classid we'll use that. */ |
| 128 | if (!skb->sk) | 128 | if (!skb->sk) |
| 129 | return -1; | 129 | return -1; |
diff --git a/security/apparmor/.gitignore b/security/apparmor/.gitignore index 0a0a99f3b083..4d995aeaebc0 100644 --- a/security/apparmor/.gitignore +++ b/security/apparmor/.gitignore | |||
| @@ -3,3 +3,4 @@ | |||
| 3 | # | 3 | # |
| 4 | af_names.h | 4 | af_names.h |
| 5 | capability_names.h | 5 | capability_names.h |
| 6 | rlim_names.h | ||
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 7320331b44ab..544ff5837cb6 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c | |||
| @@ -29,7 +29,7 @@ | |||
| 29 | * aa_simple_write_to_buffer - common routine for getting policy from user | 29 | * aa_simple_write_to_buffer - common routine for getting policy from user |
| 30 | * @op: operation doing the user buffer copy | 30 | * @op: operation doing the user buffer copy |
| 31 | * @userbuf: user buffer to copy data from (NOT NULL) | 31 | * @userbuf: user buffer to copy data from (NOT NULL) |
| 32 | * @alloc_size: size of user buffer | 32 | * @alloc_size: size of user buffer (REQUIRES: @alloc_size >= @copy_size) |
| 33 | * @copy_size: size of data to copy from user buffer | 33 | * @copy_size: size of data to copy from user buffer |
| 34 | * @pos: position write is at in the file (NOT NULL) | 34 | * @pos: position write is at in the file (NOT NULL) |
| 35 | * | 35 | * |
| @@ -42,6 +42,8 @@ static char *aa_simple_write_to_buffer(int op, const char __user *userbuf, | |||
| 42 | { | 42 | { |
| 43 | char *data; | 43 | char *data; |
| 44 | 44 | ||
| 45 | BUG_ON(copy_size > alloc_size); | ||
| 46 | |||
| 45 | if (*pos != 0) | 47 | if (*pos != 0) |
| 46 | /* only writes from pos 0, that is complete writes */ | 48 | /* only writes from pos 0, that is complete writes */ |
| 47 | return ERR_PTR(-ESPIPE); | 49 | return ERR_PTR(-ESPIPE); |
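The new BUG_ON turns the REQUIRES clause in the kernel-doc into a hard check: callers must never ask to copy more than they sized the buffer for. A hedged caller-side sketch, with op and userbuf standing in for the real profile-load arguments:

	char *data;

	/* typical call: the buffer is sized exactly to the write */
	data = aa_simple_write_to_buffer(op, userbuf, size, size, pos);
	if (IS_ERR(data))
		return PTR_ERR(data);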
diff --git a/security/capability.c b/security/capability.c index 95a6599a37bb..30ae00fbecd5 100644 --- a/security/capability.c +++ b/security/capability.c | |||
| @@ -677,7 +677,18 @@ static void cap_inet_conn_established(struct sock *sk, struct sk_buff *skb) | |||
| 677 | { | 677 | { |
| 678 | } | 678 | } |
| 679 | 679 | ||
| 680 | static int cap_secmark_relabel_packet(u32 secid) | ||
| 681 | { | ||
| 682 | return 0; | ||
| 683 | } | ||
| 680 | 684 | ||
| 685 | static void cap_secmark_refcount_inc(void) | ||
| 686 | { | ||
| 687 | } | ||
| 688 | |||
| 689 | static void cap_secmark_refcount_dec(void) | ||
| 690 | { | ||
| 691 | } | ||
| 681 | 692 | ||
| 682 | static void cap_req_classify_flow(const struct request_sock *req, | 693 | static void cap_req_classify_flow(const struct request_sock *req, |
| 683 | struct flowi *fl) | 694 | struct flowi *fl) |
| @@ -777,7 +788,8 @@ static int cap_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) | |||
| 777 | 788 | ||
| 778 | static int cap_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) | 789 | static int cap_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) |
| 779 | { | 790 | { |
| 780 | return -EOPNOTSUPP; | 791 | *secid = 0; |
| 792 | return 0; | ||
| 781 | } | 793 | } |
| 782 | 794 | ||
| 783 | static void cap_release_secctx(char *secdata, u32 seclen) | 795 | static void cap_release_secctx(char *secdata, u32 seclen) |
| @@ -1018,6 +1030,9 @@ void __init security_fixup_ops(struct security_operations *ops) | |||
| 1018 | set_to_cap_if_null(ops, inet_conn_request); | 1030 | set_to_cap_if_null(ops, inet_conn_request); |
| 1019 | set_to_cap_if_null(ops, inet_csk_clone); | 1031 | set_to_cap_if_null(ops, inet_csk_clone); |
| 1020 | set_to_cap_if_null(ops, inet_conn_established); | 1032 | set_to_cap_if_null(ops, inet_conn_established); |
| 1033 | set_to_cap_if_null(ops, secmark_relabel_packet); | ||
| 1034 | set_to_cap_if_null(ops, secmark_refcount_inc); | ||
| 1035 | set_to_cap_if_null(ops, secmark_refcount_dec); | ||
| 1021 | set_to_cap_if_null(ops, req_classify_flow); | 1036 | set_to_cap_if_null(ops, req_classify_flow); |
| 1022 | set_to_cap_if_null(ops, tun_dev_create); | 1037 | set_to_cap_if_null(ops, tun_dev_create); |
| 1023 | set_to_cap_if_null(ops, tun_dev_post_create); | 1038 | set_to_cap_if_null(ops, tun_dev_post_create); |
diff --git a/security/commoncap.c b/security/commoncap.c index 9d172e6e330c..5e632b4857e4 100644 --- a/security/commoncap.c +++ b/security/commoncap.c | |||
| @@ -719,14 +719,11 @@ static int cap_safe_nice(struct task_struct *p) | |||
| 719 | /** | 719 | /** |
| 720 | * cap_task_setscheduler - Detemine if scheduler policy change is permitted | 720 | * cap_task_setscheduler - Detemine if scheduler policy change is permitted |
| 721 | * @p: The task to affect | 721 | * @p: The task to affect |
| 722 | * @policy: The policy to effect | ||
| 723 | * @lp: The parameters to the scheduling policy | ||
| 724 | * | 722 | * |
| 725 | * Determine if the requested scheduler policy change is permitted for the | 723 | * Determine if the requested scheduler policy change is permitted for the |
| 726 | * specified task, returning 0 if permission is granted, -ve if denied. | 724 | * specified task, returning 0 if permission is granted, -ve if denied. |
| 727 | */ | 725 | */ |
| 728 | int cap_task_setscheduler(struct task_struct *p, int policy, | 726 | int cap_task_setscheduler(struct task_struct *p) |
| 729 | struct sched_param *lp) | ||
| 730 | { | 727 | { |
| 731 | return cap_safe_nice(p); | 728 | return cap_safe_nice(p); |
| 732 | } | 729 | } |
diff --git a/security/security.c b/security/security.c index c53949f17d9e..b50f472061a4 100644 --- a/security/security.c +++ b/security/security.c | |||
| @@ -89,20 +89,12 @@ __setup("security=", choose_lsm); | |||
| 89 | * Return true if: | 89 | * Return true if: |
| 90 | * -The passed LSM is the one chosen by user at boot time, | 90 | * -The passed LSM is the one chosen by user at boot time, |
| 91 | * -or the passed LSM is configured as the default and the user did not | 91 | * -or the passed LSM is configured as the default and the user did not |
| 92 | * choose an alternate LSM at boot time, | 92 | * choose an alternate LSM at boot time. |
| 93 | * -or there is no default LSM set and the user didn't specify a | ||
| 94 | * specific LSM and we're the first to ask for registration permission, | ||
| 95 | * -or the passed LSM is currently loaded. | ||
| 96 | * Otherwise, return false. | 93 | * Otherwise, return false. |
| 97 | */ | 94 | */ |
| 98 | int __init security_module_enable(struct security_operations *ops) | 95 | int __init security_module_enable(struct security_operations *ops) |
| 99 | { | 96 | { |
| 100 | if (!*chosen_lsm) | 97 | return !strcmp(ops->name, chosen_lsm); |
| 101 | strncpy(chosen_lsm, ops->name, SECURITY_NAME_MAX); | ||
| 102 | else if (strncmp(ops->name, chosen_lsm, SECURITY_NAME_MAX)) | ||
| 103 | return 0; | ||
| 104 | |||
| 105 | return 1; | ||
| 106 | } | 98 | } |
| 107 | 99 | ||
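With the fallback cases dropped, an LSM is enabled only when its name matches the boot-time (or default) choice. An LSM init path would use it roughly like this sketch, where my_lsm_ops is hypothetical:

	if (!security_module_enable(&my_lsm_ops))
		return 0;	/* another LSM was chosen; stay dormant */

	return register_security(&my_lsm_ops);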
| 108 | /** | 100 | /** |
| @@ -786,10 +778,9 @@ int security_task_setrlimit(struct task_struct *p, unsigned int resource, | |||
| 786 | return security_ops->task_setrlimit(p, resource, new_rlim); | 778 | return security_ops->task_setrlimit(p, resource, new_rlim); |
| 787 | } | 779 | } |
| 788 | 780 | ||
| 789 | int security_task_setscheduler(struct task_struct *p, | 781 | int security_task_setscheduler(struct task_struct *p) |
| 790 | int policy, struct sched_param *lp) | ||
| 791 | { | 782 | { |
| 792 | return security_ops->task_setscheduler(p, policy, lp); | 783 | return security_ops->task_setscheduler(p); |
| 793 | } | 784 | } |
| 794 | 785 | ||
| 795 | int security_task_getscheduler(struct task_struct *p) | 786 | int security_task_getscheduler(struct task_struct *p) |
| @@ -1145,6 +1136,24 @@ void security_inet_conn_established(struct sock *sk, | |||
| 1145 | security_ops->inet_conn_established(sk, skb); | 1136 | security_ops->inet_conn_established(sk, skb); |
| 1146 | } | 1137 | } |
| 1147 | 1138 | ||
| 1139 | int security_secmark_relabel_packet(u32 secid) | ||
| 1140 | { | ||
| 1141 | return security_ops->secmark_relabel_packet(secid); | ||
| 1142 | } | ||
| 1143 | EXPORT_SYMBOL(security_secmark_relabel_packet); | ||
| 1144 | |||
| 1145 | void security_secmark_refcount_inc(void) | ||
| 1146 | { | ||
| 1147 | security_ops->secmark_refcount_inc(); | ||
| 1148 | } | ||
| 1149 | EXPORT_SYMBOL(security_secmark_refcount_inc); | ||
| 1150 | |||
| 1151 | void security_secmark_refcount_dec(void) | ||
| 1152 | { | ||
| 1153 | security_ops->secmark_refcount_dec(); | ||
| 1154 | } | ||
| 1155 | EXPORT_SYMBOL(security_secmark_refcount_dec); | ||
| 1156 | |||
| 1148 | int security_tun_dev_create(void) | 1157 | int security_tun_dev_create(void) |
| 1149 | { | 1158 | { |
| 1150 | return security_ops->tun_dev_create(); | 1159 | return security_ops->tun_dev_create(); |
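The three new exported wrappers give networking code an LSM-neutral way to drive SECMARK. A sketch of how a SECMARK-style netfilter target might use them, modeled loosely on xt_SECMARK (secmark_tg_check/secmark_tg_destroy are simplified stand-ins, not copied from this diff):

    #include <linux/security.h>

    static int secmark_tg_check(u32 secid)
    {
            int err;

            /* May the current task relabel packets to this secid? */
            err = security_secmark_relabel_packet(secid);
            if (err)
                    return err;

            /* Pin secmark processing while the rule exists. */
            security_secmark_refcount_inc();
            return 0;
    }

    static void secmark_tg_destroy(void)
    {
            security_secmark_refcount_dec();        /* rule removed, drop the pin */
    }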
diff --git a/security/selinux/Makefile b/security/selinux/Makefile index 58d80f3bd6f6..ad5cd76ec231 100644 --- a/security/selinux/Makefile +++ b/security/selinux/Makefile | |||
| @@ -2,25 +2,20 @@ | |||
| 2 | # Makefile for building the SELinux module as part of the kernel tree. | 2 | # Makefile for building the SELinux module as part of the kernel tree. |
| 3 | # | 3 | # |
| 4 | 4 | ||
| 5 | obj-$(CONFIG_SECURITY_SELINUX) := selinux.o ss/ | 5 | obj-$(CONFIG_SECURITY_SELINUX) := selinux.o |
| 6 | 6 | ||
| 7 | selinux-y := avc.o \ | 7 | selinux-y := avc.o hooks.o selinuxfs.o netlink.o nlmsgtab.o netif.o \ |
| 8 | hooks.o \ | 8 | netnode.o netport.o exports.o \ |
| 9 | selinuxfs.o \ | 9 | ss/ebitmap.o ss/hashtab.o ss/symtab.o ss/sidtab.o ss/avtab.o \ |
| 10 | netlink.o \ | 10 | ss/policydb.o ss/services.o ss/conditional.o ss/mls.o ss/status.o |
| 11 | nlmsgtab.o \ | ||
| 12 | netif.o \ | ||
| 13 | netnode.o \ | ||
| 14 | netport.o \ | ||
| 15 | exports.o | ||
| 16 | 11 | ||
| 17 | selinux-$(CONFIG_SECURITY_NETWORK_XFRM) += xfrm.o | 12 | selinux-$(CONFIG_SECURITY_NETWORK_XFRM) += xfrm.o |
| 18 | 13 | ||
| 19 | selinux-$(CONFIG_NETLABEL) += netlabel.o | 14 | selinux-$(CONFIG_NETLABEL) += netlabel.o |
| 20 | 15 | ||
| 21 | EXTRA_CFLAGS += -Isecurity/selinux -Isecurity/selinux/include | 16 | ccflags-y := -Isecurity/selinux -Isecurity/selinux/include |
| 22 | 17 | ||
| 23 | $(obj)/avc.o: $(obj)/flask.h | 18 | $(addprefix $(obj)/,$(selinux-y)): $(obj)/flask.h |
| 24 | 19 | ||
| 25 | quiet_cmd_flask = GEN $(obj)/flask.h $(obj)/av_permissions.h | 20 | quiet_cmd_flask = GEN $(obj)/flask.h $(obj)/av_permissions.h |
| 26 | cmd_flask = scripts/selinux/genheaders/genheaders $(obj)/flask.h $(obj)/av_permissions.h | 21 | cmd_flask = scripts/selinux/genheaders/genheaders $(obj)/flask.h $(obj)/av_permissions.h |
diff --git a/security/selinux/exports.c b/security/selinux/exports.c index c0a454aee1e0..90664385dead 100644 --- a/security/selinux/exports.c +++ b/security/selinux/exports.c | |||
| @@ -11,58 +11,9 @@ | |||
| 11 | * it under the terms of the GNU General Public License version 2, | 11 | * it under the terms of the GNU General Public License version 2, |
| 12 | * as published by the Free Software Foundation. | 12 | * as published by the Free Software Foundation. |
| 13 | */ | 13 | */ |
| 14 | #include <linux/types.h> | ||
| 15 | #include <linux/kernel.h> | ||
| 16 | #include <linux/module.h> | 14 | #include <linux/module.h> |
| 17 | #include <linux/selinux.h> | ||
| 18 | #include <linux/fs.h> | ||
| 19 | #include <linux/ipc.h> | ||
| 20 | #include <asm/atomic.h> | ||
| 21 | 15 | ||
| 22 | #include "security.h" | 16 | #include "security.h" |
| 23 | #include "objsec.h" | ||
| 24 | |||
| 25 | /* SECMARK reference count */ | ||
| 26 | extern atomic_t selinux_secmark_refcount; | ||
| 27 | |||
| 28 | int selinux_string_to_sid(char *str, u32 *sid) | ||
| 29 | { | ||
| 30 | if (selinux_enabled) | ||
| 31 | return security_context_to_sid(str, strlen(str), sid); | ||
| 32 | else { | ||
| 33 | *sid = 0; | ||
| 34 | return 0; | ||
| 35 | } | ||
| 36 | } | ||
| 37 | EXPORT_SYMBOL_GPL(selinux_string_to_sid); | ||
| 38 | |||
| 39 | int selinux_secmark_relabel_packet_permission(u32 sid) | ||
| 40 | { | ||
| 41 | if (selinux_enabled) { | ||
| 42 | const struct task_security_struct *__tsec; | ||
| 43 | u32 tsid; | ||
| 44 | |||
| 45 | __tsec = current_security(); | ||
| 46 | tsid = __tsec->sid; | ||
| 47 | |||
| 48 | return avc_has_perm(tsid, sid, SECCLASS_PACKET, | ||
| 49 | PACKET__RELABELTO, NULL); | ||
| 50 | } | ||
| 51 | return 0; | ||
| 52 | } | ||
| 53 | EXPORT_SYMBOL_GPL(selinux_secmark_relabel_packet_permission); | ||
| 54 | |||
| 55 | void selinux_secmark_refcount_inc(void) | ||
| 56 | { | ||
| 57 | atomic_inc(&selinux_secmark_refcount); | ||
| 58 | } | ||
| 59 | EXPORT_SYMBOL_GPL(selinux_secmark_refcount_inc); | ||
| 60 | |||
| 61 | void selinux_secmark_refcount_dec(void) | ||
| 62 | { | ||
| 63 | atomic_dec(&selinux_secmark_refcount); | ||
| 64 | } | ||
| 65 | EXPORT_SYMBOL_GPL(selinux_secmark_refcount_dec); | ||
| 66 | 17 | ||
| 67 | bool selinux_is_enabled(void) | 18 | bool selinux_is_enabled(void) |
| 68 | { | 19 | { |
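After this cleanup exports.c carries only selinux_is_enabled(); the SELinux-specific SECMARK exports are gone, and callers must go through the generic LSM layer instead. A before/after sketch (relabel_check() is an illustrative name):

    #include <linux/security.h>

    static int relabel_check(u32 secid)
    {
            /* old (removed above): selinux_secmark_relabel_packet_permission(secid) */
            return security_secmark_relabel_packet(secid);
    }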
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4796ddd4e721..d9154cf90ae1 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c | |||
| @@ -3354,11 +3354,11 @@ static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource, | |||
| 3354 | return 0; | 3354 | return 0; |
| 3355 | } | 3355 | } |
| 3356 | 3356 | ||
| 3357 | static int selinux_task_setscheduler(struct task_struct *p, int policy, struct sched_param *lp) | 3357 | static int selinux_task_setscheduler(struct task_struct *p) |
| 3358 | { | 3358 | { |
| 3359 | int rc; | 3359 | int rc; |
| 3360 | 3360 | ||
| 3361 | rc = cap_task_setscheduler(p, policy, lp); | 3361 | rc = cap_task_setscheduler(p); |
| 3362 | if (rc) | 3362 | if (rc) |
| 3363 | return rc; | 3363 | return rc; |
| 3364 | 3364 | ||
| @@ -4279,6 +4279,27 @@ static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) | |||
| 4279 | selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid); | 4279 | selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid); |
| 4280 | } | 4280 | } |
| 4281 | 4281 | ||
| 4282 | static int selinux_secmark_relabel_packet(u32 sid) | ||
| 4283 | { | ||
| 4284 | const struct task_security_struct *__tsec; | ||
| 4285 | u32 tsid; | ||
| 4286 | |||
| 4287 | __tsec = current_security(); | ||
| 4288 | tsid = __tsec->sid; | ||
| 4289 | |||
| 4290 | return avc_has_perm(tsid, sid, SECCLASS_PACKET, PACKET__RELABELTO, NULL); | ||
| 4291 | } | ||
| 4292 | |||
| 4293 | static void selinux_secmark_refcount_inc(void) | ||
| 4294 | { | ||
| 4295 | atomic_inc(&selinux_secmark_refcount); | ||
| 4296 | } | ||
| 4297 | |||
| 4298 | static void selinux_secmark_refcount_dec(void) | ||
| 4299 | { | ||
| 4300 | atomic_dec(&selinux_secmark_refcount); | ||
| 4301 | } | ||
| 4302 | |||
| 4282 | static void selinux_req_classify_flow(const struct request_sock *req, | 4303 | static void selinux_req_classify_flow(const struct request_sock *req, |
| 4283 | struct flowi *fl) | 4304 | struct flowi *fl) |
| 4284 | { | 4305 | { |
| @@ -5533,6 +5554,9 @@ static struct security_operations selinux_ops = { | |||
| 5533 | .inet_conn_request = selinux_inet_conn_request, | 5554 | .inet_conn_request = selinux_inet_conn_request, |
| 5534 | .inet_csk_clone = selinux_inet_csk_clone, | 5555 | .inet_csk_clone = selinux_inet_csk_clone, |
| 5535 | .inet_conn_established = selinux_inet_conn_established, | 5556 | .inet_conn_established = selinux_inet_conn_established, |
| 5557 | .secmark_relabel_packet = selinux_secmark_relabel_packet, | ||
| 5558 | .secmark_refcount_inc = selinux_secmark_refcount_inc, | ||
| 5559 | .secmark_refcount_dec = selinux_secmark_refcount_dec, | ||
| 5536 | .req_classify_flow = selinux_req_classify_flow, | 5560 | .req_classify_flow = selinux_req_classify_flow, |
| 5537 | .tun_dev_create = selinux_tun_dev_create, | 5561 | .tun_dev_create = selinux_tun_dev_create, |
| 5538 | .tun_dev_post_create = selinux_tun_dev_post_create, | 5562 | .tun_dev_post_create = selinux_tun_dev_post_create, |
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index b4c9eb4bd6f9..8858d2b2d4b6 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h | |||
| @@ -17,7 +17,7 @@ struct security_class_mapping secclass_map[] = { | |||
| 17 | { "compute_av", "compute_create", "compute_member", | 17 | { "compute_av", "compute_create", "compute_member", |
| 18 | "check_context", "load_policy", "compute_relabel", | 18 | "check_context", "load_policy", "compute_relabel", |
| 19 | "compute_user", "setenforce", "setbool", "setsecparam", | 19 | "compute_user", "setenforce", "setbool", "setsecparam", |
| 20 | "setcheckreqprot", NULL } }, | 20 | "setcheckreqprot", "read_policy", NULL } }, |
| 21 | { "process", | 21 | { "process", |
| 22 | { "fork", "transition", "sigchld", "sigkill", | 22 | { "fork", "transition", "sigchld", "sigkill", |
| 23 | "sigstop", "signull", "signal", "ptrace", "getsched", "setsched", | 23 | "sigstop", "signull", "signal", "ptrace", "getsched", "setsched", |
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 1f7c2491d3dc..671273eb1115 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #define _SELINUX_SECURITY_H_ | 9 | #define _SELINUX_SECURITY_H_ |
| 10 | 10 | ||
| 11 | #include <linux/magic.h> | 11 | #include <linux/magic.h> |
| 12 | #include <linux/types.h> | ||
| 12 | #include "flask.h" | 13 | #include "flask.h" |
| 13 | 14 | ||
| 14 | #define SECSID_NULL 0x00000000 /* unspecified SID */ | 15 | #define SECSID_NULL 0x00000000 /* unspecified SID */ |
| @@ -82,6 +83,8 @@ extern int selinux_policycap_openperm; | |||
| 82 | int security_mls_enabled(void); | 83 | int security_mls_enabled(void); |
| 83 | 84 | ||
| 84 | int security_load_policy(void *data, size_t len); | 85 | int security_load_policy(void *data, size_t len); |
| 86 | int security_read_policy(void **data, ssize_t *len); | ||
| 87 | size_t security_policydb_len(void); | ||
| 85 | 88 | ||
| 86 | int security_policycap_supported(unsigned int req_cap); | 89 | int security_policycap_supported(unsigned int req_cap); |
| 87 | 90 | ||
| @@ -191,5 +194,25 @@ static inline int security_netlbl_sid_to_secattr(u32 sid, | |||
| 191 | 194 | ||
| 192 | const char *security_get_initial_sid_context(u32 sid); | 195 | const char *security_get_initial_sid_context(u32 sid); |
| 193 | 196 | ||
| 197 | /* | ||
| 198 | * status notifier using mmap interface | ||
| 199 | */ | ||
| 200 | extern struct page *selinux_kernel_status_page(void); | ||
| 201 | |||
| 202 | #define SELINUX_KERNEL_STATUS_VERSION 1 | ||
| 203 | struct selinux_kernel_status { | ||
| 204 | u32 version; /* version number of this structure */ | ||
| 205 | u32 sequence; /* sequence number of seqlock logic */ | ||
| 206 | u32 enforcing; /* current setting of enforcing mode */ | ||
| 207 | u32 policyload; /* times of policy reloaded */ | ||
| 208 | u32 deny_unknown; /* current setting of deny_unknown */ | ||
| 209 | /* | ||
| 210 | * Version > 0 of this structure supports the members above. | ||
| 211 | */ | ||
| 212 | } __attribute__((packed)); | ||
| 213 | |||
| 214 | extern void selinux_status_update_setenforce(int enforcing); | ||
| 215 | extern void selinux_status_update_policyload(int seqno); | ||
| 216 | |||
| 194 | #endif /* _SELINUX_SECURITY_H_ */ | 217 | #endif /* _SELINUX_SECURITY_H_ */ |
| 195 | 218 | ||
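The sequence field follows a seqlock-style protocol: readers sample it before and after the payload fields and retry while it is odd (update in progress) or has changed. A hedged userspace sketch, assuming the page is already mapped read-only (memory barriers omitted for brevity):

    #include <stdint.h>

    struct selinux_kernel_status {
            uint32_t version;
            uint32_t sequence;
            uint32_t enforcing;
            uint32_t policyload;
            uint32_t deny_unknown;
    } __attribute__((packed));

    /* Consistent read of one field from the mapped status page. */
    static uint32_t read_enforcing(const volatile struct selinux_kernel_status *s)
    {
            uint32_t seq, enforcing;

            do {
                    seq = s->sequence;
                    enforcing = s->enforcing;
            } while ((seq & 1) || seq != s->sequence);

            return enforcing;
    }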
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 79a1bb635662..87e0556bae70 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c | |||
| @@ -68,6 +68,8 @@ static int *bool_pending_values; | |||
| 68 | static struct dentry *class_dir; | 68 | static struct dentry *class_dir; |
| 69 | static unsigned long last_class_ino; | 69 | static unsigned long last_class_ino; |
| 70 | 70 | ||
| 71 | static char policy_opened; | ||
| 72 | |||
| 71 | /* global data for policy capabilities */ | 73 | /* global data for policy capabilities */ |
| 72 | static struct dentry *policycap_dir; | 74 | static struct dentry *policycap_dir; |
| 73 | 75 | ||
| @@ -110,6 +112,8 @@ enum sel_inos { | |||
| 110 | SEL_COMPAT_NET, /* whether to use old compat network packet controls */ | 112 | SEL_COMPAT_NET, /* whether to use old compat network packet controls */ |
| 111 | SEL_REJECT_UNKNOWN, /* export unknown reject handling to userspace */ | 113 | SEL_REJECT_UNKNOWN, /* export unknown reject handling to userspace */ |
| 112 | SEL_DENY_UNKNOWN, /* export unknown deny handling to userspace */ | 114 | SEL_DENY_UNKNOWN, /* export unknown deny handling to userspace */ |
| 115 | SEL_STATUS, /* export current status using mmap() */ | ||
| 116 | SEL_POLICY, /* allow userspace to read the in kernel policy */ | ||
| 113 | SEL_INO_NEXT, /* The next inode number to use */ | 117 | SEL_INO_NEXT, /* The next inode number to use */ |
| 114 | }; | 118 | }; |
| 115 | 119 | ||
| @@ -171,6 +175,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, | |||
| 171 | if (selinux_enforcing) | 175 | if (selinux_enforcing) |
| 172 | avc_ss_reset(0); | 176 | avc_ss_reset(0); |
| 173 | selnl_notify_setenforce(selinux_enforcing); | 177 | selnl_notify_setenforce(selinux_enforcing); |
| 178 | selinux_status_update_setenforce(selinux_enforcing); | ||
| 174 | } | 179 | } |
| 175 | length = count; | 180 | length = count; |
| 176 | out: | 181 | out: |
| @@ -205,6 +210,59 @@ static const struct file_operations sel_handle_unknown_ops = { | |||
| 205 | .llseek = generic_file_llseek, | 210 | .llseek = generic_file_llseek, |
| 206 | }; | 211 | }; |
| 207 | 212 | ||
| 213 | static int sel_open_handle_status(struct inode *inode, struct file *filp) | ||
| 214 | { | ||
| 215 | struct page *status = selinux_kernel_status_page(); | ||
| 216 | |||
| 217 | if (!status) | ||
| 218 | return -ENOMEM; | ||
| 219 | |||
| 220 | filp->private_data = status; | ||
| 221 | |||
| 222 | return 0; | ||
| 223 | } | ||
| 224 | |||
| 225 | static ssize_t sel_read_handle_status(struct file *filp, char __user *buf, | ||
| 226 | size_t count, loff_t *ppos) | ||
| 227 | { | ||
| 228 | struct page *status = filp->private_data; | ||
| 229 | |||
| 230 | BUG_ON(!status); | ||
| 231 | |||
| 232 | return simple_read_from_buffer(buf, count, ppos, | ||
| 233 | page_address(status), | ||
| 234 | sizeof(struct selinux_kernel_status)); | ||
| 235 | } | ||
| 236 | |||
| 237 | static int sel_mmap_handle_status(struct file *filp, | ||
| 238 | struct vm_area_struct *vma) | ||
| 239 | { | ||
| 240 | struct page *status = filp->private_data; | ||
| 241 | unsigned long size = vma->vm_end - vma->vm_start; | ||
| 242 | |||
| 243 | BUG_ON(!status); | ||
| 244 | |||
| 245 | /* only allow mapping the single page at the head */ | ||
| 246 | if (vma->vm_pgoff > 0 || size != PAGE_SIZE) | ||
| 247 | return -EIO; | ||
| 248 | /* disallow writable mapping */ | ||
| 249 | if (vma->vm_flags & VM_WRITE) | ||
| 250 | return -EPERM; | ||
| 251 | /* disallow mprotect() from making the mapping writable */ | ||
| 252 | vma->vm_flags &= ~VM_MAYWRITE; | ||
| 253 | |||
| 254 | return remap_pfn_range(vma, vma->vm_start, | ||
| 255 | page_to_pfn(status), | ||
| 256 | size, vma->vm_page_prot); | ||
| 257 | } | ||
| 258 | |||
| 259 | static const struct file_operations sel_handle_status_ops = { | ||
| 260 | .open = sel_open_handle_status, | ||
| 261 | .read = sel_read_handle_status, | ||
| 262 | .mmap = sel_mmap_handle_status, | ||
| 263 | .llseek = generic_file_llseek, | ||
| 264 | }; | ||
| 265 | |||
| 208 | #ifdef CONFIG_SECURITY_SELINUX_DISABLE | 266 | #ifdef CONFIG_SECURITY_SELINUX_DISABLE |
| 209 | static ssize_t sel_write_disable(struct file *file, const char __user *buf, | 267 | static ssize_t sel_write_disable(struct file *file, const char __user *buf, |
| 210 | size_t count, loff_t *ppos) | 268 | size_t count, loff_t *ppos) |
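Userspace consumes the new status file by mapping exactly one read-only page; a non-zero offset, an oversized mapping, or PROT_WRITE is refused, and VM_MAYWRITE is cleared so mprotect() cannot upgrade it later. A minimal sketch, assuming selinuxfs is mounted at /selinux:

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            long pagesize = sysconf(_SC_PAGESIZE);
            int fd = open("/selinux/status", O_RDONLY);
            void *map;

            if (fd < 0)
                    return 1;
            map = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
            close(fd);
            if (map == MAP_FAILED)
                    return 1;
            /* ... poll the selinux_kernel_status fields here ... */
            munmap(map, pagesize);
            return 0;
    }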
| @@ -296,6 +354,141 @@ static const struct file_operations sel_mls_ops = { | |||
| 296 | .llseek = generic_file_llseek, | 354 | .llseek = generic_file_llseek, |
| 297 | }; | 355 | }; |
| 298 | 356 | ||
| 357 | struct policy_load_memory { | ||
| 358 | size_t len; | ||
| 359 | void *data; | ||
| 360 | }; | ||
| 361 | |||
| 362 | static int sel_open_policy(struct inode *inode, struct file *filp) | ||
| 363 | { | ||
| 364 | struct policy_load_memory *plm = NULL; | ||
| 365 | int rc; | ||
| 366 | |||
| 367 | BUG_ON(filp->private_data); | ||
| 368 | |||
| 369 | mutex_lock(&sel_mutex); | ||
| 370 | |||
| 371 | rc = task_has_security(current, SECURITY__READ_POLICY); | ||
| 372 | if (rc) | ||
| 373 | goto err; | ||
| 374 | |||
| 375 | rc = -EBUSY; | ||
| 376 | if (policy_opened) | ||
| 377 | goto err; | ||
| 378 | |||
| 379 | rc = -ENOMEM; | ||
| 380 | plm = kzalloc(sizeof(*plm), GFP_KERNEL); | ||
| 381 | if (!plm) | ||
| 382 | goto err; | ||
| 383 | |||
| 384 | if (i_size_read(inode) != security_policydb_len()) { | ||
| 385 | mutex_lock(&inode->i_mutex); | ||
| 386 | i_size_write(inode, security_policydb_len()); | ||
| 387 | mutex_unlock(&inode->i_mutex); | ||
| 388 | } | ||
| 389 | |||
| 390 | rc = security_read_policy(&plm->data, &plm->len); | ||
| 391 | if (rc) | ||
| 392 | goto err; | ||
| 393 | |||
| 394 | policy_opened = 1; | ||
| 395 | |||
| 396 | filp->private_data = plm; | ||
| 397 | |||
| 398 | mutex_unlock(&sel_mutex); | ||
| 399 | |||
| 400 | return 0; | ||
| 401 | err: | ||
| 402 | mutex_unlock(&sel_mutex); | ||
| 403 | |||
| 404 | if (plm) | ||
| 405 | vfree(plm->data); | ||
| 406 | kfree(plm); | ||
| 407 | return rc; | ||
| 408 | } | ||
| 409 | |||
| 410 | static int sel_release_policy(struct inode *inode, struct file *filp) | ||
| 411 | { | ||
| 412 | struct policy_load_memory *plm = filp->private_data; | ||
| 413 | |||
| 414 | BUG_ON(!plm); | ||
| 415 | |||
| 416 | policy_opened = 0; | ||
| 417 | |||
| 418 | vfree(plm->data); | ||
| 419 | kfree(plm); | ||
| 420 | |||
| 421 | return 0; | ||
| 422 | } | ||
| 423 | |||
| 424 | static ssize_t sel_read_policy(struct file *filp, char __user *buf, | ||
| 425 | size_t count, loff_t *ppos) | ||
| 426 | { | ||
| 427 | struct policy_load_memory *plm = filp->private_data; | ||
| 428 | int ret; | ||
| 429 | |||
| 430 | mutex_lock(&sel_mutex); | ||
| 431 | |||
| 432 | ret = task_has_security(current, SECURITY__READ_POLICY); | ||
| 433 | if (ret) | ||
| 434 | goto out; | ||
| 435 | |||
| 436 | ret = simple_read_from_buffer(buf, count, ppos, plm->data, plm->len); | ||
| 437 | out: | ||
| 438 | mutex_unlock(&sel_mutex); | ||
| 439 | return ret; | ||
| 440 | } | ||
| 441 | |||
| 442 | static int sel_mmap_policy_fault(struct vm_area_struct *vma, | ||
| 443 | struct vm_fault *vmf) | ||
| 444 | { | ||
| 445 | struct policy_load_memory *plm = vma->vm_file->private_data; | ||
| 446 | unsigned long offset; | ||
| 447 | struct page *page; | ||
| 448 | |||
| 449 | if (vmf->flags & (FAULT_FLAG_MKWRITE | FAULT_FLAG_WRITE)) | ||
| 450 | return VM_FAULT_SIGBUS; | ||
| 451 | |||
| 452 | offset = vmf->pgoff << PAGE_SHIFT; | ||
| 453 | if (offset >= roundup(plm->len, PAGE_SIZE)) | ||
| 454 | return VM_FAULT_SIGBUS; | ||
| 455 | |||
| 456 | page = vmalloc_to_page(plm->data + offset); | ||
| 457 | get_page(page); | ||
| 458 | |||
| 459 | vmf->page = page; | ||
| 460 | |||
| 461 | return 0; | ||
| 462 | } | ||
| 463 | |||
| 464 | static struct vm_operations_struct sel_mmap_policy_ops = { | ||
| 465 | .fault = sel_mmap_policy_fault, | ||
| 466 | .page_mkwrite = sel_mmap_policy_fault, | ||
| 467 | }; | ||
| 468 | |||
| 469 | int sel_mmap_policy(struct file *filp, struct vm_area_struct *vma) | ||
| 470 | { | ||
| 471 | if (vma->vm_flags & VM_SHARED) { | ||
| 472 | /* do not allow mprotect to make mapping writable */ | ||
| 473 | vma->vm_flags &= ~VM_MAYWRITE; | ||
| 474 | |||
| 475 | if (vma->vm_flags & VM_WRITE) | ||
| 476 | return -EACCES; | ||
| 477 | } | ||
| 478 | |||
| 479 | vma->vm_flags |= VM_RESERVED; | ||
| 480 | vma->vm_ops = &sel_mmap_policy_ops; | ||
| 481 | |||
| 482 | return 0; | ||
| 483 | } | ||
| 484 | |||
| 485 | static const struct file_operations sel_policy_ops = { | ||
| 486 | .open = sel_open_policy, | ||
| 487 | .read = sel_read_policy, | ||
| 488 | .mmap = sel_mmap_policy, | ||
| 489 | .release = sel_release_policy, | ||
| 490 | }; | ||
| 491 | |||
| 299 | static ssize_t sel_write_load(struct file *file, const char __user *buf, | 492 | static ssize_t sel_write_load(struct file *file, const char __user *buf, |
| 300 | size_t count, loff_t *ppos) | 493 | size_t count, loff_t *ppos) |
| 301 | 494 | ||
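The policy node snapshots the in-kernel policydb when opened: the inode size is refreshed from security_policydb_len(), only a single opener is allowed at a time (policy_opened), and both open and read require the new read_policy permission. A hedged userspace sketch of pulling the snapshot, again assuming selinuxfs at /selinux:

    #include <fcntl.h>
    #include <stdlib.h>
    #include <unistd.h>

    /* Returns a malloc'd copy of the kernel's binary policy, or NULL. */
    static void *read_kernel_policy(size_t *lenp)
    {
            int fd = open("/selinux/policy", O_RDONLY);
            off_t len;
            void *buf = NULL;

            if (fd < 0)
                    return NULL;
            len = lseek(fd, 0, SEEK_END);
            if (len > 0 && lseek(fd, 0, SEEK_SET) == 0) {
                    buf = malloc(len);
                    if (buf && read(fd, buf, len) != len) {
                            free(buf);
                            buf = NULL;
                    }
            }
            close(fd);
            if (buf)
                    *lenp = len;
            return buf;
    }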
| @@ -1612,6 +1805,8 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent) | |||
| 1612 | [SEL_CHECKREQPROT] = {"checkreqprot", &sel_checkreqprot_ops, S_IRUGO|S_IWUSR}, | 1805 | [SEL_CHECKREQPROT] = {"checkreqprot", &sel_checkreqprot_ops, S_IRUGO|S_IWUSR}, |
| 1613 | [SEL_REJECT_UNKNOWN] = {"reject_unknown", &sel_handle_unknown_ops, S_IRUGO}, | 1806 | [SEL_REJECT_UNKNOWN] = {"reject_unknown", &sel_handle_unknown_ops, S_IRUGO}, |
| 1614 | [SEL_DENY_UNKNOWN] = {"deny_unknown", &sel_handle_unknown_ops, S_IRUGO}, | 1807 | [SEL_DENY_UNKNOWN] = {"deny_unknown", &sel_handle_unknown_ops, S_IRUGO}, |
| 1808 | [SEL_STATUS] = {"status", &sel_handle_status_ops, S_IRUGO}, | ||
| 1809 | [SEL_POLICY] = {"policy", &sel_policy_ops, S_IRUSR}, | ||
| 1615 | /* last one */ {""} | 1810 | /* last one */ {""} |
| 1616 | }; | 1811 | }; |
| 1617 | ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files); | 1812 | ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files); |
diff --git a/security/selinux/ss/Makefile b/security/selinux/ss/Makefile deleted file mode 100644 index 15d4e62917de..000000000000 --- a/security/selinux/ss/Makefile +++ /dev/null | |||
| @@ -1,9 +0,0 @@ | |||
| 1 | # | ||
| 2 | # Makefile for building the SELinux security server as part of the kernel tree. | ||
| 3 | # | ||
| 4 | |||
| 5 | EXTRA_CFLAGS += -Isecurity/selinux -Isecurity/selinux/include | ||
| 6 | obj-y := ss.o | ||
| 7 | |||
| 8 | ss-y := ebitmap.o hashtab.o symtab.o sidtab.o avtab.o policydb.o services.o conditional.o mls.o | ||
| 9 | |||
diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c index 929480c6c430..a3dd9faa19c0 100644 --- a/security/selinux/ss/avtab.c +++ b/security/selinux/ss/avtab.c | |||
| @@ -266,8 +266,8 @@ int avtab_alloc(struct avtab *h, u32 nrules) | |||
| 266 | if (shift > 2) | 266 | if (shift > 2) |
| 267 | shift = shift - 2; | 267 | shift = shift - 2; |
| 268 | nslot = 1 << shift; | 268 | nslot = 1 << shift; |
| 269 | if (nslot > MAX_AVTAB_SIZE) | 269 | if (nslot > MAX_AVTAB_HASH_BUCKETS) |
| 270 | nslot = MAX_AVTAB_SIZE; | 270 | nslot = MAX_AVTAB_HASH_BUCKETS; |
| 271 | mask = nslot - 1; | 271 | mask = nslot - 1; |
| 272 | 272 | ||
| 273 | h->htable = kcalloc(nslot, sizeof(*(h->htable)), GFP_KERNEL); | 273 | h->htable = kcalloc(nslot, sizeof(*(h->htable)), GFP_KERNEL); |
| @@ -501,6 +501,48 @@ bad: | |||
| 501 | goto out; | 501 | goto out; |
| 502 | } | 502 | } |
| 503 | 503 | ||
| 504 | int avtab_write_item(struct policydb *p, struct avtab_node *cur, void *fp) | ||
| 505 | { | ||
| 506 | __le16 buf16[4]; | ||
| 507 | __le32 buf32[1]; | ||
| 508 | int rc; | ||
| 509 | |||
| 510 | buf16[0] = cpu_to_le16(cur->key.source_type); | ||
| 511 | buf16[1] = cpu_to_le16(cur->key.target_type); | ||
| 512 | buf16[2] = cpu_to_le16(cur->key.target_class); | ||
| 513 | buf16[3] = cpu_to_le16(cur->key.specified); | ||
| 514 | rc = put_entry(buf16, sizeof(u16), 4, fp); | ||
| 515 | if (rc) | ||
| 516 | return rc; | ||
| 517 | buf32[0] = cpu_to_le32(cur->datum.data); | ||
| 518 | rc = put_entry(buf32, sizeof(u32), 1, fp); | ||
| 519 | if (rc) | ||
| 520 | return rc; | ||
| 521 | return 0; | ||
| 522 | } | ||
| 523 | |||
| 524 | int avtab_write(struct policydb *p, struct avtab *a, void *fp) | ||
| 525 | { | ||
| 526 | unsigned int i; | ||
| 527 | int rc = 0; | ||
| 528 | struct avtab_node *cur; | ||
| 529 | __le32 buf[1]; | ||
| 530 | |||
| 531 | buf[0] = cpu_to_le32(a->nel); | ||
| 532 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 533 | if (rc) | ||
| 534 | return rc; | ||
| 535 | |||
| 536 | for (i = 0; i < a->nslot; i++) { | ||
| 537 | for (cur = a->htable[i]; cur; cur = cur->next) { | ||
| 538 | rc = avtab_write_item(p, cur, fp); | ||
| 539 | if (rc) | ||
| 540 | return rc; | ||
| 541 | } | ||
| 542 | } | ||
| 543 | |||
| 544 | return rc; | ||
| 545 | } | ||
| 504 | void avtab_cache_init(void) | 546 | void avtab_cache_init(void) |
| 505 | { | 547 | { |
| 506 | avtab_node_cachep = kmem_cache_create("avtab_node", | 548 | avtab_node_cachep = kmem_cache_create("avtab_node", |
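Each rule that avtab_write_item() emits is four little-endian 16-bit key fields followed by one 32-bit datum. Shown below as a single packed struct purely for illustration; the code above writes the same bytes with two put_entry() calls:

    #include <linux/types.h>

    struct avtab_disk_record {
            __le16 source_type;
            __le16 target_type;
            __le16 target_class;
            __le16 specified;
            __le32 data;
    } __attribute__((packed));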
diff --git a/security/selinux/ss/avtab.h b/security/selinux/ss/avtab.h index cd4f734e2749..dff0c75345c1 100644 --- a/security/selinux/ss/avtab.h +++ b/security/selinux/ss/avtab.h | |||
| @@ -71,6 +71,8 @@ int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol, | |||
| 71 | void *p); | 71 | void *p); |
| 72 | 72 | ||
| 73 | int avtab_read(struct avtab *a, void *fp, struct policydb *pol); | 73 | int avtab_read(struct avtab *a, void *fp, struct policydb *pol); |
| 74 | int avtab_write_item(struct policydb *p, struct avtab_node *cur, void *fp); | ||
| 75 | int avtab_write(struct policydb *p, struct avtab *a, void *fp); | ||
| 74 | 76 | ||
| 75 | struct avtab_node *avtab_insert_nonunique(struct avtab *h, struct avtab_key *key, | 77 | struct avtab_node *avtab_insert_nonunique(struct avtab *h, struct avtab_key *key, |
| 76 | struct avtab_datum *datum); | 78 | struct avtab_datum *datum); |
| @@ -85,7 +87,6 @@ void avtab_cache_destroy(void); | |||
| 85 | #define MAX_AVTAB_HASH_BITS 11 | 87 | #define MAX_AVTAB_HASH_BITS 11 |
| 86 | #define MAX_AVTAB_HASH_BUCKETS (1 << MAX_AVTAB_HASH_BITS) | 88 | #define MAX_AVTAB_HASH_BUCKETS (1 << MAX_AVTAB_HASH_BITS) |
| 87 | #define MAX_AVTAB_HASH_MASK (MAX_AVTAB_HASH_BUCKETS-1) | 89 | #define MAX_AVTAB_HASH_MASK (MAX_AVTAB_HASH_BUCKETS-1) |
| 88 | #define MAX_AVTAB_SIZE MAX_AVTAB_HASH_BUCKETS | ||
| 89 | 90 | ||
| 90 | #endif /* _SS_AVTAB_H_ */ | 91 | #endif /* _SS_AVTAB_H_ */ |
| 91 | 92 | ||
diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c index c91e150c3087..655fe1c6cc69 100644 --- a/security/selinux/ss/conditional.c +++ b/security/selinux/ss/conditional.c | |||
| @@ -490,6 +490,129 @@ err: | |||
| 490 | return rc; | 490 | return rc; |
| 491 | } | 491 | } |
| 492 | 492 | ||
| 493 | int cond_write_bool(void *vkey, void *datum, void *ptr) | ||
| 494 | { | ||
| 495 | char *key = vkey; | ||
| 496 | struct cond_bool_datum *booldatum = datum; | ||
| 497 | struct policy_data *pd = ptr; | ||
| 498 | void *fp = pd->fp; | ||
| 499 | __le32 buf[3]; | ||
| 500 | u32 len; | ||
| 501 | int rc; | ||
| 502 | |||
| 503 | len = strlen(key); | ||
| 504 | buf[0] = cpu_to_le32(booldatum->value); | ||
| 505 | buf[1] = cpu_to_le32(booldatum->state); | ||
| 506 | buf[2] = cpu_to_le32(len); | ||
| 507 | rc = put_entry(buf, sizeof(u32), 3, fp); | ||
| 508 | if (rc) | ||
| 509 | return rc; | ||
| 510 | rc = put_entry(key, 1, len, fp); | ||
| 511 | if (rc) | ||
| 512 | return rc; | ||
| 513 | return 0; | ||
| 514 | } | ||
| 515 | |||
| 516 | /* | ||
| 517 | * cond_write_av_list doesn't write out the av_list nodes. | ||
| 518 | * Instead, it writes out the key/value pairs from the avtab. This | ||
| 519 | * is necessary because there is no way to uniquely identify rules | ||
| 520 | * in the avtab, so it is not possible to associate individual rules | ||
| 521 | * in the avtab with a conditional without saving them as part of | ||
| 522 | * the conditional. This means that the avtab with the conditional | ||
| 523 | * rules will not be saved but will be rebuilt on policy load. | ||
| 524 | */ | ||
| 525 | static int cond_write_av_list(struct policydb *p, | ||
| 526 | struct cond_av_list *list, struct policy_file *fp) | ||
| 527 | { | ||
| 528 | __le32 buf[1]; | ||
| 529 | struct cond_av_list *cur_list; | ||
| 530 | u32 len; | ||
| 531 | int rc; | ||
| 532 | |||
| 533 | len = 0; | ||
| 534 | for (cur_list = list; cur_list != NULL; cur_list = cur_list->next) | ||
| 535 | len++; | ||
| 536 | |||
| 537 | buf[0] = cpu_to_le32(len); | ||
| 538 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 539 | if (rc) | ||
| 540 | return rc; | ||
| 541 | |||
| 542 | if (len == 0) | ||
| 543 | return 0; | ||
| 544 | |||
| 545 | for (cur_list = list; cur_list != NULL; cur_list = cur_list->next) { | ||
| 546 | rc = avtab_write_item(p, cur_list->node, fp); | ||
| 547 | if (rc) | ||
| 548 | return rc; | ||
| 549 | } | ||
| 550 | |||
| 551 | return 0; | ||
| 552 | } | ||
| 553 | |||
| 554 | int cond_write_node(struct policydb *p, struct cond_node *node, | ||
| 555 | struct policy_file *fp) | ||
| 556 | { | ||
| 557 | struct cond_expr *cur_expr; | ||
| 558 | __le32 buf[2]; | ||
| 559 | int rc; | ||
| 560 | u32 len = 0; | ||
| 561 | |||
| 562 | buf[0] = cpu_to_le32(node->cur_state); | ||
| 563 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 564 | if (rc) | ||
| 565 | return rc; | ||
| 566 | |||
| 567 | for (cur_expr = node->expr; cur_expr != NULL; cur_expr = cur_expr->next) | ||
| 568 | len++; | ||
| 569 | |||
| 570 | buf[0] = cpu_to_le32(len); | ||
| 571 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 572 | if (rc) | ||
| 573 | return rc; | ||
| 574 | |||
| 575 | for (cur_expr = node->expr; cur_expr != NULL; cur_expr = cur_expr->next) { | ||
| 576 | buf[0] = cpu_to_le32(cur_expr->expr_type); | ||
| 577 | buf[1] = cpu_to_le32(cur_expr->bool); | ||
| 578 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
| 579 | if (rc) | ||
| 580 | return rc; | ||
| 581 | } | ||
| 582 | |||
| 583 | rc = cond_write_av_list(p, node->true_list, fp); | ||
| 584 | if (rc) | ||
| 585 | return rc; | ||
| 586 | rc = cond_write_av_list(p, node->false_list, fp); | ||
| 587 | if (rc) | ||
| 588 | return rc; | ||
| 589 | |||
| 590 | return 0; | ||
| 591 | } | ||
| 592 | |||
| 593 | int cond_write_list(struct policydb *p, struct cond_node *list, void *fp) | ||
| 594 | { | ||
| 595 | struct cond_node *cur; | ||
| 596 | u32 len; | ||
| 597 | __le32 buf[1]; | ||
| 598 | int rc; | ||
| 599 | |||
| 600 | len = 0; | ||
| 601 | for (cur = list; cur != NULL; cur = cur->next) | ||
| 602 | len++; | ||
| 603 | buf[0] = cpu_to_le32(len); | ||
| 604 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 605 | if (rc) | ||
| 606 | return rc; | ||
| 607 | |||
| 608 | for (cur = list; cur != NULL; cur = cur->next) { | ||
| 609 | rc = cond_write_node(p, cur, fp); | ||
| 610 | if (rc) | ||
| 611 | return rc; | ||
| 612 | } | ||
| 613 | |||
| 614 | return 0; | ||
| 615 | } | ||
| 493 | /* Determine whether additional permissions are granted by the conditional | 616 | /* Determine whether additional permissions are granted by the conditional |
| 494 | * av table, and if so, add them to the result | 617 | * av table, and if so, add them to the result |
| 495 | */ | 618 | */ |
diff --git a/security/selinux/ss/conditional.h b/security/selinux/ss/conditional.h index 53ddb013ae57..3f209c635295 100644 --- a/security/selinux/ss/conditional.h +++ b/security/selinux/ss/conditional.h | |||
| @@ -69,6 +69,8 @@ int cond_index_bool(void *key, void *datum, void *datap); | |||
| 69 | 69 | ||
| 70 | int cond_read_bool(struct policydb *p, struct hashtab *h, void *fp); | 70 | int cond_read_bool(struct policydb *p, struct hashtab *h, void *fp); |
| 71 | int cond_read_list(struct policydb *p, void *fp); | 71 | int cond_read_list(struct policydb *p, void *fp); |
| 72 | int cond_write_bool(void *key, void *datum, void *ptr); | ||
| 73 | int cond_write_list(struct policydb *p, struct cond_node *list, void *fp); | ||
| 72 | 74 | ||
| 73 | void cond_compute_av(struct avtab *ctab, struct avtab_key *key, struct av_decision *avd); | 75 | void cond_compute_av(struct avtab *ctab, struct avtab_key *key, struct av_decision *avd); |
| 74 | 76 | ||
diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index 04b6145d767f..d42951fcbe87 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c | |||
| @@ -22,6 +22,8 @@ | |||
| 22 | #include "ebitmap.h" | 22 | #include "ebitmap.h" |
| 23 | #include "policydb.h" | 23 | #include "policydb.h" |
| 24 | 24 | ||
| 25 | #define BITS_PER_U64 (sizeof(u64) * 8) | ||
| 26 | |||
| 25 | int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2) | 27 | int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2) |
| 26 | { | 28 | { |
| 27 | struct ebitmap_node *n1, *n2; | 29 | struct ebitmap_node *n1, *n2; |
| @@ -363,10 +365,10 @@ int ebitmap_read(struct ebitmap *e, void *fp) | |||
| 363 | e->highbit = le32_to_cpu(buf[1]); | 365 | e->highbit = le32_to_cpu(buf[1]); |
| 364 | count = le32_to_cpu(buf[2]); | 366 | count = le32_to_cpu(buf[2]); |
| 365 | 367 | ||
| 366 | if (mapunit != sizeof(u64) * 8) { | 368 | if (mapunit != BITS_PER_U64) { |
| 367 | printk(KERN_ERR "SELinux: ebitmap: map size %u does not " | 369 | printk(KERN_ERR "SELinux: ebitmap: map size %u does not " |
| 368 | "match my size %Zd (high bit was %d)\n", | 370 | "match my size %Zd (high bit was %d)\n", |
| 369 | mapunit, sizeof(u64) * 8, e->highbit); | 371 | mapunit, BITS_PER_U64, e->highbit); |
| 370 | goto bad; | 372 | goto bad; |
| 371 | } | 373 | } |
| 372 | 374 | ||
| @@ -446,3 +448,78 @@ bad: | |||
| 446 | ebitmap_destroy(e); | 448 | ebitmap_destroy(e); |
| 447 | goto out; | 449 | goto out; |
| 448 | } | 450 | } |
| 451 | |||
| 452 | int ebitmap_write(struct ebitmap *e, void *fp) | ||
| 453 | { | ||
| 454 | struct ebitmap_node *n; | ||
| 455 | u32 count; | ||
| 456 | __le32 buf[3]; | ||
| 457 | u64 map; | ||
| 458 | int bit, last_bit, last_startbit, rc; | ||
| 459 | |||
| 460 | buf[0] = cpu_to_le32(BITS_PER_U64); | ||
| 461 | |||
| 462 | count = 0; | ||
| 463 | last_bit = 0; | ||
| 464 | last_startbit = -1; | ||
| 465 | ebitmap_for_each_positive_bit(e, n, bit) { | ||
| 466 | if (rounddown(bit, (int)BITS_PER_U64) > last_startbit) { | ||
| 467 | count++; | ||
| 468 | last_startbit = rounddown(bit, BITS_PER_U64); | ||
| 469 | } | ||
| 470 | last_bit = roundup(bit + 1, BITS_PER_U64); | ||
| 471 | } | ||
| 472 | buf[1] = cpu_to_le32(last_bit); | ||
| 473 | buf[2] = cpu_to_le32(count); | ||
| 474 | |||
| 475 | rc = put_entry(buf, sizeof(u32), 3, fp); | ||
| 476 | if (rc) | ||
| 477 | return rc; | ||
| 478 | |||
| 479 | map = 0; | ||
| 480 | last_startbit = INT_MIN; | ||
| 481 | ebitmap_for_each_positive_bit(e, n, bit) { | ||
| 482 | if (rounddown(bit, (int)BITS_PER_U64) > last_startbit) { | ||
| 483 | __le64 buf64[1]; | ||
| 484 | |||
| 485 | /* this is the very first bit */ | ||
| 486 | if (!map) { | ||
| 487 | last_startbit = rounddown(bit, BITS_PER_U64); | ||
| 488 | map = (u64)1 << (bit - last_startbit); | ||
| 489 | continue; | ||
| 490 | } | ||
| 491 | |||
| 492 | /* write out the previous, now-complete node */ | ||
| 493 | buf[0] = cpu_to_le32(last_startbit); | ||
| 494 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 495 | if (rc) | ||
| 496 | return rc; | ||
| 497 | |||
| 498 | buf64[0] = cpu_to_le64(map); | ||
| 499 | rc = put_entry(buf64, sizeof(u64), 1, fp); | ||
| 500 | if (rc) | ||
| 501 | return rc; | ||
| 502 | |||
| 503 | /* set up for the next node */ | ||
| 504 | map = 0; | ||
| 505 | last_startbit = rounddown(bit, BITS_PER_U64); | ||
| 506 | } | ||
| 507 | map |= (u64)1 << (bit - last_startbit); | ||
| 508 | } | ||
| 509 | /* write the last node */ | ||
| 510 | if (map) { | ||
| 511 | __le64 buf64[1]; | ||
| 512 | |||
| 513 | /* flush the final, partially filled node */ | ||
| 514 | buf[0] = cpu_to_le32(last_startbit); | ||
| 515 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 516 | if (rc) | ||
| 517 | return rc; | ||
| 518 | |||
| 519 | buf64[0] = cpu_to_le64(map); | ||
| 520 | rc = put_entry(buf64, sizeof(u64), 1, fp); | ||
| 521 | if (rc) | ||
| 522 | return rc; | ||
| 523 | } | ||
| 524 | return 0; | ||
| 525 | } | ||
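ebitmap_write() makes two passes: the first counts the 64-bit nodes and derives the high bit, the second packs bits into u64 maps, flushing a map whenever a bit falls past the current node. The node arithmetic reduces to the helper sketched here (ebitmap_node_coords() is illustrative only):

    #include <linux/types.h>

    /* Every positive bit lands in the node starting at
     * rounddown(bit, BITS_PER_U64), at offset (bit - startbit). */
    static inline void ebitmap_node_coords(int bit, int *startbit, u64 *mask)
    {
            *startbit = bit - (bit % 64);
            *mask = (u64)1 << (bit - *startbit);
    }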
diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h index f283b4367f54..1f4e93c2ae86 100644 --- a/security/selinux/ss/ebitmap.h +++ b/security/selinux/ss/ebitmap.h | |||
| @@ -123,6 +123,7 @@ int ebitmap_get_bit(struct ebitmap *e, unsigned long bit); | |||
| 123 | int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value); | 123 | int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value); |
| 124 | void ebitmap_destroy(struct ebitmap *e); | 124 | void ebitmap_destroy(struct ebitmap *e); |
| 125 | int ebitmap_read(struct ebitmap *e, void *fp); | 125 | int ebitmap_read(struct ebitmap *e, void *fp); |
| 126 | int ebitmap_write(struct ebitmap *e, void *fp); | ||
| 126 | 127 | ||
| 127 | #ifdef CONFIG_NETLABEL | 128 | #ifdef CONFIG_NETLABEL |
| 128 | int ebitmap_netlbl_export(struct ebitmap *ebmap, | 129 | int ebitmap_netlbl_export(struct ebitmap *ebmap, |
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 3a29704be8ce..94f630d93a5c 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c | |||
| @@ -37,6 +37,7 @@ | |||
| 37 | #include "policydb.h" | 37 | #include "policydb.h" |
| 38 | #include "conditional.h" | 38 | #include "conditional.h" |
| 39 | #include "mls.h" | 39 | #include "mls.h" |
| 40 | #include "services.h" | ||
| 40 | 41 | ||
| 41 | #define _DEBUG_HASHES | 42 | #define _DEBUG_HASHES |
| 42 | 43 | ||
| @@ -185,9 +186,19 @@ static u32 rangetr_hash(struct hashtab *h, const void *k) | |||
| 185 | static int rangetr_cmp(struct hashtab *h, const void *k1, const void *k2) | 186 | static int rangetr_cmp(struct hashtab *h, const void *k1, const void *k2) |
| 186 | { | 187 | { |
| 187 | const struct range_trans *key1 = k1, *key2 = k2; | 188 | const struct range_trans *key1 = k1, *key2 = k2; |
| 188 | return (key1->source_type != key2->source_type || | 189 | int v; |
| 189 | key1->target_type != key2->target_type || | 190 | |
| 190 | key1->target_class != key2->target_class); | 191 | v = key1->source_type - key2->source_type; |
| 192 | if (v) | ||
| 193 | return v; | ||
| 194 | |||
| 195 | v = key1->target_type - key2->target_type; | ||
| 196 | if (v) | ||
| 197 | return v; | ||
| 198 | |||
| 199 | v = key1->target_class - key2->target_class; | ||
| 200 | |||
| 201 | return v; | ||
| 191 | } | 202 | } |
| 192 | 203 | ||
| 193 | /* | 204 | /* |
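rangetr_cmp() changes from a boolean "keys differ" result to a signed three-way ordering, which the hashtab code needs to keep bucket chains sorted (hashtab_insert() probes with keycmp() > 0 and == 0). Plain subtraction is safe here because type and class values are small, but the overflow-proof general shape would be:

    #include <linux/types.h>

    /* Three-way comparison that cannot overflow, for arbitrary u32 keys. */
    static int three_way_cmp_u32(u32 a, u32 b)
    {
            return a < b ? -1 : (a > b ? 1 : 0);
    }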
| @@ -1624,11 +1635,11 @@ static int role_bounds_sanity_check(void *key, void *datum, void *datap) | |||
| 1624 | 1635 | ||
| 1625 | static int type_bounds_sanity_check(void *key, void *datum, void *datap) | 1636 | static int type_bounds_sanity_check(void *key, void *datum, void *datap) |
| 1626 | { | 1637 | { |
| 1627 | struct type_datum *upper, *type; | 1638 | struct type_datum *upper; |
| 1628 | struct policydb *p = datap; | 1639 | struct policydb *p = datap; |
| 1629 | int depth = 0; | 1640 | int depth = 0; |
| 1630 | 1641 | ||
| 1631 | upper = type = datum; | 1642 | upper = datum; |
| 1632 | while (upper->bounds) { | 1643 | while (upper->bounds) { |
| 1633 | if (++depth == POLICYDB_BOUNDS_MAXDEPTH) { | 1644 | if (++depth == POLICYDB_BOUNDS_MAXDEPTH) { |
| 1634 | printk(KERN_ERR "SELinux: type %s: " | 1645 | printk(KERN_ERR "SELinux: type %s: " |
| @@ -2306,3 +2317,843 @@ bad: | |||
| 2306 | policydb_destroy(p); | 2317 | policydb_destroy(p); |
| 2307 | goto out; | 2318 | goto out; |
| 2308 | } | 2319 | } |
| 2320 | |||
| 2321 | /* | ||
| 2322 | * Write a MLS level structure to a policydb binary | ||
| 2323 | * representation file. | ||
| 2324 | */ | ||
| 2325 | static int mls_write_level(struct mls_level *l, void *fp) | ||
| 2326 | { | ||
| 2327 | __le32 buf[1]; | ||
| 2328 | int rc; | ||
| 2329 | |||
| 2330 | buf[0] = cpu_to_le32(l->sens); | ||
| 2331 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 2332 | if (rc) | ||
| 2333 | return rc; | ||
| 2334 | |||
| 2335 | rc = ebitmap_write(&l->cat, fp); | ||
| 2336 | if (rc) | ||
| 2337 | return rc; | ||
| 2338 | |||
| 2339 | return 0; | ||
| 2340 | } | ||
| 2341 | |||
| 2342 | /* | ||
| 2343 | * Write a MLS range structure to a policydb binary | ||
| 2344 | * representation file. | ||
| 2345 | */ | ||
| 2346 | static int mls_write_range_helper(struct mls_range *r, void *fp) | ||
| 2347 | { | ||
| 2348 | __le32 buf[3]; | ||
| 2349 | size_t items; | ||
| 2350 | int rc, eq; | ||
| 2351 | |||
| 2352 | eq = mls_level_eq(&r->level[1], &r->level[0]); | ||
| 2353 | |||
| 2354 | if (eq) | ||
| 2355 | items = 2; | ||
| 2356 | else | ||
| 2357 | items = 3; | ||
| 2358 | buf[0] = cpu_to_le32(items-1); | ||
| 2359 | buf[1] = cpu_to_le32(r->level[0].sens); | ||
| 2360 | if (!eq) | ||
| 2361 | buf[2] = cpu_to_le32(r->level[1].sens); | ||
| 2362 | |||
| 2363 | BUG_ON(items > (sizeof(buf)/sizeof(buf[0]))); | ||
| 2364 | |||
| 2365 | rc = put_entry(buf, sizeof(u32), items, fp); | ||
| 2366 | if (rc) | ||
| 2367 | return rc; | ||
| 2368 | |||
| 2369 | rc = ebitmap_write(&r->level[0].cat, fp); | ||
| 2370 | if (rc) | ||
| 2371 | return rc; | ||
| 2372 | if (!eq) { | ||
| 2373 | rc = ebitmap_write(&r->level[1].cat, fp); | ||
| 2374 | if (rc) | ||
| 2375 | return rc; | ||
| 2376 | } | ||
| 2377 | |||
| 2378 | return 0; | ||
| 2379 | } | ||
| 2380 | |||
| 2381 | static int sens_write(void *vkey, void *datum, void *ptr) | ||
| 2382 | { | ||
| 2383 | char *key = vkey; | ||
| 2384 | struct level_datum *levdatum = datum; | ||
| 2385 | struct policy_data *pd = ptr; | ||
| 2386 | void *fp = pd->fp; | ||
| 2387 | __le32 buf[2]; | ||
| 2388 | size_t len; | ||
| 2389 | int rc; | ||
| 2390 | |||
| 2391 | len = strlen(key); | ||
| 2392 | buf[0] = cpu_to_le32(len); | ||
| 2393 | buf[1] = cpu_to_le32(levdatum->isalias); | ||
| 2394 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
| 2395 | if (rc) | ||
| 2396 | return rc; | ||
| 2397 | |||
| 2398 | rc = put_entry(key, 1, len, fp); | ||
| 2399 | if (rc) | ||
| 2400 | return rc; | ||
| 2401 | |||
| 2402 | rc = mls_write_level(levdatum->level, fp); | ||
| 2403 | if (rc) | ||
| 2404 | return rc; | ||
| 2405 | |||
| 2406 | return 0; | ||
| 2407 | } | ||
| 2408 | |||
| 2409 | static int cat_write(void *vkey, void *datum, void *ptr) | ||
| 2410 | { | ||
| 2411 | char *key = vkey; | ||
| 2412 | struct cat_datum *catdatum = datum; | ||
| 2413 | struct policy_data *pd = ptr; | ||
| 2414 | void *fp = pd->fp; | ||
| 2415 | __le32 buf[3]; | ||
| 2416 | size_t len; | ||
| 2417 | int rc; | ||
| 2418 | |||
| 2419 | len = strlen(key); | ||
| 2420 | buf[0] = cpu_to_le32(len); | ||
| 2421 | buf[1] = cpu_to_le32(catdatum->value); | ||
| 2422 | buf[2] = cpu_to_le32(catdatum->isalias); | ||
| 2423 | rc = put_entry(buf, sizeof(u32), 3, fp); | ||
| 2424 | if (rc) | ||
| 2425 | return rc; | ||
| 2426 | |||
| 2427 | rc = put_entry(key, 1, len, fp); | ||
| 2428 | if (rc) | ||
| 2429 | return rc; | ||
| 2430 | |||
| 2431 | return 0; | ||
| 2432 | } | ||
| 2433 | |||
| 2434 | static int role_trans_write(struct role_trans *r, void *fp) | ||
| 2435 | { | ||
| 2436 | struct role_trans *tr; | ||
| 2437 | u32 buf[3]; | ||
| 2438 | size_t nel; | ||
| 2439 | int rc; | ||
| 2440 | |||
| 2441 | nel = 0; | ||
| 2442 | for (tr = r; tr; tr = tr->next) | ||
| 2443 | nel++; | ||
| 2444 | buf[0] = cpu_to_le32(nel); | ||
| 2445 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 2446 | if (rc) | ||
| 2447 | return rc; | ||
| 2448 | for (tr = r; tr; tr = tr->next) { | ||
| 2449 | buf[0] = cpu_to_le32(tr->role); | ||
| 2450 | buf[1] = cpu_to_le32(tr->type); | ||
| 2451 | buf[2] = cpu_to_le32(tr->new_role); | ||
| 2452 | rc = put_entry(buf, sizeof(u32), 3, fp); | ||
| 2453 | if (rc) | ||
| 2454 | return rc; | ||
| 2455 | } | ||
| 2456 | |||
| 2457 | return 0; | ||
| 2458 | } | ||
| 2459 | |||
| 2460 | static int role_allow_write(struct role_allow *r, void *fp) | ||
| 2461 | { | ||
| 2462 | struct role_allow *ra; | ||
| 2463 | u32 buf[2]; | ||
| 2464 | size_t nel; | ||
| 2465 | int rc; | ||
| 2466 | |||
| 2467 | nel = 0; | ||
| 2468 | for (ra = r; ra; ra = ra->next) | ||
| 2469 | nel++; | ||
| 2470 | buf[0] = cpu_to_le32(nel); | ||
| 2471 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 2472 | if (rc) | ||
| 2473 | return rc; | ||
| 2474 | for (ra = r; ra; ra = ra->next) { | ||
| 2475 | buf[0] = cpu_to_le32(ra->role); | ||
| 2476 | buf[1] = cpu_to_le32(ra->new_role); | ||
| 2477 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
| 2478 | if (rc) | ||
| 2479 | return rc; | ||
| 2480 | } | ||
| 2481 | return 0; | ||
| 2482 | } | ||
| 2483 | |||
| 2484 | /* | ||
| 2485 | * Write a security context structure | ||
| 2486 | * to a policydb binary representation file. | ||
| 2487 | */ | ||
| 2488 | static int context_write(struct policydb *p, struct context *c, | ||
| 2489 | void *fp) | ||
| 2490 | { | ||
| 2491 | int rc; | ||
| 2492 | __le32 buf[3]; | ||
| 2493 | |||
| 2494 | buf[0] = cpu_to_le32(c->user); | ||
| 2495 | buf[1] = cpu_to_le32(c->role); | ||
| 2496 | buf[2] = cpu_to_le32(c->type); | ||
| 2497 | |||
| 2498 | rc = put_entry(buf, sizeof(u32), 3, fp); | ||
| 2499 | if (rc) | ||
| 2500 | return rc; | ||
| 2501 | |||
| 2502 | rc = mls_write_range_helper(&c->range, fp); | ||
| 2503 | if (rc) | ||
| 2504 | return rc; | ||
| 2505 | |||
| 2506 | return 0; | ||
| 2507 | } | ||
| 2508 | |||
| 2509 | /* | ||
| 2510 | * The following *_write functions are used to | ||
| 2511 | * write the symbol data to a policy database | ||
| 2512 | * binary representation file. | ||
| 2513 | */ | ||
| 2514 | |||
| 2515 | static int perm_write(void *vkey, void *datum, void *fp) | ||
| 2516 | { | ||
| 2517 | char *key = vkey; | ||
| 2518 | struct perm_datum *perdatum = datum; | ||
| 2519 | __le32 buf[2]; | ||
| 2520 | size_t len; | ||
| 2521 | int rc; | ||
| 2522 | |||
| 2523 | len = strlen(key); | ||
| 2524 | buf[0] = cpu_to_le32(len); | ||
| 2525 | buf[1] = cpu_to_le32(perdatum->value); | ||
| 2526 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
| 2527 | if (rc) | ||
| 2528 | return rc; | ||
| 2529 | |||
| 2530 | rc = put_entry(key, 1, len, fp); | ||
| 2531 | if (rc) | ||
| 2532 | return rc; | ||
| 2533 | |||
| 2534 | return 0; | ||
| 2535 | } | ||
| 2536 | |||
| 2537 | static int common_write(void *vkey, void *datum, void *ptr) | ||
| 2538 | { | ||
| 2539 | char *key = vkey; | ||
| 2540 | struct common_datum *comdatum = datum; | ||
| 2541 | struct policy_data *pd = ptr; | ||
| 2542 | void *fp = pd->fp; | ||
| 2543 | __le32 buf[4]; | ||
| 2544 | size_t len; | ||
| 2545 | int rc; | ||
| 2546 | |||
| 2547 | len = strlen(key); | ||
| 2548 | buf[0] = cpu_to_le32(len); | ||
| 2549 | buf[1] = cpu_to_le32(comdatum->value); | ||
| 2550 | buf[2] = cpu_to_le32(comdatum->permissions.nprim); | ||
| 2551 | buf[3] = cpu_to_le32(comdatum->permissions.table->nel); | ||
| 2552 | rc = put_entry(buf, sizeof(u32), 4, fp); | ||
| 2553 | if (rc) | ||
| 2554 | return rc; | ||
| 2555 | |||
| 2556 | rc = put_entry(key, 1, len, fp); | ||
| 2557 | if (rc) | ||
| 2558 | return rc; | ||
| 2559 | |||
| 2560 | rc = hashtab_map(comdatum->permissions.table, perm_write, fp); | ||
| 2561 | if (rc) | ||
| 2562 | return rc; | ||
| 2563 | |||
| 2564 | return 0; | ||
| 2565 | } | ||
| 2566 | |||
| 2567 | static int write_cons_helper(struct policydb *p, struct constraint_node *node, | ||
| 2568 | void *fp) | ||
| 2569 | { | ||
| 2570 | struct constraint_node *c; | ||
| 2571 | struct constraint_expr *e; | ||
| 2572 | __le32 buf[3]; | ||
| 2573 | u32 nel; | ||
| 2574 | int rc; | ||
| 2575 | |||
| 2576 | for (c = node; c; c = c->next) { | ||
| 2577 | nel = 0; | ||
| 2578 | for (e = c->expr; e; e = e->next) | ||
| 2579 | nel++; | ||
| 2580 | buf[0] = cpu_to_le32(c->permissions); | ||
| 2581 | buf[1] = cpu_to_le32(nel); | ||
| 2582 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
| 2583 | if (rc) | ||
| 2584 | return rc; | ||
| 2585 | for (e = c->expr; e; e = e->next) { | ||
| 2586 | buf[0] = cpu_to_le32(e->expr_type); | ||
| 2587 | buf[1] = cpu_to_le32(e->attr); | ||
| 2588 | buf[2] = cpu_to_le32(e->op); | ||
| 2589 | rc = put_entry(buf, sizeof(u32), 3, fp); | ||
| 2590 | if (rc) | ||
| 2591 | return rc; | ||
| 2592 | |||
| 2593 | switch (e->expr_type) { | ||
| 2594 | case CEXPR_NAMES: | ||
| 2595 | rc = ebitmap_write(&e->names, fp); | ||
| 2596 | if (rc) | ||
| 2597 | return rc; | ||
| 2598 | break; | ||
| 2599 | default: | ||
| 2600 | break; | ||
| 2601 | } | ||
| 2602 | } | ||
| 2603 | } | ||
| 2604 | |||
| 2605 | return 0; | ||
| 2606 | } | ||
| 2607 | |||
| 2608 | static int class_write(void *vkey, void *datum, void *ptr) | ||
| 2609 | { | ||
| 2610 | char *key = vkey; | ||
| 2611 | struct class_datum *cladatum = datum; | ||
| 2612 | struct policy_data *pd = ptr; | ||
| 2613 | void *fp = pd->fp; | ||
| 2614 | struct policydb *p = pd->p; | ||
| 2615 | struct constraint_node *c; | ||
| 2616 | __le32 buf[6]; | ||
| 2617 | u32 ncons; | ||
| 2618 | size_t len, len2; | ||
| 2619 | int rc; | ||
| 2620 | |||
| 2621 | len = strlen(key); | ||
| 2622 | if (cladatum->comkey) | ||
| 2623 | len2 = strlen(cladatum->comkey); | ||
| 2624 | else | ||
| 2625 | len2 = 0; | ||
| 2626 | |||
| 2627 | ncons = 0; | ||
| 2628 | for (c = cladatum->constraints; c; c = c->next) | ||
| 2629 | ncons++; | ||
| 2630 | |||
| 2631 | buf[0] = cpu_to_le32(len); | ||
| 2632 | buf[1] = cpu_to_le32(len2); | ||
| 2633 | buf[2] = cpu_to_le32(cladatum->value); | ||
| 2634 | buf[3] = cpu_to_le32(cladatum->permissions.nprim); | ||
| 2635 | if (cladatum->permissions.table) | ||
| 2636 | buf[4] = cpu_to_le32(cladatum->permissions.table->nel); | ||
| 2637 | else | ||
| 2638 | buf[4] = 0; | ||
| 2639 | buf[5] = cpu_to_le32(ncons); | ||
| 2640 | rc = put_entry(buf, sizeof(u32), 6, fp); | ||
| 2641 | if (rc) | ||
| 2642 | return rc; | ||
| 2643 | |||
| 2644 | rc = put_entry(key, 1, len, fp); | ||
| 2645 | if (rc) | ||
| 2646 | return rc; | ||
| 2647 | |||
| 2648 | if (cladatum->comkey) { | ||
| 2649 | rc = put_entry(cladatum->comkey, 1, len2, fp); | ||
| 2650 | if (rc) | ||
| 2651 | return rc; | ||
| 2652 | } | ||
| 2653 | |||
| 2654 | rc = hashtab_map(cladatum->permissions.table, perm_write, fp); | ||
| 2655 | if (rc) | ||
| 2656 | return rc; | ||
| 2657 | |||
| 2658 | rc = write_cons_helper(p, cladatum->constraints, fp); | ||
| 2659 | if (rc) | ||
| 2660 | return rc; | ||
| 2661 | |||
| 2662 | /* write out the validatetrans rule */ | ||
| 2663 | ncons = 0; | ||
| 2664 | for (c = cladatum->validatetrans; c; c = c->next) | ||
| 2665 | ncons++; | ||
| 2666 | |||
| 2667 | buf[0] = cpu_to_le32(ncons); | ||
| 2668 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 2669 | if (rc) | ||
| 2670 | return rc; | ||
| 2671 | |||
| 2672 | rc = write_cons_helper(p, cladatum->validatetrans, fp); | ||
| 2673 | if (rc) | ||
| 2674 | return rc; | ||
| 2675 | |||
| 2676 | return 0; | ||
| 2677 | } | ||
| 2678 | |||
| 2679 | static int role_write(void *vkey, void *datum, void *ptr) | ||
| 2680 | { | ||
| 2681 | char *key = vkey; | ||
| 2682 | struct role_datum *role = datum; | ||
| 2683 | struct policy_data *pd = ptr; | ||
| 2684 | void *fp = pd->fp; | ||
| 2685 | struct policydb *p = pd->p; | ||
| 2686 | __le32 buf[3]; | ||
| 2687 | size_t items, len; | ||
| 2688 | int rc; | ||
| 2689 | |||
| 2690 | len = strlen(key); | ||
| 2691 | items = 0; | ||
| 2692 | buf[items++] = cpu_to_le32(len); | ||
| 2693 | buf[items++] = cpu_to_le32(role->value); | ||
| 2694 | if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) | ||
| 2695 | buf[items++] = cpu_to_le32(role->bounds); | ||
| 2696 | |||
| 2697 | BUG_ON(items > (sizeof(buf)/sizeof(buf[0]))); | ||
| 2698 | |||
| 2699 | rc = put_entry(buf, sizeof(u32), items, fp); | ||
| 2700 | if (rc) | ||
| 2701 | return rc; | ||
| 2702 | |||
| 2703 | rc = put_entry(key, 1, len, fp); | ||
| 2704 | if (rc) | ||
| 2705 | return rc; | ||
| 2706 | |||
| 2707 | rc = ebitmap_write(&role->dominates, fp); | ||
| 2708 | if (rc) | ||
| 2709 | return rc; | ||
| 2710 | |||
| 2711 | rc = ebitmap_write(&role->types, fp); | ||
| 2712 | if (rc) | ||
| 2713 | return rc; | ||
| 2714 | |||
| 2715 | return 0; | ||
| 2716 | } | ||
| 2717 | |||
| 2718 | static int type_write(void *vkey, void *datum, void *ptr) | ||
| 2719 | { | ||
| 2720 | char *key = vkey; | ||
| 2721 | struct type_datum *typdatum = datum; | ||
| 2722 | struct policy_data *pd = ptr; | ||
| 2723 | struct policydb *p = pd->p; | ||
| 2724 | void *fp = pd->fp; | ||
| 2725 | __le32 buf[4]; | ||
| 2726 | int rc; | ||
| 2727 | size_t items, len; | ||
| 2728 | |||
| 2729 | len = strlen(key); | ||
| 2730 | items = 0; | ||
| 2731 | buf[items++] = cpu_to_le32(len); | ||
| 2732 | buf[items++] = cpu_to_le32(typdatum->value); | ||
| 2733 | if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) { | ||
| 2734 | u32 properties = 0; | ||
| 2735 | |||
| 2736 | if (typdatum->primary) | ||
| 2737 | properties |= TYPEDATUM_PROPERTY_PRIMARY; | ||
| 2738 | |||
| 2739 | if (typdatum->attribute) | ||
| 2740 | properties |= TYPEDATUM_PROPERTY_ATTRIBUTE; | ||
| 2741 | |||
| 2742 | buf[items++] = cpu_to_le32(properties); | ||
| 2743 | buf[items++] = cpu_to_le32(typdatum->bounds); | ||
| 2744 | } else { | ||
| 2745 | buf[items++] = cpu_to_le32(typdatum->primary); | ||
| 2746 | } | ||
| 2747 | BUG_ON(items > (sizeof(buf) / sizeof(buf[0]))); | ||
| 2748 | rc = put_entry(buf, sizeof(u32), items, fp); | ||
| 2749 | if (rc) | ||
| 2750 | return rc; | ||
| 2751 | |||
| 2752 | rc = put_entry(key, 1, len, fp); | ||
| 2753 | if (rc) | ||
| 2754 | return rc; | ||
| 2755 | |||
| 2756 | return 0; | ||
| 2757 | } | ||
| 2758 | |||
| 2759 | static int user_write(void *vkey, void *datum, void *ptr) | ||
| 2760 | { | ||
| 2761 | char *key = vkey; | ||
| 2762 | struct user_datum *usrdatum = datum; | ||
| 2763 | struct policy_data *pd = ptr; | ||
| 2764 | struct policydb *p = pd->p; | ||
| 2765 | void *fp = pd->fp; | ||
| 2766 | __le32 buf[3]; | ||
| 2767 | size_t items, len; | ||
| 2768 | int rc; | ||
| 2769 | |||
| 2770 | len = strlen(key); | ||
| 2771 | items = 0; | ||
| 2772 | buf[items++] = cpu_to_le32(len); | ||
| 2773 | buf[items++] = cpu_to_le32(usrdatum->value); | ||
| 2774 | if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) | ||
| 2775 | buf[items++] = cpu_to_le32(usrdatum->bounds); | ||
| 2776 | BUG_ON(items > (sizeof(buf) / sizeof(buf[0]))); | ||
| 2777 | rc = put_entry(buf, sizeof(u32), items, fp); | ||
| 2778 | if (rc) | ||
| 2779 | return rc; | ||
| 2780 | |||
| 2781 | rc = put_entry(key, 1, len, fp); | ||
| 2782 | if (rc) | ||
| 2783 | return rc; | ||
| 2784 | |||
| 2785 | rc = ebitmap_write(&usrdatum->roles, fp); | ||
| 2786 | if (rc) | ||
| 2787 | return rc; | ||
| 2788 | |||
| 2789 | rc = mls_write_range_helper(&usrdatum->range, fp); | ||
| 2790 | if (rc) | ||
| 2791 | return rc; | ||
| 2792 | |||
| 2793 | rc = mls_write_level(&usrdatum->dfltlevel, fp); | ||
| 2794 | if (rc) | ||
| 2795 | return rc; | ||
| 2796 | |||
| 2797 | return 0; | ||
| 2798 | } | ||
| 2799 | |||
| 2800 | static int (*write_f[SYM_NUM]) (void *key, void *datum, | ||
| 2801 | void *datap) = | ||
| 2802 | { | ||
| 2803 | common_write, | ||
| 2804 | class_write, | ||
| 2805 | role_write, | ||
| 2806 | type_write, | ||
| 2807 | user_write, | ||
| 2808 | cond_write_bool, | ||
| 2809 | sens_write, | ||
| 2810 | cat_write, | ||
| 2811 | }; | ||
| 2812 | |||
| 2813 | static int ocontext_write(struct policydb *p, struct policydb_compat_info *info, | ||
| 2814 | void *fp) | ||
| 2815 | { | ||
| 2816 | unsigned int i, j, rc; | ||
| 2817 | size_t nel, len; | ||
| 2818 | __le32 buf[3]; | ||
| 2819 | u32 nodebuf[8]; | ||
| 2820 | struct ocontext *c; | ||
| 2821 | for (i = 0; i < info->ocon_num; i++) { | ||
| 2822 | nel = 0; | ||
| 2823 | for (c = p->ocontexts[i]; c; c = c->next) | ||
| 2824 | nel++; | ||
| 2825 | buf[0] = cpu_to_le32(nel); | ||
| 2826 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 2827 | if (rc) | ||
| 2828 | return rc; | ||
| 2829 | for (c = p->ocontexts[i]; c; c = c->next) { | ||
| 2830 | switch (i) { | ||
| 2831 | case OCON_ISID: | ||
| 2832 | buf[0] = cpu_to_le32(c->sid[0]); | ||
| 2833 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 2834 | if (rc) | ||
| 2835 | return rc; | ||
| 2836 | rc = context_write(p, &c->context[0], fp); | ||
| 2837 | if (rc) | ||
| 2838 | return rc; | ||
| 2839 | break; | ||
| 2840 | case OCON_FS: | ||
| 2841 | case OCON_NETIF: | ||
| 2842 | len = strlen(c->u.name); | ||
| 2843 | buf[0] = cpu_to_le32(len); | ||
| 2844 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 2845 | if (rc) | ||
| 2846 | return rc; | ||
| 2847 | rc = put_entry(c->u.name, 1, len, fp); | ||
| 2848 | if (rc) | ||
| 2849 | return rc; | ||
| 2850 | rc = context_write(p, &c->context[0], fp); | ||
| 2851 | if (rc) | ||
| 2852 | return rc; | ||
| 2853 | rc = context_write(p, &c->context[1], fp); | ||
| 2854 | if (rc) | ||
| 2855 | return rc; | ||
| 2856 | break; | ||
| 2857 | case OCON_PORT: | ||
| 2858 | buf[0] = cpu_to_le32(c->u.port.protocol); | ||
| 2859 | buf[1] = cpu_to_le32(c->u.port.low_port); | ||
| 2860 | buf[2] = cpu_to_le32(c->u.port.high_port); | ||
| 2861 | rc = put_entry(buf, sizeof(u32), 3, fp); | ||
| 2862 | if (rc) | ||
| 2863 | return rc; | ||
| 2864 | rc = context_write(p, &c->context[0], fp); | ||
| 2865 | if (rc) | ||
| 2866 | return rc; | ||
| 2867 | break; | ||
| 2868 | case OCON_NODE: | ||
| 2869 | nodebuf[0] = c->u.node.addr; /* network order */ | ||
| 2870 | nodebuf[1] = c->u.node.mask; /* network order */ | ||
| 2871 | rc = put_entry(nodebuf, sizeof(u32), 2, fp); | ||
| 2872 | if (rc) | ||
| 2873 | return rc; | ||
| 2874 | rc = context_write(p, &c->context[0], fp); | ||
| 2875 | if (rc) | ||
| 2876 | return rc; | ||
| 2877 | break; | ||
| 2878 | case OCON_FSUSE: | ||
| 2879 | buf[0] = cpu_to_le32(c->v.behavior); | ||
| 2880 | len = strlen(c->u.name); | ||
| 2881 | buf[1] = cpu_to_le32(len); | ||
| 2882 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
| 2883 | if (rc) | ||
| 2884 | return rc; | ||
| 2885 | rc = put_entry(c->u.name, 1, len, fp); | ||
| 2886 | if (rc) | ||
| 2887 | return rc; | ||
| 2888 | rc = context_write(p, &c->context[0], fp); | ||
| 2889 | if (rc) | ||
| 2890 | return rc; | ||
| 2891 | break; | ||
| 2892 | case OCON_NODE6: | ||
| 2893 | for (j = 0; j < 4; j++) | ||
| 2894 | nodebuf[j] = c->u.node6.addr[j]; /* network order */ | ||
| 2895 | for (j = 0; j < 4; j++) | ||
| 2896 | nodebuf[j + 4] = c->u.node6.mask[j]; /* network order */ | ||
| 2897 | rc = put_entry(nodebuf, sizeof(u32), 8, fp); | ||
| 2898 | if (rc) | ||
| 2899 | return rc; | ||
| 2900 | rc = context_write(p, &c->context[0], fp); | ||
| 2901 | if (rc) | ||
| 2902 | return rc; | ||
| 2903 | break; | ||
| 2904 | } | ||
| 2905 | } | ||
| 2906 | } | ||
| 2907 | return 0; | ||
| 2908 | } | ||
| 2909 | |||
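Each ocontext section starts with a u32 record count, then the records themselves; per the switch above, an OCON_PORT record is three little-endian u32s (protocol, low port, high port) followed by a serialized context. A hedged userspace sketch that decodes only the fixed-size prefix (parsing the context itself needs the full context_read() logic, omitted here; all names below are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    /* Decode a little-endian u32 from a byte buffer. */
    static uint32_t get_le32(const unsigned char *p)
    {
            return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
                   ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
    }

    int main(void)
    {
            /* Example bytes as ocontext_write() would emit for one port
             * record: protocol = 6 (TCP), low_port = 80, high_port = 80. */
            const unsigned char rec[12] = {
                    6, 0, 0, 0,  80, 0, 0, 0,  80, 0, 0, 0,
            };
            uint32_t protocol = get_le32(rec);
            uint32_t low  = get_le32(rec + 4);
            uint32_t high = get_le32(rec + 8);

            printf("proto=%u ports=%u-%u\n", protocol, low, high);
            /* The record's security context follows in the stream. */
            return 0;
    }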
| 2910 | static int genfs_write(struct policydb *p, void *fp) | ||
| 2911 | { | ||
| 2912 | struct genfs *genfs; | ||
| 2913 | struct ocontext *c; | ||
| 2914 | size_t len; | ||
| 2915 | __le32 buf[1]; | ||
| 2916 | int rc; | ||
| 2917 | |||
| 2918 | len = 0; | ||
| 2919 | for (genfs = p->genfs; genfs; genfs = genfs->next) | ||
| 2920 | len++; | ||
| 2921 | buf[0] = cpu_to_le32(len); | ||
| 2922 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 2923 | if (rc) | ||
| 2924 | return rc; | ||
| 2925 | for (genfs = p->genfs; genfs; genfs = genfs->next) { | ||
| 2926 | len = strlen(genfs->fstype); | ||
| 2927 | buf[0] = cpu_to_le32(len); | ||
| 2928 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 2929 | if (rc) | ||
| 2930 | return rc; | ||
| 2931 | rc = put_entry(genfs->fstype, 1, len, fp); | ||
| 2932 | if (rc) | ||
| 2933 | return rc; | ||
| 2934 | len = 0; | ||
| 2935 | for (c = genfs->head; c; c = c->next) | ||
| 2936 | len++; | ||
| 2937 | buf[0] = cpu_to_le32(len); | ||
| 2938 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 2939 | if (rc) | ||
| 2940 | return rc; | ||
| 2941 | for (c = genfs->head; c; c = c->next) { | ||
| 2942 | len = strlen(c->u.name); | ||
| 2943 | buf[0] = cpu_to_le32(len); | ||
| 2944 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 2945 | if (rc) | ||
| 2946 | return rc; | ||
| 2947 | rc = put_entry(c->u.name, 1, len, fp); | ||
| 2948 | if (rc) | ||
| 2949 | return rc; | ||
| 2950 | buf[0] = cpu_to_le32(c->v.sclass); | ||
| 2951 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 2952 | if (rc) | ||
| 2953 | return rc; | ||
| 2954 | rc = context_write(p, &c->context[0], fp); | ||
| 2955 | if (rc) | ||
| 2956 | return rc; | ||
| 2957 | } | ||
| 2958 | } | ||
| 2959 | return 0; | ||
| 2960 | } | ||
| 2961 | |||
| 2962 | static int range_count(void *key, void *data, void *ptr) | ||
| 2963 | { | ||
| 2964 | int *cnt = ptr; | ||
| 2965 | *cnt = *cnt + 1; | ||
| 2966 | |||
| 2967 | return 0; | ||
| 2968 | } | ||
| 2969 | |||
| 2970 | static int range_write_helper(void *key, void *data, void *ptr) | ||
| 2971 | { | ||
| 2972 | __le32 buf[2]; | ||
| 2973 | struct range_trans *rt = key; | ||
| 2974 | struct mls_range *r = data; | ||
| 2975 | struct policy_data *pd = ptr; | ||
| 2976 | void *fp = pd->fp; | ||
| 2977 | struct policydb *p = pd->p; | ||
| 2978 | int rc; | ||
| 2979 | |||
| 2980 | buf[0] = cpu_to_le32(rt->source_type); | ||
| 2981 | buf[1] = cpu_to_le32(rt->target_type); | ||
| 2982 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
| 2983 | if (rc) | ||
| 2984 | return rc; | ||
| 2985 | if (p->policyvers >= POLICYDB_VERSION_RANGETRANS) { | ||
| 2986 | buf[0] = cpu_to_le32(rt->target_class); | ||
| 2987 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 2988 | if (rc) | ||
| 2989 | return rc; | ||
| 2990 | } | ||
| 2991 | rc = mls_write_range_helper(r, fp); | ||
| 2992 | if (rc) | ||
| 2993 | return rc; | ||
| 2994 | |||
| 2995 | return 0; | ||
| 2996 | } | ||
| 2997 | |||
| 2998 | static int range_write(struct policydb *p, void *fp) | ||
| 2999 | { | ||
| 3000 | size_t nel; | ||
| 3001 | __le32 buf[1]; | ||
| 3002 | int rc; | ||
| 3003 | struct policy_data pd; | ||
| 3004 | |||
| 3005 | pd.p = p; | ||
| 3006 | pd.fp = fp; | ||
| 3007 | |||
| 3008 | /* count the number of entries in the hashtab */ | ||
| 3009 | nel = 0; | ||
| 3010 | rc = hashtab_map(p->range_tr, range_count, &nel); | ||
| 3011 | if (rc) | ||
| 3012 | return rc; | ||
| 3013 | |||
| 3014 | buf[0] = cpu_to_le32(nel); | ||
| 3015 | rc = put_entry(buf, sizeof(u32), 1, fp); | ||
| 3016 | if (rc) | ||
| 3017 | return rc; | ||
| 3018 | |||
| 3019 | /* actually write all of the entries */ | ||
| 3020 | rc = hashtab_map(p->range_tr, range_write_helper, &pd); | ||
| 3021 | if (rc) | ||
| 3022 | return rc; | ||
| 3023 | |||
| 3024 | return 0; | ||
| 3025 | } | ||
| 3026 | |||
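Because the format stores the record count before the records, range_write() walks the hash table twice: once with range_count() to total the entries, then with range_write_helper() to emit them. A minimal sketch of that count-then-emit pattern over a plain array, with hypothetical names standing in for hashtab_map():

    #include <stdio.h>

    /* Stand-in for hashtab_map(): applies fn to every element. */
    static int map(const int *elems, int n, int (*fn)(int, void *), void *arg)
    {
            for (int i = 0; i < n; i++) {
                    int rc = fn(elems[i], arg);
                    if (rc)
                            return rc;
            }
            return 0;
    }

    static int count_cb(int elem, void *arg)
    {
            (void)elem;
            (*(int *)arg)++;
            return 0;
    }

    static int write_cb(int elem, void *arg)
    {
            (void)arg;
            printf("record %d\n", elem);
            return 0;
    }

    int main(void)
    {
            int elems[] = { 3, 1, 4 };
            int nel = 0;

            map(elems, 3, count_cb, &nel);        /* pass 1: count */
            printf("count %d\n", nel);            /* header precedes records */
            return map(elems, 3, write_cb, NULL); /* pass 2: emit */
    }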
| 3027 | /* | ||
| 3028 | * Write the configuration data in a policy database | ||
| 3029 | * structure to a policy database binary representation | ||
| 3030 | * file. | ||
| 3031 | */ | ||
| 3032 | int policydb_write(struct policydb *p, void *fp) | ||
| 3033 | { | ||
| 3034 | unsigned int i, num_syms; | ||
| 3035 | int rc; | ||
| 3036 | __le32 buf[4]; | ||
| 3037 | u32 config; | ||
| 3038 | size_t len; | ||
| 3039 | struct policydb_compat_info *info; | ||
| 3040 | |||
| 3041 | /* | ||
| 3042 | * refuse to write policy older than compressed avtab | ||
| 3043 | * to simplify the writer. There are other tests dropped | ||
| 3044 | * since we assume this throughout the writer code. Be | ||
| 3045 | * careful if you ever try to remove this restriction | ||
| 3046 | */ | ||
| 3047 | if (p->policyvers < POLICYDB_VERSION_AVTAB) { | ||
| 3048 | printk(KERN_ERR "SELinux: refusing to write policy version %d;" | ||
| 3049 | " it is less than version %d\n", p->policyvers, | ||
| 3050 | POLICYDB_VERSION_AVTAB); | ||
| 3051 | return -EINVAL; | ||
| 3052 | } | ||
| 3053 | |||
| 3054 | config = 0; | ||
| 3055 | if (p->mls_enabled) | ||
| 3056 | config |= POLICYDB_CONFIG_MLS; | ||
| 3057 | |||
| 3058 | if (p->reject_unknown) | ||
| 3059 | config |= REJECT_UNKNOWN; | ||
| 3060 | if (p->allow_unknown) | ||
| 3061 | config |= ALLOW_UNKNOWN; | ||
| 3062 | |||
| 3063 | /* Write the magic number and string identifiers. */ | ||
| 3064 | buf[0] = cpu_to_le32(POLICYDB_MAGIC); | ||
| 3065 | len = strlen(POLICYDB_STRING); | ||
| 3066 | buf[1] = cpu_to_le32(len); | ||
| 3067 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
| 3068 | if (rc) | ||
| 3069 | return rc; | ||
| 3070 | rc = put_entry(POLICYDB_STRING, 1, len, fp); | ||
| 3071 | if (rc) | ||
| 3072 | return rc; | ||
| 3073 | |||
| 3074 | /* Write the version, config, and table sizes. */ | ||
| 3075 | info = policydb_lookup_compat(p->policyvers); | ||
| 3076 | if (!info) { | ||
| 3077 | printk(KERN_ERR "SELinux: compatibility lookup failed for policy " | ||
| 3078 | "version %d", p->policyvers); | ||
| 3079 | return rc; | ||
| 3080 | } | ||
| 3081 | |||
| 3082 | buf[0] = cpu_to_le32(p->policyvers); | ||
| 3083 | buf[1] = cpu_to_le32(config); | ||
| 3084 | buf[2] = cpu_to_le32(info->sym_num); | ||
| 3085 | buf[3] = cpu_to_le32(info->ocon_num); | ||
| 3086 | |||
| 3087 | rc = put_entry(buf, sizeof(u32), 4, fp); | ||
| 3088 | if (rc) | ||
| 3089 | return rc; | ||
| 3090 | |||
| 3091 | if (p->policyvers >= POLICYDB_VERSION_POLCAP) { | ||
| 3092 | rc = ebitmap_write(&p->policycaps, fp); | ||
| 3093 | if (rc) | ||
| 3094 | return rc; | ||
| 3095 | } | ||
| 3096 | |||
| 3097 | if (p->policyvers >= POLICYDB_VERSION_PERMISSIVE) { | ||
| 3098 | rc = ebitmap_write(&p->permissive_map, fp); | ||
| 3099 | if (rc) | ||
| 3100 | return rc; | ||
| 3101 | } | ||
| 3102 | |||
| 3103 | num_syms = info->sym_num; | ||
| 3104 | for (i = 0; i < num_syms; i++) { | ||
| 3105 | struct policy_data pd; | ||
| 3106 | |||
| 3107 | pd.fp = fp; | ||
| 3108 | pd.p = p; | ||
| 3109 | |||
| 3110 | buf[0] = cpu_to_le32(p->symtab[i].nprim); | ||
| 3111 | buf[1] = cpu_to_le32(p->symtab[i].table->nel); | ||
| 3112 | |||
| 3113 | rc = put_entry(buf, sizeof(u32), 2, fp); | ||
| 3114 | if (rc) | ||
| 3115 | return rc; | ||
| 3116 | rc = hashtab_map(p->symtab[i].table, write_f[i], &pd); | ||
| 3117 | if (rc) | ||
| 3118 | return rc; | ||
| 3119 | } | ||
| 3120 | |||
| 3121 | rc = avtab_write(p, &p->te_avtab, fp); | ||
| 3122 | if (rc) | ||
| 3123 | return rc; | ||
| 3124 | |||
| 3125 | rc = cond_write_list(p, p->cond_list, fp); | ||
| 3126 | if (rc) | ||
| 3127 | return rc; | ||
| 3128 | |||
| 3129 | rc = role_trans_write(p->role_tr, fp); | ||
| 3130 | if (rc) | ||
| 3131 | return rc; | ||
| 3132 | |||
| 3133 | rc = role_allow_write(p->role_allow, fp); | ||
| 3134 | if (rc) | ||
| 3135 | return rc; | ||
| 3136 | |||
| 3137 | rc = ocontext_write(p, info, fp); | ||
| 3138 | if (rc) | ||
| 3139 | return rc; | ||
| 3140 | |||
| 3141 | rc = genfs_write(p, fp); | ||
| 3142 | if (rc) | ||
| 3143 | return rc; | ||
| 3144 | |||
| 3145 | rc = range_write(p, fp); | ||
| 3146 | if (rc) | ||
| 3147 | return rc; | ||
| 3148 | |||
| 3149 | for (i = 0; i < p->p_types.nprim; i++) { | ||
| 3150 | struct ebitmap *e = flex_array_get(p->type_attr_map_array, i); | ||
| 3151 | |||
| 3152 | BUG_ON(!e); | ||
| 3153 | rc = ebitmap_write(e, fp); | ||
| 3154 | if (rc) | ||
| 3155 | return rc; | ||
| 3156 | } | ||
| 3157 | |||
| 3158 | return 0; | ||
| 3159 | } | ||
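Strings in the image, starting with the identifier policydb_write() emits right after the magic number, are framed as a u32 length followed by the raw bytes with no NUL terminator. A hedged userspace sketch of that header framing; the magic and identifier values below are illustrative stand-ins for POLICYDB_MAGIC and POLICYDB_STRING:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Illustrative values; the kernel defines the real constants. */
    #define MAGIC 0xf97cff8cu
    static const char ident[] = "SE Linux";

    static void put_le32(FILE *fp, uint32_t v)
    {
            unsigned char b[4] = {
                    (unsigned char)v, (unsigned char)(v >> 8),
                    (unsigned char)(v >> 16), (unsigned char)(v >> 24),
            };
            fwrite(b, 1, 4, fp);
    }

    int main(void)
    {
            FILE *fp = fopen("header.bin", "wb");
            if (!fp)
                    return 1;

            put_le32(fp, MAGIC);                   /* magic number */
            put_le32(fp, (uint32_t)strlen(ident)); /* string length prefix */
            fwrite(ident, 1, strlen(ident), fp);   /* raw bytes, no NUL */

            return fclose(fp);
    }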
diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h index 310e94442cb8..95d3d7de361e 100644 --- a/security/selinux/ss/policydb.h +++ b/security/selinux/ss/policydb.h | |||
| @@ -254,6 +254,9 @@ struct policydb { | |||
| 254 | 254 | ||
| 255 | struct ebitmap permissive_map; | 255 | struct ebitmap permissive_map; |
| 256 | 256 | ||
| 257 | /* length of this policy when it was loaded */ | ||
| 258 | size_t len; | ||
| 259 | |||
| 257 | unsigned int policyvers; | 260 | unsigned int policyvers; |
| 258 | 261 | ||
| 259 | unsigned int reject_unknown : 1; | 262 | unsigned int reject_unknown : 1; |
| @@ -270,6 +273,7 @@ extern int policydb_class_isvalid(struct policydb *p, unsigned int class); | |||
| 270 | extern int policydb_type_isvalid(struct policydb *p, unsigned int type); | 273 | extern int policydb_type_isvalid(struct policydb *p, unsigned int type); |
| 271 | extern int policydb_role_isvalid(struct policydb *p, unsigned int role); | 274 | extern int policydb_role_isvalid(struct policydb *p, unsigned int role); |
| 272 | extern int policydb_read(struct policydb *p, void *fp); | 275 | extern int policydb_read(struct policydb *p, void *fp); |
| 276 | extern int policydb_write(struct policydb *p, void *fp); | ||
| 273 | 277 | ||
| 274 | #define PERM_SYMTAB_SIZE 32 | 278 | #define PERM_SYMTAB_SIZE 32 |
| 275 | 279 | ||
| @@ -290,6 +294,11 @@ struct policy_file { | |||
| 290 | size_t len; | 294 | size_t len; |
| 291 | }; | 295 | }; |
| 292 | 296 | ||
| 297 | struct policy_data { | ||
| 298 | struct policydb *p; | ||
| 299 | void *fp; | ||
| 300 | }; | ||
| 301 | |||
| 293 | static inline int next_entry(void *buf, struct policy_file *fp, size_t bytes) | 302 | static inline int next_entry(void *buf, struct policy_file *fp, size_t bytes) |
| 294 | { | 303 | { |
| 295 | if (bytes > fp->len) | 304 | if (bytes > fp->len) |
| @@ -301,6 +310,17 @@ static inline int next_entry(void *buf, struct policy_file *fp, size_t bytes) | |||
| 301 | return 0; | 310 | return 0; |
| 302 | } | 311 | } |
| 303 | 312 | ||
| 313 | static inline int put_entry(void *buf, size_t bytes, int num, struct policy_file *fp) | ||
| 314 | { | ||
| 315 | size_t len = bytes * num; | ||
| 316 | |||
| 317 | memcpy(fp->data, buf, len); | ||
| 318 | fp->data += len; | ||
| 319 | fp->len -= len; | ||
| 320 | |||
| 321 | return 0; | ||
| 322 | } | ||
| 323 | |||
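put_entry() above performs no bounds check; it relies on the caller having sized fp->data from security_policydb_len(), so the writer can never overrun in practice. A defensive variant, shown here as a sketch of an assumption rather than in-tree code, would reject oversized writes inside the same header:

    /* Sketch only: put_entry() with an explicit bounds check; the version
     * above trusts that the buffer was pre-sized to policydb.len. */
    static inline int put_entry_checked(const void *buf, size_t bytes, int num,
                                        struct policy_file *fp)
    {
            size_t len = bytes * num;

            if (len > fp->len)
                    return -EINVAL;
            memcpy(fp->data, buf, len);
            fp->data += len;
            fp->len -= len;
            return 0;
    }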
| 304 | extern u16 string_to_security_class(struct policydb *p, const char *name); | 324 | extern u16 string_to_security_class(struct policydb *p, const char *name); |
| 305 | extern u32 string_to_av_perm(struct policydb *p, u16 tclass, const char *name); | 325 | extern u32 string_to_av_perm(struct policydb *p, u16 tclass, const char *name); |
| 306 | 326 | ||
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 9ea2feca3cd4..223c1ff6ef23 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c | |||
| @@ -51,6 +51,7 @@ | |||
| 51 | #include <linux/mutex.h> | 51 | #include <linux/mutex.h> |
| 52 | #include <linux/selinux.h> | 52 | #include <linux/selinux.h> |
| 53 | #include <linux/flex_array.h> | 53 | #include <linux/flex_array.h> |
| 54 | #include <linux/vmalloc.h> | ||
| 54 | #include <net/netlabel.h> | 55 | #include <net/netlabel.h> |
| 55 | 56 | ||
| 56 | #include "flask.h" | 57 | #include "flask.h" |
| @@ -991,7 +992,8 @@ static int context_struct_to_string(struct context *context, char **scontext, u3 | |||
| 991 | { | 992 | { |
| 992 | char *scontextp; | 993 | char *scontextp; |
| 993 | 994 | ||
| 994 | *scontext = NULL; | 995 | if (scontext) |
| 996 | *scontext = NULL; | ||
| 995 | *scontext_len = 0; | 997 | *scontext_len = 0; |
| 996 | 998 | ||
| 997 | if (context->len) { | 999 | if (context->len) { |
| @@ -1008,6 +1010,9 @@ static int context_struct_to_string(struct context *context, char **scontext, u3 | |||
| 1008 | *scontext_len += strlen(policydb.p_type_val_to_name[context->type - 1]) + 1; | 1010 | *scontext_len += strlen(policydb.p_type_val_to_name[context->type - 1]) + 1; |
| 1009 | *scontext_len += mls_compute_context_len(context); | 1011 | *scontext_len += mls_compute_context_len(context); |
| 1010 | 1012 | ||
| 1013 | if (!scontext) | ||
| 1014 | return 0; | ||
| 1015 | |||
| 1011 | /* Allocate space for the context; caller must free this space. */ | 1016 | /* Allocate space for the context; caller must free this space. */ |
| 1012 | scontextp = kmalloc(*scontext_len, GFP_ATOMIC); | 1017 | scontextp = kmalloc(*scontext_len, GFP_ATOMIC); |
| 1013 | if (!scontextp) | 1018 | if (!scontextp) |
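The hunk above teaches context_struct_to_string() to accept scontext == NULL as a pure length query: the caller learns scontext_len without any allocation, then can call again with a real pointer to fetch the string. A minimal sketch of the same two-phase convention, with hypothetical names:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* If out is NULL, only report the needed length (including NUL). */
    static int render(const char *src, char **out, size_t *out_len)
    {
            *out_len = strlen(src) + 1;
            if (!out)
                    return 0;        /* length query only */
            *out = malloc(*out_len);
            if (!*out)
                    return -1;
            memcpy(*out, src, *out_len);
            return 0;
    }

    int main(void)
    {
            size_t len;
            char *buf;

            render("system_u:object_r:etc_t", NULL, &len);      /* phase 1: size */
            if (render("system_u:object_r:etc_t", &buf, &len))  /* phase 2: fetch */
                    return 1;
            printf("%zu bytes: %s\n", len, buf);
            free(buf);
            return 0;
    }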
| @@ -1047,7 +1052,8 @@ static int security_sid_to_context_core(u32 sid, char **scontext, | |||
| 1047 | struct context *context; | 1052 | struct context *context; |
| 1048 | int rc = 0; | 1053 | int rc = 0; |
| 1049 | 1054 | ||
| 1050 | *scontext = NULL; | 1055 | if (scontext) |
| 1056 | *scontext = NULL; | ||
| 1051 | *scontext_len = 0; | 1057 | *scontext_len = 0; |
| 1052 | 1058 | ||
| 1053 | if (!ss_initialized) { | 1059 | if (!ss_initialized) { |
| @@ -1055,6 +1061,8 @@ static int security_sid_to_context_core(u32 sid, char **scontext, | |||
| 1055 | char *scontextp; | 1061 | char *scontextp; |
| 1056 | 1062 | ||
| 1057 | *scontext_len = strlen(initial_sid_to_string[sid]) + 1; | 1063 | *scontext_len = strlen(initial_sid_to_string[sid]) + 1; |
| 1064 | if (!scontext) | ||
| 1065 | goto out; | ||
| 1058 | scontextp = kmalloc(*scontext_len, GFP_ATOMIC); | 1066 | scontextp = kmalloc(*scontext_len, GFP_ATOMIC); |
| 1059 | if (!scontextp) { | 1067 | if (!scontextp) { |
| 1060 | rc = -ENOMEM; | 1068 | rc = -ENOMEM; |
| @@ -1769,6 +1777,7 @@ int security_load_policy(void *data, size_t len) | |||
| 1769 | return rc; | 1777 | return rc; |
| 1770 | } | 1778 | } |
| 1771 | 1779 | ||
| 1780 | policydb.len = len; | ||
| 1772 | rc = selinux_set_mapping(&policydb, secclass_map, | 1781 | rc = selinux_set_mapping(&policydb, secclass_map, |
| 1773 | &current_mapping, | 1782 | &current_mapping, |
| 1774 | &current_mapping_size); | 1783 | &current_mapping_size); |
| @@ -1791,6 +1800,7 @@ int security_load_policy(void *data, size_t len) | |||
| 1791 | selinux_complete_init(); | 1800 | selinux_complete_init(); |
| 1792 | avc_ss_reset(seqno); | 1801 | avc_ss_reset(seqno); |
| 1793 | selnl_notify_policyload(seqno); | 1802 | selnl_notify_policyload(seqno); |
| 1803 | selinux_status_update_policyload(seqno); | ||
| 1794 | selinux_netlbl_cache_invalidate(); | 1804 | selinux_netlbl_cache_invalidate(); |
| 1795 | selinux_xfrm_notify_policyload(); | 1805 | selinux_xfrm_notify_policyload(); |
| 1796 | return 0; | 1806 | return 0; |
| @@ -1804,6 +1814,7 @@ int security_load_policy(void *data, size_t len) | |||
| 1804 | if (rc) | 1814 | if (rc) |
| 1805 | return rc; | 1815 | return rc; |
| 1806 | 1816 | ||
| 1817 | newpolicydb.len = len; | ||
| 1807 | /* If switching between different policy types, log MLS status */ | 1818 | /* If switching between different policy types, log MLS status */ |
| 1808 | if (policydb.mls_enabled && !newpolicydb.mls_enabled) | 1819 | if (policydb.mls_enabled && !newpolicydb.mls_enabled) |
| 1809 | printk(KERN_INFO "SELinux: Disabling MLS support...\n"); | 1820 | printk(KERN_INFO "SELinux: Disabling MLS support...\n"); |
| @@ -1870,6 +1881,7 @@ int security_load_policy(void *data, size_t len) | |||
| 1870 | 1881 | ||
| 1871 | avc_ss_reset(seqno); | 1882 | avc_ss_reset(seqno); |
| 1872 | selnl_notify_policyload(seqno); | 1883 | selnl_notify_policyload(seqno); |
| 1884 | selinux_status_update_policyload(seqno); | ||
| 1873 | selinux_netlbl_cache_invalidate(); | 1885 | selinux_netlbl_cache_invalidate(); |
| 1874 | selinux_xfrm_notify_policyload(); | 1886 | selinux_xfrm_notify_policyload(); |
| 1875 | 1887 | ||
| @@ -1883,6 +1895,17 @@ err: | |||
| 1883 | 1895 | ||
| 1884 | } | 1896 | } |
| 1885 | 1897 | ||
| 1898 | size_t security_policydb_len(void) | ||
| 1899 | { | ||
| 1900 | size_t len; | ||
| 1901 | |||
| 1902 | read_lock(&policy_rwlock); | ||
| 1903 | len = policydb.len; | ||
| 1904 | read_unlock(&policy_rwlock); | ||
| 1905 | |||
| 1906 | return len; | ||
| 1907 | } | ||
| 1908 | |||
| 1886 | /** | 1909 | /** |
| 1887 | * security_port_sid - Obtain the SID for a port. | 1910 | * security_port_sid - Obtain the SID for a port. |
| 1888 | * @protocol: protocol number | 1911 | * @protocol: protocol number |
| @@ -2374,6 +2397,7 @@ out: | |||
| 2374 | if (!rc) { | 2397 | if (!rc) { |
| 2375 | avc_ss_reset(seqno); | 2398 | avc_ss_reset(seqno); |
| 2376 | selnl_notify_policyload(seqno); | 2399 | selnl_notify_policyload(seqno); |
| 2400 | selinux_status_update_policyload(seqno); | ||
| 2377 | selinux_xfrm_notify_policyload(); | 2401 | selinux_xfrm_notify_policyload(); |
| 2378 | } | 2402 | } |
| 2379 | return rc; | 2403 | return rc; |
| @@ -3129,3 +3153,38 @@ netlbl_sid_to_secattr_failure: | |||
| 3129 | return rc; | 3153 | return rc; |
| 3130 | } | 3154 | } |
| 3131 | #endif /* CONFIG_NETLABEL */ | 3155 | #endif /* CONFIG_NETLABEL */ |
| 3156 | |||
| 3157 | /** | ||
| 3158 | * security_read_policy - read the policy. | ||
| 3159 | * @data: binary policy data | ||
| 3160 | * @len: length of data in bytes | ||
| 3161 | * | ||
| 3162 | */ | ||
| 3163 | int security_read_policy(void **data, ssize_t *len) | ||
| 3164 | { | ||
| 3165 | int rc; | ||
| 3166 | struct policy_file fp; | ||
| 3167 | |||
| 3168 | if (!ss_initialized) | ||
| 3169 | return -EINVAL; | ||
| 3170 | |||
| 3171 | *len = security_policydb_len(); | ||
| 3172 | |||
| 3173 | *data = vmalloc_user(*len); | ||
| 3174 | if (!*data) | ||
| 3175 | return -ENOMEM; | ||
| 3176 | |||
| 3177 | fp.data = *data; | ||
| 3178 | fp.len = *len; | ||
| 3179 | |||
| 3180 | read_lock(&policy_rwlock); | ||
| 3181 | rc = policydb_write(&policydb, &fp); | ||
| 3182 | read_unlock(&policy_rwlock); | ||
| 3183 | |||
| 3184 | if (rc) | ||
| 3185 | return rc; | ||
| 3186 | |||
| 3187 | *len = (unsigned long)fp.data - (unsigned long)*data; | ||
| 3188 | return 0; | ||
| 3189 | |||
| 3190 | } | ||
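The buffer comes from vmalloc_user(), so a consumer either maps it to userspace or releases it with vfree(); on success, *len is trimmed to the bytes actually written. A minimal kernel-side sketch of the round trip, assuming only what the function above shows (dump_policy_len() is a hypothetical caller):

    #include <linux/kernel.h>
    #include <linux/vmalloc.h>

    /* Sketch of a consumer; assumes security_read_policy() is declared
     * in scope. Error handling trimmed to the essentials. */
    static int dump_policy_len(void)
    {
            void *data;
            ssize_t len;
            int rc;

            rc = security_read_policy(&data, &len);
            if (rc)
                    return rc;

            pr_info("policy image is %zd bytes\n", len);
            vfree(data);    /* buffer came from vmalloc_user() */
            return 0;
    }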
diff --git a/security/selinux/ss/status.c b/security/selinux/ss/status.c new file mode 100644 index 000000000000..d982365f9d1a --- /dev/null +++ b/security/selinux/ss/status.c | |||
| @@ -0,0 +1,126 @@ | |||
| 1 | /* | ||
| 2 | * mmap based event notifications for SELinux | ||
| 3 | * | ||
| 4 | * Author: KaiGai Kohei <kaigai@ak.jp.nec.com> | ||
| 5 | * | ||
| 6 | * Copyright (C) 2010 NEC corporation | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify | ||
| 9 | * it under the terms of the GNU General Public License version 2, | ||
| 10 | * as published by the Free Software Foundation. | ||
| 11 | */ | ||
| 12 | #include <linux/kernel.h> | ||
| 13 | #include <linux/gfp.h> | ||
| 14 | #include <linux/mm.h> | ||
| 15 | #include <linux/mutex.h> | ||
| 16 | #include "avc.h" | ||
| 17 | #include "services.h" | ||
| 18 | |||
| 19 | /* | ||
| 20 | * The selinux_status_page is exposed to userspace applications via the | ||
| 21 | * mmap interface on /selinux/status. | ||
| 22 | * It allows the kernel to notify applications of events that require a | ||
| 23 | * reset of the userspace access vector cache, without a context switch. | ||
| 24 | * | ||
| 25 | * The selinux_kernel_status structure at the head of the status page | ||
| 26 | * is protected from concurrent access using seqlock logic, so a | ||
| 27 | * userspace application must read the status page according to that | ||
| 28 | * seqlock protocol. | ||
| 29 | * | ||
| 30 | * Typically, an application checks status->sequence at the start of its | ||
| 31 | * access control routine. If the value is odd, the kernel is updating | ||
| 32 | * the status, so the application should retry briefly. If the value has | ||
| 33 | * changed since the last read, something has happened, and the | ||
| 34 | * application resets its userspace avc if needed. | ||
| 35 | * In most cases, the application can confirm that the kernel status is | ||
| 36 | * unchanged without making any system calls. | ||
| 37 | */ | ||
| 38 | static struct page *selinux_status_page; | ||
| 39 | static DEFINE_MUTEX(selinux_status_lock); | ||
| 40 | |||
| 41 | /* | ||
| 42 | * selinux_kernel_status_page | ||
| 43 | * | ||
| 44 | * It returns a reference to selinux_status_page. If the status page is | ||
| 45 | * not allocated yet, it is allocated on first use. | ||
| 46 | */ | ||
| 47 | struct page *selinux_kernel_status_page(void) | ||
| 48 | { | ||
| 49 | struct selinux_kernel_status *status; | ||
| 50 | struct page *result = NULL; | ||
| 51 | |||
| 52 | mutex_lock(&selinux_status_lock); | ||
| 53 | if (!selinux_status_page) { | ||
| 54 | selinux_status_page = alloc_page(GFP_KERNEL|__GFP_ZERO); | ||
| 55 | |||
| 56 | if (selinux_status_page) { | ||
| 57 | status = page_address(selinux_status_page); | ||
| 58 | |||
| 59 | status->version = SELINUX_KERNEL_STATUS_VERSION; | ||
| 60 | status->sequence = 0; | ||
| 61 | status->enforcing = selinux_enforcing; | ||
| 62 | /* | ||
| 63 | * NOTE: the next policyload event will set | ||
| 64 | * status->policyload to a positive value; it | ||
| 65 | * may not be 1, but it is never zero, so the | ||
| 66 | * application can tell that an update occurred. | ||
| 67 | */ | ||
| 68 | status->policyload = 0; | ||
| 69 | status->deny_unknown = !security_get_allow_unknown(); | ||
| 70 | } | ||
| 71 | } | ||
| 72 | result = selinux_status_page; | ||
| 73 | mutex_unlock(&selinux_status_lock); | ||
| 74 | |||
| 75 | return result; | ||
| 76 | } | ||
| 77 | |||
| 78 | /* | ||
| 79 | * selinux_status_update_setenforce | ||
| 80 | * | ||
| 81 | * It updates the status page with the current enforcing/permissive mode. | ||
| 82 | */ | ||
| 83 | void selinux_status_update_setenforce(int enforcing) | ||
| 84 | { | ||
| 85 | struct selinux_kernel_status *status; | ||
| 86 | |||
| 87 | mutex_lock(&selinux_status_lock); | ||
| 88 | if (selinux_status_page) { | ||
| 89 | status = page_address(selinux_status_page); | ||
| 90 | |||
| 91 | status->sequence++; | ||
| 92 | smp_wmb(); | ||
| 93 | |||
| 94 | status->enforcing = enforcing; | ||
| 95 | |||
| 96 | smp_wmb(); | ||
| 97 | status->sequence++; | ||
| 98 | } | ||
| 99 | mutex_unlock(&selinux_status_lock); | ||
| 100 | } | ||
| 101 | |||
| 102 | /* | ||
| 103 | * selinux_status_update_policyload | ||
| 104 | * | ||
| 105 | * It updates the count of policy reloads and the current | ||
| 106 | * setting of deny_unknown. | ||
| 107 | */ | ||
| 108 | void selinux_status_update_policyload(int seqno) | ||
| 109 | { | ||
| 110 | struct selinux_kernel_status *status; | ||
| 111 | |||
| 112 | mutex_lock(&selinux_status_lock); | ||
| 113 | if (selinux_status_page) { | ||
| 114 | status = page_address(selinux_status_page); | ||
| 115 | |||
| 116 | status->sequence++; | ||
| 117 | smp_wmb(); | ||
| 118 | |||
| 119 | status->policyload = seqno; | ||
| 120 | status->deny_unknown = !security_get_allow_unknown(); | ||
| 121 | |||
| 122 | smp_wmb(); | ||
| 123 | status->sequence++; | ||
| 124 | } | ||
| 125 | mutex_unlock(&selinux_status_lock); | ||
| 126 | } | ||
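Putting the two update functions together, a userspace reader follows the seqlock protocol the header comment describes: read sequence, retry while it is odd or while it changes across the read. A hedged sketch of such a reader; the field layout of struct selinux_kernel_status is inferred from the accesses above and should be treated as an assumption, and production code would also insert read memory barriers:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    /* Layout inferred from the functions above; treat as illustrative. */
    struct selinux_kernel_status {
            uint32_t version;
            uint32_t sequence;     /* odd while the kernel is mid-update */
            uint32_t enforcing;
            uint32_t policyload;
            uint32_t deny_unknown;
    };

    int main(void)
    {
            int fd = open("/selinux/status", O_RDONLY);
            if (fd < 0)
                    return 1;

            const volatile struct selinux_kernel_status *st =
                    mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
            if (st == MAP_FAILED)
                    return 1;

            uint32_t seq, enforcing, policyload;
            for (;;) {
                    seq = st->sequence;       /* seqlock read side */
                    if (seq & 1)
                            continue;         /* writer mid-update, retry */
                    enforcing  = st->enforcing;
                    policyload = st->policyload;
                    if (st->sequence == seq)
                            break;            /* stable snapshot */
            }

            printf("enforcing=%u policyload=%u\n", enforcing, policyload);
            munmap((void *)st, 4096);
            close(fd);
            return 0;
    }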
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index c448d57ae2b7..bc39f4067af6 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c | |||
| @@ -1281,12 +1281,11 @@ static int smack_task_getioprio(struct task_struct *p) | |||
| 1281 | * | 1281 | * |
| 1282 | * Return 0 if read access is permitted | 1282 | * Return 0 if read access is permitted |
| 1283 | */ | 1283 | */ |
| 1284 | static int smack_task_setscheduler(struct task_struct *p, int policy, | 1284 | static int smack_task_setscheduler(struct task_struct *p) |
| 1285 | struct sched_param *lp) | ||
| 1286 | { | 1285 | { |
| 1287 | int rc; | 1286 | int rc; |
| 1288 | 1287 | ||
| 1289 | rc = cap_task_setscheduler(p, policy, lp); | 1288 | rc = cap_task_setscheduler(p); |
| 1290 | if (rc == 0) | 1289 | if (rc == 0) |
| 1291 | rc = smk_curacc_on_task(p, MAY_WRITE); | 1290 | rc = smk_curacc_on_task(p, MAY_WRITE); |
| 1292 | return rc; | 1291 | return rc; |
| @@ -3005,7 +3004,8 @@ static int smack_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) | |||
| 3005 | { | 3004 | { |
| 3006 | char *sp = smack_from_secid(secid); | 3005 | char *sp = smack_from_secid(secid); |
| 3007 | 3006 | ||
| 3008 | *secdata = sp; | 3007 | if (secdata) |
| 3008 | *secdata = sp; | ||
| 3009 | *seclen = strlen(sp); | 3009 | *seclen = strlen(sp); |
| 3010 | return 0; | 3010 | return 0; |
| 3011 | } | 3011 | } |
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index c668b447c725..7556315c1978 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c | |||
| @@ -768,8 +768,10 @@ static bool tomoyo_select_one(struct tomoyo_io_buffer *head, const char *data) | |||
| 768 | return true; /* Do nothing if open(O_WRONLY). */ | 768 | return true; /* Do nothing if open(O_WRONLY). */ |
| 769 | memset(&head->r, 0, sizeof(head->r)); | 769 | memset(&head->r, 0, sizeof(head->r)); |
| 770 | head->r.print_this_domain_only = true; | 770 | head->r.print_this_domain_only = true; |
| 771 | head->r.eof = !domain; | 771 | if (domain) |
| 772 | head->r.domain = &domain->list; | 772 | head->r.domain = &domain->list; |
| 773 | else | ||
| 774 | head->r.eof = 1; | ||
| 773 | tomoyo_io_printf(head, "# select %s\n", data); | 775 | tomoyo_io_printf(head, "# select %s\n", data); |
| 774 | if (domain && domain->is_deleted) | 776 | if (domain && domain->is_deleted) |
| 775 | tomoyo_io_printf(head, "# This is a deleted domain.\n"); | 777 | tomoyo_io_printf(head, "# This is a deleted domain.\n"); |
| @@ -2051,13 +2053,22 @@ void tomoyo_check_profile(void) | |||
| 2051 | const u8 profile = domain->profile; | 2053 | const u8 profile = domain->profile; |
| 2052 | if (tomoyo_profile_ptr[profile]) | 2054 | if (tomoyo_profile_ptr[profile]) |
| 2053 | continue; | 2055 | continue; |
| 2056 | printk(KERN_ERR "You need to define profile %u before using it.\n", | ||
| 2057 | profile); | ||
| 2058 | printk(KERN_ERR "Please see http://tomoyo.sourceforge.jp/2.3/ " | ||
| 2059 | "for more information.\n"); | ||
| 2054 | panic("Profile %u (used by '%s') not defined.\n", | 2060 | panic("Profile %u (used by '%s') not defined.\n", |
| 2055 | profile, domain->domainname->name); | 2061 | profile, domain->domainname->name); |
| 2056 | } | 2062 | } |
| 2057 | tomoyo_read_unlock(idx); | 2063 | tomoyo_read_unlock(idx); |
| 2058 | if (tomoyo_profile_version != 20090903) | 2064 | if (tomoyo_profile_version != 20090903) { |
| 2065 | printk(KERN_ERR "You need to install userland programs for " | ||
| 2066 | "TOMOYO 2.3 and initialize policy configuration.\n"); | ||
| 2067 | printk(KERN_ERR "Please see http://tomoyo.sourceforge.jp/2.3/ " | ||
| 2068 | "for more information.\n"); | ||
| 2059 | panic("Profile version %u is not supported.\n", | 2069 | panic("Profile version %u is not supported.\n", |
| 2060 | tomoyo_profile_version); | 2070 | tomoyo_profile_version); |
| 2071 | } | ||
| 2061 | printk(KERN_INFO "TOMOYO: 2.3.0\n"); | 2072 | printk(KERN_INFO "TOMOYO: 2.3.0\n"); |
| 2062 | printk(KERN_INFO "Mandatory Access Control activated.\n"); | 2073 | printk(KERN_INFO "Mandatory Access Control activated.\n"); |
| 2063 | } | 2074 | } |
