diff options
-rw-r--r-- | Documentation/RCU/NMI-RCU.txt | 112 | ||||
-rw-r--r-- | arch/i386/kernel/traps.c | 4 | ||||
-rw-r--r-- | arch/x86_64/kernel/nmi.c | 4 |
3 files changed, 116 insertions, 4 deletions
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt new file mode 100644 index 00000000000..d0634a5c344 --- /dev/null +++ b/Documentation/RCU/NMI-RCU.txt | |||
@@ -0,0 +1,112 @@ | |||
1 | Using RCU to Protect Dynamic NMI Handlers | ||
2 | |||
3 | |||
4 | Although RCU is usually used to protect read-mostly data structures, | ||
5 | it is possible to use RCU to provide dynamic non-maskable interrupt | ||
6 | handlers, as well as dynamic irq handlers. This document describes | ||
7 | how to do this, drawing loosely from Zwane Mwaikambo's NMI-timer | ||
8 | work in "arch/i386/oprofile/nmi_timer_int.c" and in | ||
9 | "arch/i386/kernel/traps.c". | ||
10 | |||
11 | The relevant pieces of code are listed below, each followed by a | ||
12 | brief explanation. | ||
13 | |||
14 | static int dummy_nmi_callback(struct pt_regs *regs, int cpu) | ||
15 | { | ||
16 | return 0; | ||
17 | } | ||
18 | |||
19 | The dummy_nmi_callback() function is a "dummy" NMI handler that does | ||
20 | nothing, but returns zero, thus saying that it did nothing, allowing | ||
21 | the NMI handler to take the default machine-specific action. | ||
22 | |||
23 | static nmi_callback_t nmi_callback = dummy_nmi_callback; | ||
24 | |||
25 | This nmi_callback variable is a global function pointer to the current | ||
26 | NMI handler. | ||
27 | |||
28 | fastcall void do_nmi(struct pt_regs * regs, long error_code) | ||
29 | { | ||
30 | int cpu; | ||
31 | |||
32 | nmi_enter(); | ||
33 | |||
34 | cpu = smp_processor_id(); | ||
35 | ++nmi_count(cpu); | ||
36 | |||
37 | if (!rcu_dereference(nmi_callback)(regs, cpu)) | ||
38 | default_do_nmi(regs); | ||
39 | |||
40 | nmi_exit(); | ||
41 | } | ||
42 | |||
43 | The do_nmi() function processes each NMI. It first disables preemption | ||
44 | in the same way that a hardware irq would, then increments the per-CPU | ||
45 | count of NMIs. It then invokes the NMI handler stored in the nmi_callback | ||
46 | function pointer. If this handler returns zero, do_nmi() invokes the | ||
47 | default_do_nmi() function to handle a machine-specific NMI. Finally, | ||
48 | preemption is restored. | ||
49 | |||
50 | Strictly speaking, rcu_dereference() is not needed, since this code runs | ||
51 | only on i386, which does not need rcu_dereference() anyway. However, | ||
52 | it is a good documentation aid, particularly for anyone attempting to | ||
53 | do something similar on Alpha. | ||
54 | |||
55 | Quick Quiz: Why might the rcu_dereference() be necessary on Alpha, | ||
56 | given that the code referenced by the pointer is read-only? | ||
57 | |||
58 | |||
59 | Back to the discussion of NMI and RCU... | ||
60 | |||
61 | void set_nmi_callback(nmi_callback_t callback) | ||
62 | { | ||
63 | rcu_assign_pointer(nmi_callback, callback); | ||
64 | } | ||
65 | |||
66 | The set_nmi_callback() function registers an NMI handler. Note that any | ||
67 | data that is to be used by the callback must be initialized up -before- | ||
68 | the call to set_nmi_callback(). On architectures that do not order | ||
69 | writes, the rcu_assign_pointer() ensures that the NMI handler sees the | ||
70 | initialized values. | ||
71 | |||
72 | void unset_nmi_callback(void) | ||
73 | { | ||
74 | rcu_assign_pointer(nmi_callback, dummy_nmi_callback); | ||
75 | } | ||
76 | |||
77 | This function unregisters an NMI handler, restoring the original | ||
78 | dummy_nmi_handler(). However, there may well be an NMI handler | ||
79 | currently executing on some other CPU. We therefore cannot free | ||
80 | up any data structures used by the old NMI handler until execution | ||
81 | of it completes on all other CPUs. | ||
82 | |||
83 | One way to accomplish this is via synchronize_sched(), perhaps as | ||
84 | follows: | ||
85 | |||
86 | unset_nmi_callback(); | ||
87 | synchronize_sched(); | ||
88 | kfree(my_nmi_data); | ||
89 | |||
90 | This works because synchronize_sched() blocks until all CPUs complete | ||
91 | any preemption-disabled segments of code that they were executing. | ||
92 | Since NMI handlers disable preemption, synchronize_sched() is guaranteed | ||
93 | not to return until all ongoing NMI handlers exit. It is therefore safe | ||
94 | to free up the handler's data as soon as synchronize_sched() returns. | ||
95 | |||
96 | |||
97 | Answer to Quick Quiz | ||
98 | |||
99 | Why might the rcu_dereference() be necessary on Alpha, given | ||
100 | that the code referenced by the pointer is read-only? | ||
101 | |||
102 | Answer: The caller to set_nmi_callback() might well have | ||
103 | initialized some data that is to be used by the | ||
104 | new NMI handler. In this case, the rcu_dereference() | ||
105 | would be needed, because otherwise a CPU that received | ||
106 | an NMI just after the new handler was set might see | ||
107 | the pointer to the new NMI handler, but the old | ||
108 | pre-initialized version of the handler's data. | ||
109 | |||
110 | More important, the rcu_dereference() makes it clear | ||
111 | to someone reading the code that the pointer is being | ||
112 | protected by RCU. | ||
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 54629bb5893..029bf94cda7 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c | |||
@@ -657,7 +657,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) | |||
657 | 657 | ||
658 | ++nmi_count(cpu); | 658 | ++nmi_count(cpu); |
659 | 659 | ||
660 | if (!nmi_callback(regs, cpu)) | 660 | if (!rcu_dereference(nmi_callback)(regs, cpu)) |
661 | default_do_nmi(regs); | 661 | default_do_nmi(regs); |
662 | 662 | ||
663 | nmi_exit(); | 663 | nmi_exit(); |
@@ -665,7 +665,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) | |||
665 | 665 | ||
666 | void set_nmi_callback(nmi_callback_t callback) | 666 | void set_nmi_callback(nmi_callback_t callback) |
667 | { | 667 | { |
668 | nmi_callback = callback; | 668 | rcu_assign_pointer(nmi_callback, callback); |
669 | } | 669 | } |
670 | EXPORT_SYMBOL_GPL(set_nmi_callback); | 670 | EXPORT_SYMBOL_GPL(set_nmi_callback); |
671 | 671 | ||
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 84cae81fff8..caf164959e1 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c | |||
@@ -524,14 +524,14 @@ asmlinkage void do_nmi(struct pt_regs * regs, long error_code) | |||
524 | 524 | ||
525 | nmi_enter(); | 525 | nmi_enter(); |
526 | add_pda(__nmi_count,1); | 526 | add_pda(__nmi_count,1); |
527 | if (!nmi_callback(regs, cpu)) | 527 | if (!rcu_dereference(nmi_callback)(regs, cpu)) |
528 | default_do_nmi(regs); | 528 | default_do_nmi(regs); |
529 | nmi_exit(); | 529 | nmi_exit(); |
530 | } | 530 | } |
531 | 531 | ||
532 | void set_nmi_callback(nmi_callback_t callback) | 532 | void set_nmi_callback(nmi_callback_t callback) |
533 | { | 533 | { |
534 | nmi_callback = callback; | 534 | rcu_assign_pointer(nmi_callback, callback); |
535 | } | 535 | } |
536 | 536 | ||
537 | void unset_nmi_callback(void) | 537 | void unset_nmi_callback(void) |