Diffstat (limited to 'Documentation/RCU')

 -rw-r--r--  Documentation/RCU/NMI-RCU.txt | 112
 -rw-r--r--  Documentation/RCU/rcuref.txt  |  74

 2 files changed, 186 insertions, 0 deletions

diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt
new file mode 100644
index 000000000000..d0634a5c3445
--- /dev/null
+++ b/Documentation/RCU/NMI-RCU.txt
@@ -0,0 +1,112 @@
Using RCU to Protect Dynamic NMI Handlers


Although RCU is usually used to protect read-mostly data structures,
it is possible to use RCU to provide dynamic non-maskable interrupt
handlers, as well as dynamic irq handlers.  This document describes
how to do this, drawing loosely from Zwane Mwaikambo's NMI-timer
work in "arch/i386/oprofile/nmi_timer_int.c" and in
"arch/i386/kernel/traps.c".

The relevant pieces of code are listed below, each followed by a
brief explanation.

	static int dummy_nmi_callback(struct pt_regs *regs, int cpu)
	{
		return 0;
	}

The dummy_nmi_callback() function is a "dummy" NMI handler that does
nothing but return zero, thereby indicating that it did not handle the
NMI and allowing do_nmi() to fall back on the default machine-specific
action.

	static nmi_callback_t nmi_callback = dummy_nmi_callback;

This nmi_callback variable is a global function pointer to the current
NMI handler.

	fastcall void do_nmi(struct pt_regs * regs, long error_code)
	{
		int cpu;

		nmi_enter();

		cpu = smp_processor_id();
		++nmi_count(cpu);

		if (!rcu_dereference(nmi_callback)(regs, cpu))
			default_do_nmi(regs);

		nmi_exit();
	}

The do_nmi() function processes each NMI.  It first disables preemption
in the same way that a hardware irq would, then increments the per-CPU
count of NMIs.  It then invokes the NMI handler stored in the nmi_callback
function pointer.  If this handler returns zero, do_nmi() invokes the
default_do_nmi() function to handle a machine-specific NMI.  Finally,
preemption is restored.

Strictly speaking, rcu_dereference() is not needed, since this code runs
only on i386, which does not need rcu_dereference() anyway.  However,
it is a good documentation aid, particularly for anyone attempting to
do something similar on Alpha.

Quick Quiz:  Why might the rcu_dereference() be necessary on Alpha,
	     given that the code referenced by the pointer is read-only?


Back to the discussion of NMI and RCU...

	void set_nmi_callback(nmi_callback_t callback)
	{
		rcu_assign_pointer(nmi_callback, callback);
	}

The set_nmi_callback() function registers an NMI handler.  Note that any
data that is to be used by the callback must be initialized -before-
the call to set_nmi_callback().  On architectures that do not order
writes, the rcu_assign_pointer() ensures that the NMI handler sees the
initialized values.
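
For example, the caller might set up its data as follows before
registering the handler.  This is a minimal sketch, not taken from the
kernel sources; my_nmi_data, my_nmi_handler(), and the struct layout
are hypothetical names used only for illustration:

	static struct my_nmi_data {
		atomic_t count;
	} *my_nmi_data;

	static int my_nmi_handler(struct pt_regs *regs, int cpu)
	{
		/* my_nmi_data was fully initialized before registration. */
		atomic_inc(&my_nmi_data->count);
		return 1;	/* Nonzero: do_nmi() skips default_do_nmi(). */
	}

	/* ... in process context, error checking omitted ... */
	my_nmi_data = kmalloc(sizeof(*my_nmi_data), GFP_KERNEL);
	atomic_set(&my_nmi_data->count, 0);	/* Initialize -before-...      */
	set_nmi_callback(my_nmi_handler);	/* ...publishing the handler.  */

The rcu_assign_pointer() inside set_nmi_callback() then guarantees that
any CPU seeing my_nmi_handler through nmi_callback also sees the
initialized count.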

	void unset_nmi_callback(void)
	{
		rcu_assign_pointer(nmi_callback, dummy_nmi_callback);
	}

This function unregisters an NMI handler, restoring the original
dummy_nmi_callback().  However, there may well be an NMI handler
currently executing on some other CPU.  We therefore cannot free
up any data structures used by the old NMI handler until all of its
executions on other CPUs have completed.

One way to accomplish this is via synchronize_sched(), perhaps as
follows:

	unset_nmi_callback();
	synchronize_sched();
	kfree(my_nmi_data);

This works because synchronize_sched() blocks until all CPUs complete
any preemption-disabled segments of code that they were executing.
Since NMI handlers disable preemption, synchronize_sched() is guaranteed
not to return until all ongoing NMI handlers exit.  It is therefore safe
to free up the handler's data as soon as synchronize_sched() returns.


Answer to Quick Quiz

	Why might the rcu_dereference() be necessary on Alpha, given
	that the code referenced by the pointer is read-only?

	Answer: The caller of set_nmi_callback() might well have
		initialized some data that is to be used by the
		new NMI handler.  In this case, the rcu_dereference()
		would be needed, because otherwise a CPU that received
		an NMI just after the new handler was set might see
		the pointer to the new NMI handler, but the old
		pre-initialized version of the handler's data.

		More importantly, the rcu_dereference() makes it clear
		to someone reading the code that the pointer is being
		protected by RCU.

diff --git a/Documentation/RCU/rcuref.txt b/Documentation/RCU/rcuref.txt
new file mode 100644
index 000000000000..a23fee66064d
--- /dev/null
+++ b/Documentation/RCU/rcuref.txt
@@ -0,0 +1,74 @@
Refcounter framework for elements of lists/arrays protected by
RCU.

Refcounting on elements of lists that are protected by traditional
reader/writer spinlocks or semaphores is straightforward, as in:

1.                                      2.
add()                                   search_and_reference()
{                                       {
    alloc_object                            read_lock(&list_lock);
    ...                                     search_for_element
    atomic_set(&el->rc, 1);                 atomic_inc(&el->rc);
    write_lock(&list_lock);                 ...
    add_element                             read_unlock(&list_lock);
    ...                                     ...
    write_unlock(&list_lock);           }
}

3.                                              4.
release_referenced()                            delete()
{                                               {
    ...                                             write_lock(&list_lock);
    if (atomic_dec_and_test(&el->rc))               ...
        kfree(el);                                  delete_element
    ...                                             write_unlock(&list_lock);
}                                                   ...
                                                    if (atomic_dec_and_test(&el->rc))
                                                        kfree(el);
                                                    ...
                                                }

If this list/array is made lock-free using RCU, as in changing the
write_lock() in add() and delete() to spin_lock() and changing the
read_lock() in search_and_reference() to rcu_read_lock(), the reference
acquired in search_and_reference() could potentially be to an element
that has already been deleted from the list/array.  rcuref_inc_lf()
takes care of this scenario (it fails rather than incrementing a
refcount that has already reached zero).  search_and_reference()
should then look like this:

1.                                      2.
add()                                   search_and_reference()
{                                       {
    alloc_object                            rcu_read_lock();
    ...                                     search_for_element
    atomic_set(&el->rc, 1);                 if (!rcuref_inc_lf(&el->rc)) {
    write_lock(&list_lock);                     rcu_read_unlock();
                                                return FAIL;
    add_element                             }
    ...                                     ...
    write_unlock(&list_lock);               rcu_read_unlock();
}                                       }

3.                                              4.
release_referenced()                            delete()
{                                               {
    ...                                             write_lock(&list_lock);
    if (rcuref_dec_and_test(&el->rc))               ...
        call_rcu(&el->head, el_free);               delete_element
    ...                                             write_unlock(&list_lock);
}                                                   ...
                                                    if (rcuref_dec_and_test(&el->rc))
                                                        call_rcu(&el->head, el_free);
                                                    ...
                                                }
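
For call_rcu() to work, the element needs to embed a struct rcu_head,
and el_free() is the RCU callback that actually frees the element once
a grace period has elapsed.  A minimal sketch, assuming a hypothetical
struct element layout (only el->rc and el->head appear in the example
above):

	struct element {
		struct list_head list;	/* Hypothetical list linkage.   */
		atomic_t rc;		/* Reference count (el->rc).    */
		struct rcu_head head;	/* For call_rcu() (el->head).   */
		/* ... element data ... */
	};

	static void el_free(struct rcu_head *head)
	{
		struct element *el = container_of(head, struct element, head);

		kfree(el);
	}

Deferring the kfree() to el_free() ensures that a concurrent
search_and_reference() that has already found the element, but has not
yet taken its reference, does not access freed memory.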

Sometimes a reference to the element needs to be obtained in the
update (write) stream.  In such cases, rcuref_inc_lf() might be
overkill, since the spinlock serialising list updates is already
held.  rcuref_inc() is to be used in such cases.
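
For example, an updater that already holds list_lock might take a
reference as follows.  This is a rough sketch in the same pseudocode
style as the examples above; update_and_reference() is a hypothetical
name, and search_for_element remains pseudocode:

	update_and_reference()
	{
	    write_lock(&list_lock);
	    search_for_element
	    rcuref_inc(&el->rc);        /* Plain increment suffices here.   */
	    ...
	    write_unlock(&list_lock);
	}

Because delete() drops its reference only while holding the same lock,
the count cannot reach zero here, so the lock-free check performed by
rcuref_inc_lf() is unnecessary.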

On architectures that do not have cmpxchg, the rcuref_inc_lf() API
uses a hashed-spinlock implementation, and the same hashed spinlock
is acquired in all rcuref_xxx primitives to preserve atomicity.

Note: Use the rcuref_inc() API only if you also need to use
rcuref_inc_lf() on the refcounter in at least one place.  Mixing the
rcuref_inc() and atomic_xxx APIs might lead to races.  rcuref_inc_lf()
must be used in lock-free RCU read-side critical sections only.