diff options
Diffstat (limited to 'Documentation/padata.txt')
-rw-r--r-- | Documentation/padata.txt | 99 |
1 files changed, 76 insertions, 23 deletions
diff --git a/Documentation/padata.txt b/Documentation/padata.txt index 269d7d0d8335..7ddfe216a0aa 100644 --- a/Documentation/padata.txt +++ b/Documentation/padata.txt | |||
@@ -1,5 +1,5 @@ | |||
1 | The padata parallel execution mechanism | 1 | The padata parallel execution mechanism |
2 | Last updated for 2.6.34 | 2 | Last updated for 2.6.36 |
3 | 3 | ||
4 | Padata is a mechanism by which the kernel can farm work out to be done in | 4 | Padata is a mechanism by which the kernel can farm work out to be done in |
5 | parallel on multiple CPUs while retaining the ordering of tasks. It was | 5 | parallel on multiple CPUs while retaining the ordering of tasks. It was |
@@ -13,31 +13,86 @@ overall control of how tasks are to be run: | |||
13 | 13 | ||
14 | #include <linux/padata.h> | 14 | #include <linux/padata.h> |
15 | 15 | ||
16 | struct padata_instance *padata_alloc(const struct cpumask *cpumask, | 16 | struct padata_instance *padata_alloc(struct workqueue_struct *wq, |
17 | struct workqueue_struct *wq); | 17 | const struct cpumask *pcpumask, |
18 | const struct cpumask *cbcpumask); | ||
18 | 19 | ||
19 | The cpumask describes which processors will be used to execute work | 20 | The pcpumask describes which processors will be used to execute work |
20 | submitted to this instance. The workqueue wq is where the work will | 21 | submitted to this instance in parallel. The cbcpumask defines which |
21 | actually be done; it should be a multithreaded queue, naturally. | 22 | processors are allowed to be used as the serialization callback processor. |
23 | The workqueue wq is where the work will actually be done; it should be | ||
24 | a multithreaded queue, naturally. | ||
25 | |||
26 | To allocate a padata instance with the cpu_possible_mask for both | ||
27 | cpumasks this helper function can be used: | ||
28 | |||
29 | struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq); | ||
30 | |||
31 | Note: Padata maintains two kinds of cpumasks internally: the user supplied | ||
32 | cpumasks, submitted by padata_alloc/padata_alloc_possible, and the 'usable' | ||
33 | cpumasks. The usable cpumasks are always a subset of active CPUs in the | ||
34 | user supplied cpumasks; these are the cpumasks padata actually uses. So | ||
35 | it is legal to supply a cpumask to padata that contains offline CPUs. | ||
36 | Once an offline CPU in the user supplied cpumask comes online, padata | ||
37 | is going to use it. | ||
22 | 38 | ||
23 | There are functions for enabling and disabling the instance: | 39 | There are functions for enabling and disabling the instance: |
24 | 40 | ||
25 | void padata_start(struct padata_instance *pinst); | 41 | int padata_start(struct padata_instance *pinst); |
26 | void padata_stop(struct padata_instance *pinst); | 42 | void padata_stop(struct padata_instance *pinst); |
27 | 43 | ||
28 | These functions literally do nothing beyond setting or clearing the | 44 | These functions set or clear the "PADATA_INIT" flag; |
29 | "padata_start() was called" flag; if that flag is not set, other functions | 45 | if that flag is not set, other functions will refuse to work. |
30 | will refuse to work. | 46 | padata_start returns zero on success (flag set) or -EINVAL if the |
47 | padata cpumask contains no active CPU (flag not set). | ||
48 | padata_stop clears the flag and blocks until the padata instance | ||
49 | is unused. | ||
31 | 50 | ||
32 | The list of CPUs to be used can be adjusted with these functions: | 51 | The list of CPUs to be used can be adjusted with these functions: |
33 | 52 | ||
34 | int padata_set_cpumask(struct padata_instance *pinst, | 53 | int padata_set_cpumasks(struct padata_instance *pinst, |
54 | cpumask_var_t pcpumask, | ||
55 | cpumask_var_t cbcpumask); | ||
56 | int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, | ||
35 | cpumask_var_t cpumask); | 57 | cpumask_var_t cpumask); |
36 | int padata_add_cpu(struct padata_instance *pinst, int cpu); | 58 | int padata_add_cpu(struct padata_instance *pinst, int cpu, int mask); |
37 | int padata_remove_cpu(struct padata_instance *pinst, int cpu); | 59 | int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask); |
60 | |||
61 | Changing the CPU masks is an expensive operation, though, so it should not be | ||
62 | done with great frequency. | ||
63 | |||
64 | It's possible to change both cpumasks of a padata instance with | ||
65 | padata_set_cpumasks by specifying the cpumasks for parallel execution (pcpumask) | ||
66 | and for the serial callback function (cbcpumask). padata_set_cpumask is used to | ||
67 | change just one of the cpumasks. Here cpumask_type is one of PADATA_CPU_SERIAL, | ||
68 | PADATA_CPU_PARALLEL and cpumask specifies the new cpumask to use. | ||
69 | To simply add or remove one CPU from a certain cpumask the functions | ||
70 | padata_add_cpu/padata_remove_cpu are used. cpu specifies the CPU to add or | ||
71 | remove and mask is one of PADATA_CPU_SERIAL, PADATA_CPU_PARALLEL. | ||
72 | |||
73 | If a user is interested in padata cpumask changes, he can register to | ||
74 | the padata cpumask change notifier: | ||
75 | |||
76 | int padata_register_cpumask_notifier(struct padata_instance *pinst, | ||
77 | struct notifier_block *nblock); | ||
78 | |||
79 | To unregister from that notifier: | ||
80 | |||
81 | int padata_unregister_cpumask_notifier(struct padata_instance *pinst, | ||
82 | struct notifier_block *nblock); | ||
83 | |||
84 | The padata cpumask change notifier notifies about changes of the usable | ||
85 | cpumasks, i.e. the subset of active CPUs in the user supplied cpumask. | ||
86 | |||
87 | Padata calls the notifier chain with: | ||
88 | |||
89 | blocking_notifier_call_chain(&pinst->cpumask_change_notifier, | ||
90 | notification_mask, | ||
91 | &pd_new->cpumask); | ||
38 | 92 | ||
39 | Changing the CPU mask has the look of an expensive operation, though, so it | 93 | Here cpumask_change_notifier is the registered notifier, notification_mask |
40 | probably should not be done with great frequency. | 94 | is one of PADATA_CPU_SERIAL, PADATA_CPU_PARALLEL and cpumask is a pointer |
95 | to a struct padata_cpumask that contains the new cpumask information. | ||
41 | 96 | ||
42 | Actually submitting work to the padata instance requires the creation of a | 97 | Actually submitting work to the padata instance requires the creation of a |
43 | padata_priv structure: | 98 | padata_priv structure: |
@@ -49,8 +104,8 @@ padata_priv structure: | |||
49 | }; | 104 | }; |
50 | 105 | ||
51 | This structure will almost certainly be embedded within some larger | 106 | This structure will almost certainly be embedded within some larger |
52 | structure specific to the work to be done. Most its fields are private to | 107 | structure specific to the work to be done. Most of its fields are private to |
53 | padata, but the structure should be zeroed at initialization time, and the | 108 | padata, but the structure should be zeroed at initialisation time, and the |
54 | parallel() and serial() functions should be provided. Those functions will | 109 | parallel() and serial() functions should be provided. Those functions will |
55 | be called in the process of getting the work done as we will see | 110 | be called in the process of getting the work done as we will see |
56 | momentarily. | 111 | momentarily. |
@@ -63,12 +118,10 @@ The submission of work is done with: | |||
63 | The pinst and padata structures must be set up as described above; cb_cpu | 118 | The pinst and padata structures must be set up as described above; cb_cpu |
64 | specifies which CPU will be used for the final callback when the work is | 119 | specifies which CPU will be used for the final callback when the work is |
65 | done; it must be in the current instance's CPU mask. The return value from | 120 | done; it must be in the current instance's CPU mask. The return value from |
66 | padata_do_parallel() is a little strange; zero is an error return | 121 | padata_do_parallel() is zero on success, indicating that the work is in |
67 | indicating that the caller forgot the padata_start() formalities. -EBUSY | 122 | progress. -EBUSY means that somebody, somewhere else is messing with the |
68 | means that somebody, somewhere else is messing with the instance's CPU | 123 | instance's CPU mask, while -EINVAL is a complaint about cb_cpu not being |
69 | mask, while -EINVAL is a complaint about cb_cpu not being in that CPU mask. | 124 | in that CPU mask or about an instance that is not running. |
70 | If all goes well, this function will return -EINPROGRESS, indicating that | ||
71 | the work is in progress. | ||
72 | 125 | ||
73 | Each task submitted to padata_do_parallel() will, in turn, be passed to | 126 | Each task submitted to padata_do_parallel() will, in turn, be passed to |
74 | exactly one call to the above-mentioned parallel() function, on one CPU, so | 127 | exactly one call to the above-mentioned parallel() function, on one CPU, so |