285 files changed, 9123 insertions, 3127 deletions
diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist
index bd23dc0bc0c7..6491b2c45dd4 100644
--- a/Documentation/SubmitChecklist
+++ b/Documentation/SubmitChecklist
@@ -80,3 +80,7 @@ kernel patches.
 23: Tested after it has been merged into the -mm patchset to make sure
     that it still works with all of the other queued patches and various
     changes in the VM, VFS, and other subsystems.
+
+24: Avoid whitespace damage such as indenting with spaces or whitespace
+    at the end of lines.  You can test this by feeding the patch to
+    "git apply --check --whitespace=error-all"
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index b0d0043f7c46..a417b25fb1aa 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -363,7 +363,8 @@ area or subsystem of the kernel is being patched.
 The "summary phrase" in the email's Subject should concisely
 describe the patch which that email contains.  The "summary
 phrase" should not be a filename.  Do not use the same "summary
-phrase" for every patch in a whole patch series.
+phrase" for every patch in a whole patch series (where a "patch
+series" is an ordered sequence of multiple, related patches).
 
 Bear in mind that the "summary phrase" of your email becomes
 a globally-unique identifier for that patch.  It propagates
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index cc60d29b954c..b6d24c22274b 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -217,14 +217,17 @@ Q: What happens when a CPU is being logically offlined?
 A: The following happen, listed in no particular order :-)
 
 - A notification is sent to in-kernel registered modules by sending an event
-  CPU_DOWN_PREPARE
+  CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the
+  CPU is being offlined while tasks are frozen due to a suspend operation in
+  progress
 - All process is migrated away from this outgoing CPU to a new CPU
 - All interrupts targeted to this CPU is migrated to a new CPU
 - timers/bottom half/task lets are also migrated to a new CPU
 - Once all services are migrated, kernel calls an arch specific routine
   __cpu_disable() to perform arch specific cleanup.
 - Once this is successful, an event for successful cleanup is sent by an event
-  CPU_DEAD.
+  CPU_DEAD (or CPU_DEAD_FROZEN if tasks are frozen due to a suspend while the
+  CPU is being offlined).
 
 "It is expected that each service cleans up when the CPU_DOWN_PREPARE
 notifier is called, when CPU_DEAD is called its expected there is nothing
@@ -242,9 +245,11 @@ A: This is what you would need in your kernel code to receive notifications.
 
 	switch (action) {
 	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
 		foobar_online_action(cpu);
 		break;
 	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
 		foobar_dead_action(cpu);
 		break;
 	}
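For reference, a complete callback built on the snippet above might look like
this; a minimal sketch that keeps the documentation's placeholder hooks
foobar_online_action()/foobar_dead_action() (the notifier name is likewise
hypothetical):

	#include <linux/cpu.h>
	#include <linux/notifier.h>

	static int foobar_cpu_callback(struct notifier_block *nfb,
				       unsigned long action, void *hcpu)
	{
		unsigned int cpu = (unsigned long)hcpu;

		switch (action) {
		case CPU_ONLINE:
		case CPU_ONLINE_FROZEN:	/* brought online with tasks frozen */
			foobar_online_action(cpu);
			break;
		case CPU_DEAD:
		case CPU_DEAD_FROZEN:	/* offlined with tasks frozen */
			foobar_dead_action(cpu);
			break;
		}
		return NOTIFY_OK;
	}

	static struct notifier_block foobar_cpu_notifier = {
		.notifier_call = foobar_cpu_callback,
	};

The block would be wired up with register_cpu_notifier(&foobar_cpu_notifier),
as described elsewhere in this document.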
diff --git a/Documentation/device-mapper/delay.txt b/Documentation/device-mapper/delay.txt
new file mode 100644
index 000000000000..15adc55359e5
--- /dev/null
+++ b/Documentation/device-mapper/delay.txt
@@ -0,0 +1,26 @@
+dm-delay
+========
+
+Device-Mapper's "delay" target delays reads and/or writes
+and maps them to different devices.
+
+Parameters:
+    <device> <offset> <delay> [<write_device> <write_offset> <write_delay>]
+
+With separate write parameters, the first set is only used for reads.
+Delays are specified in milliseconds.
+
+Example scripts
+===============
+[[
+#!/bin/sh
+# Create device delaying rw operation for 500ms
+echo "0 `blockdev --getsize $1` delay $1 0 500" | dmsetup create delayed
+]]
+
+[[
+#!/bin/sh
+# Create device delaying only write operation for 500ms and
+# splitting reads and writes to different devices $1 $2
+echo "0 `blockdev --getsize $1` delay $1 0 0 $2 0 500" | dmsetup create delayed
+]]
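Once one of the scripts above has run, the mapping can be inspected and torn
down with the usual dmsetup commands; a short sketch:

	#!/bin/sh
	# Inspect the table of the "delayed" device created above ...
	dmsetup table delayed
	# ... and remove it again when done.
	dmsetup remove delayed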
diff --git a/Documentation/fb/arkfb.txt b/Documentation/fb/arkfb.txt
new file mode 100644
index 000000000000..e8487a9d6a05
--- /dev/null
+++ b/Documentation/fb/arkfb.txt
@@ -0,0 +1,68 @@
+
+arkfb - fbdev driver for ARK Logic chips
+========================================
+
+
+Supported Hardware
+==================
+
+	ARK 2000PV chip
+	ICS 5342 ramdac
+
+	- only BIOS initialized VGA devices supported
+	- probably not working on big endian
+
+
+Supported Features
+==================
+
+	*  4 bpp pseudocolor modes (with 18bit palette, two variants)
+	*  8 bpp pseudocolor mode (with 18bit palette)
+	* 16 bpp truecolor modes (RGB 555 and RGB 565)
+	* 24 bpp truecolor mode (RGB 888)
+	* 32 bpp truecolor mode (RGB 888)
+	* text mode (activated by bpp = 0)
+	* doublescan mode variant (not available in text mode)
+	* panning in both directions
+	* suspend/resume support
+
+Text mode is supported even in higher resolutions, but there is a limitation
+to lower pixclocks (I got a maximum of about 70 MHz; it depends on the
+specific hardware). This limitation is not enforced by the driver. Text mode
+supports 8bit wide fonts only (hardware limitation) and 16bit tall fonts
+(driver limitation). Unfortunately character attributes (like color) in text
+mode are broken for an unknown reason, so its usefulness is limited.
+
+There are two 4 bpp modes. The first mode (selected if nonstd == 0) uses
+packed pixels, high nibble first. The second mode (selected if nonstd == 1)
+uses interleaved planes (1 byte interleave), MSB first. Both modes support
+8bit wide fonts only (driver limitation).
+
+Suspend/resume works on systems that initialize the video card during resume
+and if the device is active (for example used by fbcon).
+
+
+Missing Features
+================
+(alias TODO list)
+
+	* secondary (not initialized by BIOS) device support
+	* big endian support
+	* DPMS support
+	* MMIO support
+	* interlaced mode variant
+	* support for fontwidths != 8 in 4 bpp modes
+	* support for fontheight != 16 in text mode
+	* hardware cursor
+	* vsync synchronization
+	* feature connector support
+	* acceleration support (8514-like 2D)
+
+
+Known bugs
+==========
+
+	* character attributes (and cursor) in text mode are broken
+
+--
+Ondrej Zajicek <santiago@crfreenet.org>
diff --git a/Documentation/fb/vt8623fb.txt b/Documentation/fb/vt8623fb.txt
new file mode 100644
index 000000000000..f654576c56b7
--- /dev/null
+++ b/Documentation/fb/vt8623fb.txt
@@ -0,0 +1,64 @@
+
+vt8623fb - fbdev driver for graphics core in VIA VT8623 chipset
+===============================================================
+
+
+Supported Hardware
+==================
+
+	VIA VT8623 [CLE266] chipset and its graphics core
+	(known as CastleRock or Unichrome)
+
+	I tested vt8623fb on VIA EPIA ML-6000
+
+
+Supported Features
+==================
+
+	*  4 bpp pseudocolor modes (with 18bit palette, two variants)
+	*  8 bpp pseudocolor mode (with 18bit palette)
+	* 16 bpp truecolor mode (RGB 565)
+	* 32 bpp truecolor mode (RGB 888)
+	* text mode (activated by bpp = 0)
+	* doublescan mode variant (not available in text mode)
+	* panning in both directions
+	* suspend/resume support
+	* DPMS support
+
+Text mode is supported even in higher resolutions, but there is a limitation
+to lower pixclocks (maximum about 100 MHz). This limitation is not enforced
+by the driver. Text mode supports 8bit wide fonts only (hardware limitation)
+and 16bit tall fonts (driver limitation).
+
+There are two 4 bpp modes. The first mode (selected if nonstd == 0) uses
+packed pixels, high nibble first. The second mode (selected if nonstd == 1)
+uses interleaved planes (1 byte interleave), MSB first. Both modes support
+8bit wide fonts only (driver limitation).
+
+Suspend/resume works on systems that initialize the video card during resume
+and if the device is active (for example used by fbcon).
+
+
+Missing Features
+================
+(alias TODO list)
+
+	* secondary (not initialized by BIOS) device support
+	* MMIO support
+	* interlaced mode variant
+	* support for fontwidths != 8 in 4 bpp modes
+	* support for fontheight != 16 in text mode
+	* hardware cursor
+	* video overlay support
+	* vsync synchronization
+	* acceleration support (8514-like 2D, busmaster transfers)
+
+
+Known bugs
+==========
+
+	* cursor disable in text mode doesn't work
+
+
+--
+Ondrej Zajicek <santiago@crfreenet.org>
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 2202f5dc8ac2..5818628207b5 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -178,6 +178,21 @@ All md devices contain:
      The size should be at least PAGE_SIZE (4k) and should be a power
      of 2.  This can only be set while assembling an array
 
+  layout
+     The "layout" for the array for the particular level.  This is
+     simply a number that is interpreted differently by different
+     levels.  It can be written while assembling an array.
+
+  reshape_position
+     This is either "none" or a sector number within the devices of
+     the array where "reshape" is up to.  If this is set, the three
+     attributes mentioned above (raid_disks, chunk_size, layout) can
+     potentially have 2 values, an old and a new value.  If these
+     values differ, reading the attribute returns
+        new (old)
+     and writing will effect the 'new' value, leaving the 'old'
+     unchanged.
+
   component_size
      For arrays with data redundancy (i.e. not raid0, linear, faulty,
      multipath), all components must be the same size - or at least
@@ -193,11 +208,6 @@ All md devices contain:
      1.2 (newer format in varying locations) or "none" indicating that
      the kernel isn't managing metadata at all.
 
-  layout
-     The "layout" for the array for the particular level.  This is
-     simply a number that is interpretted differently by different
-     levels.  It can be written while assembling an array.
-
   resync_start
      The point at which resync should start.  If no resync is needed,
      this will be a very large number.  At array creation it will
@@ -259,29 +269,6 @@ All md devices contain:
      like active, but no writes have been seen for a while (safe_mode_delay).
 
 
-  sync_speed_min
-  sync_speed_max
-     This are similar to /proc/sys/dev/raid/speed_limit_{min,max}
-     however they only apply to the particular array.
-     If no value has been written to these, of if the word 'system'
-     is written, then the system-wide value is used.  If a value,
-     in kibibytes-per-second is written, then it is used.
-     When the files are read, they show the currently active value
-     followed by "(local)" or "(system)" depending on whether it is
-     a locally set or system-wide value.
-
-  sync_completed
-     This shows the number of sectors that have been completed of
-     whatever the current sync_action is, followed by the number of
-     sectors in total that could need to be processed.  The two
-     numbers are separated by a '/' thus effectively showing one
-     value, a fraction of the process that is complete.
-
-  sync_speed
-     This shows the current actual speed, in K/sec, of the current
-     sync_action.  It is averaged over the last 30 seconds.
-
-
 As component devices are added to an md array, they appear in the 'md'
 directory as new directories named
       dev-XXX
@@ -412,6 +399,35 @@ also have
      Note that the numbers are 'bit' numbers, not 'block' numbers.
      They should be scaled by the bitmap_chunksize.
 
+  sync_speed_min
+  sync_speed_max
+     These are similar to /proc/sys/dev/raid/speed_limit_{min,max}
+     however they only apply to the particular array.
+     If no value has been written to these, or if the word 'system'
+     is written, then the system-wide value is used.  If a value,
+     in kibibytes-per-second is written, then it is used.
+     When the files are read, they show the currently active value
+     followed by "(local)" or "(system)" depending on whether it is
+     a locally set or system-wide value.
+
+  sync_completed
+     This shows the number of sectors that have been completed of
+     whatever the current sync_action is, followed by the number of
+     sectors in total that could need to be processed.  The two
+     numbers are separated by a '/' thus effectively showing one
+     value, a fraction of the process that is complete.
+
+  sync_speed
+     This shows the current actual speed, in K/sec, of the current
+     sync_action.  It is averaged over the last 30 seconds.
+
+  suspend_lo
+  suspend_hi
+     The two values, given as numbers of sectors, indicate a range
+     within the array where IO will be blocked.  This is currently
+     only supported for raid4/5/6.
+
+
 Each active md device may also have attributes specific to the
 personality module that manages it.
 These are specific to the implementation of the module and could
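The relocated sync_speed_{min,max} attributes work per-array exactly as
described; a hedged shell sketch, with md0 standing in for a real array:

	#!/bin/sh
	# Set a local minimum resync speed for md0 only ...
	echo 5000 > /sys/block/md0/md/sync_speed_min
	cat /sys/block/md0/md/sync_speed_min      # e.g. "5000 (local)"
	# ... then fall back to the system-wide limit.
	echo system > /sys/block/md0/md/sync_speed_min
	cat /sys/block/md0/md/sync_speed_min      # e.g. "1000 (system)"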
diff --git a/Documentation/power/userland-swsusp.txt b/Documentation/power/userland-swsusp.txt
index 000556c932e9..e00c6cf09e85 100644
--- a/Documentation/power/userland-swsusp.txt
+++ b/Documentation/power/userland-swsusp.txt
@@ -93,21 +93,23 @@ SNAPSHOT_S2RAM - suspend to RAM; using this call causes the kernel to
 to resume the system from RAM if there's enough battery power or restore
 its state on the basis of the saved suspend image otherwise)
 
-SNAPSHOT_PMOPS - enable the usage of the pmops->prepare, pmops->enter and
-pmops->finish methods (the in-kernel swsusp knows these as the "platform
-method") which are needed on many machines to (among others) speed up
-the resume by letting the BIOS skip some steps or to let the system
-recognise the correct state of the hardware after the resume (in
-particular on many machines this ensures that unplugged AC
-adapters get correctly detected and that kacpid does not run wild after
-the resume).  The last ioctl() argument can take one of the three
-values, defined in kernel/power/power.h:
-	PMOPS_PREPARE - make the kernel carry out the
-		pm_ops->prepare(PM_SUSPEND_DISK) operation
-	PMOPS_ENTER - make the kernel power off the system by calling
-		pm_ops->enter(PM_SUSPEND_DISK)
-	PMOPS_FINISH - make the kernel carry out the
-		pm_ops->finish(PM_SUSPEND_DISK) operation
+SNAPSHOT_PMOPS - enable the usage of the hibernation_ops->prepare,
+hibernation_ops->enter and hibernation_ops->finish methods (the in-kernel
+swsusp knows these as the "platform method") which are needed on many
+machines to (among others) speed up the resume by letting the BIOS skip
+some steps or to let the system recognise the correct state of the
+hardware after the resume (in particular on many machines this ensures
+that unplugged AC adapters get correctly detected and that kacpid does
+not run wild after the resume).  The last ioctl() argument can take one
+of the three values, defined in kernel/power/power.h:
+	PMOPS_PREPARE - make the kernel carry out the
+		hibernation_ops->prepare() operation
+	PMOPS_ENTER - make the kernel power off the system by calling
+		hibernation_ops->enter()
+	PMOPS_FINISH - make the kernel carry out the
+		hibernation_ops->finish() operation
+Note that the actual constants are misnamed because they surface
+internal kernel implementation details that have changed.
 
 The device's read() operation can be used to transfer the snapshot image from
 the kernel.  It has the following limitations:
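A userland hibernation tool would issue the renamed operations roughly as
follows; a minimal sketch, assuming /dev/snapshot as the device node and
noting that SNAPSHOT_PMOPS and the PMOPS_* constants must be copied verbatim
from kernel/power/power.h (the ioctl number below is an illustrative
placeholder, not the authoritative value):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>

	/* Placeholders -- copy the real definitions from kernel/power/power.h. */
	#define SNAPSHOT_IOC_MAGIC	'3'
	#define SNAPSHOT_PMOPS		_IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int)
	#define PMOPS_PREPARE		1
	#define PMOPS_ENTER		2
	#define PMOPS_FINISH		3

	int main(void)
	{
		int fd = open("/dev/snapshot", O_RDONLY);

		if (fd < 0) {
			perror("open /dev/snapshot");
			return 1;
		}
		/* Asks the kernel to run hibernation_ops->prepare(). */
		if (ioctl(fd, SNAPSHOT_PMOPS, PMOPS_PREPARE))
			perror("SNAPSHOT_PMOPS(PMOPS_PREPARE)");
		return 0;
	}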
diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c
index 41710ccf3a29..686a8e04a4f3 100644
--- a/Documentation/vm/slabinfo.c
+++ b/Documentation/vm/slabinfo.c
@@ -16,6 +16,7 @@
 #include <stdarg.h>
 #include <getopt.h>
 #include <regex.h>
+#include <errno.h>
 
 #define MAX_SLABS 500
 #define MAX_ALIASES 500
@@ -41,12 +42,15 @@ struct aliasinfo {
 } aliasinfo[MAX_ALIASES];
 
 int slabs = 0;
+int actual_slabs = 0;
 int aliases = 0;
 int alias_targets = 0;
 int highest_node = 0;
 
 char buffer[4096];
 
+int show_empty = 0;
+int show_report = 0;
 int show_alias = 0;
 int show_slab = 0;
 int skip_zero = 1;
@@ -59,6 +63,15 @@ int show_inverted = 0;
 int show_single_ref = 0;
 int show_totals = 0;
 int sort_size = 0;
+int set_debug = 0;
+int show_ops = 0;
+
+/* Debug options */
+int sanity = 0;
+int redzone = 0;
+int poison = 0;
+int tracking = 0;
+int tracing = 0;
 
 int page_size;
 
@@ -76,20 +89,33 @@ void fatal(const char *x, ...)
 
 void usage(void)
 {
-	printf("slabinfo [-ahnpvtsz] [slab-regexp]\n"
+	printf("slabinfo 5/7/2007. (c) 2007 sgi. clameter@sgi.com\n\n"
+		"slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n"
 		"-a|--aliases           Show aliases\n"
+		"-d<options>|--debug=<options> Set/Clear Debug options\n"
+		"-e|--empty             Show empty slabs\n"
+		"-f|--first-alias       Show first alias\n"
 		"-h|--help              Show usage information\n"
+		"-i|--inverted          Inverted list\n"
+		"-l|--slabs             Show slabs\n"
 		"-n|--numa              Show NUMA information\n"
+		"-o|--ops               Show kmem_cache_ops\n"
 		"-s|--shrink            Shrink slabs\n"
-		"-v|--validate          Validate slabs\n"
+		"-r|--report            Detailed report on single slabs\n"
+		"-S|--Size              Sort by size\n"
 		"-t|--tracking          Show alloc/free information\n"
 		"-T|--Totals            Show summary information\n"
-		"-l|--slabs             Show slabs\n"
-		"-S|--Size              Sort by size\n"
+		"-v|--validate          Validate slabs\n"
 		"-z|--zero              Include empty slabs\n"
-		"-f|--first-alias       Show first alias\n"
-		"-i|--inverted          Inverted list\n"
 		"-1|--1ref              Single reference\n"
+		"\nValid debug options (FZPUT may be combined)\n"
+		"a / A          Switch on all debug options (=FZUP)\n"
+		"-              Switch off all debug options\n"
+		"f / F          Sanity Checks (SLAB_DEBUG_FREE)\n"
+		"z / Z          Redzoning\n"
+		"p / P          Poisoning\n"
+		"u / U          Tracking\n"
+		"t / T          Tracing\n"
 	);
 }
 
@@ -143,11 +169,10 @@ unsigned long get_obj_and_str(char *name, char **x)
 void set_obj(struct slabinfo *s, char *name, int n)
 {
 	char x[100];
+	FILE *f;
 
 	sprintf(x, "%s/%s", s->name, name);
-
-	FILE *f = fopen(x, "w");
-
+	f = fopen(x, "w");
 	if (!f)
 		fatal("Cannot write to %s\n", x);
 
@@ -155,6 +180,26 @@ void set_obj(struct slabinfo *s, char *name, int n)
 	fclose(f);
 }
 
+unsigned long read_slab_obj(struct slabinfo *s, char *name)
+{
+	char x[100];
+	FILE *f;
+	int l;
+
+	sprintf(x, "%s/%s", s->name, name);
+	f = fopen(x, "r");
+	if (!f) {
+		buffer[0] = 0;
+		l = 0;
+	} else {
+		l = fread(buffer, 1, sizeof(buffer), f);
+		buffer[l] = 0;
+		fclose(f);
+	}
+	return l;
+}
+
+
 /*
  * Put a size string together
  */
@@ -226,7 +271,7 @@ int line = 0;
 
 void first_line(void)
 {
-	printf("Name                 Objects   Objsize   Space "
+	printf("Name                   Objects Objsize    Space "
 		"Slabs/Part/Cpu  O/S O %%Fr %%Ef Flg\n");
 }
 
@@ -246,10 +291,7 @@ struct aliasinfo *find_one_alias(struct slabinfo *find)
 			return best;
 		}
 	}
-	if (best)
-		return best;
-	fatal("Cannot find alias for %s\n", find->name);
-	return NULL;
+	return best;
 }
 
 unsigned long slab_size(struct slabinfo *s)
@@ -257,6 +299,126 @@ unsigned long slab_size(struct slabinfo *s)
 	return s->slabs * (page_size << s->order);
 }
 
+void slab_numa(struct slabinfo *s, int mode)
+{
+	int node;
+
+	if (strcmp(s->name, "*") == 0)
+		return;
+
+	if (!highest_node) {
+		printf("\n%s: No NUMA information available.\n", s->name);
+		return;
+	}
+
+	if (skip_zero && !s->slabs)
+		return;
+
+	if (!line) {
+		printf("\n%-21s:", mode ? "NUMA nodes" : "Slab");
+		for(node = 0; node <= highest_node; node++)
+			printf(" %4d", node);
+		printf("\n----------------------");
+		for(node = 0; node <= highest_node; node++)
+			printf("-----");
+		printf("\n");
+	}
+	printf("%-21s ", mode ? "All slabs" : s->name);
+	for(node = 0; node <= highest_node; node++) {
+		char b[20];
+
+		store_size(b, s->numa[node]);
+		printf(" %4s", b);
+	}
+	printf("\n");
+	if (mode) {
+		printf("%-21s ", "Partial slabs");
+		for(node = 0; node <= highest_node; node++) {
+			char b[20];
+
+			store_size(b, s->numa_partial[node]);
+			printf(" %4s", b);
+		}
+		printf("\n");
+	}
+	line++;
+}
+
+void show_tracking(struct slabinfo *s)
+{
+	printf("\n%s: Kernel object allocation\n", s->name);
+	printf("-----------------------------------------------------------------------\n");
+	if (read_slab_obj(s, "alloc_calls"))
+		printf(buffer);
+	else
+		printf("No Data\n");
+
+	printf("\n%s: Kernel object freeing\n", s->name);
+	printf("------------------------------------------------------------------------\n");
+	if (read_slab_obj(s, "free_calls"))
+		printf(buffer);
+	else
+		printf("No Data\n");
+
+}
+
+void ops(struct slabinfo *s)
+{
+	if (strcmp(s->name, "*") == 0)
+		return;
+
+	if (read_slab_obj(s, "ops")) {
+		printf("\n%s: kmem_cache operations\n", s->name);
+		printf("--------------------------------------------\n");
+		printf(buffer);
+	} else
+		printf("\n%s has no kmem_cache operations\n", s->name);
+}
+
+const char *onoff(int x)
+{
+	if (x)
+		return "On ";
+	return "Off";
+}
+
+void report(struct slabinfo *s)
+{
+	if (strcmp(s->name, "*") == 0)
+		return;
+	printf("\nSlabcache: %-20s  Aliases: %2d Order : %2d\n", s->name, s->aliases, s->order);
+	if (s->hwcache_align)
+		printf("** Hardware cacheline aligned\n");
+	if (s->cache_dma)
+		printf("** Memory is allocated in a special DMA zone\n");
+	if (s->destroy_by_rcu)
+		printf("** Slabs are destroyed via RCU\n");
+	if (s->reclaim_account)
+		printf("** Reclaim accounting active\n");
+
+	printf("\nSizes (bytes)     Slabs              Debug                Memory\n");
+	printf("------------------------------------------------------------------------\n");
+	printf("Object : %7d  Total  : %7ld   Sanity Checks : %s  Total: %7ld\n",
+			s->object_size, s->slabs, onoff(s->sanity_checks),
+			s->slabs * (page_size << s->order));
+	printf("SlabObj: %7d  Full   : %7ld   Redzoning     : %s  Used : %7ld\n",
+			s->slab_size, s->slabs - s->partial - s->cpu_slabs,
+			onoff(s->red_zone), s->objects * s->object_size);
+	printf("SlabSiz: %7d  Partial: %7ld   Poisoning     : %s  Loss : %7ld\n",
+			page_size << s->order, s->partial, onoff(s->poison),
+			s->slabs * (page_size << s->order) - s->objects * s->object_size);
+	printf("Loss   : %7d  CpuSlab: %7d   Tracking      : %s  Lalig: %7ld\n",
+			s->slab_size - s->object_size, s->cpu_slabs, onoff(s->store_user),
+			(s->slab_size - s->object_size) * s->objects);
+	printf("Align  : %7d  Objects: %7d   Tracing       : %s  Lpadd: %7ld\n",
+			s->align, s->objs_per_slab, onoff(s->trace),
+			((page_size << s->order) - s->objs_per_slab * s->slab_size) *
+			s->slabs);
+
+	ops(s);
+	show_tracking(s);
+	slab_numa(s, 1);
+}
 
 void slabcache(struct slabinfo *s)
 {
@@ -265,7 +427,18 @@ void slabcache(struct slabinfo *s)
 	char flags[20];
 	char *p = flags;
 
-	if (skip_zero && !s->slabs)
+	if (strcmp(s->name, "*") == 0)
+		return;
+
+	if (actual_slabs == 1) {
+		report(s);
+		return;
+	}
+
+	if (skip_zero && !show_empty && !s->slabs)
+		return;
+
+	if (show_empty && s->slabs)
 		return;
 
 	store_size(size_str, slab_size(s));
@@ -303,48 +476,128 @@ void slabcache(struct slabinfo *s)
 		flags);
 }
 
-void slab_numa(struct slabinfo *s)
+/*
+ * Analyze debug options. Return false if something is amiss.
+ */
+int debug_opt_scan(char *opt)
 {
-	int node;
+	if (!opt || !opt[0] || strcmp(opt, "-") == 0)
+		return 1;
+
+	if (strcasecmp(opt, "a") == 0) {
+		sanity = 1;
+		poison = 1;
+		redzone = 1;
+		tracking = 1;
+		return 1;
+	}
 
-	if (!highest_node)
-		fatal("No NUMA information available.\n");
+	for ( ; *opt; opt++)
+		switch (*opt) {
+		case 'F' : case 'f':
+			if (sanity)
+				return 0;
+			sanity = 1;
+			break;
+		case 'P' : case 'p':
+			if (poison)
+				return 0;
+			poison = 1;
+			break;
 
-	if (skip_zero && !s->slabs)
-		return;
+		case 'Z' : case 'z':
+			if (redzone)
+				return 0;
+			redzone = 1;
+			break;
 
-	if (!line) {
-		printf("\nSlab                 Node ");
-		for(node = 0; node <= highest_node; node++)
-			printf(" %4d", node);
-		printf("\n----------------------");
-		for(node = 0; node <= highest_node; node++)
-			printf("-----");
-		printf("\n");
-	}
-	printf("%-21s ", s->name);
-	for(node = 0; node <= highest_node; node++) {
-		char b[20];
+		case 'U' : case 'u':
+			if (tracking)
+				return 0;
+			tracking = 1;
+			break;
 
-		store_size(b, s->numa[node]);
-		printf(" %4s", b);
-	}
-	printf("\n");
-	line++;
+		case 'T' : case 't':
+			if (tracing)
+				return 0;
+			tracing = 1;
+			break;
+		default:
+			return 0;
+		}
+	return 1;
 }
 
-void show_tracking(struct slabinfo *s)
+int slab_empty(struct slabinfo *s)
 {
-	printf("\n%s: Calls to allocate a slab object\n", s->name);
-	printf("---------------------------------------------------\n");
-	if (read_obj("alloc_calls"))
-		printf(buffer);
+	if (s->objects > 0)
+		return 0;
 
-	printf("%s: Calls to free a slab object\n", s->name);
-	printf("-----------------------------------------------\n");
-	if (read_obj("free_calls"))
-		printf(buffer);
+	/*
+	 * We may still have slabs even if there are no objects. Shrinking will
+	 * remove them.
+	 */
+	if (s->slabs != 0)
+		set_obj(s, "shrink", 1);
 
+	return 1;
+}
+
+void slab_debug(struct slabinfo *s)
+{
+	if (sanity && !s->sanity_checks) {
+		set_obj(s, "sanity", 1);
+	}
+	if (!sanity && s->sanity_checks) {
+		if (slab_empty(s))
+			set_obj(s, "sanity", 0);
+		else
+			fprintf(stderr, "%s not empty cannot disable sanity checks\n", s->name);
+	}
+	if (redzone && !s->red_zone) {
+		if (slab_empty(s))
+			set_obj(s, "red_zone", 1);
+		else
+			fprintf(stderr, "%s not empty cannot enable redzoning\n", s->name);
+	}
+	if (!redzone && s->red_zone) {
+		if (slab_empty(s))
+			set_obj(s, "red_zone", 0);
+		else
+			fprintf(stderr, "%s not empty cannot disable redzoning\n", s->name);
+	}
+	if (poison && !s->poison) {
+		if (slab_empty(s))
+			set_obj(s, "poison", 1);
+		else
+			fprintf(stderr, "%s not empty cannot enable poisoning\n", s->name);
+	}
+	if (!poison && s->poison) {
+		if (slab_empty(s))
+			set_obj(s, "poison", 0);
+		else
+			fprintf(stderr, "%s not empty cannot disable poisoning\n", s->name);
+	}
+	if (tracking && !s->store_user) {
+		if (slab_empty(s))
+			set_obj(s, "store_user", 1);
+		else
+			fprintf(stderr, "%s not empty cannot enable tracking\n", s->name);
+	}
+	if (!tracking && s->store_user) {
+		if (slab_empty(s))
+			set_obj(s, "store_user", 0);
+		else
+			fprintf(stderr, "%s not empty cannot disable tracking\n", s->name);
+	}
+	if (tracing && !s->trace) {
+		if (slabs == 1)
+			set_obj(s, "trace", 1);
+		else
+			fprintf(stderr, "%s can only enable trace for one slab at a time\n", s->name);
+	}
+	if (!tracing && s->trace)
+		set_obj(s, "trace", 1);
 }
 
 void totals(void)
@@ -673,7 +926,7 @@ void link_slabs(void)
 
 	for (a = aliasinfo; a < aliasinfo + aliases; a++) {
 
-		for(s = slabinfo; s < slabinfo + slabs; s++)
+		for (s = slabinfo; s < slabinfo + slabs; s++)
 			if (strcmp(a->ref, s->name) == 0) {
 				a->slab = s;
 				s->refs++;
@@ -704,7 +957,7 @@ void alias(void)
 				continue;
 			}
 		}
-		printf("\n%-20s <- %s", a->slab->name, a->name);
+		printf("\n%-12s <- %s", a->slab->name, a->name);
 		active = a->slab->name;
 	}
 	else
@@ -729,7 +982,12 @@ void rename_slabs(void)
 
 		a = find_one_alias(s);
 
-		s->name = a->name;
+		if (a)
+			s->name = a->name;
+		else {
+			s->name = "*";
+			actual_slabs--;
+		}
 	}
 }
 
@@ -748,11 +1006,14 @@ void read_slab_dir(void)
 	char *t;
 	int count;
 
+	if (chdir("/sys/slab"))
+		fatal("SYSFS support for SLUB not active\n");
+
 	dir = opendir(".");
 	while ((de = readdir(dir))) {
 		if (de->d_name[0] == '.' ||
-			slab_mismatch(de->d_name))
+			(de->d_name[0] != ':' && slab_mismatch(de->d_name)))
 			continue;
 		switch (de->d_type) {
 		case DT_LNK:
 			alias->name = strdup(de->d_name);
@@ -807,6 +1068,7 @@ void read_slab_dir(void)
 	}
 	closedir(dir);
 	slabs = slab - slabinfo;
+	actual_slabs = slabs;
 	aliases = alias - aliasinfo;
 	if (slabs > MAX_SLABS)
 		fatal("Too many slabs\n");
@@ -825,34 +1087,37 @@ void output_slabs(void)
 
 
 		if (show_numa)
-			slab_numa(slab);
-		else
-		if (show_track)
+			slab_numa(slab, 0);
+		else if (show_track)
 			show_tracking(slab);
-		else
-		if (validate)
+		else if (validate)
 			slab_validate(slab);
-		else
-		if (shrink)
+		else if (shrink)
 			slab_shrink(slab);
-		else {
-			if (show_slab)
-				slabcache(slab);
-		}
+		else if (set_debug)
+			slab_debug(slab);
+		else if (show_ops)
+			ops(slab);
+		else if (show_slab)
+			slabcache(slab);
 	}
 }
 
 struct option opts[] = {
 	{ "aliases", 0, NULL, 'a' },
-	{ "slabs", 0, NULL, 'l' },
-	{ "numa", 0, NULL, 'n' },
-	{ "zero", 0, NULL, 'z' },
-	{ "help", 0, NULL, 'h' },
-	{ "validate", 0, NULL, 'v' },
+	{ "debug", 2, NULL, 'd' },
+	{ "empty", 0, NULL, 'e' },
 	{ "first-alias", 0, NULL, 'f' },
+	{ "help", 0, NULL, 'h' },
+	{ "inverted", 0, NULL, 'i'},
+	{ "numa", 0, NULL, 'n' },
+	{ "ops", 0, NULL, 'o' },
+	{ "report", 0, NULL, 'r' },
 	{ "shrink", 0, NULL, 's' },
+	{ "slabs", 0, NULL, 'l' },
 	{ "track", 0, NULL, 't'},
-	{ "inverted", 0, NULL, 'i'},
+	{ "validate", 0, NULL, 'v' },
+	{ "zero", 0, NULL, 'z' },
 	{ "1ref", 0, NULL, '1'},
 	{ NULL, 0, NULL, 0 }
 };
@@ -864,10 +1129,9 @@ int main(int argc, char *argv[])
 	char *pattern_source;
 
 	page_size = getpagesize();
-	if (chdir("/sys/slab"))
-		fatal("This kernel does not have SLUB support.\n");
 
-	while ((c = getopt_long(argc, argv, "afhil1npstvzTS", opts, NULL)) != -1)
+	while ((c = getopt_long(argc, argv, "ad::efhil1noprstvzTS",
+						opts, NULL)) != -1)
 		switch(c) {
 		case '1':
 			show_single_ref = 1;
@@ -875,6 +1139,14 @@ int main(int argc, char *argv[])
 		case 'a':
 			show_alias = 1;
 			break;
+		case 'd':
+			set_debug = 1;
+			if (!debug_opt_scan(optarg))
+				fatal("Invalid debug option '%s'\n", optarg);
+			break;
+		case 'e':
+			show_empty = 1;
+			break;
 		case 'f':
 			show_first_alias = 1;
 			break;
@@ -887,6 +1159,12 @@ int main(int argc, char *argv[])
 		case 'n':
 			show_numa = 1;
 			break;
+		case 'o':
+			show_ops = 1;
+			break;
+		case 'r':
+			show_report = 1;
+			break;
 		case 's':
 			shrink = 1;
 			break;
@@ -914,8 +1192,8 @@ int main(int argc, char *argv[])
 
 	}
 
-	if (!show_slab && !show_alias && !show_track
-		&& !validate && !shrink)
+	if (!show_slab && !show_alias && !show_track && !show_report
+		&& !validate && !shrink && !set_debug && !show_ops)
 		show_slab = 1;
 
 	if (argc > optind)
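With the options added above, typical slabinfo invocations look like this; a
short sketch (cache names such as dentry depend on the running kernel):

	#!/bin/sh
	slabinfo -r dentry    # detailed report on a single cache
	slabinfo -dFZ dentry  # switch on sanity checks and redzoning
	slabinfo -e           # show only the empty caches
	slabinfo -o           # list kmem_cache operations per cache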
diff --git a/arch/avr32/Makefile b/arch/avr32/Makefile
index 6115fc1f0cfa..dc6bc01f232c 100644
--- a/arch/avr32/Makefile
+++ b/arch/avr32/Makefile
@@ -16,7 +16,7 @@ AFLAGS	+= -mrelax -mno-pic
 CFLAGS_MODULE	+= -mno-relax
 LDFLAGS_vmlinux	+= --relax
 
-cpuflags-$(CONFIG_CPU_AP7000)	+= -mcpu=ap7000
+cpuflags-$(CONFIG_CPU_AT32AP7000)	+= -mcpu=ap7000
 
 CFLAGS	+= $(cpuflags-y)
 AFLAGS	+= $(cpuflags-y)
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index 4e4181ed1c6d..13f988402613 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -330,13 +330,13 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
 {
 	struct pt_regs *childregs;
 
-	childregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long)p->thread_info)) - 1;
+	childregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long)task_stack_page(p))) - 1;
 	*childregs = *regs;
 
 	if (user_mode(regs))
 		childregs->sp = usp;
 	else
-		childregs->sp = (unsigned long)p->thread_info + THREAD_SIZE;
+		childregs->sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
 
 	childregs->r12 = 0; /* Set return value for child */
 
@@ -403,7 +403,7 @@ unsigned long get_wchan(struct task_struct *p)
 	if (!p || p == current || p->state == TASK_RUNNING)
 		return 0;
 
-	stack_page = (unsigned long)p->thread_info;
+	stack_page = (unsigned long)task_stack_page(p);
 	BUG_ON(!stack_page);
 
 	/*
diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c
index 8ac74dddbbde..3c36c2d16148 100644
--- a/arch/avr32/kernel/ptrace.c
+++ b/arch/avr32/kernel/ptrace.c
@@ -24,7 +24,7 @@
 
 static struct pt_regs *get_user_regs(struct task_struct *tsk)
 {
-	return (struct pt_regs *)((unsigned long) tsk->thread_info +
+	return (struct pt_regs *)((unsigned long)task_stack_page(tsk) +
 				  THREAD_SIZE - sizeof(struct pt_regs));
 }
 
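The same conversion recurs throughout this commit (avr32, blackfin, h8300):
instead of assuming the kernel stack hangs off task_struct's thread_info
member, code now asks the task_stack_page() accessor for the stack base.
Schematically, in C:

	struct pt_regs *regs;

	/* Old idiom -- hard-wires where the kernel stack lives:
	 *	regs = (struct pt_regs *)((unsigned long)tsk->thread_info +
	 *				  THREAD_SIZE - sizeof(struct pt_regs));
	 * New idiom -- the accessor hides the stack layout: */
	regs = (struct pt_regs *)((unsigned long)task_stack_page(tsk) +
				  THREAD_SIZE - sizeof(struct pt_regs));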
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
index 7c279586fbba..07f6a6fa340d 100644
--- a/arch/avr32/kernel/syscall_table.S
+++ b/arch/avr32/kernel/syscall_table.S
@@ -291,4 +291,5 @@ sys_call_table:
 	.long	sys_shmget		/* 275 */
 	.long	sys_shmdt
 	.long	sys_shmctl
+	.long	sys_utimensat
 	.long	sys_ni_syscall		/* r8 is saturated at nr_syscalls */
diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c
index 4de9edf96ed2..86d107511dd4 100644
--- a/arch/avr32/kernel/traps.c
+++ b/arch/avr32/kernel/traps.c
@@ -123,7 +123,7 @@ asmlinkage void do_address_exception(unsigned long ecr, struct pt_regs *regs)
 
 /* This way of handling undefined instructions is stolen from ARM */
 static LIST_HEAD(undef_hook);
-static spinlock_t undef_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(undef_lock);
 
 void register_undef_hook(struct undef_hook *hook)
 {
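This lock conversion (repeated below in clock.c) drops the deprecated
compile-time initializer in favour of the DEFINE_SPINLOCK() macro, which
stays correct even if spinlock_t grows extra debugging fields; compare:

	#include <linux/spinlock.h>

	/* Before (deprecated):
	 *	static spinlock_t undef_lock = SPIN_LOCK_UNLOCKED;
	 * After: */
	static DEFINE_SPINLOCK(undef_lock);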
diff --git a/arch/avr32/kernel/vmlinux.lds.c b/arch/avr32/kernel/vmlinux.lds.c
index 7ad20cfb48a8..e7f72c995a32 100644
--- a/arch/avr32/kernel/vmlinux.lds.c
+++ b/arch/avr32/kernel/vmlinux.lds.c
@@ -35,7 +35,7 @@ SECTIONS
 		_einittext = .;
 		. = ALIGN(4);
 		__tagtable_begin = .;
-		*(.taglist)
+		*(.taglist.init)
 		__tagtable_end = .;
 		*(.init.data)
 		. = ALIGN(16);
diff --git a/arch/avr32/mach-at32ap/clock.c b/arch/avr32/mach-at32ap/clock.c
index 00c435452d7e..0f8c89c9f832 100644
--- a/arch/avr32/mach-at32ap/clock.c
+++ b/arch/avr32/mach-at32ap/clock.c
@@ -18,7 +18,7 @@
 
 #include "clock.h"
 
-static spinlock_t clk_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(clk_lock);
 
 struct clk *clk_get(struct device *dev, const char *id)
 {
diff --git a/arch/avr32/mm/dma-coherent.c b/arch/avr32/mm/dma-coherent.c
index b68d669f823d..099212d4567c 100644
--- a/arch/avr32/mm/dma-coherent.c
+++ b/arch/avr32/mm/dma-coherent.c
@@ -112,16 +112,21 @@ void dma_free_coherent(struct device *dev, size_t size,
 }
 EXPORT_SYMBOL(dma_free_coherent);
 
-#if 0
 void *dma_alloc_writecombine(struct device *dev, size_t size,
 			     dma_addr_t *handle, gfp_t gfp)
 {
 	struct page *page;
+	dma_addr_t phys;
 
 	page = __dma_alloc(dev, size, handle, gfp);
+	if (!page)
+		return NULL;
+
+	phys = page_to_phys(page);
+	*handle = phys;
 
 	/* Now, map the page into P3 with write-combining turned on */
-	return __ioremap(page_to_phys(page), size, _PAGE_BUFFER);
+	return __ioremap(phys, size, _PAGE_BUFFER);
 }
 EXPORT_SYMBOL(dma_alloc_writecombine);
 
@@ -132,8 +137,7 @@ void dma_free_writecombine(struct device *dev, size_t size,
 
 	iounmap(cpu_addr);
 
-	page = bus_to_page(handle);
+	page = phys_to_page(handle);
 	__dma_free(dev, size, page, handle);
 }
 EXPORT_SYMBOL(dma_free_writecombine);
-#endif
diff --git a/arch/blackfin/kernel/asm-offsets.c b/arch/blackfin/kernel/asm-offsets.c
index 41d9a9f89700..e455f4504509 100644
--- a/arch/blackfin/kernel/asm-offsets.c
+++ b/arch/blackfin/kernel/asm-offsets.c
@@ -46,7 +46,7 @@ int main(void)
 	DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace));
 	DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked));
 	DEFINE(TASK_THREAD, offsetof(struct task_struct, thread));
-	DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, thread_info));
+	DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, stack));
 	DEFINE(TASK_MM, offsetof(struct task_struct, mm));
 	DEFINE(TASK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
 	DEFINE(TASK_SIGPENDING, offsetof(struct task_struct, pending));
diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c
index d7c8e514cb92..e718bb4a1ef0 100644
--- a/arch/blackfin/kernel/ptrace.c
+++ b/arch/blackfin/kernel/ptrace.c
@@ -73,7 +73,7 @@
 static inline struct pt_regs *get_user_regs(struct task_struct *task)
 {
 	return (struct pt_regs *)
-	    ((unsigned long)task->thread_info +
+	    ((unsigned long)task_stack_page(task) +
 	     (THREAD_SIZE - sizeof(struct pt_regs)));
 }
 
@@ -99,7 +99,7 @@ static inline long get_reg(struct task_struct *task, int regno)
 	unsigned char *reg_ptr;
 
 	struct pt_regs *regs =
-	    (struct pt_regs *)((unsigned long)task->thread_info +
+	    (struct pt_regs *)((unsigned long)task_stack_page(task) +
 			       (THREAD_SIZE - sizeof(struct pt_regs)));
 	reg_ptr = (char *)regs;
 
@@ -125,7 +125,7 @@ put_reg(struct task_struct *task, int regno, unsigned long data)
 	char * reg_ptr;
 
 	struct pt_regs *regs =
-	    (struct pt_regs *)((unsigned long)task->thread_info +
+	    (struct pt_regs *)((unsigned long)task_stack_page(task) +
 			       (THREAD_SIZE - sizeof(struct pt_regs)));
 	reg_ptr = (char *)regs;
 
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index eed694312a79..114738a45582 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -45,15 +45,15 @@ config TIME_LOW_RES
 	bool
 	default y
 
-config ARCH_HAS_ILOG2_U32
+config QUICKLIST
 	bool
 	default y
 
-config ARCH_HAS_ILOG2_U64
+config ARCH_HAS_ILOG2_U32
 	bool
 	default y
 
-config ARCH_USES_SLAB_PAGE_STRUCT
+config ARCH_HAS_ILOG2_U64
 	bool
 	default y
 
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 515a5cea5469..9583a338e9d6 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -25,12 +25,14 @@
 #include <linux/elf.h>
 #include <linux/reboot.h>
 #include <linux/interrupt.h>
+#include <linux/pagemap.h>
 
 #include <asm/asm-offsets.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <asm/setup.h>
 #include <asm/pgtable.h>
+#include <asm/tlb.h>
 #include <asm/gdb-stub.h>
 #include <asm/mb-regs.h>
 
@@ -88,6 +90,8 @@ void cpu_idle(void)
 	while (!need_resched()) {
 		irq_stat[cpu].idle_timestamp = jiffies;
 
+		check_pgt_cache();
+
 		if (!frv_dma_inprogress && idle)
 			idle();
 	}
diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c index 598a26ab8ad8..7787c3cc52c6 100644 --- a/arch/frv/mm/pgalloc.c +++ b/arch/frv/mm/pgalloc.c | |||
@@ -13,12 +13,12 @@ | |||
13 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
14 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
15 | #include <linux/highmem.h> | 15 | #include <linux/highmem.h> |
16 | #include <linux/quicklist.h> | ||
16 | #include <asm/pgalloc.h> | 17 | #include <asm/pgalloc.h> |
17 | #include <asm/page.h> | 18 | #include <asm/page.h> |
18 | #include <asm/cacheflush.h> | 19 | #include <asm/cacheflush.h> |
19 | 20 | ||
20 | pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((aligned(PAGE_SIZE))); | 21 | pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((aligned(PAGE_SIZE))); |
21 | struct kmem_cache *pgd_cache; | ||
22 | 22 | ||
23 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | 23 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) |
24 | { | 24 | { |
@@ -100,7 +100,7 @@ static inline void pgd_list_del(pgd_t *pgd) | |||
100 | set_page_private(next, (unsigned long) pprev); | 100 | set_page_private(next, (unsigned long) pprev); |
101 | } | 101 | } |
102 | 102 | ||
103 | void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) | 103 | void pgd_ctor(void *pgd) |
104 | { | 104 | { |
105 | unsigned long flags; | 105 | unsigned long flags; |
106 | 106 | ||
@@ -120,7 +120,7 @@ void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) | |||
120 | } | 120 | } |
121 | 121 | ||
122 | /* never called when PTRS_PER_PMD > 1 */ | 122 | /* never called when PTRS_PER_PMD > 1 */ |
123 | void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) | 123 | void pgd_dtor(void *pgd) |
124 | { | 124 | { |
125 | unsigned long flags; /* can be called from interrupt context */ | 125 | unsigned long flags; /* can be called from interrupt context */ |
126 | 126 | ||
@@ -133,7 +133,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
133 | { | 133 | { |
134 | pgd_t *pgd; | 134 | pgd_t *pgd; |
135 | 135 | ||
136 | pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL); | 136 | pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor); |
137 | if (!pgd) | 137 | if (!pgd) |
138 | return pgd; | 138 | return pgd; |
139 | 139 | ||
@@ -143,15 +143,15 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
143 | void pgd_free(pgd_t *pgd) | 143 | void pgd_free(pgd_t *pgd) |
144 | { | 144 | { |
145 | /* in the non-PAE case, clear_page_tables() clears user pgd entries */ | 145 | /* in the non-PAE case, clear_page_tables() clears user pgd entries */ |
146 | kmem_cache_free(pgd_cache, pgd); | 146 | quicklist_free(0, pgd_dtor, pgd); |
147 | } | 147 | } |
148 | 148 | ||
149 | void __init pgtable_cache_init(void) | 149 | void __init pgtable_cache_init(void) |
150 | { | 150 | { |
151 | pgd_cache = kmem_cache_create("pgd", | ||
152 | PTRS_PER_PGD * sizeof(pgd_t), | ||
153 | PTRS_PER_PGD * sizeof(pgd_t), | ||
154 | SLAB_PANIC, | ||
155 | pgd_ctor, | ||
156 | pgd_dtor); | ||
157 | } | 151 | } |
152 | |||
153 | void check_pgt_cache(void) | ||
154 | { | ||
155 | quicklist_trim(0, pgd_dtor, 25, 16); | ||
156 | } | ||
157 | |||
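
Note: the frv hunks above convert pgd management from a private kmem_cache to the generic quicklist allocator: pages come from a per-CPU freelist, the constructor runs only when a genuinely new page is handed out, and the idle loop (via the check_pgt_cache() call added to cpu_idle()) periodically trims the list back to the page allocator. A minimal sketch of the same pattern, assuming CONFIG_QUICKLIST is selected and using list index 0 as the frv code does; the example_* names are illustrative, only the quicklist_*() calls are the real API:

        #include <linux/quicklist.h>
        #include <linux/gfp.h>

        static void example_pgd_ctor(void *pgd)
        {
                /* one-time setup of a fresh page, e.g. copy kernel mappings */
        }

        static void example_pgd_dtor(void *pgd)
        {
                /* runs when a page finally goes back to the page allocator */
        }

        static void *example_pgd_alloc(void)
        {
                /* reuse a cached page if possible; ctor only on a fresh one */
                return quicklist_alloc(0, GFP_KERNEL, example_pgd_ctor);
        }

        static void example_pgd_free(void *pgd)
        {
                quicklist_free(0, example_pgd_dtor, pgd);
        }

        /* called from the idle loop; the 25/16 watermarks mirror the frv code */
        void check_pgt_cache(void)
        {
                quicklist_trim(0, example_pgd_dtor, 25, 16);
        }
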
diff --git a/arch/h8300/kernel/asm-offsets.c b/arch/h8300/kernel/asm-offsets.c index b78b82ad28a3..fc30b4fd0914 100644 --- a/arch/h8300/kernel/asm-offsets.c +++ b/arch/h8300/kernel/asm-offsets.c | |||
@@ -30,7 +30,7 @@ int main(void) | |||
30 | DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace)); | 30 | DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace)); |
31 | DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked)); | 31 | DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked)); |
32 | DEFINE(TASK_THREAD, offsetof(struct task_struct, thread)); | 32 | DEFINE(TASK_THREAD, offsetof(struct task_struct, thread)); |
33 | DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, thread_info)); | 33 | DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, stack)); |
34 | DEFINE(TASK_MM, offsetof(struct task_struct, mm)); | 34 | DEFINE(TASK_MM, offsetof(struct task_struct, mm)); |
35 | DEFINE(TASK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); | 35 | DEFINE(TASK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); |
36 | 36 | ||
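
Note: the TASK_THREAD_INFO changes here and in the asm-offsets files below all track one task_struct rename: the ->thread_info pointer is now called ->stack, and C code is expected to reach it through accessors instead of touching the field directly (as the traps.c, smtc.c and irq.c hunks in this patch do). A rough sketch of those accessors, assuming the usual layout with the thread_info at the base of the kernel stack; the example_* names are illustrative re-implementations of task_thread_info() and task_stack_page() from <linux/sched.h>:

        #include <linux/sched.h>

        static inline struct thread_info *example_thread_info(struct task_struct *t)
        {
                return (struct thread_info *)t->stack;
        }

        static inline void *example_stack_page(struct task_struct *t)
        {
                return t->stack;        /* base of the task's kernel stack */
        }
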
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 80b4c5d421b1..e5be819492ef 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c | |||
@@ -733,9 +733,11 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, | |||
733 | sys_dev = get_cpu_sysdev(cpu); | 733 | sys_dev = get_cpu_sysdev(cpu); |
734 | switch (action) { | 734 | switch (action) { |
735 | case CPU_ONLINE: | 735 | case CPU_ONLINE: |
736 | case CPU_ONLINE_FROZEN: | ||
736 | cache_add_dev(sys_dev); | 737 | cache_add_dev(sys_dev); |
737 | break; | 738 | break; |
738 | case CPU_DEAD: | 739 | case CPU_DEAD: |
740 | case CPU_DEAD_FROZEN: | ||
739 | cache_remove_dev(sys_dev); | 741 | cache_remove_dev(sys_dev); |
740 | break; | 742 | break; |
741 | } | 743 | } |
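
Note: this is the pattern repeated through most of the notifier hunks in this patch. The suspend code now delivers _FROZEN variants of the hotplug events while tasks are frozen, and callbacks that should behave the same during suspend/resume as at runtime simply add the twin case label. A minimal sketch of such a callback, with placeholder add/remove functions:

        #include <linux/cpu.h>
        #include <linux/notifier.h>

        static void example_add_dev(unsigned int cpu) { /* placeholder */ }
        static void example_remove_dev(unsigned int cpu) { /* placeholder */ }

        static int example_cpu_callback(struct notifier_block *nfb,
                                        unsigned long action, void *hcpu)
        {
                unsigned int cpu = (unsigned long)hcpu;

                switch (action) {
                case CPU_ONLINE:
                case CPU_ONLINE_FROZEN:         /* same work during resume */
                        example_add_dev(cpu);
                        break;
                case CPU_DEAD:
                case CPU_DEAD_FROZEN:           /* same work during suspend */
                        example_remove_dev(cpu);
                        break;
                }
                return NOTIFY_OK;
        }
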
diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c index 2f28540caae2..7ba7c3abd3a4 100644 --- a/arch/i386/kernel/cpu/mcheck/therm_throt.c +++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c | |||
@@ -137,10 +137,12 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, | |||
137 | mutex_lock(&therm_cpu_lock); | 137 | mutex_lock(&therm_cpu_lock); |
138 | switch (action) { | 138 | switch (action) { |
139 | case CPU_ONLINE: | 139 | case CPU_ONLINE: |
140 | case CPU_ONLINE_FROZEN: | ||
140 | err = thermal_throttle_add_dev(sys_dev); | 141 | err = thermal_throttle_add_dev(sys_dev); |
141 | WARN_ON(err); | 142 | WARN_ON(err); |
142 | break; | 143 | break; |
143 | case CPU_DEAD: | 144 | case CPU_DEAD: |
145 | case CPU_DEAD_FROZEN: | ||
144 | thermal_throttle_remove_dev(sys_dev); | 146 | thermal_throttle_remove_dev(sys_dev); |
145 | break; | 147 | break; |
146 | } | 148 | } |
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c index 6471a5a13202..200fb3f9ebfb 100644 --- a/arch/i386/kernel/cpu/transmeta.c +++ b/arch/i386/kernel/cpu/transmeta.c | |||
@@ -77,8 +77,10 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | |||
77 | set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); | 77 | set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); |
78 | 78 | ||
79 | /* If we can run i686 user-space code, call us an i686 */ | 79 | /* If we can run i686 user-space code, call us an i686 */ |
80 | #define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV) | 80 | #define USER686 ((1 << X86_FEATURE_TSC)|\ |
81 | if ( c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686 ) | 81 | (1 << X86_FEATURE_CX8)|\ |
82 | (1 << X86_FEATURE_CMOV)) | ||
83 | if (c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686) | ||
82 | c->x86 = 6; | 84 | c->x86 = 6; |
83 | 85 | ||
84 | #ifdef CONFIG_SYSCTL | 86 | #ifdef CONFIG_SYSCTL |
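
Note: the transmeta fix deserves spelling out. X86_FEATURE_* constants are bit numbers into x86_capability[], not masks, so the old macro OR-ed three small integers together and the i686 test compared against garbage; the new macro shifts each bit number into a mask first. A standalone illustration of the arithmetic (bit values as in the i386 cpufeature definitions):

        #include <stdio.h>

        #define FEATURE_TSC     4       /* bit numbers, not masks */
        #define FEATURE_CX8     8
        #define FEATURE_CMOV    15

        int main(void)
        {
                unsigned int wrong = FEATURE_TSC | FEATURE_CX8 | FEATURE_CMOV;
                unsigned int right = (1u << FEATURE_TSC) |
                                     (1u << FEATURE_CX8) |
                                     (1u << FEATURE_CMOV);

                /* prints wrong=0xf right=0x8110 */
                printf("wrong=%#x right=%#x\n", wrong, right);
                return 0;
        }
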
diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index eeae0d992337..5c2faa10e9fa 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c | |||
@@ -169,9 +169,11 @@ static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long ac | |||
169 | 169 | ||
170 | switch (action) { | 170 | switch (action) { |
171 | case CPU_ONLINE: | 171 | case CPU_ONLINE: |
172 | case CPU_ONLINE_FROZEN: | ||
172 | cpuid_device_create(cpu); | 173 | cpuid_device_create(cpu); |
173 | break; | 174 | break; |
174 | case CPU_DEAD: | 175 | case CPU_DEAD: |
176 | case CPU_DEAD_FROZEN: | ||
175 | device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); | 177 | device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); |
176 | break; | 178 | break; |
177 | } | 179 | } |
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index cbe7ec8dbb9f..83f825f2e2d7 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c | |||
@@ -567,7 +567,7 @@ static int cpu_request_microcode(int cpu) | |||
567 | return error; | 567 | return error; |
568 | } | 568 | } |
569 | 569 | ||
570 | static int apply_microcode_on_cpu(int cpu) | 570 | static int apply_microcode_check_cpu(int cpu) |
571 | { | 571 | { |
572 | struct cpuinfo_x86 *c = cpu_data + cpu; | 572 | struct cpuinfo_x86 *c = cpu_data + cpu; |
573 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 573 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
@@ -575,8 +575,9 @@ static int apply_microcode_on_cpu(int cpu) | |||
575 | unsigned int val[2]; | 575 | unsigned int val[2]; |
576 | int err = 0; | 576 | int err = 0; |
577 | 577 | ||
578 | /* Check if the microcode is available */ | ||
578 | if (!uci->mc) | 579 | if (!uci->mc) |
579 | return -EINVAL; | 580 | return 0; |
580 | 581 | ||
581 | old = current->cpus_allowed; | 582 | old = current->cpus_allowed; |
582 | set_cpus_allowed(current, cpumask_of_cpu(cpu)); | 583 | set_cpus_allowed(current, cpumask_of_cpu(cpu)); |
@@ -614,7 +615,7 @@ static int apply_microcode_on_cpu(int cpu) | |||
614 | return err; | 615 | return err; |
615 | } | 616 | } |
616 | 617 | ||
617 | static void microcode_init_cpu(int cpu) | 618 | static void microcode_init_cpu(int cpu, int resume) |
618 | { | 619 | { |
619 | cpumask_t old; | 620 | cpumask_t old; |
620 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 621 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
@@ -624,8 +625,7 @@ static void microcode_init_cpu(int cpu) | |||
624 | set_cpus_allowed(current, cpumask_of_cpu(cpu)); | 625 | set_cpus_allowed(current, cpumask_of_cpu(cpu)); |
625 | mutex_lock(&microcode_mutex); | 626 | mutex_lock(&microcode_mutex); |
626 | collect_cpu_info(cpu); | 627 | collect_cpu_info(cpu); |
627 | if (uci->valid && system_state == SYSTEM_RUNNING && | 628 | if (uci->valid && system_state == SYSTEM_RUNNING && !resume) |
628 | !suspend_cpu_hotplug) | ||
629 | cpu_request_microcode(cpu); | 629 | cpu_request_microcode(cpu); |
629 | mutex_unlock(&microcode_mutex); | 630 | mutex_unlock(&microcode_mutex); |
631 | set_cpus_allowed(current, old); | 631 | set_cpus_allowed(current, old); |
@@ -702,7 +702,7 @@ static struct attribute_group mc_attr_group = { | |||
702 | .name = "microcode", | 702 | .name = "microcode", |
703 | }; | 703 | }; |
704 | 704 | ||
705 | static int mc_sysdev_add(struct sys_device *sys_dev) | 705 | static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) |
706 | { | 706 | { |
707 | int err, cpu = sys_dev->id; | 707 | int err, cpu = sys_dev->id; |
708 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 708 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
@@ -711,39 +711,31 @@ static int mc_sysdev_add(struct sys_device *sys_dev) | |||
711 | return 0; | 711 | return 0; |
712 | 712 | ||
713 | pr_debug("Microcode:CPU %d added\n", cpu); | 713 | pr_debug("Microcode:CPU %d added\n", cpu); |
714 | /* If suspend_cpu_hotplug is set, the system is resuming and we should | 714 | memset(uci, 0, sizeof(*uci)); |
715 | * use the data from before the suspend. | ||
716 | */ | ||
717 | if (suspend_cpu_hotplug) { | ||
718 | err = apply_microcode_on_cpu(cpu); | ||
719 | if (err) | ||
720 | microcode_fini_cpu(cpu); | ||
721 | } | ||
722 | if (!uci->valid) | ||
723 | memset(uci, 0, sizeof(*uci)); | ||
724 | 715 | ||
725 | err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); | 716 | err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); |
726 | if (err) | 717 | if (err) |
727 | return err; | 718 | return err; |
728 | 719 | ||
729 | if (!uci->valid) | 720 | microcode_init_cpu(cpu, resume); |
730 | microcode_init_cpu(cpu); | ||
731 | 721 | ||
732 | return 0; | 722 | return 0; |
733 | } | 723 | } |
734 | 724 | ||
725 | static int mc_sysdev_add(struct sys_device *sys_dev) | ||
726 | { | ||
727 | return __mc_sysdev_add(sys_dev, 0); | ||
728 | } | ||
729 | |||
735 | static int mc_sysdev_remove(struct sys_device *sys_dev) | 730 | static int mc_sysdev_remove(struct sys_device *sys_dev) |
736 | { | 731 | { |
737 | int cpu = sys_dev->id; | 732 | int cpu = sys_dev->id; |
738 | 733 | ||
739 | if (!cpu_online(cpu)) | 734 | if (!cpu_online(cpu)) |
740 | return 0; | 735 | return 0; |
736 | |||
741 | pr_debug("Microcode:CPU %d removed\n", cpu); | 737 | pr_debug("Microcode:CPU %d removed\n", cpu); |
742 | /* If suspend_cpu_hotplug is set, the system is suspending and we should | 738 | microcode_fini_cpu(cpu); |
743 | * keep the microcode in memory for the resume. | ||
744 | */ | ||
745 | if (!suspend_cpu_hotplug) | ||
746 | microcode_fini_cpu(cpu); | ||
747 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | 739 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); |
748 | return 0; | 740 | return 0; |
749 | } | 741 | } |
@@ -774,13 +766,34 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | |||
774 | 766 | ||
775 | sys_dev = get_cpu_sysdev(cpu); | 767 | sys_dev = get_cpu_sysdev(cpu); |
776 | switch (action) { | 768 | switch (action) { |
769 | case CPU_UP_CANCELED_FROZEN: | ||
770 | /* The CPU refused to come up during a system resume */ | ||
771 | microcode_fini_cpu(cpu); | ||
772 | break; | ||
777 | case CPU_ONLINE: | 773 | case CPU_ONLINE: |
778 | case CPU_DOWN_FAILED: | 774 | case CPU_DOWN_FAILED: |
779 | mc_sysdev_add(sys_dev); | 775 | mc_sysdev_add(sys_dev); |
780 | break; | 776 | break; |
777 | case CPU_ONLINE_FROZEN: | ||
778 | /* System-wide resume is in progress, try to apply microcode */ | ||
779 | if (apply_microcode_check_cpu(cpu)) { | ||
780 | /* The application of microcode failed */ | ||
781 | microcode_fini_cpu(cpu); | ||
782 | __mc_sysdev_add(sys_dev, 1); | ||
783 | break; | ||
784 | } | ||
785 | case CPU_DOWN_FAILED_FROZEN: | ||
786 | if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) | ||
787 | printk(KERN_ERR "Microcode: Failed to create the sysfs " | ||
788 | "group for CPU%d\n", cpu); | ||
789 | break; | ||
781 | case CPU_DOWN_PREPARE: | 790 | case CPU_DOWN_PREPARE: |
782 | mc_sysdev_remove(sys_dev); | 791 | mc_sysdev_remove(sys_dev); |
783 | break; | 792 | break; |
793 | case CPU_DOWN_PREPARE_FROZEN: | ||
794 | /* Suspend is in progress, only remove the interface */ | ||
795 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | ||
796 | break; | ||
784 | } | 797 | } |
785 | return NOTIFY_OK; | 798 | return NOTIFY_OK; |
786 | } | 799 | } |
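
Note: unlike the simpler notifiers, the microcode driver cannot treat the frozen events as plain aliases. On CPU_ONLINE_FROZEN it first tries to reapply the microcode saved before suspend and only discards it on failure; on success it deliberately falls through to CPU_DOWN_FAILED_FROZEN to recreate just the sysfs group that CPU_DOWN_PREPARE_FROZEN removed. The shape of that control flow, reduced to a sketch with stub helpers (all names here are placeholders for the real microcode functions):

        #include <linux/cpu.h>

        static int reapply_saved_microcode(int cpu) { return 0; }      /* stub */
        static void drop_saved_microcode(int cpu) { }                  /* stub */
        static void add_device_from_scratch(int cpu) { }               /* stub */
        static void recreate_sysfs_group(int cpu) { }                  /* stub */

        static void handle_frozen_events(int cpu, unsigned long action)
        {
                switch (action) {
                case CPU_ONLINE_FROZEN:
                        if (reapply_saved_microcode(cpu)) {
                                drop_saved_microcode(cpu);
                                add_device_from_scratch(cpu);
                                break;
                        }
                        /* fall through: microcode kept, restore sysfs only */
                case CPU_DOWN_FAILED_FROZEN:
                        recreate_sysfs_group(cpu);
                        break;
                }
        }
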
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index 8cd0a91ce107..0c1069b8d638 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c | |||
@@ -153,9 +153,11 @@ static int msr_class_cpu_callback(struct notifier_block *nfb, | |||
153 | 153 | ||
154 | switch (action) { | 154 | switch (action) { |
155 | case CPU_ONLINE: | 155 | case CPU_ONLINE: |
156 | case CPU_ONLINE_FROZEN: | ||
156 | msr_device_create(cpu); | 157 | msr_device_create(cpu); |
157 | break; | 158 | break; |
158 | case CPU_DEAD: | 159 | case CPU_DEAD: |
160 | case CPU_DEAD_FROZEN: | ||
159 | device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); | 161 | device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); |
160 | break; | 162 | break; |
161 | } | 163 | } |
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 4bec0cbf407a..c05e7e861b29 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c | |||
@@ -305,7 +305,7 @@ void show_registers(struct pt_regs *regs) | |||
305 | regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); | 305 | regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); |
306 | printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", | 306 | printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", |
307 | TASK_COMM_LEN, current->comm, current->pid, | 307 | TASK_COMM_LEN, current->comm, current->pid, |
308 | current_thread_info(), current, current->thread_info); | 308 | current_thread_info(), current, task_thread_info(current)); |
309 | /* | 309 | /* |
310 | * When in-kernel, we also print out the stack and code at the | 310 | * When in-kernel, we also print out the stack and code at the |
311 | * time of the fault.. | 311 | * time of the fault.. |
diff --git a/arch/i386/mach-generic/probe.c b/arch/i386/mach-generic/probe.c index a7b3999bb37a..74f3da634423 100644 --- a/arch/i386/mach-generic/probe.c +++ b/arch/i386/mach-generic/probe.c | |||
@@ -119,9 +119,7 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
119 | return 0; | 119 | return 0; |
120 | } | 120 | } |
121 | 121 | ||
122 | #ifdef CONFIG_SMP | ||
123 | int hard_smp_processor_id(void) | 122 | int hard_smp_processor_id(void) |
124 | { | 123 | { |
125 | return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID)); | 124 | return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID)); |
126 | } | 125 | } |
127 | #endif | ||
diff --git a/arch/i386/mach-voyager/voyager_basic.c b/arch/i386/mach-voyager/voyager_basic.c index 8fe7e4593d5f..9b77b39b71a6 100644 --- a/arch/i386/mach-voyager/voyager_basic.c +++ b/arch/i386/mach-voyager/voyager_basic.c | |||
@@ -292,8 +292,8 @@ machine_emergency_restart(void) | |||
292 | void | 292 | void |
293 | mca_nmi_hook(void) | 293 | mca_nmi_hook(void) |
294 | { | 294 | { |
295 | __u8 dumpval __attribute__((unused)) = inb(0xf823); | 295 | __u8 dumpval __maybe_unused = inb(0xf823); |
296 | __u8 swnmi __attribute__((unused)) = inb(0xf813); | 296 | __u8 swnmi __maybe_unused = inb(0xf813); |
297 | 297 | ||
298 | /* FIXME: assume dump switch pressed */ | 298 | /* FIXME: assume dump switch pressed */ |
299 | /* check to see if the dump switch was pressed */ | 299 | /* check to see if the dump switch was pressed */ |
diff --git a/arch/i386/pci/init.c b/arch/i386/pci/init.c index 1cf11af96de2..3de9f9ba2da6 100644 --- a/arch/i386/pci/init.c +++ b/arch/i386/pci/init.c | |||
@@ -6,7 +6,7 @@ | |||
6 | in the right sequence from here. */ | 6 | in the right sequence from here. */ |
7 | static __init int pci_access_init(void) | 7 | static __init int pci_access_init(void) |
8 | { | 8 | { |
9 | int type __attribute__((unused)) = 0; | 9 | int type __maybe_unused = 0; |
10 | 10 | ||
11 | #ifdef CONFIG_PCI_DIRECT | 11 | #ifdef CONFIG_PCI_DIRECT |
12 | type = pci_direct_probe(); | 12 | type = pci_direct_probe(); |
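
Note: these two hunks swap the open-coded GCC attribute for the kernel's __maybe_unused annotation from <linux/compiler.h>. The variable is only consumed under some config combinations (or, in the voyager case, exists purely for the inb() side effect), so the marker silences the unused-variable warning without #ifdef clutter. A small sketch of the idiom; CONFIG_EXAMPLE_OPTION and do_probe() are hypothetical:

        #include <linux/compiler.h>

        static int do_probe(void) { return 1; }        /* hypothetical */

        static int probe_something(void)
        {
                int type __maybe_unused = 0;    /* written only when the option is on */

        #ifdef CONFIG_EXAMPLE_OPTION            /* hypothetical symbol */
                type = do_probe();
        #endif
                return 0;
        }
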
diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index d3e9f33e8bdd..6a49600cf337 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c | |||
@@ -236,9 +236,11 @@ static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb, | |||
236 | sys_dev = get_cpu_sysdev(cpu); | 236 | sys_dev = get_cpu_sysdev(cpu); |
237 | switch (action) { | 237 | switch (action) { |
238 | case CPU_ONLINE: | 238 | case CPU_ONLINE: |
239 | case CPU_ONLINE_FROZEN: | ||
239 | err_inject_add_dev(sys_dev); | 240 | err_inject_add_dev(sys_dev); |
240 | break; | 241 | break; |
241 | case CPU_DEAD: | 242 | case CPU_DEAD: |
243 | case CPU_DEAD_FROZEN: | ||
242 | err_inject_remove_dev(sys_dev); | 244 | err_inject_remove_dev(sys_dev); |
243 | break; | 245 | break; |
244 | } | 246 | } |
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 1d7cc7e2ce32..f8ae709de0b5 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c | |||
@@ -1689,7 +1689,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset, | |||
1689 | ti->preempt_count = 1; | 1689 | ti->preempt_count = 1; |
1690 | ti->task = p; | 1690 | ti->task = p; |
1691 | ti->cpu = cpu; | 1691 | ti->cpu = cpu; |
1692 | p->thread_info = ti; | 1692 | p->stack = ti; |
1693 | p->state = TASK_UNINTERRUPTIBLE; | 1693 | p->state = TASK_UNINTERRUPTIBLE; |
1694 | cpu_set(cpu, p->cpus_allowed); | 1694 | cpu_set(cpu, p->cpus_allowed); |
1695 | INIT_LIST_HEAD(&p->tasks); | 1695 | INIT_LIST_HEAD(&p->tasks); |
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index a71df9ae0397..85829e27785c 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c | |||
@@ -975,9 +975,11 @@ static int palinfo_cpu_callback(struct notifier_block *nfb, | |||
975 | 975 | ||
976 | switch (action) { | 976 | switch (action) { |
977 | case CPU_ONLINE: | 977 | case CPU_ONLINE: |
978 | case CPU_ONLINE_FROZEN: | ||
978 | create_palinfo_proc_entries(hotcpu); | 979 | create_palinfo_proc_entries(hotcpu); |
979 | break; | 980 | break; |
980 | case CPU_DEAD: | 981 | case CPU_DEAD: |
982 | case CPU_DEAD_FROZEN: | ||
981 | remove_palinfo_proc_entries(hotcpu); | 983 | remove_palinfo_proc_entries(hotcpu); |
982 | break; | 984 | break; |
983 | } | 985 | } |
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index a51f1d0bfb70..89f6b138a62c 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c | |||
@@ -582,6 +582,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu | |||
582 | struct salinfo_data *data; | 582 | struct salinfo_data *data; |
583 | switch (action) { | 583 | switch (action) { |
584 | case CPU_ONLINE: | 584 | case CPU_ONLINE: |
585 | case CPU_ONLINE_FROZEN: | ||
585 | spin_lock_irqsave(&data_saved_lock, flags); | 586 | spin_lock_irqsave(&data_saved_lock, flags); |
586 | for (i = 0, data = salinfo_data; | 587 | for (i = 0, data = salinfo_data; |
587 | i < ARRAY_SIZE(salinfo_data); | 588 | i < ARRAY_SIZE(salinfo_data); |
@@ -592,6 +593,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu | |||
592 | spin_unlock_irqrestore(&data_saved_lock, flags); | 593 | spin_unlock_irqrestore(&data_saved_lock, flags); |
593 | break; | 594 | break; |
594 | case CPU_DEAD: | 595 | case CPU_DEAD: |
596 | case CPU_DEAD_FROZEN: | ||
595 | spin_lock_irqsave(&data_saved_lock, flags); | 597 | spin_lock_irqsave(&data_saved_lock, flags); |
596 | for (i = 0, data = salinfo_data; | 598 | for (i = 0, data = salinfo_data; |
597 | i < ARRAY_SIZE(salinfo_data); | 599 | i < ARRAY_SIZE(salinfo_data); |
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 687500ddb4b8..94ae3c87d828 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c | |||
@@ -412,9 +412,11 @@ static int __cpuinit cache_cpu_callback(struct notifier_block *nfb, | |||
412 | sys_dev = get_cpu_sysdev(cpu); | 412 | sys_dev = get_cpu_sysdev(cpu); |
413 | switch (action) { | 413 | switch (action) { |
414 | case CPU_ONLINE: | 414 | case CPU_ONLINE: |
415 | case CPU_ONLINE_FROZEN: | ||
415 | cache_add_dev(sys_dev); | 416 | cache_add_dev(sys_dev); |
416 | break; | 417 | break; |
417 | case CPU_DEAD: | 418 | case CPU_DEAD: |
419 | case CPU_DEAD_FROZEN: | ||
418 | cache_remove_dev(sys_dev); | 420 | cache_remove_dev(sys_dev); |
419 | break; | 421 | break; |
420 | } | 422 | } |
diff --git a/arch/m68knommu/kernel/asm-offsets.c b/arch/m68knommu/kernel/asm-offsets.c index b988c7bdc6e4..7cd183d346ef 100644 --- a/arch/m68knommu/kernel/asm-offsets.c +++ b/arch/m68knommu/kernel/asm-offsets.c | |||
@@ -31,7 +31,7 @@ int main(void) | |||
31 | DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace)); | 31 | DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace)); |
32 | DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked)); | 32 | DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked)); |
33 | DEFINE(TASK_THREAD, offsetof(struct task_struct, thread)); | 33 | DEFINE(TASK_THREAD, offsetof(struct task_struct, thread)); |
34 | DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, thread_info)); | 34 | DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, stack)); |
35 | DEFINE(TASK_MM, offsetof(struct task_struct, mm)); | 35 | DEFINE(TASK_MM, offsetof(struct task_struct, mm)); |
36 | DEFINE(TASK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); | 36 | DEFINE(TASK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); |
37 | 37 | ||
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index 761a779d5c4f..3b27309d54b1 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c | |||
@@ -82,7 +82,7 @@ void output_task_defines(void) | |||
82 | { | 82 | { |
83 | text("/* MIPS task_struct offsets. */"); | 83 | text("/* MIPS task_struct offsets. */"); |
84 | offset("#define TASK_STATE ", struct task_struct, state); | 84 | offset("#define TASK_STATE ", struct task_struct, state); |
85 | offset("#define TASK_THREAD_INFO ", struct task_struct, thread_info); | 85 | offset("#define TASK_THREAD_INFO ", struct task_struct, stack); |
86 | offset("#define TASK_FLAGS ", struct task_struct, flags); | 86 | offset("#define TASK_FLAGS ", struct task_struct, flags); |
87 | offset("#define TASK_MM ", struct task_struct, mm); | 87 | offset("#define TASK_MM ", struct task_struct, mm); |
88 | offset("#define TASK_PID ", struct task_struct, pid); | 88 | offset("#define TASK_PID ", struct task_struct, pid); |
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index 5dcfab6b288e..b361edb83dc6 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c | |||
@@ -560,7 +560,7 @@ void smtc_boot_secondary(int cpu, struct task_struct *idle) | |||
560 | write_tc_gpr_sp(__KSTK_TOS(idle)); | 560 | write_tc_gpr_sp(__KSTK_TOS(idle)); |
561 | 561 | ||
562 | /* global pointer */ | 562 | /* global pointer */ |
563 | write_tc_gpr_gp((unsigned long)idle->thread_info); | 563 | write_tc_gpr_gp((unsigned long)task_thread_info(idle)); |
564 | 564 | ||
565 | smtc_status |= SMTC_MTC_ACTIVE; | 565 | smtc_status |= SMTC_MTC_ACTIVE; |
566 | write_tc_c0_tchalt(0); | 566 | write_tc_c0_tchalt(0); |
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index 54fdb959149c..d3b7917a87cb 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c | |||
@@ -54,7 +54,7 @@ | |||
54 | 54 | ||
55 | int main(void) | 55 | int main(void) |
56 | { | 56 | { |
57 | DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, thread_info)); | 57 | DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, stack)); |
58 | DEFINE(TASK_STATE, offsetof(struct task_struct, state)); | 58 | DEFINE(TASK_STATE, offsetof(struct task_struct, state)); |
59 | DEFINE(TASK_FLAGS, offsetof(struct task_struct, flags)); | 59 | DEFINE(TASK_FLAGS, offsetof(struct task_struct, flags)); |
60 | DEFINE(TASK_SIGPENDING, offsetof(struct task_struct, pending)); | 60 | DEFINE(TASK_SIGPENDING, offsetof(struct task_struct, pending)); |
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 8f48560b7ee2..37bc35e69dbe 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
@@ -58,7 +58,7 @@ int main(void) | |||
58 | #ifdef CONFIG_PPC64 | 58 | #ifdef CONFIG_PPC64 |
59 | DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); | 59 | DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); |
60 | #else | 60 | #else |
61 | DEFINE(THREAD_INFO, offsetof(struct task_struct, thread_info)); | 61 | DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); |
62 | DEFINE(PTRACE, offsetof(struct task_struct, ptrace)); | 62 | DEFINE(PTRACE, offsetof(struct task_struct, ptrace)); |
63 | #endif /* CONFIG_PPC64 */ | 63 | #endif /* CONFIG_PPC64 */ |
64 | 64 | ||
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index cae39d9dfe48..68991c2d4a1b 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c | |||
@@ -342,10 +342,12 @@ static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, | |||
342 | 342 | ||
343 | switch (action) { | 343 | switch (action) { |
344 | case CPU_ONLINE: | 344 | case CPU_ONLINE: |
345 | case CPU_ONLINE_FROZEN: | ||
345 | register_cpu_online(cpu); | 346 | register_cpu_online(cpu); |
346 | break; | 347 | break; |
347 | #ifdef CONFIG_HOTPLUG_CPU | 348 | #ifdef CONFIG_HOTPLUG_CPU |
348 | case CPU_DEAD: | 349 | case CPU_DEAD: |
350 | case CPU_DEAD_FROZEN: | ||
349 | unregister_cpu_online(cpu); | 351 | unregister_cpu_online(cpu); |
350 | break; | 352 | break; |
351 | #endif | 353 | #endif |
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b3a592b25ab3..de45aa82d97b 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c | |||
@@ -252,12 +252,15 @@ static int __cpuinit cpu_numa_callback(struct notifier_block *nfb, | |||
252 | 252 | ||
253 | switch (action) { | 253 | switch (action) { |
254 | case CPU_UP_PREPARE: | 254 | case CPU_UP_PREPARE: |
255 | case CPU_UP_PREPARE_FROZEN: | ||
255 | numa_setup_cpu(lcpu); | 256 | numa_setup_cpu(lcpu); |
256 | ret = NOTIFY_OK; | 257 | ret = NOTIFY_OK; |
257 | break; | 258 | break; |
258 | #ifdef CONFIG_HOTPLUG_CPU | 259 | #ifdef CONFIG_HOTPLUG_CPU |
259 | case CPU_DEAD: | 260 | case CPU_DEAD: |
261 | case CPU_DEAD_FROZEN: | ||
260 | case CPU_UP_CANCELED: | 262 | case CPU_UP_CANCELED: |
263 | case CPU_UP_CANCELED_FROZEN: | ||
261 | unmap_cpu_from_node(lcpu); | 264 | unmap_cpu_from_node(lcpu); |
262 | break; | 265 | break; |
263 | ret = NOTIFY_OK; | 266 | ret = NOTIFY_OK; |
diff --git a/arch/ppc/kernel/asm-offsets.c b/arch/ppc/kernel/asm-offsets.c index c5850a272650..e8e94321b59e 100644 --- a/arch/ppc/kernel/asm-offsets.c +++ b/arch/ppc/kernel/asm-offsets.c | |||
@@ -35,7 +35,7 @@ int | |||
35 | main(void) | 35 | main(void) |
36 | { | 36 | { |
37 | DEFINE(THREAD, offsetof(struct task_struct, thread)); | 37 | DEFINE(THREAD, offsetof(struct task_struct, thread)); |
38 | DEFINE(THREAD_INFO, offsetof(struct task_struct, thread_info)); | 38 | DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); |
39 | DEFINE(MM, offsetof(struct task_struct, mm)); | 39 | DEFINE(MM, offsetof(struct task_struct, mm)); |
40 | DEFINE(PTRACE, offsetof(struct task_struct, ptrace)); | 40 | DEFINE(PTRACE, offsetof(struct task_struct, ptrace)); |
41 | DEFINE(KSP, offsetof(struct thread_struct, ksp)); | 41 | DEFINE(KSP, offsetof(struct thread_struct, ksp)); |
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index ee89b33145d5..81a2b92ab0c2 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c | |||
@@ -567,9 +567,11 @@ appldata_cpu_notify(struct notifier_block *self, | |||
567 | { | 567 | { |
568 | switch (action) { | 568 | switch (action) { |
569 | case CPU_ONLINE: | 569 | case CPU_ONLINE: |
570 | case CPU_ONLINE_FROZEN: | ||
570 | appldata_online_cpu((long) hcpu); | 571 | appldata_online_cpu((long) hcpu); |
571 | break; | 572 | break; |
572 | case CPU_DEAD: | 573 | case CPU_DEAD: |
574 | case CPU_DEAD_FROZEN: | ||
573 | appldata_offline_cpu((long) hcpu); | 575 | appldata_offline_cpu((long) hcpu); |
574 | break; | 576 | break; |
575 | default: | 577 | default: |
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index ec514fe5ccd0..1375f8a4469e 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c | |||
@@ -15,7 +15,7 @@ | |||
15 | 15 | ||
16 | int main(void) | 16 | int main(void) |
17 | { | 17 | { |
18 | DEFINE(__THREAD_info, offsetof(struct task_struct, thread_info),); | 18 | DEFINE(__THREAD_info, offsetof(struct task_struct, stack),); |
19 | DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp),); | 19 | DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp),); |
20 | DEFINE(__THREAD_per, offsetof(struct task_struct, thread.per_info),); | 20 | DEFINE(__THREAD_per, offsetof(struct task_struct, thread.per_info),); |
21 | DEFINE(__THREAD_mm_segment, | 21 | DEFINE(__THREAD_mm_segment, |
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b7977027a28f..09f028a3266b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c | |||
@@ -789,10 +789,12 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self, | |||
789 | 789 | ||
790 | switch (action) { | 790 | switch (action) { |
791 | case CPU_ONLINE: | 791 | case CPU_ONLINE: |
792 | case CPU_ONLINE_FROZEN: | ||
792 | if (sysdev_create_file(s, &attr_capability)) | 793 | if (sysdev_create_file(s, &attr_capability)) |
793 | return NOTIFY_BAD; | 794 | return NOTIFY_BAD; |
794 | break; | 795 | break; |
795 | case CPU_DEAD: | 796 | case CPU_DEAD: |
797 | case CPU_DEAD_FROZEN: | ||
796 | sysdev_remove_file(s, &attr_capability); | 798 | sysdev_remove_file(s, &attr_capability); |
797 | break; | 799 | break; |
798 | } | 800 | } |
diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c index 29d7cfd1c970..6773ed76e414 100644 --- a/arch/sparc/kernel/asm-offsets.c +++ b/arch/sparc/kernel/asm-offsets.c | |||
@@ -28,7 +28,7 @@ int foo(void) | |||
28 | DEFINE(AOFF_task_gid, offsetof(struct task_struct, gid)); | 28 | DEFINE(AOFF_task_gid, offsetof(struct task_struct, gid)); |
29 | DEFINE(AOFF_task_euid, offsetof(struct task_struct, euid)); | 29 | DEFINE(AOFF_task_euid, offsetof(struct task_struct, euid)); |
30 | DEFINE(AOFF_task_egid, offsetof(struct task_struct, egid)); | 30 | DEFINE(AOFF_task_egid, offsetof(struct task_struct, egid)); |
31 | /* DEFINE(THREAD_INFO, offsetof(struct task_struct, thread_info)); */ | 31 | /* DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); */ |
32 | DEFINE(ASIZ_task_uid, sizeof(current->uid)); | 32 | DEFINE(ASIZ_task_uid, sizeof(current->uid)); |
33 | DEFINE(ASIZ_task_gid, sizeof(current->gid)); | 33 | DEFINE(ASIZ_task_gid, sizeof(current->gid)); |
34 | DEFINE(ASIZ_task_euid, sizeof(current->euid)); | 34 | DEFINE(ASIZ_task_euid, sizeof(current->euid)); |
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index dc652f210290..d0fde36395b4 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/init.h> | 19 | #include <linux/init.h> |
20 | #include <linux/kdebug.h> | 20 | #include <linux/kdebug.h> |
21 | 21 | ||
22 | #include <asm/smp.h> | ||
22 | #include <asm/delay.h> | 23 | #include <asm/delay.h> |
23 | #include <asm/system.h> | 24 | #include <asm/system.h> |
24 | #include <asm/ptrace.h> | 25 | #include <asm/ptrace.h> |
diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 354cc6b70530..b9c0f307a8fa 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig | |||
@@ -320,21 +320,7 @@ source "crypto/Kconfig" | |||
320 | 320 | ||
321 | source "lib/Kconfig" | 321 | source "lib/Kconfig" |
322 | 322 | ||
323 | menu "SCSI support" | 323 | source "drivers/scsi/Kconfig" |
324 | depends on BROKEN | ||
325 | |||
326 | config SCSI | ||
327 | tristate "SCSI support" | ||
328 | |||
329 | # This gives us free_dma, which scsi.c wants. | ||
330 | config GENERIC_ISA_DMA | ||
331 | bool | ||
332 | depends on SCSI | ||
333 | default y | ||
334 | |||
335 | source "arch/um/Kconfig.scsi" | ||
336 | |||
337 | endmenu | ||
338 | 324 | ||
339 | source "drivers/md/Kconfig" | 325 | source "drivers/md/Kconfig" |
340 | 326 | ||
diff --git a/arch/um/Kconfig.scsi b/arch/um/Kconfig.scsi deleted file mode 100644 index c291c942b1a8..000000000000 --- a/arch/um/Kconfig.scsi +++ /dev/null | |||
@@ -1,58 +0,0 @@ | |||
1 | comment "SCSI support type (disk, tape, CD-ROM)" | ||
2 | depends on SCSI | ||
3 | |||
4 | config BLK_DEV_SD | ||
5 | tristate "SCSI disk support" | ||
6 | depends on SCSI | ||
7 | |||
8 | config SD_EXTRA_DEVS | ||
9 | int "Maximum number of SCSI disks that can be loaded as modules" | ||
10 | depends on BLK_DEV_SD | ||
11 | default "40" | ||
12 | |||
13 | config CHR_DEV_ST | ||
14 | tristate "SCSI tape support" | ||
15 | depends on SCSI | ||
16 | |||
17 | config BLK_DEV_SR | ||
18 | tristate "SCSI CD-ROM support" | ||
19 | depends on SCSI | ||
20 | |||
21 | config BLK_DEV_SR_VENDOR | ||
22 | bool "Enable vendor-specific extensions (for SCSI CDROM)" | ||
23 | depends on BLK_DEV_SR | ||
24 | |||
25 | config SR_EXTRA_DEVS | ||
26 | int "Maximum number of CDROM devices that can be loaded as modules" | ||
27 | depends on BLK_DEV_SR | ||
28 | default "2" | ||
29 | |||
30 | config CHR_DEV_SG | ||
31 | tristate "SCSI generic support" | ||
32 | depends on SCSI | ||
33 | |||
34 | comment "Some SCSI devices (e.g. CD jukebox) support multiple LUNs" | ||
35 | depends on SCSI | ||
36 | |||
37 | #if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then | ||
38 | config SCSI_DEBUG_QUEUES | ||
39 | bool "Enable extra checks in new queueing code" | ||
40 | depends on SCSI | ||
41 | |||
42 | #fi | ||
43 | config SCSI_MULTI_LUN | ||
44 | bool "Probe all LUNs on each SCSI device" | ||
45 | depends on SCSI | ||
46 | |||
47 | config SCSI_CONSTANTS | ||
48 | bool "Verbose SCSI error reporting (kernel size +=12K)" | ||
49 | depends on SCSI | ||
50 | |||
51 | config SCSI_LOGGING | ||
52 | bool "SCSI logging facility" | ||
53 | depends on SCSI | ||
54 | |||
55 | config SCSI_DEBUG | ||
56 | tristate "SCSI debugging host simulator (EXPERIMENTAL)" | ||
57 | depends on SCSI | ||
58 | |||
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index ef36facd8fe9..a96ae1a0610e 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c | |||
@@ -178,20 +178,23 @@ int start_uml_skas(void) | |||
178 | 178 | ||
179 | int external_pid_skas(struct task_struct *task) | 179 | int external_pid_skas(struct task_struct *task) |
180 | { | 180 | { |
181 | #warning Need to look up userspace_pid by cpu | 181 | /* FIXME: Need to look up userspace_pid by cpu */ |
182 | return(userspace_pid[0]); | 182 | return(userspace_pid[0]); |
183 | } | 183 | } |
184 | 184 | ||
185 | int thread_pid_skas(struct task_struct *task) | 185 | int thread_pid_skas(struct task_struct *task) |
186 | { | 186 | { |
187 | #warning Need to look up userspace_pid by cpu | 187 | /* FIXME: Need to look up userspace_pid by cpu */ |
188 | return(userspace_pid[0]); | 188 | return(userspace_pid[0]); |
189 | } | 189 | } |
190 | 190 | ||
191 | void kill_off_processes_skas(void) | 191 | void kill_off_processes_skas(void) |
192 | { | 192 | { |
193 | if(proc_mm) | 193 | if(proc_mm) |
194 | #warning need to loop over userspace_pids in kill_off_processes_skas | 194 | /* |
195 | * FIXME: need to loop over userspace_pids in | ||
196 | * kill_off_processes_skas | ||
197 | */ | ||
195 | os_kill_ptraced_process(userspace_pid[0], 1); | 198 | os_kill_ptraced_process(userspace_pid[0], 1); |
196 | else { | 199 | else { |
197 | struct task_struct *p; | 200 | struct task_struct *p; |
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index 92a7b59120d6..2d9d2ca39299 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c | |||
@@ -239,6 +239,7 @@ out: | |||
239 | return ok; | 239 | return ok; |
240 | } | 240 | } |
241 | 241 | ||
242 | #ifdef UML_CONFIG_MODE_TT | ||
242 | void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)) | 243 | void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)) |
243 | { | 244 | { |
244 | int flags = 0, pages; | 245 | int flags = 0, pages; |
@@ -260,6 +261,7 @@ void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)) | |||
260 | "errno = %d\n", errno); | 261 | "errno = %d\n", errno); |
261 | } | 262 | } |
262 | } | 263 | } |
264 | #endif | ||
263 | 265 | ||
264 | void init_new_thread_signals(void) | 266 | void init_new_thread_signals(void) |
265 | { | 267 | { |
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c index 8e490fff3d47..5c8946320799 100644 --- a/arch/um/os-Linux/skas/mem.c +++ b/arch/um/os-Linux/skas/mem.c | |||
@@ -68,7 +68,7 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr) | |||
68 | int err, pid = mm_idp->u.pid; | 68 | int err, pid = mm_idp->u.pid; |
69 | 69 | ||
70 | if(proc_mm) | 70 | if(proc_mm) |
71 | #warning Need to look up userspace_pid by cpu | 71 | /* FIXME: Need to look up userspace_pid by cpu */ |
72 | pid = userspace_pid[0]; | 72 | pid = userspace_pid[0]; |
73 | 73 | ||
74 | multi_count++; | 74 | multi_count++; |
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 5c088a55396c..6a0e466d01e3 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c | |||
@@ -586,7 +586,7 @@ void switch_mm_skas(struct mm_id *mm_idp) | |||
586 | { | 586 | { |
587 | int err; | 587 | int err; |
588 | 588 | ||
589 | #warning need cpu pid in switch_mm_skas | 589 | /* FIXME: need cpu pid in switch_mm_skas */ |
590 | if(proc_mm){ | 590 | if(proc_mm){ |
591 | err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, | 591 | err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, |
592 | mm_idp->u.mm_fd); | 592 | mm_idp->u.mm_fd); |
diff --git a/arch/v850/kernel/asm-offsets.c b/arch/v850/kernel/asm-offsets.c index 24f291369070..cee5c3142d41 100644 --- a/arch/v850/kernel/asm-offsets.c +++ b/arch/v850/kernel/asm-offsets.c | |||
@@ -29,7 +29,7 @@ int main (void) | |||
29 | DEFINE (TASK_PTRACE, offsetof (struct task_struct, ptrace)); | 29 | DEFINE (TASK_PTRACE, offsetof (struct task_struct, ptrace)); |
30 | DEFINE (TASK_BLOCKED, offsetof (struct task_struct, blocked)); | 30 | DEFINE (TASK_BLOCKED, offsetof (struct task_struct, blocked)); |
31 | DEFINE (TASK_THREAD, offsetof (struct task_struct, thread)); | 31 | DEFINE (TASK_THREAD, offsetof (struct task_struct, thread)); |
32 | DEFINE (TASK_THREAD_INFO, offsetof (struct task_struct, thread_info)); | 32 | DEFINE (TASK_THREAD_INFO, offsetof (struct task_struct, stack)); |
33 | DEFINE (TASK_MM, offsetof (struct task_struct, mm)); | 33 | DEFINE (TASK_MM, offsetof (struct task_struct, mm)); |
34 | DEFINE (TASK_ACTIVE_MM, offsetof (struct task_struct, active_mm)); | 34 | DEFINE (TASK_ACTIVE_MM, offsetof (struct task_struct, active_mm)); |
35 | DEFINE (TASK_PID, offsetof (struct task_struct, pid)); | 35 | DEFINE (TASK_PID, offsetof (struct task_struct, pid)); |
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index 3bc30d2c13d3..3eaceac32481 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c | |||
@@ -32,7 +32,7 @@ atomic_t irq_err_count; | |||
32 | */ | 32 | */ |
33 | static inline void stack_overflow_check(struct pt_regs *regs) | 33 | static inline void stack_overflow_check(struct pt_regs *regs) |
34 | { | 34 | { |
35 | u64 curbase = (u64) current->thread_info; | 35 | u64 curbase = (u64)task_stack_page(current); |
36 | static unsigned long warned = -60*HZ; | 36 | static unsigned long warned = -60*HZ; |
37 | 37 | ||
38 | if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE && | 38 | if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE && |
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 442169640e45..a14375dd5425 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c | |||
@@ -720,9 +720,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
720 | 720 | ||
721 | switch (action) { | 721 | switch (action) { |
722 | case CPU_ONLINE: | 722 | case CPU_ONLINE: |
723 | case CPU_ONLINE_FROZEN: | ||
723 | mce_create_device(cpu); | 724 | mce_create_device(cpu); |
724 | break; | 725 | break; |
725 | case CPU_DEAD: | 726 | case CPU_DEAD: |
727 | case CPU_DEAD_FROZEN: | ||
726 | mce_remove_device(cpu); | 728 | mce_remove_device(cpu); |
727 | break; | 729 | break; |
728 | } | 730 | } |
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c index d0bd5d66e103..03356e64f9c8 100644 --- a/arch/x86_64/kernel/mce_amd.c +++ b/arch/x86_64/kernel/mce_amd.c | |||
@@ -654,9 +654,11 @@ static int threshold_cpu_callback(struct notifier_block *nfb, | |||
654 | 654 | ||
655 | switch (action) { | 655 | switch (action) { |
656 | case CPU_ONLINE: | 656 | case CPU_ONLINE: |
657 | case CPU_ONLINE_FROZEN: | ||
657 | threshold_create_device(cpu); | 658 | threshold_create_device(cpu); |
658 | break; | 659 | break; |
659 | case CPU_DEAD: | 660 | case CPU_DEAD: |
661 | case CPU_DEAD_FROZEN: | ||
660 | threshold_remove_device(cpu); | 662 | threshold_remove_device(cpu); |
661 | break; | 663 | break; |
662 | default: | 664 | default: |
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index dc32cef96195..51d4c6fa88c8 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c | |||
@@ -327,7 +327,7 @@ static int __cpuinit | |||
327 | cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) | 327 | cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) |
328 | { | 328 | { |
329 | long cpu = (long)arg; | 329 | long cpu = (long)arg; |
330 | if (action == CPU_ONLINE) | 330 | if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) |
331 | smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); | 331 | smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); |
332 | return NOTIFY_DONE; | 332 | return NOTIFY_DONE; |
333 | } | 333 | } |
diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c index b256cfbef344..698079b3a336 100644 --- a/arch/xtensa/kernel/asm-offsets.c +++ b/arch/xtensa/kernel/asm-offsets.c | |||
@@ -70,7 +70,7 @@ int main(void) | |||
70 | DEFINE(TASK_ACTIVE_MM, offsetof (struct task_struct, active_mm)); | 70 | DEFINE(TASK_ACTIVE_MM, offsetof (struct task_struct, active_mm)); |
71 | DEFINE(TASK_PID, offsetof (struct task_struct, pid)); | 71 | DEFINE(TASK_PID, offsetof (struct task_struct, pid)); |
72 | DEFINE(TASK_THREAD, offsetof (struct task_struct, thread)); | 72 | DEFINE(TASK_THREAD, offsetof (struct task_struct, thread)); |
73 | DEFINE(TASK_THREAD_INFO, offsetof (struct task_struct, thread_info)); | 73 | DEFINE(TASK_THREAD_INFO, offsetof (struct task_struct, stack)); |
74 | DEFINE(TASK_STRUCT_SIZE, sizeof (struct task_struct)); | 74 | DEFINE(TASK_STRUCT_SIZE, sizeof (struct task_struct)); |
75 | BLANK(); | 75 | BLANK(); |
76 | 76 | ||
diff --git a/block/as-iosched.c b/block/as-iosched.c index 640aa839d63f..109e91b91ffa 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c | |||
@@ -1306,7 +1306,7 @@ static void as_exit_queue(elevator_t *e) | |||
1306 | struct as_data *ad = e->elevator_data; | 1306 | struct as_data *ad = e->elevator_data; |
1307 | 1307 | ||
1308 | del_timer_sync(&ad->antic_timer); | 1308 | del_timer_sync(&ad->antic_timer); |
1309 | kblockd_flush(); | 1309 | kblockd_flush_work(&ad->antic_work); |
1310 | 1310 | ||
1311 | BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC])); | 1311 | BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC])); |
1312 | BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC])); | 1312 | BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC])); |
diff --git a/block/genhd.c b/block/genhd.c index b5664440896c..93a2cf654597 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -213,6 +213,59 @@ struct gendisk *get_gendisk(dev_t dev, int *part) | |||
213 | return kobj ? to_disk(kobj) : NULL; | 213 | return kobj ? to_disk(kobj) : NULL; |
214 | } | 214 | } |
215 | 215 | ||
216 | /* | ||
217 | * print a full list of all partitions - intended for places where the root | ||
218 | * filesystem can't be mounted and thus to give the victim some idea of what | ||
219 | * went wrong | ||
220 | */ | ||
221 | void __init printk_all_partitions(void) | ||
222 | { | ||
223 | int n; | ||
224 | struct gendisk *sgp; | ||
225 | |||
226 | mutex_lock(&block_subsys_lock); | ||
227 | /* For each block device... */ | ||
228 | list_for_each_entry(sgp, &block_subsys.list, kobj.entry) { | ||
229 | char buf[BDEVNAME_SIZE]; | ||
230 | /* | ||
231 | * Don't show empty devices or things that have been suppressed |||
232 | */ | ||
233 | if (get_capacity(sgp) == 0 || | ||
234 | (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) | ||
235 | continue; | ||
236 | |||
237 | /* | ||
238 | * Note, unlike /proc/partitions, I am showing the numbers in | ||
239 | * hex - the same format as the root= option takes. | ||
240 | */ | ||
241 | printk("%02x%02x %10llu %s", | ||
242 | sgp->major, sgp->first_minor, | ||
243 | (unsigned long long)get_capacity(sgp) >> 1, | ||
244 | disk_name(sgp, 0, buf)); | ||
245 | if (sgp->driverfs_dev != NULL && | ||
246 | sgp->driverfs_dev->driver != NULL) | ||
247 | printk(" driver: %s\n", | ||
248 | sgp->driverfs_dev->driver->name); | ||
249 | else | ||
250 | printk(" (driver?)\n"); | ||
251 | |||
252 | /* now show the partitions */ | ||
253 | for (n = 0; n < sgp->minors - 1; ++n) { | ||
254 | if (sgp->part[n] == NULL) | ||
255 | continue; | ||
256 | if (sgp->part[n]->nr_sects == 0) | ||
257 | continue; | ||
258 | printk(" %02x%02x %10llu %s\n", | ||
259 | sgp->major, n + 1 + sgp->first_minor, | ||
260 | (unsigned long long)sgp->part[n]->nr_sects >> 1, | ||
261 | disk_name(sgp, n + 1, buf)); | ||
262 | } /* partition subloop */ | ||
263 | } /* Block device loop */ | ||
264 | |||
265 | mutex_unlock(&block_subsys_lock); | ||
266 | return; | ||
267 | } | ||
268 | |||
216 | #ifdef CONFIG_PROC_FS | 269 | #ifdef CONFIG_PROC_FS |
217 | /* iterator */ | 270 | /* iterator */ |
218 | static void *part_start(struct seq_file *part, loff_t *pos) | 271 | static void *part_start(struct seq_file *part, loff_t *pos) |
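
Note: printk_all_partitions() is meant for the path where the root filesystem cannot be mounted; dumping every disk and partition, in the same hex major/minor form that root= accepts, gives the stranded user something to type. A hedged sketch of a caller in a boot failure path (the function name and messages here are illustrative, not the actual call site):

        #include <linux/genhd.h>
        #include <linux/kernel.h>

        static void __init complain_about_root(const char *root_name)
        {
                printk(KERN_EMERG "VFS: Cannot open root device \"%s\"\n",
                       root_name);
                printk_all_partitions();        /* show what actually exists */
                panic("VFS: Unable to mount root fs");
        }
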
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index f294f1538f1e..17e188973428 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c | |||
@@ -1712,7 +1712,6 @@ EXPORT_SYMBOL(blk_stop_queue); | |||
1712 | void blk_sync_queue(struct request_queue *q) | 1712 | void blk_sync_queue(struct request_queue *q) |
1713 | { | 1713 | { |
1714 | del_timer_sync(&q->unplug_timer); | 1714 | del_timer_sync(&q->unplug_timer); |
1715 | kblockd_flush(); | ||
1716 | } | 1715 | } |
1717 | EXPORT_SYMBOL(blk_sync_queue); | 1716 | EXPORT_SYMBOL(blk_sync_queue); |
1718 | 1717 | ||
@@ -3508,7 +3507,7 @@ static int blk_cpu_notify(struct notifier_block *self, unsigned long action, | |||
3508 | * If a CPU goes away, splice its entries to the current CPU | 3507 | * If a CPU goes away, splice its entries to the current CPU |
3509 | * and trigger a run of the softirq | 3508 | * and trigger a run of the softirq |
3510 | */ | 3509 | */ |
3511 | if (action == CPU_DEAD) { | 3510 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { |
3512 | int cpu = (unsigned long) hcpu; | 3511 | int cpu = (unsigned long) hcpu; |
3513 | 3512 | ||
3514 | local_irq_disable(); | 3513 | local_irq_disable(); |
@@ -3632,11 +3631,11 @@ int kblockd_schedule_work(struct work_struct *work) | |||
3632 | 3631 | ||
3633 | EXPORT_SYMBOL(kblockd_schedule_work); | 3632 | EXPORT_SYMBOL(kblockd_schedule_work); |
3634 | 3633 | ||
3635 | void kblockd_flush(void) | 3634 | void kblockd_flush_work(struct work_struct *work) |
3636 | { | 3635 | { |
3637 | flush_workqueue(kblockd_workqueue); | 3636 | cancel_work_sync(work); |
3638 | } | 3637 | } |
3639 | EXPORT_SYMBOL(kblockd_flush); | 3638 | EXPORT_SYMBOL(kblockd_flush_work); |
3640 | 3639 | ||
3641 | int __init blk_dev_init(void) | 3640 | int __init blk_dev_init(void) |
3642 | { | 3641 | { |
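
Note: the kblockd change narrows the synchronization. flush_workqueue() waited for everything queued on kblockd, while cancel_work_sync() cancels just the caller's work item and, if an instance is running, waits for that one to finish; blk_sync_queue() correspondingly drops its global flush. A sketch of the renamed helper and a typical caller, matching the signatures above:

        #include <linux/workqueue.h>

        void kblockd_flush_work(struct work_struct *work)
        {
                /* cancel if pending; if already running, wait for completion */
                cancel_work_sync(work);
        }

        /* e.g. quiesce one queue's unplug work without stalling on others */
        static void example_sync_queue(struct work_struct *unplug_work)
        {
                kblockd_flush_work(unplug_work);
        }
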
diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c index f8c63410bcbf..52b23471dd69 100644 --- a/drivers/acpi/sleep/main.c +++ b/drivers/acpi/sleep/main.c | |||
@@ -29,7 +29,6 @@ static u32 acpi_suspend_states[] = { | |||
29 | [PM_SUSPEND_ON] = ACPI_STATE_S0, | 29 | [PM_SUSPEND_ON] = ACPI_STATE_S0, |
30 | [PM_SUSPEND_STANDBY] = ACPI_STATE_S1, | 30 | [PM_SUSPEND_STANDBY] = ACPI_STATE_S1, |
31 | [PM_SUSPEND_MEM] = ACPI_STATE_S3, | 31 | [PM_SUSPEND_MEM] = ACPI_STATE_S3, |
32 | [PM_SUSPEND_DISK] = ACPI_STATE_S4, | ||
33 | [PM_SUSPEND_MAX] = ACPI_STATE_S5 | 32 | [PM_SUSPEND_MAX] = ACPI_STATE_S5 |
34 | }; | 33 | }; |
35 | 34 | ||
@@ -94,14 +93,6 @@ static int acpi_pm_enter(suspend_state_t pm_state) | |||
94 | do_suspend_lowlevel(); | 93 | do_suspend_lowlevel(); |
95 | break; | 94 | break; |
96 | 95 | ||
97 | case PM_SUSPEND_DISK: | ||
98 | if (acpi_pm_ops.pm_disk_mode == PM_DISK_PLATFORM) | ||
99 | status = acpi_enter_sleep_state(acpi_state); | ||
100 | break; | ||
101 | case PM_SUSPEND_MAX: | ||
102 | acpi_power_off(); | ||
103 | break; | ||
104 | |||
105 | default: | 96 | default: |
106 | return -EINVAL; | 97 | return -EINVAL; |
107 | } | 98 | } |
@@ -157,12 +148,13 @@ int acpi_suspend(u32 acpi_state) | |||
157 | suspend_state_t states[] = { | 148 | suspend_state_t states[] = { |
158 | [1] = PM_SUSPEND_STANDBY, | 149 | [1] = PM_SUSPEND_STANDBY, |
159 | [3] = PM_SUSPEND_MEM, | 150 | [3] = PM_SUSPEND_MEM, |
160 | [4] = PM_SUSPEND_DISK, | ||
161 | [5] = PM_SUSPEND_MAX | 151 | [5] = PM_SUSPEND_MAX |
162 | }; | 152 | }; |
163 | 153 | ||
164 | if (acpi_state < 6 && states[acpi_state]) | 154 | if (acpi_state < 6 && states[acpi_state]) |
165 | return pm_suspend(states[acpi_state]); | 155 | return pm_suspend(states[acpi_state]); |
156 | if (acpi_state == 4) | ||
157 | return hibernate(); | ||
166 | return -EINVAL; | 158 | return -EINVAL; |
167 | } | 159 | } |
168 | 160 | ||
@@ -189,6 +181,49 @@ static struct pm_ops acpi_pm_ops = { | |||
189 | .finish = acpi_pm_finish, | 181 | .finish = acpi_pm_finish, |
190 | }; | 182 | }; |
191 | 183 | ||
184 | #ifdef CONFIG_SOFTWARE_SUSPEND | ||
185 | static int acpi_hibernation_prepare(void) | ||
186 | { | ||
187 | return acpi_sleep_prepare(ACPI_STATE_S4); | ||
188 | } | ||
189 | |||
190 | static int acpi_hibernation_enter(void) | ||
191 | { | ||
192 | acpi_status status = AE_OK; | ||
193 | unsigned long flags = 0; | ||
194 | |||
195 | ACPI_FLUSH_CPU_CACHE(); | ||
196 | |||
197 | local_irq_save(flags); | ||
198 | acpi_enable_wakeup_device(ACPI_STATE_S4); | ||
199 | /* This shouldn't return. If it returns, we have a problem */ | ||
200 | status = acpi_enter_sleep_state(ACPI_STATE_S4); | ||
201 | local_irq_restore(flags); | ||
202 | |||
203 | return ACPI_SUCCESS(status) ? 0 : -EFAULT; | ||
204 | } | ||
205 | |||
206 | static void acpi_hibernation_finish(void) | ||
207 | { | ||
208 | acpi_leave_sleep_state(ACPI_STATE_S4); | ||
209 | acpi_disable_wakeup_device(ACPI_STATE_S4); | ||
210 | |||
211 | /* reset firmware waking vector */ | ||
212 | acpi_set_firmware_waking_vector((acpi_physical_address) 0); | ||
213 | |||
214 | if (init_8259A_after_S1) { | ||
215 | printk("Broken toshiba laptop -> kicking interrupts\n"); | ||
216 | init_8259A(0); | ||
217 | } | ||
218 | } | ||
219 | |||
220 | static struct hibernation_ops acpi_hibernation_ops = { | ||
221 | .prepare = acpi_hibernation_prepare, | ||
222 | .enter = acpi_hibernation_enter, | ||
223 | .finish = acpi_hibernation_finish, | ||
224 | }; | ||
225 | #endif /* CONFIG_SOFTWARE_SUSPEND */ | ||
226 | |||
192 | /* | 227 | /* |
193 | * Toshiba fails to preserve interrupts over S1, reinitialization | 228 | * Toshiba fails to preserve interrupts over S1, reinitialization |
194 | * of 8259 is needed after S1 resume. | 229 | * of 8259 is needed after S1 resume. |
@@ -227,14 +262,18 @@ int __init acpi_sleep_init(void) | |||
227 | sleep_states[i] = 1; | 262 | sleep_states[i] = 1; |
228 | printk(" S%d", i); | 263 | printk(" S%d", i); |
229 | } | 264 | } |
230 | if (i == ACPI_STATE_S4) { | ||
231 | if (sleep_states[i]) | ||
232 | acpi_pm_ops.pm_disk_mode = PM_DISK_PLATFORM; | ||
233 | } | ||
234 | } | 265 | } |
235 | printk(")\n"); | 266 | printk(")\n"); |
236 | 267 | ||
237 | pm_set_ops(&acpi_pm_ops); | 268 | pm_set_ops(&acpi_pm_ops); |
269 | |||
270 | #ifdef CONFIG_SOFTWARE_SUSPEND | ||
271 | if (sleep_states[ACPI_STATE_S4]) | ||
272 | hibernation_set_ops(&acpi_hibernation_ops); | ||
273 | #else | ||
274 | sleep_states[ACPI_STATE_S4] = 0; | ||
275 | #endif | ||
276 | |||
238 | return 0; | 277 | return 0; |
239 | } | 278 | } |
240 | 279 | ||
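
Note: with this change hibernation stops being just another pm_ops suspend state. When SOFTWARE_SUSPEND is available and the firmware reports S4, ACPI registers a dedicated struct hibernation_ops (prepare/enter/finish), and otherwise it marks S4 unsupported outright. The registration pattern, reduced to a sketch with stub callbacks (the example_* names are illustrative):

        #include <linux/suspend.h>

        static int example_hibernation_prepare(void) { return 0; }     /* stub */
        static int example_hibernation_enter(void) { return 0; }       /* stub */
        static void example_hibernation_finish(void) { }               /* stub */

        static struct hibernation_ops example_hibernation_ops = {
                .prepare        = example_hibernation_prepare,
                .enter          = example_hibernation_enter,
                .finish         = example_hibernation_finish,
        };

        static int __init example_sleep_init(void)
        {
                /* register only if the platform really supports S4 */
                hibernation_set_ops(&example_hibernation_ops);
                return 0;
        }
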
diff --git a/drivers/acpi/sleep/proc.c b/drivers/acpi/sleep/proc.c index 5a76e5be61d5..76b45f0b8341 100644 --- a/drivers/acpi/sleep/proc.c +++ b/drivers/acpi/sleep/proc.c | |||
@@ -60,7 +60,7 @@ acpi_system_write_sleep(struct file *file, | |||
60 | state = simple_strtoul(str, NULL, 0); | 60 | state = simple_strtoul(str, NULL, 0); |
61 | #ifdef CONFIG_SOFTWARE_SUSPEND | 61 | #ifdef CONFIG_SOFTWARE_SUSPEND |
62 | if (state == 4) { | 62 | if (state == 4) { |
63 | error = pm_suspend(PM_SUSPEND_DISK); | 63 | error = hibernate(); |
64 | goto Done; | 64 | goto Done; |
65 | } | 65 | } |
66 | #endif | 66 | #endif |
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index a7950885d18e..fef87dd70d17 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c | |||
@@ -1316,7 +1316,7 @@ void ata_port_flush_task(struct ata_port *ap) | |||
1316 | spin_unlock_irqrestore(ap->lock, flags); | 1316 | spin_unlock_irqrestore(ap->lock, flags); |
1317 | 1317 | ||
1318 | DPRINTK("flush #1\n"); | 1318 | DPRINTK("flush #1\n"); |
1319 | flush_workqueue(ata_wq); | 1319 | cancel_work_sync(&ap->port_task.work); /* akpm: seems unneeded */ |
1320 | 1320 | ||
1321 | /* | 1321 | /* |
1322 | * At this point, if a task is running, it's guaranteed to see | 1322 | * At this point, if a task is running, it's guaranteed to see |
@@ -1327,7 +1327,7 @@ void ata_port_flush_task(struct ata_port *ap) | |||
1327 | if (ata_msg_ctl(ap)) | 1327 | if (ata_msg_ctl(ap)) |
1328 | ata_port_printk(ap, KERN_DEBUG, "%s: flush #2\n", | 1328 | ata_port_printk(ap, KERN_DEBUG, "%s: flush #2\n", |
1329 | __FUNCTION__); | 1329 | __FUNCTION__); |
1330 | flush_workqueue(ata_wq); | 1330 | cancel_work_sync(&ap->port_task.work); |
1331 | } | 1331 | } |
1332 | 1332 | ||
1333 | spin_lock_irqsave(ap->lock, flags); | 1333 | spin_lock_irqsave(ap->lock, flags); |
@@ -6475,9 +6475,9 @@ void ata_port_detach(struct ata_port *ap) | |||
6475 | /* Flush hotplug task. The sequence is similar to | 6475 | /* Flush hotplug task. The sequence is similar to |
6476 | * ata_port_flush_task(). | 6476 | * ata_port_flush_task(). |
6477 | */ | 6477 | */ |
6478 | flush_workqueue(ata_aux_wq); | 6478 | cancel_work_sync(&ap->hotplug_task.work); /* akpm: why? */ |
6479 | cancel_delayed_work(&ap->hotplug_task); | 6479 | cancel_delayed_work(&ap->hotplug_task); |
6480 | flush_workqueue(ata_aux_wq); | 6480 | cancel_work_sync(&ap->hotplug_task.work); |
6481 | 6481 | ||
6482 | skip_eh: | 6482 | skip_eh: |
6483 | /* remove the associated SCSI host */ | 6483 | /* remove the associated SCSI host */ |
diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 067a9e8bc377..8d8cdfec6529 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c | |||
@@ -126,10 +126,13 @@ static int __cpuinit topology_cpu_callback(struct notifier_block *nfb, | |||
126 | 126 | ||
127 | switch (action) { | 127 | switch (action) { |
128 | case CPU_UP_PREPARE: | 128 | case CPU_UP_PREPARE: |
129 | case CPU_UP_PREPARE_FROZEN: | ||
129 | rc = topology_add_dev(cpu); | 130 | rc = topology_add_dev(cpu); |
130 | break; | 131 | break; |
131 | case CPU_UP_CANCELED: | 132 | case CPU_UP_CANCELED: |
133 | case CPU_UP_CANCELED_FROZEN: | ||
132 | case CPU_DEAD: | 134 | case CPU_DEAD: |
135 | case CPU_DEAD_FROZEN: | ||
133 | topology_remove_dev(cpu); | 136 | topology_remove_dev(cpu); |
134 | break; | 137 | break; |
135 | } | 138 | } |
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index af6d7274a7cc..18cdd8c77626 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c | |||
@@ -243,17 +243,13 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, | |||
243 | transfer_result = lo_do_transfer(lo, WRITE, page, offset, | 243 | transfer_result = lo_do_transfer(lo, WRITE, page, offset, |
244 | bvec->bv_page, bv_offs, size, IV); | 244 | bvec->bv_page, bv_offs, size, IV); |
245 | if (unlikely(transfer_result)) { | 245 | if (unlikely(transfer_result)) { |
246 | char *kaddr; | ||
247 | |||
248 | /* | 246 | /* |
249 | * The transfer failed, but we still write the data to | 247 | * The transfer failed, but we still write the data to |
250 | * keep prepare/commit calls balanced. | 248 | * keep prepare/commit calls balanced. |
251 | */ | 249 | */ |
252 | printk(KERN_ERR "loop: transfer error block %llu\n", | 250 | printk(KERN_ERR "loop: transfer error block %llu\n", |
253 | (unsigned long long)index); | 251 | (unsigned long long)index); |
254 | kaddr = kmap_atomic(page, KM_USER0); | 252 | zero_user_page(page, offset, size, KM_USER0); |
255 | memset(kaddr + offset, 0, size); | ||
256 | kunmap_atomic(kaddr, KM_USER0); | ||
257 | } | 253 | } |
258 | flush_dcache_page(page); | 254 | flush_dcache_page(page); |
259 | ret = aops->commit_write(file, page, offset, | 255 | ret = aops->commit_write(file, page, offset, |
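
zero_user_page() is a new helper that replaces the open-coded kmap/memset/kunmap sequence removed above. Roughly, and only as a sketch inferred from the call site (the real definition lives in include/linux/highmem.h), it expands to:

    /* sketch only: approximate body of zero_user_page() */
    static inline void zero_user_page_sketch(struct page *page,
                    unsigned offset, unsigned size, int kmap_type)
    {
            char *kaddr = kmap_atomic(page, kmap_type);

            memset(kaddr + offset, 0, size);
            kunmap_atomic(kaddr, kmap_type);
    }

The caller above still does its own flush_dcache_page(), so nothing is lost by the conversion; the gain is one obviously-correct helper instead of the same three lines repeated at every zeroing site.
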
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 090796bef78f..069ae39a9cd9 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c | |||
@@ -366,20 +366,25 @@ static struct disk_attribute pid_attr = { | |||
366 | .show = pid_show, | 366 | .show = pid_show, |
367 | }; | 367 | }; |
368 | 368 | ||
369 | static void nbd_do_it(struct nbd_device *lo) | 369 | static int nbd_do_it(struct nbd_device *lo) |
370 | { | 370 | { |
371 | struct request *req; | 371 | struct request *req; |
372 | int ret; | ||
372 | 373 | ||
373 | BUG_ON(lo->magic != LO_MAGIC); | 374 | BUG_ON(lo->magic != LO_MAGIC); |
374 | 375 | ||
375 | lo->pid = current->pid; | 376 | lo->pid = current->pid; |
376 | sysfs_create_file(&lo->disk->kobj, &pid_attr.attr); | 377 | ret = sysfs_create_file(&lo->disk->kobj, &pid_attr.attr); |
378 | if (ret) { | ||
379 | printk(KERN_ERR "nbd: sysfs_create_file failed!\n"); | ||
379 | printk(KERN_ERR "nbd: sysfs_create_file failed!\n"); | ||
380 | return ret; | ||
381 | } | ||
377 | 382 | ||
378 | while ((req = nbd_read_stat(lo)) != NULL) | 383 | while ((req = nbd_read_stat(lo)) != NULL) |
379 | nbd_end_request(req); | 384 | nbd_end_request(req); |
380 | 385 | ||
381 | sysfs_remove_file(&lo->disk->kobj, &pid_attr.attr); | 386 | sysfs_remove_file(&lo->disk->kobj, &pid_attr.attr); |
382 | return; | 387 | return 0; |
383 | } | 388 | } |
384 | 389 | ||
385 | static void nbd_clear_que(struct nbd_device *lo) | 390 | static void nbd_clear_que(struct nbd_device *lo) |
@@ -569,7 +574,9 @@ static int nbd_ioctl(struct inode *inode, struct file *file, | |||
569 | case NBD_DO_IT: | 574 | case NBD_DO_IT: |
570 | if (!lo->file) | 575 | if (!lo->file) |
571 | return -EINVAL; | 576 | return -EINVAL; |
572 | nbd_do_it(lo); | 577 | error = nbd_do_it(lo); |
578 | if (error) | ||
579 | return error; | ||
573 | /* on return tidy up in case we have a signal */ | 580 | /* on return tidy up in case we have a signal */ |
574 | /* Forcibly shutdown the socket causing all listeners | 581 | /* Forcibly shutdown the socket causing all listeners |
575 | * to error | 582 | * to error |
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 5f3acd8e64b8..7cda04b33534 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig | |||
@@ -91,3 +91,17 @@ config HW_RANDOM_OMAP | |||
91 | module will be called omap-rng. | 91 | module will be called omap-rng. |
92 | 92 | ||
93 | If unsure, say Y. | 93 | If unsure, say Y. |
94 | |||
95 | config HW_RANDOM_PASEMI | ||
96 | tristate "PA Semi HW Random Number Generator support" | ||
97 | depends on HW_RANDOM && PPC_PASEMI | ||
98 | default HW_RANDOM | ||
99 | ---help--- | ||
100 | This driver provides kernel-side support for the Random Number | ||
101 | Generator hardware found on the PA6T-1682M processor. | ||
102 | |||
103 | To compile this driver as a module, choose M here: the | ||
104 | module will be called pasemi-rng. | ||
105 | |||
106 | If unsure, say Y. | ||
107 | |||
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile index c41fa19454e3..c8b7300e2fb1 100644 --- a/drivers/char/hw_random/Makefile +++ b/drivers/char/hw_random/Makefile | |||
@@ -10,3 +10,4 @@ obj-$(CONFIG_HW_RANDOM_GEODE) += geode-rng.o | |||
10 | obj-$(CONFIG_HW_RANDOM_VIA) += via-rng.o | 10 | obj-$(CONFIG_HW_RANDOM_VIA) += via-rng.o |
11 | obj-$(CONFIG_HW_RANDOM_IXP4XX) += ixp4xx-rng.o | 11 | obj-$(CONFIG_HW_RANDOM_IXP4XX) += ixp4xx-rng.o |
12 | obj-$(CONFIG_HW_RANDOM_OMAP) += omap-rng.o | 12 | obj-$(CONFIG_HW_RANDOM_OMAP) += omap-rng.o |
13 | obj-$(CONFIG_HW_RANDOM_PASEMI) += pasemi-rng.o | ||
diff --git a/drivers/char/hw_random/pasemi-rng.c b/drivers/char/hw_random/pasemi-rng.c new file mode 100644 index 000000000000..fa6040b6c8f2 --- /dev/null +++ b/drivers/char/hw_random/pasemi-rng.c | |||
@@ -0,0 +1,156 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2006-2007 PA Semi, Inc | ||
3 | * | ||
4 | * Maintained by: Olof Johansson <olof@lixom.net> | ||
5 | * | ||
6 | * Driver for the PWRficient on-chip RNG | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | |||
22 | #include <linux/module.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/platform_device.h> | ||
25 | #include <linux/hw_random.h> | ||
26 | #include <asm/of_platform.h> | ||
27 | #include <asm/io.h> | ||
28 | |||
29 | #define SDCRNG_CTL_REG 0x00 | ||
30 | #define SDCRNG_CTL_FVLD_M 0x0000f000 | ||
31 | #define SDCRNG_CTL_FVLD_S 12 | ||
32 | #define SDCRNG_CTL_KSZ 0x00000800 | ||
33 | #define SDCRNG_CTL_RSRC_CRG 0x00000010 | ||
34 | #define SDCRNG_CTL_RSRC_RRG 0x00000000 | ||
35 | #define SDCRNG_CTL_CE 0x00000004 | ||
36 | #define SDCRNG_CTL_RE 0x00000002 | ||
37 | #define SDCRNG_CTL_DR 0x00000001 | ||
38 | #define SDCRNG_CTL_SELECT_RRG_RNG (SDCRNG_CTL_RE | SDCRNG_CTL_RSRC_RRG) | ||
39 | #define SDCRNG_CTL_SELECT_CRG_RNG (SDCRNG_CTL_CE | SDCRNG_CTL_RSRC_CRG) | ||
40 | #define SDCRNG_VAL_REG 0x20 | ||
41 | |||
42 | #define MODULE_NAME "pasemi_rng" | ||
43 | |||
44 | static int pasemi_rng_data_present(struct hwrng *rng) | ||
45 | { | ||
46 | void __iomem *rng_regs = (void __iomem *)rng->priv; | ||
47 | |||
48 | return (in_le32(rng_regs + SDCRNG_CTL_REG) | ||
49 | & SDCRNG_CTL_FVLD_M) ? 1 : 0; | ||
50 | } | ||
51 | |||
52 | static int pasemi_rng_data_read(struct hwrng *rng, u32 *data) | ||
53 | { | ||
54 | void __iomem *rng_regs = (void __iomem *)rng->priv; | ||
55 | *data = in_le32(rng_regs + SDCRNG_VAL_REG); | ||
56 | return 4; | ||
57 | } | ||
58 | |||
59 | static int pasemi_rng_init(struct hwrng *rng) | ||
60 | { | ||
61 | void __iomem *rng_regs = (void __iomem *)rng->priv; | ||
62 | u32 ctl; | ||
63 | |||
64 | ctl = SDCRNG_CTL_DR | SDCRNG_CTL_SELECT_RRG_RNG | SDCRNG_CTL_KSZ; | ||
65 | out_le32(rng_regs + SDCRNG_CTL_REG, ctl); | ||
66 | out_le32(rng_regs + SDCRNG_CTL_REG, ctl & ~SDCRNG_CTL_DR); | ||
67 | |||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | static void pasemi_rng_cleanup(struct hwrng *rng) | ||
72 | { | ||
73 | void __iomem *rng_regs = (void __iomem *)rng->priv; | ||
74 | u32 ctl; | ||
75 | |||
76 | ctl = SDCRNG_CTL_RE | SDCRNG_CTL_CE; | ||
77 | out_le32(rng_regs + SDCRNG_CTL_REG, | ||
78 | in_le32(rng_regs + SDCRNG_CTL_REG) & ~ctl); | ||
79 | } | ||
80 | |||
81 | static struct hwrng pasemi_rng = { | ||
82 | .name = MODULE_NAME, | ||
83 | .init = pasemi_rng_init, | ||
84 | .cleanup = pasemi_rng_cleanup, | ||
85 | .data_present = pasemi_rng_data_present, | ||
86 | .data_read = pasemi_rng_data_read, | ||
87 | }; | ||
88 | |||
89 | static int __devinit rng_probe(struct of_device *ofdev, | ||
90 | const struct of_device_id *match) | ||
91 | { | ||
92 | void __iomem *rng_regs; | ||
93 | struct device_node *rng_np = ofdev->node; | ||
94 | struct resource res; | ||
95 | int err = 0; | ||
96 | |||
97 | err = of_address_to_resource(rng_np, 0, &res); | ||
98 | if (err) | ||
99 | return -ENODEV; | ||
100 | |||
101 | rng_regs = ioremap(res.start, 0x100); | ||
102 | |||
103 | if (!rng_regs) | ||
104 | return -ENOMEM; | ||
105 | |||
106 | pasemi_rng.priv = (unsigned long)rng_regs; | ||
107 | |||
108 | printk(KERN_INFO "Registering PA Semi RNG\n"); | ||
109 | |||
110 | err = hwrng_register(&pasemi_rng); | ||
111 | |||
112 | if (err) | ||
113 | iounmap(rng_regs); | ||
114 | |||
115 | return err; | ||
116 | } | ||
117 | |||
118 | static int __devexit rng_remove(struct of_device *dev) | ||
119 | { | ||
120 | void __iomem *rng_regs = (void __iomem *)pasemi_rng.priv; | ||
121 | |||
122 | hwrng_unregister(&pasemi_rng); | ||
123 | iounmap(rng_regs); | ||
124 | |||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | static struct of_device_id rng_match[] = { | ||
129 | { | ||
130 | .compatible = "1682m-rng", | ||
131 | }, | ||
132 | {}, | ||
133 | }; | ||
134 | |||
135 | static struct of_platform_driver rng_driver = { | ||
136 | .name = "pasemi-rng", | ||
137 | .match_table = rng_match, | ||
138 | .probe = rng_probe, | ||
139 | .remove = rng_remove, | ||
140 | }; | ||
141 | |||
142 | static int __init rng_init(void) | ||
143 | { | ||
144 | return of_register_platform_driver(&rng_driver); | ||
145 | } | ||
146 | module_init(rng_init); | ||
147 | |||
148 | static void __exit rng_exit(void) | ||
149 | { | ||
150 | of_unregister_platform_driver(&rng_driver); | ||
151 | } | ||
152 | module_exit(rng_exit); | ||
153 | |||
154 | MODULE_LICENSE("GPL"); | ||
155 | MODULE_AUTHOR("Egor Martovetsky <egor@pasemi.com>"); | ||
156 | MODULE_DESCRIPTION("H/W RNG driver for PA Semi processor"); | ||
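
With the driver bound, the hw_random core exposes the generator through its usual character device, conventionally /dev/hwrng (typically fed to rngd). A minimal userspace smoke test, assuming that node exists on the running system:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            unsigned char buf[16];
            int i, fd = open("/dev/hwrng", O_RDONLY);

            if (fd < 0 || read(fd, buf, sizeof(buf)) != sizeof(buf)) {
                    perror("hwrng");
                    return 1;
            }
            for (i = 0; i < 16; i++)
                    printf("%02x", buf[i]);
            putchar('\n');
            close(fd);
            return 0;
    }
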
diff --git a/drivers/char/pcmcia/Kconfig b/drivers/char/pcmcia/Kconfig index 27c1179ee527..f25facd97bb4 100644 --- a/drivers/char/pcmcia/Kconfig +++ b/drivers/char/pcmcia/Kconfig | |||
@@ -21,6 +21,7 @@ config SYNCLINK_CS | |||
21 | config CARDMAN_4000 | 21 | config CARDMAN_4000 |
22 | tristate "Omnikey Cardman 4000 support" | 22 | tristate "Omnikey Cardman 4000 support" |
23 | depends on PCMCIA | 23 | depends on PCMCIA |
24 | select BITREVERSE | ||
24 | help | 25 | help |
25 | Enable support for the Omnikey Cardman 4000 PCMCIA Smartcard | 26 | Enable support for the Omnikey Cardman 4000 PCMCIA Smartcard |
26 | reader. | 27 | reader. |
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c index 4ea587983aef..fee58e03dbe2 100644 --- a/drivers/char/pcmcia/cm4000_cs.c +++ b/drivers/char/pcmcia/cm4000_cs.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/init.h> | 31 | #include <linux/init.h> |
32 | #include <linux/fs.h> | 32 | #include <linux/fs.h> |
33 | #include <linux/delay.h> | 33 | #include <linux/delay.h> |
34 | #include <linux/bitrev.h> | ||
34 | #include <asm/uaccess.h> | 35 | #include <asm/uaccess.h> |
35 | #include <asm/io.h> | 36 | #include <asm/io.h> |
36 | 37 | ||
@@ -194,41 +195,17 @@ static inline unsigned char xinb(unsigned short port) | |||
194 | } | 195 | } |
195 | #endif | 196 | #endif |
196 | 197 | ||
197 | #define b_0000 15 | 198 | static inline unsigned char invert_revert(unsigned char ch) |
198 | #define b_0001 14 | 199 | { |
199 | #define b_0010 13 | 200 | return bitrev8(~ch); |
200 | #define b_0011 12 | 201 | } |
201 | #define b_0100 11 | ||
202 | #define b_0101 10 | ||
203 | #define b_0110 9 | ||
204 | #define b_0111 8 | ||
205 | #define b_1000 7 | ||
206 | #define b_1001 6 | ||
207 | #define b_1010 5 | ||
208 | #define b_1011 4 | ||
209 | #define b_1100 3 | ||
210 | #define b_1101 2 | ||
211 | #define b_1110 1 | ||
212 | #define b_1111 0 | ||
213 | |||
214 | static unsigned char irtab[16] = { | ||
215 | b_0000, b_1000, b_0100, b_1100, | ||
216 | b_0010, b_1010, b_0110, b_1110, | ||
217 | b_0001, b_1001, b_0101, b_1101, | ||
218 | b_0011, b_1011, b_0111, b_1111 | ||
219 | }; | ||
220 | 202 | ||
221 | static void str_invert_revert(unsigned char *b, int len) | 203 | static void str_invert_revert(unsigned char *b, int len) |
222 | { | 204 | { |
223 | int i; | 205 | int i; |
224 | 206 | ||
225 | for (i = 0; i < len; i++) | 207 | for (i = 0; i < len; i++) |
226 | b[i] = (irtab[b[i] & 0x0f] << 4) | irtab[b[i] >> 4]; | 208 | b[i] = invert_revert(b[i]); |
227 | } | ||
228 | |||
229 | static unsigned char invert_revert(unsigned char ch) | ||
230 | { | ||
231 | return (irtab[ch & 0x0f] << 4) | irtab[ch >> 4]; | ||
232 | } | 209 | } |
233 | 210 | ||
234 | #define ATRLENCK(dev,pos) \ | 211 | #define ATRLENCK(dev,pos) \ |
@@ -1881,8 +1858,11 @@ static int cm4000_probe(struct pcmcia_device *link) | |||
1881 | init_waitqueue_head(&dev->readq); | 1858 | init_waitqueue_head(&dev->readq); |
1882 | 1859 | ||
1883 | ret = cm4000_config(link, i); | 1860 | ret = cm4000_config(link, i); |
1884 | if (ret) | 1861 | if (ret) { |
1862 | dev_table[i] = NULL; | ||
1863 | kfree(dev); | ||
1885 | return ret; | 1864 | return ret; |
1865 | } | ||
1886 | 1866 | ||
1887 | class_device_create(cmm_class, NULL, MKDEV(major, i), NULL, | 1867 | class_device_create(cmm_class, NULL, MKDEV(major, i), NULL, |
1888 | "cmm%d", i); | 1868 | "cmm%d", i); |
@@ -1907,7 +1887,7 @@ static void cm4000_detach(struct pcmcia_device *link) | |||
1907 | cm4000_release(link); | 1887 | cm4000_release(link); |
1908 | 1888 | ||
1909 | dev_table[devno] = NULL; | 1889 | dev_table[devno] = NULL; |
1910 | kfree(dev); | 1890 | kfree(dev); |
1911 | 1891 | ||
1912 | class_device_destroy(cmm_class, MKDEV(major, devno)); | 1892 | class_device_destroy(cmm_class, MKDEV(major, devno)); |
1913 | 1893 | ||
@@ -1956,12 +1936,14 @@ static int __init cmm_init(void) | |||
1956 | if (major < 0) { | 1936 | if (major < 0) { |
1957 | printk(KERN_WARNING MODULE_NAME | 1937 | printk(KERN_WARNING MODULE_NAME |
1958 | ": could not get major number\n"); | 1938 | ": could not get major number\n"); |
1939 | class_destroy(cmm_class); | ||
1959 | return major; | 1940 | return major; |
1960 | } | 1941 | } |
1961 | 1942 | ||
1962 | rc = pcmcia_register_driver(&cm4000_driver); | 1943 | rc = pcmcia_register_driver(&cm4000_driver); |
1963 | if (rc < 0) { | 1944 | if (rc < 0) { |
1964 | unregister_chrdev(major, DEVICE_NAME); | 1945 | unregister_chrdev(major, DEVICE_NAME); |
1946 | class_destroy(cmm_class); | ||
1965 | return rc; | 1947 | return rc; |
1966 | } | 1948 | } |
1967 | 1949 | ||
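
The table removal above works because the old irtab computed, per nibble, the complement of the bit-reversed value, so invert_revert(ch) == bitrev8(~ch) for every byte. A standalone check of that identity (plain C, no kernel headers needed):

    #include <stdio.h>

    static unsigned char bitrev8(unsigned char b)
    {
            b = (b & 0xF0) >> 4 | (b & 0x0F) << 4;
            b = (b & 0xCC) >> 2 | (b & 0x33) << 2;
            b = (b & 0xAA) >> 1 | (b & 0x55) << 1;
            return b;
    }

    int main(void)
    {
            /* irtab[n] == ~bitrev4(n), copied from the deleted table */
            static const unsigned char irtab[16] = {
                    15, 7, 11, 3, 13, 5, 9, 1, 14, 6, 10, 2, 12, 4, 8, 0
            };
            int ch;

            for (ch = 0; ch < 256; ch++) {
                    unsigned char old = irtab[ch & 0x0f] << 4 | irtab[ch >> 4];
                    unsigned char new = bitrev8((unsigned char)~ch);

                    if (old != new) {
                            printf("mismatch at %#x\n", ch);
                            return 1;
                    }
            }
            printf("invert_revert(ch) == bitrev8(~ch) for all 256 bytes\n");
            return 0;
    }
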
diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c index f2e4ec4fd407..af88181a17f4 100644 --- a/drivers/char/pcmcia/cm4040_cs.c +++ b/drivers/char/pcmcia/cm4040_cs.c | |||
@@ -636,8 +636,11 @@ static int reader_probe(struct pcmcia_device *link) | |||
636 | setup_timer(&dev->poll_timer, cm4040_do_poll, 0); | 636 | setup_timer(&dev->poll_timer, cm4040_do_poll, 0); |
637 | 637 | ||
638 | ret = reader_config(link, i); | 638 | ret = reader_config(link, i); |
639 | if (ret) | 639 | if (ret) { |
640 | dev_table[i] = NULL; | ||
641 | kfree(dev); | ||
640 | return ret; | 642 | return ret; |
643 | } | ||
641 | 644 | ||
642 | class_device_create(cmx_class, NULL, MKDEV(major, i), NULL, | 645 | class_device_create(cmx_class, NULL, MKDEV(major, i), NULL, |
643 | "cmx%d", i); | 646 | "cmx%d", i); |
@@ -708,12 +711,14 @@ static int __init cm4040_init(void) | |||
708 | if (major < 0) { | 711 | if (major < 0) { |
709 | printk(KERN_WARNING MODULE_NAME | 712 | printk(KERN_WARNING MODULE_NAME |
710 | ": could not get major number\n"); | 713 | ": could not get major number\n"); |
714 | class_destroy(cmx_class); | ||
711 | return major; | 715 | return major; |
712 | } | 716 | } |
713 | 717 | ||
714 | rc = pcmcia_register_driver(&reader_driver); | 718 | rc = pcmcia_register_driver(&reader_driver); |
715 | if (rc < 0) { | 719 | if (rc < 0) { |
716 | unregister_chrdev(major, DEVICE_NAME); | 720 | unregister_chrdev(major, DEVICE_NAME); |
721 | class_destroy(cmx_class); | ||
717 | return rc; | 722 | return rc; |
718 | } | 723 | } |
719 | 724 | ||
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index f6ac1d316ea4..fc662e4ce58a 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c | |||
@@ -934,13 +934,6 @@ restart: | |||
934 | return -EINVAL; | 934 | return -EINVAL; |
935 | 935 | ||
936 | /* | 936 | /* |
937 | * No more input please, we are switching. The new ldisc | ||
938 | * will update this value in the ldisc open function | ||
939 | */ | ||
940 | |||
941 | tty->receive_room = 0; | ||
942 | |||
943 | /* | ||
944 | * Problem: What do we do if this blocks ? | 937 | * Problem: What do we do if this blocks ? |
945 | */ | 938 | */ |
946 | 939 | ||
@@ -951,6 +944,13 @@ restart: | |||
951 | return 0; | 944 | return 0; |
952 | } | 945 | } |
953 | 946 | ||
947 | /* | ||
948 | * No more input please, we are switching. The new ldisc | ||
949 | * will update this value in the ldisc open function | ||
950 | */ | ||
951 | |||
952 | tty->receive_room = 0; | ||
953 | |||
954 | o_ldisc = tty->ldisc; | 954 | o_ldisc = tty->ldisc; |
955 | o_tty = tty->link; | 955 | o_tty = tty->link; |
956 | 956 | ||
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 893dbaf386fb..eb37fba9b7ef 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c | |||
@@ -1685,9 +1685,11 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, | |||
1685 | if (sys_dev) { | 1685 | if (sys_dev) { |
1686 | switch (action) { | 1686 | switch (action) { |
1687 | case CPU_ONLINE: | 1687 | case CPU_ONLINE: |
1688 | case CPU_ONLINE_FROZEN: | ||
1688 | cpufreq_add_dev(sys_dev); | 1689 | cpufreq_add_dev(sys_dev); |
1689 | break; | 1690 | break; |
1690 | case CPU_DOWN_PREPARE: | 1691 | case CPU_DOWN_PREPARE: |
1692 | case CPU_DOWN_PREPARE_FROZEN: | ||
1691 | if (unlikely(lock_policy_rwsem_write(cpu))) | 1693 | if (unlikely(lock_policy_rwsem_write(cpu))) |
1692 | BUG(); | 1694 | BUG(); |
1693 | 1695 | ||
@@ -1699,6 +1701,7 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, | |||
1699 | __cpufreq_remove_dev(sys_dev); | 1701 | __cpufreq_remove_dev(sys_dev); |
1700 | break; | 1702 | break; |
1701 | case CPU_DOWN_FAILED: | 1703 | case CPU_DOWN_FAILED: |
1704 | case CPU_DOWN_FAILED_FROZEN: | ||
1702 | cpufreq_add_dev(sys_dev); | 1705 | cpufreq_add_dev(sys_dev); |
1703 | break; | 1706 | break; |
1704 | } | 1707 | } |
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index d1c7cac9316c..d2f0cbd8b8f3 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c | |||
@@ -313,9 +313,11 @@ static int cpufreq_stat_cpu_callback(struct notifier_block *nfb, | |||
313 | 313 | ||
314 | switch (action) { | 314 | switch (action) { |
315 | case CPU_ONLINE: | 315 | case CPU_ONLINE: |
316 | case CPU_ONLINE_FROZEN: | ||
316 | cpufreq_update_policy(cpu); | 317 | cpufreq_update_policy(cpu); |
317 | break; | 318 | break; |
318 | case CPU_DEAD: | 319 | case CPU_DEAD: |
320 | case CPU_DEAD_FROZEN: | ||
319 | cpufreq_stats_free_table(cpu); | 321 | cpufreq_stats_free_table(cpu); |
320 | break; | 322 | break; |
321 | } | 323 | } |
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 03b1f650d1c4..75e3911810a3 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c | |||
@@ -309,9 +309,11 @@ static int coretemp_cpu_callback(struct notifier_block *nfb, | |||
309 | 309 | ||
310 | switch (action) { | 310 | switch (action) { |
311 | case CPU_ONLINE: | 311 | case CPU_ONLINE: |
312 | case CPU_ONLINE_FROZEN: | ||
312 | coretemp_device_add(cpu); | 313 | coretemp_device_add(cpu); |
313 | break; | 314 | break; |
314 | case CPU_DEAD: | 315 | case CPU_DEAD: |
316 | case CPU_DEAD_FROZEN: | ||
315 | coretemp_device_remove(cpu); | 317 | coretemp_device_remove(cpu); |
316 | break; | 318 | break; |
317 | } | 319 | } |
diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c index 7ed92dc3d833..3c3f2ebf3fc9 100644 --- a/drivers/i2c/chips/tps65010.c +++ b/drivers/i2c/chips/tps65010.c | |||
@@ -354,7 +354,7 @@ static void tps65010_interrupt(struct tps65010 *tps) | |||
354 | * also needs to get error handling and probably | 354 | * also needs to get error handling and probably |
355 | * an #ifdef CONFIG_SOFTWARE_SUSPEND | 355 | * an #ifdef CONFIG_SOFTWARE_SUSPEND |
356 | */ | 356 | */ |
357 | pm_suspend(PM_SUSPEND_DISK); | 357 | hibernate(); |
358 | #endif | 358 | #endif |
359 | poll = 1; | 359 | poll = 1; |
360 | } | 360 | } |
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index f284be1c9166..82dda2faf4d0 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c | |||
@@ -745,6 +745,7 @@ static int comp_pool_callback(struct notifier_block *nfb, | |||
745 | 745 | ||
746 | switch (action) { | 746 | switch (action) { |
747 | case CPU_UP_PREPARE: | 747 | case CPU_UP_PREPARE: |
748 | case CPU_UP_PREPARE_FROZEN: | ||
748 | ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); | 749 | ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); |
749 | if(!create_comp_task(pool, cpu)) { | 750 | if(!create_comp_task(pool, cpu)) { |
750 | ehca_gen_err("Can't create comp_task for cpu: %x", cpu); | 751 | ehca_gen_err("Can't create comp_task for cpu: %x", cpu); |
@@ -752,24 +753,29 @@ static int comp_pool_callback(struct notifier_block *nfb, | |||
752 | } | 753 | } |
753 | break; | 754 | break; |
754 | case CPU_UP_CANCELED: | 755 | case CPU_UP_CANCELED: |
756 | case CPU_UP_CANCELED_FROZEN: | ||
755 | ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu); | 757 | ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu); |
756 | cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); | 758 | cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); |
757 | kthread_bind(cct->task, any_online_cpu(cpu_online_map)); | 759 | kthread_bind(cct->task, any_online_cpu(cpu_online_map)); |
758 | destroy_comp_task(pool, cpu); | 760 | destroy_comp_task(pool, cpu); |
759 | break; | 761 | break; |
760 | case CPU_ONLINE: | 762 | case CPU_ONLINE: |
763 | case CPU_ONLINE_FROZEN: | ||
761 | ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu); | 764 | ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu); |
762 | cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); | 765 | cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); |
763 | kthread_bind(cct->task, cpu); | 766 | kthread_bind(cct->task, cpu); |
764 | wake_up_process(cct->task); | 767 | wake_up_process(cct->task); |
765 | break; | 768 | break; |
766 | case CPU_DOWN_PREPARE: | 769 | case CPU_DOWN_PREPARE: |
770 | case CPU_DOWN_PREPARE_FROZEN: | ||
767 | ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu); | 771 | ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu); |
768 | break; | 772 | break; |
769 | case CPU_DOWN_FAILED: | 773 | case CPU_DOWN_FAILED: |
774 | case CPU_DOWN_FAILED_FROZEN: | ||
770 | ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu); | 775 | ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu); |
771 | break; | 776 | break; |
772 | case CPU_DEAD: | 777 | case CPU_DEAD: |
778 | case CPU_DEAD_FROZEN: | ||
773 | ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu); | 779 | ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu); |
774 | destroy_comp_task(pool, cpu); | 780 | destroy_comp_task(pool, cpu); |
775 | take_over_work(pool, cpu); | 781 | take_over_work(pool, cpu); |
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index c8b8cfa332bb..0d892600ff00 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c | |||
@@ -2889,7 +2889,9 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | |||
2889 | 2889 | ||
2890 | switch (val) { | 2890 | switch (val) { |
2891 | case CPU_DOWN_PREPARE: | 2891 | case CPU_DOWN_PREPARE: |
2892 | case CPU_DOWN_PREPARE_FROZEN: | ||
2892 | case CPU_UP_CANCELED: | 2893 | case CPU_UP_CANCELED: |
2894 | case CPU_UP_CANCELED_FROZEN: | ||
2893 | printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", | 2895 | printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", |
2894 | cpu); | 2896 | cpu); |
2895 | decache_vcpus_on_cpu(cpu); | 2897 | decache_vcpus_on_cpu(cpu); |
@@ -2897,6 +2899,7 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | |||
2897 | NULL, 0, 1); | 2899 | NULL, 0, 1); |
2898 | break; | 2900 | break; |
2899 | case CPU_ONLINE: | 2901 | case CPU_ONLINE: |
2902 | case CPU_ONLINE_FROZEN: | ||
2900 | printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", | 2903 | printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", |
2901 | cpu); | 2904 | cpu); |
2902 | smp_call_function_single(cpu, kvm_arch_ops->hardware_enable, | 2905 | smp_call_function_single(cpu, kvm_arch_ops->hardware_enable, |
diff --git a/drivers/mca/mca-bus.c b/drivers/mca/mca-bus.c index da862e4632dd..67b8e9453b19 100644 --- a/drivers/mca/mca-bus.c +++ b/drivers/mca/mca-bus.c | |||
@@ -47,19 +47,25 @@ static int mca_bus_match (struct device *dev, struct device_driver *drv) | |||
47 | { | 47 | { |
48 | struct mca_device *mca_dev = to_mca_device (dev); | 48 | struct mca_device *mca_dev = to_mca_device (dev); |
49 | struct mca_driver *mca_drv = to_mca_driver (drv); | 49 | struct mca_driver *mca_drv = to_mca_driver (drv); |
50 | const short *mca_ids = mca_drv->id_table; | 50 | const unsigned short *mca_ids = mca_drv->id_table; |
51 | int i; | 51 | int i = 0; |
52 | 52 | ||
53 | if (!mca_ids) | 53 | if (mca_ids) { |
54 | return 0; | 54 | for(i = 0; mca_ids[i]; i++) { |
55 | 55 | if (mca_ids[i] == mca_dev->pos_id) { | |
56 | for(i = 0; mca_ids[i]; i++) { | 56 | mca_dev->index = i; |
57 | if (mca_ids[i] == mca_dev->pos_id) { | 57 | return 1; |
58 | mca_dev->index = i; | 58 | } |
59 | return 1; | ||
60 | } | 59 | } |
61 | } | 60 | } |
62 | 61 | /* If the integrated id is present, treat it as though it were an | |
62 | * additional id in the id_table (it can't be, because by definition | ||
63 | * integrated ids overflow a short). */ | ||
64 | if (mca_drv->integrated_id && mca_dev->pos_id == | ||
65 | mca_drv->integrated_id) { | ||
66 | mca_dev->index = i; | ||
67 | return 1; | ||
68 | } | ||
63 | return 0; | 69 | return 0; |
64 | } | 70 | } |
65 | 71 | ||
diff --git a/drivers/mca/mca-driver.c b/drivers/mca/mca-driver.c index 2223466b3d8a..32cd39bcc715 100644 --- a/drivers/mca/mca-driver.c +++ b/drivers/mca/mca-driver.c | |||
@@ -36,12 +36,25 @@ int mca_register_driver(struct mca_driver *mca_drv) | |||
36 | mca_drv->driver.bus = &mca_bus_type; | 36 | mca_drv->driver.bus = &mca_bus_type; |
37 | if ((r = driver_register(&mca_drv->driver)) < 0) | 37 | if ((r = driver_register(&mca_drv->driver)) < 0) |
38 | return r; | 38 | return r; |
39 | mca_drv->integrated_id = 0; | ||
39 | } | 40 | } |
40 | 41 | ||
41 | return 0; | 42 | return 0; |
42 | } | 43 | } |
43 | EXPORT_SYMBOL(mca_register_driver); | 44 | EXPORT_SYMBOL(mca_register_driver); |
44 | 45 | ||
46 | int mca_register_driver_integrated(struct mca_driver *mca_driver, | ||
47 | int integrated_id) | ||
48 | { | ||
49 | int r = mca_register_driver(mca_driver); | ||
50 | |||
51 | if (!r) | ||
52 | mca_driver->integrated_id = integrated_id; | ||
53 | |||
54 | return r; | ||
55 | } | ||
56 | EXPORT_SYMBOL(mca_register_driver_integrated); | ||
57 | |||
45 | void mca_unregister_driver(struct mca_driver *mca_drv) | 58 | void mca_unregister_driver(struct mca_driver *mca_drv) |
46 | { | 59 | { |
47 | if (MCA_bus) | 60 | if (MCA_bus) |
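
mca_register_driver_integrated() exists for motherboard devices whose POS id is too wide for the unsigned short entries of id_table. A hypothetical caller (the ids and the driver name below are invented for illustration):

    /* hypothetical adapter ids; a real driver uses its card's POS ids */
    static const unsigned short example_ids[] = { 0x6042, 0x6043, 0 };

    static struct mca_driver example_mca_driver = {
            .id_table = example_ids,
            .driver = {
                    .name = "example_mca",
            },
    };

    static int __init example_init(void)
    {
            /* 0x10042: invented integrated id that overflows a short */
            return mca_register_driver_integrated(&example_mca_driver,
                                                  0x10042);
    }
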
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 4540ade6b6b5..7df934d69134 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig | |||
@@ -262,6 +262,15 @@ config DM_MULTIPATH_EMC | |||
262 | ---help--- | 262 | ---help--- |
263 | Multipath support for EMC CX/AX series hardware. | 263 | Multipath support for EMC CX/AX series hardware. |
264 | 264 | ||
265 | config DM_DELAY | ||
266 | tristate "I/O delaying target (EXPERIMENTAL)" | ||
267 | depends on BLK_DEV_DM && EXPERIMENTAL | ||
268 | ---help--- | ||
269 | A target that delays reads and/or writes and can send | ||
270 | them to different devices. Useful for testing. | ||
271 | |||
272 | If unsure, say N. | ||
273 | |||
265 | endmenu | 274 | endmenu |
266 | 275 | ||
267 | endif | 276 | endif |
diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 34957a68d921..38754084eac7 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile | |||
@@ -31,6 +31,7 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o | |||
31 | obj-$(CONFIG_BLK_DEV_MD) += md-mod.o | 31 | obj-$(CONFIG_BLK_DEV_MD) += md-mod.o |
32 | obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o | 32 | obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o |
33 | obj-$(CONFIG_DM_CRYPT) += dm-crypt.o | 33 | obj-$(CONFIG_DM_CRYPT) += dm-crypt.o |
34 | obj-$(CONFIG_DM_DELAY) += dm-delay.o | ||
34 | obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o | 35 | obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o |
35 | obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc.o | 36 | obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc.o |
36 | obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o | 37 | obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o |
diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h index da4349649f7f..c6be88826fae 100644 --- a/drivers/md/dm-bio-list.h +++ b/drivers/md/dm-bio-list.h | |||
@@ -8,17 +8,43 @@ | |||
8 | #define DM_BIO_LIST_H | 8 | #define DM_BIO_LIST_H |
9 | 9 | ||
10 | #include <linux/bio.h> | 10 | #include <linux/bio.h> |
11 | #include <linux/prefetch.h> | ||
11 | 12 | ||
12 | struct bio_list { | 13 | struct bio_list { |
13 | struct bio *head; | 14 | struct bio *head; |
14 | struct bio *tail; | 15 | struct bio *tail; |
15 | }; | 16 | }; |
16 | 17 | ||
18 | static inline int bio_list_empty(const struct bio_list *bl) | ||
19 | { | ||
20 | return bl->head == NULL; | ||
21 | } | ||
22 | |||
23 | #define BIO_LIST_INIT { .head = NULL, .tail = NULL } | ||
24 | |||
25 | #define BIO_LIST(bl) \ | ||
26 | struct bio_list bl = BIO_LIST_INIT | ||
27 | |||
17 | static inline void bio_list_init(struct bio_list *bl) | 28 | static inline void bio_list_init(struct bio_list *bl) |
18 | { | 29 | { |
19 | bl->head = bl->tail = NULL; | 30 | bl->head = bl->tail = NULL; |
20 | } | 31 | } |
21 | 32 | ||
33 | #define bio_list_for_each(bio, bl) \ | ||
34 | for (bio = (bl)->head; bio && ({ prefetch(bio->bi_next); 1; }); \ | ||
35 | bio = bio->bi_next) | ||
36 | |||
37 | static inline unsigned bio_list_size(const struct bio_list *bl) | ||
38 | { | ||
39 | unsigned sz = 0; | ||
40 | struct bio *bio; | ||
41 | |||
42 | bio_list_for_each(bio, bl) | ||
43 | sz++; | ||
44 | |||
45 | return sz; | ||
46 | } | ||
47 | |||
22 | static inline void bio_list_add(struct bio_list *bl, struct bio *bio) | 48 | static inline void bio_list_add(struct bio_list *bl, struct bio *bio) |
23 | { | 49 | { |
24 | bio->bi_next = NULL; | 50 | bio->bi_next = NULL; |
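
Together these helpers let code build a private bio list on the stack, count it, and later detach the whole chain in O(1) via the pre-existing bio_list_get(); dm-delay below is the first user. In outline (a sketch, not taken from the patch):

    /* sketch: queue bios privately, then drain them in submission order */
    static void drain_example(struct bio *first)
    {
            BIO_LIST(pending);              /* empty, on-stack */
            struct bio *bio;

            bio_list_add(&pending, first);  /* normally done under a lock */
            printk(KERN_DEBUG "holding %u bios\n", bio_list_size(&pending));

            bio = bio_list_get(&pending);   /* detach the whole chain */
            while (bio) {
                    struct bio *next = bio->bi_next;

                    bio->bi_next = NULL;
                    generic_make_request(bio);
                    bio = next;
            }
    }
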
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d8121234c347..7b0fcfc9eaa5 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c | |||
@@ -33,7 +33,6 @@ | |||
33 | struct crypt_io { | 33 | struct crypt_io { |
34 | struct dm_target *target; | 34 | struct dm_target *target; |
35 | struct bio *base_bio; | 35 | struct bio *base_bio; |
36 | struct bio *first_clone; | ||
37 | struct work_struct work; | 36 | struct work_struct work; |
38 | atomic_t pending; | 37 | atomic_t pending; |
39 | int error; | 38 | int error; |
@@ -107,6 +106,8 @@ struct crypt_config { | |||
107 | 106 | ||
108 | static struct kmem_cache *_crypt_io_pool; | 107 | static struct kmem_cache *_crypt_io_pool; |
109 | 108 | ||
109 | static void clone_init(struct crypt_io *, struct bio *); | ||
110 | |||
110 | /* | 111 | /* |
111 | * Different IV generation algorithms: | 112 | * Different IV generation algorithms: |
112 | * | 113 | * |
@@ -120,6 +121,9 @@ static struct kmem_cache *_crypt_io_pool; | |||
120 | * benbi: the 64-bit "big-endian 'narrow block'-count", starting at 1 | 121 | * benbi: the 64-bit "big-endian 'narrow block'-count", starting at 1 |
121 | * (needed for LRW-32-AES and possible other narrow block modes) | 122 | * (needed for LRW-32-AES and possible other narrow block modes) |
122 | * | 123 | * |
124 | * null: the initial vector is always zero. Provides compatibility with | ||
125 | * obsolete loop_fish2 devices. Do not use for new devices. | ||
126 | * | ||
123 | * plumb: unimplemented, see: | 127 | * plumb: unimplemented, see: |
124 | * http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454 | 128 | * http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454 |
125 | */ | 129 | */ |
@@ -256,6 +260,13 @@ static int crypt_iv_benbi_gen(struct crypt_config *cc, u8 *iv, sector_t sector) | |||
256 | return 0; | 260 | return 0; |
257 | } | 261 | } |
258 | 262 | ||
263 | static int crypt_iv_null_gen(struct crypt_config *cc, u8 *iv, sector_t sector) | ||
264 | { | ||
265 | memset(iv, 0, cc->iv_size); | ||
266 | |||
267 | return 0; | ||
268 | } | ||
269 | |||
259 | static struct crypt_iv_operations crypt_iv_plain_ops = { | 270 | static struct crypt_iv_operations crypt_iv_plain_ops = { |
260 | .generator = crypt_iv_plain_gen | 271 | .generator = crypt_iv_plain_gen |
261 | }; | 272 | }; |
@@ -272,6 +283,10 @@ static struct crypt_iv_operations crypt_iv_benbi_ops = { | |||
272 | .generator = crypt_iv_benbi_gen | 283 | .generator = crypt_iv_benbi_gen |
273 | }; | 284 | }; |
274 | 285 | ||
286 | static struct crypt_iv_operations crypt_iv_null_ops = { | ||
287 | .generator = crypt_iv_null_gen | ||
288 | }; | ||
289 | |||
275 | static int | 290 | static int |
276 | crypt_convert_scatterlist(struct crypt_config *cc, struct scatterlist *out, | 291 | crypt_convert_scatterlist(struct crypt_config *cc, struct scatterlist *out, |
277 | struct scatterlist *in, unsigned int length, | 292 | struct scatterlist *in, unsigned int length, |
@@ -378,36 +393,21 @@ static int crypt_convert(struct crypt_config *cc, | |||
378 | * This should never violate the device limitations | 393 | * This should never violate the device limitations |
379 | * May return a smaller bio when running out of pages | 394 | * May return a smaller bio when running out of pages |
380 | */ | 395 | */ |
381 | static struct bio * | 396 | static struct bio *crypt_alloc_buffer(struct crypt_io *io, unsigned int size) |
382 | crypt_alloc_buffer(struct crypt_config *cc, unsigned int size, | ||
383 | struct bio *base_bio, unsigned int *bio_vec_idx) | ||
384 | { | 397 | { |
398 | struct crypt_config *cc = io->target->private; | ||
385 | struct bio *clone; | 399 | struct bio *clone; |
386 | unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; | 400 | unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; |
387 | gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM; | 401 | gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM; |
388 | unsigned int i; | 402 | unsigned int i; |
389 | 403 | ||
390 | if (base_bio) { | 404 | clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs); |
391 | clone = bio_alloc_bioset(GFP_NOIO, base_bio->bi_max_vecs, cc->bs); | ||
392 | __bio_clone(clone, base_bio); | ||
393 | } else | ||
394 | clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs); | ||
395 | |||
396 | if (!clone) | 405 | if (!clone) |
397 | return NULL; | 406 | return NULL; |
398 | 407 | ||
399 | clone->bi_destructor = dm_crypt_bio_destructor; | 408 | clone_init(io, clone); |
400 | |||
401 | /* if the last bio was not complete, continue where that one ended */ | ||
402 | clone->bi_idx = *bio_vec_idx; | ||
403 | clone->bi_vcnt = *bio_vec_idx; | ||
404 | clone->bi_size = 0; | ||
405 | clone->bi_flags &= ~(1 << BIO_SEG_VALID); | ||
406 | |||
407 | /* clone->bi_idx pages have already been allocated */ | ||
408 | size -= clone->bi_idx * PAGE_SIZE; | ||
409 | 409 | ||
410 | for (i = clone->bi_idx; i < nr_iovecs; i++) { | 410 | for (i = 0; i < nr_iovecs; i++) { |
411 | struct bio_vec *bv = bio_iovec_idx(clone, i); | 411 | struct bio_vec *bv = bio_iovec_idx(clone, i); |
412 | 412 | ||
413 | bv->bv_page = mempool_alloc(cc->page_pool, gfp_mask); | 413 | bv->bv_page = mempool_alloc(cc->page_pool, gfp_mask); |
@@ -419,7 +419,7 @@ crypt_alloc_buffer(struct crypt_config *cc, unsigned int size, | |||
419 | * return a partially allocated bio, the caller will then try | 419 | * return a partially allocated bio, the caller will then try |
420 | * to allocate additional bios while submitting this partial bio | 420 | * to allocate additional bios while submitting this partial bio |
421 | */ | 421 | */ |
422 | if ((i - clone->bi_idx) == (MIN_BIO_PAGES - 1)) | 422 | if (i == (MIN_BIO_PAGES - 1)) |
423 | gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT; | 423 | gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT; |
424 | 424 | ||
425 | bv->bv_offset = 0; | 425 | bv->bv_offset = 0; |
@@ -438,12 +438,6 @@ crypt_alloc_buffer(struct crypt_config *cc, unsigned int size, | |||
438 | return NULL; | 438 | return NULL; |
439 | } | 439 | } |
440 | 440 | ||
441 | /* | ||
442 | * Remember the last bio_vec allocated to be able | ||
443 | * to correctly continue after the splitting. | ||
444 | */ | ||
445 | *bio_vec_idx = clone->bi_vcnt; | ||
446 | |||
447 | return clone; | 441 | return clone; |
448 | } | 442 | } |
449 | 443 | ||
@@ -495,9 +489,6 @@ static void dec_pending(struct crypt_io *io, int error) | |||
495 | if (!atomic_dec_and_test(&io->pending)) | 489 | if (!atomic_dec_and_test(&io->pending)) |
496 | return; | 490 | return; |
497 | 491 | ||
498 | if (io->first_clone) | ||
499 | bio_put(io->first_clone); | ||
500 | |||
501 | bio_endio(io->base_bio, io->base_bio->bi_size, io->error); | 492 | bio_endio(io->base_bio, io->base_bio->bi_size, io->error); |
502 | 493 | ||
503 | mempool_free(io, cc->io_pool); | 494 | mempool_free(io, cc->io_pool); |
@@ -562,6 +553,7 @@ static void clone_init(struct crypt_io *io, struct bio *clone) | |||
562 | clone->bi_end_io = crypt_endio; | 553 | clone->bi_end_io = crypt_endio; |
563 | clone->bi_bdev = cc->dev->bdev; | 554 | clone->bi_bdev = cc->dev->bdev; |
564 | clone->bi_rw = io->base_bio->bi_rw; | 555 | clone->bi_rw = io->base_bio->bi_rw; |
556 | clone->bi_destructor = dm_crypt_bio_destructor; | ||
565 | } | 557 | } |
566 | 558 | ||
567 | static void process_read(struct crypt_io *io) | 559 | static void process_read(struct crypt_io *io) |
@@ -585,7 +577,6 @@ static void process_read(struct crypt_io *io) | |||
585 | } | 577 | } |
586 | 578 | ||
587 | clone_init(io, clone); | 579 | clone_init(io, clone); |
588 | clone->bi_destructor = dm_crypt_bio_destructor; | ||
589 | clone->bi_idx = 0; | 580 | clone->bi_idx = 0; |
590 | clone->bi_vcnt = bio_segments(base_bio); | 581 | clone->bi_vcnt = bio_segments(base_bio); |
591 | clone->bi_size = base_bio->bi_size; | 582 | clone->bi_size = base_bio->bi_size; |
@@ -604,7 +595,6 @@ static void process_write(struct crypt_io *io) | |||
604 | struct convert_context ctx; | 595 | struct convert_context ctx; |
605 | unsigned remaining = base_bio->bi_size; | 596 | unsigned remaining = base_bio->bi_size; |
606 | sector_t sector = base_bio->bi_sector - io->target->begin; | 597 | sector_t sector = base_bio->bi_sector - io->target->begin; |
607 | unsigned bvec_idx = 0; | ||
608 | 598 | ||
609 | atomic_inc(&io->pending); | 599 | atomic_inc(&io->pending); |
610 | 600 | ||
@@ -615,14 +605,14 @@ static void process_write(struct crypt_io *io) | |||
615 | * so repeat the whole process until all the data can be handled. | 605 | * so repeat the whole process until all the data can be handled. |
616 | */ | 606 | */ |
617 | while (remaining) { | 607 | while (remaining) { |
618 | clone = crypt_alloc_buffer(cc, base_bio->bi_size, | 608 | clone = crypt_alloc_buffer(io, remaining); |
619 | io->first_clone, &bvec_idx); | ||
620 | if (unlikely(!clone)) { | 609 | if (unlikely(!clone)) { |
621 | dec_pending(io, -ENOMEM); | 610 | dec_pending(io, -ENOMEM); |
622 | return; | 611 | return; |
623 | } | 612 | } |
624 | 613 | ||
625 | ctx.bio_out = clone; | 614 | ctx.bio_out = clone; |
615 | ctx.idx_out = 0; | ||
626 | 616 | ||
627 | if (unlikely(crypt_convert(cc, &ctx) < 0)) { | 617 | if (unlikely(crypt_convert(cc, &ctx) < 0)) { |
628 | crypt_free_buffer_pages(cc, clone, clone->bi_size); | 618 | crypt_free_buffer_pages(cc, clone, clone->bi_size); |
@@ -631,31 +621,26 @@ static void process_write(struct crypt_io *io) | |||
631 | return; | 621 | return; |
632 | } | 622 | } |
633 | 623 | ||
634 | clone_init(io, clone); | 624 | /* crypt_convert should have filled the clone bio */ |
635 | clone->bi_sector = cc->start + sector; | 625 | BUG_ON(ctx.idx_out < clone->bi_vcnt); |
636 | |||
637 | if (!io->first_clone) { | ||
638 | /* | ||
639 | * hold a reference to the first clone, because it | ||
640 | * holds the bio_vec array and that can't be freed | ||
641 | * before all other clones are released | ||
642 | */ | ||
643 | bio_get(clone); | ||
644 | io->first_clone = clone; | ||
645 | } | ||
646 | 626 | ||
627 | clone->bi_sector = cc->start + sector; | ||
647 | remaining -= clone->bi_size; | 628 | remaining -= clone->bi_size; |
648 | sector += bio_sectors(clone); | 629 | sector += bio_sectors(clone); |
649 | 630 | ||
650 | /* prevent bio_put of first_clone */ | 631 | /* Grab another reference to the io struct |
632 | * before we kick off the request */ | ||
651 | if (remaining) | 633 | if (remaining) |
652 | atomic_inc(&io->pending); | 634 | atomic_inc(&io->pending); |
653 | 635 | ||
654 | generic_make_request(clone); | 636 | generic_make_request(clone); |
655 | 637 | ||
638 | /* Do not reference clone after this - it | ||
639 | * may be gone already. */ | ||
640 | |||
656 | /* out of memory -> run queues */ | 641 | /* out of memory -> run queues */ |
657 | if (remaining) | 642 | if (remaining) |
658 | congestion_wait(bio_data_dir(clone), HZ/100); | 643 | congestion_wait(WRITE, HZ/100); |
659 | } | 644 | } |
660 | } | 645 | } |
661 | 646 | ||
@@ -832,6 +817,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
832 | cc->iv_gen_ops = &crypt_iv_essiv_ops; | 817 | cc->iv_gen_ops = &crypt_iv_essiv_ops; |
833 | else if (strcmp(ivmode, "benbi") == 0) | 818 | else if (strcmp(ivmode, "benbi") == 0) |
834 | cc->iv_gen_ops = &crypt_iv_benbi_ops; | 819 | cc->iv_gen_ops = &crypt_iv_benbi_ops; |
820 | else if (strcmp(ivmode, "null") == 0) | ||
821 | cc->iv_gen_ops = &crypt_iv_null_ops; | ||
835 | else { | 822 | else { |
836 | ti->error = "Invalid IV mode"; | 823 | ti->error = "Invalid IV mode"; |
837 | goto bad2; | 824 | goto bad2; |
@@ -954,10 +941,12 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, | |||
954 | struct crypt_config *cc = ti->private; | 941 | struct crypt_config *cc = ti->private; |
955 | struct crypt_io *io; | 942 | struct crypt_io *io; |
956 | 943 | ||
944 | if (bio_barrier(bio)) | ||
945 | return -EOPNOTSUPP; | ||
946 | |||
957 | io = mempool_alloc(cc->io_pool, GFP_NOIO); | 947 | io = mempool_alloc(cc->io_pool, GFP_NOIO); |
958 | io->target = ti; | 948 | io->target = ti; |
959 | io->base_bio = bio; | 949 | io->base_bio = bio; |
960 | io->first_clone = NULL; | ||
961 | io->error = io->post_process = 0; | 950 | io->error = io->post_process = 0; |
962 | atomic_set(&io->pending, 0); | 951 | atomic_set(&io->pending, 0); |
963 | kcryptd_queue_io(io); | 952 | kcryptd_queue_io(io); |
@@ -1057,7 +1046,7 @@ error: | |||
1057 | 1046 | ||
1058 | static struct target_type crypt_target = { | 1047 | static struct target_type crypt_target = { |
1059 | .name = "crypt", | 1048 | .name = "crypt", |
1060 | .version= {1, 3, 0}, | 1049 | .version= {1, 5, 0}, |
1061 | .module = THIS_MODULE, | 1050 | .module = THIS_MODULE, |
1062 | .ctr = crypt_ctr, | 1051 | .ctr = crypt_ctr, |
1063 | .dtr = crypt_dtr, | 1052 | .dtr = crypt_dtr, |
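
The replacement for first_clone is plain reference counting: io->pending is raised once per outstanding clone, with an extra reference taken before each additional pass so the last completion cannot free the io while the loop is still splitting the write. The bare shape of that pattern, where alloc_next_chunk() is a made-up stand-in for crypt_alloc_buffer() plus crypt_convert():

    atomic_inc(&io->pending);                  /* covers the first clone */
    while (remaining) {
            clone = alloc_next_chunk(io, remaining);   /* hypothetical */
            remaining -= clone->bi_size;
            if (remaining)
                    atomic_inc(&io->pending);  /* ref for the next pass */
            generic_make_request(clone);
            /* clone, and on the last pass io, may already be freed */
    }
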
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c new file mode 100644 index 000000000000..52c7cf9e5803 --- /dev/null +++ b/drivers/md/dm-delay.c | |||
@@ -0,0 +1,383 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005-2007 Red Hat GmbH | ||
3 | * | ||
4 | * A target that delays reads and/or writes and can send | ||
5 | * them to different devices. | ||
6 | * | ||
7 | * This file is released under the GPL. | ||
8 | */ | ||
9 | |||
10 | #include <linux/module.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/blkdev.h> | ||
13 | #include <linux/bio.h> | ||
14 | #include <linux/slab.h> | ||
15 | |||
16 | #include "dm.h" | ||
17 | #include "dm-bio-list.h" | ||
18 | |||
19 | #define DM_MSG_PREFIX "delay" | ||
20 | |||
21 | struct delay_c { | ||
22 | struct timer_list delay_timer; | ||
23 | struct semaphore timer_lock; | ||
24 | struct work_struct flush_expired_bios; | ||
25 | struct list_head delayed_bios; | ||
26 | atomic_t may_delay; | ||
27 | mempool_t *delayed_pool; | ||
28 | |||
29 | struct dm_dev *dev_read; | ||
30 | sector_t start_read; | ||
31 | unsigned read_delay; | ||
32 | unsigned reads; | ||
33 | |||
34 | struct dm_dev *dev_write; | ||
35 | sector_t start_write; | ||
36 | unsigned write_delay; | ||
37 | unsigned writes; | ||
38 | }; | ||
39 | |||
40 | struct delay_info { | ||
41 | struct delay_c *context; | ||
42 | struct list_head list; | ||
43 | struct bio *bio; | ||
44 | unsigned long expires; | ||
45 | }; | ||
46 | |||
47 | static DEFINE_MUTEX(delayed_bios_lock); | ||
48 | |||
49 | static struct workqueue_struct *kdelayd_wq; | ||
50 | static struct kmem_cache *delayed_cache; | ||
51 | |||
52 | static void handle_delayed_timer(unsigned long data) | ||
53 | { | ||
54 | struct delay_c *dc = (struct delay_c *)data; | ||
55 | |||
56 | queue_work(kdelayd_wq, &dc->flush_expired_bios); | ||
57 | } | ||
58 | |||
59 | static void queue_timeout(struct delay_c *dc, unsigned long expires) | ||
60 | { | ||
61 | down(&dc->timer_lock); | ||
62 | |||
63 | if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires) | ||
64 | mod_timer(&dc->delay_timer, expires); | ||
65 | |||
66 | up(&dc->timer_lock); | ||
67 | } | ||
68 | |||
69 | static void flush_bios(struct bio *bio) | ||
70 | { | ||
71 | struct bio *n; | ||
72 | |||
73 | while (bio) { | ||
74 | n = bio->bi_next; | ||
75 | bio->bi_next = NULL; | ||
76 | generic_make_request(bio); | ||
77 | bio = n; | ||
78 | } | ||
79 | } | ||
80 | |||
81 | static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all) | ||
82 | { | ||
83 | struct delay_info *delayed, *next; | ||
84 | unsigned long next_expires = 0; | ||
85 | int start_timer = 0; | ||
86 | BIO_LIST(flush_bios); | ||
87 | |||
88 | mutex_lock(&delayed_bios_lock); | ||
89 | list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) { | ||
90 | if (flush_all || time_after_eq(jiffies, delayed->expires)) { | ||
91 | list_del(&delayed->list); | ||
92 | bio_list_add(&flush_bios, delayed->bio); | ||
93 | if ((bio_data_dir(delayed->bio) == WRITE)) | ||
94 | delayed->context->writes--; | ||
95 | else | ||
96 | delayed->context->reads--; | ||
97 | mempool_free(delayed, dc->delayed_pool); | ||
98 | continue; | ||
99 | } | ||
100 | |||
101 | if (!start_timer) { | ||
102 | start_timer = 1; | ||
103 | next_expires = delayed->expires; | ||
104 | } else | ||
105 | next_expires = min(next_expires, delayed->expires); | ||
106 | } | ||
107 | |||
108 | mutex_unlock(&delayed_bios_lock); | ||
109 | |||
110 | if (start_timer) | ||
111 | queue_timeout(dc, next_expires); | ||
112 | |||
113 | return bio_list_get(&flush_bios); | ||
114 | } | ||
115 | |||
116 | static void flush_expired_bios(struct work_struct *work) | ||
117 | { | ||
118 | struct delay_c *dc; | ||
119 | |||
120 | dc = container_of(work, struct delay_c, flush_expired_bios); | ||
121 | flush_bios(flush_delayed_bios(dc, 0)); | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * Mapping parameters: | ||
126 | * <device> <offset> <delay> [<write_device> <write_offset> <write_delay>] | ||
127 | * | ||
128 | * With separate write parameters, the first set is only used for reads. | ||
129 | * Delays are specified in milliseconds. | ||
130 | */ | ||
131 | static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) | ||
132 | { | ||
133 | struct delay_c *dc; | ||
134 | unsigned long long tmpll; | ||
135 | |||
136 | if (argc != 3 && argc != 6) { | ||
137 | ti->error = "Requires exactly 3 or 6 arguments"; | ||
138 | return -EINVAL; | ||
139 | } | ||
140 | |||
141 | dc = kmalloc(sizeof(*dc), GFP_KERNEL); | ||
142 | if (!dc) { | ||
143 | ti->error = "Cannot allocate context"; | ||
144 | return -ENOMEM; | ||
145 | } | ||
146 | |||
147 | dc->reads = dc->writes = 0; | ||
148 | |||
149 | if (sscanf(argv[1], "%llu", &tmpll) != 1) { | ||
150 | ti->error = "Invalid device sector"; | ||
151 | goto bad; | ||
152 | } | ||
153 | dc->start_read = tmpll; | ||
154 | |||
155 | if (sscanf(argv[2], "%u", &dc->read_delay) != 1) { | ||
156 | ti->error = "Invalid delay"; | ||
157 | goto bad; | ||
158 | } | ||
159 | |||
160 | if (dm_get_device(ti, argv[0], dc->start_read, ti->len, | ||
161 | dm_table_get_mode(ti->table), &dc->dev_read)) { | ||
162 | ti->error = "Device lookup failed"; | ||
163 | goto bad; | ||
164 | } | ||
165 | |||
166 | if (argc == 3) { | ||
167 | dc->dev_write = NULL; | ||
168 | goto out; | ||
169 | } | ||
170 | |||
171 | if (sscanf(argv[4], "%llu", &tmpll) != 1) { | ||
172 | ti->error = "Invalid write device sector"; | ||
173 | goto bad; | ||
174 | } | ||
175 | dc->start_write = tmpll; | ||
176 | |||
177 | if (sscanf(argv[5], "%u", &dc->write_delay) != 1) { | ||
178 | ti->error = "Invalid write delay"; | ||
179 | goto bad; | ||
180 | } | ||
181 | |||
182 | if (dm_get_device(ti, argv[3], dc->start_write, ti->len, | ||
183 | dm_table_get_mode(ti->table), &dc->dev_write)) { | ||
184 | ti->error = "Write device lookup failed"; | ||
185 | dm_put_device(ti, dc->dev_read); | ||
186 | goto bad; | ||
187 | } | ||
188 | |||
189 | out: | ||
190 | dc->delayed_pool = mempool_create_slab_pool(128, delayed_cache); | ||
191 | if (!dc->delayed_pool) { | ||
192 | DMERR("Couldn't create delayed bio pool."); | ||
193 | goto bad; | ||
194 | } | ||
195 | |||
196 | init_timer(&dc->delay_timer); | ||
197 | dc->delay_timer.function = handle_delayed_timer; | ||
198 | dc->delay_timer.data = (unsigned long)dc; | ||
199 | |||
200 | INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); | ||
201 | INIT_LIST_HEAD(&dc->delayed_bios); | ||
202 | init_MUTEX(&dc->timer_lock); | ||
203 | atomic_set(&dc->may_delay, 1); | ||
204 | |||
205 | ti->private = dc; | ||
206 | return 0; | ||
207 | |||
208 | bad: | ||
209 | kfree(dc); | ||
210 | return -EINVAL; | ||
211 | } | ||
212 | |||
213 | static void delay_dtr(struct dm_target *ti) | ||
214 | { | ||
215 | struct delay_c *dc = ti->private; | ||
216 | |||
217 | flush_workqueue(kdelayd_wq); | ||
218 | |||
219 | dm_put_device(ti, dc->dev_read); | ||
220 | |||
221 | if (dc->dev_write) | ||
222 | dm_put_device(ti, dc->dev_write); | ||
223 | |||
224 | mempool_destroy(dc->delayed_pool); | ||
225 | kfree(dc); | ||
226 | } | ||
227 | |||
228 | static int delay_bio(struct delay_c *dc, int delay, struct bio *bio) | ||
229 | { | ||
230 | struct delay_info *delayed; | ||
231 | unsigned long expires = 0; | ||
232 | |||
233 | if (!delay || !atomic_read(&dc->may_delay)) | ||
234 | return 1; | ||
235 | |||
236 | delayed = mempool_alloc(dc->delayed_pool, GFP_NOIO); | ||
237 | |||
238 | delayed->context = dc; | ||
239 | delayed->bio = bio; | ||
240 | delayed->expires = expires = jiffies + (delay * HZ / 1000); | ||
241 | |||
242 | mutex_lock(&delayed_bios_lock); | ||
243 | |||
244 | if (bio_data_dir(bio) == WRITE) | ||
245 | dc->writes++; | ||
246 | else | ||
247 | dc->reads++; | ||
248 | |||
249 | list_add_tail(&delayed->list, &dc->delayed_bios); | ||
250 | |||
251 | mutex_unlock(&delayed_bios_lock); | ||
252 | |||
253 | queue_timeout(dc, expires); | ||
254 | |||
255 | return 0; | ||
256 | } | ||
257 | |||
258 | static void delay_presuspend(struct dm_target *ti) | ||
259 | { | ||
260 | struct delay_c *dc = ti->private; | ||
261 | |||
262 | atomic_set(&dc->may_delay, 0); | ||
263 | del_timer_sync(&dc->delay_timer); | ||
264 | flush_bios(flush_delayed_bios(dc, 1)); | ||
265 | } | ||
266 | |||
267 | static void delay_resume(struct dm_target *ti) | ||
268 | { | ||
269 | struct delay_c *dc = ti->private; | ||
270 | |||
271 | atomic_set(&dc->may_delay, 1); | ||
272 | } | ||
273 | |||
274 | static int delay_map(struct dm_target *ti, struct bio *bio, | ||
275 | union map_info *map_context) | ||
276 | { | ||
277 | struct delay_c *dc = ti->private; | ||
278 | |||
279 | if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) { | ||
280 | bio->bi_bdev = dc->dev_write->bdev; | ||
281 | bio->bi_sector = dc->start_write + | ||
282 | (bio->bi_sector - ti->begin); | ||
283 | |||
284 | return delay_bio(dc, dc->write_delay, bio); | ||
285 | } | ||
286 | |||
287 | bio->bi_bdev = dc->dev_read->bdev; | ||
288 | bio->bi_sector = dc->start_read + | ||
289 | (bio->bi_sector - ti->begin); | ||
290 | |||
291 | return delay_bio(dc, dc->read_delay, bio); | ||
292 | } | ||
293 | |||
294 | static int delay_status(struct dm_target *ti, status_type_t type, | ||
295 | char *result, unsigned maxlen) | ||
296 | { | ||
297 | struct delay_c *dc = ti->private; | ||
298 | int sz = 0; | ||
299 | |||
300 | switch (type) { | ||
301 | case STATUSTYPE_INFO: | ||
302 | DMEMIT("%u %u", dc->reads, dc->writes); | ||
303 | break; | ||
304 | |||
305 | case STATUSTYPE_TABLE: | ||
306 | DMEMIT("%s %llu %u", dc->dev_read->name, | ||
307 | (unsigned long long) dc->start_read, | ||
308 | dc->read_delay); | ||
309 | if (dc->dev_write) | ||
310 | DMEMIT("%s %llu %u", dc->dev_write->name, | ||
311 | (unsigned long long) dc->start_write, | ||
312 | dc->write_delay); | ||
313 | break; | ||
314 | } | ||
315 | |||
316 | return 0; | ||
317 | } | ||
318 | |||
319 | static struct target_type delay_target = { | ||
320 | .name = "delay", | ||
321 | .version = {1, 0, 2}, | ||
322 | .module = THIS_MODULE, | ||
323 | .ctr = delay_ctr, | ||
324 | .dtr = delay_dtr, | ||
325 | .map = delay_map, | ||
326 | .presuspend = delay_presuspend, | ||
327 | .resume = delay_resume, | ||
328 | .status = delay_status, | ||
329 | }; | ||
330 | |||
331 | static int __init dm_delay_init(void) | ||
332 | { | ||
333 | int r = -ENOMEM; | ||
334 | |||
335 | kdelayd_wq = create_workqueue("kdelayd"); | ||
336 | if (!kdelayd_wq) { | ||
337 | DMERR("Couldn't start kdelayd"); | ||
338 | goto bad_queue; | ||
339 | } | ||
340 | |||
341 | delayed_cache = kmem_cache_create("dm-delay", | ||
342 | sizeof(struct delay_info), | ||
343 | __alignof__(struct delay_info), | ||
344 | 0, NULL, NULL); | ||
345 | if (!delayed_cache) { | ||
346 | DMERR("Couldn't create delayed bio cache."); | ||
347 | goto bad_memcache; | ||
348 | } | ||
349 | |||
350 | r = dm_register_target(&delay_target); | ||
351 | if (r < 0) { | ||
352 | DMERR("register failed %d", r); | ||
353 | goto bad_register; | ||
354 | } | ||
355 | |||
356 | return 0; | ||
357 | |||
358 | bad_register: | ||
359 | kmem_cache_destroy(delayed_cache); | ||
360 | bad_memcache: | ||
361 | destroy_workqueue(kdelayd_wq); | ||
362 | bad_queue: | ||
363 | return r; | ||
364 | } | ||
365 | |||
366 | static void __exit dm_delay_exit(void) | ||
367 | { | ||
368 | int r = dm_unregister_target(&delay_target); | ||
369 | |||
370 | if (r < 0) | ||
371 | DMERR("unregister failed %d", r); | ||
372 | |||
373 | kmem_cache_destroy(delayed_cache); | ||
374 | destroy_workqueue(kdelayd_wq); | ||
375 | } | ||
376 | |||
377 | /* Module hooks */ | ||
378 | module_init(dm_delay_init); | ||
379 | module_exit(dm_delay_exit); | ||
380 | |||
381 | MODULE_DESCRIPTION(DM_NAME " delay target"); | ||
382 | MODULE_AUTHOR("Heinz Mauelshagen <mauelshagen@redhat.com>"); | ||
383 | MODULE_LICENSE("GPL"); | ||
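
A delay table maps like any other target; something like echo "0 204800 delay /dev/sdb1 0 500" | dmsetup create delayed (device name and sector count invented; 204800 sectors is 100 MB) would delay every I/O to that range by 500 ms. The expiry check in flush_delayed_bios() leans on time_after_eq(), whose unsigned subtraction stays correct across a jiffies wrap. The trick, rendered as a runnable userspace snippet:

    #include <stdio.h>

    /* same idea as the kernel's time_after_eq(): subtract in unsigned
     * arithmetic, reinterpret as signed; correct for deltas < LONG_MAX */
    #define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)

    int main(void)
    {
            unsigned long jiffies = 5;                   /* just wrapped */
            unsigned long expires = (unsigned long)-10;  /* pre-wrap stamp */

            printf("expired: %s\n",
                   time_after_eq(jiffies, expires) ? "yes" : "no");
            return 0;   /* prints "expired: yes" despite 5 < ULONG_MAX-9 */
    }
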
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 99cdffa7fbfe..07e0a0c84f6e 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c | |||
@@ -1,7 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | * dm-snapshot.c | 2 | * dm-exception-store.c |
3 | * | 3 | * |
4 | * Copyright (C) 2001-2002 Sistina Software (UK) Limited. | 4 | * Copyright (C) 2001-2002 Sistina Software (UK) Limited. |
5 | * Copyright (C) 2006 Red Hat GmbH | ||
5 | * | 6 | * |
6 | * This file is released under the GPL. | 7 | * This file is released under the GPL. |
7 | */ | 8 | */ |
@@ -123,6 +124,7 @@ struct pstore { | |||
123 | atomic_t pending_count; | 124 | atomic_t pending_count; |
124 | uint32_t callback_count; | 125 | uint32_t callback_count; |
125 | struct commit_callback *callbacks; | 126 | struct commit_callback *callbacks; |
127 | struct dm_io_client *io_client; | ||
126 | }; | 128 | }; |
127 | 129 | ||
128 | static inline unsigned int sectors_to_pages(unsigned int sectors) | 130 | static inline unsigned int sectors_to_pages(unsigned int sectors) |
@@ -159,14 +161,20 @@ static void free_area(struct pstore *ps) | |||
159 | */ | 161 | */ |
160 | static int chunk_io(struct pstore *ps, uint32_t chunk, int rw) | 162 | static int chunk_io(struct pstore *ps, uint32_t chunk, int rw) |
161 | { | 163 | { |
162 | struct io_region where; | 164 | struct io_region where = { |
163 | unsigned long bits; | 165 | .bdev = ps->snap->cow->bdev, |
164 | 166 | .sector = ps->snap->chunk_size * chunk, | |
165 | where.bdev = ps->snap->cow->bdev; | 167 | .count = ps->snap->chunk_size, |
166 | where.sector = ps->snap->chunk_size * chunk; | 168 | }; |
167 | where.count = ps->snap->chunk_size; | 169 | struct dm_io_request io_req = { |
168 | 170 | .bi_rw = rw, | |
169 | return dm_io_sync_vm(1, &where, rw, ps->area, &bits); | 171 | .mem.type = DM_IO_VMA, |
172 | .mem.ptr.vma = ps->area, | ||
173 | .client = ps->io_client, | ||
174 | .notify.fn = NULL, | ||
175 | }; | ||
176 | |||
177 | return dm_io(&io_req, 1, &where, NULL); | ||
170 | } | 178 | } |
171 | 179 | ||
172 | /* | 180 | /* |
@@ -213,17 +221,18 @@ static int read_header(struct pstore *ps, int *new_snapshot) | |||
213 | chunk_size_supplied = 0; | 221 | chunk_size_supplied = 0; |
214 | } | 222 | } |
215 | 223 | ||
216 | r = dm_io_get(sectors_to_pages(ps->snap->chunk_size)); | 224 | ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap-> |
217 | if (r) | 225 | chunk_size)); |
218 | return r; | 226 | if (IS_ERR(ps->io_client)) |
227 | return PTR_ERR(ps->io_client); | ||
219 | 228 | ||
220 | r = alloc_area(ps); | 229 | r = alloc_area(ps); |
221 | if (r) | 230 | if (r) |
222 | goto bad1; | 231 | return r; |
223 | 232 | ||
224 | r = chunk_io(ps, 0, READ); | 233 | r = chunk_io(ps, 0, READ); |
225 | if (r) | 234 | if (r) |
226 | goto bad2; | 235 | goto bad; |
227 | 236 | ||
228 | dh = (struct disk_header *) ps->area; | 237 | dh = (struct disk_header *) ps->area; |
229 | 238 | ||
@@ -235,7 +244,7 @@ static int read_header(struct pstore *ps, int *new_snapshot) | |||
235 | if (le32_to_cpu(dh->magic) != SNAP_MAGIC) { | 244 | if (le32_to_cpu(dh->magic) != SNAP_MAGIC) { |
236 | DMWARN("Invalid or corrupt snapshot"); | 245 | DMWARN("Invalid or corrupt snapshot"); |
237 | r = -ENXIO; | 246 | r = -ENXIO; |
238 | goto bad2; | 247 | goto bad; |
239 | } | 248 | } |
240 | 249 | ||
241 | *new_snapshot = 0; | 250 | *new_snapshot = 0; |
@@ -252,27 +261,22 @@ static int read_header(struct pstore *ps, int *new_snapshot) | |||
252 | (unsigned long long)ps->snap->chunk_size); | 261 | (unsigned long long)ps->snap->chunk_size); |
253 | 262 | ||
254 | /* We had a bogus chunk_size. Fix stuff up. */ | 263 | /* We had a bogus chunk_size. Fix stuff up. */ |
255 | dm_io_put(sectors_to_pages(ps->snap->chunk_size)); | ||
256 | free_area(ps); | 264 | free_area(ps); |
257 | 265 | ||
258 | ps->snap->chunk_size = chunk_size; | 266 | ps->snap->chunk_size = chunk_size; |
259 | ps->snap->chunk_mask = chunk_size - 1; | 267 | ps->snap->chunk_mask = chunk_size - 1; |
260 | ps->snap->chunk_shift = ffs(chunk_size) - 1; | 268 | ps->snap->chunk_shift = ffs(chunk_size) - 1; |
261 | 269 | ||
262 | r = dm_io_get(sectors_to_pages(chunk_size)); | 270 | r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size), |
271 | ps->io_client); | ||
263 | if (r) | 272 | if (r) |
264 | return r; | 273 | return r; |
265 | 274 | ||
266 | r = alloc_area(ps); | 275 | r = alloc_area(ps); |
267 | if (r) | 276 | return r; |
268 | goto bad1; | ||
269 | |||
270 | return 0; | ||
271 | 277 | ||
272 | bad2: | 278 | bad: |
273 | free_area(ps); | 279 | free_area(ps); |
274 | bad1: | ||
275 | dm_io_put(sectors_to_pages(ps->snap->chunk_size)); | ||
276 | return r; | 280 | return r; |
277 | } | 281 | } |
278 | 282 | ||
@@ -405,7 +409,7 @@ static void persistent_destroy(struct exception_store *store) | |||
405 | { | 409 | { |
406 | struct pstore *ps = get_info(store); | 410 | struct pstore *ps = get_info(store); |
407 | 411 | ||
408 | dm_io_put(sectors_to_pages(ps->snap->chunk_size)); | 412 | dm_io_client_destroy(ps->io_client); |
409 | vfree(ps->callbacks); | 413 | vfree(ps->callbacks); |
410 | free_area(ps); | 414 | free_area(ps); |
411 | kfree(ps); | 415 | kfree(ps); |
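The chunk_io() rewrite above is the template for every converted caller in this series: an io_region says where on disk, a dm_io_request says which memory to use and how, and leaving notify.fn NULL selects the synchronous path inside dm_io(). A sketch of the same synchronous, vmalloc-backed read with the boilerplate pulled out (the helper name is illustrative, not from the patch):

#include "dm-io.h"

/*
 * Read 'count' sectors from 'bdev' into a vmalloc()ed buffer via the
 * new collapsed interface. dm_io() returns 0 or a negative errno.
 */
static int read_sectors_sync(struct dm_io_client *client,
                             struct block_device *bdev,
                             sector_t sector, sector_t count, void *buf)
{
        struct io_region where = {
                .bdev = bdev,
                .sector = sector,
                .count = count,         /* a zero count makes dm_io skip it */
        };
        struct dm_io_request io_req = {
                .bi_rw = READ,          /* READ or WRITE, never READA */
                .mem.type = DM_IO_VMA,  /* buffer comes from vmalloc() */
                .mem.ptr.vma = buf,
                .notify.fn = NULL,      /* NULL callback selects sync io */
                .client = client,       /* private mempool and bioset */
        };

        return dm_io(&io_req, 1, &where, NULL);
}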
diff --git a/drivers/md/dm-hw-handler.h b/drivers/md/dm-hw-handler.h index 32eff28e4adc..e0832e6fcf36 100644 --- a/drivers/md/dm-hw-handler.h +++ b/drivers/md/dm-hw-handler.h | |||
@@ -16,6 +16,7 @@ | |||
16 | struct hw_handler_type; | 16 | struct hw_handler_type; |
17 | struct hw_handler { | 17 | struct hw_handler { |
18 | struct hw_handler_type *type; | 18 | struct hw_handler_type *type; |
19 | struct mapped_device *md; | ||
19 | void *context; | 20 | void *context; |
20 | }; | 21 | }; |
21 | 22 | ||
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 8bdc8a87b249..352c6fbeac53 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c | |||
@@ -1,5 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2003 Sistina Software | 2 | * Copyright (C) 2003 Sistina Software |
3 | * Copyright (C) 2006 Red Hat GmbH | ||
3 | * | 4 | * |
4 | * This file is released under the GPL. | 5 | * This file is released under the GPL. |
5 | */ | 6 | */ |
@@ -12,13 +13,17 @@ | |||
12 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
13 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
14 | 15 | ||
15 | static struct bio_set *_bios; | 16 | struct dm_io_client { |
17 | mempool_t *pool; | ||
18 | struct bio_set *bios; | ||
19 | }; | ||
16 | 20 | ||
17 | /* FIXME: can we shrink this ? */ | 21 | /* FIXME: can we shrink this ? */ |
18 | struct io { | 22 | struct io { |
19 | unsigned long error; | 23 | unsigned long error; |
20 | atomic_t count; | 24 | atomic_t count; |
21 | struct task_struct *sleeper; | 25 | struct task_struct *sleeper; |
26 | struct dm_io_client *client; | ||
22 | io_notify_fn callback; | 27 | io_notify_fn callback; |
23 | void *context; | 28 | void *context; |
24 | }; | 29 | }; |
@@ -26,63 +31,58 @@ struct io { | |||
26 | /* | 31 | /* |
27 | * io contexts are only dynamically allocated for asynchronous | 32 | * io contexts are only dynamically allocated for asynchronous |
28 | * io. Since async io is likely to be the majority of io we'll | 33 | * io. Since async io is likely to be the majority of io we'll |
29 | * have the same number of io contexts as buffer heads ! (FIXME: | 34 | * have the same number of io contexts as bios! (FIXME: must reduce this). |
30 | * must reduce this). | ||
31 | */ | 35 | */ |
32 | static unsigned _num_ios; | ||
33 | static mempool_t *_io_pool; | ||
34 | 36 | ||
35 | static unsigned int pages_to_ios(unsigned int pages) | 37 | static unsigned int pages_to_ios(unsigned int pages) |
36 | { | 38 | { |
37 | return 4 * pages; /* too many ? */ | 39 | return 4 * pages; /* too many ? */ |
38 | } | 40 | } |
39 | 41 | ||
40 | static int resize_pool(unsigned int new_ios) | 42 | /* |
43 | * Create a client with mempool and bioset. | ||
44 | */ | ||
45 | struct dm_io_client *dm_io_client_create(unsigned num_pages) | ||
41 | { | 46 | { |
42 | int r = 0; | 47 | unsigned ios = pages_to_ios(num_pages); |
43 | 48 | struct dm_io_client *client; | |
44 | if (_io_pool) { | ||
45 | if (new_ios == 0) { | ||
46 | /* free off the pool */ | ||
47 | mempool_destroy(_io_pool); | ||
48 | _io_pool = NULL; | ||
49 | bioset_free(_bios); | ||
50 | |||
51 | } else { | ||
52 | /* resize the pool */ | ||
53 | r = mempool_resize(_io_pool, new_ios, GFP_KERNEL); | ||
54 | } | ||
55 | 49 | ||
56 | } else { | 50 | client = kmalloc(sizeof(*client), GFP_KERNEL); |
57 | /* create new pool */ | 51 | if (!client) |
58 | _io_pool = mempool_create_kmalloc_pool(new_ios, | 52 | return ERR_PTR(-ENOMEM); |
59 | sizeof(struct io)); | 53 | |
60 | if (!_io_pool) | 54 | client->pool = mempool_create_kmalloc_pool(ios, sizeof(struct io)); |
61 | return -ENOMEM; | 55 | if (!client->pool) |
62 | 56 | goto bad; | |
63 | _bios = bioset_create(16, 16); | ||
64 | if (!_bios) { | ||
65 | mempool_destroy(_io_pool); | ||
66 | _io_pool = NULL; | ||
67 | return -ENOMEM; | ||
68 | } | ||
69 | } | ||
70 | 57 | ||
71 | if (!r) | 58 | client->bios = bioset_create(16, 16); |
72 | _num_ios = new_ios; | 59 | if (!client->bios) |
60 | goto bad; | ||
73 | 61 | ||
74 | return r; | 62 | return client; |
63 | |||
64 | bad: | ||
65 | if (client->pool) | ||
66 | mempool_destroy(client->pool); | ||
67 | kfree(client); | ||
68 | return ERR_PTR(-ENOMEM); | ||
75 | } | 69 | } |
70 | EXPORT_SYMBOL(dm_io_client_create); | ||
76 | 71 | ||
77 | int dm_io_get(unsigned int num_pages) | 72 | int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client) |
78 | { | 73 | { |
79 | return resize_pool(_num_ios + pages_to_ios(num_pages)); | 74 | return mempool_resize(client->pool, pages_to_ios(num_pages), |
75 | GFP_KERNEL); | ||
80 | } | 76 | } |
77 | EXPORT_SYMBOL(dm_io_client_resize); | ||
81 | 78 | ||
82 | void dm_io_put(unsigned int num_pages) | 79 | void dm_io_client_destroy(struct dm_io_client *client) |
83 | { | 80 | { |
84 | resize_pool(_num_ios - pages_to_ios(num_pages)); | 81 | mempool_destroy(client->pool); |
82 | bioset_free(client->bios); | ||
83 | kfree(client); | ||
85 | } | 84 | } |
85 | EXPORT_SYMBOL(dm_io_client_destroy); | ||
86 | 86 | ||
87 | /*----------------------------------------------------------------- | 87 | /*----------------------------------------------------------------- |
88 | * We need to keep track of which region a bio is doing io for. | 88 | * We need to keep track of which region a bio is doing io for. |
@@ -118,7 +118,7 @@ static void dec_count(struct io *io, unsigned int region, int error) | |||
118 | io_notify_fn fn = io->callback; | 118 | io_notify_fn fn = io->callback; |
119 | void *context = io->context; | 119 | void *context = io->context; |
120 | 120 | ||
121 | mempool_free(io, _io_pool); | 121 | mempool_free(io, io->client->pool); |
122 | fn(r, context); | 122 | fn(r, context); |
123 | } | 123 | } |
124 | } | 124 | } |
@@ -126,7 +126,8 @@ static void dec_count(struct io *io, unsigned int region, int error) | |||
126 | 126 | ||
127 | static int endio(struct bio *bio, unsigned int done, int error) | 127 | static int endio(struct bio *bio, unsigned int done, int error) |
128 | { | 128 | { |
129 | struct io *io = (struct io *) bio->bi_private; | 129 | struct io *io; |
130 | unsigned region; | ||
130 | 131 | ||
131 | /* keep going until we've finished */ | 132 | /* keep going until we've finished */ |
132 | if (bio->bi_size) | 133 | if (bio->bi_size) |
@@ -135,10 +136,17 @@ static int endio(struct bio *bio, unsigned int done, int error) | |||
135 | if (error && bio_data_dir(bio) == READ) | 136 | if (error && bio_data_dir(bio) == READ) |
136 | zero_fill_bio(bio); | 137 | zero_fill_bio(bio); |
137 | 138 | ||
138 | dec_count(io, bio_get_region(bio), error); | 139 | /* |
140 | * The bio destructor in bio_put() may use the io object. | ||
141 | */ | ||
142 | io = bio->bi_private; | ||
143 | region = bio_get_region(bio); | ||
144 | |||
139 | bio->bi_max_vecs++; | 145 | bio->bi_max_vecs++; |
140 | bio_put(bio); | 146 | bio_put(bio); |
141 | 147 | ||
148 | dec_count(io, region, error); | ||
149 | |||
142 | return 0; | 150 | return 0; |
143 | } | 151 | } |
144 | 152 | ||
@@ -209,6 +217,9 @@ static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec) | |||
209 | dp->context_ptr = bvec; | 217 | dp->context_ptr = bvec; |
210 | } | 218 | } |
211 | 219 | ||
220 | /* | ||
221 | * Functions for getting the pages from a VMA. | ||
222 | */ | ||
212 | static void vm_get_page(struct dpages *dp, | 223 | static void vm_get_page(struct dpages *dp, |
213 | struct page **p, unsigned long *len, unsigned *offset) | 224 | struct page **p, unsigned long *len, unsigned *offset) |
214 | { | 225 | { |
@@ -233,7 +244,34 @@ static void vm_dp_init(struct dpages *dp, void *data) | |||
233 | 244 | ||
234 | static void dm_bio_destructor(struct bio *bio) | 245 | static void dm_bio_destructor(struct bio *bio) |
235 | { | 246 | { |
236 | bio_free(bio, _bios); | 247 | struct io *io = bio->bi_private; |
248 | |||
249 | bio_free(bio, io->client->bios); | ||
250 | } | ||
251 | |||
252 | /* | ||
253 | * Functions for getting the pages from kernel memory. | ||
254 | */ | ||
255 | static void km_get_page(struct dpages *dp, struct page **p, unsigned long *len, | ||
256 | unsigned *offset) | ||
257 | { | ||
258 | *p = virt_to_page(dp->context_ptr); | ||
259 | *offset = dp->context_u; | ||
260 | *len = PAGE_SIZE - dp->context_u; | ||
261 | } | ||
262 | |||
263 | static void km_next_page(struct dpages *dp) | ||
264 | { | ||
265 | dp->context_ptr += PAGE_SIZE - dp->context_u; | ||
266 | dp->context_u = 0; | ||
267 | } | ||
268 | |||
269 | static void km_dp_init(struct dpages *dp, void *data) | ||
270 | { | ||
271 | dp->get_page = km_get_page; | ||
272 | dp->next_page = km_next_page; | ||
273 | dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1); | ||
274 | dp->context_ptr = data; | ||
237 | } | 275 | } |
238 | 276 | ||
239 | /*----------------------------------------------------------------- | 277 | /*----------------------------------------------------------------- |
@@ -256,7 +294,7 @@ static void do_region(int rw, unsigned int region, struct io_region *where, | |||
256 | * to hide it from bio_add_page(). | 294 | * to hide it from bio_add_page(). |
257 | */ | 295 | */ |
258 | num_bvecs = (remaining / (PAGE_SIZE >> SECTOR_SHIFT)) + 2; | 296 | num_bvecs = (remaining / (PAGE_SIZE >> SECTOR_SHIFT)) + 2; |
259 | bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, _bios); | 297 | bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios); |
260 | bio->bi_sector = where->sector + (where->count - remaining); | 298 | bio->bi_sector = where->sector + (where->count - remaining); |
261 | bio->bi_bdev = where->bdev; | 299 | bio->bi_bdev = where->bdev; |
262 | bio->bi_end_io = endio; | 300 | bio->bi_end_io = endio; |
@@ -311,8 +349,9 @@ static void dispatch_io(int rw, unsigned int num_regions, | |||
311 | dec_count(io, 0, 0); | 349 | dec_count(io, 0, 0); |
312 | } | 350 | } |
313 | 351 | ||
314 | static int sync_io(unsigned int num_regions, struct io_region *where, | 352 | static int sync_io(struct dm_io_client *client, unsigned int num_regions, |
315 | int rw, struct dpages *dp, unsigned long *error_bits) | 353 | struct io_region *where, int rw, struct dpages *dp, |
354 | unsigned long *error_bits) | ||
316 | { | 355 | { |
317 | struct io io; | 356 | struct io io; |
318 | 357 | ||
@@ -324,6 +363,7 @@ static int sync_io(unsigned int num_regions, struct io_region *where, | |||
324 | io.error = 0; | 363 | io.error = 0; |
325 | atomic_set(&io.count, 1); /* see dispatch_io() */ | 364 | atomic_set(&io.count, 1); /* see dispatch_io() */ |
326 | io.sleeper = current; | 365 | io.sleeper = current; |
366 | io.client = client; | ||
327 | 367 | ||
328 | dispatch_io(rw, num_regions, where, dp, &io, 1); | 368 | dispatch_io(rw, num_regions, where, dp, &io, 1); |
329 | 369 | ||
@@ -340,12 +380,15 @@ static int sync_io(unsigned int num_regions, struct io_region *where, | |||
340 | if (atomic_read(&io.count)) | 380 | if (atomic_read(&io.count)) |
341 | return -EINTR; | 381 | return -EINTR; |
342 | 382 | ||
343 | *error_bits = io.error; | 383 | if (error_bits) |
384 | *error_bits = io.error; | ||
385 | |||
344 | return io.error ? -EIO : 0; | 386 | return io.error ? -EIO : 0; |
345 | } | 387 | } |
346 | 388 | ||
347 | static int async_io(unsigned int num_regions, struct io_region *where, int rw, | 389 | static int async_io(struct dm_io_client *client, unsigned int num_regions, |
348 | struct dpages *dp, io_notify_fn fn, void *context) | 390 | struct io_region *where, int rw, struct dpages *dp, |
391 | io_notify_fn fn, void *context) | ||
349 | { | 392 | { |
350 | struct io *io; | 393 | struct io *io; |
351 | 394 | ||
@@ -355,10 +398,11 @@ static int async_io(unsigned int num_regions, struct io_region *where, int rw, | |||
355 | return -EIO; | 398 | return -EIO; |
356 | } | 399 | } |
357 | 400 | ||
358 | io = mempool_alloc(_io_pool, GFP_NOIO); | 401 | io = mempool_alloc(client->pool, GFP_NOIO); |
359 | io->error = 0; | 402 | io->error = 0; |
360 | atomic_set(&io->count, 1); /* see dispatch_io() */ | 403 | atomic_set(&io->count, 1); /* see dispatch_io() */ |
361 | io->sleeper = NULL; | 404 | io->sleeper = NULL; |
405 | io->client = client; | ||
362 | io->callback = fn; | 406 | io->callback = fn; |
363 | io->context = context; | 407 | io->context = context; |
364 | 408 | ||
@@ -366,61 +410,51 @@ static int async_io(unsigned int num_regions, struct io_region *where, int rw, | |||
366 | return 0; | 410 | return 0; |
367 | } | 411 | } |
368 | 412 | ||
369 | int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw, | 413 | static int dp_init(struct dm_io_request *io_req, struct dpages *dp) |
370 | struct page_list *pl, unsigned int offset, | ||
371 | unsigned long *error_bits) | ||
372 | { | 414 | { |
373 | struct dpages dp; | 415 | /* Set up dpages based on memory type */ |
374 | list_dp_init(&dp, pl, offset); | 416 | switch (io_req->mem.type) { |
375 | return sync_io(num_regions, where, rw, &dp, error_bits); | 417 | case DM_IO_PAGE_LIST: |
376 | } | 418 | list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset); |
419 | break; | ||
420 | |||
421 | case DM_IO_BVEC: | ||
422 | bvec_dp_init(dp, io_req->mem.ptr.bvec); | ||
423 | break; | ||
424 | |||
425 | case DM_IO_VMA: | ||
426 | vm_dp_init(dp, io_req->mem.ptr.vma); | ||
427 | break; | ||
428 | |||
429 | case DM_IO_KMEM: | ||
430 | km_dp_init(dp, io_req->mem.ptr.addr); | ||
431 | break; | ||
432 | |||
433 | default: | ||
434 | return -EINVAL; | ||
435 | } | ||
377 | 436 | ||
378 | int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where, int rw, | 437 | return 0; |
379 | struct bio_vec *bvec, unsigned long *error_bits) | ||
380 | { | ||
381 | struct dpages dp; | ||
382 | bvec_dp_init(&dp, bvec); | ||
383 | return sync_io(num_regions, where, rw, &dp, error_bits); | ||
384 | } | 438 | } |
385 | 439 | ||
386 | int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw, | 440 | /* |
387 | void *data, unsigned long *error_bits) | 441 | * New collapsed (a)synchronous interface |
442 | */ | ||
443 | int dm_io(struct dm_io_request *io_req, unsigned num_regions, | ||
444 | struct io_region *where, unsigned long *sync_error_bits) | ||
388 | { | 445 | { |
446 | int r; | ||
389 | struct dpages dp; | 447 | struct dpages dp; |
390 | vm_dp_init(&dp, data); | ||
391 | return sync_io(num_regions, where, rw, &dp, error_bits); | ||
392 | } | ||
393 | 448 | ||
394 | int dm_io_async(unsigned int num_regions, struct io_region *where, int rw, | 449 | r = dp_init(io_req, &dp); |
395 | struct page_list *pl, unsigned int offset, | 450 | if (r) |
396 | io_notify_fn fn, void *context) | 451 | return r; |
397 | { | ||
398 | struct dpages dp; | ||
399 | list_dp_init(&dp, pl, offset); | ||
400 | return async_io(num_regions, where, rw, &dp, fn, context); | ||
401 | } | ||
402 | 452 | ||
403 | int dm_io_async_bvec(unsigned int num_regions, struct io_region *where, int rw, | 453 | if (!io_req->notify.fn) |
404 | struct bio_vec *bvec, io_notify_fn fn, void *context) | 454 | return sync_io(io_req->client, num_regions, where, |
405 | { | 455 | io_req->bi_rw, &dp, sync_error_bits); |
406 | struct dpages dp; | ||
407 | bvec_dp_init(&dp, bvec); | ||
408 | return async_io(num_regions, where, rw, &dp, fn, context); | ||
409 | } | ||
410 | 456 | ||
411 | int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw, | 457 | return async_io(io_req->client, num_regions, where, io_req->bi_rw, |
412 | void *data, io_notify_fn fn, void *context) | 458 | &dp, io_req->notify.fn, io_req->notify.context); |
413 | { | ||
414 | struct dpages dp; | ||
415 | vm_dp_init(&dp, data); | ||
416 | return async_io(num_regions, where, rw, &dp, fn, context); | ||
417 | } | 459 | } |
418 | 460 | EXPORT_SYMBOL(dm_io); | |
419 | EXPORT_SYMBOL(dm_io_get); | ||
420 | EXPORT_SYMBOL(dm_io_put); | ||
421 | EXPORT_SYMBOL(dm_io_sync); | ||
422 | EXPORT_SYMBOL(dm_io_async); | ||
423 | EXPORT_SYMBOL(dm_io_sync_bvec); | ||
424 | EXPORT_SYMBOL(dm_io_async_bvec); | ||
425 | EXPORT_SYMBOL(dm_io_sync_vm); | ||
426 | EXPORT_SYMBOL(dm_io_async_vm); | ||
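The new DM_IO_KMEM dpages above walk a linear kernel buffer one page at a time: km_dp_init() records the sub-page offset of the first byte (addr & (PAGE_SIZE - 1)), km_get_page() hands out what is left of the current page, and km_next_page() advances past it and zeroes the offset so every later page starts at 0. The same arithmetic in a runnable user-space sketch (address and length are illustrative):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096UL

int main(void)
{
        uintptr_t addr = 0x12345678;     /* illustrative buffer start */
        unsigned long remaining = 10000; /* illustrative buffer length */
        unsigned long offset = addr & (PAGE_SIZE - 1);  /* km_dp_init() */

        while (remaining) {
                /* km_get_page(): the rest of the current page */
                unsigned long len = PAGE_SIZE - offset;

                if (len > remaining)    /* caller-side clamp */
                        len = remaining;

                printf("page %#lx offset %4lu len %4lu\n",
                       (unsigned long)(addr & ~(PAGE_SIZE - 1)),
                       offset, len);

                /* km_next_page(): step past the page; offset becomes 0 */
                addr += len;
                remaining -= len;
                offset = 0;
        }
        return 0;
}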
diff --git a/drivers/md/dm-io.h b/drivers/md/dm-io.h index f9035bfd1a9f..f647e2cceaa6 100644 --- a/drivers/md/dm-io.h +++ b/drivers/md/dm-io.h | |||
@@ -12,7 +12,7 @@ | |||
12 | struct io_region { | 12 | struct io_region { |
13 | struct block_device *bdev; | 13 | struct block_device *bdev; |
14 | sector_t sector; | 14 | sector_t sector; |
15 | sector_t count; | 15 | sector_t count; /* If this is zero the region is ignored. */ |
16 | }; | 16 | }; |
17 | 17 | ||
18 | struct page_list { | 18 | struct page_list { |
@@ -20,55 +20,60 @@ struct page_list { | |||
20 | struct page *page; | 20 | struct page *page; |
21 | }; | 21 | }; |
22 | 22 | ||
23 | |||
24 | /* | ||
25 | * 'error' is a bitset, with each bit indicating whether an error | ||
26 | * occurred doing io to the corresponding region. | ||
27 | */ | ||
28 | typedef void (*io_notify_fn)(unsigned long error, void *context); | 23 | typedef void (*io_notify_fn)(unsigned long error, void *context); |
29 | 24 | ||
25 | enum dm_io_mem_type { | ||
26 | DM_IO_PAGE_LIST,/* Page list */ | ||
27 | DM_IO_BVEC, /* Bio vector */ | ||
28 | DM_IO_VMA, /* Virtual memory area */ | ||
29 | DM_IO_KMEM, /* Kernel memory */ | ||
30 | }; | ||
31 | |||
32 | struct dm_io_memory { | ||
33 | enum dm_io_mem_type type; | ||
34 | |||
35 | union { | ||
36 | struct page_list *pl; | ||
37 | struct bio_vec *bvec; | ||
38 | void *vma; | ||
39 | void *addr; | ||
40 | } ptr; | ||
41 | |||
42 | unsigned offset; | ||
43 | }; | ||
44 | |||
45 | struct dm_io_notify { | ||
46 | io_notify_fn fn; /* Callback for asynchronous requests */ | ||
47 | void *context; /* Passed to callback */ | ||
48 | }; | ||
30 | 49 | ||
31 | /* | 50 | /* |
32 | * Before anyone uses the IO interface they should call | 51 | * IO request structure |
33 | * dm_io_get(), specifying roughly how many pages they are | ||
34 | * expecting to perform io on concurrently. | ||
35 | * | ||
36 | * This function may block. | ||
37 | */ | 52 | */ |
38 | int dm_io_get(unsigned int num_pages); | 53 | struct dm_io_client; |
39 | void dm_io_put(unsigned int num_pages); | 54 | struct dm_io_request { |
55 | int bi_rw; /* READ|WRITE - not READA */ | ||
56 | struct dm_io_memory mem; /* Memory to use for io */ | ||
57 | struct dm_io_notify notify; /* Synchronous if notify.fn is NULL */ | ||
58 | struct dm_io_client *client; /* Client memory handler */ | ||
59 | }; | ||
40 | 60 | ||
41 | /* | 61 | /* |
42 | * Synchronous IO. | 62 | * For async io calls, users can alternatively use the dm_io() function below |
63 | * and dm_io_client_create() to create private mempools for the client. | ||
43 | * | 64 | * |
44 | * Please ensure that the rw flag in the next two functions is | 65 | * Create/destroy may block. |
45 | * either READ or WRITE, ie. we don't take READA. Any | ||
46 | * regions with a zero count field will be ignored. | ||
47 | */ | 66 | */ |
48 | int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw, | 67 | struct dm_io_client *dm_io_client_create(unsigned num_pages); |
49 | struct page_list *pl, unsigned int offset, | 68 | int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client); |
50 | unsigned long *error_bits); | 69 | void dm_io_client_destroy(struct dm_io_client *client); |
51 | |||
52 | int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where, int rw, | ||
53 | struct bio_vec *bvec, unsigned long *error_bits); | ||
54 | |||
55 | int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw, | ||
56 | void *data, unsigned long *error_bits); | ||
57 | 70 | ||
58 | /* | 71 | /* |
59 | * Aynchronous IO. | 72 | * IO interface using private per-client pools. |
60 | * | 73 | * Each bit in the optional 'sync_error_bits' bitset indicates whether an |
61 | * The 'where' array may be safely allocated on the stack since | 74 | * error occurred doing io to the corresponding region. |
62 | * the function takes a copy. | ||
63 | */ | 75 | */ |
64 | int dm_io_async(unsigned int num_regions, struct io_region *where, int rw, | 76 | int dm_io(struct dm_io_request *io_req, unsigned num_regions, |
65 | struct page_list *pl, unsigned int offset, | 77 | struct io_region *region, unsigned long *sync_error_bits); |
66 | io_notify_fn fn, void *context); | ||
67 | |||
68 | int dm_io_async_bvec(unsigned int num_regions, struct io_region *where, int rw, | ||
69 | struct bio_vec *bvec, io_notify_fn fn, void *context); | ||
70 | |||
71 | int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw, | ||
72 | void *data, io_notify_fn fn, void *context); | ||
73 | 78 | ||
74 | #endif | 79 | #endif |
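The header above replaces the global dm_io_get()/dm_io_put() page accounting with per-client state: every user now creates its own dm_io_client, sized in pages, and owns the mempool and bioset behind it, so one client exhausting its reserve can no longer starve another. A sketch of the lifecycle, including the resize step the exception store uses once it learns the real chunk size (the helper name is illustrative):

#include <linux/err.h>
#include "dm-io.h"

static struct dm_io_client *demo_client_setup(unsigned num_pages)
{
        int r;
        struct dm_io_client *client;

        client = dm_io_client_create(num_pages);  /* may block */
        if (IS_ERR(client))
                return client;          /* caller checks IS_ERR()/PTR_ERR() */

        /*
         * If the expected io size changes later (as in read_header()
         * above), the pool is resized in place, not re-created.
         */
        r = dm_io_client_resize(num_pages * 2, client);
        if (r) {
                dm_io_client_destroy(client);
                return ERR_PTR(r);
        }

        return client;
}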
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 6a9261351848..a66428d860fe 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c | |||
@@ -149,9 +149,12 @@ struct log_c { | |||
149 | FORCESYNC, /* Force a sync to happen */ | 149 | FORCESYNC, /* Force a sync to happen */ |
150 | } sync; | 150 | } sync; |
151 | 151 | ||
152 | struct dm_io_request io_req; | ||
153 | |||
152 | /* | 154 | /* |
153 | * Disk log fields | 155 | * Disk log fields |
154 | */ | 156 | */ |
157 | int log_dev_failed; | ||
155 | struct dm_dev *log_dev; | 158 | struct dm_dev *log_dev; |
156 | struct log_header header; | 159 | struct log_header header; |
157 | 160 | ||
@@ -199,13 +202,20 @@ static void header_from_disk(struct log_header *core, struct log_header *disk) | |||
199 | core->nr_regions = le64_to_cpu(disk->nr_regions); | 202 | core->nr_regions = le64_to_cpu(disk->nr_regions); |
200 | } | 203 | } |
201 | 204 | ||
205 | static int rw_header(struct log_c *lc, int rw) | ||
206 | { | ||
207 | lc->io_req.bi_rw = rw; | ||
208 | lc->io_req.mem.ptr.vma = lc->disk_header; | ||
209 | lc->io_req.notify.fn = NULL; | ||
210 | |||
211 | return dm_io(&lc->io_req, 1, &lc->header_location, NULL); | ||
212 | } | ||
213 | |||
202 | static int read_header(struct log_c *log) | 214 | static int read_header(struct log_c *log) |
203 | { | 215 | { |
204 | int r; | 216 | int r; |
205 | unsigned long ebits; | ||
206 | 217 | ||
207 | r = dm_io_sync_vm(1, &log->header_location, READ, | 218 | r = rw_header(log, READ); |
208 | log->disk_header, &ebits); | ||
209 | if (r) | 219 | if (r) |
210 | return r; | 220 | return r; |
211 | 221 | ||
@@ -233,11 +243,8 @@ static int read_header(struct log_c *log) | |||
233 | 243 | ||
234 | static inline int write_header(struct log_c *log) | 244 | static inline int write_header(struct log_c *log) |
235 | { | 245 | { |
236 | unsigned long ebits; | ||
237 | |||
238 | header_to_disk(&log->header, log->disk_header); | 246 | header_to_disk(&log->header, log->disk_header); |
239 | return dm_io_sync_vm(1, &log->header_location, WRITE, | 247 | return rw_header(log, WRITE); |
240 | log->disk_header, &ebits); | ||
241 | } | 248 | } |
242 | 249 | ||
243 | /*---------------------------------------------------------------- | 250 | /*---------------------------------------------------------------- |
@@ -256,6 +263,7 @@ static int create_log_context(struct dirty_log *log, struct dm_target *ti, | |||
256 | uint32_t region_size; | 263 | uint32_t region_size; |
257 | unsigned int region_count; | 264 | unsigned int region_count; |
258 | size_t bitset_size, buf_size; | 265 | size_t bitset_size, buf_size; |
266 | int r; | ||
259 | 267 | ||
260 | if (argc < 1 || argc > 2) { | 268 | if (argc < 1 || argc > 2) { |
261 | DMWARN("wrong number of arguments to mirror log"); | 269 | DMWARN("wrong number of arguments to mirror log"); |
@@ -315,6 +323,7 @@ static int create_log_context(struct dirty_log *log, struct dm_target *ti, | |||
315 | lc->disk_header = NULL; | 323 | lc->disk_header = NULL; |
316 | } else { | 324 | } else { |
317 | lc->log_dev = dev; | 325 | lc->log_dev = dev; |
326 | lc->log_dev_failed = 0; | ||
318 | lc->header_location.bdev = lc->log_dev->bdev; | 327 | lc->header_location.bdev = lc->log_dev->bdev; |
319 | lc->header_location.sector = 0; | 328 | lc->header_location.sector = 0; |
320 | 329 | ||
@@ -324,6 +333,15 @@ static int create_log_context(struct dirty_log *log, struct dm_target *ti, | |||
324 | buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) + | 333 | buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) + |
325 | bitset_size, ti->limits.hardsect_size); | 334 | bitset_size, ti->limits.hardsect_size); |
326 | lc->header_location.count = buf_size >> SECTOR_SHIFT; | 335 | lc->header_location.count = buf_size >> SECTOR_SHIFT; |
336 | lc->io_req.mem.type = DM_IO_VMA; | ||
337 | lc->io_req.client = dm_io_client_create(dm_div_up(buf_size, | ||
338 | PAGE_SIZE)); | ||
339 | if (IS_ERR(lc->io_req.client)) { | ||
340 | r = PTR_ERR(lc->io_req.client); | ||
341 | DMWARN("couldn't allocate disk io client"); | ||
342 | kfree(lc); | ||
343 | return -ENOMEM; | ||
344 | } | ||
327 | 345 | ||
328 | lc->disk_header = vmalloc(buf_size); | 346 | lc->disk_header = vmalloc(buf_size); |
329 | if (!lc->disk_header) { | 347 | if (!lc->disk_header) { |
@@ -424,6 +442,7 @@ static void disk_dtr(struct dirty_log *log) | |||
424 | 442 | ||
425 | dm_put_device(lc->ti, lc->log_dev); | 443 | dm_put_device(lc->ti, lc->log_dev); |
426 | vfree(lc->disk_header); | 444 | vfree(lc->disk_header); |
445 | dm_io_client_destroy(lc->io_req.client); | ||
427 | destroy_log_context(lc); | 446 | destroy_log_context(lc); |
428 | } | 447 | } |
429 | 448 | ||
@@ -437,6 +456,15 @@ static int count_bits32(uint32_t *addr, unsigned size) | |||
437 | return count; | 456 | return count; |
438 | } | 457 | } |
439 | 458 | ||
459 | static void fail_log_device(struct log_c *lc) | ||
460 | { | ||
461 | if (lc->log_dev_failed) | ||
462 | return; | ||
463 | |||
464 | lc->log_dev_failed = 1; | ||
465 | dm_table_event(lc->ti->table); | ||
466 | } | ||
467 | |||
440 | static int disk_resume(struct dirty_log *log) | 468 | static int disk_resume(struct dirty_log *log) |
441 | { | 469 | { |
442 | int r; | 470 | int r; |
@@ -446,8 +474,19 @@ static int disk_resume(struct dirty_log *log) | |||
446 | 474 | ||
447 | /* read the disk header */ | 475 | /* read the disk header */ |
448 | r = read_header(lc); | 476 | r = read_header(lc); |
449 | if (r) | 477 | if (r) { |
450 | return r; | 478 | DMWARN("%s: Failed to read header on mirror log device", |
479 | lc->log_dev->name); | ||
480 | fail_log_device(lc); | ||
481 | /* | ||
482 | * If the log device cannot be read, we must assume | ||
483 | * all regions are out-of-sync. If we simply return | ||
484 | * here, the state will be uninitialized and could | ||
485 | * lead us to return 'in-sync' status for regions | ||
486 | * that are actually 'out-of-sync'. | ||
487 | */ | ||
488 | lc->header.nr_regions = 0; | ||
489 | } | ||
451 | 490 | ||
452 | /* set or clear any new bits -- device has grown */ | 491 | /* set or clear any new bits -- device has grown */ |
453 | if (lc->sync == NOSYNC) | 492 | if (lc->sync == NOSYNC) |
@@ -472,7 +511,14 @@ static int disk_resume(struct dirty_log *log) | |||
472 | lc->header.nr_regions = lc->region_count; | 511 | lc->header.nr_regions = lc->region_count; |
473 | 512 | ||
474 | /* write the new header */ | 513 | /* write the new header */ |
475 | return write_header(lc); | 514 | r = write_header(lc); |
515 | if (r) { | ||
516 | DMWARN("%s: Failed to write header on mirror log device", | ||
517 | lc->log_dev->name); | ||
518 | fail_log_device(lc); | ||
519 | } | ||
520 | |||
521 | return r; | ||
476 | } | 522 | } |
477 | 523 | ||
478 | static uint32_t core_get_region_size(struct dirty_log *log) | 524 | static uint32_t core_get_region_size(struct dirty_log *log) |
@@ -516,7 +562,9 @@ static int disk_flush(struct dirty_log *log) | |||
516 | return 0; | 562 | return 0; |
517 | 563 | ||
518 | r = write_header(lc); | 564 | r = write_header(lc); |
519 | if (!r) | 565 | if (r) |
566 | fail_log_device(lc); | ||
567 | else | ||
520 | lc->touched = 0; | 568 | lc->touched = 0; |
521 | 569 | ||
522 | return r; | 570 | return r; |
@@ -591,6 +639,7 @@ static int core_status(struct dirty_log *log, status_type_t status, | |||
591 | 639 | ||
592 | switch(status) { | 640 | switch(status) { |
593 | case STATUSTYPE_INFO: | 641 | case STATUSTYPE_INFO: |
642 | DMEMIT("1 %s", log->type->name); | ||
594 | break; | 643 | break; |
595 | 644 | ||
596 | case STATUSTYPE_TABLE: | 645 | case STATUSTYPE_TABLE: |
@@ -606,17 +655,17 @@ static int disk_status(struct dirty_log *log, status_type_t status, | |||
606 | char *result, unsigned int maxlen) | 655 | char *result, unsigned int maxlen) |
607 | { | 656 | { |
608 | int sz = 0; | 657 | int sz = 0; |
609 | char buffer[16]; | ||
610 | struct log_c *lc = log->context; | 658 | struct log_c *lc = log->context; |
611 | 659 | ||
612 | switch(status) { | 660 | switch(status) { |
613 | case STATUSTYPE_INFO: | 661 | case STATUSTYPE_INFO: |
662 | DMEMIT("3 %s %s %c", log->type->name, lc->log_dev->name, | ||
663 | lc->log_dev_failed ? 'D' : 'A'); | ||
614 | break; | 664 | break; |
615 | 665 | ||
616 | case STATUSTYPE_TABLE: | 666 | case STATUSTYPE_TABLE: |
617 | format_dev_t(buffer, lc->log_dev->bdev->bd_dev); | ||
618 | DMEMIT("%s %u %s %u ", log->type->name, | 667 | DMEMIT("%s %u %s %u ", log->type->name, |
619 | lc->sync == DEFAULTSYNC ? 2 : 3, buffer, | 668 | lc->sync == DEFAULTSYNC ? 2 : 3, lc->log_dev->name, |
620 | lc->region_size); | 669 | lc->region_size); |
621 | DMEMIT_SYNC; | 670 | DMEMIT_SYNC; |
622 | } | 671 | } |
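Two things in the log conversion above are worth pulling out. The invariant fields of the cached dm_io_request (mem.type and client) are filled once in create_log_context(), so each header io cannot drift away from the client its pool was sized for; and fail_log_device() is a one-shot latch, so dm_table_event() fires exactly once per failure and disk_status() reports it as the 'D'/'A' character. A reduction of the cached-request half (struct and helper names are illustrative):

#include "dm-io.h"

/* Reduction of log_c/rw_header(): invariant request fields are set at
 * construct time; each header io only picks a direction. */
struct demo_log {
        struct io_region header_location;
        void *disk_header;              /* vmalloc()ed header buffer */
        struct dm_io_request io_req;    /* mem.type and client set at ctr */
};

static int demo_rw_header(struct demo_log *lc, int rw)
{
        lc->io_req.bi_rw = rw;                  /* READ or WRITE */
        lc->io_req.mem.ptr.vma = lc->disk_header;
        lc->io_req.notify.fn = NULL;            /* header io is synchronous */

        return dm_io(&lc->io_req, 1, &lc->header_location, NULL);
}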
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 3aa013506967..de54b39e6ffe 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c | |||
@@ -668,6 +668,9 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m) | |||
668 | return -EINVAL; | 668 | return -EINVAL; |
669 | } | 669 | } |
670 | 670 | ||
671 | m->hw_handler.md = dm_table_get_md(ti->table); | ||
672 | dm_put(m->hw_handler.md); | ||
673 | |||
671 | r = hwht->create(&m->hw_handler, hw_argc - 1, as->argv); | 674 | r = hwht->create(&m->hw_handler, hw_argc - 1, as->argv); |
672 | if (r) { | 675 | if (r) { |
673 | dm_put_hw_handler(hwht); | 676 | dm_put_hw_handler(hwht); |
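The two added lines above use a borrowed-reference idiom: dm_table_get_md() returns the mapped_device with its count raised, but the handler only needs the bare pointer, whose lifetime is already bounded by the table containing this target, so the reference is dropped straight away with the newly exported dm_put() (see the dm.c hunk below). A user-space sketch of the shape of that idiom (types and helpers are illustrative):

#include <stdio.h>

struct obj { int refs; };

static struct obj *get_obj(struct obj *o) { o->refs++; return o; }
static void put_obj(struct obj *o) { o->refs--; }

int main(void)
{
        struct obj owner_held = { .refs = 1 };  /* kept alive by its owner */
        struct obj *borrowed;

        borrowed = get_obj(&owner_held);  /* take a ref to read the pointer */
        put_obj(borrowed);                /* drop it; the pointer stays valid
                                             for the owner's lifetime */

        printf("refs back to %d\n", owner_held.refs);
        return 0;
}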
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 23a642619bed..ef124b71ccc8 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c | |||
@@ -21,15 +21,11 @@ | |||
21 | #include <linux/workqueue.h> | 21 | #include <linux/workqueue.h> |
22 | 22 | ||
23 | #define DM_MSG_PREFIX "raid1" | 23 | #define DM_MSG_PREFIX "raid1" |
24 | #define DM_IO_PAGES 64 | ||
24 | 25 | ||
25 | static struct workqueue_struct *_kmirrord_wq; | 26 | #define DM_RAID1_HANDLE_ERRORS 0x01 |
26 | static struct work_struct _kmirrord_work; | ||
27 | static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped); | ||
28 | 27 | ||
29 | static inline void wake(void) | 28 | static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped); |
30 | { | ||
31 | queue_work(_kmirrord_wq, &_kmirrord_work); | ||
32 | } | ||
33 | 29 | ||
34 | /*----------------------------------------------------------------- | 30 | /*----------------------------------------------------------------- |
35 | * Region hash | 31 | * Region hash |
@@ -125,17 +121,23 @@ struct mirror_set { | |||
125 | struct list_head list; | 121 | struct list_head list; |
126 | struct region_hash rh; | 122 | struct region_hash rh; |
127 | struct kcopyd_client *kcopyd_client; | 123 | struct kcopyd_client *kcopyd_client; |
124 | uint64_t features; | ||
128 | 125 | ||
129 | spinlock_t lock; /* protects the next two lists */ | 126 | spinlock_t lock; /* protects the next two lists */ |
130 | struct bio_list reads; | 127 | struct bio_list reads; |
131 | struct bio_list writes; | 128 | struct bio_list writes; |
132 | 129 | ||
130 | struct dm_io_client *io_client; | ||
131 | |||
133 | /* recovery */ | 132 | /* recovery */ |
134 | region_t nr_regions; | 133 | region_t nr_regions; |
135 | int in_sync; | 134 | int in_sync; |
136 | 135 | ||
137 | struct mirror *default_mirror; /* Default mirror */ | 136 | struct mirror *default_mirror; /* Default mirror */ |
138 | 137 | ||
138 | struct workqueue_struct *kmirrord_wq; | ||
139 | struct work_struct kmirrord_work; | ||
140 | |||
139 | unsigned int nr_mirrors; | 141 | unsigned int nr_mirrors; |
140 | struct mirror mirror[0]; | 142 | struct mirror mirror[0]; |
141 | }; | 143 | }; |
@@ -153,6 +155,11 @@ static inline sector_t region_to_sector(struct region_hash *rh, region_t region) | |||
153 | return region << rh->region_shift; | 155 | return region << rh->region_shift; |
154 | } | 156 | } |
155 | 157 | ||
158 | static void wake(struct mirror_set *ms) | ||
159 | { | ||
160 | queue_work(ms->kmirrord_wq, &ms->kmirrord_work); | ||
161 | } | ||
162 | |||
156 | /* FIXME move this */ | 163 | /* FIXME move this */ |
157 | static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw); | 164 | static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw); |
158 | 165 | ||
@@ -398,8 +405,7 @@ static void rh_update_states(struct region_hash *rh) | |||
398 | mempool_free(reg, rh->region_pool); | 405 | mempool_free(reg, rh->region_pool); |
399 | } | 406 | } |
400 | 407 | ||
401 | if (!list_empty(&recovered)) | 408 | rh->log->type->flush(rh->log); |
402 | rh->log->type->flush(rh->log); | ||
403 | 409 | ||
404 | list_for_each_entry_safe (reg, next, &clean, list) | 410 | list_for_each_entry_safe (reg, next, &clean, list) |
405 | mempool_free(reg, rh->region_pool); | 411 | mempool_free(reg, rh->region_pool); |
@@ -471,7 +477,7 @@ static void rh_dec(struct region_hash *rh, region_t region) | |||
471 | spin_unlock_irqrestore(&rh->region_lock, flags); | 477 | spin_unlock_irqrestore(&rh->region_lock, flags); |
472 | 478 | ||
473 | if (should_wake) | 479 | if (should_wake) |
474 | wake(); | 480 | wake(rh->ms); |
475 | } | 481 | } |
476 | 482 | ||
477 | /* | 483 | /* |
@@ -558,7 +564,7 @@ static void rh_recovery_end(struct region *reg, int success) | |||
558 | list_add(®->list, ®->rh->recovered_regions); | 564 | list_add(®->list, ®->rh->recovered_regions); |
559 | spin_unlock_irq(&rh->region_lock); | 565 | spin_unlock_irq(&rh->region_lock); |
560 | 566 | ||
561 | wake(); | 567 | wake(rh->ms); |
562 | } | 568 | } |
563 | 569 | ||
564 | static void rh_flush(struct region_hash *rh) | 570 | static void rh_flush(struct region_hash *rh) |
@@ -592,7 +598,7 @@ static void rh_start_recovery(struct region_hash *rh) | |||
592 | for (i = 0; i < MAX_RECOVERY; i++) | 598 | for (i = 0; i < MAX_RECOVERY; i++) |
593 | up(&rh->recovery_count); | 599 | up(&rh->recovery_count); |
594 | 600 | ||
595 | wake(); | 601 | wake(rh->ms); |
596 | } | 602 | } |
597 | 603 | ||
598 | /* | 604 | /* |
@@ -735,7 +741,7 @@ static void do_reads(struct mirror_set *ms, struct bio_list *reads) | |||
735 | /* | 741 | /* |
736 | * We can only read balance if the region is in sync. | 742 | * We can only read balance if the region is in sync. |
737 | */ | 743 | */ |
738 | if (rh_in_sync(&ms->rh, region, 0)) | 744 | if (rh_in_sync(&ms->rh, region, 1)) |
739 | m = choose_mirror(ms, bio->bi_sector); | 745 | m = choose_mirror(ms, bio->bi_sector); |
740 | else | 746 | else |
741 | m = ms->default_mirror; | 747 | m = ms->default_mirror; |
@@ -792,6 +798,14 @@ static void do_write(struct mirror_set *ms, struct bio *bio) | |||
792 | unsigned int i; | 798 | unsigned int i; |
793 | struct io_region io[KCOPYD_MAX_REGIONS+1]; | 799 | struct io_region io[KCOPYD_MAX_REGIONS+1]; |
794 | struct mirror *m; | 800 | struct mirror *m; |
801 | struct dm_io_request io_req = { | ||
802 | .bi_rw = WRITE, | ||
803 | .mem.type = DM_IO_BVEC, | ||
804 | .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx, | ||
805 | .notify.fn = write_callback, | ||
806 | .notify.context = bio, | ||
807 | .client = ms->io_client, | ||
808 | }; | ||
795 | 809 | ||
796 | for (i = 0; i < ms->nr_mirrors; i++) { | 810 | for (i = 0; i < ms->nr_mirrors; i++) { |
797 | m = ms->mirror + i; | 811 | m = ms->mirror + i; |
@@ -802,9 +816,8 @@ static void do_write(struct mirror_set *ms, struct bio *bio) | |||
802 | } | 816 | } |
803 | 817 | ||
804 | bio_set_ms(bio, ms); | 818 | bio_set_ms(bio, ms); |
805 | dm_io_async_bvec(ms->nr_mirrors, io, WRITE, | 819 | |
806 | bio->bi_io_vec + bio->bi_idx, | 820 | (void) dm_io(&io_req, ms->nr_mirrors, io, NULL); |
807 | write_callback, bio); | ||
808 | } | 821 | } |
809 | 822 | ||
810 | static void do_writes(struct mirror_set *ms, struct bio_list *writes) | 823 | static void do_writes(struct mirror_set *ms, struct bio_list *writes) |
@@ -870,11 +883,10 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) | |||
870 | /*----------------------------------------------------------------- | 883 | /*----------------------------------------------------------------- |
871 | * kmirrord | 884 | * kmirrord |
872 | *---------------------------------------------------------------*/ | 885 | *---------------------------------------------------------------*/ |
873 | static LIST_HEAD(_mirror_sets); | 886 | static void do_mirror(struct work_struct *work) |
874 | static DECLARE_RWSEM(_mirror_sets_lock); | ||
875 | |||
876 | static void do_mirror(struct mirror_set *ms) | ||
877 | { | 887 | { |
888 | struct mirror_set *ms = container_of(work, struct mirror_set, | ||
889 | kmirrord_work); | ||
878 | struct bio_list reads, writes; | 890 | struct bio_list reads, writes; |
879 | 891 | ||
880 | spin_lock(&ms->lock); | 892 | spin_lock(&ms->lock); |
@@ -890,16 +902,6 @@ static void do_mirror(struct mirror_set *ms) | |||
890 | do_writes(ms, &writes); | 902 | do_writes(ms, &writes); |
891 | } | 903 | } |
892 | 904 | ||
893 | static void do_work(struct work_struct *ignored) | ||
894 | { | ||
895 | struct mirror_set *ms; | ||
896 | |||
897 | down_read(&_mirror_sets_lock); | ||
898 | list_for_each_entry (ms, &_mirror_sets, list) | ||
899 | do_mirror(ms); | ||
900 | up_read(&_mirror_sets_lock); | ||
901 | } | ||
902 | |||
903 | /*----------------------------------------------------------------- | 905 | /*----------------------------------------------------------------- |
904 | * Target functions | 906 | * Target functions |
905 | *---------------------------------------------------------------*/ | 907 | *---------------------------------------------------------------*/ |
@@ -931,6 +933,13 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, | |||
931 | ms->in_sync = 0; | 933 | ms->in_sync = 0; |
932 | ms->default_mirror = &ms->mirror[DEFAULT_MIRROR]; | 934 | ms->default_mirror = &ms->mirror[DEFAULT_MIRROR]; |
933 | 935 | ||
936 | ms->io_client = dm_io_client_create(DM_IO_PAGES); | ||
937 | if (IS_ERR(ms->io_client)) { | ||
938 | ti->error = "Error creating dm_io client"; | ||
939 | kfree(ms); | ||
940 | return NULL; | ||
941 | } | ||
942 | |||
934 | if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) { | 943 | if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) { |
935 | ti->error = "Error creating dirty region hash"; | 944 | ti->error = "Error creating dirty region hash"; |
936 | kfree(ms); | 945 | kfree(ms); |
@@ -946,6 +955,7 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti, | |||
946 | while (m--) | 955 | while (m--) |
947 | dm_put_device(ti, ms->mirror[m].dev); | 956 | dm_put_device(ti, ms->mirror[m].dev); |
948 | 957 | ||
958 | dm_io_client_destroy(ms->io_client); | ||
949 | rh_exit(&ms->rh); | 959 | rh_exit(&ms->rh); |
950 | kfree(ms); | 960 | kfree(ms); |
951 | } | 961 | } |
@@ -978,23 +988,6 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti, | |||
978 | return 0; | 988 | return 0; |
979 | } | 989 | } |
980 | 990 | ||
981 | static int add_mirror_set(struct mirror_set *ms) | ||
982 | { | ||
983 | down_write(&_mirror_sets_lock); | ||
984 | list_add_tail(&ms->list, &_mirror_sets); | ||
985 | up_write(&_mirror_sets_lock); | ||
986 | wake(); | ||
987 | |||
988 | return 0; | ||
989 | } | ||
990 | |||
991 | static void del_mirror_set(struct mirror_set *ms) | ||
992 | { | ||
993 | down_write(&_mirror_sets_lock); | ||
994 | list_del(&ms->list); | ||
995 | up_write(&_mirror_sets_lock); | ||
996 | } | ||
997 | |||
998 | /* | 991 | /* |
999 | * Create dirty log: log_type #log_params <log_params> | 992 | * Create dirty log: log_type #log_params <log_params> |
1000 | */ | 993 | */ |
@@ -1037,16 +1030,55 @@ static struct dirty_log *create_dirty_log(struct dm_target *ti, | |||
1037 | return dl; | 1030 | return dl; |
1038 | } | 1031 | } |
1039 | 1032 | ||
1033 | static int parse_features(struct mirror_set *ms, unsigned argc, char **argv, | ||
1034 | unsigned *args_used) | ||
1035 | { | ||
1036 | unsigned num_features; | ||
1037 | struct dm_target *ti = ms->ti; | ||
1038 | |||
1039 | *args_used = 0; | ||
1040 | |||
1041 | if (!argc) | ||
1042 | return 0; | ||
1043 | |||
1044 | if (sscanf(argv[0], "%u", &num_features) != 1) { | ||
1045 | ti->error = "Invalid number of features"; | ||
1046 | return -EINVAL; | ||
1047 | } | ||
1048 | |||
1049 | argc--; | ||
1050 | argv++; | ||
1051 | (*args_used)++; | ||
1052 | |||
1053 | if (num_features > argc) { | ||
1054 | ti->error = "Not enough arguments to support feature count"; | ||
1055 | return -EINVAL; | ||
1056 | } | ||
1057 | |||
1058 | if (!strcmp("handle_errors", argv[0])) | ||
1059 | ms->features |= DM_RAID1_HANDLE_ERRORS; | ||
1060 | else { | ||
1061 | ti->error = "Unrecognised feature requested"; | ||
1062 | return -EINVAL; | ||
1063 | } | ||
1064 | |||
1065 | (*args_used)++; | ||
1066 | |||
1067 | return 0; | ||
1068 | } | ||
1069 | |||
1040 | /* | 1070 | /* |
1041 | * Construct a mirror mapping: | 1071 | * Construct a mirror mapping: |
1042 | * | 1072 | * |
1043 | * log_type #log_params <log_params> | 1073 | * log_type #log_params <log_params> |
1044 | * #mirrors [mirror_path offset]{2,} | 1074 | * #mirrors [mirror_path offset]{2,} |
1075 | * [#features <features>] | ||
1045 | * | 1076 | * |
1046 | * log_type is "core" or "disk" | 1077 | * log_type is "core" or "disk" |
1047 | * #log_params is between 1 and 3 | 1078 | * #log_params is between 1 and 3 |
1079 | * | ||
1080 | * If present, features must be "handle_errors". | ||
1048 | */ | 1081 | */ |
1049 | #define DM_IO_PAGES 64 | ||
1050 | static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) | 1082 | static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) |
1051 | { | 1083 | { |
1052 | int r; | 1084 | int r; |
@@ -1070,8 +1102,8 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1070 | 1102 | ||
1071 | argv++, argc--; | 1103 | argv++, argc--; |
1072 | 1104 | ||
1073 | if (argc != nr_mirrors * 2) { | 1105 | if (argc < nr_mirrors * 2) { |
1074 | ti->error = "Wrong number of mirror arguments"; | 1106 | ti->error = "Too few mirror arguments"; |
1075 | dm_destroy_dirty_log(dl); | 1107 | dm_destroy_dirty_log(dl); |
1076 | return -EINVAL; | 1108 | return -EINVAL; |
1077 | } | 1109 | } |
@@ -1096,13 +1128,37 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1096 | ti->private = ms; | 1128 | ti->private = ms; |
1097 | ti->split_io = ms->rh.region_size; | 1129 | ti->split_io = ms->rh.region_size; |
1098 | 1130 | ||
1131 | ms->kmirrord_wq = create_singlethread_workqueue("kmirrord"); | ||
1132 | if (!ms->kmirrord_wq) { | ||
1133 | DMERR("couldn't start kmirrord"); | ||
1134 | free_context(ms, ti, m); | ||
1135 | return -ENOMEM; | ||
1136 | } | ||
1137 | INIT_WORK(&ms->kmirrord_work, do_mirror); | ||
1138 | |||
1139 | r = parse_features(ms, argc, argv, &args_used); | ||
1140 | if (r) { | ||
1141 | free_context(ms, ti, ms->nr_mirrors); | ||
1142 | return r; | ||
1143 | } | ||
1144 | |||
1145 | argv += args_used; | ||
1146 | argc -= args_used; | ||
1147 | |||
1148 | if (argc) { | ||
1149 | ti->error = "Too many mirror arguments"; | ||
1150 | free_context(ms, ti, ms->nr_mirrors); | ||
1151 | return -EINVAL; | ||
1152 | } | ||
1153 | |||
1099 | r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client); | 1154 | r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client); |
1100 | if (r) { | 1155 | if (r) { |
1156 | destroy_workqueue(ms->kmirrord_wq); | ||
1101 | free_context(ms, ti, ms->nr_mirrors); | 1157 | free_context(ms, ti, ms->nr_mirrors); |
1102 | return r; | 1158 | return r; |
1103 | } | 1159 | } |
1104 | 1160 | ||
1105 | add_mirror_set(ms); | 1161 | wake(ms); |
1106 | return 0; | 1162 | return 0; |
1107 | } | 1163 | } |
1108 | 1164 | ||
@@ -1110,8 +1166,9 @@ static void mirror_dtr(struct dm_target *ti) | |||
1110 | { | 1166 | { |
1111 | struct mirror_set *ms = (struct mirror_set *) ti->private; | 1167 | struct mirror_set *ms = (struct mirror_set *) ti->private; |
1112 | 1168 | ||
1113 | del_mirror_set(ms); | 1169 | flush_workqueue(ms->kmirrord_wq); |
1114 | kcopyd_client_destroy(ms->kcopyd_client); | 1170 | kcopyd_client_destroy(ms->kcopyd_client); |
1171 | destroy_workqueue(ms->kmirrord_wq); | ||
1115 | free_context(ms, ti, ms->nr_mirrors); | 1172 | free_context(ms, ti, ms->nr_mirrors); |
1116 | } | 1173 | } |
1117 | 1174 | ||
@@ -1127,7 +1184,7 @@ static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw) | |||
1127 | spin_unlock(&ms->lock); | 1184 | spin_unlock(&ms->lock); |
1128 | 1185 | ||
1129 | if (should_wake) | 1186 | if (should_wake) |
1130 | wake(); | 1187 | wake(ms); |
1131 | } | 1188 | } |
1132 | 1189 | ||
1133 | /* | 1190 | /* |
@@ -1222,11 +1279,9 @@ static void mirror_resume(struct dm_target *ti) | |||
1222 | static int mirror_status(struct dm_target *ti, status_type_t type, | 1279 | static int mirror_status(struct dm_target *ti, status_type_t type, |
1223 | char *result, unsigned int maxlen) | 1280 | char *result, unsigned int maxlen) |
1224 | { | 1281 | { |
1225 | unsigned int m, sz; | 1282 | unsigned int m, sz = 0; |
1226 | struct mirror_set *ms = (struct mirror_set *) ti->private; | 1283 | struct mirror_set *ms = (struct mirror_set *) ti->private; |
1227 | 1284 | ||
1228 | sz = ms->rh.log->type->status(ms->rh.log, type, result, maxlen); | ||
1229 | |||
1230 | switch (type) { | 1285 | switch (type) { |
1231 | case STATUSTYPE_INFO: | 1286 | case STATUSTYPE_INFO: |
1232 | DMEMIT("%d ", ms->nr_mirrors); | 1287 | DMEMIT("%d ", ms->nr_mirrors); |
@@ -1237,13 +1292,21 @@ static int mirror_status(struct dm_target *ti, status_type_t type, | |||
1237 | (unsigned long long)ms->rh.log->type-> | 1292 | (unsigned long long)ms->rh.log->type-> |
1238 | get_sync_count(ms->rh.log), | 1293 | get_sync_count(ms->rh.log), |
1239 | (unsigned long long)ms->nr_regions); | 1294 | (unsigned long long)ms->nr_regions); |
1295 | |||
1296 | sz = ms->rh.log->type->status(ms->rh.log, type, result, maxlen); | ||
1297 | |||
1240 | break; | 1298 | break; |
1241 | 1299 | ||
1242 | case STATUSTYPE_TABLE: | 1300 | case STATUSTYPE_TABLE: |
1301 | sz = ms->rh.log->type->status(ms->rh.log, type, result, maxlen); | ||
1302 | |||
1243 | DMEMIT("%d", ms->nr_mirrors); | 1303 | DMEMIT("%d", ms->nr_mirrors); |
1244 | for (m = 0; m < ms->nr_mirrors; m++) | 1304 | for (m = 0; m < ms->nr_mirrors; m++) |
1245 | DMEMIT(" %s %llu", ms->mirror[m].dev->name, | 1305 | DMEMIT(" %s %llu", ms->mirror[m].dev->name, |
1246 | (unsigned long long)ms->mirror[m].offset); | 1306 | (unsigned long long)ms->mirror[m].offset); |
1307 | |||
1308 | if (ms->features & DM_RAID1_HANDLE_ERRORS) | ||
1309 | DMEMIT(" 1 handle_errors"); | ||
1247 | } | 1310 | } |
1248 | 1311 | ||
1249 | return 0; | 1312 | return 0; |
@@ -1251,7 +1314,7 @@ static int mirror_status(struct dm_target *ti, status_type_t type, | |||
1251 | 1314 | ||
1252 | static struct target_type mirror_target = { | 1315 | static struct target_type mirror_target = { |
1253 | .name = "mirror", | 1316 | .name = "mirror", |
1254 | .version = {1, 0, 2}, | 1317 | .version = {1, 0, 3}, |
1255 | .module = THIS_MODULE, | 1318 | .module = THIS_MODULE, |
1256 | .ctr = mirror_ctr, | 1319 | .ctr = mirror_ctr, |
1257 | .dtr = mirror_dtr, | 1320 | .dtr = mirror_dtr, |
@@ -1270,20 +1333,11 @@ static int __init dm_mirror_init(void) | |||
1270 | if (r) | 1333 | if (r) |
1271 | return r; | 1334 | return r; |
1272 | 1335 | ||
1273 | _kmirrord_wq = create_singlethread_workqueue("kmirrord"); | ||
1274 | if (!_kmirrord_wq) { | ||
1275 | DMERR("couldn't start kmirrord"); | ||
1276 | dm_dirty_log_exit(); | ||
1277 | return r; | ||
1278 | } | ||
1279 | INIT_WORK(&_kmirrord_work, do_work); | ||
1280 | |||
1281 | r = dm_register_target(&mirror_target); | 1336 | r = dm_register_target(&mirror_target); |
1282 | if (r < 0) { | 1337 | if (r < 0) { |
1283 | DMERR("%s: Failed to register mirror target", | 1338 | DMERR("%s: Failed to register mirror target", |
1284 | mirror_target.name); | 1339 | mirror_target.name); |
1285 | dm_dirty_log_exit(); | 1340 | dm_dirty_log_exit(); |
1286 | destroy_workqueue(_kmirrord_wq); | ||
1287 | } | 1341 | } |
1288 | 1342 | ||
1289 | return r; | 1343 | return r; |
@@ -1297,7 +1351,6 @@ static void __exit dm_mirror_exit(void) | |||
1297 | if (r < 0) | 1351 | if (r < 0) |
1298 | DMERR("%s: unregister failed %d", mirror_target.name, r); | 1352 | DMERR("%s: unregister failed %d", mirror_target.name, r); |
1299 | 1353 | ||
1300 | destroy_workqueue(_kmirrord_wq); | ||
1301 | dm_dirty_log_exit(); | 1354 | dm_dirty_log_exit(); |
1302 | } | 1355 | } |
1303 | 1356 | ||
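parse_features() above introduces the count-prefixed feature block that later dm targets reuse: the mirror table may now end with "#features <features>", for example "0 2097152 mirror core 1 1024 2 /dev/sda 0 /dev/sdb 0 1 handle_errors" (an illustrative table line, not from the patch). A user-space reduction of the parsing convention, generalised to a loop where the patch handles only the single defined keyword:

#include <stdio.h>
#include <string.h>

#define DEMO_HANDLE_ERRORS 0x01

/* Reduction of parse_features(): a count followed by that many feature
 * keywords, rejecting anything unrecognised. */
static int demo_parse_features(unsigned *features, int argc, char **argv,
                               unsigned *args_used)
{
        unsigned num;

        *args_used = 0;
        if (!argc)
                return 0;               /* the feature block is optional */

        if (sscanf(argv[0], "%u", &num) != 1)
                return -1;              /* invalid feature count */
        argc--; argv++; (*args_used)++;

        if (num > (unsigned)argc)
                return -1;              /* count exceeds remaining args */

        while (num--) {
                if (!strcmp(argv[0], "handle_errors"))
                        *features |= DEMO_HANDLE_ERRORS;
                else
                        return -1;      /* unrecognised feature */
                argc--; argv++; (*args_used)++;
        }
        return 0;
}

int main(void)
{
        char *args[] = { "1", "handle_errors" };
        unsigned features = 0, used = 0;

        if (!demo_parse_features(&features, 2, args, &used))
                printf("features=%#x, consumed %u args\n", features, used);
        return 0;
}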
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 05befa91807a..2fc199b0016b 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c | |||
@@ -425,13 +425,15 @@ static void close_dev(struct dm_dev *d, struct mapped_device *md) | |||
425 | } | 425 | } |
426 | 426 | ||
427 | /* | 427 | /* |
428 | * If possible (ie. blk_size[major] is set), this checks an area | 428 | * If possible, this checks an area of a destination device is valid. |
429 | * of a destination device is valid. | ||
430 | */ | 429 | */ |
431 | static int check_device_area(struct dm_dev *dd, sector_t start, sector_t len) | 430 | static int check_device_area(struct dm_dev *dd, sector_t start, sector_t len) |
432 | { | 431 | { |
433 | sector_t dev_size; | 432 | sector_t dev_size = dd->bdev->bd_inode->i_size >> SECTOR_SHIFT; |
434 | dev_size = dd->bdev->bd_inode->i_size >> SECTOR_SHIFT; | 433 | |
434 | if (!dev_size) | ||
435 | return 1; | ||
436 | |||
435 | return ((start < dev_size) && (len <= (dev_size - start))); | 437 | return ((start < dev_size) && (len <= (dev_size - start))); |
436 | } | 438 | } |
437 | 439 | ||
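The rewritten check above keeps two properties worth noting: a device whose size cannot be determined (bd_inode->i_size of zero) is now explicitly accepted rather than rejected, and the range test is phrased as len <= dev_size - start so no start + len sum is formed, which could wrap around in sector_t. A runnable sketch of the comparison:

#include <stdio.h>
#include <stdint.h>

typedef uint64_t sector_t;

/* Sketch of check_device_area(): avoids computing start + len, which
 * could wrap; a zero device size means "unknown, assume valid". */
static int area_valid(sector_t dev_size, sector_t start, sector_t len)
{
        if (!dev_size)
                return 1;

        return (start < dev_size) && (len <= (dev_size - start));
}

int main(void)
{
        /* start + len would wrap to a small value here, but the
         * subtraction form still rejects the range correctly. */
        printf("%d\n", area_valid(1000, 999, UINT64_MAX)); /* prints 0 */
        printf("%d\n", area_valid(1000, 0, 1000));         /* prints 1 */
        return 0;
}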
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 11a98df298ec..2717a355dc5b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -1236,6 +1236,7 @@ void dm_put(struct mapped_device *md) | |||
1236 | free_dev(md); | 1236 | free_dev(md); |
1237 | } | 1237 | } |
1238 | } | 1238 | } |
1239 | EXPORT_SYMBOL_GPL(dm_put); | ||
1239 | 1240 | ||
1240 | /* | 1241 | /* |
1241 | * Process the deferred bios | 1242 | * Process the deferred bios |
diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c index b46f6c575f7e..dbc234e3c69f 100644 --- a/drivers/md/kcopyd.c +++ b/drivers/md/kcopyd.c | |||
@@ -1,5 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2002 Sistina Software (UK) Limited. | 2 | * Copyright (C) 2002 Sistina Software (UK) Limited. |
3 | * Copyright (C) 2006 Red Hat GmbH | ||
3 | * | 4 | * |
4 | * This file is released under the GPL. | 5 | * This file is released under the GPL. |
5 | * | 6 | * |
@@ -45,6 +46,8 @@ struct kcopyd_client { | |||
45 | unsigned int nr_pages; | 46 | unsigned int nr_pages; |
46 | unsigned int nr_free_pages; | 47 | unsigned int nr_free_pages; |
47 | 48 | ||
49 | struct dm_io_client *io_client; | ||
50 | |||
48 | wait_queue_head_t destroyq; | 51 | wait_queue_head_t destroyq; |
49 | atomic_t nr_jobs; | 52 | atomic_t nr_jobs; |
50 | }; | 53 | }; |
@@ -342,16 +345,20 @@ static void complete_io(unsigned long error, void *context) | |||
342 | static int run_io_job(struct kcopyd_job *job) | 345 | static int run_io_job(struct kcopyd_job *job) |
343 | { | 346 | { |
344 | int r; | 347 | int r; |
348 | struct dm_io_request io_req = { | ||
349 | .bi_rw = job->rw, | ||
350 | .mem.type = DM_IO_PAGE_LIST, | ||
351 | .mem.ptr.pl = job->pages, | ||
352 | .mem.offset = job->offset, | ||
353 | .notify.fn = complete_io, | ||
354 | .notify.context = job, | ||
355 | .client = job->kc->io_client, | ||
356 | }; | ||
345 | 357 | ||
346 | if (job->rw == READ) | 358 | if (job->rw == READ) |
347 | r = dm_io_async(1, &job->source, job->rw, | 359 | r = dm_io(&io_req, 1, &job->source, NULL); |
348 | job->pages, | ||
349 | job->offset, complete_io, job); | ||
350 | |||
351 | else | 360 | else |
352 | r = dm_io_async(job->num_dests, job->dests, job->rw, | 361 | r = dm_io(&io_req, job->num_dests, job->dests, NULL); |
353 | job->pages, | ||
354 | job->offset, complete_io, job); | ||
355 | 362 | ||
356 | return r; | 363 | return r; |
357 | } | 364 | } |
@@ -670,8 +677,9 @@ int kcopyd_client_create(unsigned int nr_pages, struct kcopyd_client **result) | |||
670 | return r; | 677 | return r; |
671 | } | 678 | } |
672 | 679 | ||
673 | r = dm_io_get(nr_pages); | 680 | kc->io_client = dm_io_client_create(nr_pages); |
674 | if (r) { | 681 | if (IS_ERR(kc->io_client)) { |
682 | r = PTR_ERR(kc->io_client); | ||
675 | client_free_pages(kc); | 683 | client_free_pages(kc); |
676 | kfree(kc); | 684 | kfree(kc); |
677 | kcopyd_exit(); | 685 | kcopyd_exit(); |
@@ -691,7 +699,7 @@ void kcopyd_client_destroy(struct kcopyd_client *kc) | |||
691 | /* Wait for completion of all jobs submitted by this client. */ | 699 | /* Wait for completion of all jobs submitted by this client. */ |
692 | wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); | 700 | wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); |
693 | 701 | ||
694 | dm_io_put(kc->nr_pages); | 702 | dm_io_client_destroy(kc->io_client); |
695 | client_free_pages(kc); | 703 | client_free_pages(kc); |
696 | client_del(kc); | 704 | client_del(kc); |
697 | kfree(kc); | 705 | kfree(kc); |
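Taken together, the kcopyd hunks above move the client off the old global dm_io_get()/dm_io_put() page accounting and onto a per-client dm_io interface: each kcopyd client now owns a struct dm_io_client and passes it with every request. A minimal sketch of the resulting lifecycle, assuming the 2.6-era dm-io declarations used by this series (dm_io_client_create(), dm_io(), dm_io_client_destroy() and the io_notify_fn callback type); error handling is abbreviated:

	#include <linux/err.h>
	#include "dm-io.h"

	static struct dm_io_client *example_io_client;

	static int example_client_init(unsigned nr_pages)
	{
		example_io_client = dm_io_client_create(nr_pages);
		if (IS_ERR(example_io_client))
			return PTR_ERR(example_io_client);
		return 0;
	}

	static int example_read(struct io_region *src, struct page_list *pages,
				unsigned offset, io_notify_fn fn, void *context)
	{
		struct dm_io_request io_req = {
			.bi_rw		= READ,
			.mem.type	= DM_IO_PAGE_LIST,
			.mem.ptr.pl	= pages,
			.mem.offset	= offset,
			.notify.fn	= fn,	/* completion callback, as complete_io() above */
			.notify.context	= context,
			.client		= example_io_client,
		};

		/* A NULL sync_error_bits pointer selects asynchronous submission. */
		return dm_io(&io_req, 1, src, NULL);
	}

	static void example_client_exit(void)
	{
		/* Only safe once all outstanding jobs have completed -- kcopyd
		 * guarantees this with its destroyq/nr_jobs wait. */
		dm_io_client_destroy(example_io_client);
	}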
diff --git a/drivers/md/md.c b/drivers/md/md.c index 2b4315d7e5d6..2901d0c0ee9e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -33,6 +33,7 @@ | |||
33 | */ | 33 | */ |
34 | 34 | ||
35 | #include <linux/module.h> | 35 | #include <linux/module.h> |
36 | #include <linux/kernel.h> | ||
36 | #include <linux/kthread.h> | 37 | #include <linux/kthread.h> |
37 | #include <linux/linkage.h> | 38 | #include <linux/linkage.h> |
38 | #include <linux/raid/md.h> | 39 | #include <linux/raid/md.h> |
@@ -273,6 +274,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
273 | atomic_set(&new->active, 1); | 274 | atomic_set(&new->active, 1); |
274 | spin_lock_init(&new->write_lock); | 275 | spin_lock_init(&new->write_lock); |
275 | init_waitqueue_head(&new->sb_wait); | 276 | init_waitqueue_head(&new->sb_wait); |
277 | new->reshape_position = MaxSector; | ||
276 | 278 | ||
277 | new->queue = blk_alloc_queue(GFP_KERNEL); | 279 | new->queue = blk_alloc_queue(GFP_KERNEL); |
278 | if (!new->queue) { | 280 | if (!new->queue) { |
@@ -589,14 +591,41 @@ abort: | |||
589 | return ret; | 591 | return ret; |
590 | } | 592 | } |
591 | 593 | ||
594 | |||
595 | static u32 md_csum_fold(u32 csum) | ||
596 | { | ||
597 | csum = (csum & 0xffff) + (csum >> 16); | ||
598 | return (csum & 0xffff) + (csum >> 16); | ||
599 | } | ||
600 | |||
592 | static unsigned int calc_sb_csum(mdp_super_t * sb) | 601 | static unsigned int calc_sb_csum(mdp_super_t * sb) |
593 | { | 602 | { |
603 | u64 newcsum = 0; | ||
604 | u32 *sb32 = (u32*)sb; | ||
605 | int i; | ||
594 | unsigned int disk_csum, csum; | 606 | unsigned int disk_csum, csum; |
595 | 607 | ||
596 | disk_csum = sb->sb_csum; | 608 | disk_csum = sb->sb_csum; |
597 | sb->sb_csum = 0; | 609 | sb->sb_csum = 0; |
598 | csum = csum_partial((void *)sb, MD_SB_BYTES, 0); | 610 | |
611 | for (i = 0; i < MD_SB_BYTES/4 ; i++) | ||
612 | newcsum += sb32[i]; | ||
613 | csum = (newcsum & 0xffffffff) + (newcsum>>32); | ||
614 | |||
615 | |||
616 | #ifdef CONFIG_ALPHA | ||
617 | /* This used to use csum_partial, which was wrong for several | ||
618 | * reasons including that different results are returned on | ||
619 | * different architectures. It isn't critical that we get exactly | ||
620 | * the same return value as before (we always csum_fold before | ||
621 | * testing, and that removes any differences). However as we | ||
622 | * know that csum_partial always returned a 16bit value on | ||
623 | * alphas, do a fold to maximise conformity to previous behaviour. | ||
624 | */ | ||
625 | sb->sb_csum = md_csum_fold(disk_csum); | ||
626 | #else | ||
599 | sb->sb_csum = disk_csum; | 627 | sb->sb_csum = disk_csum; |
628 | #endif | ||
600 | return csum; | 629 | return csum; |
601 | } | 630 | } |
602 | 631 | ||
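The new md_csum_fold() is two rounds of end-around-carry folding; the second round absorbs the carry the first can produce, so the result always fits in 16 bits. A small user-space sketch exercising exactly the function shown above:

	#include <stdio.h>

	static unsigned int md_csum_fold(unsigned int csum)
	{
		csum = (csum & 0xffff) + (csum >> 16);
		return (csum & 0xffff) + (csum >> 16);
	}

	int main(void)
	{
		/* 0xffff0001: the first fold gives 0x10000; the second fold
		 * absorbs that carry, yielding 0x0001. */
		printf("%#x\n", md_csum_fold(0xffff0001));
		return 0;
	}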
@@ -684,7 +713,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version | |||
684 | if (sb->raid_disks <= 0) | 713 | if (sb->raid_disks <= 0) |
685 | goto abort; | 714 | goto abort; |
686 | 715 | ||
687 | if (csum_fold(calc_sb_csum(sb)) != csum_fold(sb->sb_csum)) { | 716 | if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) { |
688 | printk(KERN_WARNING "md: invalid superblock checksum on %s\n", | 717 | printk(KERN_WARNING "md: invalid superblock checksum on %s\n", |
689 | b); | 718 | b); |
690 | goto abort; | 719 | goto abort; |
@@ -694,6 +723,17 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version | |||
694 | rdev->data_offset = 0; | 723 | rdev->data_offset = 0; |
695 | rdev->sb_size = MD_SB_BYTES; | 724 | rdev->sb_size = MD_SB_BYTES; |
696 | 725 | ||
726 | if (sb->state & (1<<MD_SB_BITMAP_PRESENT)) { | ||
727 | if (sb->level != 1 && sb->level != 4 | ||
728 | && sb->level != 5 && sb->level != 6 | ||
729 | && sb->level != 10) { | ||
730 | /* FIXME use a better test */ | ||
731 | printk(KERN_WARNING | ||
732 | "md: bitmaps not supported for this level.\n"); | ||
733 | goto abort; | ||
734 | } | ||
735 | } | ||
736 | |||
697 | if (sb->level == LEVEL_MULTIPATH) | 737 | if (sb->level == LEVEL_MULTIPATH) |
698 | rdev->desc_nr = -1; | 738 | rdev->desc_nr = -1; |
699 | else | 739 | else |
@@ -792,16 +832,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
792 | mddev->max_disks = MD_SB_DISKS; | 832 | mddev->max_disks = MD_SB_DISKS; |
793 | 833 | ||
794 | if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && | 834 | if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && |
795 | mddev->bitmap_file == NULL) { | 835 | mddev->bitmap_file == NULL) |
796 | if (mddev->level != 1 && mddev->level != 4 | ||
797 | && mddev->level != 5 && mddev->level != 6 | ||
798 | && mddev->level != 10) { | ||
799 | /* FIXME use a better test */ | ||
800 | printk(KERN_WARNING "md: bitmaps not supported for this level.\n"); | ||
801 | return -EINVAL; | ||
802 | } | ||
803 | mddev->bitmap_offset = mddev->default_bitmap_offset; | 836 | mddev->bitmap_offset = mddev->default_bitmap_offset; |
804 | } | ||
805 | 837 | ||
806 | } else if (mddev->pers == NULL) { | 838 | } else if (mddev->pers == NULL) { |
807 | /* Insist on good event counter while assembling */ | 839 | /* Insist on good event counter while assembling */ |
@@ -1058,6 +1090,18 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
1058 | bdevname(rdev->bdev,b)); | 1090 | bdevname(rdev->bdev,b)); |
1059 | return -EINVAL; | 1091 | return -EINVAL; |
1060 | } | 1092 | } |
1093 | if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)) { | ||
1094 | if (sb->level != cpu_to_le32(1) && | ||
1095 | sb->level != cpu_to_le32(4) && | ||
1096 | sb->level != cpu_to_le32(5) && | ||
1097 | sb->level != cpu_to_le32(6) && | ||
1098 | sb->level != cpu_to_le32(10)) { | ||
1099 | printk(KERN_WARNING | ||
1100 | "md: bitmaps not supported for this level.\n"); | ||
1101 | return -EINVAL; | ||
1102 | } | ||
1103 | } | ||
1104 | |||
1061 | rdev->preferred_minor = 0xffff; | 1105 | rdev->preferred_minor = 0xffff; |
1062 | rdev->data_offset = le64_to_cpu(sb->data_offset); | 1106 | rdev->data_offset = le64_to_cpu(sb->data_offset); |
1063 | atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read)); | 1107 | atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read)); |
@@ -1141,14 +1185,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1141 | mddev->max_disks = (4096-256)/2; | 1185 | mddev->max_disks = (4096-256)/2; |
1142 | 1186 | ||
1143 | if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) && | 1187 | if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) && |
1144 | mddev->bitmap_file == NULL ) { | 1188 | mddev->bitmap_file == NULL ) |
1145 | if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6 | ||
1146 | && mddev->level != 10) { | ||
1147 | printk(KERN_WARNING "md: bitmaps not supported for this level.\n"); | ||
1148 | return -EINVAL; | ||
1149 | } | ||
1150 | mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); | 1189 | mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); |
1151 | } | 1190 | |
1152 | if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) { | 1191 | if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) { |
1153 | mddev->reshape_position = le64_to_cpu(sb->reshape_position); | 1192 | mddev->reshape_position = le64_to_cpu(sb->reshape_position); |
1154 | mddev->delta_disks = le32_to_cpu(sb->delta_disks); | 1193 | mddev->delta_disks = le32_to_cpu(sb->delta_disks); |
@@ -2204,6 +2243,10 @@ static ssize_t | |||
2204 | layout_show(mddev_t *mddev, char *page) | 2243 | layout_show(mddev_t *mddev, char *page) |
2205 | { | 2244 | { |
2206 | /* just a number, not meaningful for all levels */ | 2245 | /* just a number, not meaningful for all levels */ |
2246 | if (mddev->reshape_position != MaxSector && | ||
2247 | mddev->layout != mddev->new_layout) | ||
2248 | return sprintf(page, "%d (%d)\n", | ||
2249 | mddev->new_layout, mddev->layout); | ||
2207 | return sprintf(page, "%d\n", mddev->layout); | 2250 | return sprintf(page, "%d\n", mddev->layout); |
2208 | } | 2251 | } |
2209 | 2252 | ||
@@ -2212,13 +2255,16 @@ layout_store(mddev_t *mddev, const char *buf, size_t len) | |||
2212 | { | 2255 | { |
2213 | char *e; | 2256 | char *e; |
2214 | unsigned long n = simple_strtoul(buf, &e, 10); | 2257 | unsigned long n = simple_strtoul(buf, &e, 10); |
2215 | if (mddev->pers) | ||
2216 | return -EBUSY; | ||
2217 | 2258 | ||
2218 | if (!*buf || (*e && *e != '\n')) | 2259 | if (!*buf || (*e && *e != '\n')) |
2219 | return -EINVAL; | 2260 | return -EINVAL; |
2220 | 2261 | ||
2221 | mddev->layout = n; | 2262 | if (mddev->pers) |
2263 | return -EBUSY; | ||
2264 | if (mddev->reshape_position != MaxSector) | ||
2265 | mddev->new_layout = n; | ||
2266 | else | ||
2267 | mddev->layout = n; | ||
2222 | return len; | 2268 | return len; |
2223 | } | 2269 | } |
2224 | static struct md_sysfs_entry md_layout = | 2270 | static struct md_sysfs_entry md_layout = |
@@ -2230,6 +2276,10 @@ raid_disks_show(mddev_t *mddev, char *page) | |||
2230 | { | 2276 | { |
2231 | if (mddev->raid_disks == 0) | 2277 | if (mddev->raid_disks == 0) |
2232 | return 0; | 2278 | return 0; |
2279 | if (mddev->reshape_position != MaxSector && | ||
2280 | mddev->delta_disks != 0) | ||
2281 | return sprintf(page, "%d (%d)\n", mddev->raid_disks, | ||
2282 | mddev->raid_disks - mddev->delta_disks); | ||
2233 | return sprintf(page, "%d\n", mddev->raid_disks); | 2283 | return sprintf(page, "%d\n", mddev->raid_disks); |
2234 | } | 2284 | } |
2235 | 2285 | ||
@@ -2247,7 +2297,11 @@ raid_disks_store(mddev_t *mddev, const char *buf, size_t len) | |||
2247 | 2297 | ||
2248 | if (mddev->pers) | 2298 | if (mddev->pers) |
2249 | rv = update_raid_disks(mddev, n); | 2299 | rv = update_raid_disks(mddev, n); |
2250 | else | 2300 | else if (mddev->reshape_position != MaxSector) { |
2301 | int olddisks = mddev->raid_disks - mddev->delta_disks; | ||
2302 | mddev->delta_disks = n - olddisks; | ||
2303 | mddev->raid_disks = n; | ||
2304 | } else | ||
2251 | mddev->raid_disks = n; | 2305 | mddev->raid_disks = n; |
2252 | return rv ? rv : len; | 2306 | return rv ? rv : len; |
2253 | } | 2307 | } |
@@ -2257,6 +2311,10 @@ __ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store); | |||
2257 | static ssize_t | 2311 | static ssize_t |
2258 | chunk_size_show(mddev_t *mddev, char *page) | 2312 | chunk_size_show(mddev_t *mddev, char *page) |
2259 | { | 2313 | { |
2314 | if (mddev->reshape_position != MaxSector && | ||
2315 | mddev->chunk_size != mddev->new_chunk) | ||
2316 | return sprintf(page, "%d (%d)\n", mddev->new_chunk, | ||
2317 | mddev->chunk_size); | ||
2260 | return sprintf(page, "%d\n", mddev->chunk_size); | 2318 | return sprintf(page, "%d\n", mddev->chunk_size); |
2261 | } | 2319 | } |
2262 | 2320 | ||
@@ -2267,12 +2325,15 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len) | |||
2267 | char *e; | 2325 | char *e; |
2268 | unsigned long n = simple_strtoul(buf, &e, 10); | 2326 | unsigned long n = simple_strtoul(buf, &e, 10); |
2269 | 2327 | ||
2270 | if (mddev->pers) | ||
2271 | return -EBUSY; | ||
2272 | if (!*buf || (*e && *e != '\n')) | 2328 | if (!*buf || (*e && *e != '\n')) |
2273 | return -EINVAL; | 2329 | return -EINVAL; |
2274 | 2330 | ||
2275 | mddev->chunk_size = n; | 2331 | if (mddev->pers) |
2332 | return -EBUSY; | ||
2333 | else if (mddev->reshape_position != MaxSector) | ||
2334 | mddev->new_chunk = n; | ||
2335 | else | ||
2336 | mddev->chunk_size = n; | ||
2276 | return len; | 2337 | return len; |
2277 | } | 2338 | } |
2278 | static struct md_sysfs_entry md_chunk_size = | 2339 | static struct md_sysfs_entry md_chunk_size = |
@@ -2637,8 +2698,7 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len) | |||
2637 | minor = simple_strtoul(buf, &e, 10); | 2698 | minor = simple_strtoul(buf, &e, 10); |
2638 | if (e==buf || (*e && *e != '\n') ) | 2699 | if (e==buf || (*e && *e != '\n') ) |
2639 | return -EINVAL; | 2700 | return -EINVAL; |
2640 | if (major >= sizeof(super_types)/sizeof(super_types[0]) || | 2701 | if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL) |
2641 | super_types[major].name == NULL) | ||
2642 | return -ENOENT; | 2702 | return -ENOENT; |
2643 | mddev->major_version = major; | 2703 | mddev->major_version = major; |
2644 | mddev->minor_version = minor; | 2704 | mddev->minor_version = minor; |
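The open-coded element count gives way to ARRAY_SIZE here and again in set_array_info() below, which is why <linux/kernel.h> is now included at the top of this file. At its simplest the macro is just:

	/* As defined in <linux/kernel.h>; later kernels also add a
	 * compile-time check that the argument really is an array. */
	#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))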
@@ -2859,6 +2919,37 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len) | |||
2859 | static struct md_sysfs_entry md_suspend_hi = | 2919 | static struct md_sysfs_entry md_suspend_hi = |
2860 | __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); | 2920 | __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); |
2861 | 2921 | ||
2922 | static ssize_t | ||
2923 | reshape_position_show(mddev_t *mddev, char *page) | ||
2924 | { | ||
2925 | if (mddev->reshape_position != MaxSector) | ||
2926 | return sprintf(page, "%llu\n", | ||
2927 | (unsigned long long)mddev->reshape_position); | ||
2928 | strcpy(page, "none\n"); | ||
2929 | return 5; | ||
2930 | } | ||
2931 | |||
2932 | static ssize_t | ||
2933 | reshape_position_store(mddev_t *mddev, const char *buf, size_t len) | ||
2934 | { | ||
2935 | char *e; | ||
2936 | unsigned long long new = simple_strtoull(buf, &e, 10); | ||
2937 | if (mddev->pers) | ||
2938 | return -EBUSY; | ||
2939 | if (buf == e || (*e && *e != '\n')) | ||
2940 | return -EINVAL; | ||
2941 | mddev->reshape_position = new; | ||
2942 | mddev->delta_disks = 0; | ||
2943 | mddev->new_level = mddev->level; | ||
2944 | mddev->new_layout = mddev->layout; | ||
2945 | mddev->new_chunk = mddev->chunk_size; | ||
2946 | return len; | ||
2947 | } | ||
2948 | |||
2949 | static struct md_sysfs_entry md_reshape_position = | ||
2950 | __ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show, | ||
2951 | reshape_position_store); | ||
2952 | |||
2862 | 2953 | ||
2863 | static struct attribute *md_default_attrs[] = { | 2954 | static struct attribute *md_default_attrs[] = { |
2864 | &md_level.attr, | 2955 | &md_level.attr, |
@@ -2871,6 +2962,7 @@ static struct attribute *md_default_attrs[] = { | |||
2871 | &md_new_device.attr, | 2962 | &md_new_device.attr, |
2872 | &md_safe_delay.attr, | 2963 | &md_safe_delay.attr, |
2873 | &md_array_state.attr, | 2964 | &md_array_state.attr, |
2965 | &md_reshape_position.attr, | ||
2874 | NULL, | 2966 | NULL, |
2875 | }; | 2967 | }; |
2876 | 2968 | ||
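Writing the new reshape_position attribute while the array is stopped seeds new_level, new_layout and new_chunk from the current geometry and zeroes delta_disks; subsequent writes to layout, chunk_size and raid_disks then update the pending values that the show methods print as "new (old)". A hypothetical user-space sketch (the sysfs path assumes the usual /sys/block/mdX/md/ layout; the snippet is illustration, not part of the patch):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/block/md0/md/reshape_position", "w");

		if (!f)
			return 1;
		/* reshape_position_store() returns -EBUSY once the array is
		 * running (mddev->pers != NULL), so this must happen first. */
		fprintf(f, "0\n");
		return fclose(f) ? 1 : 0;
	}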
@@ -3012,6 +3104,7 @@ static int do_md_run(mddev_t * mddev) | |||
3012 | struct gendisk *disk; | 3104 | struct gendisk *disk; |
3013 | struct mdk_personality *pers; | 3105 | struct mdk_personality *pers; |
3014 | char b[BDEVNAME_SIZE]; | 3106 | char b[BDEVNAME_SIZE]; |
3107 | struct block_device *bdev; | ||
3015 | 3108 | ||
3016 | if (list_empty(&mddev->disks)) | 3109 | if (list_empty(&mddev->disks)) |
3017 | /* cannot run an array with no devices.. */ | 3110 | /* cannot run an array with no devices.. */ |
@@ -3239,7 +3332,13 @@ static int do_md_run(mddev_t * mddev) | |||
3239 | md_wakeup_thread(mddev->thread); | 3332 | md_wakeup_thread(mddev->thread); |
3240 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ | 3333 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ |
3241 | 3334 | ||
3242 | mddev->changed = 1; | 3335 | bdev = bdget_disk(mddev->gendisk, 0); |
3336 | if (bdev) { | ||
3337 | bd_set_size(bdev, mddev->array_size << 1); | ||
3338 | blkdev_ioctl(bdev->bd_inode, NULL, BLKRRPART, 0); | ||
3339 | bdput(bdev); | ||
3340 | } | ||
3341 | |||
3243 | md_new_event(mddev); | 3342 | md_new_event(mddev); |
3244 | kobject_uevent(&mddev->gendisk->kobj, KOBJ_CHANGE); | 3343 | kobject_uevent(&mddev->gendisk->kobj, KOBJ_CHANGE); |
3245 | return 0; | 3344 | return 0; |
@@ -3361,7 +3460,6 @@ static int do_md_stop(mddev_t * mddev, int mode) | |||
3361 | mddev->pers = NULL; | 3460 | mddev->pers = NULL; |
3362 | 3461 | ||
3363 | set_capacity(disk, 0); | 3462 | set_capacity(disk, 0); |
3364 | mddev->changed = 1; | ||
3365 | 3463 | ||
3366 | if (mddev->ro) | 3464 | if (mddev->ro) |
3367 | mddev->ro = 0; | 3465 | mddev->ro = 0; |
@@ -3409,6 +3507,7 @@ static int do_md_stop(mddev_t * mddev, int mode) | |||
3409 | mddev->size = 0; | 3507 | mddev->size = 0; |
3410 | mddev->raid_disks = 0; | 3508 | mddev->raid_disks = 0; |
3411 | mddev->recovery_cp = 0; | 3509 | mddev->recovery_cp = 0; |
3510 | mddev->reshape_position = MaxSector; | ||
3412 | 3511 | ||
3413 | } else if (mddev->pers) | 3512 | } else if (mddev->pers) |
3414 | printk(KERN_INFO "md: %s switched to read-only mode.\n", | 3513 | printk(KERN_INFO "md: %s switched to read-only mode.\n", |
@@ -4019,7 +4118,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) | |||
4019 | if (info->raid_disks == 0) { | 4118 | if (info->raid_disks == 0) { |
4020 | /* just setting version number for superblock loading */ | 4119 | /* just setting version number for superblock loading */ |
4021 | if (info->major_version < 0 || | 4120 | if (info->major_version < 0 || |
4022 | info->major_version >= sizeof(super_types)/sizeof(super_types[0]) || | 4121 | info->major_version >= ARRAY_SIZE(super_types) || |
4023 | super_types[info->major_version].name == NULL) { | 4122 | super_types[info->major_version].name == NULL) { |
4024 | /* maybe try to auto-load a module? */ | 4123 | /* maybe try to auto-load a module? */ |
4025 | printk(KERN_INFO | 4124 | printk(KERN_INFO |
@@ -4500,20 +4599,6 @@ static int md_release(struct inode *inode, struct file * file) | |||
4500 | return 0; | 4599 | return 0; |
4501 | } | 4600 | } |
4502 | 4601 | ||
4503 | static int md_media_changed(struct gendisk *disk) | ||
4504 | { | ||
4505 | mddev_t *mddev = disk->private_data; | ||
4506 | |||
4507 | return mddev->changed; | ||
4508 | } | ||
4509 | |||
4510 | static int md_revalidate(struct gendisk *disk) | ||
4511 | { | ||
4512 | mddev_t *mddev = disk->private_data; | ||
4513 | |||
4514 | mddev->changed = 0; | ||
4515 | return 0; | ||
4516 | } | ||
4517 | static struct block_device_operations md_fops = | 4602 | static struct block_device_operations md_fops = |
4518 | { | 4603 | { |
4519 | .owner = THIS_MODULE, | 4604 | .owner = THIS_MODULE, |
@@ -4521,8 +4606,6 @@ static struct block_device_operations md_fops = | |||
4521 | .release = md_release, | 4606 | .release = md_release, |
4522 | .ioctl = md_ioctl, | 4607 | .ioctl = md_ioctl, |
4523 | .getgeo = md_getgeo, | 4608 | .getgeo = md_getgeo, |
4524 | .media_changed = md_media_changed, | ||
4525 | .revalidate_disk= md_revalidate, | ||
4526 | }; | 4609 | }; |
4527 | 4610 | ||
4528 | static int md_thread(void * arg) | 4611 | static int md_thread(void * arg) |
@@ -4941,15 +5024,6 @@ static int md_seq_open(struct inode *inode, struct file *file) | |||
4941 | return error; | 5024 | return error; |
4942 | } | 5025 | } |
4943 | 5026 | ||
4944 | static int md_seq_release(struct inode *inode, struct file *file) | ||
4945 | { | ||
4946 | struct seq_file *m = file->private_data; | ||
4947 | struct mdstat_info *mi = m->private; | ||
4948 | m->private = NULL; | ||
4949 | kfree(mi); | ||
4950 | return seq_release(inode, file); | ||
4951 | } | ||
4952 | |||
4953 | static unsigned int mdstat_poll(struct file *filp, poll_table *wait) | 5027 | static unsigned int mdstat_poll(struct file *filp, poll_table *wait) |
4954 | { | 5028 | { |
4955 | struct seq_file *m = filp->private_data; | 5029 | struct seq_file *m = filp->private_data; |
@@ -4971,7 +5045,7 @@ static const struct file_operations md_seq_fops = { | |||
4971 | .open = md_seq_open, | 5045 | .open = md_seq_open, |
4972 | .read = seq_read, | 5046 | .read = seq_read, |
4973 | .llseek = seq_lseek, | 5047 | .llseek = seq_lseek, |
4974 | .release = md_seq_release, | 5048 | .release = seq_release_private, |
4975 | .poll = mdstat_poll, | 5049 | .poll = mdstat_poll, |
4976 | }; | 5050 | }; |
4977 | 5051 | ||
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 97ee870b265d..1b7130cad21f 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -2063,7 +2063,6 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors) | |||
2063 | */ | 2063 | */ |
2064 | mddev->array_size = sectors>>1; | 2064 | mddev->array_size = sectors>>1; |
2065 | set_capacity(mddev->gendisk, mddev->array_size << 1); | 2065 | set_capacity(mddev->gendisk, mddev->array_size << 1); |
2066 | mddev->changed = 1; | ||
2067 | if (mddev->array_size > mddev->size && mddev->recovery_cp == MaxSector) { | 2066 | if (mddev->array_size > mddev->size && mddev->recovery_cp == MaxSector) { |
2068 | mddev->recovery_cp = mddev->size << 1; | 2067 | mddev->recovery_cp = mddev->size << 1; |
2069 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 2068 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 8d59914f2057..a72e70ad0975 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -353,8 +353,8 @@ static int grow_stripes(raid5_conf_t *conf, int num) | |||
353 | struct kmem_cache *sc; | 353 | struct kmem_cache *sc; |
354 | int devs = conf->raid_disks; | 354 | int devs = conf->raid_disks; |
355 | 355 | ||
356 | sprintf(conf->cache_name[0], "raid5/%s", mdname(conf->mddev)); | 356 | sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev)); |
357 | sprintf(conf->cache_name[1], "raid5/%s-alt", mdname(conf->mddev)); | 357 | sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev)); |
358 | conf->active_name = 0; | 358 | conf->active_name = 0; |
359 | sc = kmem_cache_create(conf->cache_name[conf->active_name], | 359 | sc = kmem_cache_create(conf->cache_name[conf->active_name], |
360 | sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), | 360 | sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), |
@@ -3864,7 +3864,6 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) | |||
3864 | sectors &= ~((sector_t)mddev->chunk_size/512 - 1); | 3864 | sectors &= ~((sector_t)mddev->chunk_size/512 - 1); |
3865 | mddev->array_size = (sectors * (mddev->raid_disks-conf->max_degraded))>>1; | 3865 | mddev->array_size = (sectors * (mddev->raid_disks-conf->max_degraded))>>1; |
3866 | set_capacity(mddev->gendisk, mddev->array_size << 1); | 3866 | set_capacity(mddev->gendisk, mddev->array_size << 1); |
3867 | mddev->changed = 1; | ||
3868 | if (sectors/2 > mddev->size && mddev->recovery_cp == MaxSector) { | 3867 | if (sectors/2 > mddev->size && mddev->recovery_cp == MaxSector) { |
3869 | mddev->recovery_cp = mddev->size << 1; | 3868 | mddev->recovery_cp = mddev->size << 1; |
3870 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 3869 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
@@ -3999,7 +3998,6 @@ static void end_reshape(raid5_conf_t *conf) | |||
3999 | conf->mddev->array_size = conf->mddev->size * | 3998 | conf->mddev->array_size = conf->mddev->size * |
4000 | (conf->raid_disks - conf->max_degraded); | 3999 | (conf->raid_disks - conf->max_degraded); |
4001 | set_capacity(conf->mddev->gendisk, conf->mddev->array_size << 1); | 4000 | set_capacity(conf->mddev->gendisk, conf->mddev->array_size << 1); |
4002 | conf->mddev->changed = 1; | ||
4003 | 4001 | ||
4004 | bdev = bdget_disk(conf->mddev->gendisk, 0); | 4002 | bdev = bdget_disk(conf->mddev->gendisk, 0); |
4005 | if (bdev) { | 4003 | if (bdev) { |
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index b6c16704aaab..7385acfa1dd9 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c | |||
@@ -501,9 +501,9 @@ void mmc_detect_change(struct mmc_host *host, unsigned long delay) | |||
501 | { | 501 | { |
502 | #ifdef CONFIG_MMC_DEBUG | 502 | #ifdef CONFIG_MMC_DEBUG |
503 | unsigned long flags; | 503 | unsigned long flags; |
504 | spin_lock_irqsave(host->lock, flags); | 504 | spin_lock_irqsave(&host->lock, flags); |
505 | BUG_ON(host->removed); | 505 | BUG_ON(host->removed); |
506 | spin_unlock_irqrestore(host->lock, flags); | 506 | spin_unlock_irqrestore(&host->lock, flags); |
507 | #endif | 507 | #endif |
508 | 508 | ||
509 | mmc_schedule_delayed_work(&host->detect, delay); | 509 | mmc_schedule_delayed_work(&host->detect, delay); |
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 3a03a74c0609..637ae8f68791 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c | |||
@@ -1214,7 +1214,7 @@ e1000_remove(struct pci_dev *pdev) | |||
1214 | int i; | 1214 | int i; |
1215 | #endif | 1215 | #endif |
1216 | 1216 | ||
1217 | flush_scheduled_work(); | 1217 | cancel_work_sync(&adapter->reset_task); |
1218 | 1218 | ||
1219 | e1000_release_manageability(adapter); | 1219 | e1000_release_manageability(adapter); |
1220 | 1220 | ||
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index eed433d6056a..f71dab347667 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c | |||
@@ -662,10 +662,10 @@ int phy_stop_interrupts(struct phy_device *phydev) | |||
662 | phy_error(phydev); | 662 | phy_error(phydev); |
663 | 663 | ||
664 | /* | 664 | /* |
665 | * Finish any pending work; we might have been scheduled | 665 | * Finish any pending work; we might have been scheduled to be called |
666 | * to be called from keventd ourselves, though. | 666 | * from keventd ourselves, but cancel_work_sync() handles that. |
667 | */ | 667 | */ |
668 | run_scheduled_work(&phydev->phy_queue); | 668 | cancel_work_sync(&phydev->phy_queue); |
669 | 669 | ||
670 | free_irq(phydev->irq, phydev); | 670 | free_irq(phydev->irq, phydev); |
671 | 671 | ||
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index e5e901ecd808..923b9c725cc3 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c | |||
@@ -3716,10 +3716,8 @@ static void tg3_reset_task(struct work_struct *work) | |||
3716 | unsigned int restart_timer; | 3716 | unsigned int restart_timer; |
3717 | 3717 | ||
3718 | tg3_full_lock(tp, 0); | 3718 | tg3_full_lock(tp, 0); |
3719 | tp->tg3_flags |= TG3_FLAG_IN_RESET_TASK; | ||
3720 | 3719 | ||
3721 | if (!netif_running(tp->dev)) { | 3720 | if (!netif_running(tp->dev)) { |
3722 | tp->tg3_flags &= ~TG3_FLAG_IN_RESET_TASK; | ||
3723 | tg3_full_unlock(tp); | 3721 | tg3_full_unlock(tp); |
3724 | return; | 3722 | return; |
3725 | } | 3723 | } |
@@ -3750,8 +3748,6 @@ static void tg3_reset_task(struct work_struct *work) | |||
3750 | mod_timer(&tp->timer, jiffies + 1); | 3748 | mod_timer(&tp->timer, jiffies + 1); |
3751 | 3749 | ||
3752 | out: | 3750 | out: |
3753 | tp->tg3_flags &= ~TG3_FLAG_IN_RESET_TASK; | ||
3754 | |||
3755 | tg3_full_unlock(tp); | 3751 | tg3_full_unlock(tp); |
3756 | } | 3752 | } |
3757 | 3753 | ||
@@ -7390,12 +7386,7 @@ static int tg3_close(struct net_device *dev) | |||
7390 | { | 7386 | { |
7391 | struct tg3 *tp = netdev_priv(dev); | 7387 | struct tg3 *tp = netdev_priv(dev); |
7392 | 7388 | ||
7393 | /* Calling flush_scheduled_work() may deadlock because | 7389 | cancel_work_sync(&tp->reset_task); |
7394 | * linkwatch_event() may be on the workqueue and it will try to get | ||
7395 | * the rtnl_lock which we are holding. | ||
7396 | */ | ||
7397 | while (tp->tg3_flags & TG3_FLAG_IN_RESET_TASK) | ||
7398 | msleep(1); | ||
7399 | 7390 | ||
7400 | netif_stop_queue(dev); | 7391 | netif_stop_queue(dev); |
7401 | 7392 | ||
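The tg3 hunks replace the hand-rolled TG3_FLAG_IN_RESET_TASK flag, and the msleep() loop that worked around the flush_scheduled_work() deadlock against rtnl_lock, with cancel_work_sync(): it waits for a running work item to finish and removes a pending one, without flushing unrelated keventd work. A minimal sketch of the pattern with an assumed driver skeleton (foo_priv and foo_reset_task are placeholders, not tg3 code):

	#include <linux/kernel.h>
	#include <linux/workqueue.h>

	struct foo_priv {
		struct work_struct reset_task;
	};

	static void foo_reset_task(struct work_struct *work)
	{
		struct foo_priv *fp = container_of(work, struct foo_priv, reset_task);

		(void)fp;	/* placeholder for the real hardware reset */
	}

	static int foo_close(struct foo_priv *fp)
	{
		/*
		 * Waits for a running foo_reset_task() and cancels a queued
		 * one, so no IN_RESET flag or polling loop is needed, and
		 * nothing is flushed while rtnl_lock is held.
		 */
		cancel_work_sync(&fp->reset_task);
		return 0;
	}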
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 4d334cf5a243..bd9f4f428e5b 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h | |||
@@ -2228,7 +2228,7 @@ struct tg3 { | |||
2228 | #define TG3_FLAG_JUMBO_RING_ENABLE 0x00800000 | 2228 | #define TG3_FLAG_JUMBO_RING_ENABLE 0x00800000 |
2229 | #define TG3_FLAG_10_100_ONLY 0x01000000 | 2229 | #define TG3_FLAG_10_100_ONLY 0x01000000 |
2230 | #define TG3_FLAG_PAUSE_AUTONEG 0x02000000 | 2230 | #define TG3_FLAG_PAUSE_AUTONEG 0x02000000 |
2231 | #define TG3_FLAG_IN_RESET_TASK 0x04000000 | 2231 | |
2232 | #define TG3_FLAG_40BIT_DMA_BUG 0x08000000 | 2232 | #define TG3_FLAG_40BIT_DMA_BUG 0x08000000 |
2233 | #define TG3_FLAG_BROKEN_CHECKSUMS 0x10000000 | 2233 | #define TG3_FLAG_BROKEN_CHECKSUMS 0x10000000 |
2234 | #define TG3_FLAG_SUPPORT_MSI 0x20000000 | 2234 | #define TG3_FLAG_SUPPORT_MSI 0x20000000 |
diff --git a/drivers/spi/atmel_spi.c b/drivers/spi/atmel_spi.c index 66e7bc985797..1d8a2f6bb8eb 100644 --- a/drivers/spi/atmel_spi.c +++ b/drivers/spi/atmel_spi.c | |||
@@ -22,10 +22,7 @@ | |||
22 | #include <asm/io.h> | 22 | #include <asm/io.h> |
23 | #include <asm/arch/board.h> | 23 | #include <asm/arch/board.h> |
24 | #include <asm/arch/gpio.h> | 24 | #include <asm/arch/gpio.h> |
25 | |||
26 | #ifdef CONFIG_ARCH_AT91 | ||
27 | #include <asm/arch/cpu.h> | 25 | #include <asm/arch/cpu.h> |
28 | #endif | ||
29 | 26 | ||
30 | #include "atmel_spi.h" | 27 | #include "atmel_spi.h" |
31 | 28 | ||
@@ -552,10 +549,8 @@ static int __init atmel_spi_probe(struct platform_device *pdev) | |||
552 | goto out_free_buffer; | 549 | goto out_free_buffer; |
553 | as->irq = irq; | 550 | as->irq = irq; |
554 | as->clk = clk; | 551 | as->clk = clk; |
555 | #ifdef CONFIG_ARCH_AT91 | ||
556 | if (!cpu_is_at91rm9200()) | 552 | if (!cpu_is_at91rm9200()) |
557 | as->new_1 = 1; | 553 | as->new_1 = 1; |
558 | #endif | ||
559 | 554 | ||
560 | ret = request_irq(irq, atmel_spi_interrupt, 0, | 555 | ret = request_irq(irq, atmel_spi_interrupt, 0, |
561 | pdev->dev.bus_id, master); | 556 | pdev->dev.bus_id, master); |
diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c index b082d95bbbaa..11e9b15ca45a 100644 --- a/drivers/usb/atm/usbatm.c +++ b/drivers/usb/atm/usbatm.c | |||
@@ -1033,7 +1033,7 @@ static int usbatm_do_heavy_init(void *arg) | |||
1033 | 1033 | ||
1034 | static int usbatm_heavy_init(struct usbatm_data *instance) | 1034 | static int usbatm_heavy_init(struct usbatm_data *instance) |
1035 | { | 1035 | { |
1036 | int ret = kernel_thread(usbatm_do_heavy_init, instance, CLONE_KERNEL); | 1036 | int ret = kernel_thread(usbatm_do_heavy_init, instance, CLONE_FS | CLONE_FILES); |
1037 | 1037 | ||
1038 | if (ret < 0) { | 1038 | if (ret < 0) { |
1039 | usb_err(instance, "%s: failed to create kernel_thread (%d)!\n", __func__, ret); | 1039 | usb_err(instance, "%s: failed to create kernel_thread (%d)!\n", __func__, ret); |
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 1132ba5ff391..9a256d2ff9dc 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig | |||
@@ -1348,6 +1348,20 @@ config FB_VOODOO1 | |||
1348 | Please read the <file:Documentation/fb/README-sstfb.txt> for supported | 1348 | Please read the <file:Documentation/fb/README-sstfb.txt> for supported |
1349 | options and other important info support. | 1349 | options and other important info support. |
1350 | 1350 | ||
1351 | config FB_VT8623 | ||
1352 | tristate "VIA VT8623 support" | ||
1353 | depends on FB && PCI | ||
1354 | select FB_CFB_FILLRECT | ||
1355 | select FB_CFB_COPYAREA | ||
1356 | select FB_CFB_IMAGEBLIT | ||
1357 | select FB_TILEBLITTING | ||
1358 | select FB_SVGALIB | ||
1359 | select VGASTATE | ||
1360 | select FONT_8x16 if FRAMEBUFFER_CONSOLE | ||
1361 | ---help--- | ||
1362 | Driver for CastleRock integrated graphics core in the | ||
1363 | VIA VT8623 [Apollo CLE266] chipset. | ||
1364 | |||
1351 | config FB_CYBLA | 1365 | config FB_CYBLA |
1352 | tristate "Cyberblade/i1 support" | 1366 | tristate "Cyberblade/i1 support" |
1353 | depends on FB && PCI && X86_32 && !64BIT | 1367 | depends on FB && PCI && X86_32 && !64BIT |
@@ -1401,6 +1415,20 @@ config FB_TRIDENT_ACCEL | |||
1401 | This will compile the Trident frame buffer device with | 1415 | This will compile the Trident frame buffer device with |
1402 | acceleration functions. | 1416 | acceleration functions. |
1403 | 1417 | ||
1418 | config FB_ARK | ||
1419 | tristate "ARK 2000PV support" | ||
1420 | depends on FB && PCI | ||
1421 | select FB_CFB_FILLRECT | ||
1422 | select FB_CFB_COPYAREA | ||
1423 | select FB_CFB_IMAGEBLIT | ||
1424 | select FB_TILEBLITTING | ||
1425 | select FB_SVGALIB | ||
1426 | select VGASTATE | ||
1427 | select FONT_8x16 if FRAMEBUFFER_CONSOLE | ||
1428 | ---help--- | ||
1429 | Driver for PCI graphics boards with ARK 2000PV chip | ||
1430 | and ICS 5342 RAMDAC. | ||
1431 | |||
1404 | config FB_PM3 | 1432 | config FB_PM3 |
1405 | tristate "Permedia3 support" | 1433 | tristate "Permedia3 support" |
1406 | depends on FB && PCI && BROKEN | 1434 | depends on FB && PCI && BROKEN |
diff --git a/drivers/video/Makefile b/drivers/video/Makefile index a916c204274f..0b70567458fb 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile | |||
@@ -54,10 +54,12 @@ obj-$(CONFIG_FB_VALKYRIE) += valkyriefb.o | |||
54 | obj-$(CONFIG_FB_CT65550) += chipsfb.o | 54 | obj-$(CONFIG_FB_CT65550) += chipsfb.o |
55 | obj-$(CONFIG_FB_IMSTT) += imsttfb.o | 55 | obj-$(CONFIG_FB_IMSTT) += imsttfb.o |
56 | obj-$(CONFIG_FB_FM2) += fm2fb.o | 56 | obj-$(CONFIG_FB_FM2) += fm2fb.o |
57 | obj-$(CONFIG_FB_VT8623) += vt8623fb.o | ||
57 | obj-$(CONFIG_FB_CYBLA) += cyblafb.o | 58 | obj-$(CONFIG_FB_CYBLA) += cyblafb.o |
58 | obj-$(CONFIG_FB_TRIDENT) += tridentfb.o | 59 | obj-$(CONFIG_FB_TRIDENT) += tridentfb.o |
59 | obj-$(CONFIG_FB_LE80578) += vermilion/ | 60 | obj-$(CONFIG_FB_LE80578) += vermilion/ |
60 | obj-$(CONFIG_FB_S3) += s3fb.o | 61 | obj-$(CONFIG_FB_S3) += s3fb.o |
62 | obj-$(CONFIG_FB_ARK) += arkfb.o | ||
61 | obj-$(CONFIG_FB_STI) += stifb.o | 63 | obj-$(CONFIG_FB_STI) += stifb.o |
62 | obj-$(CONFIG_FB_FFB) += ffb.o sbuslib.o | 64 | obj-$(CONFIG_FB_FFB) += ffb.o sbuslib.o |
63 | obj-$(CONFIG_FB_CG6) += cg6.o sbuslib.o | 65 | obj-$(CONFIG_FB_CG6) += cg6.o sbuslib.o |
diff --git a/drivers/video/arkfb.c b/drivers/video/arkfb.c new file mode 100644 index 000000000000..ba6fede5c466 --- /dev/null +++ b/drivers/video/arkfb.c | |||
@@ -0,0 +1,1200 @@ | |||
1 | /* | ||
2 | * linux/drivers/video/arkfb.c -- Frame buffer device driver for ARK 2000PV | ||
3 | * with ICS 5342 dac (it is easy to add support for different dacs). | ||
4 | * | ||
5 | * Copyright (c) 2007 Ondrej Zajicek <santiago@crfreenet.org> | ||
6 | * | ||
7 | * This file is subject to the terms and conditions of the GNU General Public | ||
8 | * License. See the file COPYING in the main directory of this archive for | ||
9 | * more details. | ||
10 | * | ||
11 | * Code is based on s3fb | ||
12 | */ | ||
13 | |||
14 | #include <linux/version.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/errno.h> | ||
18 | #include <linux/string.h> | ||
19 | #include <linux/mm.h> | ||
20 | #include <linux/tty.h> | ||
21 | #include <linux/slab.h> | ||
22 | #include <linux/delay.h> | ||
23 | #include <linux/fb.h> | ||
24 | #include <linux/svga.h> | ||
25 | #include <linux/init.h> | ||
26 | #include <linux/pci.h> | ||
27 | #include <linux/console.h> /* Why should an fb driver call console functions? Because it needs acquire_console_sem(). */ | ||
28 | #include <video/vga.h> | ||
29 | |||
30 | #ifdef CONFIG_MTRR | ||
31 | #include <asm/mtrr.h> | ||
32 | #endif | ||
33 | |||
34 | struct arkfb_info { | ||
35 | int mclk_freq; | ||
36 | int mtrr_reg; | ||
37 | |||
38 | struct dac_info *dac; | ||
39 | struct vgastate state; | ||
40 | struct mutex open_lock; | ||
41 | unsigned int ref_count; | ||
42 | u32 pseudo_palette[16]; | ||
43 | }; | ||
44 | |||
45 | |||
46 | /* ------------------------------------------------------------------------- */ | ||
47 | |||
48 | |||
49 | static const struct svga_fb_format arkfb_formats[] = { | ||
50 | { 0, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0, | ||
51 | FB_TYPE_TEXT, FB_AUX_TEXT_SVGA_STEP4, FB_VISUAL_PSEUDOCOLOR, 8, 8}, | ||
52 | { 4, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0, | ||
53 | FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_PSEUDOCOLOR, 8, 16}, | ||
54 | { 4, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 1, | ||
55 | FB_TYPE_INTERLEAVED_PLANES, 1, FB_VISUAL_PSEUDOCOLOR, 8, 16}, | ||
56 | { 8, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0, | ||
57 | FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_PSEUDOCOLOR, 8, 8}, | ||
58 | {16, {10, 5, 0}, {5, 5, 0}, {0, 5, 0}, {0, 0, 0}, 0, | ||
59 | FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_TRUECOLOR, 4, 4}, | ||
60 | {16, {11, 5, 0}, {5, 6, 0}, {0, 5, 0}, {0, 0, 0}, 0, | ||
61 | FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_TRUECOLOR, 4, 4}, | ||
62 | {24, {16, 8, 0}, {8, 8, 0}, {0, 8, 0}, {0, 0, 0}, 0, | ||
63 | FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_TRUECOLOR, 8, 8}, | ||
64 | {32, {16, 8, 0}, {8, 8, 0}, {0, 8, 0}, {0, 0, 0}, 0, | ||
65 | FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_TRUECOLOR, 2, 2}, | ||
66 | SVGA_FORMAT_END | ||
67 | }; | ||
68 | |||
69 | |||
70 | /* CRT timing register sets */ | ||
71 | |||
72 | static const struct vga_regset ark_h_total_regs[] = {{0x00, 0, 7}, {0x41, 7, 7}, VGA_REGSET_END}; | ||
73 | static const struct vga_regset ark_h_display_regs[] = {{0x01, 0, 7}, {0x41, 6, 6}, VGA_REGSET_END}; | ||
74 | static const struct vga_regset ark_h_blank_start_regs[] = {{0x02, 0, 7}, {0x41, 5, 5}, VGA_REGSET_END}; | ||
75 | static const struct vga_regset ark_h_blank_end_regs[] = {{0x03, 0, 4}, {0x05, 7, 7 }, VGA_REGSET_END}; | ||
76 | static const struct vga_regset ark_h_sync_start_regs[] = {{0x04, 0, 7}, {0x41, 4, 4}, VGA_REGSET_END}; | ||
77 | static const struct vga_regset ark_h_sync_end_regs[] = {{0x05, 0, 4}, VGA_REGSET_END}; | ||
78 | |||
79 | static const struct vga_regset ark_v_total_regs[] = {{0x06, 0, 7}, {0x07, 0, 0}, {0x07, 5, 5}, {0x40, 7, 7}, VGA_REGSET_END}; | ||
80 | static const struct vga_regset ark_v_display_regs[] = {{0x12, 0, 7}, {0x07, 1, 1}, {0x07, 6, 6}, {0x40, 6, 6}, VGA_REGSET_END}; | ||
81 | static const struct vga_regset ark_v_blank_start_regs[] = {{0x15, 0, 7}, {0x07, 3, 3}, {0x09, 5, 5}, {0x40, 5, 5}, VGA_REGSET_END}; | ||
82 | // const struct vga_regset ark_v_blank_end_regs[] = {{0x16, 0, 6}, VGA_REGSET_END}; | ||
83 | static const struct vga_regset ark_v_blank_end_regs[] = {{0x16, 0, 7}, VGA_REGSET_END}; | ||
84 | static const struct vga_regset ark_v_sync_start_regs[] = {{0x10, 0, 7}, {0x07, 2, 2}, {0x07, 7, 7}, {0x40, 4, 4}, VGA_REGSET_END}; | ||
85 | static const struct vga_regset ark_v_sync_end_regs[] = {{0x11, 0, 3}, VGA_REGSET_END}; | ||
86 | |||
87 | static const struct vga_regset ark_line_compare_regs[] = {{0x18, 0, 7}, {0x07, 4, 4}, {0x09, 6, 6}, VGA_REGSET_END}; | ||
88 | static const struct vga_regset ark_start_address_regs[] = {{0x0d, 0, 7}, {0x0c, 0, 7}, {0x40, 0, 2}, VGA_REGSET_END}; | ||
89 | static const struct vga_regset ark_offset_regs[] = {{0x13, 0, 7}, {0x41, 3, 3}, VGA_REGSET_END}; | ||
90 | |||
91 | static const struct svga_timing_regs ark_timing_regs = { | ||
92 | ark_h_total_regs, ark_h_display_regs, ark_h_blank_start_regs, | ||
93 | ark_h_blank_end_regs, ark_h_sync_start_regs, ark_h_sync_end_regs, | ||
94 | ark_v_total_regs, ark_v_display_regs, ark_v_blank_start_regs, | ||
95 | ark_v_blank_end_regs, ark_v_sync_start_regs, ark_v_sync_end_regs, | ||
96 | }; | ||
97 | |||
98 | |||
99 | /* ------------------------------------------------------------------------- */ | ||
100 | |||
101 | |||
102 | /* Module parameters */ | ||
103 | |||
104 | static char *mode = "640x480-8@60"; | ||
105 | |||
106 | #ifdef CONFIG_MTRR | ||
107 | static int mtrr = 1; | ||
108 | #endif | ||
109 | |||
110 | MODULE_AUTHOR("(c) 2007 Ondrej Zajicek <santiago@crfreenet.org>"); | ||
111 | MODULE_LICENSE("GPL"); | ||
112 | MODULE_DESCRIPTION("fbdev driver for ARK 2000PV"); | ||
113 | |||
114 | module_param(mode, charp, 0444); | ||
115 | MODULE_PARM_DESC(mode, "Default video mode ('640x480-8@60', etc)"); | ||
116 | |||
117 | #ifdef CONFIG_MTRR | ||
118 | module_param(mtrr, int, 0444); | ||
119 | MODULE_PARM_DESC(mtrr, "Enable write-combining with MTRR (1=enable, 0=disable, default=1)"); | ||
120 | #endif | ||
121 | |||
122 | static int threshold = 4; | ||
123 | |||
124 | module_param(threshold, int, 0644); | ||
125 | MODULE_PARM_DESC(threshold, "FIFO threshold"); | ||
126 | |||
127 | |||
128 | /* ------------------------------------------------------------------------- */ | ||
129 | |||
130 | |||
131 | static void arkfb_settile(struct fb_info *info, struct fb_tilemap *map) | ||
132 | { | ||
133 | const u8 *font = map->data; | ||
134 | u8 __iomem *fb = (u8 __iomem *)info->screen_base; | ||
135 | int i, c; | ||
136 | |||
137 | if ((map->width != 8) || (map->height != 16) || | ||
138 | (map->depth != 1) || (map->length != 256)) { | ||
139 | printk(KERN_ERR "fb%d: unsupported font parameters: width %d, " | ||
140 | "height %d, depth %d, length %d\n", info->node, | ||
141 | map->width, map->height, map->depth, map->length); | ||
142 | return; | ||
143 | } | ||
144 | |||
145 | fb += 2; | ||
146 | for (c = 0; c < map->length; c++) { | ||
147 | for (i = 0; i < map->height; i++) { | ||
148 | fb_writeb(font[i], &fb[i * 4]); | ||
149 | fb_writeb(font[i], &fb[i * 4 + (128 * 8)]); | ||
150 | } | ||
151 | fb += 128; | ||
152 | |||
153 | if ((c % 8) == 7) | ||
154 | fb += 128*8; | ||
155 | |||
156 | font += map->height; | ||
157 | } | ||
158 | } | ||
159 | |||
160 | static struct fb_tile_ops arkfb_tile_ops = { | ||
161 | .fb_settile = arkfb_settile, | ||
162 | .fb_tilecopy = svga_tilecopy, | ||
163 | .fb_tilefill = svga_tilefill, | ||
164 | .fb_tileblit = svga_tileblit, | ||
165 | .fb_tilecursor = svga_tilecursor, | ||
166 | .fb_get_tilemax = svga_get_tilemax, | ||
167 | }; | ||
168 | |||
169 | |||
170 | /* ------------------------------------------------------------------------- */ | ||
171 | |||
172 | |||
173 | /* image data is MSB-first, fb structure is MSB-first too */ | ||
174 | static inline u32 expand_color(u32 c) | ||
175 | { | ||
176 | return ((c & 1) | ((c & 2) << 7) | ((c & 4) << 14) | ((c & 8) << 21)) * 0xFF; | ||
177 | } | ||
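A worked example of the bit-to-byte trick: each set plane bit is first moved to its own byte position, and the final multiply by 0xFF then smears it across the whole byte.

	/* expand_color(0x5): plane bits 0 and 2 are set.
	 *   (c & 1)       = 0x00000001
	 *   (c & 4) << 14 = 0x00010000
	 *   sum           = 0x00010001
	 *   * 0xFF        = 0x00FF00FF  -> planes 0 and 2 all-ones, 1 and 3 zero
	 * expand_pixel() below plays the same game per nibble for the cfb4 layout.
	 */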
178 | |||
179 | /* arkfb_iplan_imageblit silently assumes that almost everything is 8-pixel aligned */ | ||
180 | static void arkfb_iplan_imageblit(struct fb_info *info, const struct fb_image *image) | ||
181 | { | ||
182 | u32 fg = expand_color(image->fg_color); | ||
183 | u32 bg = expand_color(image->bg_color); | ||
184 | const u8 *src1, *src; | ||
185 | u8 __iomem *dst1; | ||
186 | u32 __iomem *dst; | ||
187 | u32 val; | ||
188 | int x, y; | ||
189 | |||
190 | src1 = image->data; | ||
191 | dst1 = info->screen_base + (image->dy * info->fix.line_length) | ||
192 | + ((image->dx / 8) * 4); | ||
193 | |||
194 | for (y = 0; y < image->height; y++) { | ||
195 | src = src1; | ||
196 | dst = (u32 __iomem *) dst1; | ||
197 | for (x = 0; x < image->width; x += 8) { | ||
198 | val = *(src++) * 0x01010101; | ||
199 | val = (val & fg) | (~val & bg); | ||
200 | fb_writel(val, dst++); | ||
201 | } | ||
202 | src1 += image->width / 8; | ||
203 | dst1 += info->fix.line_length; | ||
204 | } | ||
205 | |||
206 | } | ||
207 | |||
208 | /* arkfb_iplan_fillrect silently assumes that almost everything is 8-pixel aligned */ | ||
209 | static void arkfb_iplan_fillrect(struct fb_info *info, const struct fb_fillrect *rect) | ||
210 | { | ||
211 | u32 fg = expand_color(rect->color); | ||
212 | u8 __iomem *dst1; | ||
213 | u32 __iomem *dst; | ||
214 | int x, y; | ||
215 | |||
216 | dst1 = info->screen_base + (rect->dy * info->fix.line_length) | ||
217 | + ((rect->dx / 8) * 4); | ||
218 | |||
219 | for (y = 0; y < rect->height; y++) { | ||
220 | dst = (u32 __iomem *) dst1; | ||
221 | for (x = 0; x < rect->width; x += 8) { | ||
222 | fb_writel(fg, dst++); | ||
223 | } | ||
224 | dst1 += info->fix.line_length; | ||
225 | } | ||
226 | |||
227 | } | ||
228 | |||
229 | |||
230 | /* image data is MSB-first, fb structure is high-nibble-in-low-byte-first */ | ||
231 | static inline u32 expand_pixel(u32 c) | ||
232 | { | ||
233 | return (((c & 1) << 24) | ((c & 2) << 27) | ((c & 4) << 14) | ((c & 8) << 17) | | ||
234 | ((c & 16) << 4) | ((c & 32) << 7) | ((c & 64) >> 6) | ((c & 128) >> 3)) * 0xF; | ||
235 | } | ||
236 | |||
237 | /* arkfb_cfb4_imageblit silently assumes that almost everything is 8-pixel aligned */ | ||
238 | static void arkfb_cfb4_imageblit(struct fb_info *info, const struct fb_image *image) | ||
239 | { | ||
240 | u32 fg = image->fg_color * 0x11111111; | ||
241 | u32 bg = image->bg_color * 0x11111111; | ||
242 | const u8 *src1, *src; | ||
243 | u8 __iomem *dst1; | ||
244 | u32 __iomem *dst; | ||
245 | u32 val; | ||
246 | int x, y; | ||
247 | |||
248 | src1 = image->data; | ||
249 | dst1 = info->screen_base + (image->dy * info->fix.line_length) | ||
250 | + ((image->dx / 8) * 4); | ||
251 | |||
252 | for (y = 0; y < image->height; y++) { | ||
253 | src = src1; | ||
254 | dst = (u32 __iomem *) dst1; | ||
255 | for (x = 0; x < image->width; x += 8) { | ||
256 | val = expand_pixel(*(src++)); | ||
257 | val = (val & fg) | (~val & bg); | ||
258 | fb_writel(val, dst++); | ||
259 | } | ||
260 | src1 += image->width / 8; | ||
261 | dst1 += info->fix.line_length; | ||
262 | } | ||
263 | |||
264 | } | ||
265 | |||
266 | static void arkfb_imageblit(struct fb_info *info, const struct fb_image *image) | ||
267 | { | ||
268 | if ((info->var.bits_per_pixel == 4) && (image->depth == 1) | ||
269 | && ((image->width % 8) == 0) && ((image->dx % 8) == 0)) { | ||
270 | if (info->fix.type == FB_TYPE_INTERLEAVED_PLANES) | ||
271 | arkfb_iplan_imageblit(info, image); | ||
272 | else | ||
273 | arkfb_cfb4_imageblit(info, image); | ||
274 | } else | ||
275 | cfb_imageblit(info, image); | ||
276 | } | ||
277 | |||
278 | static void arkfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect) | ||
279 | { | ||
280 | if ((info->var.bits_per_pixel == 4) | ||
281 | && ((rect->width % 8) == 0) && ((rect->dx % 8) == 0) | ||
282 | && (info->fix.type == FB_TYPE_INTERLEAVED_PLANES)) | ||
283 | arkfb_iplan_fillrect(info, rect); | ||
284 | else | ||
285 | cfb_fillrect(info, rect); | ||
286 | } | ||
287 | |||
288 | |||
289 | /* ------------------------------------------------------------------------- */ | ||
290 | |||
291 | |||
292 | enum | ||
293 | { | ||
294 | DAC_PSEUDO8_8, | ||
295 | DAC_RGB1555_8, | ||
296 | DAC_RGB0565_8, | ||
297 | DAC_RGB0888_8, | ||
298 | DAC_RGB8888_8, | ||
299 | DAC_PSEUDO8_16, | ||
300 | DAC_RGB1555_16, | ||
301 | DAC_RGB0565_16, | ||
302 | DAC_RGB0888_16, | ||
303 | DAC_RGB8888_16, | ||
304 | DAC_MAX | ||
305 | }; | ||
306 | |||
307 | struct dac_ops { | ||
308 | int (*dac_get_mode)(struct dac_info *info); | ||
309 | int (*dac_set_mode)(struct dac_info *info, int mode); | ||
310 | int (*dac_get_freq)(struct dac_info *info, int channel); | ||
311 | int (*dac_set_freq)(struct dac_info *info, int channel, u32 freq); | ||
312 | void (*dac_release)(struct dac_info *info); | ||
313 | }; | ||
314 | |||
315 | typedef void (*dac_read_regs_t)(void *data, u8 *code, int count); | ||
316 | typedef void (*dac_write_regs_t)(void *data, u8 *code, int count); | ||
317 | |||
318 | struct dac_info | ||
319 | { | ||
320 | struct dac_ops *dacops; | ||
321 | dac_read_regs_t dac_read_regs; | ||
322 | dac_write_regs_t dac_write_regs; | ||
323 | void *data; | ||
324 | }; | ||
325 | |||
326 | |||
327 | static inline u8 dac_read_reg(struct dac_info *info, u8 reg) | ||
328 | { | ||
329 | u8 code[2] = {reg, 0}; | ||
330 | info->dac_read_regs(info->data, code, 1); | ||
331 | return code[1]; | ||
332 | } | ||
333 | |||
334 | static inline void dac_read_regs(struct dac_info *info, u8 *code, int count) | ||
335 | { | ||
336 | info->dac_read_regs(info->data, code, count); | ||
337 | } | ||
338 | |||
339 | static inline void dac_write_reg(struct dac_info *info, u8 reg, u8 val) | ||
340 | { | ||
341 | u8 code[2] = {reg, val}; | ||
342 | info->dac_write_regs(info->data, code, 1); | ||
343 | } | ||
344 | |||
345 | static inline void dac_write_regs(struct dac_info *info, u8 *code, int count) | ||
346 | { | ||
347 | info->dac_write_regs(info->data, code, count); | ||
348 | } | ||
349 | |||
350 | static inline int dac_set_mode(struct dac_info *info, int mode) | ||
351 | { | ||
352 | return info->dacops->dac_set_mode(info, mode); | ||
353 | } | ||
354 | |||
355 | static inline int dac_set_freq(struct dac_info *info, int channel, u32 freq) | ||
356 | { | ||
357 | return info->dacops->dac_set_freq(info, channel, freq); | ||
358 | } | ||
359 | |||
360 | static inline void dac_release(struct dac_info *info) | ||
361 | { | ||
362 | info->dacops->dac_release(info); | ||
363 | } | ||
364 | |||
365 | |||
366 | /* ------------------------------------------------------------------------- */ | ||
367 | |||
368 | |||
369 | /* ICS5342 DAC */ | ||
370 | |||
371 | struct ics5342_info | ||
372 | { | ||
373 | struct dac_info dac; | ||
374 | u8 mode; | ||
375 | }; | ||
376 | |||
377 | #define DAC_PAR(info) ((struct ics5342_info *) info) | ||
378 | |||
379 | /* LSB is set to distinguish unused slots */ | ||
380 | static const u8 ics5342_mode_table[DAC_MAX] = { | ||
381 | [DAC_PSEUDO8_8] = 0x01, [DAC_RGB1555_8] = 0x21, [DAC_RGB0565_8] = 0x61, | ||
382 | [DAC_RGB0888_8] = 0x41, [DAC_PSEUDO8_16] = 0x11, [DAC_RGB1555_16] = 0x31, | ||
383 | [DAC_RGB0565_16] = 0x51, [DAC_RGB0888_16] = 0x91, [DAC_RGB8888_16] = 0x71 | ||
384 | }; | ||
385 | |||
386 | static int ics5342_set_mode(struct dac_info *info, int mode) | ||
387 | { | ||
388 | u8 code; | ||
389 | |||
390 | if (mode >= DAC_MAX) | ||
391 | return -EINVAL; | ||
392 | |||
393 | code = ics5342_mode_table[mode]; | ||
394 | |||
395 | if (! code) | ||
396 | return -EINVAL; | ||
397 | |||
398 | dac_write_reg(info, 6, code & 0xF0); | ||
399 | DAC_PAR(info)->mode = mode; | ||
400 | |||
401 | return 0; | ||
402 | } | ||
403 | |||
404 | static const struct svga_pll ics5342_pll = {3, 129, 3, 33, 0, 3, | ||
405 | 60000, 250000, 14318}; | ||
406 | |||
407 | /* pd4 - allow only postdivider 4 (r=2) */ | ||
408 | static const struct svga_pll ics5342_pll_pd4 = {3, 129, 3, 33, 2, 2, | ||
409 | 60000, 335000, 14318}; | ||
410 | |||
411 | /* 270 MHz should be the upper bound for the VCO clock according to the specs, | ||
412 | but that is too restrictive in the pd4 case */ | ||
413 | |||
414 | static int ics5342_set_freq(struct dac_info *info, int channel, u32 freq) | ||
415 | { | ||
416 | u16 m, n, r; | ||
417 | |||
418 | /* only postdivider 4 (r=2) is valid in mode DAC_PSEUDO8_16 */ | ||
419 | int rv = svga_compute_pll((DAC_PAR(info)->mode == DAC_PSEUDO8_16) | ||
420 | ? &ics5342_pll_pd4 : &ics5342_pll, | ||
421 | freq, &m, &n, &r, 0); | ||
422 | |||
423 | if (rv < 0) { | ||
424 | return -EINVAL; | ||
425 | } else { | ||
426 | u8 code[6] = {4, 3, 5, m-2, 5, (n-2) | (r << 5)}; | ||
427 | dac_write_regs(info, code, 3); | ||
428 | return 0; | ||
429 | } | ||
430 | } | ||
431 | |||
432 | static void ics5342_release(struct dac_info *info) | ||
433 | { | ||
434 | ics5342_set_mode(info, DAC_PSEUDO8_8); | ||
435 | kfree(info); | ||
436 | } | ||
437 | |||
438 | static struct dac_ops ics5342_ops = { | ||
439 | .dac_set_mode = ics5342_set_mode, | ||
440 | .dac_set_freq = ics5342_set_freq, | ||
441 | .dac_release = ics5342_release | ||
442 | }; | ||
443 | |||
444 | |||
445 | static struct dac_info * ics5342_init(dac_read_regs_t drr, dac_write_regs_t dwr, void *data) | ||
446 | { | ||
447 | struct dac_info *info = kzalloc(sizeof(struct ics5342_info), GFP_KERNEL); | ||
448 | |||
449 | if (! info) | ||
450 | return NULL; | ||
451 | |||
452 | info->dacops = &ics5342_ops; | ||
453 | info->dac_read_regs = drr; | ||
454 | info->dac_write_regs = dwr; | ||
455 | info->data = data; | ||
456 | DAC_PAR(info)->mode = DAC_PSEUDO8_8; /* initial estimate */ | ||
457 | return info; | ||
458 | } | ||
459 | |||
460 | |||
461 | /* ------------------------------------------------------------------------- */ | ||
462 | |||
463 | |||
464 | static unsigned short dac_regs[4] = {0x3c8, 0x3c9, 0x3c6, 0x3c7}; | ||
465 | |||
466 | static void ark_dac_read_regs(void *data, u8 *code, int count) | ||
467 | { | ||
468 | u8 regval = vga_rseq(NULL, 0x1C); | ||
469 | |||
470 | while (count != 0) | ||
471 | { | ||
472 | vga_wseq(NULL, 0x1C, regval | ((code[0] & 4) ? 0x80 : 0)); | ||
473 | code[1] = vga_r(NULL, dac_regs[code[0] & 3]); | ||
474 | count--; | ||
475 | code += 2; | ||
476 | } | ||
477 | |||
478 | vga_wseq(NULL, 0x1C, regval); | ||
479 | } | ||
480 | |||
481 | static void ark_dac_write_regs(void *data, u8 *code, int count) | ||
482 | { | ||
483 | u8 regval = vga_rseq(NULL, 0x1C); | ||
484 | |||
485 | while (count != 0) | ||
486 | { | ||
487 | vga_wseq(NULL, 0x1C, regval | ((code[0] & 4) ? 0x80 : 0)); | ||
488 | vga_w(NULL, dac_regs[code[0] & 3], code[1]); | ||
489 | count--; | ||
490 | code += 2; | ||
491 | } | ||
492 | |||
493 | vga_wseq(NULL, 0x1C, regval); | ||
494 | } | ||
495 | |||
496 | |||
497 | static void ark_set_pixclock(struct fb_info *info, u32 pixclock) | ||
498 | { | ||
499 | struct arkfb_info *par = info->par; | ||
500 | u8 regval; | ||
501 | |||
502 | int rv = dac_set_freq(par->dac, 0, 1000000000 / pixclock); | ||
503 | if (rv < 0) { | ||
504 | printk(KERN_ERR "fb%d: cannot set requested pixclock, keeping old value\n", info->node); | ||
505 | return; | ||
506 | } | ||
507 | |||
508 | /* Set VGA misc register */ | ||
509 | regval = vga_r(NULL, VGA_MIS_R); | ||
510 | vga_w(NULL, VGA_MIS_W, regval | VGA_MIS_ENB_PLL_LOAD); | ||
511 | } | ||
512 | |||
513 | |||
514 | /* Open framebuffer */ | ||
515 | |||
516 | static int arkfb_open(struct fb_info *info, int user) | ||
517 | { | ||
518 | struct arkfb_info *par = info->par; | ||
519 | |||
520 | mutex_lock(&(par->open_lock)); | ||
521 | if (par->ref_count == 0) { | ||
522 | memset(&(par->state), 0, sizeof(struct vgastate)); | ||
523 | par->state.flags = VGA_SAVE_MODE | VGA_SAVE_FONTS | VGA_SAVE_CMAP; | ||
524 | par->state.num_crtc = 0x60; | ||
525 | par->state.num_seq = 0x30; | ||
526 | save_vga(&(par->state)); | ||
527 | } | ||
528 | |||
529 | par->ref_count++; | ||
530 | mutex_unlock(&(par->open_lock)); | ||
531 | |||
532 | return 0; | ||
533 | } | ||
534 | |||
535 | /* Close framebuffer */ | ||
536 | |||
537 | static int arkfb_release(struct fb_info *info, int user) | ||
538 | { | ||
539 | struct arkfb_info *par = info->par; | ||
540 | |||
541 | mutex_lock(&(par->open_lock)); | ||
542 | if (par->ref_count == 0) { | ||
543 | mutex_unlock(&(par->open_lock)); | ||
544 | return -EINVAL; | ||
545 | } | ||
546 | |||
547 | if (par->ref_count == 1) { | ||
548 | restore_vga(&(par->state)); | ||
549 | dac_set_mode(par->dac, DAC_PSEUDO8_8); | ||
550 | } | ||
551 | |||
552 | par->ref_count--; | ||
553 | mutex_unlock(&(par->open_lock)); | ||
554 | |||
555 | return 0; | ||
556 | } | ||
557 | |||
558 | /* Validate passed in var */ | ||
559 | |||
560 | static int arkfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) | ||
561 | { | ||
562 | int rv, mem, step; | ||
563 | |||
564 | /* Find appropriate format */ | ||
565 | rv = svga_match_format (arkfb_formats, var, NULL); | ||
566 | if (rv < 0) | ||
567 | { | ||
568 | printk(KERN_ERR "fb%d: unsupported mode requested\n", info->node); | ||
569 | return rv; | ||
570 | } | ||
571 | |||
572 | /* Do not allow the real resolution to be larger than the virtual one */ | ||
573 | if (var->xres > var->xres_virtual) | ||
574 | var->xres_virtual = var->xres; | ||
575 | |||
576 | if (var->yres > var->yres_virtual) | ||
577 | var->yres_virtual = var->yres; | ||
578 | |||
579 | /* Round up xres_virtual to have proper alignment of lines */ | ||
580 | step = arkfb_formats[rv].xresstep - 1; | ||
581 | var->xres_virtual = (var->xres_virtual+step) & ~step; | ||
582 | |||
583 | |||
584 | /* Check whether we have enough memory */ | ||
585 | mem = ((var->bits_per_pixel * var->xres_virtual) >> 3) * var->yres_virtual; | ||
586 | if (mem > info->screen_size) | ||
587 | { | ||
588 | printk(KERN_ERR "fb%d: not enough framebuffer memory (%d kB requested, %d kB available)\n", info->node, mem >> 10, (unsigned int) (info->screen_size >> 10)); | ||
589 | return -EINVAL; | ||
590 | } | ||
591 | |||
592 | rv = svga_check_timings (&ark_timing_regs, var, info->node); | ||
593 | if (rv < 0) | ||
594 | { | ||
595 | printk(KERN_ERR "fb%d: invalid timings requested\n", info->node); | ||
596 | return rv; | ||
597 | } | ||
598 | |||
599 | /* Interlaced mode is broken */ | ||
600 | if (var->vmode & FB_VMODE_INTERLACED) | ||
601 | return -EINVAL; | ||
602 | |||
603 | return 0; | ||
604 | } | ||
605 | |||
606 | /* Set video mode from par */ | ||
607 | |||
608 | static int arkfb_set_par(struct fb_info *info) | ||
609 | { | ||
610 | struct arkfb_info *par = info->par; | ||
611 | u32 value, mode, hmul, hdiv, offset_value, screen_size; | ||
612 | u32 bpp = info->var.bits_per_pixel; | ||
613 | u8 regval; | ||
614 | |||
615 | if (bpp != 0) { | ||
616 | info->fix.ypanstep = 1; | ||
617 | info->fix.line_length = (info->var.xres_virtual * bpp) / 8; | ||
618 | |||
619 | info->flags &= ~FBINFO_MISC_TILEBLITTING; | ||
620 | info->tileops = NULL; | ||
621 | |||
622 | /* 4 bpp supports only 8-pixel-wide tiles, other depths any tile size */ | ||
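622 | /* blit_x/blit_y are bitmaps of allowed sizes: bit n set means a | ||
622 | * width/height of n + 1 pixels is supported, so 1 << (8 - 1) means | ||
622 | * "8 pixels only" and ~0 means "any size" */ | ||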
623 | info->pixmap.blit_x = (bpp == 4) ? (1 << (8 - 1)) : (~(u32)0); | ||
624 | info->pixmap.blit_y = ~(u32)0; | ||
625 | |||
626 | offset_value = (info->var.xres_virtual * bpp) / 64; | ||
627 | screen_size = info->var.yres_virtual * info->fix.line_length; | ||
628 | } else { | ||
629 | info->fix.ypanstep = 16; | ||
630 | info->fix.line_length = 0; | ||
631 | |||
632 | info->flags |= FBINFO_MISC_TILEBLITTING; | ||
633 | info->tileops = &arkfb_tile_ops; | ||
634 | |||
635 | /* supports 8x16 tiles only */ | ||
636 | info->pixmap.blit_x = 1 << (8 - 1); | ||
637 | info->pixmap.blit_y = 1 << (16 - 1); | ||
638 | |||
639 | offset_value = info->var.xres_virtual / 16; | ||
640 | screen_size = (info->var.xres_virtual * info->var.yres_virtual) / 64; | ||
641 | } | ||
642 | |||
643 | info->var.xoffset = 0; | ||
644 | info->var.yoffset = 0; | ||
645 | info->var.activate = FB_ACTIVATE_NOW; | ||
646 | |||
647 | /* Unlock registers */ | ||
648 | svga_wcrt_mask(0x11, 0x00, 0x80); | ||
649 | |||
650 | /* Blank screen and turn off sync */ | ||
651 | svga_wseq_mask(0x01, 0x20, 0x20); | ||
652 | svga_wcrt_mask(0x17, 0x00, 0x80); | ||
653 | |||
654 | /* Set default values */ | ||
655 | svga_set_default_gfx_regs(); | ||
656 | svga_set_default_atc_regs(); | ||
657 | svga_set_default_seq_regs(); | ||
658 | svga_set_default_crt_regs(); | ||
659 | svga_wcrt_multi(ark_line_compare_regs, 0xFFFFFFFF); | ||
660 | svga_wcrt_multi(ark_start_address_regs, 0); | ||
661 | |||
662 | /* ARK specific initialization */ | ||
663 | svga_wseq_mask(0x10, 0x1F, 0x1F); /* enable linear framebuffer and full memory access */ | ||
664 | svga_wseq_mask(0x12, 0x03, 0x03); /* 4 MB linear framebuffer size */ | ||
665 | |||
666 | vga_wseq(NULL, 0x13, info->fix.smem_start >> 16); | ||
667 | vga_wseq(NULL, 0x14, info->fix.smem_start >> 24); | ||
668 | vga_wseq(NULL, 0x15, 0); | ||
669 | vga_wseq(NULL, 0x16, 0); | ||
670 | |||
671 | /* Set the FIFO threshold register */ | ||
672 | /* It is a fascinating way to store a 5-bit value in an 8-bit register */ | ||
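672 | /* threshold bits 1-3 land in regval bits 0-2, bit 0 in bit 7 and | ||
672 | * bit 4 in bit 5; e.g. threshold 0x13 gives 0x10|0x01|0x80|0x20 = 0xB1 */ | ||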
673 | regval = 0x10 | ((threshold & 0x0E) >> 1) | (threshold & 0x01) << 7 | (threshold & 0x10) << 1; | ||
674 | vga_wseq(NULL, 0x18, regval); | ||
675 | |||
676 | /* Set the offset register */ | ||
677 | pr_debug("fb%d: offset register : %d\n", info->node, offset_value); | ||
678 | svga_wcrt_multi(ark_offset_regs, offset_value); | ||
679 | |||
680 | /* fix for hi-res textmode */ | ||
681 | svga_wcrt_mask(0x40, 0x08, 0x08); | ||
682 | |||
683 | if (info->var.vmode & FB_VMODE_DOUBLE) | ||
684 | svga_wcrt_mask(0x09, 0x80, 0x80); | ||
685 | else | ||
686 | svga_wcrt_mask(0x09, 0x00, 0x80); | ||
687 | |||
688 | if (info->var.vmode & FB_VMODE_INTERLACED) | ||
689 | svga_wcrt_mask(0x44, 0x04, 0x04); | ||
690 | else | ||
691 | svga_wcrt_mask(0x44, 0x00, 0x04); | ||
692 | |||
693 | hmul = 1; | ||
694 | hdiv = 1; | ||
695 | mode = svga_match_format(arkfb_formats, &(info->var), &(info->fix)); | ||
696 | |||
697 | /* Set mode-specific register values */ | ||
698 | switch (mode) { | ||
699 | case 0: | ||
700 | pr_debug("fb%d: text mode\n", info->node); | ||
701 | svga_set_textmode_vga_regs(); | ||
702 | |||
703 | vga_wseq(NULL, 0x11, 0x10); /* basic VGA mode */ | ||
704 | svga_wcrt_mask(0x46, 0x00, 0x04); /* 8bit pixel path */ | ||
705 | dac_set_mode(par->dac, DAC_PSEUDO8_8); | ||
706 | |||
707 | break; | ||
708 | case 1: | ||
709 | pr_debug("fb%d: 4 bit pseudocolor\n", info->node); | ||
710 | vga_wgfx(NULL, VGA_GFX_MODE, 0x40); | ||
711 | |||
712 | vga_wseq(NULL, 0x11, 0x10); /* basic VGA mode */ | ||
713 | svga_wcrt_mask(0x46, 0x00, 0x04); /* 8bit pixel path */ | ||
714 | dac_set_mode(par->dac, DAC_PSEUDO8_8); | ||
715 | break; | ||
716 | case 2: | ||
717 | pr_debug("fb%d: 4 bit pseudocolor, planar\n", info->node); | ||
718 | |||
719 | vga_wseq(NULL, 0x11, 0x10); /* basic VGA mode */ | ||
720 | svga_wcrt_mask(0x46, 0x00, 0x04); /* 8bit pixel path */ | ||
721 | dac_set_mode(par->dac, DAC_PSEUDO8_8); | ||
722 | break; | ||
723 | case 3: | ||
724 | pr_debug("fb%d: 8 bit pseudocolor\n", info->node); | ||
725 | |||
726 | vga_wseq(NULL, 0x11, 0x16); /* 8bpp accel mode */ | ||
727 | |||
728 | if (info->var.pixclock > 20000) { | ||
729 | pr_debug("fb%d: not using multiplex\n", info->node); | ||
730 | svga_wcrt_mask(0x46, 0x00, 0x04); /* 8bit pixel path */ | ||
731 | dac_set_mode(par->dac, DAC_PSEUDO8_8); | ||
732 | } else { | ||
733 | pr_debug("fb%d: using multiplex\n", info->node); | ||
734 | svga_wcrt_mask(0x46, 0x04, 0x04); /* 16bit pixel path */ | ||
735 | dac_set_mode(par->dac, DAC_PSEUDO8_16); | ||
736 | hdiv = 2; | ||
737 | } | ||
738 | break; | ||
739 | case 4: | ||
740 | pr_debug("fb%d: 5/5/5 truecolor\n", info->node); | ||
741 | |||
742 | vga_wseq(NULL, 0x11, 0x1A); /* 16bpp accel mode */ | ||
743 | svga_wcrt_mask(0x46, 0x04, 0x04); /* 16bit pixel path */ | ||
744 | dac_set_mode(par->dac, DAC_RGB1555_16); | ||
745 | break; | ||
746 | case 5: | ||
747 | pr_debug("fb%d: 5/6/5 truecolor\n", info->node); | ||
748 | |||
749 | vga_wseq(NULL, 0x11, 0x1A); /* 16bpp accel mode */ | ||
750 | svga_wcrt_mask(0x46, 0x04, 0x04); /* 16bit pixel path */ | ||
751 | dac_set_mode(par->dac, DAC_RGB0565_16); | ||
752 | break; | ||
753 | case 6: | ||
754 | pr_debug("fb%d: 8/8/8 truecolor\n", info->node); | ||
755 | |||
756 | vga_wseq(NULL, 0x11, 0x16); /* 8bpp accel mode ??? */ | ||
757 | svga_wcrt_mask(0x46, 0x04, 0x04); /* 16bit pixel path */ | ||
758 | dac_set_mode(par->dac, DAC_RGB0888_16); | ||
759 | hmul = 3; | ||
760 | hdiv = 2; | ||
761 | break; | ||
762 | case 7: | ||
763 | pr_debug("fb%d: 8/8/8/8 truecolor\n", info->node); | ||
764 | |||
765 | vga_wseq(NULL, 0x11, 0x1E); /* 32bpp accel mode */ | ||
766 | svga_wcrt_mask(0x46, 0x04, 0x04); /* 16bit pixel path */ | ||
767 | dac_set_mode(par->dac, DAC_RGB8888_16); | ||
768 | hmul = 2; | ||
769 | break; | ||
770 | default: | ||
771 | printk(KERN_ERR "fb%d: unsupported mode - bug\n", info->node); | ||
772 | return -EINVAL; | ||
773 | } | ||
774 | |||
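774 | /* hmul/hdiv rescale the clock for modes where the DAC consumes a | ||
774 | * different number of bytes per clock: multiplexed 8 bpp halves it | ||
774 | * (hdiv = 2), 24 bpp over the 16-bit path needs 3/2 (hmul = 3, | ||
774 | * hdiv = 2) and 32 bpp doubles it (hmul = 2) */ | ||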
775 | ark_set_pixclock(info, (hdiv * info->var.pixclock) / hmul); | ||
776 | svga_set_timings(&ark_timing_regs, &(info->var), hmul, hdiv, | ||
777 | (info->var.vmode & FB_VMODE_DOUBLE) ? 2 : 1, | ||
778 | (info->var.vmode & FB_VMODE_INTERLACED) ? 2 : 1, | ||
779 | hmul, info->node); | ||
780 | |||
781 | /* Set interlaced mode start/end register */ | ||
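781 | /* value works out to the horizontal total in character clocks | ||
781 | * minus the usual 5; CR42 apparently takes half of that */ | ||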
782 | value = info->var.xres + info->var.left_margin + info->var.right_margin + info->var.hsync_len; | ||
783 | value = ((value * hmul / hdiv) / 8) - 5; | ||
784 | vga_wcrt(NULL, 0x42, (value + 1) / 2); | ||
785 | |||
786 | memset_io(info->screen_base, 0x00, screen_size); | ||
787 | /* Device and screen back on */ | ||
788 | svga_wcrt_mask(0x17, 0x80, 0x80); | ||
789 | svga_wseq_mask(0x01, 0x00, 0x20); | ||
790 | |||
791 | return 0; | ||
792 | } | ||
793 | |||
794 | /* Set a colour register */ | ||
795 | |||
796 | static int arkfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, | ||
797 | u_int transp, struct fb_info *fb) | ||
798 | { | ||
799 | switch (fb->var.bits_per_pixel) { | ||
800 | case 0: | ||
801 | case 4: | ||
802 | if (regno >= 16) | ||
803 | return -EINVAL; | ||
804 | |||
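804 | /* best guess: with the standard 4 bpp layout the pixel value | ||
804 | * reaches the palette in the high nibble, so entry regno * 16 is | ||
804 | * programmed with pel mask 0xF0; otherwise the low nibble is used */ | ||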
805 | if ((fb->var.bits_per_pixel == 4) && | ||
806 | (fb->var.nonstd == 0)) { | ||
807 | outb(0xF0, VGA_PEL_MSK); | ||
808 | outb(regno*16, VGA_PEL_IW); | ||
809 | } else { | ||
810 | outb(0x0F, VGA_PEL_MSK); | ||
811 | outb(regno, VGA_PEL_IW); | ||
812 | } | ||
813 | outb(red >> 10, VGA_PEL_D); | ||
814 | outb(green >> 10, VGA_PEL_D); | ||
815 | outb(blue >> 10, VGA_PEL_D); | ||
816 | break; | ||
817 | case 8: | ||
818 | if (regno >= 256) | ||
819 | return -EINVAL; | ||
820 | |||
821 | outb(0xFF, VGA_PEL_MSK); | ||
822 | outb(regno, VGA_PEL_IW); | ||
823 | outb(red >> 10, VGA_PEL_D); | ||
824 | outb(green >> 10, VGA_PEL_D); | ||
825 | outb(blue >> 10, VGA_PEL_D); | ||
826 | break; | ||
827 | case 16: | ||
828 | if (regno >= 16) | ||
829 | return 0; | ||
830 | |||
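830 | /* pack the 16-bit components down to 1/5/5/5 or 5/6/5; e.g. pure | ||
830 | * red (red = 0xFFFF) packs to 0x7C00 or 0xF800 respectively */ | ||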
831 | if (fb->var.green.length == 5) | ||
832 | ((u32*)fb->pseudo_palette)[regno] = ((red & 0xF800) >> 1) | | ||
833 | ((green & 0xF800) >> 6) | ((blue & 0xF800) >> 11); | ||
834 | else if (fb->var.green.length == 6) | ||
835 | ((u32*)fb->pseudo_palette)[regno] = (red & 0xF800) | | ||
836 | ((green & 0xFC00) >> 5) | ((blue & 0xF800) >> 11); | ||
837 | else | ||
838 | return -EINVAL; | ||
839 | break; | ||
840 | case 24: | ||
841 | case 32: | ||
842 | if (regno >= 16) | ||
843 | return 0; | ||
844 | |||
845 | ((u32*)fb->pseudo_palette)[regno] = ((red & 0xFF00) << 8) | | ||
846 | (green & 0xFF00) | ((blue & 0xFF00) >> 8); | ||
847 | break; | ||
848 | default: | ||
849 | return -EINVAL; | ||
850 | } | ||
851 | |||
852 | return 0; | ||
853 | } | ||
854 | |||
855 | /* Set the display blanking state */ | ||
856 | |||
857 | static int arkfb_blank(int blank_mode, struct fb_info *info) | ||
858 | { | ||
859 | switch (blank_mode) { | ||
860 | case FB_BLANK_UNBLANK: | ||
861 | pr_debug("fb%d: unblank\n", info->node); | ||
862 | svga_wseq_mask(0x01, 0x00, 0x20); | ||
863 | svga_wcrt_mask(0x17, 0x80, 0x80); | ||
864 | break; | ||
865 | case FB_BLANK_NORMAL: | ||
866 | pr_debug("fb%d: blank\n", info->node); | ||
867 | svga_wseq_mask(0x01, 0x20, 0x20); | ||
868 | svga_wcrt_mask(0x17, 0x80, 0x80); | ||
869 | break; | ||
870 | case FB_BLANK_POWERDOWN: | ||
871 | case FB_BLANK_HSYNC_SUSPEND: | ||
872 | case FB_BLANK_VSYNC_SUSPEND: | ||
873 | pr_debug("fb%d: sync down\n", info->node); | ||
874 | svga_wseq_mask(0x01, 0x20, 0x20); | ||
875 | svga_wcrt_mask(0x17, 0x00, 0x80); | ||
876 | break; | ||
877 | } | ||
878 | return 0; | ||
879 | } | ||
880 | |||
881 | |||
882 | /* Pan the display */ | ||
883 | |||
884 | static int arkfb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info) | ||
885 | { | ||
886 | unsigned int offset; | ||
887 | |||
888 | /* Calculate the offset */ | ||
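888 | /* the start address seems to count 8-byte units here (4-byte in | ||
888 | * 4 bpp modes), hence the final shifts */ | ||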
889 | if (var->bits_per_pixel == 0) { | ||
890 | offset = (var->yoffset / 16) * (var->xres_virtual / 2) + (var->xoffset / 2); | ||
891 | offset = offset >> 2; | ||
892 | } else { | ||
893 | offset = (var->yoffset * info->fix.line_length) + | ||
894 | (var->xoffset * var->bits_per_pixel / 8); | ||
895 | offset = offset >> ((var->bits_per_pixel == 4) ? 2 : 3); | ||
896 | } | ||
897 | |||
898 | /* Set the offset */ | ||
899 | svga_wcrt_multi(ark_start_address_regs, offset); | ||
900 | |||
901 | return 0; | ||
902 | } | ||
903 | |||
904 | |||
905 | /* ------------------------------------------------------------------------- */ | ||
906 | |||
907 | |||
908 | /* Frame buffer operations */ | ||
909 | |||
910 | static struct fb_ops arkfb_ops = { | ||
911 | .owner = THIS_MODULE, | ||
912 | .fb_open = arkfb_open, | ||
913 | .fb_release = arkfb_release, | ||
914 | .fb_check_var = arkfb_check_var, | ||
915 | .fb_set_par = arkfb_set_par, | ||
916 | .fb_setcolreg = arkfb_setcolreg, | ||
917 | .fb_blank = arkfb_blank, | ||
918 | .fb_pan_display = arkfb_pan_display, | ||
919 | .fb_fillrect = arkfb_fillrect, | ||
920 | .fb_copyarea = cfb_copyarea, | ||
921 | .fb_imageblit = arkfb_imageblit, | ||
922 | .fb_get_caps = svga_get_caps, | ||
923 | }; | ||
924 | |||
925 | |||
926 | /* ------------------------------------------------------------------------- */ | ||
927 | |||
928 | |||
929 | /* PCI probe */ | ||
930 | static int __devinit ark_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) | ||
931 | { | ||
932 | struct fb_info *info; | ||
933 | struct arkfb_info *par; | ||
934 | int rc; | ||
935 | u8 regval; | ||
936 | |||
937 | /* Ignore secondary VGA device because there is no VGA arbitration */ | ||
938 | if (! svga_primary_device(dev)) { | ||
939 | dev_info(&(dev->dev), "ignoring secondary device\n"); | ||
940 | return -ENODEV; | ||
941 | } | ||
942 | |||
943 | /* Allocate and fill driver data structure */ | ||
944 | info = framebuffer_alloc(sizeof(struct arkfb_info), NULL); | ||
945 | if (! info) { | ||
946 | dev_err(&(dev->dev), "cannot allocate memory\n"); | ||
947 | return -ENOMEM; | ||
948 | } | ||
949 | |||
950 | par = info->par; | ||
951 | mutex_init(&par->open_lock); | ||
952 | |||
953 | info->flags = FBINFO_PARTIAL_PAN_OK | FBINFO_HWACCEL_YPAN; | ||
954 | info->fbops = &arkfb_ops; | ||
955 | |||
956 | /* Prepare PCI device */ | ||
957 | rc = pci_enable_device(dev); | ||
958 | if (rc < 0) { | ||
959 | dev_err(&(dev->dev), "cannot enable PCI device\n"); | ||
960 | goto err_enable_device; | ||
961 | } | ||
962 | |||
963 | rc = pci_request_regions(dev, "arkfb"); | ||
964 | if (rc < 0) { | ||
965 | dev_err(&(dev->dev), "cannot reserve framebuffer region\n"); | ||
966 | goto err_request_regions; | ||
967 | } | ||
968 | |||
969 | par->dac = ics5342_init(ark_dac_read_regs, ark_dac_write_regs, info); | ||
970 | if (! par->dac) { | ||
971 | rc = -ENOMEM; | ||
972 | dev_err(&(dev->dev), "RAMDAC initialization failed\n"); | ||
973 | goto err_dac; | ||
974 | } | ||
975 | |||
976 | info->fix.smem_start = pci_resource_start(dev, 0); | ||
977 | info->fix.smem_len = pci_resource_len(dev, 0); | ||
978 | |||
979 | /* Map physical IO memory address into kernel space */ | ||
980 | info->screen_base = pci_iomap(dev, 0, 0); | ||
981 | if (! info->screen_base) { | ||
982 | rc = -ENOMEM; | ||
983 | dev_err(&(dev->dev), "iomap for framebuffer failed\n"); | ||
984 | goto err_iomap; | ||
985 | } | ||
986 | |||
987 | /* FIXME get memsize */ | ||
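987 | /* the top two bits of SR10 seem to encode the memory size as a | ||
987 | * power of two: 0 -> 1 MB, 1 -> 2 MB, 2 -> 4 MB, 3 -> 8 MB */ | ||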
988 | regval = vga_rseq(NULL, 0x10); | ||
989 | info->screen_size = (1 << (regval >> 6)) << 20; | ||
990 | info->fix.smem_len = info->screen_size; | ||
991 | |||
992 | strcpy(info->fix.id, "ARK 2000PV"); | ||
993 | info->fix.mmio_start = 0; | ||
994 | info->fix.mmio_len = 0; | ||
995 | info->fix.type = FB_TYPE_PACKED_PIXELS; | ||
996 | info->fix.visual = FB_VISUAL_PSEUDOCOLOR; | ||
997 | info->fix.ypanstep = 0; | ||
998 | info->fix.accel = FB_ACCEL_NONE; | ||
999 | info->pseudo_palette = (void*) (par->pseudo_palette); | ||
1000 | |||
1001 | /* Prepare startup mode */ | ||
1002 | rc = fb_find_mode(&(info->var), info, mode, NULL, 0, NULL, 8); | ||
1003 | if (! ((rc == 1) || (rc == 2))) { | ||
1004 | rc = -EINVAL; | ||
1005 | dev_err(&(dev->dev), "mode %s not found\n", mode); | ||
1006 | goto err_find_mode; | ||
1007 | } | ||
1008 | |||
1009 | rc = fb_alloc_cmap(&info->cmap, 256, 0); | ||
1010 | if (rc < 0) { | ||
1011 | dev_err(&(dev->dev), "cannot allocate colormap\n"); | ||
1012 | goto err_alloc_cmap; | ||
1013 | } | ||
1014 | |||
1015 | rc = register_framebuffer(info); | ||
1016 | if (rc < 0) { | ||
1017 | dev_err(&(dev->dev), "cannot register framebuffer\n"); | ||
1018 | goto err_reg_fb; | ||
1019 | } | ||
1020 | |||
1021 | printk(KERN_INFO "fb%d: %s on %s, %d MB RAM\n", info->node, info->fix.id, | ||
1022 | pci_name(dev), info->fix.smem_len >> 20); | ||
1023 | |||
1024 | /* Record a reference to the driver data */ | ||
1025 | pci_set_drvdata(dev, info); | ||
1026 | |||
1027 | #ifdef CONFIG_MTRR | ||
1028 | if (mtrr) { | ||
1029 | par->mtrr_reg = -1; | ||
1030 | par->mtrr_reg = mtrr_add(info->fix.smem_start, info->fix.smem_len, MTRR_TYPE_WRCOMB, 1); | ||
1031 | } | ||
1032 | #endif | ||
1033 | |||
1034 | return 0; | ||
1035 | |||
1036 | /* Error handling */ | ||
1037 | err_reg_fb: | ||
1038 | fb_dealloc_cmap(&info->cmap); | ||
1039 | err_alloc_cmap: | ||
1040 | err_find_mode: | ||
1041 | pci_iounmap(dev, info->screen_base); | ||
1042 | err_iomap: | ||
1043 | dac_release(par->dac); | ||
1044 | err_dac: | ||
1045 | pci_release_regions(dev); | ||
1046 | err_request_regions: | ||
1047 | /* pci_disable_device(dev); */ | ||
1048 | err_enable_device: | ||
1049 | framebuffer_release(info); | ||
1050 | return rc; | ||
1051 | } | ||
1052 | |||
1053 | /* PCI remove */ | ||
1054 | |||
1055 | static void __devexit ark_pci_remove(struct pci_dev *dev) | ||
1056 | { | ||
1057 | struct fb_info *info = pci_get_drvdata(dev); | ||
1058 | struct arkfb_info *par = info ? info->par : NULL; | ||
1059 | |||
1060 | if (info) { | ||
1061 | #ifdef CONFIG_MTRR | ||
1062 | if (par->mtrr_reg >= 0) { | ||
1063 | mtrr_del(par->mtrr_reg, 0, 0); | ||
1064 | par->mtrr_reg = -1; | ||
1065 | } | ||
1066 | #endif | ||
1067 | |||
1068 | dac_release(par->dac); | ||
1069 | unregister_framebuffer(info); | ||
1070 | fb_dealloc_cmap(&info->cmap); | ||
1071 | |||
1072 | pci_iounmap(dev, info->screen_base); | ||
1073 | pci_release_regions(dev); | ||
1074 | /* pci_disable_device(dev); */ | ||
1075 | |||
1076 | pci_set_drvdata(dev, NULL); | ||
1077 | framebuffer_release(info); | ||
1078 | } | ||
1079 | } | ||
1080 | |||
1081 | |||
1082 | #ifdef CONFIG_PM | ||
1083 | /* PCI suspend */ | ||
1084 | |||
1085 | static int ark_pci_suspend (struct pci_dev* dev, pm_message_t state) | ||
1086 | { | ||
1087 | struct fb_info *info = pci_get_drvdata(dev); | ||
1088 | struct arkfb_info *par = info->par; | ||
1089 | |||
1090 | dev_info(&(dev->dev), "suspend\n"); | ||
1091 | |||
1092 | acquire_console_sem(); | ||
1093 | mutex_lock(&(par->open_lock)); | ||
1094 | |||
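1094 | /* on freeze (hibernation snapshot) the contents stay in RAM, so | ||
1094 | * the hardware is left untouched; same if nobody has the fb open */ | ||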
1095 | if ((state.event == PM_EVENT_FREEZE) || (par->ref_count == 0)) { | ||
1096 | mutex_unlock(&(par->open_lock)); | ||
1097 | release_console_sem(); | ||
1098 | return 0; | ||
1099 | } | ||
1100 | |||
1101 | fb_set_suspend(info, 1); | ||
1102 | |||
1103 | pci_save_state(dev); | ||
1104 | pci_disable_device(dev); | ||
1105 | pci_set_power_state(dev, pci_choose_state(dev, state)); | ||
1106 | |||
1107 | mutex_unlock(&(par->open_lock)); | ||
1108 | release_console_sem(); | ||
1109 | |||
1110 | return 0; | ||
1111 | } | ||
1112 | |||
1113 | |||
1114 | /* PCI resume */ | ||
1115 | |||
1116 | static int ark_pci_resume (struct pci_dev* dev) | ||
1117 | { | ||
1118 | struct fb_info *info = pci_get_drvdata(dev); | ||
1119 | struct arkfb_info *par = info->par; | ||
1120 | |||
1121 | dev_info(&(dev->dev), "resume\n"); | ||
1122 | |||
1123 | acquire_console_sem(); | ||
1124 | mutex_lock(&(par->open_lock)); | ||
1125 | |||
1126 | if (par->ref_count == 0) { | ||
1127 | mutex_unlock(&(par->open_lock)); | ||
1128 | release_console_sem(); | ||
1129 | return 0; | ||
1130 | } | ||
1131 | |||
1132 | pci_set_power_state(dev, PCI_D0); | ||
1133 | pci_restore_state(dev); | ||
1134 | |||
1135 | if (pci_enable_device(dev)) | ||
1136 | goto fail; | ||
1137 | |||
1138 | pci_set_master(dev); | ||
1139 | |||
1140 | arkfb_set_par(info); | ||
1141 | fb_set_suspend(info, 0); | ||
1142 | |||
1143 | mutex_unlock(&(par->open_lock)); | ||
1144 | fail: | ||
1145 | release_console_sem(); | ||
1146 | return 0; | ||
1147 | } | ||
1148 | #else | ||
1149 | #define ark_pci_suspend NULL | ||
1150 | #define ark_pci_resume NULL | ||
1151 | #endif /* CONFIG_PM */ | ||
1152 | |||
1153 | /* List of boards that we are trying to support */ | ||
1154 | |||
1155 | static struct pci_device_id ark_devices[] __devinitdata = { | ||
1156 | {PCI_DEVICE(0xEDD8, 0xA099)}, | ||
1157 | {0, 0, 0, 0, 0, 0, 0} | ||
1158 | }; | ||
1159 | |||
1160 | |||
1161 | MODULE_DEVICE_TABLE(pci, ark_devices); | ||
1162 | |||
1163 | static struct pci_driver arkfb_pci_driver = { | ||
1164 | .name = "arkfb", | ||
1165 | .id_table = ark_devices, | ||
1166 | .probe = ark_pci_probe, | ||
1167 | .remove = __devexit_p(ark_pci_remove), | ||
1168 | .suspend = ark_pci_suspend, | ||
1169 | .resume = ark_pci_resume, | ||
1170 | }; | ||
1171 | |||
1172 | /* Cleanup */ | ||
1173 | |||
1174 | static void __exit arkfb_cleanup(void) | ||
1175 | { | ||
1176 | pr_debug("arkfb: cleaning up\n"); | ||
1177 | pci_unregister_driver(&arkfb_pci_driver); | ||
1178 | } | ||
1179 | |||
1180 | /* Driver Initialisation */ | ||
1181 | |||
1182 | static int __init arkfb_init(void) | ||
1183 | { | ||
1184 | |||
1185 | #ifndef MODULE | ||
1186 | char *option = NULL; | ||
1187 | |||
1188 | if (fb_get_options("arkfb", &option)) | ||
1189 | return -ENODEV; | ||
1190 | |||
1191 | if (option && *option) | ||
1192 | mode = option; | ||
1193 | #endif | ||
1194 | |||
1195 | pr_debug("arkfb: initializing\n"); | ||
1196 | return pci_register_driver(&arkfb_pci_driver); | ||
1197 | } | ||
1198 | |||
1199 | module_init(arkfb_init); | ||
1200 | module_exit(arkfb_cleanup); | ||
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 08d4e11d9121..38c2e2558f5e 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c | |||
@@ -1236,6 +1236,10 @@ fb_mmap(struct file *file, struct vm_area_struct * vma) | |||
1236 | pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; | 1236 | pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; |
1237 | #elif defined(__arm__) || defined(__sh__) || defined(__m32r__) | 1237 | #elif defined(__arm__) || defined(__sh__) || defined(__m32r__) |
1238 | vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); | 1238 | vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); |
1239 | #elif defined(__avr32__) | ||
1240 | vma->vm_page_prot = __pgprot((pgprot_val(vma->vm_page_prot) | ||
1241 | & ~_PAGE_CACHABLE) | ||
1242 | | (_PAGE_BUFFER | _PAGE_DIRTY)); | ||
1239 | #elif defined(__ia64__) | 1243 | #elif defined(__ia64__) |
1240 | if (efi_range_is_wc(vma->vm_start, vma->vm_end - vma->vm_start)) | 1244 | if (efi_range_is_wc(vma->vm_start, vma->vm_end - vma->vm_start)) |
1241 | vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); | 1245 | vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); |
diff --git a/drivers/video/nvidia/nv_hw.c b/drivers/video/nvidia/nv_hw.c index f297c7b14a41..c627955aa124 100644 --- a/drivers/video/nvidia/nv_hw.c +++ b/drivers/video/nvidia/nv_hw.c | |||
@@ -149,8 +149,7 @@ static void nvGetClocks(struct nvidia_par *par, unsigned int *MClk, | |||
149 | pll = NV_RD32(par->PMC, 0x4024); | 149 | pll = NV_RD32(par->PMC, 0x4024); |
150 | M = pll & 0xFF; | 150 | M = pll & 0xFF; |
151 | N = (pll >> 8) & 0xFF; | 151 | N = (pll >> 8) & 0xFF; |
152 | if (((par->Chipset & 0xfff0) == 0x0290) || | 152 | if (((par->Chipset & 0xfff0) == 0x0290) || ((par->Chipset & 0xfff0) == 0x0390) || ((par->Chipset & 0xfff0) == 0x02E0)) { |
153 | ((par->Chipset & 0xfff0) == 0x0390)) { | ||
154 | MB = 1; | 153 | MB = 1; |
155 | NB = 1; | 154 | NB = 1; |
156 | } else { | 155 | } else { |
@@ -963,6 +962,7 @@ void NVLoadStateExt(struct nvidia_par *par, RIVA_HW_STATE * state) | |||
963 | 962 | ||
964 | if (((par->Chipset & 0xfff0) == 0x0090) || | 963 | if (((par->Chipset & 0xfff0) == 0x0090) || |
965 | ((par->Chipset & 0xfff0) == 0x01D0) || | 964 | ((par->Chipset & 0xfff0) == 0x01D0) || |
965 | ((par->Chipset & 0xfff0) == 0x02E0) || | ||
966 | ((par->Chipset & 0xfff0) == 0x0290)) | 966 | ((par->Chipset & 0xfff0) == 0x0290)) |
967 | regions = 15; | 967 | regions = 15; |
968 | for(i = 0; i < regions; i++) { | 968 | for(i = 0; i < regions; i++) { |
@@ -1275,6 +1275,7 @@ void NVLoadStateExt(struct nvidia_par *par, RIVA_HW_STATE * state) | |||
1275 | 0x00100000); | 1275 | 0x00100000); |
1276 | break; | 1276 | break; |
1277 | case 0x0090: | 1277 | case 0x0090: |
1278 | case 0x02E0: | ||
1278 | case 0x0290: | 1279 | case 0x0290: |
1279 | NV_WR32(par->PRAMDAC, 0x0608, | 1280 | NV_WR32(par->PRAMDAC, 0x0608, |
1280 | NV_RD32(par->PRAMDAC, 0x0608) | | 1281 | NV_RD32(par->PRAMDAC, 0x0608) | |
@@ -1352,6 +1353,7 @@ void NVLoadStateExt(struct nvidia_par *par, RIVA_HW_STATE * state) | |||
1352 | } else { | 1353 | } else { |
1353 | if (((par->Chipset & 0xfff0) == 0x0090) || | 1354 | if (((par->Chipset & 0xfff0) == 0x0090) || |
1354 | ((par->Chipset & 0xfff0) == 0x01D0) || | 1355 | ((par->Chipset & 0xfff0) == 0x01D0) || |
1356 | ((par->Chipset & 0xfff0) == 0x02E0) || | ||
1355 | ((par->Chipset & 0xfff0) == 0x0290)) { | 1357 | ((par->Chipset & 0xfff0) == 0x0290)) { |
1356 | for (i = 0; i < 60; i++) { | 1358 | for (i = 0; i < 60; i++) { |
1357 | NV_WR32(par->PGRAPH, | 1359 | NV_WR32(par->PGRAPH, |
@@ -1403,6 +1405,7 @@ void NVLoadStateExt(struct nvidia_par *par, RIVA_HW_STATE * state) | |||
1403 | } else { | 1405 | } else { |
1404 | if ((par->Chipset & 0xfff0) == 0x0090 || | 1406 | if ((par->Chipset & 0xfff0) == 0x0090 || |
1405 | (par->Chipset & 0xfff0) == 0x01D0 || | 1407 | (par->Chipset & 0xfff0) == 0x01D0 || |
1408 | (par->Chipset & 0xfff0) == 0x02E0 || | ||
1406 | (par->Chipset & 0xfff0) == 0x0290) { | 1409 | (par->Chipset & 0xfff0) == 0x0290) { |
1407 | NV_WR32(par->PGRAPH, 0x0DF0, | 1410 | NV_WR32(par->PGRAPH, 0x0DF0, |
1408 | NV_RD32(par->PFB, 0x0200)); | 1411 | NV_RD32(par->PFB, 0x0200)); |
diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c index 7c36b5fe582e..f85edf084da3 100644 --- a/drivers/video/nvidia/nvidia.c +++ b/drivers/video/nvidia/nvidia.c | |||
@@ -1243,6 +1243,7 @@ static u32 __devinit nvidia_get_arch(struct fb_info *info) | |||
1243 | case 0x0140: /* GeForce 6600 */ | 1243 | case 0x0140: /* GeForce 6600 */ |
1244 | case 0x0160: /* GeForce 6200 */ | 1244 | case 0x0160: /* GeForce 6200 */ |
1245 | case 0x01D0: /* GeForce 7200, 7300, 7400 */ | 1245 | case 0x01D0: /* GeForce 7200, 7300, 7400 */ |
1246 | case 0x02E0: /* GeForce 7300 GT */ | ||
1246 | case 0x0090: /* GeForce 7800 */ | 1247 | case 0x0090: /* GeForce 7800 */ |
1247 | case 0x0210: /* GeForce 6800 */ | 1248 | case 0x0210: /* GeForce 6800 */ |
1248 | case 0x0220: /* GeForce 6200 */ | 1249 | case 0x0220: /* GeForce 6200 */ |
diff --git a/drivers/video/s3fb.c b/drivers/video/s3fb.c index 756fafb41d78..d11735895a01 100644 --- a/drivers/video/s3fb.c +++ b/drivers/video/s3fb.c | |||
@@ -796,23 +796,6 @@ static int s3fb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info) | |||
796 | return 0; | 796 | return 0; |
797 | } | 797 | } |
798 | 798 | ||
799 | /* Get capabilities of accelerator based on the mode */ | ||
800 | |||
801 | static void s3fb_get_caps(struct fb_info *info, struct fb_blit_caps *caps, | ||
802 | struct fb_var_screeninfo *var) | ||
803 | { | ||
804 | if (var->bits_per_pixel == 0) { | ||
805 | /* can only support 256 8x16 bitmap */ | ||
806 | caps->x = 1 << (8 - 1); | ||
807 | caps->y = 1 << (16 - 1); | ||
808 | caps->len = 256; | ||
809 | } else { | ||
810 | caps->x = ~(u32)0; | ||
811 | caps->y = ~(u32)0; | ||
812 | caps->len = ~(u32)0; | ||
813 | } | ||
814 | } | ||
815 | |||
816 | /* ------------------------------------------------------------------------- */ | 799 | /* ------------------------------------------------------------------------- */ |
817 | 800 | ||
818 | /* Frame buffer operations */ | 801 | /* Frame buffer operations */ |
@@ -829,7 +812,7 @@ static struct fb_ops s3fb_ops = { | |||
829 | .fb_fillrect = s3fb_fillrect, | 812 | .fb_fillrect = s3fb_fillrect, |
830 | .fb_copyarea = cfb_copyarea, | 813 | .fb_copyarea = cfb_copyarea, |
831 | .fb_imageblit = s3fb_imageblit, | 814 | .fb_imageblit = s3fb_imageblit, |
832 | .fb_get_caps = s3fb_get_caps, | 815 | .fb_get_caps = svga_get_caps, |
833 | }; | 816 | }; |
834 | 817 | ||
835 | /* ------------------------------------------------------------------------- */ | 818 | /* ------------------------------------------------------------------------- */ |
diff --git a/drivers/video/svgalib.c b/drivers/video/svgalib.c index 079cdc911e48..25df928d37d8 100644 --- a/drivers/video/svgalib.c +++ b/drivers/video/svgalib.c | |||
@@ -347,6 +347,23 @@ int svga_get_tilemax(struct fb_info *info) | |||
347 | return 256; | 347 | return 256; |
348 | } | 348 | } |
349 | 349 | ||
350 | /* Get capabilities of accelerator based on the mode */ | ||
351 | |||
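351 | /* unlike the s3fb-private version this replaces, 4 bpp modes are | ||
351 | * restricted to 8-pixel-wide blits here */ | ||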
352 | void svga_get_caps(struct fb_info *info, struct fb_blit_caps *caps, | ||
353 | struct fb_var_screeninfo *var) | ||
354 | { | ||
355 | if (var->bits_per_pixel == 0) { | ||
356 | /* can only support 256 8x16 bitmap */ | ||
357 | caps->x = 1 << (8 - 1); | ||
358 | caps->y = 1 << (16 - 1); | ||
359 | caps->len = 256; | ||
360 | } else { | ||
361 | caps->x = (var->bits_per_pixel == 4) ? 1 << (8 - 1) : ~(u32)0; | ||
362 | caps->y = ~(u32)0; | ||
363 | caps->len = ~(u32)0; | ||
364 | } | ||
365 | } | ||
366 | EXPORT_SYMBOL(svga_get_caps); | ||
350 | 367 | ||
351 | /* ------------------------------------------------------------------------- */ | 368 | /* ------------------------------------------------------------------------- */ |
352 | 369 | ||
diff --git a/drivers/video/vt8623fb.c b/drivers/video/vt8623fb.c new file mode 100644 index 000000000000..5e9755e464a1 --- /dev/null +++ b/drivers/video/vt8623fb.c | |||
@@ -0,0 +1,927 @@ | |||
1 | /* | ||
2 | * linux/drivers/video/vt8623fb.c - fbdev driver for | ||
3 | * integrated graphic core in VIA VT8623 [CLE266] chipset | ||
4 | * | ||
5 | * Copyright (c) 2006-2007 Ondrej Zajicek <santiago@crfreenet.org> | ||
6 | * | ||
7 | * This file is subject to the terms and conditions of the GNU General Public | ||
8 | * License. See the file COPYING in the main directory of this archive for | ||
9 | * more details. | ||
10 | * | ||
11 | * Code is based on s3fb, some parts are from David Boucher's viafb | ||
12 | * (http://davesdomain.org.uk/viafb/) | ||
13 | */ | ||
14 | |||
15 | #include <linux/version.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/errno.h> | ||
19 | #include <linux/string.h> | ||
20 | #include <linux/mm.h> | ||
21 | #include <linux/tty.h> | ||
22 | #include <linux/slab.h> | ||
23 | #include <linux/delay.h> | ||
24 | #include <linux/fb.h> | ||
25 | #include <linux/svga.h> | ||
26 | #include <linux/init.h> | ||
27 | #include <linux/pci.h> | ||
28 | #include <linux/console.h> /* Why should an fb driver call console functions? Because of acquire_console_sem() */ | ||
29 | #include <video/vga.h> | ||
30 | |||
31 | #ifdef CONFIG_MTRR | ||
32 | #include <asm/mtrr.h> | ||
33 | #endif | ||
34 | |||
35 | struct vt8623fb_info { | ||
36 | char __iomem *mmio_base; | ||
37 | int mtrr_reg; | ||
38 | struct vgastate state; | ||
39 | struct mutex open_lock; | ||
40 | unsigned int ref_count; | ||
41 | u32 pseudo_palette[16]; | ||
42 | }; | ||
43 | |||
44 | |||
45 | |||
46 | /* ------------------------------------------------------------------------- */ | ||
47 | |||
48 | static const struct svga_fb_format vt8623fb_formats[] = { | ||
49 | { 0, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0, | ||
50 | FB_TYPE_TEXT, FB_AUX_TEXT_SVGA_STEP8, FB_VISUAL_PSEUDOCOLOR, 16, 16}, | ||
51 | { 4, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0, | ||
52 | FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_PSEUDOCOLOR, 16, 16}, | ||
53 | { 4, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 1, | ||
54 | FB_TYPE_INTERLEAVED_PLANES, 1, FB_VISUAL_PSEUDOCOLOR, 16, 16}, | ||
55 | { 8, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0, | ||
56 | FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_PSEUDOCOLOR, 8, 8}, | ||
57 | /* {16, {10, 5, 0}, {5, 5, 0}, {0, 5, 0}, {0, 0, 0}, 0, | ||
58 | FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_TRUECOLOR, 4, 4}, */ | ||
59 | {16, {11, 5, 0}, {5, 6, 0}, {0, 5, 0}, {0, 0, 0}, 0, | ||
60 | FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_TRUECOLOR, 4, 4}, | ||
61 | {32, {16, 8, 0}, {8, 8, 0}, {0, 8, 0}, {0, 0, 0}, 0, | ||
62 | FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_TRUECOLOR, 2, 2}, | ||
63 | SVGA_FORMAT_END | ||
64 | }; | ||
65 | |||
66 | static const struct svga_pll vt8623_pll = {2, 127, 2, 7, 0, 3, | ||
67 | 60000, 300000, 14318}; | ||
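67 | /* presumably: m 2-127, n 2-7, r 0-3, VCO range 60-300 MHz and the | ||
67 | * 14.318 MHz reference, all frequencies in kHz */ | ||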
68 | |||
69 | /* CRT timing register sets */ | ||
70 | |||
71 | struct vga_regset vt8623_h_total_regs[] = {{0x00, 0, 7}, {0x36, 3, 3}, VGA_REGSET_END}; | ||
72 | struct vga_regset vt8623_h_display_regs[] = {{0x01, 0, 7}, VGA_REGSET_END}; | ||
73 | struct vga_regset vt8623_h_blank_start_regs[] = {{0x02, 0, 7}, VGA_REGSET_END}; | ||
74 | struct vga_regset vt8623_h_blank_end_regs[] = {{0x03, 0, 4}, {0x05, 7, 7}, {0x33, 5, 5}, VGA_REGSET_END}; | ||
75 | struct vga_regset vt8623_h_sync_start_regs[] = {{0x04, 0, 7}, {0x33, 4, 4}, VGA_REGSET_END}; | ||
76 | struct vga_regset vt8623_h_sync_end_regs[] = {{0x05, 0, 4}, VGA_REGSET_END}; | ||
77 | |||
78 | struct vga_regset vt8623_v_total_regs[] = {{0x06, 0, 7}, {0x07, 0, 0}, {0x07, 5, 5}, {0x35, 0, 0}, VGA_REGSET_END}; | ||
79 | struct vga_regset vt8623_v_display_regs[] = {{0x12, 0, 7}, {0x07, 1, 1}, {0x07, 6, 6}, {0x35, 2, 2}, VGA_REGSET_END}; | ||
80 | struct vga_regset vt8623_v_blank_start_regs[] = {{0x15, 0, 7}, {0x07, 3, 3}, {0x09, 5, 5}, {0x35, 3, 3}, VGA_REGSET_END}; | ||
81 | struct vga_regset vt8623_v_blank_end_regs[] = {{0x16, 0, 7}, VGA_REGSET_END}; | ||
82 | struct vga_regset vt8623_v_sync_start_regs[] = {{0x10, 0, 7}, {0x07, 2, 2}, {0x07, 7, 7}, {0x35, 1, 1}, VGA_REGSET_END}; | ||
83 | struct vga_regset vt8623_v_sync_end_regs[] = {{0x11, 0, 3}, VGA_REGSET_END}; | ||
84 | |||
85 | struct vga_regset vt8623_offset_regs[] = {{0x13, 0, 7}, {0x35, 5, 7}, VGA_REGSET_END}; | ||
86 | struct vga_regset vt8623_line_compare_regs[] = {{0x18, 0, 7}, {0x07, 4, 4}, {0x09, 6, 6}, {0x33, 0, 2}, {0x35, 4, 4}, VGA_REGSET_END}; | ||
87 | struct vga_regset vt8623_fetch_count_regs[] = {{0x1C, 0, 7}, {0x1D, 0, 1}, VGA_REGSET_END}; | ||
88 | struct vga_regset vt8623_start_address_regs[] = {{0x0d, 0, 7}, {0x0c, 0, 7}, {0x34, 0, 7}, {0x48, 0, 1}, VGA_REGSET_END}; | ||
89 | |||
90 | struct svga_timing_regs vt8623_timing_regs = { | ||
91 | vt8623_h_total_regs, vt8623_h_display_regs, vt8623_h_blank_start_regs, | ||
92 | vt8623_h_blank_end_regs, vt8623_h_sync_start_regs, vt8623_h_sync_end_regs, | ||
93 | vt8623_v_total_regs, vt8623_v_display_regs, vt8623_v_blank_start_regs, | ||
94 | vt8623_v_blank_end_regs, vt8623_v_sync_start_regs, vt8623_v_sync_end_regs, | ||
95 | }; | ||
96 | |||
97 | |||
98 | /* ------------------------------------------------------------------------- */ | ||
99 | |||
100 | |||
101 | /* Module parameters */ | ||
102 | |||
103 | static char *mode = "640x480-8@60"; | ||
104 | |||
105 | #ifdef CONFIG_MTRR | ||
106 | static int mtrr = 1; | ||
107 | #endif | ||
108 | |||
109 | MODULE_AUTHOR("(c) 2006 Ondrej Zajicek <santiago@crfreenet.org>"); | ||
110 | MODULE_LICENSE("GPL"); | ||
111 | MODULE_DESCRIPTION("fbdev driver for integrated graphics core in VIA VT8623 [CLE266]"); | ||
112 | |||
113 | module_param(mode, charp, 0644); | ||
114 | MODULE_PARM_DESC(mode, "Default video mode ('640x480-8@60', etc)"); | ||
115 | |||
116 | #ifdef CONFIG_MTRR | ||
117 | module_param(mtrr, int, 0444); | ||
118 | MODULE_PARM_DESC(mtrr, "Enable write-combining with MTRR (1=enable, 0=disable, default=1)"); | ||
119 | #endif | ||
120 | |||
121 | |||
122 | /* ------------------------------------------------------------------------- */ | ||
123 | |||
124 | |||
125 | static struct fb_tile_ops vt8623fb_tile_ops = { | ||
126 | .fb_settile = svga_settile, | ||
127 | .fb_tilecopy = svga_tilecopy, | ||
128 | .fb_tilefill = svga_tilefill, | ||
129 | .fb_tileblit = svga_tileblit, | ||
130 | .fb_tilecursor = svga_tilecursor, | ||
131 | .fb_get_tilemax = svga_get_tilemax, | ||
132 | }; | ||
133 | |||
134 | |||
135 | /* ------------------------------------------------------------------------- */ | ||
136 | |||
137 | |||
138 | /* image data is MSB-first, fb structure is MSB-first too */ | ||
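138 | /* bit n of the 4-bit colour becomes a full 0xFF byte in plane n, | ||
138 | * e.g. expand_color(0x5) == 0x00FF00FF */ | ||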
139 | static inline u32 expand_color(u32 c) | ||
140 | { | ||
141 | return ((c & 1) | ((c & 2) << 7) | ((c & 4) << 14) | ((c & 8) << 21)) * 0xFF; | ||
142 | } | ||
143 | |||
144 | /* vt8623fb_iplan_imageblit silently assumes that almost everything is 8-pixel aligned */ | ||
145 | static void vt8623fb_iplan_imageblit(struct fb_info *info, const struct fb_image *image) | ||
146 | { | ||
147 | u32 fg = expand_color(image->fg_color); | ||
148 | u32 bg = expand_color(image->bg_color); | ||
149 | const u8 *src1, *src; | ||
150 | u8 __iomem *dst1; | ||
151 | u32 __iomem *dst; | ||
152 | u32 val; | ||
153 | int x, y; | ||
154 | |||
155 | src1 = image->data; | ||
156 | dst1 = info->screen_base + (image->dy * info->fix.line_length) | ||
157 | + ((image->dx / 8) * 4); | ||
158 | |||
159 | for (y = 0; y < image->height; y++) { | ||
160 | src = src1; | ||
161 | dst = (u32 __iomem *) dst1; | ||
162 | for (x = 0; x < image->width; x += 8) { | ||
163 | val = *(src++) * 0x01010101; | ||
164 | val = (val & fg) | (~val & bg); | ||
165 | fb_writel(val, dst++); | ||
166 | } | ||
167 | src1 += image->width / 8; | ||
168 | dst1 += info->fix.line_length; | ||
169 | } | ||
170 | } | ||
171 | |||
172 | /* vt8623fb_iplan_fillrect silently assumes that almost everything is 8-pixel aligned */ | ||
173 | static void vt8623fb_iplan_fillrect(struct fb_info *info, const struct fb_fillrect *rect) | ||
174 | { | ||
175 | u32 fg = expand_color(rect->color); | ||
176 | u8 __iomem *dst1; | ||
177 | u32 __iomem *dst; | ||
178 | int x, y; | ||
179 | |||
180 | dst1 = info->screen_base + (rect->dy * info->fix.line_length) | ||
181 | + ((rect->dx / 8) * 4); | ||
182 | |||
183 | for (y = 0; y < rect->height; y++) { | ||
184 | dst = (u32 __iomem *) dst1; | ||
185 | for (x = 0; x < rect->width; x += 8) { | ||
186 | fb_writel(fg, dst++); | ||
187 | } | ||
188 | dst1 += info->fix.line_length; | ||
189 | } | ||
190 | } | ||
191 | |||
192 | |||
193 | /* image data is MSB-first, fb structure is high-nibble-in-low-byte-first */ | ||
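193 | /* each mono bit becomes a 4-bit 0xF nibble in the order described | ||
193 | * above, e.g. expand_pixel(0x01) == 0x0F000000 and | ||
193 | * expand_pixel(0x80) == 0x000000F0 */ | ||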
194 | static inline u32 expand_pixel(u32 c) | ||
195 | { | ||
196 | return (((c & 1) << 24) | ((c & 2) << 27) | ((c & 4) << 14) | ((c & 8) << 17) | | ||
197 | ((c & 16) << 4) | ((c & 32) << 7) | ((c & 64) >> 6) | ((c & 128) >> 3)) * 0xF; | ||
198 | } | ||
199 | |||
200 | /* vt8623fb_cfb4_imageblit silently assumes that almost everything is 8-pixel aligned */ | ||
201 | static void vt8623fb_cfb4_imageblit(struct fb_info *info, const struct fb_image *image) | ||
202 | { | ||
203 | u32 fg = image->fg_color * 0x11111111; | ||
204 | u32 bg = image->bg_color * 0x11111111; | ||
205 | const u8 *src1, *src; | ||
206 | u8 __iomem *dst1; | ||
207 | u32 __iomem *dst; | ||
208 | u32 val; | ||
209 | int x, y; | ||
210 | |||
211 | src1 = image->data; | ||
212 | dst1 = info->screen_base + (image->dy * info->fix.line_length) | ||
213 | + ((image->dx / 8) * 4); | ||
214 | |||
215 | for (y = 0; y < image->height; y++) { | ||
216 | src = src1; | ||
217 | dst = (u32 __iomem *) dst1; | ||
218 | for (x = 0; x < image->width; x += 8) { | ||
219 | val = expand_pixel(*(src++)); | ||
220 | val = (val & fg) | (~val & bg); | ||
221 | fb_writel(val, dst++); | ||
222 | } | ||
223 | src1 += image->width / 8; | ||
224 | dst1 += info->fix.line_length; | ||
225 | } | ||
226 | } | ||
227 | |||
228 | static void vt8623fb_imageblit(struct fb_info *info, const struct fb_image *image) | ||
229 | { | ||
230 | if ((info->var.bits_per_pixel == 4) && (image->depth == 1) | ||
231 | && ((image->width % 8) == 0) && ((image->dx % 8) == 0)) { | ||
232 | if (info->fix.type == FB_TYPE_INTERLEAVED_PLANES) | ||
233 | vt8623fb_iplan_imageblit(info, image); | ||
234 | else | ||
235 | vt8623fb_cfb4_imageblit(info, image); | ||
236 | } else | ||
237 | cfb_imageblit(info, image); | ||
238 | } | ||
239 | |||
240 | static void vt8623fb_fillrect(struct fb_info *info, const struct fb_fillrect *rect) | ||
241 | { | ||
242 | if ((info->var.bits_per_pixel == 4) | ||
243 | && ((rect->width % 8) == 0) && ((rect->dx % 8) == 0) | ||
244 | && (info->fix.type == FB_TYPE_INTERLEAVED_PLANES)) | ||
245 | vt8623fb_iplan_fillrect(info, rect); | ||
246 | else | ||
247 | cfb_fillrect(info, rect); | ||
248 | } | ||
249 | |||
250 | |||
251 | /* ------------------------------------------------------------------------- */ | ||
252 | |||
253 | |||
254 | static void vt8623_set_pixclock(struct fb_info *info, u32 pixclock) | ||
255 | { | ||
256 | u16 m, n, r; | ||
257 | u8 regval; | ||
258 | int rv; | ||
259 | |||
260 | rv = svga_compute_pll(&vt8623_pll, 1000000000 / pixclock, &m, &n, &r, info->node); | ||
261 | if (rv < 0) { | ||
262 | printk(KERN_ERR "fb%d: cannot set requested pixclock, keeping old value\n", info->node); | ||
263 | return; | ||
264 | } | ||
265 | |||
266 | /* Set VGA misc register */ | ||
267 | regval = vga_r(NULL, VGA_MIS_R); | ||
268 | vga_w(NULL, VGA_MIS_W, regval | VGA_MIS_ENB_PLL_LOAD); | ||
269 | |||
270 | /* Set clock registers */ | ||
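270 | /* n and r share SR46 (r in bits 6-7), m goes to SR47; the PLL | ||
270 | * presumably runs at f_ref * m / (n * 2^r) */ | ||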
271 | vga_wseq(NULL, 0x46, (n | (r << 6))); | ||
272 | vga_wseq(NULL, 0x47, m); | ||
273 | |||
274 | udelay(1000); | ||
275 | |||
276 | /* PLL reset */ | ||
277 | svga_wseq_mask(0x40, 0x02, 0x02); | ||
278 | svga_wseq_mask(0x40, 0x00, 0x02); | ||
279 | } | ||
280 | |||
281 | |||
282 | static int vt8623fb_open(struct fb_info *info, int user) | ||
283 | { | ||
284 | struct vt8623fb_info *par = info->par; | ||
285 | |||
286 | mutex_lock(&(par->open_lock)); | ||
287 | if (par->ref_count == 0) { | ||
288 | memset(&(par->state), 0, sizeof(struct vgastate)); | ||
289 | par->state.flags = VGA_SAVE_MODE | VGA_SAVE_FONTS | VGA_SAVE_CMAP; | ||
290 | par->state.num_crtc = 0xA2; | ||
291 | par->state.num_seq = 0x50; | ||
292 | save_vga(&(par->state)); | ||
293 | } | ||
294 | |||
295 | par->ref_count++; | ||
296 | mutex_unlock(&(par->open_lock)); | ||
297 | |||
298 | return 0; | ||
299 | } | ||
300 | |||
301 | static int vt8623fb_release(struct fb_info *info, int user) | ||
302 | { | ||
303 | struct vt8623fb_info *par = info->par; | ||
304 | |||
305 | mutex_lock(&(par->open_lock)); | ||
306 | if (par->ref_count == 0) { | ||
307 | mutex_unlock(&(par->open_lock)); | ||
308 | return -EINVAL; | ||
309 | } | ||
310 | |||
311 | if (par->ref_count == 1) | ||
312 | restore_vga(&(par->state)); | ||
313 | |||
314 | par->ref_count--; | ||
315 | mutex_unlock(&(par->open_lock)); | ||
316 | |||
317 | return 0; | ||
318 | } | ||
319 | |||
320 | static int vt8623fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) | ||
321 | { | ||
322 | int rv, mem, step; | ||
323 | |||
324 | /* Find appropriate format */ | ||
325 | rv = svga_match_format (vt8623fb_formats, var, NULL); | ||
326 | if (rv < 0) | ||
327 | { | ||
328 | printk(KERN_ERR "fb%d: unsupported mode requested\n", info->node); | ||
329 | return rv; | ||
330 | } | ||
331 | |||
332 | /* Do not allow the real resolution to be larger than the virtual one */ | ||
333 | if (var->xres > var->xres_virtual) | ||
334 | var->xres_virtual = var->xres; | ||
335 | |||
336 | if (var->yres > var->yres_virtual) | ||
337 | var->yres_virtual = var->yres; | ||
338 | |||
339 | /* Round up xres_virtual to have proper alignment of lines */ | ||
340 | step = vt8623fb_formats[rv].xresstep - 1; | ||
341 | var->xres_virtual = (var->xres_virtual+step) & ~step; | ||
342 | |||
343 | /* Check whether there is enough memory */ | ||
344 | mem = ((var->bits_per_pixel * var->xres_virtual) >> 3) * var->yres_virtual; | ||
345 | if (mem > info->screen_size) | ||
346 | { | ||
347 | printk(KERN_ERR "fb%d: not enough framebuffer memory (%d kB requested, %d kB available)\n", info->node, mem >> 10, (unsigned int) (info->screen_size >> 10)); | ||
348 | return -EINVAL; | ||
349 | } | ||
350 | |||
351 | /* Text mode is limited to 256 kB of memory */ | ||
352 | if ((var->bits_per_pixel == 0) && (mem > (256*1024))) | ||
353 | { | ||
354 | printk(KERN_ERR "fb%d: text framebuffer size too large (%d kB requested, 256 kB possible)\n", info->node, mem >> 10); | ||
355 | return -EINVAL; | ||
356 | } | ||
357 | |||
358 | rv = svga_check_timings (&vt8623_timing_regs, var, info->node); | ||
359 | if (rv < 0) | ||
360 | { | ||
361 | printk(KERN_ERR "fb%d: invalid timings requested\n", info->node); | ||
362 | return rv; | ||
363 | } | ||
364 | |||
365 | /* Interlaced mode not supported */ | ||
366 | if (var->vmode & FB_VMODE_INTERLACED) | ||
367 | return -EINVAL; | ||
368 | |||
369 | return 0; | ||
370 | } | ||
371 | |||
372 | |||
373 | static int vt8623fb_set_par(struct fb_info *info) | ||
374 | { | ||
375 | u32 mode, offset_value, fetch_value, screen_size; | ||
376 | u32 bpp = info->var.bits_per_pixel; | ||
377 | |||
378 | if (bpp != 0) { | ||
379 | info->fix.ypanstep = 1; | ||
380 | info->fix.line_length = (info->var.xres_virtual * bpp) / 8; | ||
381 | |||
382 | info->flags &= ~FBINFO_MISC_TILEBLITTING; | ||
383 | info->tileops = NULL; | ||
384 | |||
385 | /* 4 bpp supports only 8-pixel-wide tiles, other depths any tile size */ | ||
386 | info->pixmap.blit_x = (bpp == 4) ? (1 << (8 - 1)) : (~(u32)0); | ||
387 | info->pixmap.blit_y = ~(u32)0; | ||
388 | |||
389 | offset_value = (info->var.xres_virtual * bpp) / 64; | ||
390 | fetch_value = ((info->var.xres * bpp) / 128) + 4; | ||
391 | |||
392 | if (bpp == 4) | ||
393 | fetch_value = (info->var.xres / 8) + 8; /* + 0 is OK */ | ||
394 | |||
395 | screen_size = info->var.yres_virtual * info->fix.line_length; | ||
396 | } else { | ||
397 | info->fix.ypanstep = 16; | ||
398 | info->fix.line_length = 0; | ||
399 | |||
400 | info->flags |= FBINFO_MISC_TILEBLITTING; | ||
401 | info->tileops = &vt8623fb_tile_ops; | ||
402 | |||
403 | /* supports 8x16 tiles only */ | ||
404 | info->pixmap.blit_x = 1 << (8 - 1); | ||
405 | info->pixmap.blit_y = 1 << (16 - 1); | ||
406 | |||
407 | offset_value = info->var.xres_virtual / 16; | ||
408 | fetch_value = (info->var.xres / 8) + 8; | ||
409 | screen_size = (info->var.xres_virtual * info->var.yres_virtual) / 64; | ||
410 | } | ||
411 | |||
412 | info->var.xoffset = 0; | ||
413 | info->var.yoffset = 0; | ||
414 | info->var.activate = FB_ACTIVATE_NOW; | ||
415 | |||
416 | /* Unlock registers */ | ||
417 | svga_wseq_mask(0x10, 0x01, 0x01); | ||
418 | svga_wcrt_mask(0x11, 0x00, 0x80); | ||
419 | svga_wcrt_mask(0x47, 0x00, 0x01); | ||
420 | |||
421 | /* Device, screen and sync off */ | ||
422 | svga_wseq_mask(0x01, 0x20, 0x20); | ||
423 | svga_wcrt_mask(0x36, 0x30, 0x30); | ||
424 | svga_wcrt_mask(0x17, 0x00, 0x80); | ||
425 | |||
426 | /* Set default values */ | ||
427 | svga_set_default_gfx_regs(); | ||
428 | svga_set_default_atc_regs(); | ||
429 | svga_set_default_seq_regs(); | ||
430 | svga_set_default_crt_regs(); | ||
431 | svga_wcrt_multi(vt8623_line_compare_regs, 0xFFFFFFFF); | ||
432 | svga_wcrt_multi(vt8623_start_address_regs, 0); | ||
433 | |||
434 | svga_wcrt_multi(vt8623_offset_regs, offset_value); | ||
435 | svga_wseq_multi(vt8623_fetch_count_regs, fetch_value); | ||
436 | |||
437 | if (info->var.vmode & FB_VMODE_DOUBLE) | ||
438 | svga_wcrt_mask(0x09, 0x80, 0x80); | ||
439 | else | ||
440 | svga_wcrt_mask(0x09, 0x00, 0x80); | ||
441 | |||
442 | svga_wseq_mask(0x1E, 0xF0, 0xF0); // DI/DVP bus | ||
443 | svga_wseq_mask(0x2A, 0x0F, 0x0F); // DI/DVP bus | ||
444 | svga_wseq_mask(0x16, 0x08, 0xBF); // FIFO read threshold | ||
445 | vga_wseq(NULL, 0x17, 0x1F); // FIFO depth | ||
446 | vga_wseq(NULL, 0x18, 0x4E); | ||
447 | svga_wseq_mask(0x1A, 0x08, 0x08); // enable MMIO ? | ||
448 | |||
449 | vga_wcrt(NULL, 0x32, 0x00); | ||
450 | vga_wcrt(NULL, 0x34, 0x00); | ||
451 | vga_wcrt(NULL, 0x6A, 0x80); | ||
452 | vga_wcrt(NULL, 0x6A, 0xC0); | ||
453 | |||
454 | vga_wgfx(NULL, 0x20, 0x00); | ||
455 | vga_wgfx(NULL, 0x21, 0x00); | ||
456 | vga_wgfx(NULL, 0x22, 0x00); | ||
457 | |||
458 | /* Set SR15 according to number of bits per pixel */ | ||
459 | mode = svga_match_format(vt8623fb_formats, &(info->var), &(info->fix)); | ||
460 | switch (mode) { | ||
461 | case 0: | ||
462 | pr_debug("fb%d: text mode\n", info->node); | ||
463 | svga_set_textmode_vga_regs(); | ||
464 | svga_wseq_mask(0x15, 0x00, 0xFE); | ||
465 | svga_wcrt_mask(0x11, 0x60, 0x70); | ||
466 | break; | ||
467 | case 1: | ||
468 | pr_debug("fb%d: 4 bit pseudocolor\n", info->node); | ||
469 | vga_wgfx(NULL, VGA_GFX_MODE, 0x40); | ||
470 | svga_wseq_mask(0x15, 0x20, 0xFE); | ||
471 | svga_wcrt_mask(0x11, 0x00, 0x70); | ||
472 | break; | ||
473 | case 2: | ||
474 | pr_debug("fb%d: 4 bit pseudocolor, planar\n", info->node); | ||
475 | svga_wseq_mask(0x15, 0x00, 0xFE); | ||
476 | svga_wcrt_mask(0x11, 0x00, 0x70); | ||
477 | break; | ||
478 | case 3: | ||
479 | pr_debug("fb%d: 8 bit pseudocolor\n", info->node); | ||
480 | svga_wseq_mask(0x15, 0x22, 0xFE); | ||
481 | break; | ||
482 | case 4: | ||
483 | pr_debug("fb%d: 5/6/5 truecolor\n", info->node); | ||
484 | svga_wseq_mask(0x15, 0xB6, 0xFE); | ||
485 | break; | ||
486 | case 5: | ||
487 | pr_debug("fb%d: 8/8/8 truecolor\n", info->node); | ||
488 | svga_wseq_mask(0x15, 0xAE, 0xFE); | ||
489 | break; | ||
490 | default: | ||
491 | printk(KERN_ERR "vt8623fb: unsupported mode - bug\n"); | ||
492 | return (-EINVAL); | ||
493 | } | ||
494 | |||
495 | vt8623_set_pixclock(info, info->var.pixclock); | ||
496 | svga_set_timings(&vt8623_timing_regs, &(info->var), 1, 1, | ||
497 | (info->var.vmode & FB_VMODE_DOUBLE) ? 2 : 1, 1, | ||
498 | 1, info->node); | ||
499 | |||
500 | memset_io(info->screen_base, 0x00, screen_size); | ||
501 | |||
502 | /* Device and screen back on */ | ||
503 | svga_wcrt_mask(0x17, 0x80, 0x80); | ||
504 | svga_wcrt_mask(0x36, 0x00, 0x30); | ||
505 | svga_wseq_mask(0x01, 0x00, 0x20); | ||
506 | |||
507 | return 0; | ||
508 | } | ||
509 | |||
510 | |||
511 | static int vt8623fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, | ||
512 | u_int transp, struct fb_info *fb) | ||
513 | { | ||
514 | switch (fb->var.bits_per_pixel) { | ||
515 | case 0: | ||
516 | case 4: | ||
517 | if (regno >= 16) | ||
518 | return -EINVAL; | ||
519 | |||
520 | outb(0x0F, VGA_PEL_MSK); | ||
521 | outb(regno, VGA_PEL_IW); | ||
522 | outb(red >> 10, VGA_PEL_D); | ||
523 | outb(green >> 10, VGA_PEL_D); | ||
524 | outb(blue >> 10, VGA_PEL_D); | ||
525 | break; | ||
526 | case 8: | ||
527 | if (regno >= 256) | ||
528 | return -EINVAL; | ||
529 | |||
530 | outb(0xFF, VGA_PEL_MSK); | ||
531 | outb(regno, VGA_PEL_IW); | ||
532 | outb(red >> 10, VGA_PEL_D); | ||
533 | outb(green >> 10, VGA_PEL_D); | ||
534 | outb(blue >> 10, VGA_PEL_D); | ||
535 | break; | ||
536 | case 16: | ||
537 | if (regno >= 16) | ||
538 | return 0; | ||
539 | |||
540 | if (fb->var.green.length == 5) | ||
541 | ((u32*)fb->pseudo_palette)[regno] = ((red & 0xF800) >> 1) | | ||
542 | ((green & 0xF800) >> 6) | ((blue & 0xF800) >> 11); | ||
543 | else if (fb->var.green.length == 6) | ||
544 | ((u32*)fb->pseudo_palette)[regno] = (red & 0xF800) | | ||
545 | ((green & 0xFC00) >> 5) | ((blue & 0xF800) >> 11); | ||
546 | else | ||
547 | return -EINVAL; | ||
548 | break; | ||
549 | case 24: | ||
550 | case 32: | ||
551 | if (regno >= 16) | ||
552 | return 0; | ||
553 | |||
554 | /* ((transp & 0xFF00) << 16) */ | ||
555 | ((u32*)fb->pseudo_palette)[regno] = ((red & 0xFF00) << 8) | | ||
556 | (green & 0xFF00) | ((blue & 0xFF00) >> 8); | ||
557 | break; | ||
558 | default: | ||
559 | return -EINVAL; | ||
560 | } | ||
561 | |||
562 | return 0; | ||
563 | } | ||
564 | |||
565 | |||
566 | static int vt8623fb_blank(int blank_mode, struct fb_info *info) | ||
567 | { | ||
568 | switch (blank_mode) { | ||
569 | case FB_BLANK_UNBLANK: | ||
570 | pr_debug("fb%d: unblank\n", info->node); | ||
571 | svga_wcrt_mask(0x36, 0x00, 0x30); | ||
572 | svga_wseq_mask(0x01, 0x00, 0x20); | ||
573 | break; | ||
574 | case FB_BLANK_NORMAL: | ||
575 | pr_debug("fb%d: blank\n", info->node); | ||
576 | svga_wcrt_mask(0x36, 0x00, 0x30); | ||
577 | svga_wseq_mask(0x01, 0x20, 0x20); | ||
578 | break; | ||
579 | case FB_BLANK_HSYNC_SUSPEND: | ||
580 | pr_debug("fb%d: DPMS standby (hsync off)\n", info->node); | ||
581 | svga_wcrt_mask(0x36, 0x10, 0x30); | ||
582 | svga_wseq_mask(0x01, 0x20, 0x20); | ||
583 | break; | ||
584 | case FB_BLANK_VSYNC_SUSPEND: | ||
585 | pr_debug("fb%d: DPMS suspend (vsync off)\n", info->node); | ||
586 | svga_wcrt_mask(0x36, 0x20, 0x30); | ||
587 | svga_wseq_mask(0x01, 0x20, 0x20); | ||
588 | break; | ||
589 | case FB_BLANK_POWERDOWN: | ||
590 | pr_debug("fb%d: DPMS off (no sync)\n", info->node); | ||
591 | svga_wcrt_mask(0x36, 0x30, 0x30); | ||
592 | svga_wseq_mask(0x01, 0x20, 0x20); | ||
593 | break; | ||
594 | } | ||
595 | |||
596 | return 0; | ||
597 | } | ||
598 | |||
599 | |||
600 | static int vt8623fb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info) | ||
601 | { | ||
602 | unsigned int offset; | ||
603 | |||
604 | /* Calculate the offset */ | ||
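604 | /* the start address seems to count 2-byte units here (4-byte in | ||
604 | * 4 bpp modes), hence the final shifts */ | ||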
605 | if (var->bits_per_pixel == 0) { | ||
606 | offset = (var->yoffset / 16) * var->xres_virtual + var->xoffset; | ||
607 | offset = offset >> 3; | ||
608 | } else { | ||
609 | offset = (var->yoffset * info->fix.line_length) + | ||
610 | (var->xoffset * var->bits_per_pixel / 8); | ||
611 | offset = offset >> ((var->bits_per_pixel == 4) ? 2 : 1); | ||
612 | } | ||
613 | |||
614 | /* Set the offset */ | ||
615 | svga_wcrt_multi(vt8623_start_address_regs, offset); | ||
616 | |||
617 | return 0; | ||
618 | } | ||
619 | |||
620 | |||
621 | /* ------------------------------------------------------------------------- */ | ||
622 | |||
623 | |||
624 | /* Frame buffer operations */ | ||
625 | |||
626 | static struct fb_ops vt8623fb_ops = { | ||
627 | .owner = THIS_MODULE, | ||
628 | .fb_open = vt8623fb_open, | ||
629 | .fb_release = vt8623fb_release, | ||
630 | .fb_check_var = vt8623fb_check_var, | ||
631 | .fb_set_par = vt8623fb_set_par, | ||
632 | .fb_setcolreg = vt8623fb_setcolreg, | ||
633 | .fb_blank = vt8623fb_blank, | ||
634 | .fb_pan_display = vt8623fb_pan_display, | ||
635 | .fb_fillrect = vt8623fb_fillrect, | ||
636 | .fb_copyarea = cfb_copyarea, | ||
637 | .fb_imageblit = vt8623fb_imageblit, | ||
638 | .fb_get_caps = svga_get_caps, | ||
639 | }; | ||
640 | |||
641 | |||
642 | /* PCI probe */ | ||
643 | |||
644 | static int __devinit vt8623_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) | ||
645 | { | ||
646 | struct fb_info *info; | ||
647 | struct vt8623fb_info *par; | ||
648 | unsigned int memsize1, memsize2; | ||
649 | int rc; | ||
650 | |||
651 | /* Ignore secondary VGA device because there is no VGA arbitration */ | ||
652 | if (! svga_primary_device(dev)) { | ||
653 | dev_info(&(dev->dev), "ignoring secondary device\n"); | ||
654 | return -ENODEV; | ||
655 | } | ||
656 | |||
657 | /* Allocate and fill driver data structure */ | ||
658 | info = framebuffer_alloc(sizeof(struct vt8623fb_info), NULL); | ||
659 | if (! info) { | ||
660 | dev_err(&(dev->dev), "cannot allocate memory\n"); | ||
661 | return -ENOMEM; | ||
662 | } | ||
663 | |||
664 | par = info->par; | ||
665 | mutex_init(&par->open_lock); | ||
666 | |||
667 | info->flags = FBINFO_PARTIAL_PAN_OK | FBINFO_HWACCEL_YPAN; | ||
668 | info->fbops = &vt8623fb_ops; | ||
669 | |||
670 | /* Prepare PCI device */ | ||
671 | |||
672 | rc = pci_enable_device(dev); | ||
673 | if (rc < 0) { | ||
674 | dev_err(&(dev->dev), "cannot enable PCI device\n"); | ||
675 | goto err_enable_device; | ||
676 | } | ||
677 | |||
678 | rc = pci_request_regions(dev, "vt8623fb"); | ||
679 | if (rc < 0) { | ||
680 | dev_err(&(dev->dev), "cannot reserve framebuffer region\n"); | ||
681 | goto err_request_regions; | ||
682 | } | ||
683 | |||
684 | info->fix.smem_start = pci_resource_start(dev, 0); | ||
685 | info->fix.smem_len = pci_resource_len(dev, 0); | ||
686 | info->fix.mmio_start = pci_resource_start(dev, 1); | ||
687 | info->fix.mmio_len = pci_resource_len(dev, 1); | ||
688 | |||
689 | /* Map physical IO memory address into kernel space */ | ||
690 | info->screen_base = pci_iomap(dev, 0, 0); | ||
691 | if (! info->screen_base) { | ||
692 | rc = -ENOMEM; | ||
693 | dev_err(&(dev->dev), "iomap for framebuffer failed\n"); | ||
694 | goto err_iomap_1; | ||
695 | } | ||
696 | |||
697 | par->mmio_base = pci_iomap(dev, 1, 0); | ||
698 | if (! par->mmio_base) { | ||
699 | rc = -ENOMEM; | ||
700 | dev_err(&(dev->dev), "iomap for MMIO failed\n"); | ||
701 | goto err_iomap_2; | ||
702 | } | ||
703 | |||
704 | /* Find out how much physical memory there is on the card */ | ||
705 | memsize1 = (vga_rseq(NULL, 0x34) + 1) >> 1; | ||
706 | memsize2 = vga_rseq(NULL, 0x39) << 2; | ||
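706 | /* SR34 appears to hold 2 * MB - 1 and SR39 MB / 4, so the two | ||
706 | * reads cross-check each other; e.g. 32 MB gives 0x3F and 0x08 */ | ||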
707 | |||
708 | if ((16 <= memsize1) && (memsize1 <= 64) && (memsize1 == memsize2)) | ||
709 | info->screen_size = memsize1 << 20; | ||
710 | else { | ||
711 | dev_err(&(dev->dev), "memory size detection failed (%x %x), assuming 16 MB\n", memsize1, memsize2); | ||
712 | info->screen_size = 16 << 20; | ||
713 | } | ||
714 | |||
715 | info->fix.smem_len = info->screen_size; | ||
716 | strcpy(info->fix.id, "VIA VT8623"); | ||
717 | info->fix.type = FB_TYPE_PACKED_PIXELS; | ||
718 | info->fix.visual = FB_VISUAL_PSEUDOCOLOR; | ||
719 | info->fix.ypanstep = 0; | ||
720 | info->fix.accel = FB_ACCEL_NONE; | ||
721 | info->pseudo_palette = (void*)par->pseudo_palette; | ||
722 | |||
723 | /* Prepare startup mode */ | ||
724 | |||
725 | rc = fb_find_mode(&(info->var), info, mode, NULL, 0, NULL, 8); | ||
726 | if (! ((rc == 1) || (rc == 2))) { | ||
727 | rc = -EINVAL; | ||
728 | dev_err(&(dev->dev), "mode %s not found\n", mode); | ||
729 | goto err_find_mode; | ||
730 | } | ||
731 | |||
732 | rc = fb_alloc_cmap(&info->cmap, 256, 0); | ||
733 | if (rc < 0) { | ||
734 | dev_err(&(dev->dev), "cannot allocate colormap\n"); | ||
735 | goto err_alloc_cmap; | ||
736 | } | ||
737 | |||
738 | rc = register_framebuffer(info); | ||
739 | if (rc < 0) { | ||
740 | dev_err(&(dev->dev), "cannot register framebuffer\n"); | ||
741 | goto err_reg_fb; | ||
742 | } | ||
743 | |||
744 | printk(KERN_INFO "fb%d: %s on %s, %d MB RAM\n", info->node, info->fix.id, | ||
745 | pci_name(dev), info->fix.smem_len >> 20); | ||
746 | |||
747 | /* Record a reference to the driver data */ | ||
748 | pci_set_drvdata(dev, info); | ||
749 | |||
750 | #ifdef CONFIG_MTRR | ||
751 | /* Initialise to "no region" so vt8623_pci_remove() never deletes MTRR 0 by mistake */ | ||
752 | par->mtrr_reg = -1; | ||
753 | if (mtrr) | ||
754 | par->mtrr_reg = mtrr_add(info->fix.smem_start, info->fix.smem_len, MTRR_TYPE_WRCOMB, 1); | ||
755 | #endif | ||
756 | |||
757 | return 0; | ||
758 | |||
759 | /* Error handling */ | ||
760 | err_reg_fb: | ||
761 | fb_dealloc_cmap(&info->cmap); | ||
762 | err_alloc_cmap: | ||
763 | err_find_mode: | ||
764 | pci_iounmap(dev, par->mmio_base); | ||
765 | err_iomap_2: | ||
766 | pci_iounmap(dev, info->screen_base); | ||
767 | err_iomap_1: | ||
768 | pci_release_regions(dev); | ||
769 | err_request_regions: | ||
770 | /* pci_disable_device(dev); */ | ||
771 | err_enable_device: | ||
772 | framebuffer_release(info); | ||
773 | return rc; | ||
774 | } | ||
775 | |||
776 | /* PCI remove */ | ||
777 | |||
778 | static void __devexit vt8623_pci_remove(struct pci_dev *dev) | ||
779 | { | ||
780 | struct fb_info *info = pci_get_drvdata(dev); | ||
781 | struct vt8623fb_info *par = info ? info->par : NULL; | ||
782 | |||
783 | if (info) { | ||
784 | #ifdef CONFIG_MTRR | ||
785 | if (par->mtrr_reg >= 0) { | ||
786 | mtrr_del(par->mtrr_reg, 0, 0); | ||
787 | par->mtrr_reg = -1; | ||
788 | } | ||
789 | #endif | ||
790 | |||
791 | unregister_framebuffer(info); | ||
792 | fb_dealloc_cmap(&info->cmap); | ||
793 | |||
794 | pci_iounmap(dev, info->screen_base); | ||
795 | pci_iounmap(dev, par->mmio_base); | ||
796 | pci_release_regions(dev); | ||
797 | /* pci_disable_device(dev); */ | ||
798 | |||
799 | pci_set_drvdata(dev, NULL); | ||
800 | framebuffer_release(info); | ||
801 | } | ||
802 | } | ||
803 | |||
804 | |||
805 | #ifdef CONFIG_PM | ||
806 | /* PCI suspend */ | ||
807 | |||
808 | static int vt8623_pci_suspend(struct pci_dev *dev, pm_message_t state) | ||
809 | { | ||
810 | struct fb_info *info = pci_get_drvdata(dev); | ||
811 | struct vt8623fb_info *par = info->par; | ||
812 | |||
813 | dev_info(&(dev->dev), "suspend\n"); | ||
814 | |||
815 | acquire_console_sem(); | ||
816 | mutex_lock(&(par->open_lock)); | ||
817 | |||
818 | if ((state.event == PM_EVENT_FREEZE) || (par->ref_count == 0)) { | ||
819 | mutex_unlock(&(par->open_lock)); | ||
820 | release_console_sem(); | ||
821 | return 0; | ||
822 | } | ||
823 | |||
824 | fb_set_suspend(info, 1); | ||
825 | |||
826 | pci_save_state(dev); | ||
827 | pci_disable_device(dev); | ||
828 | pci_set_power_state(dev, pci_choose_state(dev, state)); | ||
829 | |||
830 | mutex_unlock(&(par->open_lock)); | ||
831 | release_console_sem(); | ||
832 | |||
833 | return 0; | ||
834 | } | ||
835 | |||
836 | |||
837 | /* PCI resume */ | ||
838 | |||
839 | static int vt8623_pci_resume(struct pci_dev *dev) | ||
840 | { | ||
841 | struct fb_info *info = pci_get_drvdata(dev); | ||
842 | struct vt8623fb_info *par = info->par; | ||
843 | |||
844 | dev_info(&(dev->dev), "resume\n"); | ||
845 | |||
846 | acquire_console_sem(); | ||
847 | mutex_lock(&(par->open_lock)); | ||
848 | |||
849 | if (par->ref_count == 0) { | ||
850 | mutex_unlock(&(par->open_lock)); | ||
851 | release_console_sem(); | ||
852 | return 0; | ||
853 | } | ||
854 | |||
855 | pci_set_power_state(dev, PCI_D0); | ||
856 | pci_restore_state(dev); | ||
857 | |||
858 | if (pci_enable_device(dev)) | ||
859 | goto fail; | ||
860 | |||
861 | pci_set_master(dev); | ||
862 | |||
863 | vt8623fb_set_par(info); | ||
864 | fb_set_suspend(info, 0); | ||
865 | |||
866 | fail: | ||
867 | mutex_unlock(&(par->open_lock)); | ||
868 | release_console_sem(); | ||
869 | |||
870 | return 0; | ||
871 | } | ||
872 | #else | ||
873 | #define vt8623_pci_suspend NULL | ||
874 | #define vt8623_pci_resume NULL | ||
875 | #endif /* CONFIG_PM */ | ||
876 | |||
877 | /* List of boards that we are trying to support */ | ||
878 | |||
879 | static struct pci_device_id vt8623_devices[] __devinitdata = { | ||
880 | {PCI_DEVICE(PCI_VENDOR_ID_VIA, 0x3122)}, | ||
881 | {0, 0, 0, 0, 0, 0, 0} | ||
882 | }; | ||
883 | |||
884 | MODULE_DEVICE_TABLE(pci, vt8623_devices); | ||
885 | |||
886 | static struct pci_driver vt8623fb_pci_driver = { | ||
887 | .name = "vt8623fb", | ||
888 | .id_table = vt8623_devices, | ||
889 | .probe = vt8623_pci_probe, | ||
890 | .remove = __devexit_p(vt8623_pci_remove), | ||
891 | .suspend = vt8623_pci_suspend, | ||
892 | .resume = vt8623_pci_resume, | ||
893 | }; | ||
894 | |||
895 | /* Cleanup */ | ||
896 | |||
897 | static void __exit vt8623fb_cleanup(void) | ||
898 | { | ||
899 | pr_debug("vt8623fb: cleaning up\n"); | ||
900 | pci_unregister_driver(&vt8623fb_pci_driver); | ||
901 | } | ||
902 | |||
903 | /* Driver Initialisation */ | ||
904 | |||
905 | int __init vt8623fb_init(void) | ||
906 | { | ||
907 | |||
908 | #ifndef MODULE | ||
909 | char *option = NULL; | ||
910 | |||
911 | if (fb_get_options("vt8623fb", &option)) | ||
912 | return -ENODEV; | ||
913 | |||
914 | if (option && *option) | ||
915 | mode = option; | ||
916 | #endif | ||
917 | |||
918 | pr_debug("vt8623fb: initializing\n"); | ||
919 | return pci_register_driver(&vt8623fb_pci_driver); | ||
920 | } | ||
921 | |||
922 | /* ------------------------------------------------------------------------- */ | ||
923 | |||
924 | /* Modularization */ | ||
925 | |||
926 | module_init(vt8623fb_init); | ||
927 | module_exit(vt8623fb_cleanup); | ||
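The probe function above follows the kernel's standard goto-based unwind ladder: each acquired resource has a matching error label, and a failure jumps to the label that releases everything acquired so far, in reverse order. A minimal sketch of the idiom (acquire_a/acquire_b/release_a are placeholder names, not part of this driver):

	static int example_probe(void)
	{
		int rc;

		rc = acquire_a();		/* e.g. pci_enable_device() */
		if (rc < 0)
			goto err_a;

		rc = acquire_b();		/* e.g. pci_request_regions() */
		if (rc < 0)
			goto err_b;

		return 0;			/* success: keep all resources */

	err_b:
		release_a();			/* undo in strict reverse order */
	err_a:
		return rc;
	}

The pci_disable_device() calls are deliberately left commented out in both the unwind path and vt8623_pci_remove(), presumably so that a device still acting as the primary console is not switched off.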
diff --git a/fs/affs/file.c b/fs/affs/file.c index 4aa8079e71be..c8796906f584 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c | |||
@@ -628,11 +628,7 @@ static int affs_prepare_write_ofs(struct file *file, struct page *page, unsigned | |||
628 | return err; | 628 | return err; |
629 | } | 629 | } |
630 | if (to < PAGE_CACHE_SIZE) { | 630 | if (to < PAGE_CACHE_SIZE) { |
631 | char *kaddr = kmap_atomic(page, KM_USER0); | 631 | zero_user_page(page, to, PAGE_CACHE_SIZE - to, KM_USER0); |
632 | |||
633 | memset(kaddr + to, 0, PAGE_CACHE_SIZE - to); | ||
634 | flush_dcache_page(page); | ||
635 | kunmap_atomic(kaddr, KM_USER0); | ||
636 | if (size > offset + to) { | 632 | if (size > offset + to) { |
637 | if (size < offset + PAGE_CACHE_SIZE) | 633 | if (size < offset + PAGE_CACHE_SIZE) |
638 | tmp = size & ~PAGE_CACHE_MASK; | 634 | tmp = size & ~PAGE_CACHE_MASK; |
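The fs/affs hunk above swaps an open-coded page zeroing for the zero_user_page() helper. Judging purely from the removed lines, the call stands in for this sequence (a reconstruction from this hunk, not the helper's actual definition):

	char *kaddr = kmap_atomic(page, KM_USER0);	/* map the page */
	memset(kaddr + to, 0, PAGE_CACHE_SIZE - to);	/* zero the tail */
	flush_dcache_page(page);			/* keep caches coherent */
	kunmap_atomic(kaddr, KM_USER0);			/* unmap again */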
diff --git a/fs/afs/Makefile b/fs/afs/Makefile index cf83e5d63512..73ce561f3ea0 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile | |||
@@ -22,6 +22,7 @@ kafs-objs := \ | |||
22 | vlclient.o \ | 22 | vlclient.o \ |
23 | vlocation.o \ | 23 | vlocation.o \ |
24 | vnode.o \ | 24 | vnode.o \ |
25 | volume.o | 25 | volume.o \ |
26 | write.o | ||
26 | 27 | ||
27 | obj-$(CONFIG_AFS_FS) := kafs.o | 28 | obj-$(CONFIG_AFS_FS) := kafs.o |
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h index 89e0d1650a72..2198006d2d03 100644 --- a/fs/afs/afs_fs.h +++ b/fs/afs/afs_fs.h | |||
@@ -18,6 +18,8 @@ | |||
18 | enum AFS_FS_Operations { | 18 | enum AFS_FS_Operations { |
19 | FSFETCHDATA = 130, /* AFS Fetch file data */ | 19 | FSFETCHDATA = 130, /* AFS Fetch file data */ |
20 | FSFETCHSTATUS = 132, /* AFS Fetch file status */ | 20 | FSFETCHSTATUS = 132, /* AFS Fetch file status */ |
21 | FSSTOREDATA = 133, /* AFS Store file data */ | ||
22 | FSSTORESTATUS = 135, /* AFS Store file status */ | ||
21 | FSREMOVEFILE = 136, /* AFS Remove a file */ | 23 | FSREMOVEFILE = 136, /* AFS Remove a file */ |
22 | FSCREATEFILE = 137, /* AFS Create a file */ | 24 | FSCREATEFILE = 137, /* AFS Create a file */ |
23 | FSRENAME = 138, /* AFS Rename or move a file or directory */ | 25 | FSRENAME = 138, /* AFS Rename or move a file or directory */ |
diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 9bdbf36a9aa9..f64e40fefc02 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c | |||
@@ -44,7 +44,7 @@ void afs_init_callback_state(struct afs_server *server) | |||
44 | while (!RB_EMPTY_ROOT(&server->cb_promises)) { | 44 | while (!RB_EMPTY_ROOT(&server->cb_promises)) { |
45 | vnode = rb_entry(server->cb_promises.rb_node, | 45 | vnode = rb_entry(server->cb_promises.rb_node, |
46 | struct afs_vnode, cb_promise); | 46 | struct afs_vnode, cb_promise); |
47 | _debug("UNPROMISE { vid=%x vn=%u uq=%u}", | 47 | _debug("UNPROMISE { vid=%x:%u uq=%u}", |
48 | vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); | 48 | vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); |
49 | rb_erase(&vnode->cb_promise, &server->cb_promises); | 49 | rb_erase(&vnode->cb_promise, &server->cb_promises); |
50 | vnode->cb_promised = false; | 50 | vnode->cb_promised = false; |
@@ -84,11 +84,8 @@ void afs_broken_callback_work(struct work_struct *work) | |||
84 | 84 | ||
85 | /* if the vnode's data version number changed then its contents | 85 | /* if the vnode's data version number changed then its contents |
86 | * are different */ | 86 | * are different */ |
87 | if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { | 87 | if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) |
88 | _debug("zap data {%x:%u}", | 88 | afs_zap_data(vnode); |
89 | vnode->fid.vid, vnode->fid.vnode); | ||
90 | invalidate_remote_inode(&vnode->vfs_inode); | ||
91 | } | ||
92 | } | 89 | } |
93 | 90 | ||
94 | out: | 91 | out: |
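This hunk, together with the matching one in fs/afs/inode.c further down, replaces the open-coded invalidation with the new afs_zap_data() helper, which as added in inode.c amounts to:

	void afs_zap_data(struct afs_vnode *vnode)
	{
		_enter("zap data {%x:%u}", vnode->fid.vid, vnode->fid.vnode);

		/* nuke all the non-dirty pages that aren't locked, mapped
		 * or being written back */
		invalidate_remote_inode(&vnode->vfs_inode);
	}

Factoring this out gives the writeback code a single place to later discard outstanding writes and dirty pages as well.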
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 0c1e902f17a3..2fb31276196b 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c | |||
@@ -55,7 +55,8 @@ const struct inode_operations afs_dir_inode_operations = { | |||
55 | .rmdir = afs_rmdir, | 55 | .rmdir = afs_rmdir, |
56 | .rename = afs_rename, | 56 | .rename = afs_rename, |
57 | .permission = afs_permission, | 57 | .permission = afs_permission, |
58 | .getattr = afs_inode_getattr, | 58 | .getattr = afs_getattr, |
59 | .setattr = afs_setattr, | ||
59 | }; | 60 | }; |
60 | 61 | ||
61 | static struct dentry_operations afs_fs_dentry_operations = { | 62 | static struct dentry_operations afs_fs_dentry_operations = { |
@@ -491,7 +492,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, | |||
491 | 492 | ||
492 | vnode = AFS_FS_I(dir); | 493 | vnode = AFS_FS_I(dir); |
493 | 494 | ||
494 | _enter("{%x:%d},%p{%s},", | 495 | _enter("{%x:%u},%p{%s},", |
495 | vnode->fid.vid, vnode->fid.vnode, dentry, dentry->d_name.name); | 496 | vnode->fid.vid, vnode->fid.vnode, dentry, dentry->d_name.name); |
496 | 497 | ||
497 | ASSERTCMP(dentry->d_inode, ==, NULL); | 498 | ASSERTCMP(dentry->d_inode, ==, NULL); |
@@ -731,7 +732,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
731 | 732 | ||
732 | dvnode = AFS_FS_I(dir); | 733 | dvnode = AFS_FS_I(dir); |
733 | 734 | ||
734 | _enter("{%x:%d},{%s},%o", | 735 | _enter("{%x:%u},{%s},%o", |
735 | dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); | 736 | dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); |
736 | 737 | ||
737 | ret = -ENAMETOOLONG; | 738 | ret = -ENAMETOOLONG; |
@@ -796,7 +797,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) | |||
796 | 797 | ||
797 | dvnode = AFS_FS_I(dir); | 798 | dvnode = AFS_FS_I(dir); |
798 | 799 | ||
799 | _enter("{%x:%d},{%s}", | 800 | _enter("{%x:%u},{%s}", |
800 | dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); | 801 | dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); |
801 | 802 | ||
802 | ret = -ENAMETOOLONG; | 803 | ret = -ENAMETOOLONG; |
@@ -842,7 +843,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) | |||
842 | 843 | ||
843 | dvnode = AFS_FS_I(dir); | 844 | dvnode = AFS_FS_I(dir); |
844 | 845 | ||
845 | _enter("{%x:%d},{%s}", | 846 | _enter("{%x:%u},{%s}", |
846 | dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); | 847 | dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); |
847 | 848 | ||
848 | ret = -ENAMETOOLONG; | 849 | ret = -ENAMETOOLONG; |
@@ -916,7 +917,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
916 | 917 | ||
917 | dvnode = AFS_FS_I(dir); | 918 | dvnode = AFS_FS_I(dir); |
918 | 919 | ||
919 | _enter("{%x:%d},{%s},%o,", | 920 | _enter("{%x:%u},{%s},%o,", |
920 | dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); | 921 | dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); |
921 | 922 | ||
922 | ret = -ENAMETOOLONG; | 923 | ret = -ENAMETOOLONG; |
@@ -983,7 +984,7 @@ static int afs_link(struct dentry *from, struct inode *dir, | |||
983 | vnode = AFS_FS_I(from->d_inode); | 984 | vnode = AFS_FS_I(from->d_inode); |
984 | dvnode = AFS_FS_I(dir); | 985 | dvnode = AFS_FS_I(dir); |
985 | 986 | ||
986 | _enter("{%x:%d},{%x:%d},{%s}", | 987 | _enter("{%x:%u},{%x:%u},{%s}", |
987 | vnode->fid.vid, vnode->fid.vnode, | 988 | vnode->fid.vid, vnode->fid.vnode, |
988 | dvnode->fid.vid, dvnode->fid.vnode, | 989 | dvnode->fid.vid, dvnode->fid.vnode, |
989 | dentry->d_name.name); | 990 | dentry->d_name.name); |
@@ -1032,7 +1033,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, | |||
1032 | 1033 | ||
1033 | dvnode = AFS_FS_I(dir); | 1034 | dvnode = AFS_FS_I(dir); |
1034 | 1035 | ||
1035 | _enter("{%x:%d},{%s},%s", | 1036 | _enter("{%x:%u},{%s},%s", |
1036 | dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, | 1037 | dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, |
1037 | content); | 1038 | content); |
1038 | 1039 | ||
@@ -1104,7 +1105,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1104 | orig_dvnode = AFS_FS_I(old_dir); | 1105 | orig_dvnode = AFS_FS_I(old_dir); |
1105 | new_dvnode = AFS_FS_I(new_dir); | 1106 | new_dvnode = AFS_FS_I(new_dir); |
1106 | 1107 | ||
1107 | _enter("{%x:%d},{%x:%d},{%x:%d},{%s}", | 1108 | _enter("{%x:%u},{%x:%u},{%x:%u},{%s}", |
1108 | orig_dvnode->fid.vid, orig_dvnode->fid.vnode, | 1109 | orig_dvnode->fid.vid, orig_dvnode->fid.vnode, |
1109 | vnode->fid.vid, vnode->fid.vnode, | 1110 | vnode->fid.vid, vnode->fid.vnode, |
1110 | new_dvnode->fid.vid, new_dvnode->fid.vnode, | 1111 | new_dvnode->fid.vid, new_dvnode->fid.vnode, |
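The dir.c hunks above standardise the debug format for AFS file IDs on {%x:%u}: the volume ID prints in hex, and the vnode number, an unsigned quantity, prints as unsigned decimal instead of %d. As an illustration (the values are made up), the new afs_iget() trace

	_enter(",{%x:%u.%u},,", fid->vid, fid->vnode, fid->unique);

would print ',{2f037:42.7},,' for vid 0x2f037, vnode 42, uniquifier 7.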
diff --git a/fs/afs/file.c b/fs/afs/file.c index ae256498f4f7..3e25795e5a42 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c | |||
@@ -15,32 +15,43 @@ | |||
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/fs.h> | 16 | #include <linux/fs.h> |
17 | #include <linux/pagemap.h> | 17 | #include <linux/pagemap.h> |
18 | #include <linux/writeback.h> | ||
18 | #include "internal.h" | 19 | #include "internal.h" |
19 | 20 | ||
20 | static int afs_file_readpage(struct file *file, struct page *page); | 21 | static int afs_readpage(struct file *file, struct page *page); |
21 | static void afs_file_invalidatepage(struct page *page, unsigned long offset); | 22 | static void afs_invalidatepage(struct page *page, unsigned long offset); |
22 | static int afs_file_releasepage(struct page *page, gfp_t gfp_flags); | 23 | static int afs_releasepage(struct page *page, gfp_t gfp_flags); |
24 | static int afs_launder_page(struct page *page); | ||
23 | 25 | ||
24 | const struct file_operations afs_file_operations = { | 26 | const struct file_operations afs_file_operations = { |
25 | .open = afs_open, | 27 | .open = afs_open, |
26 | .release = afs_release, | 28 | .release = afs_release, |
27 | .llseek = generic_file_llseek, | 29 | .llseek = generic_file_llseek, |
28 | .read = do_sync_read, | 30 | .read = do_sync_read, |
31 | .write = do_sync_write, | ||
29 | .aio_read = generic_file_aio_read, | 32 | .aio_read = generic_file_aio_read, |
33 | .aio_write = afs_file_write, | ||
30 | .mmap = generic_file_readonly_mmap, | 34 | .mmap = generic_file_readonly_mmap, |
31 | .sendfile = generic_file_sendfile, | 35 | .sendfile = generic_file_sendfile, |
36 | .fsync = afs_fsync, | ||
32 | }; | 37 | }; |
33 | 38 | ||
34 | const struct inode_operations afs_file_inode_operations = { | 39 | const struct inode_operations afs_file_inode_operations = { |
35 | .getattr = afs_inode_getattr, | 40 | .getattr = afs_getattr, |
41 | .setattr = afs_setattr, | ||
36 | .permission = afs_permission, | 42 | .permission = afs_permission, |
37 | }; | 43 | }; |
38 | 44 | ||
39 | const struct address_space_operations afs_fs_aops = { | 45 | const struct address_space_operations afs_fs_aops = { |
40 | .readpage = afs_file_readpage, | 46 | .readpage = afs_readpage, |
41 | .set_page_dirty = __set_page_dirty_nobuffers, | 47 | .set_page_dirty = afs_set_page_dirty, |
42 | .releasepage = afs_file_releasepage, | 48 | .launder_page = afs_launder_page, |
43 | .invalidatepage = afs_file_invalidatepage, | 49 | .releasepage = afs_releasepage, |
50 | .invalidatepage = afs_invalidatepage, | ||
51 | .prepare_write = afs_prepare_write, | ||
52 | .commit_write = afs_commit_write, | ||
53 | .writepage = afs_writepage, | ||
54 | .writepages = afs_writepages, | ||
44 | }; | 55 | }; |
45 | 56 | ||
46 | /* | 57 | /* |
@@ -52,7 +63,7 @@ int afs_open(struct inode *inode, struct file *file) | |||
52 | struct key *key; | 63 | struct key *key; |
53 | int ret; | 64 | int ret; |
54 | 65 | ||
55 | _enter("{%x:%x},", vnode->fid.vid, vnode->fid.vnode); | 66 | _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); |
56 | 67 | ||
57 | key = afs_request_key(vnode->volume->cell); | 68 | key = afs_request_key(vnode->volume->cell); |
58 | if (IS_ERR(key)) { | 69 | if (IS_ERR(key)) { |
@@ -78,7 +89,7 @@ int afs_release(struct inode *inode, struct file *file) | |||
78 | { | 89 | { |
79 | struct afs_vnode *vnode = AFS_FS_I(inode); | 90 | struct afs_vnode *vnode = AFS_FS_I(inode); |
80 | 91 | ||
81 | _enter("{%x:%x},", vnode->fid.vid, vnode->fid.vnode); | 92 | _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); |
82 | 93 | ||
83 | key_put(file->private_data); | 94 | key_put(file->private_data); |
84 | _leave(" = 0"); | 95 | _leave(" = 0"); |
@@ -89,10 +100,10 @@ int afs_release(struct inode *inode, struct file *file) | |||
89 | * deal with notification that a page was read from the cache | 100 | * deal with notification that a page was read from the cache |
90 | */ | 101 | */ |
91 | #ifdef AFS_CACHING_SUPPORT | 102 | #ifdef AFS_CACHING_SUPPORT |
92 | static void afs_file_readpage_read_complete(void *cookie_data, | 103 | static void afs_readpage_read_complete(void *cookie_data, |
93 | struct page *page, | 104 | struct page *page, |
94 | void *data, | 105 | void *data, |
95 | int error) | 106 | int error) |
96 | { | 107 | { |
97 | _enter("%p,%p,%p,%d", cookie_data, page, data, error); | 108 | _enter("%p,%p,%p,%d", cookie_data, page, data, error); |
98 | 109 | ||
@@ -109,10 +120,10 @@ static void afs_file_readpage_read_complete(void *cookie_data, | |||
109 | * deal with notification that a page was written to the cache | 120 | * deal with notification that a page was written to the cache |
110 | */ | 121 | */ |
111 | #ifdef AFS_CACHING_SUPPORT | 122 | #ifdef AFS_CACHING_SUPPORT |
112 | static void afs_file_readpage_write_complete(void *cookie_data, | 123 | static void afs_readpage_write_complete(void *cookie_data, |
113 | struct page *page, | 124 | struct page *page, |
114 | void *data, | 125 | void *data, |
115 | int error) | 126 | int error) |
116 | { | 127 | { |
117 | _enter("%p,%p,%p,%d", cookie_data, page, data, error); | 128 | _enter("%p,%p,%p,%d", cookie_data, page, data, error); |
118 | 129 | ||
@@ -121,9 +132,9 @@ static void afs_file_readpage_write_complete(void *cookie_data, | |||
121 | #endif | 132 | #endif |
122 | 133 | ||
123 | /* | 134 | /* |
124 | * AFS read page from file (or symlink) | 135 | * AFS read page from file, directory or symlink |
125 | */ | 136 | */ |
126 | static int afs_file_readpage(struct file *file, struct page *page) | 137 | static int afs_readpage(struct file *file, struct page *page) |
127 | { | 138 | { |
128 | struct afs_vnode *vnode; | 139 | struct afs_vnode *vnode; |
129 | struct inode *inode; | 140 | struct inode *inode; |
@@ -219,39 +230,17 @@ error: | |||
219 | } | 230 | } |
220 | 231 | ||
221 | /* | 232 | /* |
222 | * get a page cookie for the specified page | ||
223 | */ | ||
224 | #ifdef AFS_CACHING_SUPPORT | ||
225 | int afs_cache_get_page_cookie(struct page *page, | ||
226 | struct cachefs_page **_page_cookie) | ||
227 | { | ||
228 | int ret; | ||
229 | |||
230 | _enter(""); | ||
231 | ret = cachefs_page_get_private(page,_page_cookie, GFP_NOIO); | ||
232 | |||
233 | _leave(" = %d", ret); | ||
234 | return ret; | ||
235 | } | ||
236 | #endif | ||
237 | |||
238 | /* | ||
239 | * invalidate part or all of a page | 233 | * invalidate part or all of a page |
240 | */ | 234 | */ |
241 | static void afs_file_invalidatepage(struct page *page, unsigned long offset) | 235 | static void afs_invalidatepage(struct page *page, unsigned long offset) |
242 | { | 236 | { |
243 | int ret = 1; | 237 | int ret = 1; |
244 | 238 | ||
245 | _enter("{%lu},%lu", page->index, offset); | 239 | kenter("{%lu},%lu", page->index, offset); |
246 | 240 | ||
247 | BUG_ON(!PageLocked(page)); | 241 | BUG_ON(!PageLocked(page)); |
248 | 242 | ||
249 | if (PagePrivate(page)) { | 243 | if (PagePrivate(page)) { |
250 | #ifdef AFS_CACHING_SUPPORT | ||
251 | struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); | ||
252 | cachefs_uncache_page(vnode->cache,page); | ||
253 | #endif | ||
254 | |||
255 | /* We release buffers only if the entire page is being | 244 | /* We release buffers only if the entire page is being |
256 | * invalidated. | 245 | * invalidated. |
257 | * The get_block cached value has been unconditionally | 246 | * The get_block cached value has been unconditionally |
@@ -272,25 +261,33 @@ static void afs_file_invalidatepage(struct page *page, unsigned long offset) | |||
272 | } | 261 | } |
273 | 262 | ||
274 | /* | 263 | /* |
264 | * write back a dirty page | ||
265 | */ | ||
266 | static int afs_launder_page(struct page *page) | ||
267 | { | ||
268 | _enter("{%lu}", page->index); | ||
269 | |||
270 | return 0; | ||
271 | } | ||
272 | |||
273 | /* | ||
275 | * release a page and cleanup its private data | 274 | * release a page and cleanup its private data |
276 | */ | 275 | */ |
277 | static int afs_file_releasepage(struct page *page, gfp_t gfp_flags) | 276 | static int afs_releasepage(struct page *page, gfp_t gfp_flags) |
278 | { | 277 | { |
279 | struct cachefs_page *pageio; | 278 | struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); |
279 | struct afs_writeback *wb; | ||
280 | 280 | ||
281 | _enter("{%lu},%x", page->index, gfp_flags); | 281 | _enter("{{%x:%u}[%lu],%lx},%x", |
282 | vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, | ||
283 | gfp_flags); | ||
282 | 284 | ||
283 | if (PagePrivate(page)) { | 285 | if (PagePrivate(page)) { |
284 | #ifdef AFS_CACHING_SUPPORT | 286 | wb = (struct afs_writeback *) page_private(page); |
285 | struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); | 287 | ASSERT(wb != NULL); |
286 | cachefs_uncache_page(vnode->cache, page); | ||
287 | #endif | ||
288 | |||
289 | pageio = (struct cachefs_page *) page_private(page); | ||
290 | set_page_private(page, 0); | 288 | set_page_private(page, 0); |
291 | ClearPagePrivate(page); | 289 | ClearPagePrivate(page); |
292 | 290 | afs_put_writeback(wb); | |
293 | kfree(pageio); | ||
294 | } | 291 | } |
295 | 292 | ||
296 | _leave(" = 0"); | 293 | _leave(" = 0"); |
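With write support, PG_private on an AFS page now carries a pointer to the owning afs_writeback record (declared in fs/afs/internal.h below) rather than a cachefs cookie, and afs_releasepage() must drop that reference. The detach side is visible above; the attach side lives in the new fs/afs/write.c, which is not shown here, so that half is an inference:

	/* attaching a page to a writeback record (inferred): */
	SetPagePrivate(page);
	set_page_private(page, (unsigned long) wb);	/* page holds a wb ref */

	/* detaching, as afs_releasepage() does: */
	wb = (struct afs_writeback *) page_private(page);
	set_page_private(page, 0);
	ClearPagePrivate(page);
	afs_put_writeback(wb);				/* drop that ref */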
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index e54e6c2ad343..025b1903d9e1 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c | |||
@@ -33,8 +33,10 @@ static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid) | |||
33 | */ | 33 | */ |
34 | static void xdr_decode_AFSFetchStatus(const __be32 **_bp, | 34 | static void xdr_decode_AFSFetchStatus(const __be32 **_bp, |
35 | struct afs_file_status *status, | 35 | struct afs_file_status *status, |
36 | struct afs_vnode *vnode) | 36 | struct afs_vnode *vnode, |
37 | afs_dataversion_t *store_version) | ||
37 | { | 38 | { |
39 | afs_dataversion_t expected_version; | ||
38 | const __be32 *bp = *_bp; | 40 | const __be32 *bp = *_bp; |
39 | umode_t mode; | 41 | umode_t mode; |
40 | u64 data_version, size; | 42 | u64 data_version, size; |
@@ -101,7 +103,11 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, | |||
101 | vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; | 103 | vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; |
102 | } | 104 | } |
103 | 105 | ||
104 | if (status->data_version != data_version) { | 106 | expected_version = status->data_version; |
107 | if (store_version) | ||
108 | expected_version = *store_version; | ||
109 | |||
110 | if (expected_version != data_version) { | ||
105 | status->data_version = data_version; | 111 | status->data_version = data_version; |
106 | if (vnode && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { | 112 | if (vnode && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { |
107 | _debug("vnode modified %llx on {%x:%u}", | 113 | _debug("vnode modified %llx on {%x:%u}", |
@@ -110,6 +116,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, | |||
110 | set_bit(AFS_VNODE_MODIFIED, &vnode->flags); | 116 | set_bit(AFS_VNODE_MODIFIED, &vnode->flags); |
111 | set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); | 117 | set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); |
112 | } | 118 | } |
119 | } else if (store_version) { | ||
120 | status->data_version = data_version; | ||
113 | } | 121 | } |
114 | } | 122 | } |
115 | 123 | ||
@@ -156,6 +164,44 @@ static void xdr_decode_AFSVolSync(const __be32 **_bp, | |||
156 | } | 164 | } |
157 | 165 | ||
158 | /* | 166 | /* |
167 | * encode the requested attributes into an AFSStoreStatus block | ||
168 | */ | ||
169 | static void xdr_encode_AFS_StoreStatus(__be32 **_bp, struct iattr *attr) | ||
170 | { | ||
171 | __be32 *bp = *_bp; | ||
172 | u32 mask = 0, mtime = 0, owner = 0, group = 0, mode = 0; | ||
173 | |||
174 | mask = 0; | ||
175 | if (attr->ia_valid & ATTR_MTIME) { | ||
176 | mask |= AFS_SET_MTIME; | ||
177 | mtime = attr->ia_mtime.tv_sec; | ||
178 | } | ||
179 | |||
180 | if (attr->ia_valid & ATTR_UID) { | ||
181 | mask |= AFS_SET_OWNER; | ||
182 | owner = attr->ia_uid; | ||
183 | } | ||
184 | |||
185 | if (attr->ia_valid & ATTR_GID) { | ||
186 | mask |= AFS_SET_GROUP; | ||
187 | group = attr->ia_gid; | ||
188 | } | ||
189 | |||
190 | if (attr->ia_valid & ATTR_MODE) { | ||
191 | mask |= AFS_SET_MODE; | ||
192 | mode = attr->ia_mode & S_IALLUGO; | ||
193 | } | ||
194 | |||
195 | *bp++ = htonl(mask); | ||
196 | *bp++ = htonl(mtime); | ||
197 | *bp++ = htonl(owner); | ||
198 | *bp++ = htonl(group); | ||
199 | *bp++ = htonl(mode); | ||
200 | *bp++ = 0; /* segment size */ | ||
201 | *_bp = bp; | ||
202 | } | ||
203 | |||
204 | /* | ||
159 | * deliver reply data to an FS.FetchStatus | 205 | * deliver reply data to an FS.FetchStatus |
160 | */ | 206 | */ |
161 | static int afs_deliver_fs_fetch_status(struct afs_call *call, | 207 | static int afs_deliver_fs_fetch_status(struct afs_call *call, |
@@ -175,7 +221,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call, | |||
175 | 221 | ||
176 | /* unmarshall the reply once we've received all of it */ | 222 | /* unmarshall the reply once we've received all of it */ |
177 | bp = call->buffer; | 223 | bp = call->buffer; |
178 | xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); | 224 | xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); |
179 | xdr_decode_AFSCallBack(&bp, vnode); | 225 | xdr_decode_AFSCallBack(&bp, vnode); |
180 | if (call->reply2) | 226 | if (call->reply2) |
181 | xdr_decode_AFSVolSync(&bp, call->reply2); | 227 | xdr_decode_AFSVolSync(&bp, call->reply2); |
@@ -206,7 +252,7 @@ int afs_fs_fetch_file_status(struct afs_server *server, | |||
206 | struct afs_call *call; | 252 | struct afs_call *call; |
207 | __be32 *bp; | 253 | __be32 *bp; |
208 | 254 | ||
209 | _enter(",%x,{%x:%d},,", | 255 | _enter(",%x,{%x:%u},,", |
210 | key_serial(key), vnode->fid.vid, vnode->fid.vnode); | 256 | key_serial(key), vnode->fid.vid, vnode->fid.vnode); |
211 | 257 | ||
212 | call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); | 258 | call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); |
@@ -265,25 +311,20 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, | |||
265 | call->offset = 0; | 311 | call->offset = 0; |
266 | call->unmarshall++; | 312 | call->unmarshall++; |
267 | 313 | ||
268 | if (call->count < PAGE_SIZE) { | ||
269 | page = call->reply3; | ||
270 | buffer = kmap_atomic(page, KM_USER0); | ||
271 | memset(buffer + PAGE_SIZE - call->count, 0, | ||
272 | call->count); | ||
273 | kunmap_atomic(buffer, KM_USER0); | ||
274 | } | ||
275 | |||
276 | /* extract the returned data */ | 314 | /* extract the returned data */ |
277 | case 2: | 315 | case 2: |
278 | _debug("extract data"); | 316 | _debug("extract data"); |
279 | page = call->reply3; | 317 | if (call->count > 0) { |
280 | buffer = kmap_atomic(page, KM_USER0); | 318 | page = call->reply3; |
281 | ret = afs_extract_data(call, skb, last, buffer, call->count); | 319 | buffer = kmap_atomic(page, KM_USER0); |
282 | kunmap_atomic(buffer, KM_USER0); | 320 | ret = afs_extract_data(call, skb, last, buffer, |
283 | switch (ret) { | 321 | call->count); |
284 | case 0: break; | 322 | kunmap_atomic(buffer, KM_USER0); |
285 | case -EAGAIN: return 0; | 323 | switch (ret) { |
286 | default: return ret; | 324 | case 0: break; |
325 | case -EAGAIN: return 0; | ||
326 | default: return ret; | ||
327 | } | ||
287 | } | 328 | } |
288 | 329 | ||
289 | call->offset = 0; | 330 | call->offset = 0; |
@@ -300,7 +341,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, | |||
300 | } | 341 | } |
301 | 342 | ||
302 | bp = call->buffer; | 343 | bp = call->buffer; |
303 | xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); | 344 | xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); |
304 | xdr_decode_AFSCallBack(&bp, vnode); | 345 | xdr_decode_AFSCallBack(&bp, vnode); |
305 | if (call->reply2) | 346 | if (call->reply2) |
306 | xdr_decode_AFSVolSync(&bp, call->reply2); | 347 | xdr_decode_AFSVolSync(&bp, call->reply2); |
@@ -318,6 +359,14 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, | |||
318 | if (!last) | 359 | if (!last) |
319 | return 0; | 360 | return 0; |
320 | 361 | ||
362 | if (call->count < PAGE_SIZE) { | ||
363 | _debug("clear"); | ||
364 | page = call->reply3; | ||
365 | buffer = kmap_atomic(page, KM_USER0); | ||
366 | memset(buffer + call->count, 0, PAGE_SIZE - call->count); | ||
367 | kunmap_atomic(buffer, KM_USER0); | ||
368 | } | ||
369 | |||
321 | _leave(" = 0 [done]"); | 370 | _leave(" = 0 [done]"); |
322 | return 0; | 371 | return 0; |
323 | } | 372 | } |
@@ -476,8 +525,8 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call, | |||
476 | /* unmarshall the reply once we've received all of it */ | 525 | /* unmarshall the reply once we've received all of it */ |
477 | bp = call->buffer; | 526 | bp = call->buffer; |
478 | xdr_decode_AFSFid(&bp, call->reply2); | 527 | xdr_decode_AFSFid(&bp, call->reply2); |
479 | xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL); | 528 | xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL); |
480 | xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); | 529 | xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); |
481 | xdr_decode_AFSCallBack_raw(&bp, call->reply4); | 530 | xdr_decode_AFSCallBack_raw(&bp, call->reply4); |
482 | /* xdr_decode_AFSVolSync(&bp, call->replyX); */ | 531 | /* xdr_decode_AFSVolSync(&bp, call->replyX); */ |
483 | 532 | ||
@@ -574,7 +623,7 @@ static int afs_deliver_fs_remove(struct afs_call *call, | |||
574 | 623 | ||
575 | /* unmarshall the reply once we've received all of it */ | 624 | /* unmarshall the reply once we've received all of it */ |
576 | bp = call->buffer; | 625 | bp = call->buffer; |
577 | xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); | 626 | xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); |
578 | /* xdr_decode_AFSVolSync(&bp, call->replyX); */ | 627 | /* xdr_decode_AFSVolSync(&bp, call->replyX); */ |
579 | 628 | ||
580 | _leave(" = 0 [done]"); | 629 | _leave(" = 0 [done]"); |
@@ -657,8 +706,8 @@ static int afs_deliver_fs_link(struct afs_call *call, | |||
657 | 706 | ||
658 | /* unmarshall the reply once we've received all of it */ | 707 | /* unmarshall the reply once we've received all of it */ |
659 | bp = call->buffer; | 708 | bp = call->buffer; |
660 | xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); | 709 | xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); |
661 | xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode); | 710 | xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode, NULL); |
662 | /* xdr_decode_AFSVolSync(&bp, call->replyX); */ | 711 | /* xdr_decode_AFSVolSync(&bp, call->replyX); */ |
663 | 712 | ||
664 | _leave(" = 0 [done]"); | 713 | _leave(" = 0 [done]"); |
@@ -746,8 +795,8 @@ static int afs_deliver_fs_symlink(struct afs_call *call, | |||
746 | /* unmarshall the reply once we've received all of it */ | 795 | /* unmarshall the reply once we've received all of it */ |
747 | bp = call->buffer; | 796 | bp = call->buffer; |
748 | xdr_decode_AFSFid(&bp, call->reply2); | 797 | xdr_decode_AFSFid(&bp, call->reply2); |
749 | xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL); | 798 | xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL); |
750 | xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode); | 799 | xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); |
751 | /* xdr_decode_AFSVolSync(&bp, call->replyX); */ | 800 | /* xdr_decode_AFSVolSync(&bp, call->replyX); */ |
752 | 801 | ||
753 | _leave(" = 0 [done]"); | 802 | _leave(" = 0 [done]"); |
@@ -852,9 +901,10 @@ static int afs_deliver_fs_rename(struct afs_call *call, | |||
852 | 901 | ||
853 | /* unmarshall the reply once we've received all of it */ | 902 | /* unmarshall the reply once we've received all of it */ |
854 | bp = call->buffer; | 903 | bp = call->buffer; |
855 | xdr_decode_AFSFetchStatus(&bp, &orig_dvnode->status, orig_dvnode); | 904 | xdr_decode_AFSFetchStatus(&bp, &orig_dvnode->status, orig_dvnode, NULL); |
856 | if (new_dvnode != orig_dvnode) | 905 | if (new_dvnode != orig_dvnode) |
857 | xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode); | 906 | xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode, |
907 | NULL); | ||
858 | /* xdr_decode_AFSVolSync(&bp, call->replyX); */ | 908 | /* xdr_decode_AFSVolSync(&bp, call->replyX); */ |
859 | 909 | ||
860 | _leave(" = 0 [done]"); | 910 | _leave(" = 0 [done]"); |
@@ -936,3 +986,262 @@ int afs_fs_rename(struct afs_server *server, | |||
936 | 986 | ||
937 | return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); | 987 | return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); |
938 | } | 988 | } |
989 | |||
990 | /* | ||
991 | * deliver reply data to an FS.StoreData | ||
992 | */ | ||
993 | static int afs_deliver_fs_store_data(struct afs_call *call, | ||
994 | struct sk_buff *skb, bool last) | ||
995 | { | ||
996 | struct afs_vnode *vnode = call->reply; | ||
997 | const __be32 *bp; | ||
998 | |||
999 | _enter(",,%u", last); | ||
1000 | |||
1001 | afs_transfer_reply(call, skb); | ||
1002 | if (!last) { | ||
1003 | _leave(" = 0 [more]"); | ||
1004 | return 0; | ||
1005 | } | ||
1006 | |||
1007 | if (call->reply_size != call->reply_max) { | ||
1008 | _leave(" = -EBADMSG [%u != %u]", | ||
1009 | call->reply_size, call->reply_max); | ||
1010 | return -EBADMSG; | ||
1011 | } | ||
1012 | |||
1013 | /* unmarshall the reply once we've received all of it */ | ||
1014 | bp = call->buffer; | ||
1015 | xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, | ||
1016 | &call->store_version); | ||
1017 | /* xdr_decode_AFSVolSync(&bp, call->replyX); */ | ||
1018 | |||
1019 | afs_pages_written_back(vnode, call); | ||
1020 | |||
1021 | _leave(" = 0 [done]"); | ||
1022 | return 0; | ||
1023 | } | ||
1024 | |||
1025 | /* | ||
1026 | * FS.StoreData operation type | ||
1027 | */ | ||
1028 | static const struct afs_call_type afs_RXFSStoreData = { | ||
1029 | .name = "FS.StoreData", | ||
1030 | .deliver = afs_deliver_fs_store_data, | ||
1031 | .abort_to_error = afs_abort_to_error, | ||
1032 | .destructor = afs_flat_call_destructor, | ||
1033 | }; | ||
1034 | |||
1035 | /* | ||
1036 | * store a set of pages | ||
1037 | */ | ||
1038 | int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, | ||
1039 | pgoff_t first, pgoff_t last, | ||
1040 | unsigned offset, unsigned to, | ||
1041 | const struct afs_wait_mode *wait_mode) | ||
1042 | { | ||
1043 | struct afs_vnode *vnode = wb->vnode; | ||
1044 | struct afs_call *call; | ||
1045 | loff_t size, pos, i_size; | ||
1046 | __be32 *bp; | ||
1047 | |||
1048 | _enter(",%x,{%x:%u},,", | ||
1049 | key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); | ||
1050 | |||
1051 | size = to - offset; | ||
1052 | if (first != last) | ||
1053 | size += (loff_t)(last - first) << PAGE_SHIFT; | ||
1054 | pos = (loff_t)first << PAGE_SHIFT; | ||
1055 | pos += offset; | ||
1056 | |||
1057 | i_size = i_size_read(&vnode->vfs_inode); | ||
1058 | if (pos + size > i_size) | ||
1059 | i_size = size + pos; | ||
1060 | |||
1061 | _debug("size %llx, at %llx, i_size %llx", | ||
1062 | (unsigned long long) size, (unsigned long long) pos, | ||
1063 | (unsigned long long) i_size); | ||
1064 | |||
1065 | BUG_ON(i_size > 0xffffffff); // TODO: use 64-bit store | ||
1066 | |||
1067 | call = afs_alloc_flat_call(&afs_RXFSStoreData, | ||
1068 | (4 + 6 + 3) * 4, | ||
1069 | (21 + 6) * 4); | ||
1070 | if (!call) | ||
1071 | return -ENOMEM; | ||
1072 | |||
1073 | call->wb = wb; | ||
1074 | call->key = wb->key; | ||
1075 | call->reply = vnode; | ||
1076 | call->service_id = FS_SERVICE; | ||
1077 | call->port = htons(AFS_FS_PORT); | ||
1078 | call->mapping = vnode->vfs_inode.i_mapping; | ||
1079 | call->first = first; | ||
1080 | call->last = last; | ||
1081 | call->first_offset = offset; | ||
1082 | call->last_to = to; | ||
1083 | call->send_pages = true; | ||
1084 | call->store_version = vnode->status.data_version + 1; | ||
1085 | |||
1086 | /* marshall the parameters */ | ||
1087 | bp = call->request; | ||
1088 | *bp++ = htonl(FSSTOREDATA); | ||
1089 | *bp++ = htonl(vnode->fid.vid); | ||
1090 | *bp++ = htonl(vnode->fid.vnode); | ||
1091 | *bp++ = htonl(vnode->fid.unique); | ||
1092 | |||
1093 | *bp++ = 0; /* mask */ | ||
1094 | *bp++ = 0; /* mtime */ | ||
1095 | *bp++ = 0; /* owner */ | ||
1096 | *bp++ = 0; /* group */ | ||
1097 | *bp++ = 0; /* unix mode */ | ||
1098 | *bp++ = 0; /* segment size */ | ||
1099 | |||
1100 | *bp++ = htonl(pos); | ||
1101 | *bp++ = htonl(size); | ||
1102 | *bp++ = htonl(i_size); | ||
1103 | |||
1104 | return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); | ||
1105 | } | ||
1106 | |||
1107 | /* | ||
1108 | * deliver reply data to an FS.StoreStatus | ||
1109 | */ | ||
1110 | static int afs_deliver_fs_store_status(struct afs_call *call, | ||
1111 | struct sk_buff *skb, bool last) | ||
1112 | { | ||
1113 | afs_dataversion_t *store_version; | ||
1114 | struct afs_vnode *vnode = call->reply; | ||
1115 | const __be32 *bp; | ||
1116 | |||
1117 | _enter(",,%u", last); | ||
1118 | |||
1119 | afs_transfer_reply(call, skb); | ||
1120 | if (!last) { | ||
1121 | _leave(" = 0 [more]"); | ||
1122 | return 0; | ||
1123 | } | ||
1124 | |||
1125 | if (call->reply_size != call->reply_max) { | ||
1126 | _leave(" = -EBADMSG [%u != %u]", | ||
1127 | call->reply_size, call->reply_max); | ||
1128 | return -EBADMSG; | ||
1129 | } | ||
1130 | |||
1131 | /* unmarshall the reply once we've received all of it */ | ||
1132 | store_version = NULL; | ||
1133 | if (call->operation_ID == FSSTOREDATA) | ||
1134 | store_version = &call->store_version; | ||
1135 | |||
1136 | bp = call->buffer; | ||
1137 | xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, store_version); | ||
1138 | /* xdr_decode_AFSVolSync(&bp, call->replyX); */ | ||
1139 | |||
1140 | _leave(" = 0 [done]"); | ||
1141 | return 0; | ||
1142 | } | ||
1143 | |||
1144 | /* | ||
1145 | * FS.StoreStatus operation type | ||
1146 | */ | ||
1147 | static const struct afs_call_type afs_RXFSStoreStatus = { | ||
1148 | .name = "FS.StoreStatus", | ||
1149 | .deliver = afs_deliver_fs_store_status, | ||
1150 | .abort_to_error = afs_abort_to_error, | ||
1151 | .destructor = afs_flat_call_destructor, | ||
1152 | }; | ||
1153 | |||
1154 | static const struct afs_call_type afs_RXFSStoreData_as_Status = { | ||
1155 | .name = "FS.StoreData", | ||
1156 | .deliver = afs_deliver_fs_store_status, | ||
1157 | .abort_to_error = afs_abort_to_error, | ||
1158 | .destructor = afs_flat_call_destructor, | ||
1159 | }; | ||
1160 | |||
1161 | /* | ||
1162 | * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus | ||
1163 | * so as to alter the file size also | ||
1164 | */ | ||
1165 | static int afs_fs_setattr_size(struct afs_server *server, struct key *key, | ||
1166 | struct afs_vnode *vnode, struct iattr *attr, | ||
1167 | const struct afs_wait_mode *wait_mode) | ||
1168 | { | ||
1169 | struct afs_call *call; | ||
1170 | __be32 *bp; | ||
1171 | |||
1172 | _enter(",%x,{%x:%u},,", | ||
1173 | key_serial(key), vnode->fid.vid, vnode->fid.vnode); | ||
1174 | |||
1175 | ASSERT(attr->ia_valid & ATTR_SIZE); | ||
1176 | ASSERTCMP(attr->ia_size, <=, 0xffffffff); // TODO: use 64-bit store | ||
1177 | |||
1178 | call = afs_alloc_flat_call(&afs_RXFSStoreData_as_Status, | ||
1179 | (4 + 6 + 3) * 4, | ||
1180 | (21 + 6) * 4); | ||
1181 | if (!call) | ||
1182 | return -ENOMEM; | ||
1183 | |||
1184 | call->key = key; | ||
1185 | call->reply = vnode; | ||
1186 | call->service_id = FS_SERVICE; | ||
1187 | call->port = htons(AFS_FS_PORT); | ||
1188 | call->store_version = vnode->status.data_version + 1; | ||
1189 | call->operation_ID = FSSTOREDATA; | ||
1190 | |||
1191 | /* marshall the parameters */ | ||
1192 | bp = call->request; | ||
1193 | *bp++ = htonl(FSSTOREDATA); | ||
1194 | *bp++ = htonl(vnode->fid.vid); | ||
1195 | *bp++ = htonl(vnode->fid.vnode); | ||
1196 | *bp++ = htonl(vnode->fid.unique); | ||
1197 | |||
1198 | xdr_encode_AFS_StoreStatus(&bp, attr); | ||
1199 | |||
1200 | *bp++ = 0; /* position of start of write */ | ||
1201 | *bp++ = 0; /* size of write */ | ||
1202 | *bp++ = htonl(attr->ia_size); /* new file length */ | ||
1203 | |||
1204 | return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); | ||
1205 | } | ||
1206 | |||
1207 | /* | ||
1208 | * set the attributes on a file, using FS.StoreData if there's a change in file | ||
1209 | * size, and FS.StoreStatus otherwise | ||
1210 | */ | ||
1211 | int afs_fs_setattr(struct afs_server *server, struct key *key, | ||
1212 | struct afs_vnode *vnode, struct iattr *attr, | ||
1213 | const struct afs_wait_mode *wait_mode) | ||
1214 | { | ||
1215 | struct afs_call *call; | ||
1216 | __be32 *bp; | ||
1217 | |||
1218 | if (attr->ia_valid & ATTR_SIZE) | ||
1219 | return afs_fs_setattr_size(server, key, vnode, attr, | ||
1220 | wait_mode); | ||
1221 | |||
1222 | _enter(",%x,{%x:%u},,", | ||
1223 | key_serial(key), vnode->fid.vid, vnode->fid.vnode); | ||
1224 | |||
1225 | call = afs_alloc_flat_call(&afs_RXFSStoreStatus, | ||
1226 | (4 + 6) * 4, | ||
1227 | (21 + 6) * 4); | ||
1228 | if (!call) | ||
1229 | return -ENOMEM; | ||
1230 | |||
1231 | call->key = key; | ||
1232 | call->reply = vnode; | ||
1233 | call->service_id = FS_SERVICE; | ||
1234 | call->port = htons(AFS_FS_PORT); | ||
1235 | call->operation_ID = FSSTORESTATUS; | ||
1236 | |||
1237 | /* marshall the parameters */ | ||
1238 | bp = call->request; | ||
1239 | *bp++ = htonl(FSSTORESTATUS); | ||
1240 | *bp++ = htonl(vnode->fid.vid); | ||
1241 | *bp++ = htonl(vnode->fid.vnode); | ||
1242 | *bp++ = htonl(vnode->fid.unique); | ||
1243 | |||
1244 | xdr_encode_AFS_StoreStatus(&bp, attr); | ||
1245 | |||
1246 | return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode); | ||
1247 | } | ||
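Putting the marshalling in afs_fs_store_data() together, the FS.StoreData request built above has the following shape (all fields 32-bit big-endian; this layout is read off the code, not taken from an AFS protocol specification):

	/*
	 *	u32 opcode	FSSTOREDATA (133)
	 *	u32 fid.vid	target volume ID
	 *	u32 fid.vnode	target vnode number
	 *	u32 fid.unique	uniquifier
	 *	u32 x 6		AFSStoreStatus (all zero here: no attr change)
	 *	u32 pos		byte offset of the start of the write
	 *	u32 size	number of bytes being stored
	 *	u32 new_len	resulting file length
	 *	...		page data follows, sent via call->send_pages
	 */

The BUG_ON(i_size > 0xffffffff) and the TODO comments mark the obvious limitation: these are the 32-bit RPC variants, so files beyond 4 GB will need the 64-bit store operations.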
diff --git a/fs/afs/inode.c b/fs/afs/inode.c index c184a4ee5995..515a5d12d8fb 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c | |||
@@ -125,7 +125,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, | |||
125 | struct inode *inode; | 125 | struct inode *inode; |
126 | int ret; | 126 | int ret; |
127 | 127 | ||
128 | _enter(",{%u,%u,%u},,", fid->vid, fid->vnode, fid->unique); | 128 | _enter(",{%x:%u.%u},,", fid->vid, fid->vnode, fid->unique); |
129 | 129 | ||
130 | as = sb->s_fs_info; | 130 | as = sb->s_fs_info; |
131 | data.volume = as->volume; | 131 | data.volume = as->volume; |
@@ -204,6 +204,19 @@ bad_inode: | |||
204 | } | 204 | } |
205 | 205 | ||
206 | /* | 206 | /* |
207 | * mark the data attached to an inode as obsolete due to a write on the server | ||
208 | * - might also want to ditch all the outstanding writes and dirty pages | ||
209 | */ | ||
210 | void afs_zap_data(struct afs_vnode *vnode) | ||
211 | { | ||
212 | _enter("zap data {%x:%u}", vnode->fid.vid, vnode->fid.vnode); | ||
213 | |||
214 | /* nuke all the non-dirty pages that aren't locked, mapped or being | ||
215 | * written back */ | ||
216 | invalidate_remote_inode(&vnode->vfs_inode); | ||
217 | } | ||
218 | |||
219 | /* | ||
207 | * validate a vnode/inode | 220 | * validate a vnode/inode |
208 | * - there are several things we need to check | 221 | * - there are several things we need to check |
209 | * - parent dir data changes (rm, rmdir, rename, mkdir, create, link, | 222 | * - parent dir data changes (rm, rmdir, rename, mkdir, create, link, |
@@ -258,10 +271,8 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) | |||
258 | 271 | ||
259 | /* if the vnode's data version number changed then its contents are | 272 | /* if the vnode's data version number changed then its contents are |
260 | * different */ | 273 | * different */ |
261 | if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { | 274 | if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) |
262 | _debug("zap data {%x:%d}", vnode->fid.vid, vnode->fid.vnode); | 275 | afs_zap_data(vnode); |
263 | invalidate_remote_inode(&vnode->vfs_inode); | ||
264 | } | ||
265 | 276 | ||
266 | clear_bit(AFS_VNODE_MODIFIED, &vnode->flags); | 277 | clear_bit(AFS_VNODE_MODIFIED, &vnode->flags); |
267 | mutex_unlock(&vnode->validate_lock); | 278 | mutex_unlock(&vnode->validate_lock); |
@@ -278,7 +289,7 @@ error_unlock: | |||
278 | /* | 289 | /* |
279 | * read the attributes of an inode | 290 | * read the attributes of an inode |
280 | */ | 291 | */ |
281 | int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, | 292 | int afs_getattr(struct vfsmount *mnt, struct dentry *dentry, |
282 | struct kstat *stat) | 293 | struct kstat *stat) |
283 | { | 294 | { |
284 | struct inode *inode; | 295 | struct inode *inode; |
@@ -301,7 +312,7 @@ void afs_clear_inode(struct inode *inode) | |||
301 | 312 | ||
302 | vnode = AFS_FS_I(inode); | 313 | vnode = AFS_FS_I(inode); |
303 | 314 | ||
304 | _enter("{%x:%d.%d} v=%u x=%u t=%u }", | 315 | _enter("{%x:%u.%d} v=%u x=%u t=%u }", |
305 | vnode->fid.vid, | 316 | vnode->fid.vid, |
306 | vnode->fid.vnode, | 317 | vnode->fid.vnode, |
307 | vnode->fid.unique, | 318 | vnode->fid.unique, |
@@ -323,6 +334,7 @@ void afs_clear_inode(struct inode *inode) | |||
323 | vnode->server = NULL; | 334 | vnode->server = NULL; |
324 | } | 335 | } |
325 | 336 | ||
337 | ASSERT(list_empty(&vnode->writebacks)); | ||
326 | ASSERT(!vnode->cb_promised); | 338 | ASSERT(!vnode->cb_promised); |
327 | 339 | ||
328 | #ifdef AFS_CACHING_SUPPORT | 340 | #ifdef AFS_CACHING_SUPPORT |
@@ -339,3 +351,47 @@ void afs_clear_inode(struct inode *inode) | |||
339 | 351 | ||
340 | _leave(""); | 352 | _leave(""); |
341 | } | 353 | } |
354 | |||
355 | /* | ||
356 | * set the attributes of an inode | ||
357 | */ | ||
358 | int afs_setattr(struct dentry *dentry, struct iattr *attr) | ||
359 | { | ||
360 | struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); | ||
361 | struct key *key; | ||
362 | int ret; | ||
363 | |||
364 | _enter("{%x:%u},{n=%s},%x", | ||
365 | vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name, | ||
366 | attr->ia_valid); | ||
367 | |||
368 | if (!(attr->ia_valid & (ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID | | ||
369 | ATTR_MTIME))) { | ||
370 | _leave(" = 0 [unsupported]"); | ||
371 | return 0; | ||
372 | } | ||
373 | |||
374 | /* flush any dirty data outstanding on a regular file */ | ||
375 | if (S_ISREG(vnode->vfs_inode.i_mode)) { | ||
376 | filemap_write_and_wait(vnode->vfs_inode.i_mapping); | ||
377 | afs_writeback_all(vnode); | ||
378 | } | ||
379 | |||
380 | if (attr->ia_valid & ATTR_FILE) { | ||
381 | key = attr->ia_file->private_data; | ||
382 | } else { | ||
383 | key = afs_request_key(vnode->volume->cell); | ||
384 | if (IS_ERR(key)) { | ||
385 | ret = PTR_ERR(key); | ||
386 | goto error; | ||
387 | } | ||
388 | } | ||
389 | |||
390 | ret = afs_vnode_setattr(vnode, key, attr); | ||
391 | if (!(attr->ia_valid & ATTR_FILE)) | ||
392 | key_put(key); | ||
393 | |||
394 | error: | ||
395 | _leave(" = %d", ret); | ||
396 | return ret; | ||
397 | } | ||
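The new setattr path splits on whether the file size changes: afs_fs_setattr() routes ATTR_SIZE requests through FS.StoreData with a zero-length write plus the new file length (afs_fs_setattr_size()), and everything else through FS.StoreStatus. Either way the attributes travel in an AFSStoreStatus block whose mask is derived from the iattr flags, as xdr_encode_AFS_StoreStatus() above shows:

	if (attr->ia_valid & ATTR_MTIME) mask |= AFS_SET_MTIME;
	if (attr->ia_valid & ATTR_UID)   mask |= AFS_SET_OWNER;
	if (attr->ia_valid & ATTR_GID)   mask |= AFS_SET_GROUP;
	if (attr->ia_valid & ATTR_MODE)  mask |= AFS_SET_MODE;

Note also the key handling in afs_setattr(): with ATTR_FILE set, the key is borrowed from the open file's private data and must not be put, whereas a key obtained via afs_request_key() is released once the operation completes.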
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index d90c158cd934..a30d4fa768e3 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
@@ -21,6 +21,7 @@ | |||
21 | 21 | ||
22 | #define AFS_CELL_MAX_ADDRS 15 | 22 | #define AFS_CELL_MAX_ADDRS 15 |
23 | 23 | ||
24 | struct pagevec; | ||
24 | struct afs_call; | 25 | struct afs_call; |
25 | 26 | ||
26 | typedef enum { | 27 | typedef enum { |
@@ -75,12 +76,15 @@ struct afs_call { | |||
75 | struct key *key; /* security for this call */ | 76 | struct key *key; /* security for this call */ |
76 | struct afs_server *server; /* server affected by incoming CM call */ | 77 | struct afs_server *server; /* server affected by incoming CM call */ |
77 | void *request; /* request data (first part) */ | 78 | void *request; /* request data (first part) */ |
78 | void *request2; /* request data (second part) */ | 79 | struct address_space *mapping; /* page set */ |
80 | struct afs_writeback *wb; /* writeback being performed */ | ||
79 | void *buffer; /* reply receive buffer */ | 81 | void *buffer; /* reply receive buffer */ |
80 | void *reply; /* reply buffer (first part) */ | 82 | void *reply; /* reply buffer (first part) */ |
81 | void *reply2; /* reply buffer (second part) */ | 83 | void *reply2; /* reply buffer (second part) */ |
82 | void *reply3; /* reply buffer (third part) */ | 84 | void *reply3; /* reply buffer (third part) */ |
83 | void *reply4; /* reply buffer (fourth part) */ | 85 | void *reply4; /* reply buffer (fourth part) */ |
86 | pgoff_t first; /* first page in mapping to deal with */ | ||
87 | pgoff_t last; /* last page in mapping to deal with */ | ||
84 | enum { /* call state */ | 88 | enum { /* call state */ |
85 | AFS_CALL_REQUESTING, /* request is being sent for outgoing call */ | 89 | AFS_CALL_REQUESTING, /* request is being sent for outgoing call */ |
86 | AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */ | 90 | AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */ |
@@ -97,14 +101,18 @@ struct afs_call { | |||
97 | unsigned request_size; /* size of request data */ | 101 | unsigned request_size; /* size of request data */ |
98 | unsigned reply_max; /* maximum size of reply */ | 102 | unsigned reply_max; /* maximum size of reply */ |
99 | unsigned reply_size; /* current size of reply */ | 103 | unsigned reply_size; /* current size of reply */ |
104 | unsigned first_offset; /* offset into mapping[first] */ | ||
105 | unsigned last_to; /* amount of mapping[last] */ | ||
100 | unsigned short offset; /* offset into received data store */ | 106 | unsigned short offset; /* offset into received data store */ |
101 | unsigned char unmarshall; /* unmarshalling phase */ | 107 | unsigned char unmarshall; /* unmarshalling phase */ |
102 | bool incoming; /* T if incoming call */ | 108 | bool incoming; /* T if incoming call */ |
109 | bool send_pages; /* T if data from mapping should be sent */ | ||
103 | u16 service_id; /* RxRPC service ID to call */ | 110 | u16 service_id; /* RxRPC service ID to call */ |
104 | __be16 port; /* target UDP port */ | 111 | __be16 port; /* target UDP port */ |
105 | __be32 operation_ID; /* operation ID for an incoming call */ | 112 | __be32 operation_ID; /* operation ID for an incoming call */ |
106 | u32 count; /* count for use in unmarshalling */ | 113 | u32 count; /* count for use in unmarshalling */ |
107 | __be32 tmp; /* place to extract temporary data */ | 114 | __be32 tmp; /* place to extract temporary data */ |
115 | afs_dataversion_t store_version; /* updated version expected from store */ | ||
108 | }; | 116 | }; |
109 | 117 | ||
110 | struct afs_call_type { | 118 | struct afs_call_type { |
@@ -124,6 +132,32 @@ struct afs_call_type { | |||
124 | }; | 132 | }; |
125 | 133 | ||
126 | /* | 134 | /* |
135 | * record of an outstanding writeback on a vnode | ||
136 | */ | ||
137 | struct afs_writeback { | ||
138 | struct list_head link; /* link in vnode->writebacks */ | ||
139 | struct work_struct writer; /* work item to perform the writeback */ | ||
140 | struct afs_vnode *vnode; /* vnode to which this write applies */ | ||
141 | struct key *key; /* owner of this write */ | ||
142 | wait_queue_head_t waitq; /* completion and ready wait queue */ | ||
143 | pgoff_t first; /* first page in batch */ | ||
144 | pgoff_t point; /* last page in current store op */ | ||
145 | pgoff_t last; /* last page in batch (inclusive) */ | ||
146 | unsigned offset_first; /* offset into first page of start of write */ | ||
147 | unsigned to_last; /* offset into last page of end of write */ | ||
148 | int num_conflicts; /* count of conflicting writes in list */ | ||
149 | int usage; | ||
150 | bool conflicts; /* T if has dependent conflicts */ | ||
151 | enum { | ||
152 | AFS_WBACK_SYNCING, /* synchronisation being performed */ | ||
153 | AFS_WBACK_PENDING, /* write pending */ | ||
154 | AFS_WBACK_CONFLICTING, /* conflicting writes posted */ | ||
155 | AFS_WBACK_WRITING, /* writing back */ | ||
156 | AFS_WBACK_COMPLETE /* the writeback record has been unlinked */ | ||
157 | } state __attribute__((packed)); | ||
158 | }; | ||
159 | |||
160 | /* | ||
127 | * AFS superblock private data | 161 | * AFS superblock private data |
128 | * - there's one superblock per volume | 162 | * - there's one superblock per volume |
129 | */ | 163 | */ |
@@ -305,6 +339,7 @@ struct afs_vnode { | |||
305 | wait_queue_head_t update_waitq; /* status fetch waitqueue */ | 339 | wait_queue_head_t update_waitq; /* status fetch waitqueue */ |
306 | int update_cnt; /* number of outstanding ops that will update the | 340 | int update_cnt; /* number of outstanding ops that will update the |
307 | * status */ | 341 | * status */ |
342 | spinlock_t writeback_lock; /* lock for writebacks */ | ||
308 | spinlock_t lock; /* waitqueue/flags lock */ | 343 | spinlock_t lock; /* waitqueue/flags lock */ |
309 | unsigned long flags; | 344 | unsigned long flags; |
310 | #define AFS_VNODE_CB_BROKEN 0 /* set if vnode's callback was broken */ | 345 | #define AFS_VNODE_CB_BROKEN 0 /* set if vnode's callback was broken */ |
@@ -316,6 +351,8 @@ struct afs_vnode { | |||
316 | 351 | ||
317 | long acl_order; /* ACL check count (callback break count) */ | 352 | long acl_order; /* ACL check count (callback break count) */ |
318 | 353 | ||
354 | struct list_head writebacks; /* alterations in pagecache that need writing */ | ||
355 | |||
319 | /* outstanding callback notification on this file */ | 356 | /* outstanding callback notification on this file */ |
320 | struct rb_node server_rb; /* link in server->fs_vnodes */ | 357 | struct rb_node server_rb; /* link in server->fs_vnodes */ |
321 | struct rb_node cb_promise; /* link in server->cb_promises */ | 358 | struct rb_node cb_promise; /* link in server->cb_promises */ |
@@ -433,10 +470,6 @@ extern const struct file_operations afs_file_operations; | |||
433 | extern int afs_open(struct inode *, struct file *); | 470 | extern int afs_open(struct inode *, struct file *); |
434 | extern int afs_release(struct inode *, struct file *); | 471 | extern int afs_release(struct inode *, struct file *); |
435 | 472 | ||
436 | #ifdef AFS_CACHING_SUPPORT | ||
437 | extern int afs_cache_get_page_cookie(struct page *, struct cachefs_page **); | ||
438 | #endif | ||
439 | |||
440 | /* | 473 | /* |
441 | * fsclient.c | 474 | * fsclient.c |
442 | */ | 475 | */ |
@@ -467,6 +500,12 @@ extern int afs_fs_rename(struct afs_server *, struct key *, | |||
467 | struct afs_vnode *, const char *, | 500 | struct afs_vnode *, const char *, |
468 | struct afs_vnode *, const char *, | 501 | struct afs_vnode *, const char *, |
469 | const struct afs_wait_mode *); | 502 | const struct afs_wait_mode *); |
503 | extern int afs_fs_store_data(struct afs_server *, struct afs_writeback *, | ||
504 | pgoff_t, pgoff_t, unsigned, unsigned, | ||
505 | const struct afs_wait_mode *); | ||
506 | extern int afs_fs_setattr(struct afs_server *, struct key *, | ||
507 | struct afs_vnode *, struct iattr *, | ||
508 | const struct afs_wait_mode *); | ||
470 | 509 | ||
471 | /* | 510 | /* |
472 | * inode.c | 511 | * inode.c |
@@ -474,10 +513,10 @@ extern int afs_fs_rename(struct afs_server *, struct key *, | |||
474 | extern struct inode *afs_iget(struct super_block *, struct key *, | 513 | extern struct inode *afs_iget(struct super_block *, struct key *, |
475 | struct afs_fid *, struct afs_file_status *, | 514 | struct afs_fid *, struct afs_file_status *, |
476 | struct afs_callback *); | 515 | struct afs_callback *); |
516 | extern void afs_zap_data(struct afs_vnode *); | ||
477 | extern int afs_validate(struct afs_vnode *, struct key *); | 517 | extern int afs_validate(struct afs_vnode *, struct key *); |
478 | extern int afs_inode_getattr(struct vfsmount *, struct dentry *, | 518 | extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
479 | struct kstat *); | 519 | extern int afs_setattr(struct dentry *, struct iattr *); |
480 | extern void afs_zap_permits(struct rcu_head *); | ||
481 | extern void afs_clear_inode(struct inode *); | 520 | extern void afs_clear_inode(struct inode *); |
482 | 521 | ||
483 | /* | 522 | /* |
@@ -533,6 +572,7 @@ extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *, | |||
533 | */ | 572 | */ |
534 | extern void afs_clear_permits(struct afs_vnode *); | 573 | extern void afs_clear_permits(struct afs_vnode *); |
535 | extern void afs_cache_permit(struct afs_vnode *, struct key *, long); | 574 | extern void afs_cache_permit(struct afs_vnode *, struct key *, long); |
575 | extern void afs_zap_permits(struct rcu_head *); | ||
536 | extern struct key *afs_request_key(struct afs_cell *); | 576 | extern struct key *afs_request_key(struct afs_cell *); |
537 | extern int afs_permission(struct inode *, int, struct nameidata *); | 577 | extern int afs_permission(struct inode *, int, struct nameidata *); |
538 | 578 | ||
@@ -629,6 +669,9 @@ extern int afs_vnode_symlink(struct afs_vnode *, struct key *, const char *, | |||
629 | struct afs_file_status *, struct afs_server **); | 669 | struct afs_file_status *, struct afs_server **); |
630 | extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *, | 670 | extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *, |
631 | struct key *, const char *, const char *); | 671 | struct key *, const char *, const char *); |
672 | extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t, | ||
673 | unsigned, unsigned); | ||
674 | extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *); | ||
632 | 675 | ||
633 | /* | 676 | /* |
634 | * volume.c | 677 | * volume.c |
@@ -645,6 +688,23 @@ extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *); | |||
645 | extern int afs_volume_release_fileserver(struct afs_vnode *, | 688 | extern int afs_volume_release_fileserver(struct afs_vnode *, |
646 | struct afs_server *, int); | 689 | struct afs_server *, int); |
647 | 690 | ||
691 | /* | ||
692 | * write.c | ||
693 | */ | ||
694 | extern int afs_set_page_dirty(struct page *); | ||
695 | extern void afs_put_writeback(struct afs_writeback *); | ||
696 | extern int afs_prepare_write(struct file *, struct page *, unsigned, unsigned); | ||
697 | extern int afs_commit_write(struct file *, struct page *, unsigned, unsigned); | ||
698 | extern int afs_writepage(struct page *, struct writeback_control *); | ||
699 | extern int afs_writepages(struct address_space *, struct writeback_control *); | ||
700 | extern int afs_write_inode(struct inode *, int); | ||
701 | extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); | ||
702 | extern ssize_t afs_file_write(struct kiocb *, const struct iovec *, | ||
703 | unsigned long, loff_t); | ||
704 | extern int afs_writeback_all(struct afs_vnode *); | ||
705 | extern int afs_fsync(struct file *, struct dentry *, int); | ||
706 | |||
707 | |||
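The write.c entry points declared above are standard VFS hooks. A minimal sketch of how they plug in (the table name is an assumption; the real table lives in fs/afs/file.c, outside this hunk):

	/* sketch only; afs_fs_aops is a stand-in name */
	static const struct address_space_operations afs_fs_aops = {
		.set_page_dirty	= afs_set_page_dirty,
		.prepare_write	= afs_prepare_write,
		.commit_write	= afs_commit_write,
		.writepage	= afs_writepage,
		.writepages	= afs_writepages,
	};

afs_file_write() and afs_fsync() would slot into the file_operations table as .aio_write and .fsync in the same way.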
648 | /*****************************************************************************/ | 708 | /*****************************************************************************/ |
649 | /* | 709 | /* |
650 | * debug tracing | 710 | * debug tracing |
@@ -726,6 +786,21 @@ do { \ | |||
726 | } \ | 786 | } \ |
727 | } while(0) | 787 | } while(0) |
728 | 788 | ||
789 | #define ASSERTRANGE(L, OP1, N, OP2, H) \ | ||
790 | do { \ | ||
791 | if (unlikely(!((L) OP1 (N)) || !((N) OP2 (H)))) { \ | ||
792 | printk(KERN_ERR "\n"); \ | ||
793 | printk(KERN_ERR "AFS: Assertion failed\n"); \ | ||
794 | printk(KERN_ERR "%lu "#OP1" %lu "#OP2" %lu is false\n", \ | ||
795 | (unsigned long)(L), (unsigned long)(N), \ | ||
796 | (unsigned long)(H)); \ | ||
797 | printk(KERN_ERR "0x%lx "#OP1" 0x%lx "#OP2" 0x%lx is false\n", \ | ||
798 | (unsigned long)(L), (unsigned long)(N), \ | ||
799 | (unsigned long)(H)); \ | ||
800 | BUG(); \ | ||
801 | } \ | ||
802 | } while(0) | ||
803 | |||
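ASSERTRANGE(L, OP1, N, OP2, H) BUGs unless N lies between L and H under the two supplied operators. The new write code uses it exactly that way, for example:

	/* dies unless wb->first <= index <= wb->last */
	ASSERTRANGE(wb->first, <=, index, <=, wb->last);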
729 | #define ASSERTIF(C, X) \ | 804 | #define ASSERTIF(C, X) \ |
730 | do { \ | 805 | do { \ |
731 | if (unlikely((C) && !(X))) { \ | 806 | if (unlikely((C) && !(X))) { \ |
@@ -758,6 +833,10 @@ do { \ | |||
758 | do { \ | 833 | do { \ |
759 | } while(0) | 834 | } while(0) |
760 | 835 | ||
836 | #define ASSERTRANGE(L, OP1, N, OP2, H) \ | ||
837 | do { \ | ||
838 | } while(0) | ||
839 | |||
761 | #define ASSERTIF(C, X) \ | 840 | #define ASSERTIF(C, X) \ |
762 | do { \ | 841 | do { \ |
763 | } while(0) | 842 | } while(0) |
diff --git a/fs/afs/main.c b/fs/afs/main.c index 80ec6fd19a73..f1f71ff7d5c6 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c | |||
@@ -149,6 +149,7 @@ error_cache: | |||
149 | afs_vlocation_purge(); | 149 | afs_vlocation_purge(); |
150 | afs_cell_purge(); | 150 | afs_cell_purge(); |
151 | afs_proc_cleanup(); | 151 | afs_proc_cleanup(); |
152 | rcu_barrier(); | ||
152 | printk(KERN_ERR "kAFS: failed to register: %d\n", ret); | 153 | printk(KERN_ERR "kAFS: failed to register: %d\n", ret); |
153 | return ret; | 154 | return ret; |
154 | } | 155 | } |
@@ -176,6 +177,7 @@ static void __exit afs_exit(void) | |||
176 | cachefs_unregister_netfs(&afs_cache_netfs); | 177 | cachefs_unregister_netfs(&afs_cache_netfs); |
177 | #endif | 178 | #endif |
178 | afs_proc_cleanup(); | 179 | afs_proc_cleanup(); |
180 | rcu_barrier(); | ||
179 | } | 181 | } |
180 | 182 | ||
181 | module_exit(afs_exit); | 183 | module_exit(afs_exit); |
diff --git a/fs/afs/misc.c b/fs/afs/misc.c index cdb9792d8161..d1a889c40742 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c | |||
@@ -22,6 +22,7 @@ int afs_abort_to_error(u32 abort_code) | |||
22 | { | 22 | { |
23 | switch (abort_code) { | 23 | switch (abort_code) { |
24 | case 13: return -EACCES; | 24 | case 13: return -EACCES; |
25 | case 27: return -EFBIG; | ||
25 | case 30: return -EROFS; | 26 | case 30: return -EROFS; |
26 | case VSALVAGE: return -EIO; | 27 | case VSALVAGE: return -EIO; |
27 | case VNOVNODE: return -ENOENT; | 28 | case VNOVNODE: return -ENOENT; |
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 034fcfd4e330..a3684dcc76e7 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c | |||
@@ -36,7 +36,7 @@ const struct inode_operations afs_mntpt_inode_operations = { | |||
36 | .lookup = afs_mntpt_lookup, | 36 | .lookup = afs_mntpt_lookup, |
37 | .follow_link = afs_mntpt_follow_link, | 37 | .follow_link = afs_mntpt_follow_link, |
38 | .readlink = page_readlink, | 38 | .readlink = page_readlink, |
39 | .getattr = afs_inode_getattr, | 39 | .getattr = afs_getattr, |
40 | }; | 40 | }; |
41 | 41 | ||
42 | static LIST_HEAD(afs_vfsmounts); | 42 | static LIST_HEAD(afs_vfsmounts); |
@@ -58,7 +58,8 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key) | |||
58 | char *buf; | 58 | char *buf; |
59 | int ret; | 59 | int ret; |
60 | 60 | ||
61 | _enter("{%u,%u}", vnode->fid.vnode, vnode->fid.unique); | 61 | _enter("{%x:%u,%u}", |
62 | vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); | ||
62 | 63 | ||
63 | /* read the contents of the symlink into the pagecache */ | 64 | /* read the contents of the symlink into the pagecache */ |
64 | page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, &file); | 65 | page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, &file); |
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 222c1a3abbb8..04189c47d6a0 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c | |||
@@ -237,6 +237,70 @@ void afs_flat_call_destructor(struct afs_call *call) | |||
237 | } | 237 | } |
238 | 238 | ||
239 | /* | 239 | /* |
240 | * attach the data from a bunch of pages on an inode to a call | ||
241 | */ | ||
242 | int afs_send_pages(struct afs_call *call, struct msghdr *msg, struct kvec *iov) | ||
243 | { | ||
244 | struct page *pages[8]; | ||
245 | unsigned count, n, loop, offset, to; | ||
246 | pgoff_t first = call->first, last = call->last; | ||
247 | int ret; | ||
248 | |||
249 | _enter(""); | ||
250 | |||
251 | offset = call->first_offset; | ||
252 | call->first_offset = 0; | ||
253 | |||
254 | do { | ||
255 | _debug("attach %lx-%lx", first, last); | ||
256 | |||
257 | count = last - first + 1; | ||
258 | if (count > ARRAY_SIZE(pages)) | ||
259 | count = ARRAY_SIZE(pages); | ||
260 | n = find_get_pages_contig(call->mapping, first, count, pages); | ||
261 | ASSERTCMP(n, ==, count); | ||
262 | |||
263 | loop = 0; | ||
264 | do { | ||
265 | msg->msg_flags = 0; | ||
266 | to = PAGE_SIZE; | ||
267 | if (first + loop >= last) | ||
268 | to = call->last_to; | ||
269 | else | ||
270 | msg->msg_flags = MSG_MORE; | ||
271 | iov->iov_base = kmap(pages[loop]) + offset; | ||
272 | iov->iov_len = to - offset; | ||
273 | offset = 0; | ||
274 | |||
275 | _debug("- range %u-%u%s", | ||
276 | offset, to, msg->msg_flags ? " [more]" : ""); | ||
277 | msg->msg_iov = (struct iovec *) iov; | ||
278 | msg->msg_iovlen = 1; | ||
279 | |||
280 | /* have to change the state *before* sending the last | ||
281 | * packet as RxRPC might give us the reply before it | ||
282 | * returns from sending the request */ | ||
283 | if (first + loop >= last) | ||
284 | call->state = AFS_CALL_AWAIT_REPLY; | ||
285 | ret = rxrpc_kernel_send_data(call->rxcall, msg, | ||
286 | to - offset); | ||
287 | kunmap(pages[loop]); | ||
288 | if (ret < 0) | ||
289 | break; | ||
290 | } while (++loop < count); | ||
291 | first += count; | ||
292 | |||
293 | for (loop = 0; loop < count; loop++) | ||
294 | put_page(pages[loop]); | ||
295 | if (ret < 0) | ||
296 | break; | ||
297 | } while (first < last); | ||
298 | |||
299 | _leave(" = %d", ret); | ||
300 | return ret; | ||
301 | } | ||
302 | |||
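afs_send_pages() relies on the caller having filled in the batch fields on the call record beforehand. A sketch of that setup (field names taken from the code above; the marshalling around it is assumed):

	call->mapping      = vnode->vfs_inode.i_mapping; /* where the pages live */
	call->first        = first;   /* first page index in the batch */
	call->last         = last;    /* last page index, inclusive */
	call->first_offset = offset;  /* start offset within the first page */
	call->last_to      = to;      /* end offset within the last page */
	call->send_pages   = true;    /* make afs_make_call() stream the pages */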
303 | /* | ||
240 | * initiate a call | 304 | * initiate a call |
241 | */ | 305 | */ |
242 | int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, | 306 | int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, |
@@ -253,8 +317,9 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, | |||
253 | ASSERT(call->type != NULL); | 317 | ASSERT(call->type != NULL); |
254 | ASSERT(call->type->name != NULL); | 318 | ASSERT(call->type->name != NULL); |
255 | 319 | ||
256 | _debug("MAKE %p{%s} [%d]", | 320 | _debug("____MAKE %p{%s,%x} [%d]____", |
257 | call, call->type->name, atomic_read(&afs_outstanding_calls)); | 321 | call, call->type->name, key_serial(call->key), |
322 | atomic_read(&afs_outstanding_calls)); | ||
258 | 323 | ||
259 | call->wait_mode = wait_mode; | 324 | call->wait_mode = wait_mode; |
260 | INIT_WORK(&call->async_work, afs_process_async_call); | 325 | INIT_WORK(&call->async_work, afs_process_async_call); |
@@ -289,16 +354,23 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, | |||
289 | msg.msg_iovlen = 1; | 354 | msg.msg_iovlen = 1; |
290 | msg.msg_control = NULL; | 355 | msg.msg_control = NULL; |
291 | msg.msg_controllen = 0; | 356 | msg.msg_controllen = 0; |
292 | msg.msg_flags = 0; | 357 | msg.msg_flags = (call->send_pages ? MSG_MORE : 0); |
293 | 358 | ||
294 | /* have to change the state *before* sending the last packet as RxRPC | 359 | /* have to change the state *before* sending the last packet as RxRPC |
295 | * might give us the reply before it returns from sending the | 360 | * might give us the reply before it returns from sending the |
296 | * request */ | 361 | * request */ |
297 | call->state = AFS_CALL_AWAIT_REPLY; | 362 | if (!call->send_pages) |
363 | call->state = AFS_CALL_AWAIT_REPLY; | ||
298 | ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size); | 364 | ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size); |
299 | if (ret < 0) | 365 | if (ret < 0) |
300 | goto error_do_abort; | 366 | goto error_do_abort; |
301 | 367 | ||
368 | if (call->send_pages) { | ||
369 | ret = afs_send_pages(call, &msg, iov); | ||
370 | if (ret < 0) | ||
371 | goto error_do_abort; | ||
372 | } | ||
373 | |||
302 | /* at this point, an async call may no longer exist as it may have | 374 | /* at this point, an async call may no longer exist as it may have |
303 | * already completed */ | 375 | * already completed */ |
304 | return wait_mode->wait(call); | 376 | return wait_mode->wait(call); |
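The MSG_MORE handling here is the ordering detail that makes paged sends work: when pages are to follow, the marshalled request goes out with MSG_MORE set and the call stays in the requesting state, so RxRPC keeps the transmit phase open; only the final data packet, sent from afs_send_pages(), advances the state to AFS_CALL_AWAIT_REPLY, for the same race reason the comments note.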
diff --git a/fs/afs/security.c b/fs/afs/security.c index f9f424d80458..e0ea88b63ebf 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c | |||
@@ -109,7 +109,7 @@ void afs_clear_permits(struct afs_vnode *vnode) | |||
109 | { | 109 | { |
110 | struct afs_permits *permits; | 110 | struct afs_permits *permits; |
111 | 111 | ||
112 | _enter("{%x}", vnode->fid.vnode); | 112 | _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); |
113 | 113 | ||
114 | mutex_lock(&vnode->permits_lock); | 114 | mutex_lock(&vnode->permits_lock); |
115 | permits = vnode->permits; | 115 | permits = vnode->permits; |
@@ -132,7 +132,8 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, long acl_order) | |||
132 | struct afs_vnode *auth_vnode; | 132 | struct afs_vnode *auth_vnode; |
133 | int count, loop; | 133 | int count, loop; |
134 | 134 | ||
135 | _enter("{%x},%x,%lx", vnode->fid.vnode, key_serial(key), acl_order); | 135 | _enter("{%x:%u},%x,%lx", |
136 | vnode->fid.vid, vnode->fid.vnode, key_serial(key), acl_order); | ||
136 | 137 | ||
137 | auth_vnode = afs_get_auth_inode(vnode, key); | 138 | auth_vnode = afs_get_auth_inode(vnode, key); |
138 | if (IS_ERR(auth_vnode)) { | 139 | if (IS_ERR(auth_vnode)) { |
@@ -220,7 +221,8 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, | |||
220 | bool valid; | 221 | bool valid; |
221 | int loop, ret; | 222 | int loop, ret; |
222 | 223 | ||
223 | _enter(""); | 224 | _enter("{%x:%u},%x", |
225 | vnode->fid.vid, vnode->fid.vnode, key_serial(key)); | ||
224 | 226 | ||
225 | auth_vnode = afs_get_auth_inode(vnode, key); | 227 | auth_vnode = afs_get_auth_inode(vnode, key); |
226 | if (IS_ERR(auth_vnode)) { | 228 | if (IS_ERR(auth_vnode)) { |
@@ -268,9 +270,9 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, | |||
268 | _leave(" = %d", ret); | 270 | _leave(" = %d", ret); |
269 | return ret; | 271 | return ret; |
270 | } | 272 | } |
273 | *_access = vnode->status.caller_access; | ||
271 | } | 274 | } |
272 | 275 | ||
273 | *_access = vnode->status.caller_access; | ||
274 | iput(&auth_vnode->vfs_inode); | 276 | iput(&auth_vnode->vfs_inode); |
275 | _leave(" = 0 [access %x]", *_access); | 277 | _leave(" = 0 [access %x]", *_access); |
276 | return 0; | 278 | return 0; |
@@ -288,7 +290,7 @@ int afs_permission(struct inode *inode, int mask, struct nameidata *nd) | |||
288 | struct key *key; | 290 | struct key *key; |
289 | int ret; | 291 | int ret; |
290 | 292 | ||
291 | _enter("{{%x:%x},%lx},%x,", | 293 | _enter("{{%x:%u},%lx},%x,", |
292 | vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask); | 294 | vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask); |
293 | 295 | ||
294 | key = afs_request_key(vnode->volume->cell); | 296 | key = afs_request_key(vnode->volume->cell); |
diff --git a/fs/afs/server.c b/fs/afs/server.c index 96bb23b476a2..231ae4150279 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c | |||
@@ -252,6 +252,9 @@ static void afs_destroy_server(struct afs_server *server) | |||
252 | { | 252 | { |
253 | _enter("%p", server); | 253 | _enter("%p", server); |
254 | 254 | ||
255 | ASSERTIF(server->cb_break_head != server->cb_break_tail, | ||
256 | delayed_work_pending(&server->cb_break_work)); | ||
257 | |||
255 | ASSERTCMP(server->fs_vnodes.rb_node, ==, NULL); | 258 | ASSERTCMP(server->fs_vnodes.rb_node, ==, NULL); |
256 | ASSERTCMP(server->cb_promises.rb_node, ==, NULL); | 259 | ASSERTCMP(server->cb_promises.rb_node, ==, NULL); |
257 | ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail); | 260 | ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail); |
diff --git a/fs/afs/super.c b/fs/afs/super.c index 7030d76155fc..d24be334b608 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c | |||
@@ -50,6 +50,7 @@ static const struct super_operations afs_super_ops = { | |||
50 | .statfs = simple_statfs, | 50 | .statfs = simple_statfs, |
51 | .alloc_inode = afs_alloc_inode, | 51 | .alloc_inode = afs_alloc_inode, |
52 | .drop_inode = generic_delete_inode, | 52 | .drop_inode = generic_delete_inode, |
53 | .write_inode = afs_write_inode, | ||
53 | .destroy_inode = afs_destroy_inode, | 54 | .destroy_inode = afs_destroy_inode, |
54 | .clear_inode = afs_clear_inode, | 55 | .clear_inode = afs_clear_inode, |
55 | .umount_begin = afs_umount_begin, | 56 | .umount_begin = afs_umount_begin, |
@@ -66,7 +67,7 @@ enum { | |||
66 | afs_opt_vol, | 67 | afs_opt_vol, |
67 | }; | 68 | }; |
68 | 69 | ||
69 | static const match_table_t afs_options_list = { | 70 | static match_table_t afs_options_list = { |
70 | { afs_opt_cell, "cell=%s" }, | 71 | { afs_opt_cell, "cell=%s" }, |
71 | { afs_opt_rwpath, "rwpath" }, | 72 | { afs_opt_rwpath, "rwpath" }, |
72 | { afs_opt_vol, "vol=%s" }, | 73 | { afs_opt_vol, "vol=%s" }, |
@@ -459,7 +460,9 @@ static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep, | |||
459 | init_waitqueue_head(&vnode->update_waitq); | 460 | init_waitqueue_head(&vnode->update_waitq); |
460 | mutex_init(&vnode->permits_lock); | 461 | mutex_init(&vnode->permits_lock); |
461 | mutex_init(&vnode->validate_lock); | 462 | mutex_init(&vnode->validate_lock); |
463 | spin_lock_init(&vnode->writeback_lock); | ||
462 | spin_lock_init(&vnode->lock); | 464 | spin_lock_init(&vnode->lock); |
465 | INIT_LIST_HEAD(&vnode->writebacks); | ||
463 | INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work); | 466 | INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work); |
464 | } | 467 | } |
465 | } | 468 | } |
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c index a1904ab8426a..ec814660209f 100644 --- a/fs/afs/vnode.c +++ b/fs/afs/vnode.c | |||
@@ -261,7 +261,7 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode, | |||
261 | 261 | ||
262 | DECLARE_WAITQUEUE(myself, current); | 262 | DECLARE_WAITQUEUE(myself, current); |
263 | 263 | ||
264 | _enter("%s,{%u,%u,%u}", | 264 | _enter("%s,{%x:%u.%u}", |
265 | vnode->volume->vlocation->vldb.name, | 265 | vnode->volume->vlocation->vldb.name, |
266 | vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); | 266 | vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); |
267 | 267 | ||
@@ -389,7 +389,7 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key, | |||
389 | struct afs_server *server; | 389 | struct afs_server *server; |
390 | int ret; | 390 | int ret; |
391 | 391 | ||
392 | _enter("%s{%u,%u,%u},%x,,,", | 392 | _enter("%s{%x:%u.%u},%x,,,", |
393 | vnode->volume->vlocation->vldb.name, | 393 | vnode->volume->vlocation->vldb.name, |
394 | vnode->fid.vid, | 394 | vnode->fid.vid, |
395 | vnode->fid.vnode, | 395 | vnode->fid.vnode, |
@@ -446,7 +446,7 @@ int afs_vnode_create(struct afs_vnode *vnode, struct key *key, | |||
446 | struct afs_server *server; | 446 | struct afs_server *server; |
447 | int ret; | 447 | int ret; |
448 | 448 | ||
449 | _enter("%s{%u,%u,%u},%x,%s,,", | 449 | _enter("%s{%x:%u.%u},%x,%s,,", |
450 | vnode->volume->vlocation->vldb.name, | 450 | vnode->volume->vlocation->vldb.name, |
451 | vnode->fid.vid, | 451 | vnode->fid.vid, |
452 | vnode->fid.vnode, | 452 | vnode->fid.vnode, |
@@ -502,7 +502,7 @@ int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name, | |||
502 | struct afs_server *server; | 502 | struct afs_server *server; |
503 | int ret; | 503 | int ret; |
504 | 504 | ||
505 | _enter("%s{%u,%u,%u},%x,%s", | 505 | _enter("%s{%x:%u.%u},%x,%s", |
506 | vnode->volume->vlocation->vldb.name, | 506 | vnode->volume->vlocation->vldb.name, |
507 | vnode->fid.vid, | 507 | vnode->fid.vid, |
508 | vnode->fid.vnode, | 508 | vnode->fid.vnode, |
@@ -557,7 +557,7 @@ extern int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode, | |||
557 | struct afs_server *server; | 557 | struct afs_server *server; |
558 | int ret; | 558 | int ret; |
559 | 559 | ||
560 | _enter("%s{%u,%u,%u},%s{%u,%u,%u},%x,%s", | 560 | _enter("%s{%x:%u.%u},%s{%x:%u.%u},%x,%s", |
561 | dvnode->volume->vlocation->vldb.name, | 561 | dvnode->volume->vlocation->vldb.name, |
562 | dvnode->fid.vid, | 562 | dvnode->fid.vid, |
563 | dvnode->fid.vnode, | 563 | dvnode->fid.vnode, |
@@ -628,7 +628,7 @@ int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key, | |||
628 | struct afs_server *server; | 628 | struct afs_server *server; |
629 | int ret; | 629 | int ret; |
630 | 630 | ||
631 | _enter("%s{%u,%u,%u},%x,%s,%s,,,", | 631 | _enter("%s{%x:%u.%u},%x,%s,%s,,,", |
632 | vnode->volume->vlocation->vldb.name, | 632 | vnode->volume->vlocation->vldb.name, |
633 | vnode->fid.vid, | 633 | vnode->fid.vid, |
634 | vnode->fid.vnode, | 634 | vnode->fid.vnode, |
@@ -687,7 +687,7 @@ int afs_vnode_rename(struct afs_vnode *orig_dvnode, | |||
687 | struct afs_server *server; | 687 | struct afs_server *server; |
688 | int ret; | 688 | int ret; |
689 | 689 | ||
690 | _enter("%s{%u,%u,%u},%s{%u,%u,%u},%x,%s,%s", | 690 | _enter("%s{%x:%u.%u},%s{%u,%u,%u},%x,%s,%s", |
691 | orig_dvnode->volume->vlocation->vldb.name, | 691 | orig_dvnode->volume->vlocation->vldb.name, |
692 | orig_dvnode->fid.vid, | 692 | orig_dvnode->fid.vid, |
693 | orig_dvnode->fid.vnode, | 693 | orig_dvnode->fid.vnode, |
@@ -753,3 +753,110 @@ no_server: | |||
753 | _leave(" = %ld [cnt %d]", PTR_ERR(server), orig_dvnode->update_cnt); | 753 | _leave(" = %ld [cnt %d]", PTR_ERR(server), orig_dvnode->update_cnt); |
754 | return PTR_ERR(server); | 754 | return PTR_ERR(server); |
755 | } | 755 | } |
756 | |||
757 | /* | ||
758 | * write to a file | ||
759 | */ | ||
760 | int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last, | ||
761 | unsigned offset, unsigned to) | ||
762 | { | ||
763 | struct afs_server *server; | ||
764 | struct afs_vnode *vnode = wb->vnode; | ||
765 | int ret; | ||
766 | |||
767 | _enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x", | ||
768 | vnode->volume->vlocation->vldb.name, | ||
769 | vnode->fid.vid, | ||
770 | vnode->fid.vnode, | ||
771 | vnode->fid.unique, | ||
772 | key_serial(wb->key), | ||
773 | first, last, offset, to); | ||
774 | |||
775 | /* this op will fetch the status */ | ||
776 | spin_lock(&vnode->lock); | ||
777 | vnode->update_cnt++; | ||
778 | spin_unlock(&vnode->lock); | ||
779 | |||
780 | do { | ||
781 | /* pick a server to query */ | ||
782 | server = afs_volume_pick_fileserver(vnode); | ||
783 | if (IS_ERR(server)) | ||
784 | goto no_server; | ||
785 | |||
786 | _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); | ||
787 | |||
788 | ret = afs_fs_store_data(server, wb, first, last, offset, to, | ||
789 | &afs_sync_call); | ||
790 | |||
791 | } while (!afs_volume_release_fileserver(vnode, server, ret)); | ||
792 | |||
793 | /* adjust the flags */ | ||
794 | if (ret == 0) { | ||
795 | afs_vnode_finalise_status_update(vnode, server); | ||
796 | afs_put_server(server); | ||
797 | } else { | ||
798 | afs_vnode_status_update_failed(vnode, ret); | ||
799 | } | ||
800 | |||
801 | _leave(" = %d", ret); | ||
802 | return ret; | ||
803 | |||
804 | no_server: | ||
805 | spin_lock(&vnode->lock); | ||
806 | vnode->update_cnt--; | ||
807 | ASSERTCMP(vnode->update_cnt, >=, 0); | ||
808 | spin_unlock(&vnode->lock); | ||
809 | return PTR_ERR(server); | ||
810 | } | ||
811 | |||
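afs_vnode_setattr() below follows the same fileserver-rotation template as the store op above: bump update_cnt so the status fetched by the RPC is applied consistently, loop over afs_volume_pick_fileserver()/afs_volume_release_fileserver() until the call sticks or the volume gives up, then fold the outcome into the cached vnode status.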
812 | /* | ||
813 | * set the attributes on a file | ||
814 | */ | ||
815 | int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key, | ||
816 | struct iattr *attr) | ||
817 | { | ||
818 | struct afs_server *server; | ||
819 | int ret; | ||
820 | |||
821 | _enter("%s{%x:%u.%u},%x", | ||
822 | vnode->volume->vlocation->vldb.name, | ||
823 | vnode->fid.vid, | ||
824 | vnode->fid.vnode, | ||
825 | vnode->fid.unique, | ||
826 | key_serial(key)); | ||
827 | |||
828 | /* this op will fetch the status */ | ||
829 | spin_lock(&vnode->lock); | ||
830 | vnode->update_cnt++; | ||
831 | spin_unlock(&vnode->lock); | ||
832 | |||
833 | do { | ||
834 | /* pick a server to query */ | ||
835 | server = afs_volume_pick_fileserver(vnode); | ||
836 | if (IS_ERR(server)) | ||
837 | goto no_server; | ||
838 | |||
839 | _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); | ||
840 | |||
841 | ret = afs_fs_setattr(server, key, vnode, attr, &afs_sync_call); | ||
842 | |||
843 | } while (!afs_volume_release_fileserver(vnode, server, ret)); | ||
844 | |||
845 | /* adjust the flags */ | ||
846 | if (ret == 0) { | ||
847 | afs_vnode_finalise_status_update(vnode, server); | ||
848 | afs_put_server(server); | ||
849 | } else { | ||
850 | afs_vnode_status_update_failed(vnode, ret); | ||
851 | } | ||
852 | |||
853 | _leave(" = %d", ret); | ||
854 | return ret; | ||
855 | |||
856 | no_server: | ||
857 | spin_lock(&vnode->lock); | ||
858 | vnode->update_cnt--; | ||
859 | ASSERTCMP(vnode->update_cnt, >=, 0); | ||
860 | spin_unlock(&vnode->lock); | ||
861 | return PTR_ERR(server); | ||
862 | } | ||
diff --git a/fs/afs/write.c b/fs/afs/write.c new file mode 100644 index 000000000000..83ff29262816 --- /dev/null +++ b/fs/afs/write.c | |||
@@ -0,0 +1,835 @@ | |||
1 | /* handling of writes to regular files and writing back to the server | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/slab.h> | ||
13 | #include <linux/fs.h> | ||
14 | #include <linux/pagemap.h> | ||
15 | #include <linux/writeback.h> | ||
16 | #include <linux/pagevec.h> | ||
17 | #include "internal.h" | ||
18 | |||
19 | static int afs_write_back_from_locked_page(struct afs_writeback *wb, | ||
20 | struct page *page); | ||
21 | |||
22 | /* | ||
23 | * mark a page as having been made dirty and thus needing writeback | ||
24 | */ | ||
25 | int afs_set_page_dirty(struct page *page) | ||
26 | { | ||
27 | _enter(""); | ||
28 | return __set_page_dirty_nobuffers(page); | ||
29 | } | ||
30 | |||
31 | /* | ||
32 | * unlink a writeback record because its usage has reached zero | ||
33 | * - must be called with the wb->vnode->writeback_lock held | ||
34 | */ | ||
35 | static void afs_unlink_writeback(struct afs_writeback *wb) | ||
36 | { | ||
37 | struct afs_writeback *front; | ||
38 | struct afs_vnode *vnode = wb->vnode; | ||
39 | |||
40 | list_del_init(&wb->link); | ||
41 | if (!list_empty(&vnode->writebacks)) { | ||
42 | /* if an fsync rises to the front of the queue then wake it | ||
43 | * up */ | ||
44 | front = list_entry(vnode->writebacks.next, | ||
45 | struct afs_writeback, link); | ||
46 | if (front->state == AFS_WBACK_SYNCING) { | ||
47 | _debug("wake up sync"); | ||
48 | front->state = AFS_WBACK_COMPLETE; | ||
49 | wake_up(&front->waitq); | ||
50 | } | ||
51 | } | ||
52 | } | ||
53 | |||
54 | /* | ||
55 | * free a writeback record | ||
56 | */ | ||
57 | static void afs_free_writeback(struct afs_writeback *wb) | ||
58 | { | ||
59 | _enter(""); | ||
60 | key_put(wb->key); | ||
61 | kfree(wb); | ||
62 | } | ||
63 | |||
64 | /* | ||
65 | * dispose of a reference to a writeback record | ||
66 | */ | ||
67 | void afs_put_writeback(struct afs_writeback *wb) | ||
68 | { | ||
69 | struct afs_vnode *vnode = wb->vnode; | ||
70 | |||
71 | _enter("{%d}", wb->usage); | ||
72 | |||
73 | spin_lock(&vnode->writeback_lock); | ||
74 | if (--wb->usage == 0) | ||
75 | afs_unlink_writeback(wb); | ||
76 | else | ||
77 | wb = NULL; | ||
78 | spin_unlock(&vnode->writeback_lock); | ||
79 | if (wb) | ||
80 | afs_free_writeback(wb); | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * partly or wholly fill a page that's under preparation for writing | ||
85 | */ | ||
86 | static int afs_fill_page(struct afs_vnode *vnode, struct key *key, | ||
87 | unsigned start, unsigned len, struct page *page) | ||
88 | { | ||
89 | int ret; | ||
90 | |||
91 | _enter(",,%u,%u", start, len); | ||
92 | |||
93 | ASSERTCMP(start + len, <=, PAGE_SIZE); | ||
94 | |||
95 | ret = afs_vnode_fetch_data(vnode, key, start, len, page); | ||
96 | if (ret < 0) { | ||
97 | if (ret == -ENOENT) { | ||
98 | _debug("got NOENT from server" | ||
99 | " - marking file deleted and stale"); | ||
100 | set_bit(AFS_VNODE_DELETED, &vnode->flags); | ||
101 | ret = -ESTALE; | ||
102 | } | ||
103 | } | ||
104 | |||
105 | _leave(" = %d", ret); | ||
106 | return ret; | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * prepare a page for being written to | ||
111 | */ | ||
112 | static int afs_prepare_page(struct afs_vnode *vnode, struct page *page, | ||
113 | struct key *key, unsigned offset, unsigned to) | ||
114 | { | ||
115 | unsigned eof, tail, start, stop, len; | ||
116 | loff_t i_size, pos; | ||
117 | void *p; | ||
118 | int ret; | ||
119 | |||
120 | _enter(""); | ||
121 | |||
122 | if (offset == 0 && to == PAGE_SIZE) | ||
123 | return 0; | ||
124 | |||
125 | p = kmap(page); | ||
126 | |||
127 | i_size = i_size_read(&vnode->vfs_inode); | ||
128 | pos = (loff_t) page->index << PAGE_SHIFT; | ||
129 | if (pos >= i_size) { | ||
130 | /* partial write, page beyond EOF */ | ||
131 | _debug("beyond"); | ||
132 | if (offset > 0) | ||
133 | memset(p, 0, offset); | ||
134 | if (to < PAGE_SIZE) | ||
135 | memset(p + to, 0, PAGE_SIZE - to); | ||
136 | kunmap(page); | ||
137 | return 0; | ||
138 | } | ||
139 | |||
140 | if (i_size - pos >= PAGE_SIZE) { | ||
141 | /* partial write, page entirely before EOF */ | ||
142 | _debug("before"); | ||
143 | tail = eof = PAGE_SIZE; | ||
144 | } else { | ||
145 | /* partial write, page overlaps EOF */ | ||
146 | eof = i_size - pos; | ||
147 | _debug("overlap %u", eof); | ||
148 | tail = max(eof, to); | ||
149 | if (tail < PAGE_SIZE) | ||
150 | memset(p + tail, 0, PAGE_SIZE - tail); | ||
151 | if (offset > eof) | ||
152 | memset(p + eof, 0, PAGE_SIZE - eof); | ||
153 | } | ||
154 | |||
155 | kunmap(page); | ||
156 | |||
157 | ret = 0; | ||
158 | if (offset > 0 || eof > to) { | ||
159 | /* need to fill one or two bits that aren't going to be written | ||
160 | * (cover both fillers in one read if there are two) */ | ||
161 | start = (offset > 0) ? 0 : to; | ||
162 | stop = (eof > to) ? eof : offset; | ||
163 | len = stop - start; | ||
164 | _debug("wr=%u-%u av=0-%u rd=%u@%u", | ||
165 | offset, to, eof, start, len); | ||
166 | ret = afs_fill_page(vnode, key, start, len, page); | ||
167 | } | ||
168 | |||
169 | _leave(" = %d", ret); | ||
170 | return ret; | ||
171 | } | ||
172 | |||
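To make the fill arithmetic concrete (numbers invented for illustration): with a 4096-byte page, a write of bytes 100..1000 (offset=100, to=1000) into a page that crosses EOF at eof=2000 gives start=0, stop=2000, len=2000, so a single read covers both the [0,100) head and the [1000,2000) gap, while the bytes beyond eof were already zeroed above.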
173 | /* | ||
174 | * prepare to perform part of a write to a page | ||
175 | * - the caller holds the page locked, preventing it from being written out or | ||
176 | * modified by anyone else | ||
177 | */ | ||
178 | int afs_prepare_write(struct file *file, struct page *page, | ||
179 | unsigned offset, unsigned to) | ||
180 | { | ||
181 | struct afs_writeback *candidate, *wb; | ||
182 | struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); | ||
183 | struct key *key = file->private_data; | ||
184 | pgoff_t index; | ||
185 | int ret; | ||
186 | |||
187 | _enter("{%x:%u},{%lx},%u,%u", | ||
188 | vnode->fid.vid, vnode->fid.vnode, page->index, offset, to); | ||
189 | |||
190 | candidate = kzalloc(sizeof(*candidate), GFP_KERNEL); | ||
191 | if (!candidate) | ||
192 | return -ENOMEM; | ||
193 | candidate->vnode = vnode; | ||
194 | candidate->first = candidate->last = page->index; | ||
195 | candidate->offset_first = offset; | ||
196 | candidate->to_last = to; | ||
197 | candidate->usage = 1; | ||
198 | candidate->state = AFS_WBACK_PENDING; | ||
199 | init_waitqueue_head(&candidate->waitq); | ||
200 | |||
201 | if (!PageUptodate(page)) { | ||
202 | _debug("not up to date"); | ||
203 | ret = afs_prepare_page(vnode, page, key, offset, to); | ||
204 | if (ret < 0) { | ||
205 | kfree(candidate); | ||
206 | _leave(" = %d [prep]", ret); | ||
207 | return ret; | ||
208 | } | ||
209 | SetPageUptodate(page); | ||
210 | } | ||
211 | |||
212 | try_again: | ||
213 | index = page->index; | ||
214 | spin_lock(&vnode->writeback_lock); | ||
215 | |||
216 | /* see if this page is already pending a writeback under a suitable key | ||
217 | * - if so we can just join onto that one */ | ||
218 | wb = (struct afs_writeback *) page_private(page); | ||
219 | if (wb) { | ||
220 | if (wb->key == key && wb->state == AFS_WBACK_PENDING) | ||
221 | goto subsume_in_current_wb; | ||
222 | goto flush_conflicting_wb; | ||
223 | } | ||
224 | |||
225 | if (index > 0) { | ||
226 | /* see if we can find an already pending writeback that we can | ||
227 | * append this page to */ | ||
228 | list_for_each_entry(wb, &vnode->writebacks, link) { | ||
229 | if (wb->last == index - 1 && wb->key == key && | ||
230 | wb->state == AFS_WBACK_PENDING) | ||
231 | goto append_to_previous_wb; | ||
232 | } | ||
233 | } | ||
234 | |||
235 | list_add_tail(&candidate->link, &vnode->writebacks); | ||
236 | candidate->key = key_get(key); | ||
237 | spin_unlock(&vnode->writeback_lock); | ||
238 | SetPagePrivate(page); | ||
239 | set_page_private(page, (unsigned long) candidate); | ||
240 | _leave(" = 0 [new]"); | ||
241 | return 0; | ||
242 | |||
243 | subsume_in_current_wb: | ||
244 | _debug("subsume"); | ||
245 | ASSERTRANGE(wb->first, <=, index, <=, wb->last); | ||
246 | if (index == wb->first && offset < wb->offset_first) | ||
247 | wb->offset_first = offset; | ||
248 | if (index == wb->last && to > wb->to_last) | ||
249 | wb->to_last = to; | ||
250 | spin_unlock(&vnode->writeback_lock); | ||
251 | kfree(candidate); | ||
252 | _leave(" = 0 [sub]"); | ||
253 | return 0; | ||
254 | |||
255 | append_to_previous_wb: | ||
256 | _debug("append into %lx-%lx", wb->first, wb->last); | ||
257 | wb->usage++; | ||
258 | wb->last++; | ||
259 | wb->to_last = to; | ||
260 | spin_unlock(&vnode->writeback_lock); | ||
261 | SetPagePrivate(page); | ||
262 | set_page_private(page, (unsigned long) wb); | ||
263 | kfree(candidate); | ||
264 | _leave(" = 0 [app]"); | ||
265 | return 0; | ||
266 | |||
267 | /* the page is currently bound to another context, so if it's dirty we | ||
268 | * need to flush it before we can use the new context */ | ||
269 | flush_conflicting_wb: | ||
270 | _debug("flush conflict"); | ||
271 | if (wb->state == AFS_WBACK_PENDING) | ||
272 | wb->state = AFS_WBACK_CONFLICTING; | ||
273 | spin_unlock(&vnode->writeback_lock); | ||
274 | if (PageDirty(page)) { | ||
275 | ret = afs_write_back_from_locked_page(wb, page); | ||
276 | if (ret < 0) { | ||
277 | afs_put_writeback(candidate); | ||
278 | _leave(" = %d", ret); | ||
279 | return ret; | ||
280 | } | ||
281 | } | ||
282 | |||
283 | /* the page holds a ref on the writeback record */ | ||
284 | afs_put_writeback(wb); | ||
285 | set_page_private(page, 0); | ||
286 | ClearPagePrivate(page); | ||
287 | goto try_again; | ||
288 | } | ||
289 | |||
290 | /* | ||
291 | * finalise part of a write to a page | ||
292 | */ | ||
293 | int afs_commit_write(struct file *file, struct page *page, | ||
294 | unsigned offset, unsigned to) | ||
295 | { | ||
296 | struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); | ||
297 | loff_t i_size, maybe_i_size; | ||
298 | |||
299 | _enter("{%x:%u},{%lx},%u,%u", | ||
300 | vnode->fid.vid, vnode->fid.vnode, page->index, offset, to); | ||
301 | |||
302 | maybe_i_size = (loff_t) page->index << PAGE_SHIFT; | ||
303 | maybe_i_size += to; | ||
304 | |||
305 | i_size = i_size_read(&vnode->vfs_inode); | ||
306 | if (maybe_i_size > i_size) { | ||
307 | spin_lock(&vnode->writeback_lock); | ||
308 | i_size = i_size_read(&vnode->vfs_inode); | ||
309 | if (maybe_i_size > i_size) | ||
310 | i_size_write(&vnode->vfs_inode, maybe_i_size); | ||
311 | spin_unlock(&vnode->writeback_lock); | ||
312 | } | ||
313 | |||
314 | set_page_dirty(page); | ||
315 | |||
316 | if (PageDirty(page)) | ||
317 | _debug("dirtied"); | ||
318 | |||
319 | return 0; | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * kill all the pages in the given range | ||
324 | */ | ||
325 | static void afs_kill_pages(struct afs_vnode *vnode, bool error, | ||
326 | pgoff_t first, pgoff_t last) | ||
327 | { | ||
328 | struct pagevec pv; | ||
329 | unsigned count, loop; | ||
330 | |||
331 | _enter("{%x:%u},%lx-%lx", | ||
332 | vnode->fid.vid, vnode->fid.vnode, first, last); | ||
333 | |||
334 | pagevec_init(&pv, 0); | ||
335 | |||
336 | do { | ||
337 | _debug("kill %lx-%lx", first, last); | ||
338 | |||
339 | count = last - first + 1; | ||
340 | if (count > PAGEVEC_SIZE) | ||
341 | count = PAGEVEC_SIZE; | ||
342 | pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping, | ||
343 | first, count, pv.pages); | ||
344 | ASSERTCMP(pv.nr, ==, count); | ||
345 | |||
346 | for (loop = 0; loop < count; loop++) { | ||
347 | ClearPageUptodate(pv.pages[loop]); | ||
348 | if (error) | ||
349 | SetPageError(pv.pages[loop]); | ||
350 | end_page_writeback(pv.pages[loop]); | ||
351 | } | ||
352 | first += count; | ||
353 | __pagevec_release(&pv); | ||
354 | } while (first < last); | ||
355 | |||
356 | _leave(""); | ||
357 | } | ||
358 | |||
359 | /* | ||
360 | * synchronously write back the locked page and any subsequent non-locked dirty | ||
361 | * pages also covered by the same writeback record | ||
362 | */ | ||
363 | static int afs_write_back_from_locked_page(struct afs_writeback *wb, | ||
364 | struct page *primary_page) | ||
365 | { | ||
366 | struct page *pages[8], *page; | ||
367 | unsigned long count; | ||
368 | unsigned n, offset, to; | ||
369 | pgoff_t start, first, last; | ||
370 | int loop, ret; | ||
371 | |||
372 | _enter(",%lx", primary_page->index); | ||
373 | |||
374 | count = 1; | ||
375 | if (!clear_page_dirty_for_io(primary_page)) | ||
376 | BUG(); | ||
377 | if (test_set_page_writeback(primary_page)) | ||
378 | BUG(); | ||
379 | |||
380 | /* find all consecutive lockable dirty pages, stopping when we find a | ||
381 | * page that is not immediately lockable, is not dirty or is missing, | ||
382 | * or we reach the end of the range */ | ||
383 | start = primary_page->index; | ||
384 | if (start >= wb->last) | ||
385 | goto no_more; | ||
386 | start++; | ||
387 | do { | ||
388 | _debug("more %lx [%lx]", start, count); | ||
389 | n = wb->last - start + 1; | ||
390 | if (n > ARRAY_SIZE(pages)) | ||
391 | n = ARRAY_SIZE(pages); | ||
392 | n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping, | ||
393 | start, n, pages); | ||
394 | _debug("fgpc %u", n); | ||
395 | if (n == 0) | ||
396 | goto no_more; | ||
397 | if (pages[0]->index != start) { | ||
398 | while (n > 0) | ||
399 | put_page(pages[--n]); | ||
400 | goto no_more; | ||
401 | } | ||
402 | |||
403 | for (loop = 0; loop < n; loop++) { | ||
404 | page = pages[loop]; | ||
405 | if (page->index > wb->last) | ||
406 | break; | ||
407 | if (TestSetPageLocked(page)) | ||
408 | break; | ||
409 | if (!PageDirty(page) || | ||
410 | page_private(page) != (unsigned long) wb) { | ||
411 | unlock_page(page); | ||
412 | break; | ||
413 | } | ||
414 | if (!clear_page_dirty_for_io(page)) | ||
415 | BUG(); | ||
416 | if (test_set_page_writeback(page)) | ||
417 | BUG(); | ||
418 | unlock_page(page); | ||
419 | put_page(page); | ||
420 | } | ||
421 | count += loop; | ||
422 | if (loop < n) { | ||
423 | for (; loop < n; loop++) | ||
424 | put_page(pages[loop]); | ||
425 | goto no_more; | ||
426 | } | ||
427 | |||
428 | start += loop; | ||
429 | } while (start <= wb->last && count < 65536); | ||
430 | |||
431 | no_more: | ||
432 | /* we now have a contiguous set of dirty pages, each with writeback set | ||
433 | * and the dirty mark cleared; the first page is locked and must remain | ||
434 | * so, all the rest are unlocked */ | ||
435 | first = primary_page->index; | ||
436 | last = first + count - 1; | ||
437 | |||
438 | offset = (first == wb->first) ? wb->offset_first : 0; | ||
439 | to = (last == wb->last) ? wb->to_last : PAGE_SIZE; | ||
440 | |||
441 | _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to); | ||
442 | |||
443 | ret = afs_vnode_store_data(wb, first, last, offset, to); | ||
444 | if (ret < 0) { | ||
445 | switch (ret) { | ||
446 | case -EDQUOT: | ||
447 | case -ENOSPC: | ||
448 | set_bit(AS_ENOSPC, | ||
449 | &wb->vnode->vfs_inode.i_mapping->flags); | ||
450 | break; | ||
451 | case -EROFS: | ||
452 | case -EIO: | ||
453 | case -EREMOTEIO: | ||
454 | case -EFBIG: | ||
455 | case -ENOENT: | ||
456 | case -ENOMEDIUM: | ||
457 | case -ENXIO: | ||
458 | afs_kill_pages(wb->vnode, true, first, last); | ||
459 | set_bit(AS_EIO, &wb->vnode->vfs_inode.i_mapping->flags); | ||
460 | break; | ||
461 | case -EACCES: | ||
462 | case -EPERM: | ||
463 | case -ENOKEY: | ||
464 | case -EKEYEXPIRED: | ||
465 | case -EKEYREJECTED: | ||
466 | case -EKEYREVOKED: | ||
467 | afs_kill_pages(wb->vnode, false, first, last); | ||
468 | break; | ||
469 | default: | ||
470 | break; | ||
471 | } | ||
472 | } else { | ||
473 | ret = count; | ||
474 | } | ||
475 | |||
476 | _leave(" = %d", ret); | ||
477 | return ret; | ||
478 | } | ||
479 | |||
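Only the edge pages of a batch are trimmed by the offset/to computation at no_more. For instance, if a batch covers the whole record, pages 3..7 with offset_first=512 and to_last=1024, then page 3 is stored from byte 512, pages 4..6 in full, and page 7 up to byte 1024; that (first, last, offset, to) tuple is exactly what afs_vnode_store_data() receives.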
480 | /* | ||
481 | * write a page back to the server | ||
482 | * - the caller locked the page for us | ||
483 | */ | ||
484 | int afs_writepage(struct page *page, struct writeback_control *wbc) | ||
485 | { | ||
486 | struct backing_dev_info *bdi = page->mapping->backing_dev_info; | ||
487 | struct afs_writeback *wb; | ||
488 | int ret; | ||
489 | |||
490 | _enter("{%lx},", page->index); | ||
491 | |||
492 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
493 | wait_on_page_writeback(page); | ||
494 | |||
495 | if (PageWriteback(page) || !PageDirty(page)) { | ||
496 | unlock_page(page); | ||
497 | return 0; | ||
498 | } | ||
499 | |||
500 | wb = (struct afs_writeback *) page_private(page); | ||
501 | ASSERT(wb != NULL); | ||
502 | |||
503 | ret = afs_write_back_from_locked_page(wb, page); | ||
504 | unlock_page(page); | ||
505 | if (ret < 0) { | ||
506 | _leave(" = %d", ret); | ||
507 | return 0; | ||
508 | } | ||
509 | |||
510 | wbc->nr_to_write -= ret; | ||
511 | if (wbc->nonblocking && bdi_write_congested(bdi)) | ||
512 | wbc->encountered_congestion = 1; | ||
513 | |||
514 | _leave(" = 0"); | ||
515 | return 0; | ||
516 | } | ||
517 | |||
518 | /* | ||
519 | * write a region of pages back to the server | ||
520 | */ | ||
521 | static int afs_writepages_region(struct address_space *mapping, | ||
522 | struct writeback_control *wbc, | ||
523 | pgoff_t index, pgoff_t end, pgoff_t *_next) | ||
524 | { | ||
525 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
526 | struct afs_writeback *wb; | ||
527 | struct page *page; | ||
528 | int ret, n; | ||
529 | |||
530 | _enter(",,%lx,%lx,", index, end); | ||
531 | |||
532 | do { | ||
533 | n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY, | ||
534 | 1, &page); | ||
535 | if (!n) | ||
536 | break; | ||
537 | |||
538 | _debug("wback %lx", page->index); | ||
539 | |||
540 | if (page->index > end) { | ||
541 | *_next = index; | ||
542 | page_cache_release(page); | ||
543 | _leave(" = 0 [%lx]", *_next); | ||
544 | return 0; | ||
545 | } | ||
546 | |||
547 | /* at this point we hold neither mapping->tree_lock nor lock on | ||
548 | * the page itself: the page may be truncated or invalidated | ||
549 | * (changing page->mapping to NULL), or even swizzled back from | ||
550 | * swapper_space to tmpfs file mapping | ||
551 | */ | ||
552 | lock_page(page); | ||
553 | |||
554 | if (page->mapping != mapping) { | ||
555 | unlock_page(page); | ||
556 | page_cache_release(page); | ||
557 | continue; | ||
558 | } | ||
559 | |||
560 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
561 | wait_on_page_writeback(page); | ||
562 | |||
563 | if (PageWriteback(page) || !PageDirty(page)) { | ||
564 | unlock_page(page); | ||
565 | continue; | ||
566 | } | ||
567 | |||
568 | wb = (struct afs_writeback *) page_private(page); | ||
569 | ASSERT(wb != NULL); | ||
570 | |||
571 | spin_lock(&wb->vnode->writeback_lock); | ||
572 | wb->state = AFS_WBACK_WRITING; | ||
573 | spin_unlock(&wb->vnode->writeback_lock); | ||
574 | |||
575 | ret = afs_write_back_from_locked_page(wb, page); | ||
576 | unlock_page(page); | ||
577 | page_cache_release(page); | ||
578 | if (ret < 0) { | ||
579 | _leave(" = %d", ret); | ||
580 | return ret; | ||
581 | } | ||
582 | |||
583 | wbc->nr_to_write -= ret; | ||
584 | |||
585 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
586 | wbc->encountered_congestion = 1; | ||
587 | break; | ||
588 | } | ||
589 | |||
590 | cond_resched(); | ||
591 | } while (index < end && wbc->nr_to_write > 0); | ||
592 | |||
593 | *_next = index; | ||
594 | _leave(" = 0 [%lx]", *_next); | ||
595 | return 0; | ||
596 | } | ||
597 | |||
598 | /* | ||
599 | * write some of the pending data back to the server | ||
600 | */ | ||
601 | int afs_writepages(struct address_space *mapping, | ||
602 | struct writeback_control *wbc) | ||
603 | { | ||
604 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
605 | pgoff_t start, end, next; | ||
606 | int ret; | ||
607 | |||
608 | _enter(""); | ||
609 | |||
610 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
611 | wbc->encountered_congestion = 1; | ||
612 | _leave(" = 0 [congest]"); | ||
613 | return 0; | ||
614 | } | ||
615 | |||
616 | if (wbc->range_cyclic) { | ||
617 | start = mapping->writeback_index; | ||
618 | end = -1; | ||
619 | ret = afs_writepages_region(mapping, wbc, start, end, &next); | ||
620 | if (start > 0 && wbc->nr_to_write > 0 && ret == 0 && | ||
621 | !(wbc->nonblocking && wbc->encountered_congestion)) | ||
622 | ret = afs_writepages_region(mapping, wbc, 0, start, | ||
623 | &next); | ||
624 | mapping->writeback_index = next; | ||
625 | } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { | ||
626 | end = (pgoff_t)(LLONG_MAX >> PAGE_CACHE_SHIFT); | ||
627 | ret = afs_writepages_region(mapping, wbc, 0, end, &next); | ||
628 | if (wbc->nr_to_write > 0) | ||
629 | mapping->writeback_index = next; | ||
630 | } else { | ||
631 | start = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
632 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
633 | ret = afs_writepages_region(mapping, wbc, start, end, &next); | ||
634 | } | ||
635 | |||
636 | _leave(" = %d", ret); | ||
637 | return ret; | ||
638 | } | ||
639 | |||
640 | /* | ||
641 | * write an inode back | ||
642 | */ | ||
643 | int afs_write_inode(struct inode *inode, int sync) | ||
644 | { | ||
645 | struct afs_vnode *vnode = AFS_FS_I(inode); | ||
646 | int ret; | ||
647 | |||
648 | _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); | ||
649 | |||
650 | ret = 0; | ||
651 | if (sync) { | ||
652 | ret = filemap_fdatawait(inode->i_mapping); | ||
653 | if (ret < 0) | ||
654 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); | ||
655 | } | ||
656 | |||
657 | _leave(" = %d", ret); | ||
658 | return ret; | ||
659 | } | ||
660 | |||
661 | /* | ||
662 | * completion of write to server | ||
663 | */ | ||
664 | void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) | ||
665 | { | ||
666 | struct afs_writeback *wb = call->wb; | ||
667 | struct pagevec pv; | ||
668 | unsigned count, loop; | ||
669 | pgoff_t first = call->first, last = call->last; | ||
670 | bool free_wb; | ||
671 | |||
672 | _enter("{%x:%u},{%lx-%lx}", | ||
673 | vnode->fid.vid, vnode->fid.vnode, first, last); | ||
674 | |||
675 | ASSERT(wb != NULL); | ||
676 | |||
677 | pagevec_init(&pv, 0); | ||
678 | |||
679 | do { | ||
680 | _debug("attach %lx-%lx", first, last); | ||
681 | |||
682 | count = last - first + 1; | ||
683 | if (count > PAGEVEC_SIZE) | ||
684 | count = PAGEVEC_SIZE; | ||
685 | pv.nr = find_get_pages_contig(call->mapping, first, count, | ||
686 | pv.pages); | ||
687 | ASSERTCMP(pv.nr, ==, count); | ||
688 | |||
689 | spin_lock(&vnode->writeback_lock); | ||
690 | for (loop = 0; loop < count; loop++) { | ||
691 | struct page *page = pv.pages[loop]; | ||
692 | end_page_writeback(page); | ||
693 | if (page_private(page) == (unsigned long) wb) { | ||
694 | set_page_private(page, 0); | ||
695 | ClearPagePrivate(page); | ||
696 | wb->usage--; | ||
697 | } | ||
698 | } | ||
699 | free_wb = false; | ||
700 | if (wb->usage == 0) { | ||
701 | afs_unlink_writeback(wb); | ||
702 | free_wb = true; | ||
703 | } | ||
704 | spin_unlock(&vnode->writeback_lock); | ||
705 | first += count; | ||
706 | if (free_wb) { | ||
707 | afs_free_writeback(wb); | ||
708 | wb = NULL; | ||
709 | } | ||
710 | |||
711 | __pagevec_release(&pv); | ||
712 | } while (first < last); | ||
713 | |||
714 | _leave(""); | ||
715 | } | ||
716 | |||
717 | /* | ||
718 | * write to an AFS file | ||
719 | */ | ||
720 | ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov, | ||
721 | unsigned long nr_segs, loff_t pos) | ||
722 | { | ||
723 | struct dentry *dentry = iocb->ki_filp->f_path.dentry; | ||
724 | struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); | ||
725 | ssize_t result; | ||
726 | size_t count = iov_length(iov, nr_segs); | ||
727 | int ret; | ||
728 | |||
729 | _enter("{%x.%u},{%zu},%lu,", | ||
730 | vnode->fid.vid, vnode->fid.vnode, count, nr_segs); | ||
731 | |||
732 | if (IS_SWAPFILE(&vnode->vfs_inode)) { | ||
733 | printk(KERN_INFO | ||
734 | "AFS: Attempt to write to active swap file!\n"); | ||
735 | return -EBUSY; | ||
736 | } | ||
737 | |||
738 | if (!count) | ||
739 | return 0; | ||
740 | |||
741 | result = generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
742 | if (IS_ERR_VALUE(result)) { | ||
743 | _leave(" = %zd", result); | ||
744 | return result; | ||
745 | } | ||
746 | |||
747 | /* return error values for O_SYNC and IS_SYNC() */ | ||
748 | if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_SYNC) { | ||
749 | ret = afs_fsync(iocb->ki_filp, dentry, 1); | ||
750 | if (ret < 0) | ||
751 | result = ret; | ||
752 | } | ||
753 | |||
754 | _leave(" = %zd", result); | ||
755 | return result; | ||
756 | } | ||
757 | |||
758 | /* | ||
759 | * flush the vnode to the fileserver | ||
760 | */ | ||
761 | int afs_writeback_all(struct afs_vnode *vnode) | ||
762 | { | ||
763 | struct address_space *mapping = vnode->vfs_inode.i_mapping; | ||
764 | struct writeback_control wbc = { | ||
765 | .bdi = mapping->backing_dev_info, | ||
766 | .sync_mode = WB_SYNC_ALL, | ||
767 | .nr_to_write = LONG_MAX, | ||
768 | .for_writepages = 1, | ||
769 | .range_cyclic = 1, | ||
770 | }; | ||
771 | int ret; | ||
772 | |||
773 | _enter(""); | ||
774 | |||
775 | ret = mapping->a_ops->writepages(mapping, &wbc); | ||
776 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | ||
777 | |||
778 | _leave(" = %d", ret); | ||
779 | return ret; | ||
780 | } | ||
781 | |||
782 | /* | ||
783 | * flush any dirty pages for this process, and check for write errors. | ||
784 | * - the return status from this call provides a reliable indication of | ||
785 | * whether any write errors occurred for this process. | ||
786 | */ | ||
787 | int afs_fsync(struct file *file, struct dentry *dentry, int datasync) | ||
788 | { | ||
789 | struct afs_writeback *wb, *xwb; | ||
790 | struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); | ||
791 | int ret; | ||
792 | |||
793 | _enter("{%x:%u},{n=%s},%d", | ||
794 | vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name, | ||
795 | datasync); | ||
796 | |||
797 | /* use a writeback record as a marker in the queue - when this reaches | ||
798 | * the front of the queue, all the outstanding writes are either | ||
799 | * completed or rejected */ | ||
800 | wb = kzalloc(sizeof(*wb), GFP_KERNEL); | ||
801 | if (!wb) | ||
802 | return -ENOMEM; | ||
803 | wb->vnode = vnode; | ||
804 | wb->first = 0; | ||
805 | wb->last = -1; | ||
806 | wb->offset_first = 0; | ||
807 | wb->to_last = PAGE_SIZE; | ||
808 | wb->usage = 1; | ||
809 | wb->state = AFS_WBACK_SYNCING; | ||
810 | init_waitqueue_head(&wb->waitq); | ||
811 | |||
812 | spin_lock(&vnode->writeback_lock); | ||
813 | list_for_each_entry(xwb, &vnode->writebacks, link) { | ||
814 | if (xwb->state == AFS_WBACK_PENDING) | ||
815 | xwb->state = AFS_WBACK_CONFLICTING; | ||
816 | } | ||
817 | list_add_tail(&wb->link, &vnode->writebacks); | ||
818 | spin_unlock(&vnode->writeback_lock); | ||
819 | |||
820 | /* push all the outstanding writebacks to the server */ | ||
821 | ret = afs_writeback_all(vnode); | ||
822 | if (ret < 0) { | ||
823 | afs_put_writeback(wb); | ||
824 | _leave(" = %d [wb]", ret); | ||
825 | return ret; | ||
826 | } | ||
827 | |||
828 | /* wait for the preceding writes to actually complete */ | ||
829 | ret = wait_event_interruptible(wb->waitq, | ||
830 | wb->state == AFS_WBACK_COMPLETE || | ||
831 | vnode->writebacks.next == &wb->link); | ||
832 | afs_put_writeback(wb); | ||
833 | _leave(" = %d", ret); | ||
834 | return ret; | ||
835 | } | ||
diff --git a/fs/aio.c b/fs/aio.c --- a/fs/aio.c +++ b/fs/aio.c | |||
@@ -346,10 +346,9 @@ void fastcall exit_aio(struct mm_struct *mm) | |||
346 | 346 | ||
347 | wait_for_all_aios(ctx); | 347 | wait_for_all_aios(ctx); |
348 | /* | 348 | /* |
349 | * this is an overkill, but ensures we don't leave | 349 | * Ensure we don't leave the ctx on the aio_wq |
350 | * the ctx on the aio_wq | ||
351 | */ | 350 | */ |
352 | flush_workqueue(aio_wq); | 351 | cancel_work_sync(&ctx->wq.work); |
353 | 352 | ||
354 | if (1 != atomic_read(&ctx->users)) | 353 | if (1 != atomic_read(&ctx->users)) |
355 | printk(KERN_DEBUG | 354 | printk(KERN_DEBUG |
@@ -372,7 +371,7 @@ void fastcall __put_ioctx(struct kioctx *ctx) | |||
372 | BUG_ON(ctx->reqs_active); | 371 | BUG_ON(ctx->reqs_active); |
373 | 372 | ||
374 | cancel_delayed_work(&ctx->wq); | 373 | cancel_delayed_work(&ctx->wq); |
375 | flush_workqueue(aio_wq); | 374 | cancel_work_sync(&ctx->wq.work); |
376 | aio_free_ring(ctx); | 375 | aio_free_ring(ctx); |
377 | mmdrop(ctx->mm); | 376 | mmdrop(ctx->mm); |
378 | ctx->mm = NULL; | 377 | ctx->mm = NULL; |
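The aio.c change narrows the synchronisation: rather than draining every item queued on the shared aio_wq with flush_workqueue(), cancel_work_sync() cancels and waits out just this kioctx's work item, which is cheaper and avoids stalling on (or deadlocking against) unrelated work pending on the same queue.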
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 18657f001b43..72d0b412c376 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c | |||
@@ -675,19 +675,8 @@ static ssize_t | |||
675 | bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) | 675 | bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) |
676 | { | 676 | { |
677 | char *s = enabled ? "enabled" : "disabled"; | 677 | char *s = enabled ? "enabled" : "disabled"; |
678 | int len = strlen(s); | ||
679 | loff_t pos = *ppos; | ||
680 | 678 | ||
681 | if (pos < 0) | 679 | return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s)); |
682 | return -EINVAL; | ||
683 | if (pos >= len) | ||
684 | return 0; | ||
685 | if (len < pos + nbytes) | ||
686 | nbytes = len - pos; | ||
687 | if (copy_to_user(buf, s + pos, nbytes)) | ||
688 | return -EFAULT; | ||
689 | *ppos = pos + nbytes; | ||
690 | return nbytes; | ||
691 | } | 680 | } |
692 | 681 | ||
693 | static ssize_t bm_status_write(struct file * file, const char __user * buffer, | 682 | static ssize_t bm_status_write(struct file * file, const char __user * buffer, |
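simple_read_from_buffer() implements exactly the contract the removed lines spelled out by hand: validate *ppos, copy min(nbytes, len - *ppos) bytes from the buffer to user space, advance *ppos, and return the count copied (0 at or past EOF, -EFAULT if the copy faults).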
diff --git a/fs/buffer.c b/fs/buffer.c index eb820b82a636..aecd057cd0e0 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -1846,13 +1846,8 @@ static int __block_prepare_write(struct inode *inode, struct page *page, | |||
1846 | if (block_start >= to) | 1846 | if (block_start >= to) |
1847 | break; | 1847 | break; |
1848 | if (buffer_new(bh)) { | 1848 | if (buffer_new(bh)) { |
1849 | void *kaddr; | ||
1850 | |||
1851 | clear_buffer_new(bh); | 1849 | clear_buffer_new(bh); |
1852 | kaddr = kmap_atomic(page, KM_USER0); | 1850 | zero_user_page(page, block_start, bh->b_size, KM_USER0); |
1853 | memset(kaddr+block_start, 0, bh->b_size); | ||
1854 | flush_dcache_page(page); | ||
1855 | kunmap_atomic(kaddr, KM_USER0); | ||
1856 | set_buffer_uptodate(bh); | 1851 | set_buffer_uptodate(bh); |
1857 | mark_buffer_dirty(bh); | 1852 | mark_buffer_dirty(bh); |
1858 | } | 1853 | } |
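zero_user_page(page, offset, len, km_type) bundles the kmap_atomic/memset/flush_dcache_page/kunmap_atomic sequence visible in the removed lines, zeroing len bytes at the given offset in the page under the chosen kmap slot; the remaining hunks in this file are the same mechanical conversion.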
@@ -1940,10 +1935,8 @@ int block_read_full_page(struct page *page, get_block_t *get_block) | |||
1940 | SetPageError(page); | 1935 | SetPageError(page); |
1941 | } | 1936 | } |
1942 | if (!buffer_mapped(bh)) { | 1937 | if (!buffer_mapped(bh)) { |
1943 | void *kaddr = kmap_atomic(page, KM_USER0); | 1938 | zero_user_page(page, i * blocksize, blocksize, |
1944 | memset(kaddr + i * blocksize, 0, blocksize); | 1939 | KM_USER0); |
1945 | flush_dcache_page(page); | ||
1946 | kunmap_atomic(kaddr, KM_USER0); | ||
1947 | if (!err) | 1940 | if (!err) |
1948 | set_buffer_uptodate(bh); | 1941 | set_buffer_uptodate(bh); |
1949 | continue; | 1942 | continue; |
@@ -2086,7 +2079,6 @@ int cont_prepare_write(struct page *page, unsigned offset, | |||
2086 | long status; | 2079 | long status; |
2087 | unsigned zerofrom; | 2080 | unsigned zerofrom; |
2088 | unsigned blocksize = 1 << inode->i_blkbits; | 2081 | unsigned blocksize = 1 << inode->i_blkbits; |
2089 | void *kaddr; | ||
2090 | 2082 | ||
2091 | while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) { | 2083 | while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) { |
2092 | status = -ENOMEM; | 2084 | status = -ENOMEM; |
@@ -2108,10 +2100,8 @@ int cont_prepare_write(struct page *page, unsigned offset, | |||
2108 | PAGE_CACHE_SIZE, get_block); | 2100 | PAGE_CACHE_SIZE, get_block); |
2109 | if (status) | 2101 | if (status) |
2110 | goto out_unmap; | 2102 | goto out_unmap; |
2111 | kaddr = kmap_atomic(new_page, KM_USER0); | 2103 | zero_user_page(page, zerofrom, PAGE_CACHE_SIZE - zerofrom, |
2112 | memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom); | 2104 | KM_USER0); |
2113 | flush_dcache_page(new_page); | ||
2114 | kunmap_atomic(kaddr, KM_USER0); | ||
2115 | generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE); | 2105 | generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE); |
2116 | unlock_page(new_page); | 2106 | unlock_page(new_page); |
2117 | page_cache_release(new_page); | 2107 | page_cache_release(new_page); |
@@ -2138,10 +2128,7 @@ int cont_prepare_write(struct page *page, unsigned offset, | |||
2138 | if (status) | 2128 | if (status) |
2139 | goto out1; | 2129 | goto out1; |
2140 | if (zerofrom < offset) { | 2130 | if (zerofrom < offset) { |
2141 | kaddr = kmap_atomic(page, KM_USER0); | 2131 | zero_user_page(page, zerofrom, offset - zerofrom, KM_USER0); |
2142 | memset(kaddr+zerofrom, 0, offset-zerofrom); | ||
2143 | flush_dcache_page(page); | ||
2144 | kunmap_atomic(kaddr, KM_USER0); | ||
2145 | __block_commit_write(inode, page, zerofrom, offset); | 2132 | __block_commit_write(inode, page, zerofrom, offset); |
2146 | } | 2133 | } |
2147 | return 0; | 2134 | return 0; |
@@ -2340,10 +2327,7 @@ failed: | |||
2340 | * Error recovery is pretty slack. Clear the page and mark it dirty | 2327 | * Error recovery is pretty slack. Clear the page and mark it dirty |
2341 | * so we'll later zero out any blocks which _were_ allocated. | 2328 | * so we'll later zero out any blocks which _were_ allocated. |
2342 | */ | 2329 | */ |
2343 | kaddr = kmap_atomic(page, KM_USER0); | 2330 | zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); |
2344 | memset(kaddr, 0, PAGE_CACHE_SIZE); | ||
2345 | flush_dcache_page(page); | ||
2346 | kunmap_atomic(kaddr, KM_USER0); | ||
2347 | SetPageUptodate(page); | 2331 | SetPageUptodate(page); |
2348 | set_page_dirty(page); | 2332 | set_page_dirty(page); |
2349 | return ret; | 2333 | return ret; |
@@ -2382,7 +2366,6 @@ int nobh_writepage(struct page *page, get_block_t *get_block, | |||
2382 | loff_t i_size = i_size_read(inode); | 2366 | loff_t i_size = i_size_read(inode); |
2383 | const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | 2367 | const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; |
2384 | unsigned offset; | 2368 | unsigned offset; |
2385 | void *kaddr; | ||
2386 | int ret; | 2369 | int ret; |
2387 | 2370 | ||
2388 | /* Is the page fully inside i_size? */ | 2371 | /* Is the page fully inside i_size? */ |
@@ -2413,10 +2396,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block, | |||
2413 | * the page size, the remaining memory is zeroed when mapped, and | 2396 | * the page size, the remaining memory is zeroed when mapped, and |
2414 | * writes to that region are not written out to the file." | 2397 | * writes to that region are not written out to the file." |
2415 | */ | 2398 | */ |
2416 | kaddr = kmap_atomic(page, KM_USER0); | 2399 | zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); |
2417 | memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); | ||
2418 | flush_dcache_page(page); | ||
2419 | kunmap_atomic(kaddr, KM_USER0); | ||
2420 | out: | 2400 | out: |
2421 | ret = mpage_writepage(page, get_block, wbc); | 2401 | ret = mpage_writepage(page, get_block, wbc); |
2422 | if (ret == -EAGAIN) | 2402 | if (ret == -EAGAIN) |
@@ -2437,7 +2417,6 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from) | |||
2437 | unsigned to; | 2417 | unsigned to; |
2438 | struct page *page; | 2418 | struct page *page; |
2439 | const struct address_space_operations *a_ops = mapping->a_ops; | 2419 | const struct address_space_operations *a_ops = mapping->a_ops; |
2440 | char *kaddr; | ||
2441 | int ret = 0; | 2420 | int ret = 0; |
2442 | 2421 | ||
2443 | if ((offset & (blocksize - 1)) == 0) | 2422 | if ((offset & (blocksize - 1)) == 0) |
@@ -2451,10 +2430,8 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from) | |||
2451 | to = (offset + blocksize) & ~(blocksize - 1); | 2430 | to = (offset + blocksize) & ~(blocksize - 1); |
2452 | ret = a_ops->prepare_write(NULL, page, offset, to); | 2431 | ret = a_ops->prepare_write(NULL, page, offset, to); |
2453 | if (ret == 0) { | 2432 | if (ret == 0) { |
2454 | kaddr = kmap_atomic(page, KM_USER0); | 2433 | zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, |
2455 | memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); | 2434 | KM_USER0); |
2456 | flush_dcache_page(page); | ||
2457 | kunmap_atomic(kaddr, KM_USER0); | ||
2458 | /* | 2435 | /* |
2459 | * It would be more correct to call aops->commit_write() | 2436 | * It would be more correct to call aops->commit_write() |
2460 | * here, but this is more efficient. | 2437 | * here, but this is more efficient. |
@@ -2480,7 +2457,6 @@ int block_truncate_page(struct address_space *mapping, | |||
2480 | struct inode *inode = mapping->host; | 2457 | struct inode *inode = mapping->host; |
2481 | struct page *page; | 2458 | struct page *page; |
2482 | struct buffer_head *bh; | 2459 | struct buffer_head *bh; |
2483 | void *kaddr; | ||
2484 | int err; | 2460 | int err; |
2485 | 2461 | ||
2486 | blocksize = 1 << inode->i_blkbits; | 2462 | blocksize = 1 << inode->i_blkbits; |
@@ -2534,11 +2510,7 @@ int block_truncate_page(struct address_space *mapping, | |||
2534 | goto unlock; | 2510 | goto unlock; |
2535 | } | 2511 | } |
2536 | 2512 | ||
2537 | kaddr = kmap_atomic(page, KM_USER0); | 2513 | zero_user_page(page, offset, length, KM_USER0); |
2538 | memset(kaddr + offset, 0, length); | ||
2539 | flush_dcache_page(page); | ||
2540 | kunmap_atomic(kaddr, KM_USER0); | ||
2541 | |||
2542 | mark_buffer_dirty(bh); | 2514 | mark_buffer_dirty(bh); |
2543 | err = 0; | 2515 | err = 0; |
2544 | 2516 | ||
@@ -2559,7 +2531,6 @@ int block_write_full_page(struct page *page, get_block_t *get_block, | |||
2559 | loff_t i_size = i_size_read(inode); | 2531 | loff_t i_size = i_size_read(inode); |
2560 | const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | 2532 | const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; |
2561 | unsigned offset; | 2533 | unsigned offset; |
2562 | void *kaddr; | ||
2563 | 2534 | ||
2564 | /* Is the page fully inside i_size? */ | 2535 | /* Is the page fully inside i_size? */ |
2565 | if (page->index < end_index) | 2536 | if (page->index < end_index) |
@@ -2585,10 +2556,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block, | |||
2585 | * the page size, the remaining memory is zeroed when mapped, and | 2556 | * the page size, the remaining memory is zeroed when mapped, and |
2586 | * writes to that region are not written out to the file." | 2557 | * writes to that region are not written out to the file." |
2587 | */ | 2558 | */ |
2588 | kaddr = kmap_atomic(page, KM_USER0); | 2559 | zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); |
2589 | memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); | ||
2590 | flush_dcache_page(page); | ||
2591 | kunmap_atomic(kaddr, KM_USER0); | ||
2592 | return __block_write_full_page(inode, page, get_block, wbc); | 2560 | return __block_write_full_page(inode, page, get_block, wbc); |
2593 | } | 2561 | } |
2594 | 2562 | ||
@@ -2978,7 +2946,7 @@ static void buffer_exit_cpu(int cpu) | |||
2978 | static int buffer_cpu_notify(struct notifier_block *self, | 2946 | static int buffer_cpu_notify(struct notifier_block *self, |
2979 | unsigned long action, void *hcpu) | 2947 | unsigned long action, void *hcpu) |
2980 | { | 2948 | { |
2981 | if (action == CPU_DEAD) | 2949 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) |
2982 | buffer_exit_cpu((unsigned long)hcpu); | 2950 | buffer_exit_cpu((unsigned long)hcpu); |
2983 | return NOTIFY_OK; | 2951 | return NOTIFY_OK; |
2984 | } | 2952 | } |
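
Every fs/buffer.c hunk above collapses the same four-step sequence
(kmap_atomic(), memset(), flush_dcache_page(), kunmap_atomic()) into a
single zero_user_page() call. The helper is assumed to be roughly this
inline from include/linux/highmem.h:

    static inline void zero_user_page(struct page *page, unsigned offset,
                                      unsigned size, int kmtype)
    {
            void *kaddr = kmap_atomic(page, kmtype);

            memset(kaddr + offset, 0, size);
            flush_dcache_page(page);
            kunmap_atomic(kaddr, kmtype);
    }
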
diff --git a/fs/configfs/file.c b/fs/configfs/file.c index d98be5e01328..3527c7c6def8 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c | |||
@@ -77,36 +77,6 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf | |||
77 | return ret; | 77 | return ret; |
78 | } | 78 | } |
79 | 79 | ||
80 | |||
81 | /** | ||
82 | * flush_read_buffer - push buffer to userspace. | ||
83 | * @buffer: data buffer for file. | ||
84 | * @userbuf: user-passed buffer. | ||
85 | * @count: number of bytes requested. | ||
86 | * @ppos: file position. | ||
87 | * | ||
88 | * Copy the buffer we filled in fill_read_buffer() to userspace. | ||
89 | * This is done at the reader's leisure, copying and advancing | ||
90 | * the amount they specify each time. | ||
91 | * This may be called continuously until the buffer is empty. | ||
92 | */ | ||
93 | static int flush_read_buffer(struct configfs_buffer * buffer, char __user * buf, | ||
94 | size_t count, loff_t * ppos) | ||
95 | { | ||
96 | int error; | ||
97 | |||
98 | if (*ppos > buffer->count) | ||
99 | return 0; | ||
100 | |||
101 | if (count > (buffer->count - *ppos)) | ||
102 | count = buffer->count - *ppos; | ||
103 | |||
104 | error = copy_to_user(buf,buffer->page + *ppos,count); | ||
105 | if (!error) | ||
106 | *ppos += count; | ||
107 | return error ? -EFAULT : count; | ||
108 | } | ||
109 | |||
110 | /** | 80 | /** |
111 | * configfs_read_file - read an attribute. | 81 | * configfs_read_file - read an attribute. |
112 | * @file: file pointer. | 82 | * @file: file pointer. |
@@ -139,7 +109,8 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp | |||
139 | } | 109 | } |
140 | pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", | 110 | pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", |
141 | __FUNCTION__, count, *ppos, buffer->page); | 111 | __FUNCTION__, count, *ppos, buffer->page); |
142 | retval = flush_read_buffer(buffer,buf,count,ppos); | 112 | retval = simple_read_from_buffer(buf, count, ppos, buffer->page, |
113 | buffer->count); | ||
143 | out: | 114 | out: |
144 | up(&buffer->sem); | 115 | up(&buffer->sem); |
145 | return retval; | 116 | return retval; |
diff --git a/fs/direct-io.c b/fs/direct-io.c index 1e88d8d1d2a9..8593f3dfd299 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -867,7 +867,6 @@ static int do_direct_IO(struct dio *dio) | |||
867 | do_holes: | 867 | do_holes: |
868 | /* Handle holes */ | 868 | /* Handle holes */ |
869 | if (!buffer_mapped(map_bh)) { | 869 | if (!buffer_mapped(map_bh)) { |
870 | char *kaddr; | ||
871 | loff_t i_size_aligned; | 870 | loff_t i_size_aligned; |
872 | 871 | ||
873 | /* AKPM: eargh, -ENOTBLK is a hack */ | 872 | /* AKPM: eargh, -ENOTBLK is a hack */ |
@@ -888,11 +887,8 @@ do_holes: | |||
888 | page_cache_release(page); | 887 | page_cache_release(page); |
889 | goto out; | 888 | goto out; |
890 | } | 889 | } |
891 | kaddr = kmap_atomic(page, KM_USER0); | 890 | zero_user_page(page, block_in_page << blkbits, |
892 | memset(kaddr + (block_in_page << blkbits), | 891 | 1 << blkbits, KM_USER0); |
893 | 0, 1 << blkbits); | ||
894 | flush_dcache_page(page); | ||
895 | kunmap_atomic(kaddr, KM_USER0); | ||
896 | dio->block_in_file++; | 892 | dio->block_in_file++; |
897 | block_in_page++; | 893 | block_in_page++; |
898 | goto next_block; | 894 | goto next_block; |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index e1bb03171986..a6cb6171c3af 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -1767,7 +1767,6 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, | |||
1767 | struct inode *inode = mapping->host; | 1767 | struct inode *inode = mapping->host; |
1768 | struct buffer_head *bh; | 1768 | struct buffer_head *bh; |
1769 | int err = 0; | 1769 | int err = 0; |
1770 | void *kaddr; | ||
1771 | 1770 | ||
1772 | blocksize = inode->i_sb->s_blocksize; | 1771 | blocksize = inode->i_sb->s_blocksize; |
1773 | length = blocksize - (offset & (blocksize - 1)); | 1772 | length = blocksize - (offset & (blocksize - 1)); |
@@ -1779,10 +1778,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, | |||
1779 | */ | 1778 | */ |
1780 | if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && | 1779 | if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && |
1781 | ext3_should_writeback_data(inode) && PageUptodate(page)) { | 1780 | ext3_should_writeback_data(inode) && PageUptodate(page)) { |
1782 | kaddr = kmap_atomic(page, KM_USER0); | 1781 | zero_user_page(page, offset, length, KM_USER0); |
1783 | memset(kaddr + offset, 0, length); | ||
1784 | flush_dcache_page(page); | ||
1785 | kunmap_atomic(kaddr, KM_USER0); | ||
1786 | set_page_dirty(page); | 1782 | set_page_dirty(page); |
1787 | goto unlock; | 1783 | goto unlock; |
1788 | } | 1784 | } |
@@ -1835,11 +1831,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, | |||
1835 | goto unlock; | 1831 | goto unlock; |
1836 | } | 1832 | } |
1837 | 1833 | ||
1838 | kaddr = kmap_atomic(page, KM_USER0); | 1834 | zero_user_page(page, offset, length, KM_USER0); |
1839 | memset(kaddr + offset, 0, length); | ||
1840 | flush_dcache_page(page); | ||
1841 | kunmap_atomic(kaddr, KM_USER0); | ||
1842 | |||
1843 | BUFFER_TRACE(bh, "zeroed end of block"); | 1835 | BUFFER_TRACE(bh, "zeroed end of block"); |
1844 | 1836 | ||
1845 | err = 0; | 1837 | err = 0; |
diff --git a/fs/mpage.c b/fs/mpage.c index fa2441f57b41..0fb914fc2ee0 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -284,11 +284,9 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, | |||
284 | } | 284 | } |
285 | 285 | ||
286 | if (first_hole != blocks_per_page) { | 286 | if (first_hole != blocks_per_page) { |
287 | char *kaddr = kmap_atomic(page, KM_USER0); | 287 | zero_user_page(page, first_hole << blkbits, |
288 | memset(kaddr + (first_hole << blkbits), 0, | 288 | PAGE_CACHE_SIZE - (first_hole << blkbits), |
289 | PAGE_CACHE_SIZE - (first_hole << blkbits)); | 289 | KM_USER0); |
290 | flush_dcache_page(page); | ||
291 | kunmap_atomic(kaddr, KM_USER0); | ||
292 | if (first_hole == 0) { | 290 | if (first_hole == 0) { |
293 | SetPageUptodate(page); | 291 | SetPageUptodate(page); |
294 | unlock_page(page); | 292 | unlock_page(page); |
@@ -576,14 +574,11 @@ page_is_mapped: | |||
576 | * written out to the file." | 574 | * written out to the file." |
577 | */ | 575 | */ |
578 | unsigned offset = i_size & (PAGE_CACHE_SIZE - 1); | 576 | unsigned offset = i_size & (PAGE_CACHE_SIZE - 1); |
579 | char *kaddr; | ||
580 | 577 | ||
581 | if (page->index > end_index || !offset) | 578 | if (page->index > end_index || !offset) |
582 | goto confused; | 579 | goto confused; |
583 | kaddr = kmap_atomic(page, KM_USER0); | 580 | zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, |
584 | memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); | 581 | KM_USER0); |
585 | flush_dcache_page(page); | ||
586 | kunmap_atomic(kaddr, KM_USER0); | ||
587 | } | 582 | } |
588 | 583 | ||
589 | /* | 584 | /* |
diff --git a/fs/namei.c b/fs/namei.c index 856b2f5da51d..b3780e3fc88e 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1152,14 +1152,12 @@ static int fastcall do_path_lookup(int dfd, const char *name, | |||
1152 | 1152 | ||
1153 | fput_light(file, fput_needed); | 1153 | fput_light(file, fput_needed); |
1154 | } | 1154 | } |
1155 | current->total_link_count = 0; | 1155 | |
1156 | retval = link_path_walk(name, nd); | 1156 | retval = path_walk(name, nd); |
1157 | out: | 1157 | out: |
1158 | if (likely(retval == 0)) { | 1158 | if (unlikely(!retval && !audit_dummy_context() && nd->dentry && |
1159 | if (unlikely(!audit_dummy_context() && nd && nd->dentry && | ||
1160 | nd->dentry->d_inode)) | 1159 | nd->dentry->d_inode)) |
1161 | audit_inode(name, nd->dentry->d_inode); | 1160 | audit_inode(name, nd->dentry->d_inode); |
1162 | } | ||
1163 | out_fail: | 1161 | out_fail: |
1164 | return retval; | 1162 | return retval; |
1165 | 1163 | ||
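
The do_path_lookup() hunk swaps the open-coded total_link_count reset
plus link_path_walk() for path_walk(), and folds the audit check into a
single condition. For reference, path_walk() in a tree of this vintage
is essentially the wrapper below (sketch):

    static int path_walk(const char *name, struct nameidata *nd)
    {
            current->total_link_count = 0;
            return link_path_walk(name, nd);
    }
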
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index ce341dc76d5e..9b118ee20193 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile | |||
@@ -11,4 +11,3 @@ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o | |||
11 | nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o | 11 | nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o |
12 | nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ | 12 | nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ |
13 | nfs4acl.o nfs4callback.o nfs4recover.o | 13 | nfs4acl.o nfs4callback.o nfs4recover.o |
14 | nfsd-objs := $(nfsd-y) | ||
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 6f24768272a1..79bd03b8bbf8 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -469,6 +469,13 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
469 | nd.dentry = NULL; | 469 | nd.dentry = NULL; |
470 | exp.ex_path = NULL; | 470 | exp.ex_path = NULL; |
471 | 471 | ||
472 | /* fs locations */ | ||
473 | exp.ex_fslocs.locations = NULL; | ||
474 | exp.ex_fslocs.locations_count = 0; | ||
475 | exp.ex_fslocs.migrated = 0; | ||
476 | |||
477 | exp.ex_uuid = NULL; | ||
478 | |||
472 | if (mesg[mlen-1] != '\n') | 479 | if (mesg[mlen-1] != '\n') |
473 | return -EINVAL; | 480 | return -EINVAL; |
474 | mesg[mlen-1] = 0; | 481 | mesg[mlen-1] = 0; |
@@ -509,13 +516,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
509 | if (exp.h.expiry_time == 0) | 516 | if (exp.h.expiry_time == 0) |
510 | goto out; | 517 | goto out; |
511 | 518 | ||
512 | /* fs locations */ | ||
513 | exp.ex_fslocs.locations = NULL; | ||
514 | exp.ex_fslocs.locations_count = 0; | ||
515 | exp.ex_fslocs.migrated = 0; | ||
516 | |||
517 | exp.ex_uuid = NULL; | ||
518 | |||
519 | /* flags */ | 519 | /* flags */ |
520 | err = get_int(&mesg, &an_int); | 520 | err = get_int(&mesg, &an_int); |
521 | if (err == -ENOENT) | 521 | if (err == -ENOENT) |
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 7f5bad0393b1..eac82830bfd7 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c | |||
@@ -177,7 +177,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, | |||
177 | if (max_blocksize < resp->count) | 177 | if (max_blocksize < resp->count) |
178 | resp->count = max_blocksize; | 178 | resp->count = max_blocksize; |
179 | 179 | ||
180 | svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); | 180 | svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); |
181 | 181 | ||
182 | fh_copy(&resp->fh, &argp->fh); | 182 | fh_copy(&resp->fh, &argp->fh); |
183 | nfserr = nfsd_read(rqstp, &resp->fh, NULL, | 183 | nfserr = nfsd_read(rqstp, &resp->fh, NULL, |
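
svc_reserve_auth() behaves like svc_reserve() but also leaves room for
the authentication flavor's reply padding. Assumed shape of the helper
(the rq_auth_slack field name is taken on trust from this patch
series):

    static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space)
    {
            svc_reserve(rqstp, space + rqstp->rq_auth_slack);
    }
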
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 7e4bb0af24d7..10f6e7dcf633 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c | |||
@@ -239,7 +239,7 @@ static __be32 * | |||
239 | encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) | 239 | encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) |
240 | { | 240 | { |
241 | struct dentry *dentry = fhp->fh_dentry; | 241 | struct dentry *dentry = fhp->fh_dentry; |
242 | if (dentry && dentry->d_inode != NULL) { | 242 | if (dentry && dentry->d_inode) { |
243 | int err; | 243 | int err; |
244 | struct kstat stat; | 244 | struct kstat stat; |
245 | 245 | ||
@@ -300,9 +300,9 @@ int | |||
300 | nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, | 300 | nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, |
301 | struct nfsd3_sattrargs *args) | 301 | struct nfsd3_sattrargs *args) |
302 | { | 302 | { |
303 | if (!(p = decode_fh(p, &args->fh)) | 303 | if (!(p = decode_fh(p, &args->fh))) |
304 | || !(p = decode_sattr3(p, &args->attrs))) | ||
305 | return 0; | 304 | return 0; |
305 | p = decode_sattr3(p, &args->attrs); | ||
306 | 306 | ||
307 | if ((args->check_guard = ntohl(*p++)) != 0) { | 307 | if ((args->check_guard = ntohl(*p++)) != 0) { |
308 | struct timespec time; | 308 | struct timespec time; |
@@ -343,9 +343,9 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, | |||
343 | int v,pn; | 343 | int v,pn; |
344 | u32 max_blocksize = svc_max_payload(rqstp); | 344 | u32 max_blocksize = svc_max_payload(rqstp); |
345 | 345 | ||
346 | if (!(p = decode_fh(p, &args->fh)) | 346 | if (!(p = decode_fh(p, &args->fh))) |
347 | || !(p = xdr_decode_hyper(p, &args->offset))) | ||
348 | return 0; | 347 | return 0; |
348 | p = xdr_decode_hyper(p, &args->offset); | ||
349 | 349 | ||
350 | len = args->count = ntohl(*p++); | 350 | len = args->count = ntohl(*p++); |
351 | 351 | ||
@@ -369,28 +369,44 @@ int | |||
369 | nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, | 369 | nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, |
370 | struct nfsd3_writeargs *args) | 370 | struct nfsd3_writeargs *args) |
371 | { | 371 | { |
372 | unsigned int len, v, hdr; | 372 | unsigned int len, v, hdr, dlen; |
373 | u32 max_blocksize = svc_max_payload(rqstp); | 373 | u32 max_blocksize = svc_max_payload(rqstp); |
374 | 374 | ||
375 | if (!(p = decode_fh(p, &args->fh)) | 375 | if (!(p = decode_fh(p, &args->fh))) |
376 | || !(p = xdr_decode_hyper(p, &args->offset))) | ||
377 | return 0; | 376 | return 0; |
377 | p = xdr_decode_hyper(p, &args->offset); | ||
378 | 378 | ||
379 | args->count = ntohl(*p++); | 379 | args->count = ntohl(*p++); |
380 | args->stable = ntohl(*p++); | 380 | args->stable = ntohl(*p++); |
381 | len = args->len = ntohl(*p++); | 381 | len = args->len = ntohl(*p++); |
382 | /* | ||
383 | * The count must equal the amount of data passed. | ||
384 | */ | ||
385 | if (args->count != args->len) | ||
386 | return 0; | ||
382 | 387 | ||
388 | /* | ||
389 | * Check to make sure that we got the right number of | ||
390 | * bytes. | ||
391 | */ | ||
383 | hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; | 392 | hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; |
384 | if (rqstp->rq_arg.len < hdr || | 393 | dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len |
385 | rqstp->rq_arg.len - hdr < len) | 394 | - hdr; |
395 | /* | ||
396 | * Round the length of the data which was specified up to | ||
397 | * the next multiple of XDR units and then compare that | ||
398 | * against the length which was actually received. | ||
399 | */ | ||
400 | if (dlen != XDR_QUADLEN(len)*4) | ||
386 | return 0; | 401 | return 0; |
387 | 402 | ||
403 | if (args->count > max_blocksize) { | ||
404 | args->count = max_blocksize; | ||
405 | len = args->len = max_blocksize; | ||
406 | } | ||
388 | rqstp->rq_vec[0].iov_base = (void*)p; | 407 | rqstp->rq_vec[0].iov_base = (void*)p; |
389 | rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; | 408 | rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; |
390 | 409 | v = 0; | |
391 | if (len > max_blocksize) | ||
392 | len = max_blocksize; | ||
393 | v= 0; | ||
394 | while (len > rqstp->rq_vec[v].iov_len) { | 410 | while (len > rqstp->rq_vec[v].iov_len) { |
395 | len -= rqstp->rq_vec[v].iov_len; | 411 | len -= rqstp->rq_vec[v].iov_len; |
396 | v++; | 412 | v++; |
@@ -398,9 +414,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, | |||
398 | rqstp->rq_vec[v].iov_len = PAGE_SIZE; | 414 | rqstp->rq_vec[v].iov_len = PAGE_SIZE; |
399 | } | 415 | } |
400 | rqstp->rq_vec[v].iov_len = len; | 416 | rqstp->rq_vec[v].iov_len = len; |
401 | args->vlen = v+1; | 417 | args->vlen = v + 1; |
402 | 418 | return 1; | |
403 | return args->count == args->len && rqstp->rq_vec[0].iov_len > 0; | ||
404 | } | 419 | } |
405 | 420 | ||
406 | int | 421 | int |
@@ -414,8 +429,7 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p, | |||
414 | switch (args->createmode = ntohl(*p++)) { | 429 | switch (args->createmode = ntohl(*p++)) { |
415 | case NFS3_CREATE_UNCHECKED: | 430 | case NFS3_CREATE_UNCHECKED: |
416 | case NFS3_CREATE_GUARDED: | 431 | case NFS3_CREATE_GUARDED: |
417 | if (!(p = decode_sattr3(p, &args->attrs))) | 432 | p = decode_sattr3(p, &args->attrs); |
418 | return 0; | ||
419 | break; | 433 | break; |
420 | case NFS3_CREATE_EXCLUSIVE: | 434 | case NFS3_CREATE_EXCLUSIVE: |
421 | args->verf = p; | 435 | args->verf = p; |
@@ -431,10 +445,10 @@ int | |||
431 | nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p, | 445 | nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p, |
432 | struct nfsd3_createargs *args) | 446 | struct nfsd3_createargs *args) |
433 | { | 447 | { |
434 | if (!(p = decode_fh(p, &args->fh)) | 448 | if (!(p = decode_fh(p, &args->fh)) || |
435 | || !(p = decode_filename(p, &args->name, &args->len)) | 449 | !(p = decode_filename(p, &args->name, &args->len))) |
436 | || !(p = decode_sattr3(p, &args->attrs))) | ||
437 | return 0; | 450 | return 0; |
451 | p = decode_sattr3(p, &args->attrs); | ||
438 | 452 | ||
439 | return xdr_argsize_check(rqstp, p); | 453 | return xdr_argsize_check(rqstp, p); |
440 | } | 454 | } |
@@ -448,11 +462,12 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, | |||
448 | char *old, *new; | 462 | char *old, *new; |
449 | struct kvec *vec; | 463 | struct kvec *vec; |
450 | 464 | ||
451 | if (!(p = decode_fh(p, &args->ffh)) | 465 | if (!(p = decode_fh(p, &args->ffh)) || |
452 | || !(p = decode_filename(p, &args->fname, &args->flen)) | 466 | !(p = decode_filename(p, &args->fname, &args->flen)) |
453 | || !(p = decode_sattr3(p, &args->attrs)) | ||
454 | ) | 467 | ) |
455 | return 0; | 468 | return 0; |
469 | p = decode_sattr3(p, &args->attrs); | ||
470 | |||
456 | /* now decode the pathname, which might be larger than the first page. | 471 | /* now decode the pathname, which might be larger than the first page. |
457 | * As we have to check for nul's anyway, we copy it into a new page | 472 | * As we have to check for nul's anyway, we copy it into a new page |
458 | * This page appears in the rq_res.pages list, but as pages_len is always | 473 | * This page appears in the rq_res.pages list, but as pages_len is always |
@@ -502,10 +517,8 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p, | |||
502 | args->ftype = ntohl(*p++); | 517 | args->ftype = ntohl(*p++); |
503 | 518 | ||
504 | if (args->ftype == NF3BLK || args->ftype == NF3CHR | 519 | if (args->ftype == NF3BLK || args->ftype == NF3CHR |
505 | || args->ftype == NF3SOCK || args->ftype == NF3FIFO) { | 520 | || args->ftype == NF3SOCK || args->ftype == NF3FIFO) |
506 | if (!(p = decode_sattr3(p, &args->attrs))) | 521 | p = decode_sattr3(p, &args->attrs); |
507 | return 0; | ||
508 | } | ||
509 | 522 | ||
510 | if (args->ftype == NF3BLK || args->ftype == NF3CHR) { | 523 | if (args->ftype == NF3BLK || args->ftype == NF3CHR) { |
511 | args->major = ntohl(*p++); | 524 | args->major = ntohl(*p++); |
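
The rewritten write-argument decoder validates the payload length
instead of silently clamping it: XDR carries opaque data padded to
4-byte units, so the byte count received after the header must equal
the request length rounded up to the next quad. An illustrative
standalone version of that check, using the real XDR_QUADLEN macro from
include/linux/sunrpc/xdr.h:

    #define XDR_QUADLEN(l)  (((l) + 3) >> 2)

    /* dlen: bytes actually received after the RPC header */
    static int write_payload_ok(unsigned int len, unsigned int dlen)
    {
            return dlen == XDR_QUADLEN(len) * 4;
    }
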
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 673a53c014a3..cc3b7badd486 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c | |||
@@ -137,7 +137,6 @@ struct ace_container { | |||
137 | static short ace2type(struct nfs4_ace *); | 137 | static short ace2type(struct nfs4_ace *); |
138 | static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, | 138 | static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, |
139 | unsigned int); | 139 | unsigned int); |
140 | void nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t); | ||
141 | 140 | ||
142 | struct nfs4_acl * | 141 | struct nfs4_acl * |
143 | nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, | 142 | nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, |
@@ -785,21 +784,6 @@ nfs4_acl_new(int n) | |||
785 | return acl; | 784 | return acl; |
786 | } | 785 | } |
787 | 786 | ||
788 | void | ||
789 | nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask, | ||
790 | int whotype, uid_t who) | ||
791 | { | ||
792 | struct nfs4_ace *ace = acl->aces + acl->naces; | ||
793 | |||
794 | ace->type = type; | ||
795 | ace->flag = flag; | ||
796 | ace->access_mask = access_mask; | ||
797 | ace->whotype = whotype; | ||
798 | ace->who = who; | ||
799 | |||
800 | acl->naces++; | ||
801 | } | ||
802 | |||
803 | static struct { | 787 | static struct { |
804 | char *string; | 788 | char *string; |
805 | int stringlen; | 789 | int stringlen; |
@@ -851,6 +835,5 @@ nfs4_acl_write_who(int who, char *p) | |||
851 | } | 835 | } |
852 | 836 | ||
853 | EXPORT_SYMBOL(nfs4_acl_new); | 837 | EXPORT_SYMBOL(nfs4_acl_new); |
854 | EXPORT_SYMBOL(nfs4_acl_add_ace); | ||
855 | EXPORT_SYMBOL(nfs4_acl_get_whotype); | 838 | EXPORT_SYMBOL(nfs4_acl_get_whotype); |
856 | EXPORT_SYMBOL(nfs4_acl_write_who); | 839 | EXPORT_SYMBOL(nfs4_acl_write_who); |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 678f3be88ac0..3cc8ce422ab1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -1326,8 +1326,6 @@ do_recall(void *__dp) | |||
1326 | { | 1326 | { |
1327 | struct nfs4_delegation *dp = __dp; | 1327 | struct nfs4_delegation *dp = __dp; |
1328 | 1328 | ||
1329 | daemonize("nfsv4-recall"); | ||
1330 | |||
1331 | nfsd4_cb_recall(dp); | 1329 | nfsd4_cb_recall(dp); |
1332 | return 0; | 1330 | return 0; |
1333 | } | 1331 | } |
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 739dd3c5c3b2..6ca2d24fc216 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c | |||
@@ -323,7 +323,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, | |||
323 | * | 323 | * |
324 | */ | 324 | */ |
325 | 325 | ||
326 | u8 version = 1; | 326 | u8 version; |
327 | u8 fsid_type = 0; | 327 | u8 fsid_type = 0; |
328 | struct inode * inode = dentry->d_inode; | 328 | struct inode * inode = dentry->d_inode; |
329 | struct dentry *parent = dentry->d_parent; | 329 | struct dentry *parent = dentry->d_parent; |
@@ -341,15 +341,59 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, | |||
341 | * the reference filehandle (if it is in the same export) | 341 | * the reference filehandle (if it is in the same export) |
342 | * or the export options. | 342 | * or the export options. |
343 | */ | 343 | */ |
344 | retry: | ||
345 | version = 1; | ||
344 | if (ref_fh && ref_fh->fh_export == exp) { | 346 | if (ref_fh && ref_fh->fh_export == exp) { |
345 | version = ref_fh->fh_handle.fh_version; | 347 | version = ref_fh->fh_handle.fh_version; |
346 | if (version == 0xca) | 348 | fsid_type = ref_fh->fh_handle.fh_fsid_type; |
349 | |||
350 | if (ref_fh == fhp) | ||
351 | fh_put(ref_fh); | ||
352 | ref_fh = NULL; | ||
353 | |||
354 | switch (version) { | ||
355 | case 0xca: | ||
347 | fsid_type = FSID_DEV; | 356 | fsid_type = FSID_DEV; |
348 | else | 357 | break; |
349 | fsid_type = ref_fh->fh_handle.fh_fsid_type; | 358 | case 1: |
350 | /* We know this version/type works for this export | 359 | break; |
351 | * so there is no need for further checks. | 360 | default: |
361 | goto retry; | ||
362 | } | ||
363 | |||
364 | /* Need to check that this type works for this | ||
365 | * export point. As the fsid -> filesystem mapping | ||
366 | * was guided by user-space, there is no guarantee | ||
367 | * that the filesystem actually supports that fsid | ||
368 | * type. If it doesn't we loop around again without | ||
369 | * ref_fh set. | ||
352 | */ | 370 | */ |
371 | switch(fsid_type) { | ||
372 | case FSID_DEV: | ||
373 | if (!old_valid_dev(ex_dev)) | ||
374 | goto retry; | ||
375 | /* FALL THROUGH */ | ||
376 | case FSID_MAJOR_MINOR: | ||
377 | case FSID_ENCODE_DEV: | ||
378 | if (!(exp->ex_dentry->d_inode->i_sb->s_type->fs_flags | ||
379 | & FS_REQUIRES_DEV)) | ||
380 | goto retry; | ||
381 | break; | ||
382 | case FSID_NUM: | ||
383 | if (! (exp->ex_flags & NFSEXP_FSID)) | ||
384 | goto retry; | ||
385 | break; | ||
386 | case FSID_UUID8: | ||
387 | case FSID_UUID16: | ||
388 | if (!root_export) | ||
389 | goto retry; | ||
390 | /* fall through */ | ||
391 | case FSID_UUID4_INUM: | ||
392 | case FSID_UUID16_INUM: | ||
393 | if (exp->ex_uuid == NULL) | ||
394 | goto retry; | ||
395 | break; | ||
396 | } | ||
353 | } else if (exp->ex_uuid) { | 397 | } else if (exp->ex_uuid) { |
354 | if (fhp->fh_maxsize >= 64) { | 398 | if (fhp->fh_maxsize >= 64) { |
355 | if (root_export) | 399 | if (root_export) |
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 5cc2eec981b8..b2c7147aa921 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c | |||
@@ -155,7 +155,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, | |||
155 | argp->count); | 155 | argp->count); |
156 | argp->count = NFSSVC_MAXBLKSIZE_V2; | 156 | argp->count = NFSSVC_MAXBLKSIZE_V2; |
157 | } | 157 | } |
158 | svc_reserve(rqstp, (19<<2) + argp->count + 4); | 158 | svc_reserve_auth(rqstp, (19<<2) + argp->count + 4); |
159 | 159 | ||
160 | resp->count = argp->count; | 160 | resp->count = argp->count; |
161 | nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, | 161 | nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, |
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 0c24b9e24fe8..cb3e7fadb772 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c | |||
@@ -231,9 +231,10 @@ int | |||
231 | nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, | 231 | nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, |
232 | struct nfsd_sattrargs *args) | 232 | struct nfsd_sattrargs *args) |
233 | { | 233 | { |
234 | if (!(p = decode_fh(p, &args->fh)) | 234 | p = decode_fh(p, &args->fh); |
235 | || !(p = decode_sattr(p, &args->attrs))) | 235 | if (!p) |
236 | return 0; | 236 | return 0; |
237 | p = decode_sattr(p, &args->attrs); | ||
237 | 238 | ||
238 | return xdr_argsize_check(rqstp, p); | 239 | return xdr_argsize_check(rqstp, p); |
239 | } | 240 | } |
@@ -284,8 +285,9 @@ int | |||
284 | nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, | 285 | nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, |
285 | struct nfsd_writeargs *args) | 286 | struct nfsd_writeargs *args) |
286 | { | 287 | { |
287 | unsigned int len; | 288 | unsigned int len, hdr, dlen; |
288 | int v; | 289 | int v; |
290 | |||
289 | if (!(p = decode_fh(p, &args->fh))) | 291 | if (!(p = decode_fh(p, &args->fh))) |
290 | return 0; | 292 | return 0; |
291 | 293 | ||
@@ -293,11 +295,30 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, | |||
293 | args->offset = ntohl(*p++); /* offset */ | 295 | args->offset = ntohl(*p++); /* offset */ |
294 | p++; /* totalcount */ | 296 | p++; /* totalcount */ |
295 | len = args->len = ntohl(*p++); | 297 | len = args->len = ntohl(*p++); |
296 | rqstp->rq_vec[0].iov_base = (void*)p; | 298 | /* |
297 | rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - | 299 | * The protocol specifies a maximum of 8192 bytes. |
298 | (((void*)p) - rqstp->rq_arg.head[0].iov_base); | 300 | */ |
299 | if (len > NFSSVC_MAXBLKSIZE_V2) | 301 | if (len > NFSSVC_MAXBLKSIZE_V2) |
300 | len = NFSSVC_MAXBLKSIZE_V2; | 302 | return 0; |
303 | |||
304 | /* | ||
305 | * Check to make sure that we got the right number of | ||
306 | * bytes. | ||
307 | */ | ||
308 | hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; | ||
309 | dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len | ||
310 | - hdr; | ||
311 | |||
312 | /* | ||
313 | * Round the length of the data which was specified up to | ||
314 | * the next multiple of XDR units and then compare that | ||
315 | * against the length which was actually received. | ||
316 | */ | ||
317 | if (dlen != XDR_QUADLEN(len)*4) | ||
318 | return 0; | ||
319 | |||
320 | rqstp->rq_vec[0].iov_base = (void*)p; | ||
321 | rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; | ||
301 | v = 0; | 322 | v = 0; |
302 | while (len > rqstp->rq_vec[v].iov_len) { | 323 | while (len > rqstp->rq_vec[v].iov_len) { |
303 | len -= rqstp->rq_vec[v].iov_len; | 324 | len -= rqstp->rq_vec[v].iov_len; |
@@ -306,18 +327,18 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, | |||
306 | rqstp->rq_vec[v].iov_len = PAGE_SIZE; | 327 | rqstp->rq_vec[v].iov_len = PAGE_SIZE; |
307 | } | 328 | } |
308 | rqstp->rq_vec[v].iov_len = len; | 329 | rqstp->rq_vec[v].iov_len = len; |
309 | args->vlen = v+1; | 330 | args->vlen = v + 1; |
310 | return rqstp->rq_vec[0].iov_len > 0; | 331 | return 1; |
311 | } | 332 | } |
312 | 333 | ||
313 | int | 334 | int |
314 | nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p, | 335 | nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p, |
315 | struct nfsd_createargs *args) | 336 | struct nfsd_createargs *args) |
316 | { | 337 | { |
317 | if (!(p = decode_fh(p, &args->fh)) | 338 | if ( !(p = decode_fh(p, &args->fh)) |
318 | || !(p = decode_filename(p, &args->name, &args->len)) | 339 | || !(p = decode_filename(p, &args->name, &args->len))) |
319 | || !(p = decode_sattr(p, &args->attrs))) | ||
320 | return 0; | 340 | return 0; |
341 | p = decode_sattr(p, &args->attrs); | ||
321 | 342 | ||
322 | return xdr_argsize_check(rqstp, p); | 343 | return xdr_argsize_check(rqstp, p); |
323 | } | 344 | } |
@@ -361,11 +382,11 @@ int | |||
361 | nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, | 382 | nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, |
362 | struct nfsd_symlinkargs *args) | 383 | struct nfsd_symlinkargs *args) |
363 | { | 384 | { |
364 | if (!(p = decode_fh(p, &args->ffh)) | 385 | if ( !(p = decode_fh(p, &args->ffh)) |
365 | || !(p = decode_filename(p, &args->fname, &args->flen)) | 386 | || !(p = decode_filename(p, &args->fname, &args->flen)) |
366 | || !(p = decode_pathname(p, &args->tname, &args->tlen)) | 387 | || !(p = decode_pathname(p, &args->tname, &args->tlen))) |
367 | || !(p = decode_sattr(p, &args->attrs))) | ||
368 | return 0; | 388 | return 0; |
389 | p = decode_sattr(p, &args->attrs); | ||
369 | 390 | ||
370 | return xdr_argsize_check(rqstp, p); | 391 | return xdr_argsize_check(rqstp, p); |
371 | } | 392 | } |
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index ab45db529c80..9e451a68580f 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -1059,20 +1059,12 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode | |||
1059 | mapping blocks, since there is none, so we just zero out remaining | 1059 | mapping blocks, since there is none, so we just zero out remaining |
1060 | parts of first and last pages in write area (if needed) */ | 1060 | parts of first and last pages in write area (if needed) */ |
1061 | if ((pos & ~((loff_t) PAGE_CACHE_SIZE - 1)) > inode->i_size) { | 1061 | if ((pos & ~((loff_t) PAGE_CACHE_SIZE - 1)) > inode->i_size) { |
1062 | if (from != 0) { /* First page needs to be partially zeroed */ | 1062 | if (from != 0) /* First page needs to be partially zeroed */ |
1063 | char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0); | 1063 | zero_user_page(prepared_pages[0], 0, from, KM_USER0); |
1064 | memset(kaddr, 0, from); | 1064 | |
1065 | kunmap_atomic(kaddr, KM_USER0); | 1065 | if (to != PAGE_CACHE_SIZE) /* Last page needs to be partially zeroed */ |
1066 | flush_dcache_page(prepared_pages[0]); | 1066 | zero_user_page(prepared_pages[num_pages-1], to, |
1067 | } | 1067 | PAGE_CACHE_SIZE - to, KM_USER0); |
1068 | if (to != PAGE_CACHE_SIZE) { /* Last page needs to be partially zeroed */ | ||
1069 | char *kaddr = | ||
1070 | kmap_atomic(prepared_pages[num_pages - 1], | ||
1071 | KM_USER0); | ||
1072 | memset(kaddr + to, 0, PAGE_CACHE_SIZE - to); | ||
1073 | kunmap_atomic(kaddr, KM_USER0); | ||
1074 | flush_dcache_page(prepared_pages[num_pages - 1]); | ||
1075 | } | ||
1076 | 1068 | ||
1077 | /* Since all blocks are new - use already calculated value */ | 1069 | /* Since all blocks are new - use already calculated value */ |
1078 | return blocks; | 1070 | return blocks; |
@@ -1199,13 +1191,9 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode | |||
1199 | ll_rw_block(READ, 1, &bh); | 1191 | ll_rw_block(READ, 1, &bh); |
1200 | *wait_bh++ = bh; | 1192 | *wait_bh++ = bh; |
1201 | } else { /* Not mapped, zero it */ | 1193 | } else { /* Not mapped, zero it */ |
1202 | char *kaddr = | 1194 | zero_user_page(prepared_pages[0], |
1203 | kmap_atomic(prepared_pages[0], | 1195 | block_start, |
1204 | KM_USER0); | 1196 | from - block_start, KM_USER0); |
1205 | memset(kaddr + block_start, 0, | ||
1206 | from - block_start); | ||
1207 | kunmap_atomic(kaddr, KM_USER0); | ||
1208 | flush_dcache_page(prepared_pages[0]); | ||
1209 | set_buffer_uptodate(bh); | 1197 | set_buffer_uptodate(bh); |
1210 | } | 1198 | } |
1211 | } | 1199 | } |
@@ -1237,13 +1225,8 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode | |||
1237 | ll_rw_block(READ, 1, &bh); | 1225 | ll_rw_block(READ, 1, &bh); |
1238 | *wait_bh++ = bh; | 1226 | *wait_bh++ = bh; |
1239 | } else { /* Not mapped, zero it */ | 1227 | } else { /* Not mapped, zero it */ |
1240 | char *kaddr = | 1228 | zero_user_page(prepared_pages[num_pages-1], |
1241 | kmap_atomic(prepared_pages | 1229 | to, block_end - to, KM_USER0); |
1242 | [num_pages - 1], | ||
1243 | KM_USER0); | ||
1244 | memset(kaddr + to, 0, block_end - to); | ||
1245 | kunmap_atomic(kaddr, KM_USER0); | ||
1246 | flush_dcache_page(prepared_pages[num_pages - 1]); | ||
1247 | set_buffer_uptodate(bh); | 1230 | set_buffer_uptodate(bh); |
1248 | } | 1231 | } |
1249 | } | 1232 | } |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 9fcbfe316977..1272d11399fb 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -2148,13 +2148,8 @@ int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) | |||
2148 | length = offset & (blocksize - 1); | 2148 | length = offset & (blocksize - 1); |
2149 | /* if we are not on a block boundary */ | 2149 | /* if we are not on a block boundary */ |
2150 | if (length) { | 2150 | if (length) { |
2151 | char *kaddr; | ||
2152 | |||
2153 | length = blocksize - length; | 2151 | length = blocksize - length; |
2154 | kaddr = kmap_atomic(page, KM_USER0); | 2152 | zero_user_page(page, offset, length, KM_USER0); |
2155 | memset(kaddr + offset, 0, length); | ||
2156 | flush_dcache_page(page); | ||
2157 | kunmap_atomic(kaddr, KM_USER0); | ||
2158 | if (buffer_mapped(bh) && bh->b_blocknr != 0) { | 2153 | if (buffer_mapped(bh) && bh->b_blocknr != 0) { |
2159 | mark_buffer_dirty(bh); | 2154 | mark_buffer_dirty(bh); |
2160 | } | 2155 | } |
@@ -2370,7 +2365,6 @@ static int reiserfs_write_full_page(struct page *page, | |||
2370 | ** last byte in the file | 2365 | ** last byte in the file |
2371 | */ | 2366 | */ |
2372 | if (page->index >= end_index) { | 2367 | if (page->index >= end_index) { |
2373 | char *kaddr; | ||
2374 | unsigned last_offset; | 2368 | unsigned last_offset; |
2375 | 2369 | ||
2376 | last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1); | 2370 | last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1); |
@@ -2379,10 +2373,7 @@ static int reiserfs_write_full_page(struct page *page, | |||
2379 | unlock_page(page); | 2373 | unlock_page(page); |
2380 | return 0; | 2374 | return 0; |
2381 | } | 2375 | } |
2382 | kaddr = kmap_atomic(page, KM_USER0); | 2376 | zero_user_page(page, last_offset, PAGE_CACHE_SIZE - last_offset, KM_USER0); |
2383 | memset(kaddr + last_offset, 0, PAGE_CACHE_SIZE - last_offset); | ||
2384 | flush_dcache_page(page); | ||
2385 | kunmap_atomic(kaddr, KM_USER0); | ||
2386 | } | 2377 | } |
2387 | bh = head; | 2378 | bh = head; |
2388 | block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); | 2379 | block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); |
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 0e637adc2b87..b502c7197ec0 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -111,36 +111,6 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer | |||
111 | return ret; | 111 | return ret; |
112 | } | 112 | } |
113 | 113 | ||
114 | |||
115 | /** | ||
116 | * flush_read_buffer - push buffer to userspace. | ||
117 | * @buffer: data buffer for file. | ||
118 | * @buf: user-passed buffer. | ||
119 | * @count: number of bytes requested. | ||
120 | * @ppos: file position. | ||
121 | * | ||
122 | * Copy the buffer we filled in fill_read_buffer() to userspace. | ||
123 | * This is done at the reader's leisure, copying and advancing | ||
124 | * the amount they specify each time. | ||
125 | * This may be called continuously until the buffer is empty. | ||
126 | */ | ||
127 | static int flush_read_buffer(struct sysfs_buffer * buffer, char __user * buf, | ||
128 | size_t count, loff_t * ppos) | ||
129 | { | ||
130 | int error; | ||
131 | |||
132 | if (*ppos > buffer->count) | ||
133 | return 0; | ||
134 | |||
135 | if (count > (buffer->count - *ppos)) | ||
136 | count = buffer->count - *ppos; | ||
137 | |||
138 | error = copy_to_user(buf,buffer->page + *ppos,count); | ||
139 | if (!error) | ||
140 | *ppos += count; | ||
141 | return error ? -EFAULT : count; | ||
142 | } | ||
143 | |||
144 | /** | 114 | /** |
145 | * sysfs_read_file - read an attribute. | 115 | * sysfs_read_file - read an attribute. |
146 | * @file: file pointer. | 116 | * @file: file pointer. |
@@ -177,7 +147,8 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) | |||
177 | } | 147 | } |
178 | pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", | 148 | pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", |
179 | __FUNCTION__, count, *ppos, buffer->page); | 149 | __FUNCTION__, count, *ppos, buffer->page); |
180 | retval = flush_read_buffer(buffer,buf,count,ppos); | 150 | retval = simple_read_from_buffer(buf, count, ppos, buffer->page, |
151 | buffer->count); | ||
181 | out: | 152 | out: |
182 | up(&buffer->sem); | 153 | up(&buffer->sem); |
183 | return retval; | 154 | return retval; |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index f5aa3ef855fb..a96bde6df96d 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -1734,11 +1734,13 @@ xfs_icsb_cpu_notify( | |||
1734 | per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu); | 1734 | per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu); |
1735 | switch (action) { | 1735 | switch (action) { |
1736 | case CPU_UP_PREPARE: | 1736 | case CPU_UP_PREPARE: |
1737 | case CPU_UP_PREPARE_FROZEN: | ||
1737 | /* Easy Case - initialize the area and locks, and | 1738 | /* Easy Case - initialize the area and locks, and |
1738 | * then rebalance when online does everything else for us. */ | 1739 | * then rebalance when online does everything else for us. */ |
1739 | memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); | 1740 | memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); |
1740 | break; | 1741 | break; |
1741 | case CPU_ONLINE: | 1742 | case CPU_ONLINE: |
1743 | case CPU_ONLINE_FROZEN: | ||
1742 | xfs_icsb_lock(mp); | 1744 | xfs_icsb_lock(mp); |
1743 | xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); | 1745 | xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); |
1744 | xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); | 1746 | xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); |
@@ -1746,6 +1748,7 @@ xfs_icsb_cpu_notify( | |||
1746 | xfs_icsb_unlock(mp); | 1748 | xfs_icsb_unlock(mp); |
1747 | break; | 1749 | break; |
1748 | case CPU_DEAD: | 1750 | case CPU_DEAD: |
1751 | case CPU_DEAD_FROZEN: | ||
1749 | /* Disable all the counters, then fold the dead cpu's | 1752 | /* Disable all the counters, then fold the dead cpu's |
1750 | * count into the total on the global superblock and | 1753 | * count into the total on the global superblock and |
1751 | * re-enable the counters. */ | 1754 | * re-enable the counters. */ |
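
The buffer.c and xfs_mount.c notifier hunks follow one pattern: every
hotplug action now has a _FROZEN twin that is delivered while tasks are
frozen for suspend, and handlers must accept both. Assuming the _FROZEN
codes are the plain ones with a CPU_TASKS_FROZEN flag bit OR-ed in, a
handler may equivalently mask the flag once instead of listing both
cases:

    static int example_cpu_notify(struct notifier_block *self,
                                  unsigned long action, void *hcpu)
    {
            switch (action & ~CPU_TASKS_FROZEN) {
            case CPU_UP_PREPARE:
            case CPU_ONLINE:
            case CPU_DEAD:
                    /* identical handling whether or not tasks are frozen */
                    break;
            }
            return NOTIFY_OK;
    }
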
diff --git a/include/asm-alpha/smp.h b/include/asm-alpha/smp.h index a1a1eca6be45..286e1d844f63 100644 --- a/include/asm-alpha/smp.h +++ b/include/asm-alpha/smp.h | |||
@@ -51,6 +51,7 @@ int smp_call_function_on_cpu(void (*func) (void *info), void *info,int retry, in | |||
51 | 51 | ||
52 | #else /* CONFIG_SMP */ | 52 | #else /* CONFIG_SMP */ |
53 | 53 | ||
54 | #define hard_smp_processor_id() 0 | ||
54 | #define smp_call_function_on_cpu(func,info,retry,wait,cpu) ({ 0; }) | 55 | #define smp_call_function_on_cpu(func,info,retry,wait,cpu) ({ 0; }) |
55 | 56 | ||
56 | #endif /* CONFIG_SMP */ | 57 | #endif /* CONFIG_SMP */ |
diff --git a/include/asm-alpha/thread_info.h b/include/asm-alpha/thread_info.h index eeb3bef91e11..f4defc2bd3fb 100644 --- a/include/asm-alpha/thread_info.h +++ b/include/asm-alpha/thread_info.h | |||
@@ -97,7 +97,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); | |||
97 | 1 << TIF_UAC_SIGBUS) | 97 | 1 << TIF_UAC_SIGBUS) |
98 | 98 | ||
99 | #define SET_UNALIGN_CTL(task,value) ({ \ | 99 | #define SET_UNALIGN_CTL(task,value) ({ \ |
100 | (task)->thread_info->flags = (((task)->thread_info->flags & \ | 100 | task_thread_info(task)->flags = ((task_thread_info(task)->flags & \ |
101 | ~ALPHA_UAC_MASK) \ | 101 | ~ALPHA_UAC_MASK) \ |
102 | | (((value) << ALPHA_UAC_SHIFT) & (1<<TIF_UAC_NOPRINT))\ | 102 | | (((value) << ALPHA_UAC_SHIFT) & (1<<TIF_UAC_NOPRINT))\ |
103 | | (((value) << (ALPHA_UAC_SHIFT + 1)) & (1<<TIF_UAC_SIGBUS)) \ | 103 | | (((value) << (ALPHA_UAC_SHIFT + 1)) & (1<<TIF_UAC_SIGBUS)) \ |
@@ -105,11 +105,11 @@ register struct thread_info *__current_thread_info __asm__("$8"); | |||
105 | 0; }) | 105 | 0; }) |
106 | 106 | ||
107 | #define GET_UNALIGN_CTL(task,value) ({ \ | 107 | #define GET_UNALIGN_CTL(task,value) ({ \ |
108 | put_user(((task)->thread_info->flags & (1 << TIF_UAC_NOPRINT)) \ | 108 | put_user((task_thread_info(task)->flags & (1 << TIF_UAC_NOPRINT))\ |
109 | >> ALPHA_UAC_SHIFT \ | 109 | >> ALPHA_UAC_SHIFT \ |
110 | | ((task)->thread_info->flags & (1 << TIF_UAC_SIGBUS)) \ | 110 | | (task_thread_info(task)->flags & (1 << TIF_UAC_SIGBUS))\ |
111 | >> (ALPHA_UAC_SHIFT + 1) \ | 111 | >> (ALPHA_UAC_SHIFT + 1) \ |
112 | | ((task)->thread_info->flags & (1 << TIF_UAC_NOFIX)) \ | 112 | | (task_thread_info(task)->flags & (1 << TIF_UAC_NOFIX))\ |
113 | >> (ALPHA_UAC_SHIFT - 1), \ | 113 | >> (ALPHA_UAC_SHIFT - 1), \ |
114 | (int __user *)(value)); \ | 114 | (int __user *)(value)); \ |
115 | }) | 115 | }) |
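
Converting raw (task)->thread_info dereferences to the
task_thread_info() accessor (here and in the blackfin hunks below)
keeps callers working if thread_info is later moved off task_struct.
One plausible definition for a tree of this vintage (sketch only):

    #define task_thread_info(task)  ((task)->thread_info)
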
diff --git a/include/asm-arm/arch-at91/cpu.h b/include/asm-arm/arch-at91/cpu.h index d464ca58cdbc..7ef4eebe9f8e 100644 --- a/include/asm-arm/arch-at91/cpu.h +++ b/include/asm-arm/arch-at91/cpu.h | |||
@@ -68,4 +68,10 @@ static inline unsigned long at91_arch_identify(void) | |||
68 | #define cpu_is_at91sam9263() (0) | 68 | #define cpu_is_at91sam9263() (0) |
69 | #endif | 69 | #endif |
70 | 70 | ||
71 | /* | ||
72 | * Since this is ARM, we will never run on any AVR32 CPU. But these | ||
73 | * definitions may reduce clutter in common drivers. | ||
74 | */ | ||
75 | #define cpu_is_at32ap7000() (0) | ||
76 | |||
71 | #endif | 77 | #endif |
diff --git a/include/asm-avr32/arch-at32ap/cpu.h b/include/asm-avr32/arch-at32ap/cpu.h new file mode 100644 index 000000000000..2bdc5bd6f793 --- /dev/null +++ b/include/asm-avr32/arch-at32ap/cpu.h | |||
@@ -0,0 +1,33 @@ | |||
1 | /* | ||
2 | * AVR32 and (fake) AT91 CPU identification | ||
3 | * | ||
4 | * Copyright (C) 2007 Atmel Corporation | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | #ifndef __ASM_ARCH_CPU_H | ||
11 | #define __ASM_ARCH_CPU_H | ||
12 | |||
13 | /* | ||
14 | * Only AT32AP7000 is defined for now. We can identify the specific | ||
15 | * chip at runtime, but I'm not sure if it's really worth it. | ||
16 | */ | ||
17 | #ifdef CONFIG_CPU_AT32AP7000 | ||
18 | # define cpu_is_at32ap7000() (1) | ||
19 | #else | ||
20 | # define cpu_is_at32ap7000() (0) | ||
21 | #endif | ||
22 | |||
23 | /* | ||
24 | * Since this is AVR32, we will never run on any AT91 CPU. But these | ||
25 | * definitions may reduce clutter in common drivers. | ||
26 | */ | ||
27 | #define cpu_is_at91rm9200() (0) | ||
28 | #define cpu_is_at91sam9xe() (0) | ||
29 | #define cpu_is_at91sam9260() (0) | ||
30 | #define cpu_is_at91sam9261() (0) | ||
31 | #define cpu_is_at91sam9263() (0) | ||
32 | |||
33 | #endif /* __ASM_ARCH_CPU_H */ | ||
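
The point of the cross-architecture stubs is that a driver shared
between AT91 and AVR32 boards can test the CPU type unconditionally,
with the impossible cases compiling away to if (0). Hypothetical usage
(both helper functions here are invented for illustration):

    if (cpu_is_at32ap7000())
            setup_at32_clocks();    /* assumed AVR32-only quirk */
    else if (cpu_is_at91sam9260())
            setup_at91_clocks();    /* assumed AT91-only quirk */
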
diff --git a/include/asm-avr32/setup.h b/include/asm-avr32/setup.h index 1ff1a217015d..b0828d43e110 100644 --- a/include/asm-avr32/setup.h +++ b/include/asm-avr32/setup.h | |||
@@ -110,7 +110,7 @@ struct tagtable { | |||
110 | int (*parse)(struct tag *); | 110 | int (*parse)(struct tag *); |
111 | }; | 111 | }; |
112 | 112 | ||
113 | #define __tag __attribute_used__ __attribute__((__section__(".taglist"))) | 113 | #define __tag __attribute_used__ __attribute__((__section__(".taglist.init"))) |
114 | #define __tagtable(tag, fn) \ | 114 | #define __tagtable(tag, fn) \ |
115 | static struct tagtable __tagtable_##fn __tag = { tag, fn } | 115 | static struct tagtable __tagtable_##fn __tag = { tag, fn } |
116 | 116 | ||
diff --git a/include/asm-avr32/unistd.h b/include/asm-avr32/unistd.h index 8f5120471819..2418cce624cc 100644 --- a/include/asm-avr32/unistd.h +++ b/include/asm-avr32/unistd.h | |||
@@ -295,8 +295,10 @@ | |||
295 | #define __NR_shmdt 276 | 295 | #define __NR_shmdt 276 |
296 | #define __NR_shmctl 277 | 296 | #define __NR_shmctl 277 |
297 | 297 | ||
298 | #define __NR_utimensat 278 | ||
299 | |||
298 | #ifdef __KERNEL__ | 300 | #ifdef __KERNEL__ |
299 | #define NR_syscalls 278 | 301 | #define NR_syscalls 279 |
300 | 302 | ||
301 | 303 | ||
302 | #define __ARCH_WANT_IPC_PARSE_VERSION | 304 | #define __ARCH_WANT_IPC_PARSE_VERSION |
diff --git a/include/asm-blackfin/processor.h b/include/asm-blackfin/processor.h index 997465c93e82..0336ff132c16 100644 --- a/include/asm-blackfin/processor.h +++ b/include/asm-blackfin/processor.h | |||
@@ -58,10 +58,10 @@ do { \ | |||
58 | (_regs)->pc = (_pc); \ | 58 | (_regs)->pc = (_pc); \ |
59 | if (current->mm) \ | 59 | if (current->mm) \ |
60 | (_regs)->p5 = current->mm->start_data; \ | 60 | (_regs)->p5 = current->mm->start_data; \ |
61 | current->thread_info->l1_task_info.stack_start \ | 61 | task_thread_info(current)->l1_task_info.stack_start \ |
62 | = (void *)current->mm->context.stack_start; \ | 62 | = (void *)current->mm->context.stack_start; \ |
63 | current->thread_info->l1_task_info.lowest_sp = (void *)(_usp); \ | 63 | task_thread_info(current)->l1_task_info.lowest_sp = (void *)(_usp); \ |
64 | memcpy(L1_SCRATCH_TASK_INFO, &current->thread_info->l1_task_info, \ | 64 | memcpy(L1_SCRATCH_TASK_INFO, &task_thread_info(current)->l1_task_info, \ |
65 | sizeof(*L1_SCRATCH_TASK_INFO)); \ | 65 | sizeof(*L1_SCRATCH_TASK_INFO)); \ |
66 | wrusp(_usp); \ | 66 | wrusp(_usp); \ |
67 | } while(0) | 67 | } while(0) |
diff --git a/include/asm-blackfin/system.h b/include/asm-blackfin/system.h index b5bf6e7cb5e8..5e5f1a0566c0 100644 --- a/include/asm-blackfin/system.h +++ b/include/asm-blackfin/system.h | |||
@@ -239,9 +239,9 @@ asmlinkage struct task_struct *resume(struct task_struct *prev, struct task_stru | |||
239 | 239 | ||
240 | #define switch_to(prev,next,last) \ | 240 | #define switch_to(prev,next,last) \ |
241 | do { \ | 241 | do { \ |
242 | memcpy (&prev->thread_info->l1_task_info, L1_SCRATCH_TASK_INFO, \ | 242 | memcpy (&task_thread_info(prev)->l1_task_info, L1_SCRATCH_TASK_INFO, \ |
243 | sizeof *L1_SCRATCH_TASK_INFO); \ | 243 | sizeof *L1_SCRATCH_TASK_INFO); \ |
244 | memcpy (L1_SCRATCH_TASK_INFO, &next->thread_info->l1_task_info, \ | 244 | memcpy (L1_SCRATCH_TASK_INFO, &task_thread_info(next)->l1_task_info, \ |
245 | sizeof *L1_SCRATCH_TASK_INFO); \ | 245 | sizeof *L1_SCRATCH_TASK_INFO); \ |
246 | (last) = resume (prev, next); \ | 246 | (last) = resume (prev, next); \ |
247 | } while (0) | 247 | } while (0) |
diff --git a/include/asm-frv/tlb.h b/include/asm-frv/tlb.h index f94fe5cb9b3a..cd458eb6d75e 100644 --- a/include/asm-frv/tlb.h +++ b/include/asm-frv/tlb.h | |||
@@ -3,7 +3,11 @@ | |||
3 | 3 | ||
4 | #include <asm/tlbflush.h> | 4 | #include <asm/tlbflush.h> |
5 | 5 | ||
6 | #ifdef CONFIG_MMU | ||
7 | extern void check_pgt_cache(void); | ||
8 | #else | ||
6 | #define check_pgt_cache() do {} while(0) | 9 | #define check_pgt_cache() do {} while(0) |
10 | #endif | ||
7 | 11 | ||
8 | /* | 12 | /* |
9 | * we don't need any special per-pte or per-vma handling... | 13 | * we don't need any special per-pte or per-vma handling... |
diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h index 3503ad66945e..118e9812778f 100644 --- a/include/asm-i386/mmzone.h +++ b/include/asm-i386/mmzone.h | |||
@@ -122,21 +122,21 @@ static inline int pfn_valid(int pfn) | |||
122 | __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) | 122 | __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) |
123 | #define alloc_bootmem_node(pgdat, x) \ | 123 | #define alloc_bootmem_node(pgdat, x) \ |
124 | ({ \ | 124 | ({ \ |
125 | struct pglist_data __attribute__ ((unused)) \ | 125 | struct pglist_data __maybe_unused \ |
126 | *__alloc_bootmem_node__pgdat = (pgdat); \ | 126 | *__alloc_bootmem_node__pgdat = (pgdat); \ |
127 | __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, \ | 127 | __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, \ |
128 | __pa(MAX_DMA_ADDRESS)); \ | 128 | __pa(MAX_DMA_ADDRESS)); \ |
129 | }) | 129 | }) |
130 | #define alloc_bootmem_pages_node(pgdat, x) \ | 130 | #define alloc_bootmem_pages_node(pgdat, x) \ |
131 | ({ \ | 131 | ({ \ |
132 | struct pglist_data __attribute__ ((unused)) \ | 132 | struct pglist_data __maybe_unused \ |
133 | *__alloc_bootmem_node__pgdat = (pgdat); \ | 133 | *__alloc_bootmem_node__pgdat = (pgdat); \ |
134 | __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, \ | 134 | __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, \ |
135 | __pa(MAX_DMA_ADDRESS)) \ | 135 | __pa(MAX_DMA_ADDRESS)) \ |
136 | }) | 136 | }) |
137 | #define alloc_bootmem_low_pages_node(pgdat, x) \ | 137 | #define alloc_bootmem_low_pages_node(pgdat, x) \ |
138 | ({ \ | 138 | ({ \ |
139 | struct pglist_data __attribute__ ((unused)) \ | 139 | struct pglist_data __maybe_unused \ |
140 | *__alloc_bootmem_node__pgdat = (pgdat); \ | 140 | *__alloc_bootmem_node__pgdat = (pgdat); \ |
141 | __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0); \ | 141 | __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0); \ |
142 | }) | 142 | }) |
diff --git a/include/asm-i386/msr.h b/include/asm-i386/msr.h index 26861df52cc4..df21ea049369 100644 --- a/include/asm-i386/msr.h +++ b/include/asm-i386/msr.h | |||
@@ -86,62 +86,50 @@ static inline unsigned long long native_read_pmc(void) | |||
86 | 86 | ||
87 | #define rdmsr(msr,val1,val2) \ | 87 | #define rdmsr(msr,val1,val2) \ |
88 | do { \ | 88 | do { \ |
89 | unsigned long long __val = native_read_msr(msr); \ | 89 | u64 __val = native_read_msr(msr); \ |
90 | val1 = __val; \ | 90 | (val1) = (u32)__val; \ |
91 | val2 = __val >> 32; \ | 91 | (val2) = (u32)(__val >> 32); \ |
92 | } while(0) | 92 | } while(0) |
93 | 93 | ||
94 | #define wrmsr(msr,val1,val2) \ | 94 | static inline void wrmsr(u32 __msr, u32 __low, u32 __high) |
95 | native_write_msr(msr, ((unsigned long long)val2 << 32) | val1) | ||
96 | |||
97 | #define rdmsrl(msr,val) \ | ||
98 | do { \ | ||
99 | (val) = native_read_msr(msr); \ | ||
100 | } while(0) | ||
101 | |||
102 | static inline void wrmsrl (unsigned long msr, unsigned long long val) | ||
103 | { | 95 | { |
104 | unsigned long lo, hi; | 96 | native_write_msr(__msr, ((u64)__high << 32) | __low); |
105 | lo = (unsigned long) val; | ||
106 | hi = val >> 32; | ||
107 | wrmsr (msr, lo, hi); | ||
108 | } | 97 | } |
109 | 98 | ||
99 | #define rdmsrl(msr,val) \ | ||
100 | ((val) = native_read_msr(msr)) | ||
101 | |||
102 | #define wrmsrl(msr,val) native_write_msr(msr, val) | ||
103 | |||
110 | /* wrmsr with exception handling */ | 104 | /* wrmsr with exception handling */ |
111 | #define wrmsr_safe(msr,val1,val2) \ | 105 | static inline int wrmsr_safe(u32 __msr, u32 __low, u32 __high) |
112 | (native_write_msr_safe(msr, ((unsigned long long)val2 << 32) | val1)) | 106 | { |
107 | return native_write_msr_safe(__msr, ((u64)__high << 32) | __low); | ||
108 | } | ||
113 | 109 | ||
114 | /* rdmsr with exception handling */ | 110 | /* rdmsr with exception handling */ |
115 | #define rdmsr_safe(msr,p1,p2) \ | 111 | #define rdmsr_safe(msr,p1,p2) \ |
116 | ({ \ | 112 | ({ \ |
117 | int __err; \ | 113 | int __err; \ |
118 | unsigned long long __val = native_read_msr_safe(msr, &__err);\ | 114 | u64 __val = native_read_msr_safe(msr, &__err); \ |
119 | (*p1) = __val; \ | 115 | (*p1) = (u32)__val; \ |
120 | (*p2) = __val >> 32; \ | 116 | (*p2) = (u32)(__val >> 32); \ |
121 | __err; \ | 117 | __err; \ |
122 | }) | 118 | }) |
123 | 119 | ||
124 | #define rdtsc(low,high) \ | ||
125 | do { \ | ||
126 | u64 _l = native_read_tsc(); \ | ||
127 | (low) = (u32)_l; \ | ||
128 | (high) = _l >> 32; \ | ||
129 | } while(0) | ||
130 | |||
131 | #define rdtscl(low) \ | 120 | #define rdtscl(low) \ |
132 | do { \ | 121 | ((low) = (u32)native_read_tsc()) |
133 | (low) = native_read_tsc(); \ | ||
134 | } while(0) | ||
135 | 122 | ||
136 | #define rdtscll(val) ((val) = native_read_tsc()) | 123 | #define rdtscll(val) \ |
124 | ((val) = native_read_tsc()) | ||
137 | 125 | ||
138 | #define write_tsc(val1,val2) wrmsr(0x10, val1, val2) | 126 | #define write_tsc(val1,val2) wrmsr(0x10, val1, val2) |
139 | 127 | ||
140 | #define rdpmc(counter,low,high) \ | 128 | #define rdpmc(counter,low,high) \ |
141 | do { \ | 129 | do { \ |
142 | u64 _l = native_read_pmc(); \ | 130 | u64 _l = native_read_pmc(); \ |
143 | low = (u32)_l; \ | 131 | (low) = (u32)_l; \ |
144 | high = _l >> 32; \ | 132 | (high) = (u32)(_l >> 32); \ |
145 | } while(0) | 133 | } while(0) |
146 | #endif /* !CONFIG_PARAVIRT */ | 134 | #endif /* !CONFIG_PARAVIRT */ |
147 | 135 | ||
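
The rewritten rdmsr/wrmsr helpers above all perform the same 64-bit-to-two-u32 split. A standalone sketch of just that arithmetic (real MSR access needs the privileged rdmsr/wrmsr instructions, so this only demonstrates the packing):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t msr_val = 0x1122334455667788ULL;

	/* rdmsr-style split: low and high 32-bit halves */
	uint32_t lo = (uint32_t)msr_val;
	uint32_t hi = (uint32_t)(msr_val >> 32);

	/* wrmsr-style recombination */
	uint64_t back = ((uint64_t)hi << 32) | lo;

	printf("lo=%#x hi=%#x back=%#llx\n",
	       lo, hi, (unsigned long long)back);
	return 0;
}
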
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index e2e7f98723c5..bc5c12c13581 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h | |||
@@ -560,11 +560,6 @@ static inline u64 paravirt_read_tsc(void) | |||
560 | { | 560 | { |
561 | return PVOP_CALL0(u64, read_tsc); | 561 | return PVOP_CALL0(u64, read_tsc); |
562 | } | 562 | } |
563 | #define rdtsc(low,high) do { \ | ||
564 | u64 _l = paravirt_read_tsc(); \ | ||
565 | low = (u32)_l; \ | ||
566 | high = _l >> 32; \ | ||
567 | } while(0) | ||
568 | 563 | ||
569 | #define rdtscl(low) do { \ | 564 | #define rdtscl(low) do { \ |
570 | u64 _l = paravirt_read_tsc(); \ | 565 | u64 _l = paravirt_read_tsc(); \ |
diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index 090abc1da32a..0c7132787062 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h | |||
@@ -124,20 +124,6 @@ static inline int num_booting_cpus(void) | |||
124 | return cpus_weight(cpu_callout_map); | 124 | return cpus_weight(cpu_callout_map); |
125 | } | 125 | } |
126 | 126 | ||
127 | #ifdef CONFIG_X86_LOCAL_APIC | ||
128 | |||
129 | #ifdef APIC_DEFINITION | ||
130 | extern int hard_smp_processor_id(void); | ||
131 | #else | ||
132 | #include <mach_apicdef.h> | ||
133 | static inline int hard_smp_processor_id(void) | ||
134 | { | ||
135 | /* we don't want to mark this access volatile - bad code generation */ | ||
136 | return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); | ||
137 | } | ||
138 | #endif | ||
139 | #endif | ||
140 | |||
141 | extern int safe_smp_processor_id(void); | 127 | extern int safe_smp_processor_id(void); |
142 | extern int __cpu_disable(void); | 128 | extern int __cpu_disable(void); |
143 | extern void __cpu_die(unsigned int cpu); | 129 | extern void __cpu_die(unsigned int cpu); |
@@ -152,10 +138,31 @@ extern unsigned int num_processors; | |||
152 | 138 | ||
153 | #define NO_PROC_ID 0xFF /* No processor magic marker */ | 139 | #define NO_PROC_ID 0xFF /* No processor magic marker */ |
154 | 140 | ||
155 | #endif | 141 | #endif /* CONFIG_SMP */ |
156 | 142 | ||
157 | #ifndef __ASSEMBLY__ | 143 | #ifndef __ASSEMBLY__ |
158 | 144 | ||
145 | #ifdef CONFIG_X86_LOCAL_APIC | ||
146 | |||
147 | #ifdef APIC_DEFINITION | ||
148 | extern int hard_smp_processor_id(void); | ||
149 | #else | ||
150 | #include <mach_apicdef.h> | ||
151 | static inline int hard_smp_processor_id(void) | ||
152 | { | ||
153 | /* we don't want to mark this access volatile - bad code generation */ | ||
154 | return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); | ||
155 | } | ||
156 | #endif /* APIC_DEFINITION */ | ||
157 | |||
158 | #else /* CONFIG_X86_LOCAL_APIC */ | ||
159 | |||
160 | #ifndef CONFIG_SMP | ||
161 | #define hard_smp_processor_id() 0 | ||
162 | #endif | ||
163 | |||
164 | #endif /* CONFIG_X86_LOCAL_APIC */ | ||
165 | |||
159 | extern u8 apicid_2_node[]; | 166 | extern u8 apicid_2_node[]; |
160 | 167 | ||
161 | #ifdef CONFIG_X86_LOCAL_APIC | 168 | #ifdef CONFIG_X86_LOCAL_APIC |
diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h index bf01d4b342bd..4cb0f91ae64f 100644 --- a/include/asm-i386/thread_info.h +++ b/include/asm-i386/thread_info.h | |||
@@ -172,7 +172,7 @@ static inline struct thread_info *current_thread_info(void) | |||
172 | #define TS_USEDFPU 0x0001 /* FPU was used by this task this quantum (SMP) */ | 172 | #define TS_USEDFPU 0x0001 /* FPU was used by this task this quantum (SMP) */ |
173 | #define TS_POLLING 0x0002 /* True if in idle loop and not sleeping */ | 173 | #define TS_POLLING 0x0002 /* True if in idle loop and not sleeping */ |
174 | 174 | ||
175 | #define tsk_is_polling(t) ((t)->thread_info->status & TS_POLLING) | 175 | #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) |
176 | 176 | ||
177 | #endif /* __KERNEL__ */ | 177 | #endif /* __KERNEL__ */ |
178 | 178 | ||
diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h index 60fd4ae014f6..c60024989ebd 100644 --- a/include/asm-ia64/smp.h +++ b/include/asm-ia64/smp.h | |||
@@ -38,6 +38,8 @@ ia64_get_lid (void) | |||
38 | return lid.f.id << 8 | lid.f.eid; | 38 | return lid.f.id << 8 | lid.f.eid; |
39 | } | 39 | } |
40 | 40 | ||
41 | #define hard_smp_processor_id() ia64_get_lid() | ||
42 | |||
41 | #ifdef CONFIG_SMP | 43 | #ifdef CONFIG_SMP |
42 | 44 | ||
43 | #define XTP_OFFSET 0x1e0008 | 45 | #define XTP_OFFSET 0x1e0008 |
@@ -110,8 +112,6 @@ max_xtp (void) | |||
110 | writeb(0x0f, ipi_base_addr + XTP_OFFSET); /* Set XTP to max */ | 112 | writeb(0x0f, ipi_base_addr + XTP_OFFSET); /* Set XTP to max */ |
111 | } | 113 | } |
112 | 114 | ||
113 | #define hard_smp_processor_id() ia64_get_lid() | ||
114 | |||
115 | /* Upping and downing of CPUs */ | 115 | /* Upping and downing of CPUs */ |
116 | extern int __cpu_disable (void); | 116 | extern int __cpu_disable (void); |
117 | extern void __cpu_die (unsigned int cpu); | 117 | extern void __cpu_die (unsigned int cpu); |
@@ -128,7 +128,7 @@ extern void unlock_ipi_calllock(void); | |||
128 | extern void identify_siblings (struct cpuinfo_ia64 *); | 128 | extern void identify_siblings (struct cpuinfo_ia64 *); |
129 | extern int is_multithreading_enabled(void); | 129 | extern int is_multithreading_enabled(void); |
130 | 130 | ||
131 | #else | 131 | #else /* CONFIG_SMP */ |
132 | 132 | ||
133 | #define cpu_logical_id(i) 0 | 133 | #define cpu_logical_id(i) 0 |
134 | #define cpu_physical_id(i) ia64_get_lid() | 134 | #define cpu_physical_id(i) ia64_get_lid() |
diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h index 91698599f918..d28147506585 100644 --- a/include/asm-ia64/thread_info.h +++ b/include/asm-ia64/thread_info.h | |||
@@ -110,6 +110,6 @@ struct thread_info { | |||
110 | 110 | ||
111 | #define TS_POLLING 1 /* true if in idle loop and not sleeping */ | 111 | #define TS_POLLING 1 /* true if in idle loop and not sleeping */ |
112 | 112 | ||
113 | #define tsk_is_polling(t) ((t)->thread_info->status & TS_POLLING) | 113 | #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) |
114 | 114 | ||
115 | #endif /* _ASM_IA64_THREAD_INFO_H */ | 115 | #endif /* _ASM_IA64_THREAD_INFO_H */ |
diff --git a/include/asm-m32r/smp.h b/include/asm-m32r/smp.h index abd937ac5239..078e1a51a042 100644 --- a/include/asm-m32r/smp.h +++ b/include/asm-m32r/smp.h | |||
@@ -108,6 +108,10 @@ extern unsigned long send_IPI_mask_phys(cpumask_t, int, int); | |||
108 | #define IPI_SHIFT (0) | 108 | #define IPI_SHIFT (0) |
109 | #define NR_IPIS (8) | 109 | #define NR_IPIS (8) |
110 | 110 | ||
111 | #endif /* CONFIG_SMP */ | 111 | #else /* CONFIG_SMP */ |
112 | |||
113 | #define hard_smp_processor_id() 0 | ||
114 | |||
115 | #endif /* CONFIG_SMP */ | ||
112 | 116 | ||
113 | #endif /* _ASM_M32R_SMP_H */ | 117 | #endif /* _ASM_M32R_SMP_H */ |
diff --git a/include/asm-m68k/thread_info.h b/include/asm-m68k/thread_info.h index c4d622a57dfb..d635a3752488 100644 --- a/include/asm-m68k/thread_info.h +++ b/include/asm-m68k/thread_info.h | |||
@@ -37,17 +37,17 @@ struct thread_info { | |||
37 | #define init_stack (init_thread_union.stack) | 37 | #define init_stack (init_thread_union.stack) |
38 | 38 | ||
39 | #define task_thread_info(tsk) (&(tsk)->thread.info) | 39 | #define task_thread_info(tsk) (&(tsk)->thread.info) |
40 | #define task_stack_page(tsk) ((void *)(tsk)->thread_info) | 40 | #define task_stack_page(tsk) ((tsk)->stack) |
41 | #define current_thread_info() task_thread_info(current) | 41 | #define current_thread_info() task_thread_info(current) |
42 | 42 | ||
43 | #define __HAVE_THREAD_FUNCTIONS | 43 | #define __HAVE_THREAD_FUNCTIONS |
44 | 44 | ||
45 | #define setup_thread_stack(p, org) ({ \ | 45 | #define setup_thread_stack(p, org) ({ \ |
46 | *(struct task_struct **)(p)->thread_info = (p); \ | 46 | *(struct task_struct **)(p)->stack = (p); \ |
47 | task_thread_info(p)->task = (p); \ | 47 | task_thread_info(p)->task = (p); \ |
48 | }) | 48 | }) |
49 | 49 | ||
50 | #define end_of_stack(p) ((unsigned long *)(p)->thread_info + 1) | 50 | #define end_of_stack(p) ((unsigned long *)(p)->stack + 1) |
51 | 51 | ||
52 | /* entry.S relies on these definitions! | 52 | /* entry.S relies on these definitions! |
53 | * bits 0-7 are tested at every exception exit | 53 | * bits 0-7 are tested at every exception exit |
diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h index 30f23a2b46ca..3713d256d369 100644 --- a/include/asm-mips/system.h +++ b/include/asm-mips/system.h | |||
@@ -55,7 +55,7 @@ do { \ | |||
55 | if (cpu_has_dsp) \ | 55 | if (cpu_has_dsp) \ |
56 | __save_dsp(prev); \ | 56 | __save_dsp(prev); \ |
57 | next->thread.emulated_fp = 0; \ | 57 | next->thread.emulated_fp = 0; \ |
58 | (last) = resume(prev, next, next->thread_info); \ | 58 | (last) = resume(prev, next, task_thread_info(next)); \ |
59 | if (cpu_has_dsp) \ | 59 | if (cpu_has_dsp) \ |
60 | __restore_dsp(current); \ | 60 | __restore_dsp(current); \ |
61 | } while(0) | 61 | } while(0) |
diff --git a/include/asm-parisc/compat.h b/include/asm-parisc/compat.h index fe8579023531..11f4222597a0 100644 --- a/include/asm-parisc/compat.h +++ b/include/asm-parisc/compat.h | |||
@@ -152,7 +152,7 @@ static __inline__ void __user *compat_alloc_user_space(long len) | |||
152 | 152 | ||
153 | static inline int __is_compat_task(struct task_struct *t) | 153 | static inline int __is_compat_task(struct task_struct *t) |
154 | { | 154 | { |
155 | return test_ti_thread_flag(t->thread_info, TIF_32BIT); | 155 | return test_ti_thread_flag(task_thread_info(t), TIF_32BIT); |
156 | } | 156 | } |
157 | 157 | ||
158 | static inline int is_compat_task(void) | 158 | static inline int is_compat_task(void) |
diff --git a/include/asm-powerpc/smp.h b/include/asm-powerpc/smp.h index 01717f266dc9..d037f50580e2 100644 --- a/include/asm-powerpc/smp.h +++ b/include/asm-powerpc/smp.h | |||
@@ -83,6 +83,7 @@ extern void __cpu_die(unsigned int cpu); | |||
83 | 83 | ||
84 | #else | 84 | #else |
85 | /* for UP */ | 85 | /* for UP */ |
86 | #define hard_smp_processor_id() 0 | ||
86 | #define smp_setup_cpu_maps() | 87 | #define smp_setup_cpu_maps() |
87 | 88 | ||
88 | #endif /* CONFIG_SMP */ | 89 | #endif /* CONFIG_SMP */ |
diff --git a/include/asm-s390/smp.h b/include/asm-s390/smp.h index 0a28e6d6ef40..76e424f718c6 100644 --- a/include/asm-s390/smp.h +++ b/include/asm-s390/smp.h | |||
@@ -110,6 +110,7 @@ static inline void smp_send_stop(void) | |||
110 | __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); | 110 | __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); |
111 | } | 111 | } |
112 | 112 | ||
113 | #define hard_smp_processor_id() 0 | ||
113 | #define smp_cpu_not_running(cpu) 1 | 114 | #define smp_cpu_not_running(cpu) 1 |
114 | #define smp_setup_cpu_possible_map() do { } while (0) | 115 | #define smp_setup_cpu_possible_map() do { } while (0) |
115 | #endif | 116 | #endif |
diff --git a/include/asm-sh/cpu-sh3/dma.h b/include/asm-sh/cpu-sh3/dma.h index 954801b46022..3a66dc458023 100644 --- a/include/asm-sh/cpu-sh3/dma.h +++ b/include/asm-sh/cpu-sh3/dma.h | |||
@@ -26,7 +26,7 @@ enum { | |||
26 | XMIT_SZ_128BIT, | 26 | XMIT_SZ_128BIT, |
27 | }; | 27 | }; |
28 | 28 | ||
29 | static unsigned int ts_shift[] __attribute__ ((used)) = { | 29 | static unsigned int ts_shift[] __maybe_unused = { |
30 | [XMIT_SZ_8BIT] = 0, | 30 | [XMIT_SZ_8BIT] = 0, |
31 | [XMIT_SZ_16BIT] = 1, | 31 | [XMIT_SZ_16BIT] = 1, |
32 | [XMIT_SZ_32BIT] = 2, | 32 | [XMIT_SZ_32BIT] = 2, |
diff --git a/include/asm-sh/cpu-sh4/dma-sh7780.h b/include/asm-sh/cpu-sh4/dma-sh7780.h index 6c90d28331b2..71b426a6e482 100644 --- a/include/asm-sh/cpu-sh4/dma-sh7780.h +++ b/include/asm-sh/cpu-sh4/dma-sh7780.h | |||
@@ -28,7 +28,7 @@ enum { | |||
28 | /* | 28 | /* |
29 | * The DMA count is defined as the number of bytes to transfer. | 29 | * The DMA count is defined as the number of bytes to transfer. |
30 | */ | 30 | */ |
31 | static unsigned int __attribute__ ((used)) ts_shift[] = { | 31 | static unsigned int ts_shift[] __maybe_unused = { |
32 | [XMIT_SZ_8BIT] = 0, | 32 | [XMIT_SZ_8BIT] = 0, |
33 | [XMIT_SZ_16BIT] = 1, | 33 | [XMIT_SZ_16BIT] = 1, |
34 | [XMIT_SZ_32BIT] = 2, | 34 | [XMIT_SZ_32BIT] = 2, |
diff --git a/include/asm-sh/cpu-sh4/dma.h b/include/asm-sh/cpu-sh4/dma.h index c135e9cebd9c..36e26a964765 100644 --- a/include/asm-sh/cpu-sh4/dma.h +++ b/include/asm-sh/cpu-sh4/dma.h | |||
@@ -53,7 +53,7 @@ enum { | |||
53 | /* | 53 | /* |
54 | * The DMA count is defined as the number of bytes to transfer. | 54 | * The DMA count is defined as the number of bytes to transfer. |
55 | */ | 55 | */ |
56 | static unsigned int ts_shift[] __attribute__ ((used)) = { | 56 | static unsigned int ts_shift[] __maybe_unused = { |
57 | [XMIT_SZ_64BIT] = 3, | 57 | [XMIT_SZ_64BIT] = 3, |
58 | [XMIT_SZ_8BIT] = 0, | 58 | [XMIT_SZ_8BIT] = 0, |
59 | [XMIT_SZ_16BIT] = 1, | 59 | [XMIT_SZ_16BIT] = 1, |
diff --git a/include/asm-sparc/smp.h b/include/asm-sparc/smp.h index b9da9a600e35..b3f492208fd2 100644 --- a/include/asm-sparc/smp.h +++ b/include/asm-sparc/smp.h | |||
@@ -165,6 +165,7 @@ void smp_setup_cpu_possible_map(void); | |||
165 | 165 | ||
166 | #else /* SMP */ | 166 | #else /* SMP */ |
167 | 167 | ||
168 | #define hard_smp_processor_id() 0 | ||
168 | #define smp_setup_cpu_possible_map() do { } while (0) | 169 | #define smp_setup_cpu_possible_map() do { } while (0) |
169 | 170 | ||
170 | #endif /* !(SMP) */ | 171 | #endif /* !(SMP) */ |
diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h index cca54804b722..869d16fb907b 100644 --- a/include/asm-sparc64/smp.h +++ b/include/asm-sparc64/smp.h | |||
@@ -48,6 +48,7 @@ extern unsigned char boot_cpu_id; | |||
48 | 48 | ||
49 | #else | 49 | #else |
50 | 50 | ||
51 | #define hard_smp_processor_id() 0 | ||
51 | #define smp_setup_cpu_possible_map() do { } while (0) | 52 | #define smp_setup_cpu_possible_map() do { } while (0) |
52 | #define boot_cpu_id (0) | 53 | #define boot_cpu_id (0) |
53 | 54 | ||
diff --git a/include/asm-um/required-features.h b/include/asm-um/required-features.h new file mode 100644 index 000000000000..dfb967b2d2f3 --- /dev/null +++ b/include/asm-um/required-features.h | |||
@@ -0,0 +1,9 @@ | |||
1 | #ifndef __UM_REQUIRED_FEATURES_H | ||
2 | #define __UM_REQUIRED_FEATURES_H | ||
3 | |||
4 | /* | ||
5 | * Nothing to see, just need something for the i386 and x86_64 asm | ||
6 | * headers to include. | ||
7 | */ | ||
8 | |||
9 | #endif | ||
diff --git a/include/asm-um/smp.h b/include/asm-um/smp.h index ca552261ed1f..84f8cf29324e 100644 --- a/include/asm-um/smp.h +++ b/include/asm-um/smp.h | |||
@@ -24,6 +24,10 @@ extern inline void smp_cpus_done(unsigned int maxcpus) | |||
24 | 24 | ||
25 | extern struct task_struct *idle_threads[NR_CPUS]; | 25 | extern struct task_struct *idle_threads[NR_CPUS]; |
26 | 26 | ||
27 | #else | ||
28 | |||
29 | #define hard_smp_processor_id() 0 | ||
30 | |||
27 | #endif | 31 | #endif |
28 | 32 | ||
29 | #endif | 33 | #endif |
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index d5704421456b..3f303d2365ed 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h | |||
@@ -57,12 +57,6 @@ static inline int num_booting_cpus(void) | |||
57 | 57 | ||
58 | #define raw_smp_processor_id() read_pda(cpunumber) | 58 | #define raw_smp_processor_id() read_pda(cpunumber) |
59 | 59 | ||
60 | static inline int hard_smp_processor_id(void) | ||
61 | { | ||
62 | /* we don't want to mark this access volatile - bad code generation */ | ||
63 | return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); | ||
64 | } | ||
65 | |||
66 | extern int __cpu_disable(void); | 60 | extern int __cpu_disable(void); |
67 | extern void __cpu_die(unsigned int cpu); | 61 | extern void __cpu_die(unsigned int cpu); |
68 | extern void prefill_possible_map(void); | 62 | extern void prefill_possible_map(void); |
@@ -71,7 +65,13 @@ extern unsigned __cpuinitdata disabled_cpus; | |||
71 | 65 | ||
72 | #define NO_PROC_ID 0xFF /* No processor magic marker */ | 66 | #define NO_PROC_ID 0xFF /* No processor magic marker */ |
73 | 67 | ||
74 | #endif | 68 | #endif /* CONFIG_SMP */ |
69 | |||
70 | static inline int hard_smp_processor_id(void) | ||
71 | { | ||
72 | /* we don't want to mark this access volatile - bad code generation */ | ||
73 | return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); | ||
74 | } | ||
75 | 75 | ||
76 | /* | 76 | /* |
77 | * Some lowlevel functions might want to know about | 77 | * Some lowlevel functions might want to know about |
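
The smp.h hunks above (i386, ia64, m32r, powerpc, s390, sparc, sparc64, um, x86_64) all apply the same convention: on uniprocessor builds, hard_smp_processor_id() degenerates to the constant 0 so callers need no CONFIG_SMP ifdefs. A compilable sketch of the shape of that fallback:

#include <stdio.h>

/* CONFIG_SMP is naturally undefined in this userspace sketch, so
 * the UP branch is taken, as in a uniprocessor kernel build. */
#ifdef CONFIG_SMP
extern int hard_smp_processor_id(void);
#else
#define hard_smp_processor_id() 0
#endif

int main(void)
{
	printf("running on physical cpu %d\n", hard_smp_processor_id());
	return 0;
}
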
diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h index b7b8021e8c43..ead9f9a56234 100644 --- a/include/asm-x86_64/system.h +++ b/include/asm-x86_64/system.h | |||
@@ -39,7 +39,7 @@ | |||
39 | [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \ | 39 | [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \ |
40 | [ti_flags] "i" (offsetof(struct thread_info, flags)),\ | 40 | [ti_flags] "i" (offsetof(struct thread_info, flags)),\ |
41 | [tif_fork] "i" (TIF_FORK), \ | 41 | [tif_fork] "i" (TIF_FORK), \ |
42 | [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ | 42 | [thread_info] "i" (offsetof(struct task_struct, stack)), \ |
43 | [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ | 43 | [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ |
44 | : "memory", "cc" __EXTRA_CLOBBER) | 44 | : "memory", "cc" __EXTRA_CLOBBER) |
45 | 45 | ||
diff --git a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h index 74a6c74397f7..10bb5a8ed688 100644 --- a/include/asm-x86_64/thread_info.h +++ b/include/asm-x86_64/thread_info.h | |||
@@ -162,7 +162,7 @@ static inline struct thread_info *stack_thread_info(void) | |||
162 | #define TS_COMPAT 0x0002 /* 32bit syscall active */ | 162 | #define TS_COMPAT 0x0002 /* 32bit syscall active */ |
163 | #define TS_POLLING 0x0004 /* true if in idle loop and not sleeping */ | 163 | #define TS_POLLING 0x0004 /* true if in idle loop and not sleeping */ |
164 | 164 | ||
165 | #define tsk_is_polling(t) ((t)->thread_info->status & TS_POLLING) | 165 | #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) |
166 | 166 | ||
167 | #endif /* __KERNEL__ */ | 167 | #endif /* __KERNEL__ */ |
168 | 168 | ||
diff --git a/include/linux/aio.h b/include/linux/aio.h index a30ef13c9e62..43dc2ebfaa0e 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h | |||
@@ -226,7 +226,8 @@ int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, | |||
226 | __put_ioctx(kioctx); \ | 226 | __put_ioctx(kioctx); \ |
227 | } while (0) | 227 | } while (0) |
228 | 228 | ||
229 | #define in_aio() !is_sync_wait(current->io_wait) | 229 | #define in_aio() (unlikely(!is_sync_wait(current->io_wait))) |
230 | |||
230 | /* may be used for debugging */ | 231 | /* may be used for debugging */ |
231 | #define warn_if_async() \ | 232 | #define warn_if_async() \ |
232 | do { \ | 233 | do { \ |
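
The in_aio() change above wraps the macro body in outer parentheses (and adds an unlikely() hint). The parentheses are the correctness fix; a generic illustration of the failure mode they prevent, not taken from the kernel sources:

#include <stdio.h>

#define TWICE_BAD(x)	(x) + (x)	/* body not parenthesized */
#define TWICE_GOOD(x)	((x) + (x))

int main(void)
{
	printf("%d\n", TWICE_BAD(2) * 3);	/* 2 + 2*3 = 8, surprising */
	printf("%d\n", TWICE_GOOD(2) * 3);	/* (2+2)*3 = 12, intended  */
	return 0;
}
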
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a686eabe22d6..db5b00a792f5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -854,7 +854,7 @@ static inline void put_dev_sector(Sector p) | |||
854 | 854 | ||
855 | struct work_struct; | 855 | struct work_struct; |
856 | int kblockd_schedule_work(struct work_struct *work); | 856 | int kblockd_schedule_work(struct work_struct *work); |
857 | void kblockd_flush(void); | 857 | void kblockd_flush_work(struct work_struct *work); |
858 | 858 | ||
859 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ | 859 | #define MODULE_ALIAS_BLOCKDEV(major,minor) \ |
860 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) | 860 | MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) |
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 2665ca04cf8f..bf297b03a4e4 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h | |||
@@ -49,6 +49,7 @@ struct clocksource; | |||
49 | * @shift: cycle to nanosecond divisor (power of two) | 49 | * @shift: cycle to nanosecond divisor (power of two) |
50 | * @flags: flags describing special properties | 50 | * @flags: flags describing special properties |
51 | * @vread: vsyscall based read | 51 | * @vread: vsyscall based read |
52 | * @resume: resume function for the clocksource, if necessary | ||
52 | * @cycle_interval: Used internally by timekeeping core, please ignore. | 53 | * @cycle_interval: Used internally by timekeeping core, please ignore. |
53 | * @xtime_interval: Used internally by timekeeping core, please ignore. | 54 | * @xtime_interval: Used internally by timekeeping core, please ignore. |
54 | */ | 55 | */ |
@@ -65,6 +66,7 @@ struct clocksource { | |||
65 | u32 shift; | 66 | u32 shift; |
66 | unsigned long flags; | 67 | unsigned long flags; |
67 | cycle_t (*vread)(void); | 68 | cycle_t (*vread)(void); |
69 | void (*resume)(void); | ||
68 | 70 | ||
69 | /* timekeeping specific data, ignore */ | 71 | /* timekeeping specific data, ignore */ |
70 | cycle_t cycle_interval; | 72 | cycle_t cycle_interval; |
@@ -209,6 +211,7 @@ static inline void clocksource_calculate_interval(struct clocksource *c, | |||
209 | extern int clocksource_register(struct clocksource*); | 211 | extern int clocksource_register(struct clocksource*); |
210 | extern struct clocksource* clocksource_get_next(void); | 212 | extern struct clocksource* clocksource_get_next(void); |
211 | extern void clocksource_change_rating(struct clocksource *cs, int rating); | 213 | extern void clocksource_change_rating(struct clocksource *cs, int rating); |
214 | extern void clocksource_resume(void); | ||
212 | 215 | ||
213 | #ifdef CONFIG_GENERIC_TIME_VSYSCALL | 216 | #ifdef CONFIG_GENERIC_TIME_VSYSCALL |
214 | extern void update_vsyscall(struct timespec *ts, struct clocksource *c); | 217 | extern void update_vsyscall(struct timespec *ts, struct clocksource *c); |
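
The new optional @resume member follows the usual nullable-callback convention: the timekeeping core invokes it only for clocksources that supply one. A compilable analogue with made-up driver names:

#include <stdio.h>

struct clocksource {
	const char *name;
	void (*resume)(void);	/* optional; NULL if not needed */
};

static void pm_timer_resume(void)
{
	puts("reprogramming timer hardware after suspend");
}

static void resume_one(const struct clocksource *cs)
{
	if (cs->resume)		/* skip sources without a hook */
		cs->resume();
}

int main(void)
{
	struct clocksource a = { "acpi_pm", pm_timer_resume };
	struct clocksource b = { "jiffies", NULL };

	resume_one(&a);
	resume_one(&b);
	return 0;
}
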
diff --git a/include/linux/compat.h b/include/linux/compat.h index ccd863dd77fa..70a157a130bb 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h | |||
@@ -253,5 +253,8 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, | |||
253 | const compat_sigset_t __user *sigmask, | 253 | const compat_sigset_t __user *sigmask, |
254 | compat_size_t sigsetsize); | 254 | compat_size_t sigsetsize); |
255 | 255 | ||
256 | asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename, | ||
257 | struct compat_timespec __user *t, int flags); | ||
258 | |||
256 | #endif /* CONFIG_COMPAT */ | 259 | #endif /* CONFIG_COMPAT */ |
257 | #endif /* _LINUX_COMPAT_H */ | 260 | #endif /* _LINUX_COMPAT_H */ |
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index a9f794716a81..03ec2311fb29 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h | |||
@@ -40,3 +40,4 @@ | |||
40 | #define noinline __attribute__((noinline)) | 40 | #define noinline __attribute__((noinline)) |
41 | #define __attribute_pure__ __attribute__((pure)) | 41 | #define __attribute_pure__ __attribute__((pure)) |
42 | #define __attribute_const__ __attribute__((__const__)) | 42 | #define __attribute_const__ __attribute__((__const__)) |
43 | #define __maybe_unused __attribute__((unused)) | ||
diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h index ecd621fd27d2..a9e2863c2dbf 100644 --- a/include/linux/compiler-gcc3.h +++ b/include/linux/compiler-gcc3.h | |||
@@ -4,9 +4,11 @@ | |||
4 | #include <linux/compiler-gcc.h> | 4 | #include <linux/compiler-gcc.h> |
5 | 5 | ||
6 | #if __GNUC_MINOR__ >= 3 | 6 | #if __GNUC_MINOR__ >= 3 |
7 | # define __attribute_used__ __attribute__((__used__)) | 7 | # define __used __attribute__((__used__)) |
8 | # define __attribute_used__ __used /* deprecated */ | ||
8 | #else | 9 | #else |
9 | # define __attribute_used__ __attribute__((__unused__)) | 10 | # define __used __attribute__((__unused__)) |
11 | # define __attribute_used__ __used /* deprecated */ | ||
10 | #endif | 12 | #endif |
11 | 13 | ||
12 | #if __GNUC_MINOR__ >= 4 | 14 | #if __GNUC_MINOR__ >= 4 |
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index fd0cc7c4a636..a03e9398a6c2 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h | |||
@@ -12,7 +12,8 @@ | |||
12 | # define __inline __inline __attribute__((always_inline)) | 12 | # define __inline __inline __attribute__((always_inline)) |
13 | #endif | 13 | #endif |
14 | 14 | ||
15 | #define __attribute_used__ __attribute__((__used__)) | 15 | #define __used __attribute__((__used__)) |
16 | #define __attribute_used__ __used /* deprecated */ | ||
16 | #define __must_check __attribute__((warn_unused_result)) | 17 | #define __must_check __attribute__((warn_unused_result)) |
17 | #define __compiler_offsetof(a,b) __builtin_offsetof(a,b) | 18 | #define __compiler_offsetof(a,b) __builtin_offsetof(a,b) |
18 | #define __always_inline inline __attribute__((always_inline)) | 19 | #define __always_inline inline __attribute__((always_inline)) |
diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 3b6949b41745..498c35920762 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h | |||
@@ -108,15 +108,30 @@ extern void __chk_io_ptr(const void __iomem *); | |||
108 | * Allow us to avoid 'defined but not used' warnings on functions and data, | 108 | * Allow us to avoid 'defined but not used' warnings on functions and data, |
109 | * as well as force them to be emitted to the assembly file. | 109 | * as well as force them to be emitted to the assembly file. |
110 | * | 110 | * |
111 | * As of gcc 3.3, static functions that are not marked with attribute((used)) | 111 | * As of gcc 3.4, static functions that are not marked with attribute((used)) |
112 | * may be elided from the assembly file. As of gcc 3.3, static data not so | 112 | * may be elided from the assembly file. As of gcc 3.4, static data not so |
113 | * marked will not be elided, but this may change in a future gcc version. | 113 | * marked will not be elided, but this may change in a future gcc version. |
114 | * | 114 | * |
115 | * NOTE: Because distributions shipped with a backported unit-at-a-time | ||
116 | * compiler in gcc 3.3, we must define __used to be __attribute__((used)) | ||
117 | * for gcc >=3.3 instead of 3.4. | ||
118 | * | ||
115 | * In prior versions of gcc, such functions and data would be emitted, but | 119 | * In prior versions of gcc, such functions and data would be emitted, but |
116 | * would be warned about except with attribute((unused)). | 120 | * would be warned about except with attribute((unused)). |
121 | * | ||
122 | * Mark functions that are referenced only in inline assembly as __used so | ||
123 | * the code is emitted even though it appears to be unreferenced. | ||
117 | */ | 124 | */ |
118 | #ifndef __attribute_used__ | 125 | #ifndef __attribute_used__ |
119 | # define __attribute_used__ /* unimplemented */ | 126 | # define __attribute_used__ /* deprecated */ |
127 | #endif | ||
128 | |||
129 | #ifndef __used | ||
130 | # define __used /* unimplemented */ | ||
131 | #endif | ||
132 | |||
133 | #ifndef __maybe_unused | ||
134 | # define __maybe_unused /* unimplemented */ | ||
120 | #endif | 135 | #endif |
121 | 136 | ||
122 | /* | 137 | /* |
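
Taken together, the compiler*.h hunks split one attribute into two with distinct meanings: __used forces a symbol to be emitted even when nothing in C references it (e.g. it is called only from inline assembly), while __maybe_unused merely suppresses the "defined but not used" warning. A small GCC/Clang demonstration of both:

#define __used		__attribute__((__used__))
#define __maybe_unused	__attribute__((unused))

/* Emitted even though no C code references it; it might be
 * reached only from inline assembly. */
static void asm_helper(void) __used;
static void asm_helper(void) { }

/* Silences "defined but not used", but may still be elided. */
static int debug_level __maybe_unused = 2;

int main(void)
{
	return 0;
}
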
diff --git a/include/linux/fb.h b/include/linux/fb.h index dff7a728948c..c654d0e9ce33 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h | |||
@@ -868,7 +868,7 @@ struct fb_info { | |||
868 | #define fb_writeq sbus_writeq | 868 | #define fb_writeq sbus_writeq |
869 | #define fb_memset sbus_memset_io | 869 | #define fb_memset sbus_memset_io |
870 | 870 | ||
871 | #elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || (defined(__sh__) && !defined(__SH5__)) || defined(__powerpc__) | 871 | #elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || (defined(__sh__) && !defined(__SH5__)) || defined(__powerpc__) || defined(__avr32__) |
872 | 872 | ||
873 | #define fb_readb __raw_readb | 873 | #define fb_readb __raw_readb |
874 | #define fb_readw __raw_readw | 874 | #define fb_readw __raw_readw |
diff --git a/include/linux/futex.h b/include/linux/futex.h index 820125c628c1..899fc7f20edd 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h | |||
@@ -3,6 +3,8 @@ | |||
3 | 3 | ||
4 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
5 | 5 | ||
6 | union ktime; | ||
7 | |||
6 | /* Second argument to futex syscall */ | 8 | /* Second argument to futex syscall */ |
7 | 9 | ||
8 | 10 | ||
@@ -15,6 +17,19 @@ | |||
15 | #define FUTEX_LOCK_PI 6 | 17 | #define FUTEX_LOCK_PI 6 |
16 | #define FUTEX_UNLOCK_PI 7 | 18 | #define FUTEX_UNLOCK_PI 7 |
17 | #define FUTEX_TRYLOCK_PI 8 | 19 | #define FUTEX_TRYLOCK_PI 8 |
20 | #define FUTEX_CMP_REQUEUE_PI 9 | ||
21 | |||
22 | #define FUTEX_PRIVATE_FLAG 128 | ||
23 | #define FUTEX_CMD_MASK ~FUTEX_PRIVATE_FLAG | ||
24 | |||
25 | #define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG) | ||
26 | #define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG) | ||
27 | #define FUTEX_REQUEUE_PRIVATE (FUTEX_REQUEUE | FUTEX_PRIVATE_FLAG) | ||
28 | #define FUTEX_CMP_REQUEUE_PRIVATE (FUTEX_CMP_REQUEUE | FUTEX_PRIVATE_FLAG) | ||
29 | #define FUTEX_WAKE_OP_PRIVATE (FUTEX_WAKE_OP | FUTEX_PRIVATE_FLAG) | ||
30 | #define FUTEX_LOCK_PI_PRIVATE (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG) | ||
31 | #define FUTEX_UNLOCK_PI_PRIVATE (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG) | ||
32 | #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) | ||
18 | 33 | ||
19 | /* | 34 | /* |
20 | * Support for robust futexes: the kernel cleans up held futexes at | 35 | * Support for robust futexes: the kernel cleans up held futexes at |
@@ -83,9 +98,14 @@ struct robust_list_head { | |||
83 | #define FUTEX_OWNER_DIED 0x40000000 | 98 | #define FUTEX_OWNER_DIED 0x40000000 |
84 | 99 | ||
85 | /* | 100 | /* |
101 | * Some processes have been requeued on this PI-futex | ||
102 | */ | ||
103 | #define FUTEX_WAITER_REQUEUED 0x20000000 | ||
104 | |||
105 | /* | ||
86 | * The rest of the robust-futex field is for the TID: | 106 | * The rest of the robust-futex field is for the TID: |
87 | */ | 107 | */ |
88 | #define FUTEX_TID_MASK 0x3fffffff | 108 | #define FUTEX_TID_MASK 0x0fffffff |
89 | 109 | ||
90 | /* | 110 | /* |
91 | * This limit protects against a deliberately circular list. | 111 | * This limit protects against a deliberately circular list. |
@@ -94,7 +114,7 @@ struct robust_list_head { | |||
94 | #define ROBUST_LIST_LIMIT 2048 | 114 | #define ROBUST_LIST_LIMIT 2048 |
95 | 115 | ||
96 | #ifdef __KERNEL__ | 116 | #ifdef __KERNEL__ |
97 | long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout, | 117 | long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout, |
98 | u32 __user *uaddr2, u32 val2, u32 val3); | 118 | u32 __user *uaddr2, u32 val2, u32 val3); |
99 | 119 | ||
100 | extern int | 120 | extern int |
@@ -106,9 +126,20 @@ handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi); | |||
106 | * Don't rearrange members without looking at hash_futex(). | 126 | * Don't rearrange members without looking at hash_futex(). |
107 | * | 127 | * |
108 | * offset is aligned to a multiple of sizeof(u32) (== 4) by definition. | 128 | * offset is aligned to a multiple of sizeof(u32) (== 4) by definition. |
109 | * We set bit 0 to indicate if it's an inode-based key. | 129 | * We use the two low-order bits of offset to tell which kind of key this is: |
110 | */ | 130 | * 00 : Private process futex (PTHREAD_PROCESS_PRIVATE) |
131 | * (no reference on an inode or mm) | ||
132 | * 01 : Shared futex (PTHREAD_PROCESS_SHARED) | ||
133 | * mapped on a file (reference on the underlying inode) | ||
134 | * 10 : Shared futex (PTHREAD_PROCESS_SHARED) | ||
135 | * (but private mapping on an mm, and reference taken on it) | ||
136 | */ | ||
137 | |||
138 | #define FUT_OFF_INODE 1 /* We set bit 0 if key has a reference on inode */ | ||
139 | #define FUT_OFF_MMSHARED 2 /* We set bit 1 if key has a reference on mm */ | ||
140 | |||
111 | union futex_key { | 141 | union futex_key { |
142 | u32 __user *uaddr; | ||
112 | struct { | 143 | struct { |
113 | unsigned long pgoff; | 144 | unsigned long pgoff; |
114 | struct inode *inode; | 145 | struct inode *inode; |
@@ -125,7 +156,8 @@ union futex_key { | |||
125 | int offset; | 156 | int offset; |
126 | } both; | 157 | } both; |
127 | }; | 158 | }; |
128 | int get_futex_key(u32 __user *uaddr, union futex_key *key); | 159 | int get_futex_key(u32 __user *uaddr, struct rw_semaphore *shared, |
160 | union futex_key *key); | ||
129 | void get_futex_key_refs(union futex_key *key); | 161 | void get_futex_key_refs(union futex_key *key); |
130 | void drop_futex_key_refs(union futex_key *key); | 162 | void drop_futex_key_refs(union futex_key *key); |
131 | 163 | ||
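
The comment block added above explains the trick: offset is always a multiple of sizeof(u32), so its two low-order bits are free to tag the key type. A standalone sketch of that encoding, using the same FUT_OFF_* values:

#include <stdio.h>

#define FUT_OFF_INODE		1	/* bit 0: inode-backed key */
#define FUT_OFF_MMSHARED	2	/* bit 1: mm-backed key    */

int main(void)
{
	unsigned long offset = 0x40;	/* 4-byte aligned, bits 0-1 clear */

	printf("private: %#lx\n", offset);
	printf("inode:   %#lx\n", offset | FUT_OFF_INODE);
	printf("mm:      %#lx\n", offset | FUT_OFF_MMSHARED);
	return 0;
}
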
diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 2c65da7cabb2..f589559cf070 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h | |||
@@ -413,6 +413,7 @@ char *disk_name (struct gendisk *hd, int part, char *buf); | |||
413 | extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); | 413 | extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); |
414 | extern void add_partition(struct gendisk *, int, sector_t, sector_t, int); | 414 | extern void add_partition(struct gendisk *, int, sector_t, sector_t, int); |
415 | extern void delete_partition(struct gendisk *, int); | 415 | extern void delete_partition(struct gendisk *, int); |
416 | extern void printk_all_partitions(void); | ||
416 | 417 | ||
417 | extern struct gendisk *alloc_disk_node(int minors, int node_id); | 418 | extern struct gendisk *alloc_disk_node(int minors, int node_id); |
418 | extern struct gendisk *alloc_disk(int minors); | 419 | extern struct gendisk *alloc_disk(int minors); |
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 97a36c3d96e2..0d2ef0b082a6 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h | |||
@@ -176,10 +176,6 @@ extern void FASTCALL(free_cold_page(struct page *page)); | |||
176 | #define free_page(addr) free_pages((addr),0) | 176 | #define free_page(addr) free_pages((addr),0) |
177 | 177 | ||
178 | void page_alloc_init(void); | 178 | void page_alloc_init(void); |
179 | #ifdef CONFIG_NUMA | 179 | void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); |
180 | void drain_node_pages(int node); | ||
181 | #else | ||
182 | static inline void drain_node_pages(int node) { }; | ||
183 | #endif | ||
184 | 180 | ||
185 | #endif /* __LINUX_GFP_H */ | 181 | #endif /* __LINUX_GFP_H */ |
diff --git a/include/linux/highmem.h b/include/linux/highmem.h index a515eb0afdfb..98e2cce996a4 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h | |||
@@ -94,17 +94,26 @@ static inline void clear_highpage(struct page *page) | |||
94 | 94 | ||
95 | /* | 95 | /* |
96 | * Same but also flushes aliased cache contents to RAM. | 96 | * Same but also flushes aliased cache contents to RAM. |
97 | * | ||
98 | * This must be a macro because KM_USER0 and friends aren't defined if | ||
99 | * !CONFIG_HIGHMEM | ||
97 | */ | 100 | */ |
98 | static inline void memclear_highpage_flush(struct page *page, unsigned int offset, unsigned int size) | 101 | #define zero_user_page(page, offset, size, km_type) \ |
102 | do { \ | ||
103 | void *kaddr; \ | ||
104 | \ | ||
105 | BUG_ON((offset) + (size) > PAGE_SIZE); \ | ||
106 | \ | ||
107 | kaddr = kmap_atomic(page, km_type); \ | ||
108 | memset((char *)kaddr + (offset), 0, (size)); \ | ||
109 | flush_dcache_page(page); \ | ||
110 | kunmap_atomic(kaddr, (km_type)); \ | ||
111 | } while (0) | ||
112 | |||
113 | static inline void __deprecated memclear_highpage_flush(struct page *page, | ||
114 | unsigned int offset, unsigned int size) | ||
99 | { | 115 | { |
100 | void *kaddr; | 116 | zero_user_page(page, offset, size, KM_USER0); |
101 | |||
102 | BUG_ON(offset + size > PAGE_SIZE); | ||
103 | |||
104 | kaddr = kmap_atomic(page, KM_USER0); | ||
105 | memset((char *)kaddr + offset, 0, size); | ||
106 | flush_dcache_page(page); | ||
107 | kunmap_atomic(kaddr, KM_USER0); | ||
108 | } | 117 | } |
109 | 118 | ||
110 | #ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE | 119 | #ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE |
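
The zero_user_page() macro above zeroes a sub-range of one page under kmap_atomic. A userspace analogue of the same operation (kmap/flush steps are kernel-only; the assert mirrors the macro's BUG_ON bounds check):

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096

static void zero_page_range(unsigned char *page, unsigned int offset,
			    unsigned int size)
{
	assert(offset + size <= PAGE_SIZE);	/* mirrors the BUG_ON */
	memset(page + offset, 0, size);
}

int main(void)
{
	static unsigned char page[PAGE_SIZE];

	memset(page, 0xAA, sizeof(page));
	zero_page_range(page, 100, 28);		/* zero a tail fragment */
	printf("page[99]=%#x page[100]=%#x\n", page[99], page[100]);
	return 0;
}
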
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 795102309bf1..45170b2fa253 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -95,7 +95,7 @@ extern struct group_info init_groups; | |||
95 | #define INIT_TASK(tsk) \ | 95 | #define INIT_TASK(tsk) \ |
96 | { \ | 96 | { \ |
97 | .state = 0, \ | 97 | .state = 0, \ |
98 | .thread_info = &init_thread_info, \ | 98 | .stack = &init_thread_info, \ |
99 | .usage = ATOMIC_INIT(2), \ | 99 | .usage = ATOMIC_INIT(2), \ |
100 | .flags = 0, \ | 100 | .flags = 0, \ |
101 | .lock_depth = -1, \ | 101 | .lock_depth = -1, \ |
diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 1c65e7a9f186..00dd957e245b 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h | |||
@@ -30,4 +30,7 @@ void kthread_bind(struct task_struct *k, unsigned int cpu); | |||
30 | int kthread_stop(struct task_struct *k); | 30 | int kthread_stop(struct task_struct *k); |
31 | int kthread_should_stop(void); | 31 | int kthread_should_stop(void); |
32 | 32 | ||
33 | int kthreadd(void *unused); | ||
34 | extern struct task_struct *kthreadd_task; | ||
35 | |||
33 | #endif /* _LINUX_KTHREAD_H */ | 36 | #endif /* _LINUX_KTHREAD_H */ |
diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 81bb9c7a4eb3..c762954bda14 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h | |||
@@ -43,7 +43,7 @@ | |||
43 | * plain scalar nanosecond based representation can be selected by the | 43 | * plain scalar nanosecond based representation can be selected by the |
44 | * config switch CONFIG_KTIME_SCALAR. | 44 | * config switch CONFIG_KTIME_SCALAR. |
45 | */ | 45 | */ |
46 | typedef union { | 46 | union ktime { |
47 | s64 tv64; | 47 | s64 tv64; |
48 | #if BITS_PER_LONG != 64 && !defined(CONFIG_KTIME_SCALAR) | 48 | #if BITS_PER_LONG != 64 && !defined(CONFIG_KTIME_SCALAR) |
49 | struct { | 49 | struct { |
@@ -54,7 +54,9 @@ typedef union { | |||
54 | # endif | 54 | # endif |
55 | } tv; | 55 | } tv; |
56 | #endif | 56 | #endif |
57 | } ktime_t; | 57 | }; |
58 | |||
59 | typedef union ktime ktime_t; /* Kill this */ | ||
58 | 60 | ||
59 | #define KTIME_MAX ((s64)~((u64)1 << 63)) | 61 | #define KTIME_MAX ((s64)~((u64)1 << 63)) |
60 | #if (BITS_PER_LONG == 64) | 62 | #if (BITS_PER_LONG == 64) |
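
Giving the union a tag is what lets futex.h (earlier in this section) write "union ktime;" as a forward declaration: an anonymous typedef'd union cannot be forward-declared, a tagged one can. A minimal demonstration of the C rule at work:

union ktime;				/* forward declaration: legal */
long long read_ns(union ktime *t);	/* pointer use needs no body  */

union ktime {				/* full definition */
	long long tv64;
};

long long read_ns(union ktime *t)
{
	return t ? t->tv64 : 0;
}

int main(void)
{
	union ktime kt = { .tv64 = 42 };

	return (int)read_ns(&kt) - 42;	/* exits 0 */
}
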
diff --git a/include/linux/mca.h b/include/linux/mca.h index 5cff2923092b..37972704617f 100644 --- a/include/linux/mca.h +++ b/include/linux/mca.h | |||
@@ -94,6 +94,7 @@ struct mca_bus { | |||
94 | struct mca_driver { | 94 | struct mca_driver { |
95 | const short *id_table; | 95 | const short *id_table; |
96 | void *driver_data; | 96 | void *driver_data; |
97 | int integrated_id; | ||
97 | struct device_driver driver; | 98 | struct device_driver driver; |
98 | }; | 99 | }; |
99 | #define to_mca_driver(mdriver) container_of(mdriver, struct mca_driver, driver) | 100 | #define to_mca_driver(mdriver) container_of(mdriver, struct mca_driver, driver) |
@@ -125,6 +126,7 @@ extern enum MCA_AdapterStatus mca_device_status(struct mca_device *mca_dev); | |||
125 | extern struct bus_type mca_bus_type; | 126 | extern struct bus_type mca_bus_type; |
126 | 127 | ||
127 | extern int mca_register_driver(struct mca_driver *drv); | 128 | extern int mca_register_driver(struct mca_driver *drv); |
129 | extern int mca_register_driver_integrated(struct mca_driver *, int); | ||
128 | extern void mca_unregister_driver(struct mca_driver *drv); | 130 | extern void mca_unregister_driver(struct mca_driver *drv); |
129 | 131 | ||
130 | /* WARNING: only called by the boot time device setup */ | 132 | /* WARNING: only called by the boot time device setup */ |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2f1544e83042..d09b1345a3a1 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -83,6 +83,9 @@ struct per_cpu_pages { | |||
83 | 83 | ||
84 | struct per_cpu_pageset { | 84 | struct per_cpu_pageset { |
85 | struct per_cpu_pages pcp[2]; /* 0: hot. 1: cold */ | 85 | struct per_cpu_pages pcp[2]; /* 0: hot. 1: cold */ |
86 | #ifdef CONFIG_NUMA | ||
87 | s8 expire; | ||
88 | #endif | ||
86 | #ifdef CONFIG_SMP | 89 | #ifdef CONFIG_SMP |
87 | s8 stat_threshold; | 90 | s8 stat_threshold; |
88 | s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS]; | 91 | s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS]; |
diff --git a/include/linux/module.h b/include/linux/module.h index 6d3dc9c4ff96..792d483c9af7 100644 --- a/include/linux/module.h +++ b/include/linux/module.h | |||
@@ -356,6 +356,9 @@ struct module | |||
356 | keeping pointers to this stuff */ | 356 | keeping pointers to this stuff */ |
357 | char *args; | 357 | char *args; |
358 | }; | 358 | }; |
359 | #ifndef MODULE_ARCH_INIT | ||
360 | #define MODULE_ARCH_INIT {} | ||
361 | #endif | ||
359 | 362 | ||
360 | /* FIXME: It'd be nice to isolate modules during init, too, so they | 363 | /* FIXME: It'd be nice to isolate modules during init, too, so they |
361 | aren't used before they (may) fail. But presently too much code | 364 | aren't used before they (may) fail. But presently too much code |
diff --git a/include/linux/mutex.h b/include/linux/mutex.h index b81bc2adaeff..0d50ea3df689 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h | |||
@@ -121,11 +121,12 @@ static inline int fastcall mutex_is_locked(struct mutex *lock) | |||
121 | * Also see Documentation/mutex-design.txt. | 121 | * Also see Documentation/mutex-design.txt. |
122 | */ | 122 | */ |
123 | extern void fastcall mutex_lock(struct mutex *lock); | 123 | extern void fastcall mutex_lock(struct mutex *lock); |
124 | extern int fastcall mutex_lock_interruptible(struct mutex *lock); | 124 | extern int __must_check fastcall mutex_lock_interruptible(struct mutex *lock); |
125 | 125 | ||
126 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 126 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
127 | extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); | 127 | extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); |
128 | extern int mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); | 128 | extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock, |
129 | unsigned int subclass); | ||
129 | #else | 130 | #else |
130 | # define mutex_lock_nested(lock, subclass) mutex_lock(lock) | 131 | # define mutex_lock_nested(lock, subclass) mutex_lock(lock) |
131 | # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) | 132 | # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) |
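
Adding __must_check to mutex_lock_interruptible() makes the compiler warn when a caller ignores the possible -EINTR failure. A userspace sketch of what the attribute buys (lock_interruptible() here is a hypothetical stand-in, not a kernel API):

#include <stdio.h>

#define __must_check __attribute__((warn_unused_result))

/* Stands in for mutex_lock_interruptible(): may fail (e.g. -EINTR),
 * so the caller must inspect the result. */
static __must_check int lock_interruptible(void)
{
	return -4;	/* pretend a signal interrupted us (-EINTR) */
}

int main(void)
{
	if (lock_interruptible() < 0) {
		fprintf(stderr, "interrupted, backing out\n");
		return 1;
	}
	/* lock_interruptible();  <-- would now draw a compiler warning */
	return 0;
}
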
diff --git a/include/linux/nfs4_acl.h b/include/linux/nfs4_acl.h index 409b6e02f337..c9c05a78e9bb 100644 --- a/include/linux/nfs4_acl.h +++ b/include/linux/nfs4_acl.h | |||
@@ -44,7 +44,6 @@ | |||
44 | #define NFS4_ACL_MAX 170 | 44 | #define NFS4_ACL_MAX 170 |
45 | 45 | ||
46 | struct nfs4_acl *nfs4_acl_new(int); | 46 | struct nfs4_acl *nfs4_acl_new(int); |
47 | void nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t); | ||
48 | int nfs4_acl_get_whotype(char *, u32); | 47 | int nfs4_acl_get_whotype(char *, u32); |
49 | int nfs4_acl_write_who(int who, char *p); | 48 | int nfs4_acl_write_who(int who, char *p); |
50 | int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group, | 49 | int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group, |
diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 10a43ed0527e..9431101bf876 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h | |||
@@ -112,32 +112,40 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |||
112 | 112 | ||
113 | #ifdef __KERNEL__ | 113 | #ifdef __KERNEL__ |
114 | 114 | ||
115 | extern int atomic_notifier_chain_register(struct atomic_notifier_head *, | 115 | extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh, |
116 | struct notifier_block *); | 116 | struct notifier_block *nb); |
117 | extern int blocking_notifier_chain_register(struct blocking_notifier_head *, | 117 | extern int blocking_notifier_chain_register(struct blocking_notifier_head *nh, |
118 | struct notifier_block *); | 118 | struct notifier_block *nb); |
119 | extern int raw_notifier_chain_register(struct raw_notifier_head *, | 119 | extern int raw_notifier_chain_register(struct raw_notifier_head *nh, |
120 | struct notifier_block *); | 120 | struct notifier_block *nb); |
121 | extern int srcu_notifier_chain_register(struct srcu_notifier_head *, | 121 | extern int srcu_notifier_chain_register(struct srcu_notifier_head *nh, |
122 | struct notifier_block *); | 122 | struct notifier_block *nb); |
123 | 123 | ||
124 | extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *, | 124 | extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, |
125 | struct notifier_block *); | 125 | struct notifier_block *nb); |
126 | extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *, | 126 | extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, |
127 | struct notifier_block *); | 127 | struct notifier_block *nb); |
128 | extern int raw_notifier_chain_unregister(struct raw_notifier_head *, | 128 | extern int raw_notifier_chain_unregister(struct raw_notifier_head *nh, |
129 | struct notifier_block *); | 129 | struct notifier_block *nb); |
130 | extern int srcu_notifier_chain_unregister(struct srcu_notifier_head *, | 130 | extern int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, |
131 | struct notifier_block *); | 131 | struct notifier_block *nb); |
132 | 132 | ||
133 | extern int atomic_notifier_call_chain(struct atomic_notifier_head *, | 133 | extern int atomic_notifier_call_chain(struct atomic_notifier_head *nh, |
134 | unsigned long val, void *v); | 134 | unsigned long val, void *v); |
135 | extern int blocking_notifier_call_chain(struct blocking_notifier_head *, | 135 | extern int __atomic_notifier_call_chain(struct atomic_notifier_head *nh, |
136 | unsigned long val, void *v, int nr_to_call, int *nr_calls); | ||
137 | extern int blocking_notifier_call_chain(struct blocking_notifier_head *nh, | ||
136 | unsigned long val, void *v); | 138 | unsigned long val, void *v); |
137 | extern int raw_notifier_call_chain(struct raw_notifier_head *, | 139 | extern int __blocking_notifier_call_chain(struct blocking_notifier_head *nh, |
140 | unsigned long val, void *v, int nr_to_call, int *nr_calls); | ||
141 | extern int raw_notifier_call_chain(struct raw_notifier_head *nh, | ||
138 | unsigned long val, void *v); | 142 | unsigned long val, void *v); |
139 | extern int srcu_notifier_call_chain(struct srcu_notifier_head *, | 143 | extern int __raw_notifier_call_chain(struct raw_notifier_head *nh, |
144 | unsigned long val, void *v, int nr_to_call, int *nr_calls); | ||
145 | extern int srcu_notifier_call_chain(struct srcu_notifier_head *nh, | ||
140 | unsigned long val, void *v); | 146 | unsigned long val, void *v); |
147 | extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, | ||
148 | unsigned long val, void *v, int nr_to_call, int *nr_calls); | ||
141 | 149 | ||
142 | #define NOTIFY_DONE 0x0000 /* Don't care */ | 150 | #define NOTIFY_DONE 0x0000 /* Don't care */ |
143 | #define NOTIFY_OK 0x0001 /* Suits me */ | 151 | #define NOTIFY_OK 0x0001 /* Suits me */ |
@@ -186,6 +194,20 @@ extern int srcu_notifier_call_chain(struct srcu_notifier_head *, | |||
186 | #define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */ | 194 | #define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */ |
187 | #define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */ | 195 | #define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */ |
188 | #define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ | 196 | #define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ |
197 | #define CPU_LOCK_ACQUIRE 0x0008 /* Acquire all hotcpu locks */ | ||
198 | #define CPU_LOCK_RELEASE 0x0009 /* Release all hotcpu locks */ | ||
199 | |||
200 | /* Used for CPU hotplug events occurring while tasks are frozen due to a suspend | ||
201 | * operation in progress | ||
202 | */ | ||
203 | #define CPU_TASKS_FROZEN 0x0010 | ||
204 | |||
205 | #define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN) | ||
206 | #define CPU_UP_PREPARE_FROZEN (CPU_UP_PREPARE | CPU_TASKS_FROZEN) | ||
207 | #define CPU_UP_CANCELED_FROZEN (CPU_UP_CANCELED | CPU_TASKS_FROZEN) | ||
208 | #define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN) | ||
209 | #define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) | ||
210 | #define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) | ||
189 | 211 | ||
190 | #endif /* __KERNEL__ */ | 212 | #endif /* __KERNEL__ */ |
191 | #endif /* _LINUX_NOTIFIER_H */ | 213 | #endif /* _LINUX_NOTIFIER_H */ |
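
The *_FROZEN variants added above are the plain events with CPU_TASKS_FROZEN ORed in, so a notifier callback can mask the bit off and handle both forms in one switch. A sketch of that idiom (CPU_ONLINE's value here is taken from the kernel's notifier.h):

#include <stdio.h>

#define CPU_ONLINE		0x0002
#define CPU_TASKS_FROZEN	0x0010
#define CPU_ONLINE_FROZEN	(CPU_ONLINE | CPU_TASKS_FROZEN)

static void callback(unsigned long action)
{
	switch (action & ~CPU_TASKS_FROZEN) {	/* one case, both forms */
	case CPU_ONLINE:
		printf("online, during suspend: %s\n",
		       (action & CPU_TASKS_FROZEN) ? "yes" : "no");
		break;
	}
}

int main(void)
{
	callback(CPU_ONLINE);
	callback(CPU_ONLINE_FROZEN);
	return 0;
}
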
diff --git a/include/linux/pm.h b/include/linux/pm.h index 6e8fa3049e5d..87545e0f0b58 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h | |||
@@ -107,26 +107,11 @@ typedef int __bitwise suspend_state_t; | |||
107 | #define PM_SUSPEND_ON ((__force suspend_state_t) 0) | 107 | #define PM_SUSPEND_ON ((__force suspend_state_t) 0) |
108 | #define PM_SUSPEND_STANDBY ((__force suspend_state_t) 1) | 108 | #define PM_SUSPEND_STANDBY ((__force suspend_state_t) 1) |
109 | #define PM_SUSPEND_MEM ((__force suspend_state_t) 3) | 109 | #define PM_SUSPEND_MEM ((__force suspend_state_t) 3) |
110 | #define PM_SUSPEND_DISK ((__force suspend_state_t) 4) | 110 | #define PM_SUSPEND_MAX ((__force suspend_state_t) 4) |
111 | #define PM_SUSPEND_MAX ((__force suspend_state_t) 5) | ||
112 | |||
113 | typedef int __bitwise suspend_disk_method_t; | ||
114 | |||
115 | /* invalid must be 0 so struct pm_ops initialisers can leave it out */ | ||
116 | #define PM_DISK_INVALID ((__force suspend_disk_method_t) 0) | ||
117 | #define PM_DISK_PLATFORM ((__force suspend_disk_method_t) 1) | ||
118 | #define PM_DISK_SHUTDOWN ((__force suspend_disk_method_t) 2) | ||
119 | #define PM_DISK_REBOOT ((__force suspend_disk_method_t) 3) | ||
120 | #define PM_DISK_TEST ((__force suspend_disk_method_t) 4) | ||
121 | #define PM_DISK_TESTPROC ((__force suspend_disk_method_t) 5) | ||
122 | #define PM_DISK_MAX ((__force suspend_disk_method_t) 6) | ||
123 | 111 | ||
124 | /** | 112 | /** |
125 | * struct pm_ops - Callbacks for managing platform dependent suspend states. | 113 | * struct pm_ops - Callbacks for managing platform dependent suspend states. |
126 | * @valid: Callback to determine whether the given state can be entered. | 114 | * @valid: Callback to determine whether the given state can be entered. |
127 | * If %CONFIG_SOFTWARE_SUSPEND is set then %PM_SUSPEND_DISK is | ||
128 | * always valid and never passed to this call. If not assigned, | ||
129 | * no suspend states are valid. | ||
130 | * Valid states are advertised in /sys/power/state but can still | 115 | * Valid states are advertised in /sys/power/state but can still |
131 | * be rejected by prepare or enter if the conditions aren't right. | 116 | * be rejected by prepare or enter if the conditions aren't right. |
132 | * There is a %pm_valid_only_mem function available that can be assigned | 117 | * There is a %pm_valid_only_mem function available that can be assigned |
@@ -140,24 +125,12 @@ typedef int __bitwise suspend_disk_method_t; | |||
140 | * | 125 | * |
141 | * @finish: Called when the system has left the given state and all devices | 126 | * @finish: Called when the system has left the given state and all devices |
142 | * are resumed. The return value is ignored. | 127 | * are resumed. The return value is ignored. |
143 | * | ||
144 | * @pm_disk_mode: The generic code always allows one of the shutdown methods | ||
145 | * %PM_DISK_SHUTDOWN, %PM_DISK_REBOOT, %PM_DISK_TEST and | ||
146 | * %PM_DISK_TESTPROC. If this variable is set, the mode it is set | ||
147 | * to is allowed in addition to those modes and is also made default. | ||
148 | * When this mode is sent selected, the @prepare call will be called | ||
149 | * before suspending to disk (if present), the @enter call should be | ||
150 | * present and will be called after all state has been saved and the | ||
151 | * machine is ready to be powered off; the @finish callback is called | ||
152 | * after state has been restored. All these calls are called with | ||
153 | * %PM_SUSPEND_DISK as the state. | ||
154 | */ | 128 | */ |
155 | struct pm_ops { | 129 | struct pm_ops { |
156 | int (*valid)(suspend_state_t state); | 130 | int (*valid)(suspend_state_t state); |
157 | int (*prepare)(suspend_state_t state); | 131 | int (*prepare)(suspend_state_t state); |
158 | int (*enter)(suspend_state_t state); | 132 | int (*enter)(suspend_state_t state); |
159 | int (*finish)(suspend_state_t state); | 133 | int (*finish)(suspend_state_t state); |
160 | suspend_disk_method_t pm_disk_mode; | ||
161 | }; | 134 | }; |
162 | 135 | ||
163 | /** | 136 | /** |
@@ -276,8 +249,6 @@ extern void device_power_up(void); | |||
276 | extern void device_resume(void); | 249 | extern void device_resume(void); |
277 | 250 | ||
278 | #ifdef CONFIG_PM | 251 | #ifdef CONFIG_PM |
279 | extern suspend_disk_method_t pm_disk_mode; | ||
280 | |||
281 | extern int device_suspend(pm_message_t state); | 252 | extern int device_suspend(pm_message_t state); |
282 | extern int device_prepare_suspend(pm_message_t state); | 253 | extern int device_prepare_suspend(pm_message_t state); |
283 | 254 | ||
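With PM_SUSPEND_DISK and the suspend_disk_method_t machinery gone, pm_ops now covers only the suspend states. A platform registration might look like the sketch below (the my_plat_* names are hypothetical; pm_valid_only_mem() and pm_set_ops() are the existing helpers):

    #include <linux/pm.h>

    static int my_plat_prepare(suspend_state_t state)
    {
            return 0;       /* quiesce platform-specific hardware */
    }

    static int my_plat_enter(suspend_state_t state)
    {
            return 0;       /* actually enter the low-power state */
    }

    static int my_plat_finish(suspend_state_t state)
    {
            return 0;       /* undo prepare() after wakeup */
    }

    static struct pm_ops my_plat_pm_ops = {
            .valid   = pm_valid_only_mem,
            .prepare = my_plat_prepare,
            .enter   = my_plat_enter,
            .finish  = my_plat_finish,
    };

    /* during platform init: pm_set_ops(&my_plat_pm_ops); */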
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index de72c49747c8..a121f36f4437 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h | |||
@@ -201,7 +201,6 @@ struct mddev_s | |||
201 | struct mutex reconfig_mutex; | 201 | struct mutex reconfig_mutex; |
202 | atomic_t active; | 202 | atomic_t active; |
203 | 203 | ||
204 | int changed; /* true if we might need to reread partition info */ | ||
205 | int degraded; /* whether md should consider | 204 | int degraded; /* whether md should consider |
206 | * adding a spare | 205 | * adding a spare |
207 | */ | 206 | */ |
diff --git a/include/linux/relay.h b/include/linux/relay.h index 759a0f97bec2..6cd8c4425fc7 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h | |||
@@ -12,6 +12,7 @@ | |||
12 | 12 | ||
13 | #include <linux/types.h> | 13 | #include <linux/types.h> |
14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
15 | #include <linux/timer.h> | ||
15 | #include <linux/wait.h> | 16 | #include <linux/wait.h> |
16 | #include <linux/list.h> | 17 | #include <linux/list.h> |
17 | #include <linux/fs.h> | 18 | #include <linux/fs.h> |
@@ -38,7 +39,7 @@ struct rchan_buf | |||
38 | size_t subbufs_consumed; /* count of sub-buffers consumed */ | 39 | size_t subbufs_consumed; /* count of sub-buffers consumed */ |
39 | struct rchan *chan; /* associated channel */ | 40 | struct rchan *chan; /* associated channel */ |
40 | wait_queue_head_t read_wait; /* reader wait queue */ | 41 | wait_queue_head_t read_wait; /* reader wait queue */ |
41 | struct delayed_work wake_readers; /* reader wake-up work struct */ | 42 | struct timer_list timer; /* reader wake-up timer */ |
42 | struct dentry *dentry; /* channel file dentry */ | 43 | struct dentry *dentry; /* channel file dentry */ |
43 | struct kref kref; /* channel buffer refcount */ | 44 | struct kref kref; /* channel buffer refcount */ |
44 | struct page **page_array; /* array of current buffer pages */ | 45 | struct page **page_array; /* array of current buffer pages */ |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 3d95c480f58d..17b72d88c4cb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -817,7 +817,7 @@ struct prio_array; | |||
817 | 817 | ||
818 | struct task_struct { | 818 | struct task_struct { |
819 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ | 819 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ |
820 | struct thread_info *thread_info; | 820 | void *stack; |
821 | atomic_t usage; | 821 | atomic_t usage; |
822 | unsigned int flags; /* per process flags, defined below */ | 822 | unsigned int flags; /* per process flags, defined below */ |
823 | unsigned int ptrace; | 823 | unsigned int ptrace; |
@@ -1317,6 +1317,7 @@ extern int in_egroup_p(gid_t); | |||
1317 | 1317 | ||
1318 | extern void proc_caches_init(void); | 1318 | extern void proc_caches_init(void); |
1319 | extern void flush_signals(struct task_struct *); | 1319 | extern void flush_signals(struct task_struct *); |
1320 | extern void ignore_signals(struct task_struct *); | ||
1320 | extern void flush_signal_handlers(struct task_struct *, int force_default); | 1321 | extern void flush_signal_handlers(struct task_struct *, int force_default); |
1321 | extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); | 1322 | extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); |
1322 | 1323 | ||
@@ -1512,8 +1513,8 @@ static inline void unlock_task_sighand(struct task_struct *tsk, | |||
1512 | 1513 | ||
1513 | #ifndef __HAVE_THREAD_FUNCTIONS | 1514 | #ifndef __HAVE_THREAD_FUNCTIONS |
1514 | 1515 | ||
1515 | #define task_thread_info(task) (task)->thread_info | 1516 | #define task_thread_info(task) ((struct thread_info *)(task)->stack) |
1516 | #define task_stack_page(task) ((void*)((task)->thread_info)) | 1517 | #define task_stack_page(task) ((task)->stack) |
1517 | 1518 | ||
1518 | static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) | 1519 | static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) |
1519 | { | 1520 | { |
@@ -1523,7 +1524,7 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct | |||
1523 | 1524 | ||
1524 | static inline unsigned long *end_of_stack(struct task_struct *p) | 1525 | static inline unsigned long *end_of_stack(struct task_struct *p) |
1525 | { | 1526 | { |
1526 | return (unsigned long *)(p->thread_info + 1); | 1527 | return (unsigned long *)(task_thread_info(p) + 1); |
1527 | } | 1528 | } |
1528 | 1529 | ||
1529 | #endif | 1530 | #endif |
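Any code that dereferenced tsk->thread_info directly no longer compiles; it has to go through the accessors, which also stay correct on architectures defining __HAVE_THREAD_FUNCTIONS. Roughly:

    /* before this patch (no longer compiles):
     *   struct thread_info *ti = tsk->thread_info;
     */

    /* after: */
    struct thread_info *ti = task_thread_info(tsk);
    void *stack = task_stack_page(tsk);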
diff --git a/include/linux/signal.h b/include/linux/signal.h index 14749056dd63..3fa0fab4a04b 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h | |||
@@ -243,6 +243,131 @@ extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, | |||
243 | 243 | ||
244 | extern struct kmem_cache *sighand_cachep; | 244 | extern struct kmem_cache *sighand_cachep; |
245 | 245 | ||
246 | /* | ||
247 | * In POSIX a signal is sent either to a specific thread (Linux task) | ||
248 | * or to the process as a whole (Linux thread group). How the signal | ||
249 | * is sent determines whether it's to one thread or the whole group, | ||
250 | * which determines which signal mask(s) are involved in blocking it | ||
251 | * from being delivered until later. When the signal is delivered, | ||
252 | * either it's caught or ignored by a user handler or it has a default | ||
253 | * effect that applies to the whole thread group (POSIX process). | ||
254 | * | ||
255 | * The possible effects an unblocked signal set to SIG_DFL can have are: | ||
256 | * ignore - Nothing Happens | ||
257 | * terminate - kill the process, i.e. all threads in the group, | ||
258 | * similar to exit_group. The group leader (only) reports | ||
259 | * WIFSIGNALED status to its parent. | ||
260 | * coredump - write a core dump file describing all threads using | ||
261 | * the same mm and then kill all those threads | ||
262 | * stop - stop all the threads in the group, i.e. TASK_STOPPED state | ||
263 | * | ||
264 | * SIGKILL and SIGSTOP cannot be caught, blocked, or ignored. | ||
265 | * Other signals when not blocked and set to SIG_DFL behave as follows. | ||
266 | * The job control signals also have other special effects. | ||
267 | * | ||
268 | * +--------------------+------------------+ | ||
269 | * | POSIX signal | default action | | ||
270 | * +--------------------+------------------+ | ||
271 | * | SIGHUP | terminate | | ||
272 | * | SIGINT | terminate | | ||
273 | * | SIGQUIT | coredump | | ||
274 | * | SIGILL | coredump | | ||
275 | * | SIGTRAP | coredump | | ||
276 | * | SIGABRT/SIGIOT | coredump | | ||
277 | * | SIGBUS | coredump | | ||
278 | * | SIGFPE | coredump | | ||
279 | * | SIGKILL | terminate(+) | | ||
280 | * | SIGUSR1 | terminate | | ||
281 | * | SIGSEGV | coredump | | ||
282 | * | SIGUSR2 | terminate | | ||
283 | * | SIGPIPE | terminate | | ||
284 | * | SIGALRM | terminate | | ||
285 | * | SIGTERM | terminate | | ||
286 | * | SIGCHLD | ignore | | ||
287 | * | SIGCONT | ignore(*) | | ||
288 | * | SIGSTOP | stop(*)(+) | | ||
289 | * | SIGTSTP | stop(*) | | ||
290 | * | SIGTTIN | stop(*) | | ||
291 | * | SIGTTOU | stop(*) | | ||
292 | * | SIGURG | ignore | | ||
293 | * | SIGXCPU | coredump | | ||
294 | * | SIGXFSZ | coredump | | ||
295 | * | SIGVTALRM | terminate | | ||
296 | * | SIGPROF | terminate | | ||
297 | * | SIGPOLL/SIGIO | terminate | | ||
298 | * | SIGSYS/SIGUNUSED | coredump | | ||
299 | * | SIGSTKFLT | terminate | | ||
300 | * | SIGWINCH | ignore | | ||
301 | * | SIGPWR | terminate | | ||
302 | * | SIGRTMIN-SIGRTMAX | terminate | | ||
303 | * +--------------------+------------------+ | ||
304 | * | non-POSIX signal | default action | | ||
305 | * +--------------------+------------------+ | ||
306 | * | SIGEMT | coredump | | ||
307 | * +--------------------+------------------+ | ||
308 | * | ||
309 | * (+) For SIGKILL and SIGSTOP the action is "always", not just "default". | ||
310 | * (*) Special job control effects: | ||
311 | * When SIGCONT is sent, it resumes the process (all threads in the group) | ||
312 | * from TASK_STOPPED state and also clears any pending/queued stop signals | ||
313 | * (any of those marked with "stop(*)"). This happens regardless of blocking, | ||
314 | * catching, or ignoring SIGCONT. When any stop signal is sent, it clears | ||
315 | * any pending/queued SIGCONT signals; this happens regardless of blocking, | ||
316 | * catching, or ignoring the stop signal, though (except for SIGSTOP) the | ||
317 | * default action of stopping the process may happen later or never. | ||
318 | */ | ||
319 | |||
320 | #ifdef SIGEMT | ||
321 | #define SIGEMT_MASK rt_sigmask(SIGEMT) | ||
322 | #else | ||
323 | #define SIGEMT_MASK 0 | ||
324 | #endif | ||
325 | |||
326 | #if SIGRTMIN > BITS_PER_LONG | ||
327 | #define rt_sigmask(sig) (1ULL << ((sig)-1)) | ||
328 | #else | ||
329 | #define rt_sigmask(sig) sigmask(sig) | ||
330 | #endif | ||
331 | #define siginmask(sig, mask) (rt_sigmask(sig) & (mask)) | ||
332 | |||
333 | #define SIG_KERNEL_ONLY_MASK (\ | ||
334 | rt_sigmask(SIGKILL) | rt_sigmask(SIGSTOP)) | ||
335 | |||
336 | #define SIG_KERNEL_STOP_MASK (\ | ||
337 | rt_sigmask(SIGSTOP) | rt_sigmask(SIGTSTP) | \ | ||
338 | rt_sigmask(SIGTTIN) | rt_sigmask(SIGTTOU) ) | ||
339 | |||
340 | #define SIG_KERNEL_COREDUMP_MASK (\ | ||
341 | rt_sigmask(SIGQUIT) | rt_sigmask(SIGILL) | \ | ||
342 | rt_sigmask(SIGTRAP) | rt_sigmask(SIGABRT) | \ | ||
343 | rt_sigmask(SIGFPE) | rt_sigmask(SIGSEGV) | \ | ||
344 | rt_sigmask(SIGBUS) | rt_sigmask(SIGSYS) | \ | ||
345 | rt_sigmask(SIGXCPU) | rt_sigmask(SIGXFSZ) | \ | ||
346 | SIGEMT_MASK ) | ||
347 | |||
348 | #define SIG_KERNEL_IGNORE_MASK (\ | ||
349 | rt_sigmask(SIGCONT) | rt_sigmask(SIGCHLD) | \ | ||
350 | rt_sigmask(SIGWINCH) | rt_sigmask(SIGURG) ) | ||
351 | |||
352 | #define sig_kernel_only(sig) \ | ||
353 | (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_ONLY_MASK)) | ||
354 | #define sig_kernel_coredump(sig) \ | ||
355 | (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_COREDUMP_MASK)) | ||
356 | #define sig_kernel_ignore(sig) \ | ||
357 | (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_IGNORE_MASK)) | ||
358 | #define sig_kernel_stop(sig) \ | ||
359 | (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_STOP_MASK)) | ||
360 | |||
361 | #define sig_needs_tasklist(sig) ((sig) == SIGCONT) | ||
362 | |||
363 | #define sig_user_defined(t, signr) \ | ||
364 | (((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_DFL) && \ | ||
365 | ((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_IGN)) | ||
366 | |||
367 | #define sig_fatal(t, signr) \ | ||
368 | (!siginmask(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \ | ||
369 | (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL) | ||
370 | |||
246 | #endif /* __KERNEL__ */ | 371 | #endif /* __KERNEL__ */ |
247 | 372 | ||
248 | #endif /* _LINUX_SIGNAL_H */ | 373 | #endif /* _LINUX_SIGNAL_H */ |
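Taken together, the masks and predicates encode the default-action table above, so delivery code can classify a signal without a lookup array. A hedged sketch of how they compose (the function is illustrative; the real consumers live in kernel/signal.c):

    static const char *default_action(int sig)
    {
            if (sig_kernel_ignore(sig))
                    return "ignore";
            if (sig_kernel_stop(sig))
                    return "stop";
            if (sig_kernel_coredump(sig))
                    return "coredump";
            return "terminate";     /* incl. all realtime signals */
    }

Note that the sig_kernel_*() predicates are only true for sig < SIGRTMIN, which is why realtime signals fall through to "terminate", matching the table.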
diff --git a/include/linux/smp.h b/include/linux/smp.h index 7ba23ec8211b..3f70149eabbb 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h | |||
@@ -83,7 +83,6 @@ void smp_prepare_boot_cpu(void); | |||
83 | * These macros fold the SMP functionality into a single CPU system | 83 | * These macros fold the SMP functionality into a single CPU system |
84 | */ | 84 | */ |
85 | #define raw_smp_processor_id() 0 | 85 | #define raw_smp_processor_id() 0 |
86 | #define hard_smp_processor_id() 0 | ||
87 | static inline int up_smp_call_function(void) | 86 | static inline int up_smp_call_function(void) |
88 | { | 87 | { |
89 | return 0; | 88 | return 0; |
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 35fa4d5aadd0..4a7ae8ab6eb8 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h | |||
@@ -396,4 +396,23 @@ char * svc_print_addr(struct svc_rqst *, char *, size_t); | |||
396 | 396 | ||
397 | #define RPC_MAX_ADDRBUFLEN (63U) | 397 | #define RPC_MAX_ADDRBUFLEN (63U) |
398 | 398 | ||
399 | /* | ||
400 | * When we want to reduce the size of the reserved space in the response | ||
401 | * buffer, we need to take into account the size of any checksum data that | ||
402 | * may be at the end of the packet. This is difficult to determine exactly | ||
403 | * for all cases without actually generating the checksum, so we just use a | ||
404 | * static value. | ||
405 | */ | ||
406 | static inline void | ||
407 | svc_reserve_auth(struct svc_rqst *rqstp, int space) | ||
408 | { | ||
409 | int added_space = 0; | ||
410 | |||
411 | switch(rqstp->rq_authop->flavour) { | ||
412 | case RPC_AUTH_GSS: | ||
413 | added_space = RPC_MAX_AUTH_SIZE; | ||
414 | } | ||
415 | return svc_reserve(rqstp, space + added_space); | ||
416 | } | ||
417 | |||
399 | #endif /* SUNRPC_SVC_H */ | 418 | #endif /* SUNRPC_SVC_H */ |
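Call sites that previously sized the reply reservation by hand can switch to the wrapper unchanged apart from the name; the GSS padding is added internally. For example (the byte count is illustrative):

    /* was: svc_reserve(rqstp, 20); */
    svc_reserve_auth(rqstp, 20);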
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 7909687557bf..e21dd93ac4b7 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h | |||
@@ -37,7 +37,8 @@ struct svc_sock { | |||
37 | 37 | ||
38 | atomic_t sk_reserved; /* space on outq that is reserved */ | 38 | atomic_t sk_reserved; /* space on outq that is reserved */ |
39 | 39 | ||
40 | spinlock_t sk_defer_lock; /* protects sk_deferred */ | 40 | spinlock_t sk_lock; /* protects sk_deferred and |
41 | * sk_info_authunix */ | ||
41 | struct list_head sk_deferred; /* deferred requests that need to | 42 | struct list_head sk_deferred; /* deferred requests that need to |
42 | * be revisited */ | 43 | * be revisited */ |
43 | struct mutex sk_mutex; /* to serialize sending data */ | 44 | struct mutex sk_mutex; /* to serialize sending data */ |
diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 9d2aa1a12aa0..d74da9122b60 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h | |||
@@ -32,6 +32,24 @@ static inline int pm_prepare_console(void) { return 0; } | |||
32 | static inline void pm_restore_console(void) {} | 32 | static inline void pm_restore_console(void) {} |
33 | #endif | 33 | #endif |
34 | 34 | ||
35 | /** | ||
36 | * struct hibernation_ops - hibernation platform support | ||
37 | * | ||
38 | * The methods in this structure allow a platform to override the default | ||
39 | * mechanism of shutting down the machine during a hibernation transition. | ||
40 | * | ||
41 | * All three methods must be assigned. | ||
42 | * | ||
43 | * @prepare: prepare system for hibernation | ||
44 | * @enter: shut down system after state has been saved to disk | ||
45 | * @finish: finish/clean up after state has been reloaded | ||
46 | */ | ||
47 | struct hibernation_ops { | ||
48 | int (*prepare)(void); | ||
49 | int (*enter)(void); | ||
50 | void (*finish)(void); | ||
51 | }; | ||
52 | |||
35 | #if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) | 53 | #if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) |
36 | /* kernel/power/snapshot.c */ | 54 | /* kernel/power/snapshot.c */ |
37 | extern void __init register_nosave_region(unsigned long, unsigned long); | 55 | extern void __init register_nosave_region(unsigned long, unsigned long); |
@@ -39,11 +57,17 @@ extern int swsusp_page_is_forbidden(struct page *); | |||
39 | extern void swsusp_set_page_free(struct page *); | 57 | extern void swsusp_set_page_free(struct page *); |
40 | extern void swsusp_unset_page_free(struct page *); | 58 | extern void swsusp_unset_page_free(struct page *); |
41 | extern unsigned long get_safe_page(gfp_t gfp_mask); | 59 | extern unsigned long get_safe_page(gfp_t gfp_mask); |
60 | |||
61 | extern void hibernation_set_ops(struct hibernation_ops *ops); | ||
62 | extern int hibernate(void); | ||
42 | #else | 63 | #else |
43 | static inline void register_nosave_region(unsigned long b, unsigned long e) {} | 64 | static inline void register_nosave_region(unsigned long b, unsigned long e) {} |
44 | static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } | 65 | static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } |
45 | static inline void swsusp_set_page_free(struct page *p) {} | 66 | static inline void swsusp_set_page_free(struct page *p) {} |
46 | static inline void swsusp_unset_page_free(struct page *p) {} | 67 | static inline void swsusp_unset_page_free(struct page *p) {} |
68 | |||
69 | static inline void hibernation_set_ops(struct hibernation_ops *ops) {} | ||
70 | static inline int hibernate(void) { return -ENOSYS; } | ||
47 | #endif /* defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) */ | 71 | #endif /* defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) */ |
48 | 72 | ||
49 | void save_processor_state(void); | 73 | void save_processor_state(void); |
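All three hibernation_ops methods must be set, per the kerneldoc above. A platform registration could look like this sketch (the my_hib_* names are hypothetical):

    #include <linux/suspend.h>

    static int my_hib_prepare(void)
    {
            return 0;       /* put the firmware into hibernation mode */
    }

    static int my_hib_enter(void)
    {
            return 0;       /* power off once the image is on disk */
    }

    static void my_hib_finish(void)
    {
            /* clean up after the image has been restored */
    }

    static struct hibernation_ops my_hib_ops = {
            .prepare = my_hib_prepare,
            .enter   = my_hib_enter,
            .finish  = my_hib_finish,
    };

    /* platform init: hibernation_set_ops(&my_hib_ops); */

When CONFIG_SOFTWARE_SUSPEND is off, the stubs make hibernation_set_ops() a no-op and hibernate() return -ENOSYS, so callers need no #ifdefs.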
diff --git a/include/linux/svga.h b/include/linux/svga.h index e1cc552e04fe..13ad0b82ac28 100644 --- a/include/linux/svga.h +++ b/include/linux/svga.h | |||
@@ -113,6 +113,8 @@ void svga_tilefill(struct fb_info *info, struct fb_tilerect *rect); | |||
113 | void svga_tileblit(struct fb_info *info, struct fb_tileblit *blit); | 113 | void svga_tileblit(struct fb_info *info, struct fb_tileblit *blit); |
114 | void svga_tilecursor(struct fb_info *info, struct fb_tilecursor *cursor); | 114 | void svga_tilecursor(struct fb_info *info, struct fb_tilecursor *cursor); |
115 | int svga_get_tilemax(struct fb_info *info); | 115 | int svga_get_tilemax(struct fb_info *info); |
116 | void svga_get_caps(struct fb_info *info, struct fb_blit_caps *caps, | ||
117 | struct fb_var_screeninfo *var); | ||
116 | 118 | ||
117 | int svga_compute_pll(const struct svga_pll *pll, u32 f_wanted, u16 *m, u16 *n, u16 *r, int node); | 119 | int svga_compute_pll(const struct svga_pll *pll, u32 f_wanted, u16 *m, u16 *n, u16 *r, int node); |
118 | int svga_check_timings(const struct svga_timing_regs *tm, struct fb_var_screeninfo *var, int node); | 120 | int svga_check_timings(const struct svga_timing_regs *tm, struct fb_var_screeninfo *var, int node); |
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 1912c6cbef55..3139f4412297 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
@@ -576,6 +576,8 @@ asmlinkage long sys_fstatat64(int dfd, char __user *filename, | |||
576 | struct stat64 __user *statbuf, int flag); | 576 | struct stat64 __user *statbuf, int flag); |
577 | asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf, | 577 | asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf, |
578 | int bufsiz); | 578 | int bufsiz); |
579 | asmlinkage long sys_utimensat(int dfd, char __user *filename, | ||
580 | struct timespec __user *utimes, int flags); | ||
579 | asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, | 581 | asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, |
580 | struct compat_timeval __user *t); | 582 | struct compat_timeval __user *t); |
581 | asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename, | 583 | asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename, |
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index acb1f105870c..d9325cf8a134 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h | |||
@@ -212,8 +212,6 @@ extern void dec_zone_state(struct zone *, enum zone_stat_item); | |||
212 | extern void __dec_zone_state(struct zone *, enum zone_stat_item); | 212 | extern void __dec_zone_state(struct zone *, enum zone_stat_item); |
213 | 213 | ||
214 | void refresh_cpu_vm_stats(int); | 214 | void refresh_cpu_vm_stats(int); |
215 | void refresh_vm_stats(void); | ||
216 | |||
217 | #else /* CONFIG_SMP */ | 215 | #else /* CONFIG_SMP */ |
218 | 216 | ||
219 | /* | 217 | /* |
@@ -260,7 +258,6 @@ static inline void __dec_zone_page_state(struct page *page, | |||
260 | #define mod_zone_page_state __mod_zone_page_state | 258 | #define mod_zone_page_state __mod_zone_page_state |
261 | 259 | ||
262 | static inline void refresh_cpu_vm_stats(int cpu) { } | 260 | static inline void refresh_cpu_vm_stats(int cpu) { } |
263 | static inline void refresh_vm_stats(void) { } | ||
264 | #endif | 261 | #endif |
265 | 262 | ||
266 | #endif /* _LINUX_VMSTAT_H */ | 263 | #endif /* _LINUX_VMSTAT_H */ |
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f16ba1e0687d..d555f31c0746 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h | |||
@@ -24,15 +24,13 @@ typedef void (*work_func_t)(struct work_struct *work); | |||
24 | struct work_struct { | 24 | struct work_struct { |
25 | atomic_long_t data; | 25 | atomic_long_t data; |
26 | #define WORK_STRUCT_PENDING 0 /* T if work item pending execution */ | 26 | #define WORK_STRUCT_PENDING 0 /* T if work item pending execution */ |
27 | #define WORK_STRUCT_NOAUTOREL 1 /* F if work item automatically released on exec */ | ||
28 | #define WORK_STRUCT_FLAG_MASK (3UL) | 27 | #define WORK_STRUCT_FLAG_MASK (3UL) |
29 | #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) | 28 | #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) |
30 | struct list_head entry; | 29 | struct list_head entry; |
31 | work_func_t func; | 30 | work_func_t func; |
32 | }; | 31 | }; |
33 | 32 | ||
34 | #define WORK_DATA_INIT(autorelease) \ | 33 | #define WORK_DATA_INIT() ATOMIC_LONG_INIT(0) |
35 | ATOMIC_LONG_INIT((autorelease) << WORK_STRUCT_NOAUTOREL) | ||
36 | 34 | ||
37 | struct delayed_work { | 35 | struct delayed_work { |
38 | struct work_struct work; | 36 | struct work_struct work; |
@@ -44,14 +42,8 @@ struct execute_work { | |||
44 | }; | 42 | }; |
45 | 43 | ||
46 | #define __WORK_INITIALIZER(n, f) { \ | 44 | #define __WORK_INITIALIZER(n, f) { \ |
47 | .data = WORK_DATA_INIT(0), \ | 45 | .data = WORK_DATA_INIT(), \ |
48 | .entry = { &(n).entry, &(n).entry }, \ | 46 | .entry = { &(n).entry, &(n).entry }, \ |
49 | .func = (f), \ | ||
50 | } | ||
51 | |||
52 | #define __WORK_INITIALIZER_NAR(n, f) { \ | ||
53 | .data = WORK_DATA_INIT(1), \ | ||
54 | .entry = { &(n).entry, &(n).entry }, \ | ||
55 | .func = (f), \ | 47 | .func = (f), \ |
56 | } | 48 | } |
57 | 49 | ||
@@ -60,23 +52,12 @@ struct execute_work { | |||
60 | .timer = TIMER_INITIALIZER(NULL, 0, 0), \ | 52 | .timer = TIMER_INITIALIZER(NULL, 0, 0), \ |
61 | } | 53 | } |
62 | 54 | ||
63 | #define __DELAYED_WORK_INITIALIZER_NAR(n, f) { \ | ||
64 | .work = __WORK_INITIALIZER_NAR((n).work, (f)), \ | ||
65 | .timer = TIMER_INITIALIZER(NULL, 0, 0), \ | ||
66 | } | ||
67 | |||
68 | #define DECLARE_WORK(n, f) \ | 55 | #define DECLARE_WORK(n, f) \ |
69 | struct work_struct n = __WORK_INITIALIZER(n, f) | 56 | struct work_struct n = __WORK_INITIALIZER(n, f) |
70 | 57 | ||
71 | #define DECLARE_WORK_NAR(n, f) \ | ||
72 | struct work_struct n = __WORK_INITIALIZER_NAR(n, f) | ||
73 | |||
74 | #define DECLARE_DELAYED_WORK(n, f) \ | 58 | #define DECLARE_DELAYED_WORK(n, f) \ |
75 | struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f) | 59 | struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f) |
76 | 60 | ||
77 | #define DECLARE_DELAYED_WORK_NAR(n, f) \ | ||
78 | struct dwork_struct n = __DELAYED_WORK_INITIALIZER_NAR(n, f) | ||
79 | |||
80 | /* | 61 | /* |
81 | * initialize a work item's function pointer | 62 | * initialize a work item's function pointer |
82 | */ | 63 | */ |
@@ -95,16 +76,9 @@ struct execute_work { | |||
95 | * assignment of the work data initializer allows the compiler | 76 | * assignment of the work data initializer allows the compiler |
96 | * to generate better code. | 77 | * to generate better code. |
97 | */ | 78 | */ |
98 | #define INIT_WORK(_work, _func) \ | 79 | #define INIT_WORK(_work, _func) \ |
99 | do { \ | ||
100 | (_work)->data = (atomic_long_t) WORK_DATA_INIT(0); \ | ||
101 | INIT_LIST_HEAD(&(_work)->entry); \ | ||
102 | PREPARE_WORK((_work), (_func)); \ | ||
103 | } while (0) | ||
104 | |||
105 | #define INIT_WORK_NAR(_work, _func) \ | ||
106 | do { \ | 80 | do { \ |
107 | (_work)->data = (atomic_long_t) WORK_DATA_INIT(1); \ | 81 | (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ |
108 | INIT_LIST_HEAD(&(_work)->entry); \ | 82 | INIT_LIST_HEAD(&(_work)->entry); \ |
109 | PREPARE_WORK((_work), (_func)); \ | 83 | PREPARE_WORK((_work), (_func)); \ |
110 | } while (0) | 84 | } while (0) |
@@ -115,12 +89,6 @@ struct execute_work { | |||
115 | init_timer(&(_work)->timer); \ | 89 | init_timer(&(_work)->timer); \ |
116 | } while (0) | 90 | } while (0) |
117 | 91 | ||
118 | #define INIT_DELAYED_WORK_NAR(_work, _func) \ | ||
119 | do { \ | ||
120 | INIT_WORK_NAR(&(_work)->work, (_func)); \ | ||
121 | init_timer(&(_work)->timer); \ | ||
122 | } while (0) | ||
123 | |||
124 | #define INIT_DELAYED_WORK_DEFERRABLE(_work, _func) \ | 92 | #define INIT_DELAYED_WORK_DEFERRABLE(_work, _func) \ |
125 | do { \ | 93 | do { \ |
126 | INIT_WORK(&(_work)->work, (_func)); \ | 94 | INIT_WORK(&(_work)->work, (_func)); \ |
@@ -143,24 +111,10 @@ struct execute_work { | |||
143 | work_pending(&(w)->work) | 111 | work_pending(&(w)->work) |
144 | 112 | ||
145 | /** | 113 | /** |
146 | * work_release - Release a work item under execution | 114 | * work_clear_pending - for internal use only, mark a work item as not pending |
147 | * @work: The work item to release | 115 | * @work: The work item in question |
148 | * | ||
149 | * This is used to release a work item that has been initialised with automatic | ||
150 | * release mode disabled (WORK_STRUCT_NOAUTOREL is set). This gives the work | ||
151 | * function the opportunity to grab auxiliary data from the container of the | ||
152 | * work_struct before clearing the pending bit as the work_struct may be | ||
153 | * subject to deallocation the moment the pending bit is cleared. | ||
154 | * | ||
155 | * In such a case, this should be called in the work function after it has | ||
156 | * fetched any data it may require from the containter of the work_struct. | ||
157 | * After this function has been called, the work_struct may be scheduled for | ||
158 | * further execution or it may be deallocated unless other precautions are | ||
159 | * taken. | ||
160 | * | ||
161 | * This should also be used to release a delayed work item. | ||
162 | */ | 116 | */ |
163 | #define work_release(work) \ | 117 | #define work_clear_pending(work) \ |
164 | clear_bit(WORK_STRUCT_PENDING, work_data_bits(work)) | 118 | clear_bit(WORK_STRUCT_PENDING, work_data_bits(work)) |
165 | 119 | ||
166 | 120 | ||
@@ -174,27 +128,28 @@ extern struct workqueue_struct *__create_workqueue(const char *name, | |||
174 | extern void destroy_workqueue(struct workqueue_struct *wq); | 128 | extern void destroy_workqueue(struct workqueue_struct *wq); |
175 | 129 | ||
176 | extern int FASTCALL(queue_work(struct workqueue_struct *wq, struct work_struct *work)); | 130 | extern int FASTCALL(queue_work(struct workqueue_struct *wq, struct work_struct *work)); |
177 | extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay)); | 131 | extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, |
132 | struct delayed_work *work, unsigned long delay)); | ||
178 | extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, | 133 | extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, |
179 | struct delayed_work *work, unsigned long delay); | 134 | struct delayed_work *work, unsigned long delay); |
135 | |||
180 | extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq)); | 136 | extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq)); |
137 | extern void flush_scheduled_work(void); | ||
181 | 138 | ||
182 | extern int FASTCALL(schedule_work(struct work_struct *work)); | 139 | extern int FASTCALL(schedule_work(struct work_struct *work)); |
183 | extern int FASTCALL(run_scheduled_work(struct work_struct *work)); | 140 | extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, |
184 | extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, unsigned long delay)); | 141 | unsigned long delay)); |
185 | 142 | extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, | |
186 | extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); | 143 | unsigned long delay); |
187 | extern int schedule_on_each_cpu(work_func_t func); | 144 | extern int schedule_on_each_cpu(work_func_t func); |
188 | extern void flush_scheduled_work(void); | ||
189 | extern int current_is_keventd(void); | 145 | extern int current_is_keventd(void); |
190 | extern int keventd_up(void); | 146 | extern int keventd_up(void); |
191 | 147 | ||
192 | extern void init_workqueues(void); | 148 | extern void init_workqueues(void); |
193 | void cancel_rearming_delayed_work(struct delayed_work *work); | ||
194 | void cancel_rearming_delayed_workqueue(struct workqueue_struct *, | ||
195 | struct delayed_work *); | ||
196 | int execute_in_process_context(work_func_t fn, struct execute_work *); | 149 | int execute_in_process_context(work_func_t fn, struct execute_work *); |
197 | 150 | ||
151 | extern void cancel_work_sync(struct work_struct *work); | ||
152 | |||
198 | /* | 153 | /* |
199 | * Kill off a pending schedule_delayed_work(). Note that the work callback | 154 | * Kill off a pending schedule_delayed_work(). Note that the work callback |
200 | * function may still be running on return from cancel_delayed_work(), unless | 155 | * function may still be running on return from cancel_delayed_work(), unless |
@@ -207,8 +162,18 @@ static inline int cancel_delayed_work(struct delayed_work *work) | |||
207 | 162 | ||
208 | ret = del_timer(&work->timer); | 163 | ret = del_timer(&work->timer); |
209 | if (ret) | 164 | if (ret) |
210 | work_release(&work->work); | 165 | work_clear_pending(&work->work); |
211 | return ret; | 166 | return ret; |
212 | } | 167 | } |
213 | 168 | ||
169 | extern void cancel_rearming_delayed_work(struct delayed_work *work); | ||
170 | |||
171 | /* Obsolete. Use cancel_rearming_delayed_work() */ | ||
172 | static inline | ||
173 | void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, | ||
174 | struct delayed_work *work) | ||
175 | { | ||
176 | cancel_rearming_delayed_work(work); | ||
177 | } | ||
178 | |||
214 | #endif | 179 | #endif |
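With the non-autorelease (_NAR) variants gone, code that used work_release() to keep a work item's container alive inside the handler switches to cancelling synchronously before freeing. A sketch of the resulting pattern (struct and function names are made up):

    #include <linux/kernel.h>
    #include <linux/workqueue.h>
    #include <linux/slab.h>

    struct my_dev {
            struct work_struct work;
            /* ... */
    };

    static void my_work_fn(struct work_struct *work)
    {
            struct my_dev *dev = container_of(work, struct my_dev, work);

            /* safe: my_dev_destroy() waits for us via cancel_work_sync() */
            pr_debug("work running for %p\n", dev);
    }

    static void my_dev_destroy(struct my_dev *dev)
    {
            cancel_work_sync(&dev->work);   /* also waits if running */
            kfree(dev);
    }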
diff --git a/init/Kconfig b/init/Kconfig index a7e48796d571..e63a017c391e 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -502,6 +502,15 @@ config VM_EVENT_COUNTERS | |||
502 | on EMBEDDED systems. /proc/vmstat will only show page counts | 502 | on EMBEDDED systems. /proc/vmstat will only show page counts |
503 | if VM event counters are disabled. | 503 | if VM event counters are disabled. |
504 | 504 | ||
505 | config SLUB_DEBUG | ||
506 | default y | ||
507 | bool "Enable SLUB debugging support" if EMBEDDED | ||
508 | help | ||
509 | SLUB has extensive debug support features. Disabling these can | ||
510 | result in significant savings in code size. This also disables | ||
511 | SLUB sysfs support. /sys/slab will not exist and there will be | ||
512 | no support for cache validation etc. | ||
513 | |||
505 | choice | 514 | choice |
506 | prompt "Choose SLAB allocator" | 515 | prompt "Choose SLAB allocator" |
507 | default SLAB | 516 | default SLAB |
@@ -512,9 +521,9 @@ config SLAB | |||
512 | bool "SLAB" | 521 | bool "SLAB" |
513 | help | 522 | help |
514 | The regular slab allocator that is established and known to work | 523 | The regular slab allocator that is established and known to work |
515 | well in all environments. It organizes chache hot objects in | 524 | well in all environments. It organizes cache hot objects in |
516 | per cpu and per node queues. SLAB is the default choice for | 525 | per cpu and per node queues. SLAB is the default choice for |
517 | slab allocator. | 526 | a slab allocator. |
518 | 527 | ||
519 | config SLUB | 528 | config SLUB |
520 | depends on EXPERIMENTAL && !ARCH_USES_SLAB_PAGE_STRUCT | 529 | depends on EXPERIMENTAL && !ARCH_USES_SLAB_PAGE_STRUCT |
@@ -524,21 +533,20 @@ config SLUB | |||
524 | instead of managing queues of cached objects (SLAB approach). | 533 | instead of managing queues of cached objects (SLAB approach). |
525 | Per cpu caching is realized using slabs of objects instead | 534 | Per cpu caching is realized using slabs of objects instead |
526 | of queues of objects. SLUB can use memory efficiently | 535 | of queues of objects. SLUB can use memory efficiently |
527 | way and has enhanced diagnostics. | 536 | and has enhanced diagnostics. |
528 | 537 | ||
529 | config SLOB | 538 | config SLOB |
530 | # | 539 | # |
531 | # SLOB cannot support SMP because SLAB_DESTROY_BY_RCU does not work | 540 | # SLOB does not support SMP because SLAB_DESTROY_BY_RCU is unsupported |
532 | # properly. | ||
533 | # | 541 | # |
534 | depends on EMBEDDED && !SMP && !SPARSEMEM | 542 | depends on EMBEDDED && !SMP && !SPARSEMEM |
535 | bool "SLOB (Simple Allocator)" | 543 | bool "SLOB (Simple Allocator)" |
536 | help | 544 | help |
537 | SLOB replaces the SLAB allocator with a drastically simpler | 545 | SLOB replaces the SLAB allocator with a drastically simpler |
538 | allocator. SLOB is more space efficient than SLAB but does not | 546 | allocator. SLOB is more space efficient than SLAB but does not |
539 | scale well (single lock for all operations) and is more susceptible | 547 | scale well (single lock for all operations) and is also highly |
540 | to fragmentation. SLOB it is a great choice to reduce | 548 | susceptible to fragmentation. SLUB can accomplish a higher object |
541 | memory usage and code size for embedded systems. | 549 | density. It is usually better to use SLUB instead of SLOB. |
542 | 550 | ||
543 | endchoice | 551 | endchoice |
544 | 552 | ||
diff --git a/init/do_mounts.c b/init/do_mounts.c index 3f57ed4599d6..46fe407fb03e 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/root_dev.h> | 7 | #include <linux/root_dev.h> |
8 | #include <linux/security.h> | 8 | #include <linux/security.h> |
9 | #include <linux/delay.h> | 9 | #include <linux/delay.h> |
10 | #include <linux/genhd.h> | ||
10 | #include <linux/mount.h> | 11 | #include <linux/mount.h> |
11 | #include <linux/device.h> | 12 | #include <linux/device.h> |
12 | #include <linux/init.h> | 13 | #include <linux/init.h> |
@@ -308,17 +309,21 @@ retry: | |||
308 | /* | 309 | /* |
309 | * Allow the user to distinguish between failed sys_open | 310 | * Allow the user to distinguish between failed sys_open |
310 | * and bad superblock on root device. | 311 | * and bad superblock on root device. |
312 | * and give them a list of the available devices | ||
311 | */ | 313 | */ |
312 | #ifdef CONFIG_BLOCK | 314 | #ifdef CONFIG_BLOCK |
313 | __bdevname(ROOT_DEV, b); | 315 | __bdevname(ROOT_DEV, b); |
314 | #endif | 316 | #endif |
315 | printk("VFS: Cannot open root device \"%s\" or %s\n", | 317 | printk("VFS: Cannot open root device \"%s\" or %s\n", |
316 | root_device_name, b); | 318 | root_device_name, b); |
317 | printk("Please append a correct \"root=\" boot option\n"); | 319 | printk("Please append a correct \"root=\" boot option; here are the available partitions:\n"); |
318 | 320 | ||
321 | printk_all_partitions(); | ||
319 | panic("VFS: Unable to mount root fs on %s", b); | 322 | panic("VFS: Unable to mount root fs on %s", b); |
320 | } | 323 | } |
321 | 324 | ||
325 | printk("List of all partitions:\n"); | ||
326 | printk_all_partitions(); | ||
322 | printk("No filesystem could mount root, tried: "); | 327 | printk("No filesystem could mount root, tried: "); |
323 | for (p = fs_names; *p; p += strlen(p)+1) | 328 | for (p = fs_names; *p; p += strlen(p)+1) |
324 | printk(" %s", p); | 329 | printk(" %s", p); |
diff --git a/init/main.c b/init/main.c index c1537e0ddceb..e8d080cab443 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -54,6 +54,7 @@ | |||
54 | #include <linux/lockdep.h> | 54 | #include <linux/lockdep.h> |
55 | #include <linux/pid_namespace.h> | 55 | #include <linux/pid_namespace.h> |
56 | #include <linux/device.h> | 56 | #include <linux/device.h> |
57 | #include <linux/kthread.h> | ||
57 | 58 | ||
58 | #include <asm/io.h> | 59 | #include <asm/io.h> |
59 | #include <asm/bugs.h> | 60 | #include <asm/bugs.h> |
@@ -425,8 +426,12 @@ static void __init setup_command_line(char *command_line) | |||
425 | static void noinline rest_init(void) | 426 | static void noinline rest_init(void) |
426 | __releases(kernel_lock) | 427 | __releases(kernel_lock) |
427 | { | 428 | { |
429 | int pid; | ||
430 | |||
428 | kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND); | 431 | kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND); |
429 | numa_default_policy(); | 432 | numa_default_policy(); |
433 | pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); | ||
434 | kthreadd_task = find_task_by_pid(pid); | ||
430 | unlock_kernel(); | 435 | unlock_kernel(); |
431 | 436 | ||
432 | /* | 437 | /* |
diff --git a/kernel/configs.c b/kernel/configs.c index 8fa1fb28f8a7..e84d3f9c6c7b 100644 --- a/kernel/configs.c +++ b/kernel/configs.c | |||
@@ -61,18 +61,9 @@ static ssize_t | |||
61 | ikconfig_read_current(struct file *file, char __user *buf, | 61 | ikconfig_read_current(struct file *file, char __user *buf, |
62 | size_t len, loff_t * offset) | 62 | size_t len, loff_t * offset) |
63 | { | 63 | { |
64 | loff_t pos = *offset; | 64 | return simple_read_from_buffer(buf, len, offset, |
65 | ssize_t count; | 65 | kernel_config_data + MAGIC_SIZE, |
66 | 66 | kernel_config_data_size); | |
67 | if (pos >= kernel_config_data_size) | ||
68 | return 0; | ||
69 | |||
70 | count = min(len, (size_t)(kernel_config_data_size - pos)); | ||
71 | if (copy_to_user(buf, kernel_config_data + MAGIC_SIZE + pos, count)) | ||
72 | return -EFAULT; | ||
73 | |||
74 | *offset += count; | ||
75 | return count; | ||
76 | } | 67 | } |
77 | 68 | ||
78 | static const struct file_operations ikconfig_file_ops = { | 69 | static const struct file_operations ikconfig_file_ops = { |
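simple_read_from_buffer() (fs/libfs.c) performs the same bounds check, copy_to_user() and *ppos update that were open-coded here, including the -EFAULT and end-of-buffer cases, so the conversion does not change behaviour. The shape of a typical caller (my_data/my_data_size stand in for whatever backs the file):

    static ssize_t my_read(struct file *file, char __user *buf,
                           size_t len, loff_t *ppos)
    {
            return simple_read_from_buffer(buf, len, ppos,
                                           my_data, my_data_size);
    }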
diff --git a/kernel/cpu.c b/kernel/cpu.c index 36e70845cfc3..208cf3497c10 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -97,7 +97,7 @@ static inline void check_for_tasks(int cpu) | |||
97 | (!cputime_eq(p->utime, cputime_zero) || | 97 | (!cputime_eq(p->utime, cputime_zero) || |
98 | !cputime_eq(p->stime, cputime_zero))) | 98 | !cputime_eq(p->stime, cputime_zero))) |
99 | printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\ | 99 | printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\ |
100 | (state = %ld, flags = %lx) \n", | 100 | (state = %ld, flags = %x) \n", |
101 | p->comm, p->pid, cpu, p->state, p->flags); | 101 | p->comm, p->pid, cpu, p->state, p->flags); |
102 | } | 102 | } |
103 | write_unlock_irq(&tasklist_lock); | 103 | write_unlock_irq(&tasklist_lock); |
@@ -120,11 +120,13 @@ static int take_cpu_down(void *unused) | |||
120 | } | 120 | } |
121 | 121 | ||
122 | /* Requires cpu_add_remove_lock to be held */ | 122 | /* Requires cpu_add_remove_lock to be held */ |
123 | static int _cpu_down(unsigned int cpu) | 123 | static int _cpu_down(unsigned int cpu, int tasks_frozen) |
124 | { | 124 | { |
125 | int err; | 125 | int err, nr_calls = 0; |
126 | struct task_struct *p; | 126 | struct task_struct *p; |
127 | cpumask_t old_allowed, tmp; | 127 | cpumask_t old_allowed, tmp; |
128 | void *hcpu = (void *)(long)cpu; | ||
129 | unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; | ||
128 | 130 | ||
129 | if (num_online_cpus() == 1) | 131 | if (num_online_cpus() == 1) |
130 | return -EBUSY; | 132 | return -EBUSY; |
@@ -132,12 +134,16 @@ static int _cpu_down(unsigned int cpu) | |||
132 | if (!cpu_online(cpu)) | 134 | if (!cpu_online(cpu)) |
133 | return -EINVAL; | 135 | return -EINVAL; |
134 | 136 | ||
135 | err = raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, | 137 | raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu); |
136 | (void *)(long)cpu); | 138 | err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, |
139 | hcpu, -1, &nr_calls); | ||
137 | if (err == NOTIFY_BAD) { | 140 | if (err == NOTIFY_BAD) { |
141 | __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, | ||
142 | hcpu, nr_calls, NULL); | ||
138 | printk("%s: attempt to take down CPU %u failed\n", | 143 | printk("%s: attempt to take down CPU %u failed\n", |
139 | __FUNCTION__, cpu); | 144 | __FUNCTION__, cpu); |
140 | return -EINVAL; | 145 | err = -EINVAL; |
146 | goto out_release; | ||
141 | } | 147 | } |
142 | 148 | ||
143 | /* Ensure that we are not runnable on dying cpu */ | 149 | /* Ensure that we are not runnable on dying cpu */ |
@@ -152,8 +158,8 @@ static int _cpu_down(unsigned int cpu) | |||
152 | 158 | ||
153 | if (IS_ERR(p) || cpu_online(cpu)) { | 159 | if (IS_ERR(p) || cpu_online(cpu)) { |
154 | /* CPU didn't die: tell everyone. Can't complain. */ | 160 | /* CPU didn't die: tell everyone. Can't complain. */ |
155 | if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED, | 161 | if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, |
156 | (void *)(long)cpu) == NOTIFY_BAD) | 162 | hcpu) == NOTIFY_BAD) |
157 | BUG(); | 163 | BUG(); |
158 | 164 | ||
159 | if (IS_ERR(p)) { | 165 | if (IS_ERR(p)) { |
@@ -170,13 +176,9 @@ static int _cpu_down(unsigned int cpu) | |||
170 | /* This actually kills the CPU. */ | 176 | /* This actually kills the CPU. */ |
171 | __cpu_die(cpu); | 177 | __cpu_die(cpu); |
172 | 178 | ||
173 | /* Move it here so it can run. */ | ||
174 | kthread_bind(p, get_cpu()); | ||
175 | put_cpu(); | ||
176 | |||
177 | /* CPU is completely dead: tell everyone. Too late to complain. */ | 179 | /* CPU is completely dead: tell everyone. Too late to complain. */ |
178 | if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD, | 180 | if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod, |
179 | (void *)(long)cpu) == NOTIFY_BAD) | 181 | hcpu) == NOTIFY_BAD) |
180 | BUG(); | 182 | BUG(); |
181 | 183 | ||
182 | check_for_tasks(cpu); | 184 | check_for_tasks(cpu); |
@@ -185,6 +187,8 @@ out_thread: | |||
185 | err = kthread_stop(p); | 187 | err = kthread_stop(p); |
186 | out_allowed: | 188 | out_allowed: |
187 | set_cpus_allowed(current, old_allowed); | 189 | set_cpus_allowed(current, old_allowed); |
190 | out_release: | ||
191 | raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu); | ||
188 | return err; | 192 | return err; |
189 | } | 193 | } |
190 | 194 | ||
@@ -196,7 +200,7 @@ int cpu_down(unsigned int cpu) | |||
196 | if (cpu_hotplug_disabled) | 200 | if (cpu_hotplug_disabled) |
197 | err = -EBUSY; | 201 | err = -EBUSY; |
198 | else | 202 | else |
199 | err = _cpu_down(cpu); | 203 | err = _cpu_down(cpu, 0); |
200 | 204 | ||
201 | mutex_unlock(&cpu_add_remove_lock); | 205 | mutex_unlock(&cpu_add_remove_lock); |
202 | return err; | 206 | return err; |
@@ -204,15 +208,18 @@ int cpu_down(unsigned int cpu) | |||
204 | #endif /*CONFIG_HOTPLUG_CPU*/ | 208 | #endif /*CONFIG_HOTPLUG_CPU*/ |
205 | 209 | ||
206 | /* Requires cpu_add_remove_lock to be held */ | 210 | /* Requires cpu_add_remove_lock to be held */ |
207 | static int __cpuinit _cpu_up(unsigned int cpu) | 211 | static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) |
208 | { | 212 | { |
209 | int ret; | 213 | int ret, nr_calls = 0; |
210 | void *hcpu = (void *)(long)cpu; | 214 | void *hcpu = (void *)(long)cpu; |
215 | unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; | ||
211 | 216 | ||
212 | if (cpu_online(cpu) || !cpu_present(cpu)) | 217 | if (cpu_online(cpu) || !cpu_present(cpu)) |
213 | return -EINVAL; | 218 | return -EINVAL; |
214 | 219 | ||
215 | ret = raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); | 220 | raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu); |
221 | ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu, | ||
222 | -1, &nr_calls); | ||
216 | if (ret == NOTIFY_BAD) { | 223 | if (ret == NOTIFY_BAD) { |
217 | printk("%s: attempt to bring up CPU %u failed\n", | 224 | printk("%s: attempt to bring up CPU %u failed\n", |
218 | __FUNCTION__, cpu); | 225 | __FUNCTION__, cpu); |
@@ -229,12 +236,13 @@ static int __cpuinit _cpu_up(unsigned int cpu) | |||
229 | BUG_ON(!cpu_online(cpu)); | 236 | BUG_ON(!cpu_online(cpu)); |
230 | 237 | ||
231 | /* Now call notifier in preparation. */ | 238 | /* Now call notifier in preparation. */ |
232 | raw_notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu); | 239 | raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu); |
233 | 240 | ||
234 | out_notify: | 241 | out_notify: |
235 | if (ret != 0) | 242 | if (ret != 0) |
236 | raw_notifier_call_chain(&cpu_chain, | 243 | __raw_notifier_call_chain(&cpu_chain, |
237 | CPU_UP_CANCELED, hcpu); | 244 | CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL); |
245 | raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu); | ||
238 | 246 | ||
239 | return ret; | 247 | return ret; |
240 | } | 248 | } |
@@ -247,19 +255,13 @@ int __cpuinit cpu_up(unsigned int cpu) | |||
247 | if (cpu_hotplug_disabled) | 255 | if (cpu_hotplug_disabled) |
248 | err = -EBUSY; | 256 | err = -EBUSY; |
249 | else | 257 | else |
250 | err = _cpu_up(cpu); | 258 | err = _cpu_up(cpu, 0); |
251 | 259 | ||
252 | mutex_unlock(&cpu_add_remove_lock); | 260 | mutex_unlock(&cpu_add_remove_lock); |
253 | return err; | 261 | return err; |
254 | } | 262 | } |
255 | 263 | ||
256 | #ifdef CONFIG_SUSPEND_SMP | 264 | #ifdef CONFIG_SUSPEND_SMP |
257 | /* Needed to prevent the microcode driver from requesting firmware in its CPU | ||
258 | * hotplug notifier during the suspend/resume. | ||
259 | */ | ||
260 | int suspend_cpu_hotplug; | ||
261 | EXPORT_SYMBOL(suspend_cpu_hotplug); | ||
262 | |||
263 | static cpumask_t frozen_cpus; | 265 | static cpumask_t frozen_cpus; |
264 | 266 | ||
265 | int disable_nonboot_cpus(void) | 267 | int disable_nonboot_cpus(void) |
@@ -267,7 +269,6 @@ int disable_nonboot_cpus(void) | |||
267 | int cpu, first_cpu, error = 0; | 269 | int cpu, first_cpu, error = 0; |
268 | 270 | ||
269 | mutex_lock(&cpu_add_remove_lock); | 271 | mutex_lock(&cpu_add_remove_lock); |
270 | suspend_cpu_hotplug = 1; | ||
271 | first_cpu = first_cpu(cpu_online_map); | 272 | first_cpu = first_cpu(cpu_online_map); |
272 | /* We take down all of the non-boot CPUs in one shot to avoid races | 273 | /* We take down all of the non-boot CPUs in one shot to avoid races |
273 | * with the userspace trying to use the CPU hotplug at the same time | 274 | * with the userspace trying to use the CPU hotplug at the same time |
@@ -277,7 +278,7 @@ int disable_nonboot_cpus(void) | |||
277 | for_each_online_cpu(cpu) { | 278 | for_each_online_cpu(cpu) { |
278 | if (cpu == first_cpu) | 279 | if (cpu == first_cpu) |
279 | continue; | 280 | continue; |
280 | error = _cpu_down(cpu); | 281 | error = _cpu_down(cpu, 1); |
281 | if (!error) { | 282 | if (!error) { |
282 | cpu_set(cpu, frozen_cpus); | 283 | cpu_set(cpu, frozen_cpus); |
283 | printk("CPU%d is down\n", cpu); | 284 | printk("CPU%d is down\n", cpu); |
@@ -294,7 +295,6 @@ int disable_nonboot_cpus(void) | |||
294 | } else { | 295 | } else { |
295 | printk(KERN_ERR "Non-boot CPUs are not disabled\n"); | 296 | printk(KERN_ERR "Non-boot CPUs are not disabled\n"); |
296 | } | 297 | } |
297 | suspend_cpu_hotplug = 0; | ||
298 | mutex_unlock(&cpu_add_remove_lock); | 298 | mutex_unlock(&cpu_add_remove_lock); |
299 | return error; | 299 | return error; |
300 | } | 300 | } |
@@ -309,10 +309,9 @@ void enable_nonboot_cpus(void) | |||
309 | if (cpus_empty(frozen_cpus)) | 309 | if (cpus_empty(frozen_cpus)) |
310 | goto out; | 310 | goto out; |
311 | 311 | ||
312 | suspend_cpu_hotplug = 1; | ||
313 | printk("Enabling non-boot CPUs ...\n"); | 312 | printk("Enabling non-boot CPUs ...\n"); |
314 | for_each_cpu_mask(cpu, frozen_cpus) { | 313 | for_each_cpu_mask(cpu, frozen_cpus) { |
315 | error = _cpu_up(cpu); | 314 | error = _cpu_up(cpu, 1); |
316 | if (!error) { | 315 | if (!error) { |
317 | printk("CPU%d is up\n", cpu); | 316 | printk("CPU%d is up\n", cpu); |
318 | continue; | 317 | continue; |
@@ -320,7 +319,6 @@ void enable_nonboot_cpus(void) | |||
320 | printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); | 319 | printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); |
321 | } | 320 | } |
322 | cpus_clear(frozen_cpus); | 321 | cpus_clear(frozen_cpus); |
323 | suspend_cpu_hotplug = 0; | ||
324 | out: | 322 | out: |
325 | mutex_unlock(&cpu_add_remove_lock); | 323 | mutex_unlock(&cpu_add_remove_lock); |
326 | } | 324 | } |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 88b416dfbc72..f57854b08922 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -1772,12 +1772,7 @@ static ssize_t cpuset_tasks_read(struct file *file, char __user *buf, | |||
1772 | { | 1772 | { |
1773 | struct ctr_struct *ctr = file->private_data; | 1773 | struct ctr_struct *ctr = file->private_data; |
1774 | 1774 | ||
1775 | if (*ppos + nbytes > ctr->bufsz) | 1775 | return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz); |
1776 | nbytes = ctr->bufsz - *ppos; | ||
1777 | if (copy_to_user(buf, ctr->buf + *ppos, nbytes)) | ||
1778 | return -EFAULT; | ||
1779 | *ppos += nbytes; | ||
1780 | return nbytes; | ||
1781 | } | 1776 | } |
1782 | 1777 | ||
1783 | static int cpuset_tasks_release(struct inode *unused_inode, struct file *file) | 1778 | static int cpuset_tasks_release(struct inode *unused_inode, struct file *file) |
diff --git a/kernel/exit.c b/kernel/exit.c index f5a7abb621f3..b0c6f0c3a2df 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/profile.h> | 26 | #include <linux/profile.h> |
27 | #include <linux/mount.h> | 27 | #include <linux/mount.h> |
28 | #include <linux/proc_fs.h> | 28 | #include <linux/proc_fs.h> |
29 | #include <linux/kthread.h> | ||
29 | #include <linux/mempolicy.h> | 30 | #include <linux/mempolicy.h> |
30 | #include <linux/taskstats_kern.h> | 31 | #include <linux/taskstats_kern.h> |
31 | #include <linux/delayacct.h> | 32 | #include <linux/delayacct.h> |
@@ -254,26 +255,25 @@ static int has_stopped_jobs(struct pid *pgrp) | |||
254 | } | 255 | } |
255 | 256 | ||
256 | /** | 257 | /** |
257 | * reparent_to_init - Reparent the calling kernel thread to the init task of the pid space that the thread belongs to. | 258 | * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd |
258 | * | 259 | * |
259 | * If a kernel thread is launched as a result of a system call, or if | 260 | * If a kernel thread is launched as a result of a system call, or if |
260 | * it ever exits, it should generally reparent itself to init so that | 261 | * it ever exits, it should generally reparent itself to kthreadd so it |
261 | * it is correctly cleaned up on exit. | 262 | * isn't in the way of other processes and is correctly cleaned up on exit. |
262 | * | 263 | * |
263 | * The various task state such as scheduling policy and priority may have | 264 | * The various task state such as scheduling policy and priority may have |
264 | * been inherited from a user process, so we reset them to sane values here. | 265 | * been inherited from a user process, so we reset them to sane values here. |
265 | * | 266 | * |
266 | * NOTE that reparent_to_init() gives the caller full capabilities. | 267 | * NOTE that reparent_to_kthreadd() gives the caller full capabilities. |
267 | */ | 268 | */ |
268 | static void reparent_to_init(void) | 269 | static void reparent_to_kthreadd(void) |
269 | { | 270 | { |
270 | write_lock_irq(&tasklist_lock); | 271 | write_lock_irq(&tasklist_lock); |
271 | 272 | ||
272 | ptrace_unlink(current); | 273 | ptrace_unlink(current); |
273 | /* Reparent to init */ | 274 | /* Reparent to init */ |
274 | remove_parent(current); | 275 | remove_parent(current); |
275 | current->parent = child_reaper(current); | 276 | current->real_parent = current->parent = kthreadd_task; |
276 | current->real_parent = child_reaper(current); | ||
277 | add_parent(current); | 277 | add_parent(current); |
278 | 278 | ||
279 | /* Set the exit signal to SIGCHLD so we signal init on exit */ | 279 | /* Set the exit signal to SIGCHLD so we signal init on exit */ |
@@ -347,7 +347,7 @@ int disallow_signal(int sig) | |||
347 | return -EINVAL; | 347 | return -EINVAL; |
348 | 348 | ||
349 | spin_lock_irq(¤t->sighand->siglock); | 349 | spin_lock_irq(¤t->sighand->siglock); |
350 | sigaddset(¤t->blocked, sig); | 350 | current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN; |
351 | recalc_sigpending(); | 351 | recalc_sigpending(); |
352 | spin_unlock_irq(¤t->sighand->siglock); | 352 | spin_unlock_irq(¤t->sighand->siglock); |
353 | return 0; | 353 | return 0; |
@@ -400,7 +400,7 @@ void daemonize(const char *name, ...) | |||
400 | current->files = init_task.files; | 400 | current->files = init_task.files; |
401 | atomic_inc(¤t->files->count); | 401 | atomic_inc(¤t->files->count); |
402 | 402 | ||
403 | reparent_to_init(); | 403 | reparent_to_kthreadd(); |
404 | } | 404 | } |
405 | 405 | ||
406 | EXPORT_SYMBOL(daemonize); | 406 | EXPORT_SYMBOL(daemonize); |
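daemonize() is the path by which existing kernel threads pick this up: after the call, the thread hangs off kthreadd rather than init. A usage sketch (the thread body is illustrative):

    #include <linux/sched.h>

    static int my_thread(void *unused)
    {
            daemonize("my-thread");   /* reparents current to kthreadd */
            allow_signal(SIGTERM);

            while (!signal_pending(current))
                    schedule_timeout_interruptible(HZ);
            return 0;
    }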
diff --git a/kernel/fork.c b/kernel/fork.c index a8dd75d4992b..5dd3979747f5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -105,7 +105,7 @@ static struct kmem_cache *mm_cachep; | |||
105 | 105 | ||
106 | void free_task(struct task_struct *tsk) | 106 | void free_task(struct task_struct *tsk) |
107 | { | 107 | { |
108 | free_thread_info(tsk->thread_info); | 108 | free_thread_info(tsk->stack); |
109 | rt_mutex_debug_task_free(tsk); | 109 | rt_mutex_debug_task_free(tsk); |
110 | free_task_struct(tsk); | 110 | free_task_struct(tsk); |
111 | } | 111 | } |
@@ -175,7 +175,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
175 | } | 175 | } |
176 | 176 | ||
177 | *tsk = *orig; | 177 | *tsk = *orig; |
178 | tsk->thread_info = ti; | 178 | tsk->stack = ti; |
179 | setup_thread_stack(tsk, orig); | 179 | setup_thread_stack(tsk, orig); |
180 | 180 | ||
181 | #ifdef CONFIG_CC_STACKPROTECTOR | 181 | #ifdef CONFIG_CC_STACKPROTECTOR |
diff --git a/kernel/futex.c b/kernel/futex.c index 600bc9d801f2..b7ce15c67e32 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -16,6 +16,9 @@ | |||
16 | * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | 16 | * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> |
17 | * Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> | 17 | * Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> |
18 | * | 18 | * |
19 | * PRIVATE futexes by Eric Dumazet | ||
20 | * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com> | ||
21 | * | ||
19 | * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly | 22 | * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly |
20 | * enough at me, Linus for the original (flawed) idea, Matthew | 23 | * enough at me, Linus for the original (flawed) idea, Matthew |
21 | * Kirkwood for proof-of-concept implementation. | 24 | * Kirkwood for proof-of-concept implementation. |
@@ -53,6 +56,12 @@ | |||
53 | 56 | ||
54 | #include "rtmutex_common.h" | 57 | #include "rtmutex_common.h" |
55 | 58 | ||
59 | #ifdef CONFIG_DEBUG_RT_MUTEXES | ||
60 | # include "rtmutex-debug.h" | ||
61 | #else | ||
62 | # include "rtmutex.h" | ||
63 | #endif | ||
64 | |||
56 | #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) | 65 | #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) |
57 | 66 | ||
58 | /* | 67 | /* |
@@ -81,12 +90,12 @@ struct futex_pi_state { | |||
81 | * we can wake only the relevant ones (hashed queues may be shared). | 90 | * we can wake only the relevant ones (hashed queues may be shared). |
82 | * | 91 | * |
83 | * A futex_q has a woken state, just like tasks have TASK_RUNNING. | 92 | * A futex_q has a woken state, just like tasks have TASK_RUNNING. |
84 | * It is considered woken when list_empty(&q->list) || q->lock_ptr == 0. | 93 | * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. |
85 | * The order of wakeup is always to make the first condition true, then | 94 | * The order of wakeup is always to make the first condition true, then |
86 | * wake up q->waiters, then make the second condition true. | 95 | * wake up q->waiters, then make the second condition true. |
87 | */ | 96 | */ |
88 | struct futex_q { | 97 | struct futex_q { |
89 | struct list_head list; | 98 | struct plist_node list; |
90 | wait_queue_head_t waiters; | 99 | wait_queue_head_t waiters; |
91 | 100 | ||
92 | /* Which hash list lock to use: */ | 101 | /* Which hash list lock to use: */ |
@@ -102,14 +111,20 @@ struct futex_q { | |||
102 | /* Optional priority inheritance state: */ | 111 | /* Optional priority inheritance state: */ |
103 | struct futex_pi_state *pi_state; | 112 | struct futex_pi_state *pi_state; |
104 | struct task_struct *task; | 113 | struct task_struct *task; |
114 | |||
115 | /* | ||
116 | * This waiter is used in case of requeue from a | ||
117 | * normal futex to a PI-futex | ||
118 | */ | ||
119 | struct rt_mutex_waiter waiter; | ||
105 | }; | 120 | }; |
106 | 121 | ||
107 | /* | 122 | /* |
108 | * Split the global futex_lock into every hash list lock. | 123 | * Split the global futex_lock into every hash list lock. |
109 | */ | 124 | */ |
110 | struct futex_hash_bucket { | 125 | struct futex_hash_bucket { |
111 | spinlock_t lock; | 126 | spinlock_t lock; |
112 | struct list_head chain; | 127 | struct plist_head chain; |
113 | }; | 128 | }; |
114 | 129 | ||
115 | static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; | 130 | static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; |
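The futex_q comment above spells out a lock-free contract: a waiter counts as woken once plist_node_empty(&q->list) || q->lock_ptr == 0, and wakers always establish the first condition before the second. A minimal sketch of the check this ordering licenses (the helper name is ours, not the kernel's):

    /* Sketch: mirrors the woken-state contract quoted above.  Safe
     * without the bucket lock because a waker empties the plist node
     * first and clears lock_ptr last (see wake_futex()/unqueue_me()). */
    static inline int futex_q_woken(struct futex_q *q)
    {
            return plist_node_empty(&q->list) || q->lock_ptr == NULL;
    }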
@@ -138,19 +153,26 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2) | |||
138 | && key1->both.offset == key2->both.offset); | 153 | && key1->both.offset == key2->both.offset); |
139 | } | 154 | } |
140 | 155 | ||
141 | /* | 156 | /** |
142 | * Get parameters which are the keys for a futex. | 157 | * get_futex_key - Get parameters which are the keys for a futex. |
158 | * @uaddr: virtual address of the futex | ||
159 | * @fshared: NULL for a PROCESS_PRIVATE futex, | ||
160 | *  &current->mm->mmap_sem for a PROCESS_SHARED futex | ||
161 | * @key: address where result is stored. | ||
162 | * | ||
163 | * Returns a negative error code or 0. | ||
164 | * The key words are stored in *key on success. | ||
143 | * | 165 | * |
144 | * For shared mappings, it's (page->index, vma->vm_file->f_path.dentry->d_inode, | 166 | * For shared mappings, it's (page->index, vma->vm_file->f_path.dentry->d_inode, |
145 | * offset_within_page). For private mappings, it's (uaddr, current->mm). | 167 | * offset_within_page). For private mappings, it's (uaddr, current->mm). |
146 | * We can usually work out the index without swapping in the page. | 168 | * We can usually work out the index without swapping in the page. |
147 | * | 169 | * |
148 | * Returns: 0, or negative error code. | 170 | * fshared is NULL for PROCESS_PRIVATE futexes |
149 | * The key words are stored in *key on success. | 171 | * For other futexes, it points to &current->mm->mmap_sem and the |
150 | * | 172 | * caller must have taken the reader lock, but NOT any spinlocks. |
151 | * Should be called with &current->mm->mmap_sem but NOT any spinlocks. | ||
152 | */ | 173 | */ |
153 | int get_futex_key(u32 __user *uaddr, union futex_key *key) | 174 | int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared, |
175 | union futex_key *key) | ||
154 | { | 176 | { |
155 | unsigned long address = (unsigned long)uaddr; | 177 | unsigned long address = (unsigned long)uaddr; |
156 | struct mm_struct *mm = current->mm; | 178 | struct mm_struct *mm = current->mm; |
@@ -162,11 +184,25 @@ int get_futex_key(u32 __user *uaddr, union futex_key *key) | |||
162 | * The futex address must be "naturally" aligned. | 184 | * The futex address must be "naturally" aligned. |
163 | */ | 185 | */ |
164 | key->both.offset = address % PAGE_SIZE; | 186 | key->both.offset = address % PAGE_SIZE; |
165 | if (unlikely((key->both.offset % sizeof(u32)) != 0)) | 187 | if (unlikely((address % sizeof(u32)) != 0)) |
166 | return -EINVAL; | 188 | return -EINVAL; |
167 | address -= key->both.offset; | 189 | address -= key->both.offset; |
168 | 190 | ||
169 | /* | 191 | /* |
192 | * PROCESS_PRIVATE futexes are fast. | ||
193 | * As the mm cannot disappear under us and the 'key' only needs the | ||
194 | * virtual address, we don't even have to find the underlying vma. | ||
195 | * Note: we do have to check that 'uaddr' is a valid user address, | ||
196 | * but access_ok() should be faster than find_vma(). | ||
197 | */ | ||
198 | if (!fshared) { | ||
199 | if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))) | ||
200 | return -EFAULT; | ||
201 | key->private.mm = mm; | ||
202 | key->private.address = address; | ||
203 | return 0; | ||
204 | } | ||
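For context, here is what exercises the new fast path from userspace - a hedged sketch assuming the FUTEX_PRIVATE_FLAG interface this series introduces (the fallback value below is taken from the series' futex.h and may differ on older headers):

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef FUTEX_PRIVATE_FLAG
    #define FUTEX_PRIVATE_FLAG 128          /* assumed value */
    #endif

    /* Wait on a process-private futex word: the kernel keys it by
     * (mm, address) above and skips find_vma()/mmap_sem entirely. */
    static long futex_wait_private(int *uaddr, int expected)
    {
            return syscall(SYS_futex, uaddr,
                           FUTEX_WAIT | FUTEX_PRIVATE_FLAG,
                           expected, NULL, NULL, 0);
    }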
205 | /* | ||
170 | * The futex is hashed differently depending on whether | 206 | * The futex is hashed differently depending on whether |
171 | * it's in a shared or private mapping. So check vma first. | 207 | * it's in a shared or private mapping. So check vma first. |
172 | */ | 208 | */ |
@@ -180,6 +216,9 @@ int get_futex_key(u32 __user *uaddr, union futex_key *key) | |||
180 | if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ)) | 216 | if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ)) |
181 | return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES; | 217 | return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES; |
182 | 218 | ||
219 | /* Save the user address in the key */ | ||
220 | key->uaddr = uaddr; | ||
221 | |||
183 | /* | 222 | /* |
184 | * Private mappings are handled in a simple way. | 223 | * Private mappings are handled in a simple way. |
185 | * | 224 | * |
@@ -190,6 +229,7 @@ int get_futex_key(u32 __user *uaddr, union futex_key *key) | |||
190 | * mappings of _writable_ handles. | 229 | * mappings of _writable_ handles. |
191 | */ | 230 | */ |
192 | if (likely(!(vma->vm_flags & VM_MAYSHARE))) { | 231 | if (likely(!(vma->vm_flags & VM_MAYSHARE))) { |
232 | key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */ | ||
193 | key->private.mm = mm; | 233 | key->private.mm = mm; |
194 | key->private.address = address; | 234 | key->private.address = address; |
195 | return 0; | 235 | return 0; |
@@ -199,7 +239,7 @@ int get_futex_key(u32 __user *uaddr, union futex_key *key) | |||
199 | * Linear file mappings are also simple. | 239 | * Linear file mappings are also simple. |
200 | */ | 240 | */ |
201 | key->shared.inode = vma->vm_file->f_path.dentry->d_inode; | 241 | key->shared.inode = vma->vm_file->f_path.dentry->d_inode; |
202 | key->both.offset++; /* Bit 0 of offset indicates inode-based key. */ | 242 | key->both.offset |= FUT_OFF_INODE; /* inode-based key. */ |
203 | if (likely(!(vma->vm_flags & VM_NONLINEAR))) { | 243 | if (likely(!(vma->vm_flags & VM_NONLINEAR))) { |
204 | key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT) | 244 | key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT) |
205 | + vma->vm_pgoff); | 245 | + vma->vm_pgoff); |
@@ -227,16 +267,18 @@ EXPORT_SYMBOL_GPL(get_futex_key); | |||
227 | * Take a reference to the resource addressed by a key. | 267 | * Take a reference to the resource addressed by a key. |
228 | * Can be called while holding spinlocks. | 268 | * Can be called while holding spinlocks. |
229 | * | 269 | * |
230 | * NOTE: mmap_sem MUST be held between get_futex_key() and calling this | ||
231 | * function, if it is called at all. mmap_sem keeps key->shared.inode valid. | ||
232 | */ | 270 | */ |
233 | inline void get_futex_key_refs(union futex_key *key) | 271 | inline void get_futex_key_refs(union futex_key *key) |
234 | { | 272 | { |
235 | if (key->both.ptr != 0) { | 273 | if (key->both.ptr == 0) |
236 | if (key->both.offset & 1) | 274 | return; |
275 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { | ||
276 | case FUT_OFF_INODE: | ||
237 | atomic_inc(&key->shared.inode->i_count); | 277 | atomic_inc(&key->shared.inode->i_count); |
238 | else | 278 | break; |
279 | case FUT_OFF_MMSHARED: | ||
239 | atomic_inc(&key->private.mm->mm_count); | 280 | atomic_inc(&key->private.mm->mm_count); |
281 | break; | ||
240 | } | 282 | } |
241 | } | 283 | } |
242 | EXPORT_SYMBOL_GPL(get_futex_key_refs); | 284 | EXPORT_SYMBOL_GPL(get_futex_key_refs); |
@@ -247,11 +289,15 @@ EXPORT_SYMBOL_GPL(get_futex_key_refs); | |||
247 | */ | 289 | */ |
248 | void drop_futex_key_refs(union futex_key *key) | 290 | void drop_futex_key_refs(union futex_key *key) |
249 | { | 291 | { |
250 | if (key->both.ptr != 0) { | 292 | if (key->both.ptr == 0) |
251 | if (key->both.offset & 1) | 293 | return; |
294 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { | ||
295 | case FUT_OFF_INODE: | ||
252 | iput(key->shared.inode); | 296 | iput(key->shared.inode); |
253 | else | 297 | break; |
298 | case FUT_OFF_MMSHARED: | ||
254 | mmdrop(key->private.mm); | 299 | mmdrop(key->private.mm); |
300 | break; | ||
255 | } | 301 | } |
256 | } | 302 | } |
257 | EXPORT_SYMBOL_GPL(drop_futex_key_refs); | 303 | EXPORT_SYMBOL_GPL(drop_futex_key_refs); |
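The two switch statements above discriminate key types through low bits of 'offset'. Since a futex word must be u32-aligned, the two low bits of the page offset are free to carry type information. A sketch of the presumed encoding (constant values inferred from the switch cases and the series' futex.h; treat them as assumptions):

    #define FUT_OFF_INODE    1   /* key refs an inode (shared file mapping) */
    #define FUT_OFF_MMSHARED 2   /* key refs an mm (shared anonymous area)  */

    /* PROCESS_PRIVATE keys set neither bit, so neither switch case
     * matches and get/drop_futex_key_refs() touch no refcount at all. */
    static inline int futex_key_is_private(union futex_key *key)
    {
            return !(key->both.offset & (FUT_OFF_INODE | FUT_OFF_MMSHARED));
    }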
@@ -268,28 +314,38 @@ static inline int get_futex_value_locked(u32 *dest, u32 __user *from) | |||
268 | } | 314 | } |
269 | 315 | ||
270 | /* | 316 | /* |
271 | * Fault handling. Called with current->mm->mmap_sem held. | 317 | * Fault handling. |
318 | * if fshared is non NULL, current->mm->mmap_sem is already held | ||
272 | */ | 319 | */ |
273 | static int futex_handle_fault(unsigned long address, int attempt) | 320 | static int futex_handle_fault(unsigned long address, |
321 | struct rw_semaphore *fshared, int attempt) | ||
274 | { | 322 | { |
275 | struct vm_area_struct * vma; | 323 | struct vm_area_struct * vma; |
276 | struct mm_struct *mm = current->mm; | 324 | struct mm_struct *mm = current->mm; |
325 | int ret = -EFAULT; | ||
277 | 326 | ||
278 | if (attempt > 2 || !(vma = find_vma(mm, address)) || | 327 | if (attempt > 2) |
279 | vma->vm_start > address || !(vma->vm_flags & VM_WRITE)) | 328 | return ret; |
280 | return -EFAULT; | ||
281 | 329 | ||
282 | switch (handle_mm_fault(mm, vma, address, 1)) { | 330 | if (!fshared) |
283 | case VM_FAULT_MINOR: | 331 | down_read(&mm->mmap_sem); |
284 | current->min_flt++; | 332 | vma = find_vma(mm, address); |
285 | break; | 333 | if (vma && address >= vma->vm_start && |
286 | case VM_FAULT_MAJOR: | 334 | (vma->vm_flags & VM_WRITE)) { |
287 | current->maj_flt++; | 335 | switch (handle_mm_fault(mm, vma, address, 1)) { |
288 | break; | 336 | case VM_FAULT_MINOR: |
289 | default: | 337 | ret = 0; |
290 | return -EFAULT; | 338 | current->min_flt++; |
339 | break; | ||
340 | case VM_FAULT_MAJOR: | ||
341 | ret = 0; | ||
342 | current->maj_flt++; | ||
343 | break; | ||
344 | } | ||
291 | } | 345 | } |
292 | return 0; | 346 | if (!fshared) |
347 | up_read(&mm->mmap_sem); | ||
348 | return ret; | ||
293 | } | 349 | } |
294 | 350 | ||
295 | /* | 351 | /* |
@@ -439,18 +495,19 @@ void exit_pi_state_list(struct task_struct *curr) | |||
439 | } | 495 | } |
440 | 496 | ||
441 | static int | 497 | static int |
442 | lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me) | 498 | lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, |
499 | union futex_key *key, struct futex_pi_state **ps) | ||
443 | { | 500 | { |
444 | struct futex_pi_state *pi_state = NULL; | 501 | struct futex_pi_state *pi_state = NULL; |
445 | struct futex_q *this, *next; | 502 | struct futex_q *this, *next; |
446 | struct list_head *head; | 503 | struct plist_head *head; |
447 | struct task_struct *p; | 504 | struct task_struct *p; |
448 | pid_t pid; | 505 | pid_t pid; |
449 | 506 | ||
450 | head = &hb->chain; | 507 | head = &hb->chain; |
451 | 508 | ||
452 | list_for_each_entry_safe(this, next, head, list) { | 509 | plist_for_each_entry_safe(this, next, head, list) { |
453 | if (match_futex(&this->key, &me->key)) { | 510 | if (match_futex(&this->key, key)) { |
454 | /* | 511 | /* |
455 | * Another waiter already exists - bump up | 512 | * Another waiter already exists - bump up |
456 | * the refcount and return its pi_state: | 513 | * the refcount and return its pi_state: |
@@ -465,7 +522,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me) | |||
465 | WARN_ON(!atomic_read(&pi_state->refcount)); | 522 | WARN_ON(!atomic_read(&pi_state->refcount)); |
466 | 523 | ||
467 | atomic_inc(&pi_state->refcount); | 524 | atomic_inc(&pi_state->refcount); |
468 | me->pi_state = pi_state; | 525 | *ps = pi_state; |
469 | 526 | ||
470 | return 0; | 527 | return 0; |
471 | } | 528 | } |
@@ -492,7 +549,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me) | |||
492 | rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); | 549 | rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); |
493 | 550 | ||
494 | /* Store the key for possible exit cleanups: */ | 551 | /* Store the key for possible exit cleanups: */ |
495 | pi_state->key = me->key; | 552 | pi_state->key = *key; |
496 | 553 | ||
497 | spin_lock_irq(&p->pi_lock); | 554 | spin_lock_irq(&p->pi_lock); |
498 | WARN_ON(!list_empty(&pi_state->list)); | 555 | WARN_ON(!list_empty(&pi_state->list)); |
@@ -502,7 +559,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me) | |||
502 | 559 | ||
503 | put_task_struct(p); | 560 | put_task_struct(p); |
504 | 561 | ||
505 | me->pi_state = pi_state; | 562 | *ps = pi_state; |
506 | 563 | ||
507 | return 0; | 564 | return 0; |
508 | } | 565 | } |
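lookup_pi_state() above is a lookup-or-create with refcounting: the first waiter allocates the pi_state, and every later waiter on the same futex shares it. The shape of that pattern, reduced to plain C (hypothetical types; an ordinary list and counter stand in for the plist and atomic_t):

    #include <stdlib.h>

    struct pi_obj {
            struct pi_obj *next;
            unsigned long key;
            int refcount;
    };

    static struct pi_obj *lookup_or_create(struct pi_obj **head,
                                           unsigned long key)
    {
            struct pi_obj *p;

            for (p = *head; p; p = p->next)
                    if (p->key == key) {
                            p->refcount++;      /* share existing state */
                            return p;
                    }

            p = malloc(sizeof(*p));             /* first waiter allocates */
            if (!p)
                    return NULL;
            p->key = key;
            p->refcount = 1;
            p->next = *head;
            *head = p;
            return p;
    }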
@@ -513,12 +570,12 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me) | |||
513 | */ | 570 | */ |
514 | static void wake_futex(struct futex_q *q) | 571 | static void wake_futex(struct futex_q *q) |
515 | { | 572 | { |
516 | list_del_init(&q->list); | 573 | plist_del(&q->list, &q->list.plist); |
517 | if (q->filp) | 574 | if (q->filp) |
518 | send_sigio(&q->filp->f_owner, q->fd, POLL_IN); | 575 | send_sigio(&q->filp->f_owner, q->fd, POLL_IN); |
519 | /* | 576 | /* |
520 | * The lock in wake_up_all() is a crucial memory barrier after the | 577 | * The lock in wake_up_all() is a crucial memory barrier after the |
521 | * list_del_init() and also before assigning to q->lock_ptr. | 578 | * plist_del() and also before assigning to q->lock_ptr. |
522 | */ | 579 | */ |
523 | wake_up_all(&q->waiters); | 580 | wake_up_all(&q->waiters); |
524 | /* | 581 | /* |
@@ -562,6 +619,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) | |||
562 | */ | 619 | */ |
563 | if (!(uval & FUTEX_OWNER_DIED)) { | 620 | if (!(uval & FUTEX_OWNER_DIED)) { |
564 | newval = FUTEX_WAITERS | new_owner->pid; | 621 | newval = FUTEX_WAITERS | new_owner->pid; |
622 | /* Keep the FUTEX_WAITER_REQUEUED flag if it was set */ | ||
623 | newval |= (uval & FUTEX_WAITER_REQUEUED); | ||
565 | 624 | ||
566 | pagefault_disable(); | 625 | pagefault_disable(); |
567 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); | 626 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); |
@@ -629,17 +688,19 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) | |||
629 | * Wake up all waiters hashed on the physical page that is mapped | 688 | * Wake up all waiters hashed on the physical page that is mapped |
630 | * to this virtual address: | 689 | * to this virtual address: |
631 | */ | 690 | */ |
632 | static int futex_wake(u32 __user *uaddr, int nr_wake) | 691 | static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, |
692 | int nr_wake) | ||
633 | { | 693 | { |
634 | struct futex_hash_bucket *hb; | 694 | struct futex_hash_bucket *hb; |
635 | struct futex_q *this, *next; | 695 | struct futex_q *this, *next; |
636 | struct list_head *head; | 696 | struct plist_head *head; |
637 | union futex_key key; | 697 | union futex_key key; |
638 | int ret; | 698 | int ret; |
639 | 699 | ||
640 | down_read(¤t->mm->mmap_sem); | 700 | if (fshared) |
701 | down_read(fshared); | ||
641 | 702 | ||
642 | ret = get_futex_key(uaddr, &key); | 703 | ret = get_futex_key(uaddr, fshared, &key); |
643 | if (unlikely(ret != 0)) | 704 | if (unlikely(ret != 0)) |
644 | goto out; | 705 | goto out; |
645 | 706 | ||
@@ -647,7 +708,7 @@ static int futex_wake(u32 __user *uaddr, int nr_wake) | |||
647 | spin_lock(&hb->lock); | 708 | spin_lock(&hb->lock); |
648 | head = &hb->chain; | 709 | head = &hb->chain; |
649 | 710 | ||
650 | list_for_each_entry_safe(this, next, head, list) { | 711 | plist_for_each_entry_safe(this, next, head, list) { |
651 | if (match_futex (&this->key, &key)) { | 712 | if (match_futex (&this->key, &key)) { |
652 | if (this->pi_state) { | 713 | if (this->pi_state) { |
653 | ret = -EINVAL; | 714 | ret = -EINVAL; |
@@ -661,7 +722,261 @@ static int futex_wake(u32 __user *uaddr, int nr_wake) | |||
661 | 722 | ||
662 | spin_unlock(&hb->lock); | 723 | spin_unlock(&hb->lock); |
663 | out: | 724 | out: |
664 | up_read(¤t->mm->mmap_sem); | 725 | if (fshared) |
726 | up_read(fshared); | ||
727 | return ret; | ||
728 | } | ||
729 | |||
730 | /* | ||
731 | * Called from futex_requeue_pi. | ||
732 | * Set FUTEX_WAITERS and FUTEX_WAITER_REQUEUED flags on the | ||
733 | * PI-futex value; look up its associated pi_state if an owner exists, | ||
734 | * or create a new one without an owner. | ||
735 | */ | ||
736 | static inline int | ||
737 | lookup_pi_state_for_requeue(u32 __user *uaddr, struct futex_hash_bucket *hb, | ||
738 | union futex_key *key, | ||
739 | struct futex_pi_state **pi_state) | ||
740 | { | ||
741 | u32 curval, uval, newval; | ||
742 | |||
743 | retry: | ||
744 | /* | ||
745 | * We can't handle a fault cleanly because we can't | ||
746 | * release the locks here. Simply return the fault. | ||
747 | */ | ||
748 | if (get_futex_value_locked(&curval, uaddr)) | ||
749 | return -EFAULT; | ||
750 | |||
751 | /* set the flags FUTEX_WAITERS and FUTEX_WAITER_REQUEUED */ | ||
752 | if ((curval & (FUTEX_WAITERS | FUTEX_WAITER_REQUEUED)) | ||
753 | != (FUTEX_WAITERS | FUTEX_WAITER_REQUEUED)) { | ||
754 | /* | ||
755 | * No waiters yet, so we prepare the futex to have some waiters. | ||
756 | */ | ||
757 | |||
758 | uval = curval; | ||
759 | newval = uval | FUTEX_WAITERS | FUTEX_WAITER_REQUEUED; | ||
760 | |||
761 | pagefault_disable(); | ||
762 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); | ||
763 | pagefault_enable(); | ||
764 | |||
765 | if (unlikely(curval == -EFAULT)) | ||
766 | return -EFAULT; | ||
767 | if (unlikely(curval != uval)) | ||
768 | goto retry; | ||
769 | } | ||
770 | |||
771 | if (!(curval & FUTEX_TID_MASK) | ||
772 | || lookup_pi_state(curval, hb, key, pi_state)) { | ||
773 | /* the futex has no owner (yet) or the lookup failed: | ||
774 | allocate a pi_state without an owner */ | ||
775 | |||
776 | *pi_state = alloc_pi_state(); | ||
777 | |||
778 | /* Already stores the key: */ | ||
779 | (*pi_state)->key = *key; | ||
780 | |||
781 | /* init the mutex without owner */ | ||
782 | __rt_mutex_init(&(*pi_state)->pi_mutex, NULL); | ||
783 | } | ||
784 | |||
785 | return 0; | ||
786 | } | ||
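The retry loop above is the standard compare-and-swap idiom for setting flag bits in a word that other CPUs may modify concurrently. Its essence as a userspace sketch, with GCC's __sync builtin standing in for futex_atomic_cmpxchg_inatomic():

    /* Keep trying until both bits are observed set; each failed CAS
     * refreshes our view of the word, exactly like the 'goto retry'. */
    static void set_bits_atomically(unsigned int *word, unsigned int bits)
    {
            unsigned int cur = *word;

            while ((cur & bits) != bits) {
                    unsigned int old = cur;

                    cur = __sync_val_compare_and_swap(word, old, old | bits);
                    if (cur == old)
                            break;      /* we installed old | bits */
            }
    }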
787 | |||
788 | /* | ||
789 | * Wake up the first nr_wake waiters hashed on futex1, and requeue | ||
790 | * the next nr_requeue waiters following them from futex1's physical | ||
791 | * page to another physical page (the PI-futex uaddr2). | ||
792 | */ | ||
793 | static int futex_requeue_pi(u32 __user *uaddr1, | ||
794 | struct rw_semaphore *fshared, | ||
795 | u32 __user *uaddr2, | ||
796 | int nr_wake, int nr_requeue, u32 *cmpval) | ||
797 | { | ||
798 | union futex_key key1, key2; | ||
799 | struct futex_hash_bucket *hb1, *hb2; | ||
800 | struct plist_head *head1; | ||
801 | struct futex_q *this, *next; | ||
802 | struct futex_pi_state *pi_state2 = NULL; | ||
803 | struct rt_mutex_waiter *waiter, *top_waiter = NULL; | ||
804 | struct rt_mutex *lock2 = NULL; | ||
805 | int ret, drop_count = 0; | ||
806 | |||
807 | if (refill_pi_state_cache()) | ||
808 | return -ENOMEM; | ||
809 | |||
810 | retry: | ||
811 | /* | ||
812 | * First take all the futex related locks: | ||
813 | */ | ||
814 | if (fshared) | ||
815 | down_read(fshared); | ||
816 | |||
817 | ret = get_futex_key(uaddr1, fshared, &key1); | ||
818 | if (unlikely(ret != 0)) | ||
819 | goto out; | ||
820 | ret = get_futex_key(uaddr2, fshared, &key2); | ||
821 | if (unlikely(ret != 0)) | ||
822 | goto out; | ||
823 | |||
824 | hb1 = hash_futex(&key1); | ||
825 | hb2 = hash_futex(&key2); | ||
826 | |||
827 | double_lock_hb(hb1, hb2); | ||
828 | |||
829 | if (likely(cmpval != NULL)) { | ||
830 | u32 curval; | ||
831 | |||
832 | ret = get_futex_value_locked(&curval, uaddr1); | ||
833 | |||
834 | if (unlikely(ret)) { | ||
835 | spin_unlock(&hb1->lock); | ||
836 | if (hb1 != hb2) | ||
837 | spin_unlock(&hb2->lock); | ||
838 | |||
839 | /* | ||
840 | * If we would have faulted, release mmap_sem, fault | ||
841 | * it in and start all over again. | ||
842 | */ | ||
843 | if (fshared) | ||
844 | up_read(fshared); | ||
845 | |||
846 | ret = get_user(curval, uaddr1); | ||
847 | |||
848 | if (!ret) | ||
849 | goto retry; | ||
850 | |||
851 | return ret; | ||
852 | } | ||
853 | if (curval != *cmpval) { | ||
854 | ret = -EAGAIN; | ||
855 | goto out_unlock; | ||
856 | } | ||
857 | } | ||
858 | |||
859 | head1 = &hb1->chain; | ||
860 | plist_for_each_entry_safe(this, next, head1, list) { | ||
861 | if (!match_futex (&this->key, &key1)) | ||
862 | continue; | ||
863 | if (++ret <= nr_wake) { | ||
864 | wake_futex(this); | ||
865 | } else { | ||
866 | /* | ||
867 | * FIRST: get and set the pi_state | ||
868 | */ | ||
869 | if (!pi_state2) { | ||
870 | int s; | ||
871 | /* do this only the first time we requeue someone */ | ||
872 | s = lookup_pi_state_for_requeue(uaddr2, hb2, | ||
873 | &key2, &pi_state2); | ||
874 | if (s) { | ||
875 | ret = s; | ||
876 | goto out_unlock; | ||
877 | } | ||
878 | |||
879 | lock2 = &pi_state2->pi_mutex; | ||
880 | spin_lock(&lock2->wait_lock); | ||
881 | |||
882 | /* Save the top waiter of the wait_list */ | ||
883 | if (rt_mutex_has_waiters(lock2)) | ||
884 | top_waiter = rt_mutex_top_waiter(lock2); | ||
885 | } else | ||
886 | atomic_inc(&pi_state2->refcount); | ||
887 | |||
888 | |||
889 | this->pi_state = pi_state2; | ||
890 | |||
891 | /* | ||
892 | * SECOND: requeue futex_q to the correct hashbucket | ||
893 | */ | ||
894 | |||
895 | /* | ||
896 | * If key1 and key2 hash to the same bucket, no need to | ||
897 | * requeue. | ||
898 | */ | ||
899 | if (likely(head1 != &hb2->chain)) { | ||
900 | plist_del(&this->list, &hb1->chain); | ||
901 | plist_add(&this->list, &hb2->chain); | ||
902 | this->lock_ptr = &hb2->lock; | ||
903 | #ifdef CONFIG_DEBUG_PI_LIST | ||
904 | this->list.plist.lock = &hb2->lock; | ||
905 | #endif | ||
906 | } | ||
907 | this->key = key2; | ||
908 | get_futex_key_refs(&key2); | ||
909 | drop_count++; | ||
910 | |||
911 | |||
912 | /* | ||
913 | * THIRD: queue it to lock2 | ||
914 | */ | ||
915 | spin_lock_irq(&this->task->pi_lock); | ||
916 | waiter = &this->waiter; | ||
917 | waiter->task = this->task; | ||
918 | waiter->lock = lock2; | ||
919 | plist_node_init(&waiter->list_entry, this->task->prio); | ||
920 | plist_node_init(&waiter->pi_list_entry, this->task->prio); | ||
921 | plist_add(&waiter->list_entry, &lock2->wait_list); | ||
922 | this->task->pi_blocked_on = waiter; | ||
923 | spin_unlock_irq(&this->task->pi_lock); | ||
924 | |||
925 | if (ret - nr_wake >= nr_requeue) | ||
926 | break; | ||
927 | } | ||
928 | } | ||
929 | |||
930 | /* If we've requeued some tasks and the top_waiter of the rt_mutex | ||
931 | * has changed, we must adjust the priority of the owner, if any */ | ||
932 | if (drop_count) { | ||
933 | struct task_struct *owner = rt_mutex_owner(lock2); | ||
934 | if (owner && | ||
935 | (top_waiter != (waiter = rt_mutex_top_waiter(lock2)))) { | ||
936 | int chain_walk = 0; | ||
937 | |||
938 | spin_lock_irq(&owner->pi_lock); | ||
939 | if (top_waiter) | ||
940 | plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); | ||
941 | else | ||
942 | /* | ||
943 | * There were no waiters before the requeue, | ||
944 | * so the flag must be updated. | ||
945 | */ | ||
946 | mark_rt_mutex_waiters(lock2); | ||
947 | |||
948 | plist_add(&waiter->pi_list_entry, &owner->pi_waiters); | ||
949 | __rt_mutex_adjust_prio(owner); | ||
950 | if (owner->pi_blocked_on) { | ||
951 | chain_walk = 1; | ||
952 | get_task_struct(owner); | ||
953 | } | ||
954 | |||
955 | spin_unlock_irq(&owner->pi_lock); | ||
956 | spin_unlock(&lock2->wait_lock); | ||
957 | |||
958 | if (chain_walk) | ||
959 | rt_mutex_adjust_prio_chain(owner, 0, lock2, NULL, | ||
960 | current); | ||
961 | } else { | ||
962 | /* No owner or the top_waiter does not change */ | ||
963 | mark_rt_mutex_waiters(lock2); | ||
964 | spin_unlock(&lock2->wait_lock); | ||
965 | } | ||
966 | } | ||
967 | |||
968 | out_unlock: | ||
969 | spin_unlock(&hb1->lock); | ||
970 | if (hb1 != hb2) | ||
971 | spin_unlock(&hb2->lock); | ||
972 | |||
973 | /* drop_futex_key_refs() must be called outside the spinlocks. */ | ||
974 | while (--drop_count >= 0) | ||
975 | drop_futex_key_refs(&key1); | ||
976 | |||
977 | out: | ||
978 | if (fshared) | ||
979 | up_read(fshared); | ||
665 | return ret; | 980 | return ret; |
666 | } | 981 | } |
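What futex_requeue_pi is for, in userspace terms: broadcasting a condition variable whose mutex is priority-inheriting, without a thundering herd. A sketch of the POSIX-level pattern this serves (whether a given glibc actually emits the new requeue-PI operation depends on the libc build):

    #include <pthread.h>

    static pthread_mutex_t m;
    static pthread_cond_t  c = PTHREAD_COND_INITIALIZER;

    int main(void)
    {
            pthread_mutexattr_t ma;

            pthread_mutexattr_init(&ma);
            pthread_mutexattr_setprotocol(&ma, PTHREAD_PRIO_INHERIT);
            pthread_mutex_init(&m, &ma);        /* backed by a PI-futex */

            /* With requeue-PI, a broadcast can wake one waiter and move
             * the rest straight onto the PI-futex of m, so they block
             * on the mutex (with PI boosting) instead of all waking. */
            pthread_cond_broadcast(&c);
            return 0;
    }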
667 | 982 | ||
@@ -670,22 +985,24 @@ out: | |||
670 | * to this virtual address: | 985 | * to this virtual address: |
671 | */ | 986 | */ |
672 | static int | 987 | static int |
673 | futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2, | 988 | futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared, |
989 | u32 __user *uaddr2, | ||
674 | int nr_wake, int nr_wake2, int op) | 990 | int nr_wake, int nr_wake2, int op) |
675 | { | 991 | { |
676 | union futex_key key1, key2; | 992 | union futex_key key1, key2; |
677 | struct futex_hash_bucket *hb1, *hb2; | 993 | struct futex_hash_bucket *hb1, *hb2; |
678 | struct list_head *head; | 994 | struct plist_head *head; |
679 | struct futex_q *this, *next; | 995 | struct futex_q *this, *next; |
680 | int ret, op_ret, attempt = 0; | 996 | int ret, op_ret, attempt = 0; |
681 | 997 | ||
682 | retryfull: | 998 | retryfull: |
683 | down_read(¤t->mm->mmap_sem); | 999 | if (fshared) |
1000 | down_read(fshared); | ||
684 | 1001 | ||
685 | ret = get_futex_key(uaddr1, &key1); | 1002 | ret = get_futex_key(uaddr1, fshared, &key1); |
686 | if (unlikely(ret != 0)) | 1003 | if (unlikely(ret != 0)) |
687 | goto out; | 1004 | goto out; |
688 | ret = get_futex_key(uaddr2, &key2); | 1005 | ret = get_futex_key(uaddr2, fshared, &key2); |
689 | if (unlikely(ret != 0)) | 1006 | if (unlikely(ret != 0)) |
690 | goto out; | 1007 | goto out; |
691 | 1008 | ||
@@ -725,11 +1042,10 @@ retry: | |||
725 | * still holding the mmap_sem. | 1042 | * still holding the mmap_sem. |
726 | */ | 1043 | */ |
727 | if (attempt++) { | 1044 | if (attempt++) { |
728 | if (futex_handle_fault((unsigned long)uaddr2, | 1045 | ret = futex_handle_fault((unsigned long)uaddr2, |
729 | attempt)) { | 1046 | fshared, attempt); |
730 | ret = -EFAULT; | 1047 | if (ret) |
731 | goto out; | 1048 | goto out; |
732 | } | ||
733 | goto retry; | 1049 | goto retry; |
734 | } | 1050 | } |
735 | 1051 | ||
@@ -737,7 +1053,8 @@ retry: | |||
737 | * If we would have faulted, release mmap_sem, | 1053 | * If we would have faulted, release mmap_sem, |
738 | * fault it in and start all over again. | 1054 | * fault it in and start all over again. |
739 | */ | 1055 | */ |
740 | up_read(¤t->mm->mmap_sem); | 1056 | if (fshared) |
1057 | up_read(fshared); | ||
741 | 1058 | ||
742 | ret = get_user(dummy, uaddr2); | 1059 | ret = get_user(dummy, uaddr2); |
743 | if (ret) | 1060 | if (ret) |
@@ -748,7 +1065,7 @@ retry: | |||
748 | 1065 | ||
749 | head = &hb1->chain; | 1066 | head = &hb1->chain; |
750 | 1067 | ||
751 | list_for_each_entry_safe(this, next, head, list) { | 1068 | plist_for_each_entry_safe(this, next, head, list) { |
752 | if (match_futex (&this->key, &key1)) { | 1069 | if (match_futex (&this->key, &key1)) { |
753 | wake_futex(this); | 1070 | wake_futex(this); |
754 | if (++ret >= nr_wake) | 1071 | if (++ret >= nr_wake) |
@@ -760,7 +1077,7 @@ retry: | |||
760 | head = &hb2->chain; | 1077 | head = &hb2->chain; |
761 | 1078 | ||
762 | op_ret = 0; | 1079 | op_ret = 0; |
763 | list_for_each_entry_safe(this, next, head, list) { | 1080 | plist_for_each_entry_safe(this, next, head, list) { |
764 | if (match_futex (&this->key, &key2)) { | 1081 | if (match_futex (&this->key, &key2)) { |
765 | wake_futex(this); | 1082 | wake_futex(this); |
766 | if (++op_ret >= nr_wake2) | 1083 | if (++op_ret >= nr_wake2) |
@@ -774,7 +1091,8 @@ retry: | |||
774 | if (hb1 != hb2) | 1091 | if (hb1 != hb2) |
775 | spin_unlock(&hb2->lock); | 1092 | spin_unlock(&hb2->lock); |
776 | out: | 1093 | out: |
777 | up_read(¤t->mm->mmap_sem); | 1094 | if (fshared) |
1095 | up_read(fshared); | ||
778 | return ret; | 1096 | return ret; |
779 | } | 1097 | } |
780 | 1098 | ||
@@ -782,22 +1100,24 @@ out: | |||
782 | * Requeue all waiters hashed on one physical page to another | 1100 | * Requeue all waiters hashed on one physical page to another |
783 | * physical page. | 1101 | * physical page. |
784 | */ | 1102 | */ |
785 | static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2, | 1103 | static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared, |
1104 | u32 __user *uaddr2, | ||
786 | int nr_wake, int nr_requeue, u32 *cmpval) | 1105 | int nr_wake, int nr_requeue, u32 *cmpval) |
787 | { | 1106 | { |
788 | union futex_key key1, key2; | 1107 | union futex_key key1, key2; |
789 | struct futex_hash_bucket *hb1, *hb2; | 1108 | struct futex_hash_bucket *hb1, *hb2; |
790 | struct list_head *head1; | 1109 | struct plist_head *head1; |
791 | struct futex_q *this, *next; | 1110 | struct futex_q *this, *next; |
792 | int ret, drop_count = 0; | 1111 | int ret, drop_count = 0; |
793 | 1112 | ||
794 | retry: | 1113 | retry: |
795 | down_read(¤t->mm->mmap_sem); | 1114 | if (fshared) |
1115 | down_read(fshared); | ||
796 | 1116 | ||
797 | ret = get_futex_key(uaddr1, &key1); | 1117 | ret = get_futex_key(uaddr1, fshared, &key1); |
798 | if (unlikely(ret != 0)) | 1118 | if (unlikely(ret != 0)) |
799 | goto out; | 1119 | goto out; |
800 | ret = get_futex_key(uaddr2, &key2); | 1120 | ret = get_futex_key(uaddr2, fshared, &key2); |
801 | if (unlikely(ret != 0)) | 1121 | if (unlikely(ret != 0)) |
802 | goto out; | 1122 | goto out; |
803 | 1123 | ||
@@ -820,7 +1140,8 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2, | |||
820 | * If we would have faulted, release mmap_sem, fault | 1140 | * If we would have faulted, release mmap_sem, fault |
821 | * it in and start all over again. | 1141 | * it in and start all over again. |
822 | */ | 1142 | */ |
823 | up_read(¤t->mm->mmap_sem); | 1143 | if (fshared) |
1144 | up_read(fshared); | ||
824 | 1145 | ||
825 | ret = get_user(curval, uaddr1); | 1146 | ret = get_user(curval, uaddr1); |
826 | 1147 | ||
@@ -836,7 +1157,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2, | |||
836 | } | 1157 | } |
837 | 1158 | ||
838 | head1 = &hb1->chain; | 1159 | head1 = &hb1->chain; |
839 | list_for_each_entry_safe(this, next, head1, list) { | 1160 | plist_for_each_entry_safe(this, next, head1, list) { |
840 | if (!match_futex (&this->key, &key1)) | 1161 | if (!match_futex (&this->key, &key1)) |
841 | continue; | 1162 | continue; |
842 | if (++ret <= nr_wake) { | 1163 | if (++ret <= nr_wake) { |
@@ -847,9 +1168,13 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2, | |||
847 | * requeue. | 1168 | * requeue. |
848 | */ | 1169 | */ |
849 | if (likely(head1 != &hb2->chain)) { | 1170 | if (likely(head1 != &hb2->chain)) { |
850 | list_move_tail(&this->list, &hb2->chain); | 1171 | plist_del(&this->list, &hb1->chain); |
1172 | plist_add(&this->list, &hb2->chain); | ||
851 | this->lock_ptr = &hb2->lock; | 1173 | this->lock_ptr = &hb2->lock; |
852 | } | 1174 | #ifdef CONFIG_DEBUG_PI_LIST |
1175 | this->list.plist.lock = &hb2->lock; | ||
1176 | #endif | ||
1177 | } | ||
853 | this->key = key2; | 1178 | this->key = key2; |
854 | get_futex_key_refs(&key2); | 1179 | get_futex_key_refs(&key2); |
855 | drop_count++; | 1180 | drop_count++; |
@@ -869,7 +1194,8 @@ out_unlock: | |||
869 | drop_futex_key_refs(&key1); | 1194 | drop_futex_key_refs(&key1); |
870 | 1195 | ||
871 | out: | 1196 | out: |
872 | up_read(¤t->mm->mmap_sem); | 1197 | if (fshared) |
1198 | up_read(fshared); | ||
873 | return ret; | 1199 | return ret; |
874 | } | 1200 | } |
875 | 1201 | ||
@@ -894,7 +1220,23 @@ queue_lock(struct futex_q *q, int fd, struct file *filp) | |||
894 | 1220 | ||
895 | static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb) | 1221 | static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb) |
896 | { | 1222 | { |
897 | list_add_tail(&q->list, &hb->chain); | 1223 | int prio; |
1224 | |||
1225 | /* | ||
1226 | * The priority used to register this element is | ||
1227 | * - either the real thread-priority for the real-time threads | ||
1228 | * (i.e. threads with a priority lower than MAX_RT_PRIO) | ||
1229 | * - or MAX_RT_PRIO for non-RT threads. | ||
1230 | * Thus, all RT-threads are woken first in priority order, and | ||
1231 | * the others are woken last, in FIFO order. | ||
1232 | */ | ||
1233 | prio = min(current->normal_prio, MAX_RT_PRIO); | ||
1234 | |||
1235 | plist_node_init(&q->list, prio); | ||
1236 | #ifdef CONFIG_DEBUG_PI_LIST | ||
1237 | q->list.plist.lock = &hb->lock; | ||
1238 | #endif | ||
1239 | plist_add(&q->list, &hb->chain); | ||
898 | q->task = current; | 1240 | q->task = current; |
899 | spin_unlock(&hb->lock); | 1241 | spin_unlock(&hb->lock); |
900 | } | 1242 | } |
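A compact restatement of the wake-ordering rule in the comment above (MAX_RT_PRIO value as in the kernel's sched.h; the helper name is ours):

    #define MAX_RT_PRIO 100     /* kernel value: prio < 100 means RT */

    /* plist sorts by ascending priority value and keeps equal-priority
     * nodes in insertion order, so RT waiters wake by priority while
     * all SCHED_OTHER waiters, lumped at MAX_RT_PRIO, wake FIFO. */
    static int futex_queue_prio(int normal_prio)
    {
            return normal_prio < MAX_RT_PRIO ? normal_prio : MAX_RT_PRIO;
    }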
@@ -949,8 +1291,8 @@ static int unqueue_me(struct futex_q *q) | |||
949 | spin_unlock(lock_ptr); | 1291 | spin_unlock(lock_ptr); |
950 | goto retry; | 1292 | goto retry; |
951 | } | 1293 | } |
952 | WARN_ON(list_empty(&q->list)); | 1294 | WARN_ON(plist_node_empty(&q->list)); |
953 | list_del(&q->list); | 1295 | plist_del(&q->list, &q->list.plist); |
954 | 1296 | ||
955 | BUG_ON(q->pi_state); | 1297 | BUG_ON(q->pi_state); |
956 | 1298 | ||
@@ -964,39 +1306,104 @@ static int unqueue_me(struct futex_q *q) | |||
964 | 1306 | ||
965 | /* | 1307 | /* |
966 | * PI futexes cannot be requeued and must remove themselves from the | 1308 | * PI futexes cannot be requeued and must remove themselves from the |
967 | * hash bucket. The hash bucket lock is held on entry and dropped here. | 1309 | * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry |
1310 | * and dropped here. | ||
968 | */ | 1311 | */ |
969 | static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb) | 1312 | static void unqueue_me_pi(struct futex_q *q) |
970 | { | 1313 | { |
971 | WARN_ON(list_empty(&q->list)); | 1314 | WARN_ON(plist_node_empty(&q->list)); |
972 | list_del(&q->list); | 1315 | plist_del(&q->list, &q->list.plist); |
973 | 1316 | ||
974 | BUG_ON(!q->pi_state); | 1317 | BUG_ON(!q->pi_state); |
975 | free_pi_state(q->pi_state); | 1318 | free_pi_state(q->pi_state); |
976 | q->pi_state = NULL; | 1319 | q->pi_state = NULL; |
977 | 1320 | ||
978 | spin_unlock(&hb->lock); | 1321 | spin_unlock(q->lock_ptr); |
979 | 1322 | ||
980 | drop_futex_key_refs(&q->key); | 1323 | drop_futex_key_refs(&q->key); |
981 | } | 1324 | } |
982 | 1325 | ||
1326 | /* | ||
1327 | * Fixup the pi_state owner with current. | ||
1328 | * | ||
1329 | * The curr->mm semaphore must be held; it is released before this | ||
1330 | * function returns. | ||
1331 | */ | ||
1332 | static int fixup_pi_state_owner(u32 __user *uaddr, struct rw_semaphore *fshared, | ||
1333 | struct futex_q *q, | ||
1334 | struct futex_hash_bucket *hb, | ||
1335 | struct task_struct *curr) | ||
1336 | { | ||
1337 | u32 newtid = curr->pid | FUTEX_WAITERS; | ||
1338 | struct futex_pi_state *pi_state = q->pi_state; | ||
1339 | u32 uval, curval, newval; | ||
1340 | int ret; | ||
1341 | |||
1342 | /* Owner died? */ | ||
1343 | if (pi_state->owner != NULL) { | ||
1344 | spin_lock_irq(&pi_state->owner->pi_lock); | ||
1345 | WARN_ON(list_empty(&pi_state->list)); | ||
1346 | list_del_init(&pi_state->list); | ||
1347 | spin_unlock_irq(&pi_state->owner->pi_lock); | ||
1348 | } else | ||
1349 | newtid |= FUTEX_OWNER_DIED; | ||
1350 | |||
1351 | pi_state->owner = curr; | ||
1352 | |||
1353 | spin_lock_irq(&curr->pi_lock); | ||
1354 | WARN_ON(!list_empty(&pi_state->list)); | ||
1355 | list_add(&pi_state->list, &curr->pi_state_list); | ||
1356 | spin_unlock_irq(&curr->pi_lock); | ||
1357 | |||
1358 | /* Unqueue and drop the lock */ | ||
1359 | unqueue_me_pi(q); | ||
1360 | if (fshared) | ||
1361 | up_read(fshared); | ||
1362 | /* | ||
1363 | * We own it, so we have to replace the pending owner | ||
1364 | * TID. This must be atomic as we have to preserve the | ||
1365 | * owner died bit here. | ||
1366 | */ | ||
1367 | ret = get_user(uval, uaddr); | ||
1368 | while (!ret) { | ||
1369 | newval = (uval & FUTEX_OWNER_DIED) | newtid; | ||
1370 | newval |= (uval & FUTEX_WAITER_REQUEUED); | ||
1371 | curval = futex_atomic_cmpxchg_inatomic(uaddr, | ||
1372 | uval, newval); | ||
1373 | if (curval == -EFAULT) | ||
1374 | ret = -EFAULT; | ||
1375 | if (curval == uval) | ||
1376 | break; | ||
1377 | uval = curval; | ||
1378 | } | ||
1379 | return ret; | ||
1380 | } | ||
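The loop above rewrites only the TID field of the futex word while carrying the owner-died and requeued bits forward. For reference, a sketch of the word layout it manipulates (FUTEX_WAITERS and FUTEX_OWNER_DIED match mainline futex.h; the FUTEX_WAITER_REQUEUED and narrowed FUTEX_TID_MASK values are assumptions about this series):

    #define FUTEX_WAITERS         0x80000000  /* waiters blocked in-kernel */
    #define FUTEX_OWNER_DIED      0x40000000  /* robust owner exited       */
    #define FUTEX_WAITER_REQUEUED 0x20000000  /* assumed: requeued waiter  */
    #define FUTEX_TID_MASK        0x1fffffff  /* assumed: owner TID bits   */

    /* One step of the handover above; newtid already carries
     * FUTEX_WAITERS, per fixup_pi_state_owner(). */
    static unsigned int handover(unsigned int uval, unsigned int newtid)
    {
            return (uval & (FUTEX_OWNER_DIED | FUTEX_WAITER_REQUEUED))
                    | newtid;
    }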
1381 | |||
1382 | /* | ||
1383 | * In case we must use restart_block to restart a futex_wait, | ||
1384 | * we encode the 'shared' capability in arg3: | ||
1385 | */ | ||
1386 | #define ARG3_SHARED 1 | ||
1387 | |||
983 | static long futex_wait_restart(struct restart_block *restart); | 1388 | static long futex_wait_restart(struct restart_block *restart); |
984 | static int futex_wait_abstime(u32 __user *uaddr, u32 val, | 1389 | static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, |
985 | int timed, unsigned long abs_time) | 1390 | u32 val, ktime_t *abs_time) |
986 | { | 1391 | { |
987 | struct task_struct *curr = current; | 1392 | struct task_struct *curr = current; |
988 | DECLARE_WAITQUEUE(wait, curr); | 1393 | DECLARE_WAITQUEUE(wait, curr); |
989 | struct futex_hash_bucket *hb; | 1394 | struct futex_hash_bucket *hb; |
990 | struct futex_q q; | 1395 | struct futex_q q; |
991 | unsigned long time_left = 0; | ||
992 | u32 uval; | 1396 | u32 uval; |
993 | int ret; | 1397 | int ret; |
1398 | struct hrtimer_sleeper t, *to = NULL; | ||
1399 | int rem = 0; | ||
994 | 1400 | ||
995 | q.pi_state = NULL; | 1401 | q.pi_state = NULL; |
996 | retry: | 1402 | retry: |
997 | down_read(&curr->mm->mmap_sem); | 1403 | if (fshared) |
1404 | down_read(fshared); | ||
998 | 1405 | ||
999 | ret = get_futex_key(uaddr, &q.key); | 1406 | ret = get_futex_key(uaddr, fshared, &q.key); |
1000 | if (unlikely(ret != 0)) | 1407 | if (unlikely(ret != 0)) |
1001 | goto out_release_sem; | 1408 | goto out_release_sem; |
1002 | 1409 | ||
@@ -1019,8 +1426,8 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val, | |||
1019 | * a wakeup when *uaddr != val on entry to the syscall. This is | 1426 | * a wakeup when *uaddr != val on entry to the syscall. This is |
1020 | * rare, but normal. | 1427 | * rare, but normal. |
1021 | * | 1428 | * |
1022 | * We hold the mmap semaphore, so the mapping cannot have changed | 1429 | * For shared futexes, we hold the mmap semaphore, so the mapping |
1023 | * since we looked it up in get_futex_key. | 1430 | * cannot have changed since we looked it up in get_futex_key. |
1024 | */ | 1431 | */ |
1025 | ret = get_futex_value_locked(&uval, uaddr); | 1432 | ret = get_futex_value_locked(&uval, uaddr); |
1026 | 1433 | ||
@@ -1031,7 +1438,8 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val, | |||
1031 | * If we would have faulted, release mmap_sem, fault it in and | 1438 | * If we would have faulted, release mmap_sem, fault it in and |
1032 | * start all over again. | 1439 | * start all over again. |
1033 | */ | 1440 | */ |
1034 | up_read(&curr->mm->mmap_sem); | 1441 | if (fshared) |
1442 | up_read(fshared); | ||
1035 | 1443 | ||
1036 | ret = get_user(uval, uaddr); | 1444 | ret = get_user(uval, uaddr); |
1037 | 1445 | ||
@@ -1043,6 +1451,14 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val, | |||
1043 | if (uval != val) | 1451 | if (uval != val) |
1044 | goto out_unlock_release_sem; | 1452 | goto out_unlock_release_sem; |
1045 | 1453 | ||
1454 | /* | ||
1455 | * This rt_mutex_waiter structure is prepared here and will | ||
1456 | * be used only if this task is requeued from a normal futex to | ||
1457 | * a PI-futex with futex_requeue_pi. | ||
1458 | */ | ||
1459 | debug_rt_mutex_init_waiter(&q.waiter); | ||
1460 | q.waiter.task = NULL; | ||
1461 | |||
1046 | /* Only actually queue if *uaddr contained val. */ | 1462 | /* Only actually queue if *uaddr contained val. */ |
1047 | __queue_me(&q, hb); | 1463 | __queue_me(&q, hb); |
1048 | 1464 | ||
@@ -1050,7 +1466,8 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val, | |||
1050 | * Now the futex is queued and we have checked the data, we | 1466 | * Now the futex is queued and we have checked the data, we |
1051 | * don't want to hold mmap_sem while we sleep. | 1467 | * don't want to hold mmap_sem while we sleep. |
1052 | */ | 1468 | */ |
1053 | up_read(&curr->mm->mmap_sem); | 1469 | if (fshared) |
1470 | up_read(fshared); | ||
1054 | 1471 | ||
1055 | /* | 1472 | /* |
1056 | * There might have been scheduling since the queue_me(), as we | 1473 | * There might have been scheduling since the queue_me(), as we |
@@ -1065,23 +1482,33 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val, | |||
1065 | __set_current_state(TASK_INTERRUPTIBLE); | 1482 | __set_current_state(TASK_INTERRUPTIBLE); |
1066 | add_wait_queue(&q.waiters, &wait); | 1483 | add_wait_queue(&q.waiters, &wait); |
1067 | /* | 1484 | /* |
1068 | * !list_empty() is safe here without any lock. | 1485 | * !plist_node_empty() is safe here without any lock. |
1069 | * q.lock_ptr != 0 is not safe, because of ordering against wakeup. | 1486 | * q.lock_ptr != 0 is not safe, because of ordering against wakeup. |
1070 | */ | 1487 | */ |
1071 | time_left = 0; | 1488 | if (likely(!plist_node_empty(&q.list))) { |
1072 | if (likely(!list_empty(&q.list))) { | 1489 | if (!abs_time) |
1073 | unsigned long rel_time; | 1490 | schedule(); |
1074 | 1491 | else { | |
1075 | if (timed) { | 1492 | to = &t; |
1076 | unsigned long now = jiffies; | 1493 | hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
1077 | if (time_after(now, abs_time)) | 1494 | hrtimer_init_sleeper(&t, current); |
1078 | rel_time = 0; | 1495 | t.timer.expires = *abs_time; |
1079 | else | ||
1080 | rel_time = abs_time - now; | ||
1081 | } else | ||
1082 | rel_time = MAX_SCHEDULE_TIMEOUT; | ||
1083 | 1496 | ||
1084 | time_left = schedule_timeout(rel_time); | 1497 | hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_ABS); |
1498 | |||
1499 | /* | ||
1500 | * The timer could have already expired, in which | ||
1501 | * case current would be flagged for rescheduling. | ||
1502 | * Don't bother calling schedule. | ||
1503 | */ | ||
1504 | if (likely(t.task)) | ||
1505 | schedule(); | ||
1506 | |||
1507 | hrtimer_cancel(&t.timer); | ||
1508 | |||
1509 | /* Flag if a timeout occurred */ | ||
1510 | rem = (t.task == NULL); | ||
1511 | } | ||
1085 | } | 1512 | } |
1086 | __set_current_state(TASK_RUNNING); | 1513 | __set_current_state(TASK_RUNNING); |
1087 | 1514 | ||
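The hrtimer sleeper above takes an absolute CLOCK_MONOTONIC expiry, which is what lets a signal-interrupted wait be restarted without stretching its timeout. A userspace sketch of the same compute-the-deadline-once discipline:

    #include <time.h>

    /* Compute an absolute deadline once; re-waiting against it after
     * an interruption cannot extend the overall timeout. */
    static struct timespec deadline_in_ms(long ms)
    {
            struct timespec ts;

            clock_gettime(CLOCK_MONOTONIC, &ts);
            ts.tv_sec  += ms / 1000;
            ts.tv_nsec += (ms % 1000) * 1000000L;
            if (ts.tv_nsec >= 1000000000L) {
                    ts.tv_sec  += 1;
                    ts.tv_nsec -= 1000000000L;
            }
            return ts;
    }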
@@ -1090,17 +1517,80 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val, | |||
1090 | * we are the only user of it. | 1517 | * we are the only user of it. |
1091 | */ | 1518 | */ |
1092 | 1519 | ||
1520 | if (q.pi_state) { | ||
1521 | /* | ||
1522 | * We were woken but have been requeued on a PI-futex. | ||
1523 | * We have to complete the lock acquisition by taking | ||
1524 | * the rtmutex. | ||
1525 | */ | ||
1526 | |||
1527 | struct rt_mutex *lock = &q.pi_state->pi_mutex; | ||
1528 | |||
1529 | spin_lock(&lock->wait_lock); | ||
1530 | if (unlikely(q.waiter.task)) { | ||
1531 | remove_waiter(lock, &q.waiter); | ||
1532 | } | ||
1533 | spin_unlock(&lock->wait_lock); | ||
1534 | |||
1535 | if (rem) | ||
1536 | ret = -ETIMEDOUT; | ||
1537 | else | ||
1538 | ret = rt_mutex_timed_lock(lock, to, 1); | ||
1539 | |||
1540 | if (fshared) | ||
1541 | down_read(fshared); | ||
1542 | spin_lock(q.lock_ptr); | ||
1543 | |||
1544 | /* | ||
1545 | * Got the lock. We might not be the anticipated owner if we | ||
1546 | * did a lock-steal - fix up the PI-state in that case. | ||
1547 | */ | ||
1548 | if (!ret && q.pi_state->owner != curr) { | ||
1549 | /* | ||
1550 | * We MUST play with the futex we were requeued on, | ||
1551 | * NOT the current futex. | ||
1552 | * We can retrieve it from the key of the pi_state | ||
1553 | */ | ||
1554 | uaddr = q.pi_state->key.uaddr; | ||
1555 | |||
1556 | /* mmap_sem and the hash bucket lock are released when | ||
1557 | * this function returns */ | ||
1558 | ret = fixup_pi_state_owner(uaddr, fshared, | ||
1559 | &q, hb, curr); | ||
1560 | } else { | ||
1561 | /* | ||
1562 | * Catch the rare case, where the lock was released | ||
1563 | * when we were on the way back before we locked | ||
1564 | * the hash bucket. | ||
1565 | */ | ||
1566 | if (ret && q.pi_state->owner == curr) { | ||
1567 | if (rt_mutex_trylock(&q.pi_state->pi_mutex)) | ||
1568 | ret = 0; | ||
1569 | } | ||
1570 | /* Unqueue and drop the lock */ | ||
1571 | unqueue_me_pi(&q); | ||
1572 | if (fshared) | ||
1573 | up_read(fshared); | ||
1574 | } | ||
1575 | |||
1576 | debug_rt_mutex_free_waiter(&q.waiter); | ||
1577 | |||
1578 | return ret; | ||
1579 | } | ||
1580 | |||
1581 | debug_rt_mutex_free_waiter(&q.waiter); | ||
1582 | |||
1093 | /* If we were woken (and unqueued), we succeeded, whatever. */ | 1583 | /* If we were woken (and unqueued), we succeeded, whatever. */ |
1094 | if (!unqueue_me(&q)) | 1584 | if (!unqueue_me(&q)) |
1095 | return 0; | 1585 | return 0; |
1096 | if (time_left == 0) | 1586 | if (rem) |
1097 | return -ETIMEDOUT; | 1587 | return -ETIMEDOUT; |
1098 | 1588 | ||
1099 | /* | 1589 | /* |
1100 | * We expect signal_pending(current), but another thread may | 1590 | * We expect signal_pending(current), but another thread may |
1101 | * have handled it for us already. | 1591 | * have handled it for us already. |
1102 | */ | 1592 | */ |
1103 | if (time_left == MAX_SCHEDULE_TIMEOUT) | 1593 | if (!abs_time) |
1104 | return -ERESTARTSYS; | 1594 | return -ERESTARTSYS; |
1105 | else { | 1595 | else { |
1106 | struct restart_block *restart; | 1596 | struct restart_block *restart; |
@@ -1108,8 +1598,10 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val, | |||
1108 | restart->fn = futex_wait_restart; | 1598 | restart->fn = futex_wait_restart; |
1109 | restart->arg0 = (unsigned long)uaddr; | 1599 | restart->arg0 = (unsigned long)uaddr; |
1110 | restart->arg1 = (unsigned long)val; | 1600 | restart->arg1 = (unsigned long)val; |
1111 | restart->arg2 = (unsigned long)timed; | 1601 | restart->arg2 = (unsigned long)abs_time; |
1112 | restart->arg3 = abs_time; | 1602 | restart->arg3 = 0; |
1603 | if (fshared) | ||
1604 | restart->arg3 |= ARG3_SHARED; | ||
1113 | return -ERESTART_RESTARTBLOCK; | 1605 | return -ERESTART_RESTARTBLOCK; |
1114 | } | 1606 | } |
1115 | 1607 | ||
@@ -1117,65 +1609,111 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val, | |||
1117 | queue_unlock(&q, hb); | 1609 | queue_unlock(&q, hb); |
1118 | 1610 | ||
1119 | out_release_sem: | 1611 | out_release_sem: |
1120 | up_read(&curr->mm->mmap_sem); | 1612 | if (fshared) |
1613 | up_read(fshared); | ||
1121 | return ret; | 1614 | return ret; |
1122 | } | 1615 | } |
1123 | 1616 | ||
1124 | static int futex_wait(u32 __user *uaddr, u32 val, unsigned long rel_time) | ||
1125 | { | ||
1126 | int timed = (rel_time != MAX_SCHEDULE_TIMEOUT); | ||
1127 | return futex_wait_abstime(uaddr, val, timed, jiffies+rel_time); | ||
1128 | } | ||
1129 | 1617 | ||
1130 | static long futex_wait_restart(struct restart_block *restart) | 1618 | static long futex_wait_restart(struct restart_block *restart) |
1131 | { | 1619 | { |
1132 | u32 __user *uaddr = (u32 __user *)restart->arg0; | 1620 | u32 __user *uaddr = (u32 __user *)restart->arg0; |
1133 | u32 val = (u32)restart->arg1; | 1621 | u32 val = (u32)restart->arg1; |
1134 | int timed = (int)restart->arg2; | 1622 | ktime_t *abs_time = (ktime_t *)restart->arg2; |
1135 | unsigned long abs_time = restart->arg3; | 1623 | struct rw_semaphore *fshared = NULL; |
1136 | 1624 | ||
1137 | restart->fn = do_no_restart_syscall; | 1625 | restart->fn = do_no_restart_syscall; |
1138 | return (long)futex_wait_abstime(uaddr, val, timed, abs_time); | 1626 | if (restart->arg3 & ARG3_SHARED) |
1627 | fshared = ¤t->mm->mmap_sem; | ||
1628 | return (long)futex_wait(uaddr, fshared, val, abs_time); | ||
1139 | } | 1629 | } |
1140 | 1630 | ||
1141 | 1631 | ||
1632 | static void set_pi_futex_owner(struct futex_hash_bucket *hb, | ||
1633 | union futex_key *key, struct task_struct *p) | ||
1634 | { | ||
1635 | struct plist_head *head; | ||
1636 | struct futex_q *this, *next; | ||
1637 | struct futex_pi_state *pi_state = NULL; | ||
1638 | struct rt_mutex *lock; | ||
1639 | |||
1640 | /* Search for a waiter that should already exist */ | ||
1641 | |||
1642 | head = &hb->chain; | ||
1643 | |||
1644 | plist_for_each_entry_safe(this, next, head, list) { | ||
1645 | if (match_futex (&this->key, key)) { | ||
1646 | pi_state = this->pi_state; | ||
1647 | break; | ||
1648 | } | ||
1649 | } | ||
1650 | |||
1651 | BUG_ON(!pi_state); | ||
1652 | |||
1653 | /* set p as pi_state's owner */ | ||
1654 | lock = &pi_state->pi_mutex; | ||
1655 | |||
1656 | spin_lock(&lock->wait_lock); | ||
1657 | spin_lock_irq(&p->pi_lock); | ||
1658 | |||
1659 | list_add(&pi_state->list, &p->pi_state_list); | ||
1660 | pi_state->owner = p; | ||
1661 | |||
1662 | |||
1663 | /* set p as pi_mutex's owner */ | ||
1664 | debug_rt_mutex_proxy_lock(lock, p); | ||
1665 | WARN_ON(rt_mutex_owner(lock)); | ||
1666 | rt_mutex_set_owner(lock, p, 0); | ||
1667 | rt_mutex_deadlock_account_lock(lock, p); | ||
1668 | |||
1669 | plist_add(&rt_mutex_top_waiter(lock)->pi_list_entry, | ||
1670 | &p->pi_waiters); | ||
1671 | __rt_mutex_adjust_prio(p); | ||
1672 | |||
1673 | spin_unlock_irq(&p->pi_lock); | ||
1674 | spin_unlock(&lock->wait_lock); | ||
1675 | } | ||
1676 | |||
1142 | /* | 1677 | /* |
1143 | * Userspace tried a 0 -> TID atomic transition of the futex value | 1678 | * Userspace tried a 0 -> TID atomic transition of the futex value |
1144 | * and failed. The kernel side here does the whole locking operation: | 1679 | * and failed. The kernel side here does the whole locking operation: |
1145 | * if there are waiters then it will block, it does PI, etc. (Due to | 1680 | * if there are waiters then it will block, it does PI, etc. (Due to |
1146 | * races the kernel might see a 0 value of the futex too.) | 1681 | * races the kernel might see a 0 value of the futex too.) |
1147 | */ | 1682 | */ |
1148 | static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, | 1683 | static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, |
1149 | long nsec, int trylock) | 1684 | int detect, ktime_t *time, int trylock) |
1150 | { | 1685 | { |
1151 | struct hrtimer_sleeper timeout, *to = NULL; | 1686 | struct hrtimer_sleeper timeout, *to = NULL; |
1152 | struct task_struct *curr = current; | 1687 | struct task_struct *curr = current; |
1153 | struct futex_hash_bucket *hb; | 1688 | struct futex_hash_bucket *hb; |
1154 | u32 uval, newval, curval; | 1689 | u32 uval, newval, curval; |
1155 | struct futex_q q; | 1690 | struct futex_q q; |
1156 | int ret, attempt = 0; | 1691 | int ret, lock_held, attempt = 0; |
1157 | 1692 | ||
1158 | if (refill_pi_state_cache()) | 1693 | if (refill_pi_state_cache()) |
1159 | return -ENOMEM; | 1694 | return -ENOMEM; |
1160 | 1695 | ||
1161 | if (sec != MAX_SCHEDULE_TIMEOUT) { | 1696 | if (time) { |
1162 | to = &timeout; | 1697 | to = &timeout; |
1163 | hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); | 1698 | hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); |
1164 | hrtimer_init_sleeper(to, current); | 1699 | hrtimer_init_sleeper(to, current); |
1165 | to->timer.expires = ktime_set(sec, nsec); | 1700 | to->timer.expires = *time; |
1166 | } | 1701 | } |
1167 | 1702 | ||
1168 | q.pi_state = NULL; | 1703 | q.pi_state = NULL; |
1169 | retry: | 1704 | retry: |
1170 | down_read(&curr->mm->mmap_sem); | 1705 | if (fshared) |
1706 | down_read(fshared); | ||
1171 | 1707 | ||
1172 | ret = get_futex_key(uaddr, &q.key); | 1708 | ret = get_futex_key(uaddr, fshared, &q.key); |
1173 | if (unlikely(ret != 0)) | 1709 | if (unlikely(ret != 0)) |
1174 | goto out_release_sem; | 1710 | goto out_release_sem; |
1175 | 1711 | ||
1176 | hb = queue_lock(&q, -1, NULL); | 1712 | hb = queue_lock(&q, -1, NULL); |
1177 | 1713 | ||
1178 | retry_locked: | 1714 | retry_locked: |
1715 | lock_held = 0; | ||
1716 | |||
1179 | /* | 1717 | /* |
1180 | * To avoid races, we attempt to take the lock here again | 1718 | * To avoid races, we attempt to take the lock here again |
1181 | * (by doing a 0 -> TID atomic cmpxchg), while holding all | 1719 | * (by doing a 0 -> TID atomic cmpxchg), while holding all |
@@ -1194,7 +1732,16 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, | |||
1194 | if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) { | 1732 | if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) { |
1195 | if (!detect && 0) | 1733 | if (!detect && 0) |
1196 | force_sig(SIGKILL, current); | 1734 | force_sig(SIGKILL, current); |
1197 | ret = -EDEADLK; | 1735 | /* |
1736 | * Normally, this check is done in user space. | ||
1737 | * In case of requeue, the owner may attempt to lock this futex, | ||
1738 | * even if the ownership has already been given by the previous | ||
1739 | * waker. | ||
1740 | * Normally this would be a deadlock, but it is not in the | ||
1741 | * REQUEUE_PI case. | ||
1742 | */ | ||
1743 | if (!(curval & FUTEX_WAITER_REQUEUED)) | ||
1744 | ret = -EDEADLK; | ||
1198 | goto out_unlock_release_sem; | 1745 | goto out_unlock_release_sem; |
1199 | } | 1746 | } |
1200 | 1747 | ||
@@ -1206,7 +1753,18 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, | |||
1206 | goto out_unlock_release_sem; | 1753 | goto out_unlock_release_sem; |
1207 | 1754 | ||
1208 | uval = curval; | 1755 | uval = curval; |
1209 | newval = uval | FUTEX_WAITERS; | 1756 | /* |
1757 | * In case of a requeue, check if there already is an owner. | ||
1758 | * If not, just take the futex. | ||
1759 | */ | ||
1760 | if ((curval & FUTEX_WAITER_REQUEUED) && !(curval & FUTEX_TID_MASK)) { | ||
1761 | /* set current as futex owner */ | ||
1762 | newval = curval | current->pid; | ||
1763 | lock_held = 1; | ||
1764 | } else | ||
1765 | /* Set the WAITERS flag, so the owner will know it has someone | ||
1766 | * to wake at the next unlock */ | ||
1767 | newval = curval | FUTEX_WAITERS; | ||
1210 | 1768 | ||
1211 | pagefault_disable(); | 1769 | pagefault_disable(); |
1212 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); | 1770 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); |
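The branch above is the requeue-PI special case of lock acquisition: a requeued waiter may find the futex flagged REQUEUED but ownerless, and can then claim it directly instead of queueing. The decision as a sketch (flag values repeated from the layout sketch earlier; the REQUEUED and TID-mask values remain assumptions):

    #define FUTEX_WAITERS         0x80000000
    #define FUTEX_WAITER_REQUEUED 0x20000000    /* assumed value */
    #define FUTEX_TID_MASK        0x1fffffff    /* assumed value */

    /* Mirrors the branch above: claim an ownerless requeued futex,
     * otherwise mark it contended so the owner wakes us on unlock. */
    static unsigned int next_futex_val(unsigned int cur, unsigned int tid,
                                       int *took_lock)
    {
            if ((cur & FUTEX_WAITER_REQUEUED) && !(cur & FUTEX_TID_MASK)) {
                    *took_lock = 1;
                    return cur | tid;
            }
            *took_lock = 0;
            return cur | FUTEX_WAITERS;
    }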
@@ -1217,11 +1775,16 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, | |||
1217 | if (unlikely(curval != uval)) | 1775 | if (unlikely(curval != uval)) |
1218 | goto retry_locked; | 1776 | goto retry_locked; |
1219 | 1777 | ||
1778 | if (lock_held) { | ||
1779 | set_pi_futex_owner(hb, &q.key, curr); | ||
1780 | goto out_unlock_release_sem; | ||
1781 | } | ||
1782 | |||
1220 | /* | 1783 | /* |
1221 | * We dont have the lock. Look up the PI state (or create it if | 1784 | * We dont have the lock. Look up the PI state (or create it if |
1222 | * we are the first waiter): | 1785 | * we are the first waiter): |
1223 | */ | 1786 | */ |
1224 | ret = lookup_pi_state(uval, hb, &q); | 1787 | ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state); |
1225 | 1788 | ||
1226 | if (unlikely(ret)) { | 1789 | if (unlikely(ret)) { |
1227 | /* | 1790 | /* |
@@ -1263,7 +1826,8 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, | |||
1263 | * Now the futex is queued and we have checked the data, we | 1826 | * Now the futex is queued and we have checked the data, we |
1264 | * don't want to hold mmap_sem while we sleep. | 1827 | * don't want to hold mmap_sem while we sleep. |
1265 | */ | 1828 | */ |
1266 | up_read(&curr->mm->mmap_sem); | 1829 | if (fshared) |
1830 | up_read(fshared); | ||
1267 | 1831 | ||
1268 | WARN_ON(!q.pi_state); | 1832 | WARN_ON(!q.pi_state); |
1269 | /* | 1833 | /* |
@@ -1277,52 +1841,18 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, | |||
1277 | ret = ret ? 0 : -EWOULDBLOCK; | 1841 | ret = ret ? 0 : -EWOULDBLOCK; |
1278 | } | 1842 | } |
1279 | 1843 | ||
1280 | down_read(&curr->mm->mmap_sem); | 1844 | if (fshared) |
1845 | down_read(fshared); | ||
1281 | spin_lock(q.lock_ptr); | 1846 | spin_lock(q.lock_ptr); |
1282 | 1847 | ||
1283 | /* | 1848 | /* |
1284 | * Got the lock. We might not be the anticipated owner if we | 1849 | * Got the lock. We might not be the anticipated owner if we |
1285 | * did a lock-steal - fix up the PI-state in that case. | 1850 | * did a lock-steal - fix up the PI-state in that case. |
1286 | */ | 1851 | */ |
1287 | if (!ret && q.pi_state->owner != curr) { | 1852 | if (!ret && q.pi_state->owner != curr) |
1288 | u32 newtid = current->pid | FUTEX_WAITERS; | 1853 | /* mmap_sem is unlocked on return from this function */ |
1289 | 1854 | ret = fixup_pi_state_owner(uaddr, fshared, &q, hb, curr); | |
1290 | /* Owner died? */ | 1855 | else { |
1291 | if (q.pi_state->owner != NULL) { | ||
1292 | spin_lock_irq(&q.pi_state->owner->pi_lock); | ||
1293 | WARN_ON(list_empty(&q.pi_state->list)); | ||
1294 | list_del_init(&q.pi_state->list); | ||
1295 | spin_unlock_irq(&q.pi_state->owner->pi_lock); | ||
1296 | } else | ||
1297 | newtid |= FUTEX_OWNER_DIED; | ||
1298 | |||
1299 | q.pi_state->owner = current; | ||
1300 | |||
1301 | spin_lock_irq(¤t->pi_lock); | ||
1302 | WARN_ON(!list_empty(&q.pi_state->list)); | ||
1303 | list_add(&q.pi_state->list, ¤t->pi_state_list); | ||
1304 | spin_unlock_irq(¤t->pi_lock); | ||
1305 | |||
1306 | /* Unqueue and drop the lock */ | ||
1307 | unqueue_me_pi(&q, hb); | ||
1308 | up_read(&curr->mm->mmap_sem); | ||
1309 | /* | ||
1310 | * We own it, so we have to replace the pending owner | ||
1311 | * TID. This must be atomic as we have preserve the | ||
1312 | * owner died bit here. | ||
1313 | */ | ||
1314 | ret = get_user(uval, uaddr); | ||
1315 | while (!ret) { | ||
1316 | newval = (uval & FUTEX_OWNER_DIED) | newtid; | ||
1317 | curval = futex_atomic_cmpxchg_inatomic(uaddr, | ||
1318 | uval, newval); | ||
1319 | if (curval == -EFAULT) | ||
1320 | ret = -EFAULT; | ||
1321 | if (curval == uval) | ||
1322 | break; | ||
1323 | uval = curval; | ||
1324 | } | ||
1325 | } else { | ||
1326 | /* | 1856 | /* |
1327 | * Catch the rare case, where the lock was released | 1857 | * Catch the rare case, where the lock was released |
1328 | * when we were on the way back before we locked | 1858 | * when we were on the way back before we locked |
@@ -1333,8 +1863,9 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, | |||
1333 | ret = 0; | 1863 | ret = 0; |
1334 | } | 1864 | } |
1335 | /* Unqueue and drop the lock */ | 1865 | /* Unqueue and drop the lock */ |
1336 | unqueue_me_pi(&q, hb); | 1866 | unqueue_me_pi(&q); |
1337 | up_read(&curr->mm->mmap_sem); | 1867 | if (fshared) |
1868 | up_read(fshared); | ||
1338 | } | 1869 | } |
1339 | 1870 | ||
1340 | if (!detect && ret == -EDEADLK && 0) | 1871 | if (!detect && ret == -EDEADLK && 0) |
@@ -1346,7 +1877,8 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, | |||
1346 | queue_unlock(&q, hb); | 1877 | queue_unlock(&q, hb); |
1347 | 1878 | ||
1348 | out_release_sem: | 1879 | out_release_sem: |
1349 | up_read(&curr->mm->mmap_sem); | 1880 | if (fshared) |
1881 | up_read(fshared); | ||
1350 | return ret; | 1882 | return ret; |
1351 | 1883 | ||
1352 | uaddr_faulted: | 1884 | uaddr_faulted: |
@@ -1357,15 +1889,16 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, | |||
1357 | * still holding the mmap_sem. | 1889 | * still holding the mmap_sem. |
1358 | */ | 1890 | */ |
1359 | if (attempt++) { | 1891 | if (attempt++) { |
1360 | if (futex_handle_fault((unsigned long)uaddr, attempt)) { | 1892 | ret = futex_handle_fault((unsigned long)uaddr, fshared, |
1361 | ret = -EFAULT; | 1893 | attempt); |
1894 | if (ret) | ||
1362 | goto out_unlock_release_sem; | 1895 | goto out_unlock_release_sem; |
1363 | } | ||
1364 | goto retry_locked; | 1896 | goto retry_locked; |
1365 | } | 1897 | } |
1366 | 1898 | ||
1367 | queue_unlock(&q, hb); | 1899 | queue_unlock(&q, hb); |
1368 | up_read(&curr->mm->mmap_sem); | 1900 | if (fshared) |
1901 | up_read(fshared); | ||
1369 | 1902 | ||
1370 | ret = get_user(uval, uaddr); | 1903 | ret = get_user(uval, uaddr); |
1371 | if (!ret && (uval != -EFAULT)) | 1904 | if (!ret && (uval != -EFAULT)) |
@@ -1379,12 +1912,12 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, | |||
1379 | * This is the in-kernel slowpath: we look up the PI state (if any), | 1912 | * This is the in-kernel slowpath: we look up the PI state (if any), |
1380 | * and do the rt-mutex unlock. | 1913 | * and do the rt-mutex unlock. |
1381 | */ | 1914 | */ |
1382 | static int futex_unlock_pi(u32 __user *uaddr) | 1915 | static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared) |
1383 | { | 1916 | { |
1384 | struct futex_hash_bucket *hb; | 1917 | struct futex_hash_bucket *hb; |
1385 | struct futex_q *this, *next; | 1918 | struct futex_q *this, *next; |
1386 | u32 uval; | 1919 | u32 uval; |
1387 | struct list_head *head; | 1920 | struct plist_head *head; |
1388 | union futex_key key; | 1921 | union futex_key key; |
1389 | int ret, attempt = 0; | 1922 | int ret, attempt = 0; |
1390 | 1923 | ||
@@ -1399,9 +1932,10 @@ retry: | |||
1399 | /* | 1932 | /* |
1400 | * First take all the futex related locks: | 1933 | * First take all the futex related locks: |
1401 | */ | 1934 | */ |
1402 | down_read(¤t->mm->mmap_sem); | 1935 | if (fshared) |
1936 | down_read(fshared); | ||
1403 | 1937 | ||
1404 | ret = get_futex_key(uaddr, &key); | 1938 | ret = get_futex_key(uaddr, fshared, &key); |
1405 | if (unlikely(ret != 0)) | 1939 | if (unlikely(ret != 0)) |
1406 | goto out; | 1940 | goto out; |
1407 | 1941 | ||
@@ -1435,7 +1969,7 @@ retry_locked: | |||
1435 | */ | 1969 | */ |
1436 | head = &hb->chain; | 1970 | head = &hb->chain; |
1437 | 1971 | ||
1438 | list_for_each_entry_safe(this, next, head, list) { | 1972 | plist_for_each_entry_safe(this, next, head, list) { |
1439 | if (!match_futex (&this->key, &key)) | 1973 | if (!match_futex (&this->key, &key)) |
1440 | continue; | 1974 | continue; |
1441 | ret = wake_futex_pi(uaddr, uval, this); | 1975 | ret = wake_futex_pi(uaddr, uval, this); |
@@ -1460,7 +1994,8 @@ retry_locked: | |||
1460 | out_unlock: | 1994 | out_unlock: |
1461 | spin_unlock(&hb->lock); | 1995 | spin_unlock(&hb->lock); |
1462 | out: | 1996 | out: |
1463 | up_read(¤t->mm->mmap_sem); | 1997 | if (fshared) |
1998 | up_read(fshared); | ||
1464 | 1999 | ||
1465 | return ret; | 2000 | return ret; |
1466 | 2001 | ||
@@ -1472,15 +2007,16 @@ pi_faulted: | |||
1472 | * still holding the mmap_sem. | 2007 | * still holding the mmap_sem. |
1473 | */ | 2008 | */ |
1474 | if (attempt++) { | 2009 | if (attempt++) { |
1475 | if (futex_handle_fault((unsigned long)uaddr, attempt)) { | 2010 | ret = futex_handle_fault((unsigned long)uaddr, fshared, |
1476 | ret = -EFAULT; | 2011 | attempt); |
2012 | if (ret) | ||
1477 | goto out_unlock; | 2013 | goto out_unlock; |
1478 | } | ||
1479 | goto retry_locked; | 2014 | goto retry_locked; |
1480 | } | 2015 | } |
1481 | 2016 | ||
1482 | spin_unlock(&hb->lock); | 2017 | spin_unlock(&hb->lock); |
1483 | up_read(¤t->mm->mmap_sem); | 2018 | if (fshared) |
2019 | up_read(fshared); | ||
1484 | 2020 | ||
1485 | ret = get_user(uval, uaddr); | 2021 | ret = get_user(uval, uaddr); |
1486 | if (!ret && (uval != -EFAULT)) | 2022 | if (!ret && (uval != -EFAULT)) |
@@ -1509,10 +2045,10 @@ static unsigned int futex_poll(struct file *filp, | |||
1509 | poll_wait(filp, &q->waiters, wait); | 2045 | poll_wait(filp, &q->waiters, wait); |
1510 | 2046 | ||
1511 | /* | 2047 | /* |
1512 | * list_empty() is safe here without any lock. | 2048 | * plist_node_empty() is safe here without any lock. |
1513 | * q->lock_ptr != 0 is not safe, because of ordering against wakeup. | 2049 | * q->lock_ptr != 0 is not safe, because of ordering against wakeup. |
1514 | */ | 2050 | */ |
1515 | if (list_empty(&q->list)) | 2051 | if (plist_node_empty(&q->list)) |
1516 | ret = POLLIN | POLLRDNORM; | 2052 | ret = POLLIN | POLLRDNORM; |
1517 | 2053 | ||
1518 | return ret; | 2054 | return ret; |
@@ -1532,6 +2068,7 @@ static int futex_fd(u32 __user *uaddr, int signal) | |||
1532 | struct futex_q *q; | 2068 | struct futex_q *q; |
1533 | struct file *filp; | 2069 | struct file *filp; |
1534 | int ret, err; | 2070 | int ret, err; |
2071 | struct rw_semaphore *fshared; | ||
1535 | static unsigned long printk_interval; | 2072 | static unsigned long printk_interval; |
1536 | 2073 | ||
1537 | if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) { | 2074 | if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) { |
@@ -1573,11 +2110,12 @@ static int futex_fd(u32 __user *uaddr, int signal) | |||
1573 | } | 2110 | } |
1574 | q->pi_state = NULL; | 2111 | q->pi_state = NULL; |
1575 | 2112 | ||
1576 | down_read(¤t->mm->mmap_sem); | 2113 | fshared = ¤t->mm->mmap_sem; |
1577 | err = get_futex_key(uaddr, &q->key); | 2114 | down_read(fshared); |
2115 | err = get_futex_key(uaddr, fshared, &q->key); | ||
1578 | 2116 | ||
1579 | if (unlikely(err != 0)) { | 2117 | if (unlikely(err != 0)) { |
1580 | up_read(¤t->mm->mmap_sem); | 2118 | up_read(fshared); |
1581 | kfree(q); | 2119 | kfree(q); |
1582 | goto error; | 2120 | goto error; |
1583 | } | 2121 | } |
@@ -1589,7 +2127,7 @@ static int futex_fd(u32 __user *uaddr, int signal) | |||
1589 | filp->private_data = q; | 2127 | filp->private_data = q; |
1590 | 2128 | ||
1591 | queue_me(q, ret, filp); | 2129 | queue_me(q, ret, filp); |
1592 | up_read(¤t->mm->mmap_sem); | 2130 | up_read(fshared); |
1593 | 2131 | ||
1594 | /* Now we map fd to filp, so userspace can access it */ | 2132 | /* Now we map fd to filp, so userspace can access it */ |
1595 | fd_install(ret, filp); | 2133 | fd_install(ret, filp); |
@@ -1702,6 +2240,8 @@ retry: | |||
1702 | * userspace. | 2240 | * userspace. |
1703 | */ | 2241 | */ |
1704 | mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; | 2242 | mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; |
2243 | /* Also keep the FUTEX_WAITER_REQUEUED flag if set */ | ||
2244 | mval |= (uval & FUTEX_WAITER_REQUEUED); | ||
1705 | nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval); | 2245 | nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval); |
1706 | 2246 | ||
1707 | if (nval == -EFAULT) | 2247 | if (nval == -EFAULT) |
@@ -1716,7 +2256,7 @@ retry: | |||
1716 | */ | 2256 | */ |
1717 | if (!pi) { | 2257 | if (!pi) { |
1718 | if (uval & FUTEX_WAITERS) | 2258 | if (uval & FUTEX_WAITERS) |
1719 | futex_wake(uaddr, 1); | 2259 | futex_wake(uaddr, &curr->mm->mmap_sem, 1); |
1720 | } | 2260 | } |
1721 | } | 2261 | } |
1722 | return 0; | 2262 | return 0; |
@@ -1772,7 +2312,8 @@ void exit_robust_list(struct task_struct *curr) | |||
1772 | return; | 2312 | return; |
1773 | 2313 | ||
1774 | if (pending) | 2314 | if (pending) |
1775 | handle_futex_death((void __user *)pending + futex_offset, curr, pip); | 2315 | handle_futex_death((void __user *)pending + futex_offset, |
2316 | curr, pip); | ||
1776 | 2317 | ||
1777 | while (entry != &head->list) { | 2318 | while (entry != &head->list) { |
1778 | /* | 2319 | /* |
@@ -1798,39 +2339,47 @@ void exit_robust_list(struct task_struct *curr) | |||
1798 | } | 2339 | } |
1799 | } | 2340 | } |
1800 | 2341 | ||
1801 | long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout, | 2342 | long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, |
1802 | u32 __user *uaddr2, u32 val2, u32 val3) | 2343 | u32 __user *uaddr2, u32 val2, u32 val3) |
1803 | { | 2344 | { |
1804 | int ret; | 2345 | int ret; |
2346 | int cmd = op & FUTEX_CMD_MASK; | ||
2347 | struct rw_semaphore *fshared = NULL; | ||
2348 | |||
2349 | if (!(op & FUTEX_PRIVATE_FLAG)) | ||
2350 | fshared = ¤t->mm->mmap_sem; | ||
1805 | 2351 | ||
1806 | switch (op) { | 2352 | switch (cmd) { |
1807 | case FUTEX_WAIT: | 2353 | case FUTEX_WAIT: |
1808 | ret = futex_wait(uaddr, val, timeout); | 2354 | ret = futex_wait(uaddr, fshared, val, timeout); |
1809 | break; | 2355 | break; |
1810 | case FUTEX_WAKE: | 2356 | case FUTEX_WAKE: |
1811 | ret = futex_wake(uaddr, val); | 2357 | ret = futex_wake(uaddr, fshared, val); |
1812 | break; | 2358 | break; |
1813 | case FUTEX_FD: | 2359 | case FUTEX_FD: |
1814 | /* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */ | 2360 | /* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */ |
1815 | ret = futex_fd(uaddr, val); | 2361 | ret = futex_fd(uaddr, val); |
1816 | break; | 2362 | break; |
1817 | case FUTEX_REQUEUE: | 2363 | case FUTEX_REQUEUE: |
1818 | ret = futex_requeue(uaddr, uaddr2, val, val2, NULL); | 2364 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL); |
1819 | break; | 2365 | break; |
1820 | case FUTEX_CMP_REQUEUE: | 2366 | case FUTEX_CMP_REQUEUE: |
1821 | ret = futex_requeue(uaddr, uaddr2, val, val2, &val3); | 2367 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3); |
1822 | break; | 2368 | break; |
1823 | case FUTEX_WAKE_OP: | 2369 | case FUTEX_WAKE_OP: |
1824 | ret = futex_wake_op(uaddr, uaddr2, val, val2, val3); | 2370 | ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); |
1825 | break; | 2371 | break; |
1826 | case FUTEX_LOCK_PI: | 2372 | case FUTEX_LOCK_PI: |
1827 | ret = futex_lock_pi(uaddr, val, timeout, val2, 0); | 2373 | ret = futex_lock_pi(uaddr, fshared, val, timeout, 0); |
1828 | break; | 2374 | break; |
1829 | case FUTEX_UNLOCK_PI: | 2375 | case FUTEX_UNLOCK_PI: |
1830 | ret = futex_unlock_pi(uaddr); | 2376 | ret = futex_unlock_pi(uaddr, fshared); |
1831 | break; | 2377 | break; |
1832 | case FUTEX_TRYLOCK_PI: | 2378 | case FUTEX_TRYLOCK_PI: |
1833 | ret = futex_lock_pi(uaddr, 0, timeout, val2, 1); | 2379 | ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); |
2380 | break; | ||
2381 | case FUTEX_CMP_REQUEUE_PI: | ||
2382 | ret = futex_requeue_pi(uaddr, fshared, uaddr2, val, val2, &val3); | ||
1834 | break; | 2383 | break; |
1835 | default: | 2384 | default: |
1836 | ret = -ENOSYS; | 2385 | ret = -ENOSYS; |
@@ -1843,29 +2392,30 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, | |||
1843 | struct timespec __user *utime, u32 __user *uaddr2, | 2392 | struct timespec __user *utime, u32 __user *uaddr2, |
1844 | u32 val3) | 2393 | u32 val3) |
1845 | { | 2394 | { |
1846 | struct timespec t; | 2395 | struct timespec ts; |
1847 | unsigned long timeout = MAX_SCHEDULE_TIMEOUT; | 2396 | ktime_t t, *tp = NULL; |
1848 | u32 val2 = 0; | 2397 | u32 val2 = 0; |
2398 | int cmd = op & FUTEX_CMD_MASK; | ||
1849 | 2399 | ||
1850 | if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) { | 2400 | if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) { |
1851 | if (copy_from_user(&t, utime, sizeof(t)) != 0) | 2401 | if (copy_from_user(&ts, utime, sizeof(ts)) != 0) |
1852 | return -EFAULT; | 2402 | return -EFAULT; |
1853 | if (!timespec_valid(&t)) | 2403 | if (!timespec_valid(&ts)) |
1854 | return -EINVAL; | 2404 | return -EINVAL; |
1855 | if (op == FUTEX_WAIT) | 2405 | |
1856 | timeout = timespec_to_jiffies(&t) + 1; | 2406 | t = timespec_to_ktime(ts); |
1857 | else { | 2407 | if (cmd == FUTEX_WAIT) |
1858 | timeout = t.tv_sec; | 2408 | t = ktime_add(ktime_get(), t); |
1859 | val2 = t.tv_nsec; | 2409 | tp = &t; |
1860 | } | ||
1861 | } | 2410 | } |
1862 | /* | 2411 | /* |
1863 | * requeue parameter in 'utime' if op == FUTEX_REQUEUE. | 2412 | * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE. |
1864 | */ | 2413 | */ |
1865 | if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE) | 2414 | if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE |
2415 | || cmd == FUTEX_CMP_REQUEUE_PI) | ||
1866 | val2 = (u32) (unsigned long) utime; | 2416 | val2 = (u32) (unsigned long) utime; |
1867 | 2417 | ||
1868 | return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3); | 2418 | return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); |
1869 | } | 2419 | } |
1870 | 2420 | ||
1871 | static int futexfs_get_sb(struct file_system_type *fs_type, | 2421 | static int futexfs_get_sb(struct file_system_type *fs_type, |
@@ -1895,7 +2445,7 @@ static int __init init(void) | |||
1895 | } | 2445 | } |
1896 | 2446 | ||
1897 | for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { | 2447 | for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { |
1898 | INIT_LIST_HEAD(&futex_queues[i].chain); | 2448 | plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock); |
1899 | spin_lock_init(&futex_queues[i].lock); | 2449 | spin_lock_init(&futex_queues[i].lock); |
1900 | } | 2450 | } |
1901 | return 0; | 2451 | return 0; |
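
The fshared plumbing above makes the mmap_sem round-trips conditional: when userspace ORs FUTEX_PRIVATE_FLAG into the op, do_futex() leaves fshared NULL and every down_read()/up_read() pair is skipped. A minimal userspace sketch of the opt-in, assuming a linux/futex.h that already defines FUTEX_PRIVATE_FLAG and using a raw syscall wrapper (glibc exposes no futex() function); uaddr must be process-private memory for this to be correct:

	#include <linux/futex.h>
	#include <stdint.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Block while *uaddr == val, never taking mmap_sem in the kernel. */
	static long futex_wait_private(uint32_t *uaddr, uint32_t val)
	{
		return syscall(SYS_futex, uaddr,
			       FUTEX_WAIT | FUTEX_PRIVATE_FLAG,
			       val, NULL, NULL, 0);	/* NULL: no timeout */
	}

	/* Wake up to nr waiters blocked on the private futex at *uaddr. */
	static long futex_wake_private(uint32_t *uaddr, int nr)
	{
		return syscall(SYS_futex, uaddr,
			       FUTEX_WAKE | FUTEX_PRIVATE_FLAG,
			       nr, NULL, NULL, 0);
	}
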
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 50f24eea6cd0..338a9b489fbc 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c | |||
@@ -141,24 +141,24 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, | |||
141 | struct compat_timespec __user *utime, u32 __user *uaddr2, | 141 | struct compat_timespec __user *utime, u32 __user *uaddr2, |
142 | u32 val3) | 142 | u32 val3) |
143 | { | 143 | { |
144 | struct timespec t; | 144 | struct timespec ts; |
145 | unsigned long timeout = MAX_SCHEDULE_TIMEOUT; | 145 | ktime_t t, *tp = NULL; |
146 | int val2 = 0; | 146 | int val2 = 0; |
147 | 147 | ||
148 | if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) { | 148 | if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) { |
149 | if (get_compat_timespec(&t, utime)) | 149 | if (get_compat_timespec(&ts, utime)) |
150 | return -EFAULT; | 150 | return -EFAULT; |
151 | if (!timespec_valid(&t)) | 151 | if (!timespec_valid(&ts)) |
152 | return -EINVAL; | 152 | return -EINVAL; |
153 | |||
154 | t = timespec_to_ktime(ts); | ||
153 | if (op == FUTEX_WAIT) | 155 | if (op == FUTEX_WAIT) |
154 | timeout = timespec_to_jiffies(&t) + 1; | 156 | t = ktime_add(ktime_get(), t); |
155 | else { | 157 | tp = &t; |
156 | timeout = t.tv_sec; | ||
157 | val2 = t.tv_nsec; | ||
158 | } | ||
159 | } | 158 | } |
160 | if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE) | 159 | if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE |
160 | || op == FUTEX_CMP_REQUEUE_PI) | ||
161 | val2 = (int) (unsigned long) utime; | 161 | val2 = (int) (unsigned long) utime; |
162 | 162 | ||
163 | return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3); | 163 | return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); |
164 | } | 164 | } |
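
Both entry points now share one timeout convention: convert the user timespec to a ktime_t, turn FUTEX_WAIT's relative value into an absolute deadline, and hand do_futex() a pointer (or NULL for no timeout). A back-of-the-envelope comparison of what the switch buys, assuming HZ=250; the exact rounding lives in timespec_to_jiffies():

	/*
	 * Old path: relative timeout rounded up to whole scheduler ticks,
	 * so a 1 ms wait at HZ=250 (4 ms per tick) became
	 *	timeout = timespec_to_jiffies(&ts) + 1;	// 2 ticks = 8 ms
	 *
	 * New path: nanosecond resolution, carried as an absolute hrtimer
	 * deadline so syscall restarts do not stretch the wait:
	 *	t = timespec_to_ktime(ts);		// 1 ms stays 1 ms
	 *	if (cmd == FUTEX_WAIT)
	 *		t = ktime_add(ktime_get(), t);	// now + relative
	 */
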
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index c9f4f044a8a8..23c03f43e196 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -1411,11 +1411,13 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, | |||
1411 | switch (action) { | 1411 | switch (action) { |
1412 | 1412 | ||
1413 | case CPU_UP_PREPARE: | 1413 | case CPU_UP_PREPARE: |
1414 | case CPU_UP_PREPARE_FROZEN: | ||
1414 | init_hrtimers_cpu(cpu); | 1415 | init_hrtimers_cpu(cpu); |
1415 | break; | 1416 | break; |
1416 | 1417 | ||
1417 | #ifdef CONFIG_HOTPLUG_CPU | 1418 | #ifdef CONFIG_HOTPLUG_CPU |
1418 | case CPU_DEAD: | 1419 | case CPU_DEAD: |
1420 | case CPU_DEAD_FROZEN: | ||
1419 | clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu); | 1421 | clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu); |
1420 | migrate_hrtimers(cpu); | 1422 | migrate_hrtimers(cpu); |
1421 | break; | 1423 | break; |
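
The two _FROZEN cases here follow the same pattern as the profile.c and rcupdate.c hunks further down: a hotplug transition performed while tasks are frozen for suspend now raises a distinct event, and a notifier that does identical work in both situations simply stacks the case labels. A generic sketch of the idiom (the foo_* names are placeholders, not a real subsystem):

	static int __cpuinit foo_cpu_notify(struct notifier_block *self,
					    unsigned long action, void *hcpu)
	{
		long cpu = (long)hcpu;

		switch (action) {
		case CPU_UP_PREPARE:
		case CPU_UP_PREPARE_FROZEN:	/* same setup, tasks frozen */
			foo_online_cpu(cpu);
			break;
		case CPU_DEAD:
		case CPU_DEAD_FROZEN:		/* same teardown, during suspend */
			foo_offline_cpu(cpu);
			break;
		}
		return NOTIFY_OK;
	}
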
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 32e1ab1477d1..e391cbb1f566 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
@@ -22,7 +22,6 @@ | |||
22 | * handle_bad_irq - handle spurious and unhandled irqs | 22 | * handle_bad_irq - handle spurious and unhandled irqs |
23 | * @irq: the interrupt number | 23 | * @irq: the interrupt number |
24 | * @desc: description of the interrupt | 24 | * @desc: description of the interrupt |
25 | * @regs: pointer to a register structure | ||
26 | * | 25 | * |
27 | * Handles spurious and unhandled IRQs. It also prints a debug message. | 26 |
28 | */ | 27 | */ |
diff --git a/kernel/kmod.c b/kernel/kmod.c index 49cc4b9c1a8d..4d32eb077179 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
@@ -135,7 +135,6 @@ static int ____call_usermodehelper(void *data) | |||
135 | 135 | ||
136 | /* Unblock all signals and set the session keyring. */ | 136 | /* Unblock all signals and set the session keyring. */ |
137 | new_session = key_get(sub_info->ring); | 137 | new_session = key_get(sub_info->ring); |
138 | flush_signals(current); | ||
139 | spin_lock_irq(¤t->sighand->siglock); | 138 | spin_lock_irq(¤t->sighand->siglock); |
140 | old_session = __install_session_keyring(current, new_session); | 139 | old_session = __install_session_keyring(current, new_session); |
141 | flush_signal_handlers(current, 1); | 140 | flush_signal_handlers(current, 1); |
@@ -186,14 +185,9 @@ static int wait_for_helper(void *data) | |||
186 | { | 185 | { |
187 | struct subprocess_info *sub_info = data; | 186 | struct subprocess_info *sub_info = data; |
188 | pid_t pid; | 187 | pid_t pid; |
189 | struct k_sigaction sa; | ||
190 | 188 | ||
191 | /* Install a handler: if SIGCLD isn't handled sys_wait4 won't | 189 | /* Install a handler: if SIGCLD isn't handled sys_wait4 won't |
192 | * populate the status, but will return -ECHILD. */ | 190 | * populate the status, but will return -ECHILD. */ |
193 | sa.sa.sa_handler = SIG_IGN; | ||
194 | sa.sa.sa_flags = 0; | ||
195 | siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD)); | ||
196 | do_sigaction(SIGCHLD, &sa, NULL); | ||
197 | allow_signal(SIGCHLD); | 191 | allow_signal(SIGCHLD); |
198 | 192 | ||
199 | pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD); | 193 | pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD); |
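
The deletion works because, for a kernel thread, allow_signal(SIGCHLD) both unblocks the signal and marks it as handled, which is all sys_wait4() needs to reap the child and fill in the status rather than fail with -ECHILD; the hand-rolled k_sigaction was redundant (my reading of the helpers, not a statement from the changelog). The surviving sequence, annotated:

	allow_signal(SIGCHLD);	/* unblock + mark handled for this kthread */

	pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD);
	if (pid < 0)
		sub_info->retval = pid;
	else
		/* reap step assumed from context; it is outside these hunks */
		sys_wait4(pid, (int __user *)&sub_info->retval, 0, NULL);
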
diff --git a/kernel/kthread.c b/kernel/kthread.c index 87c50ccd1d4e..df8a8e8f6ca4 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* Kernel thread helper functions. | 1 | /* Kernel thread helper functions. |
2 | * Copyright (C) 2004 IBM Corporation, Rusty Russell. | 2 | * Copyright (C) 2004 IBM Corporation, Rusty Russell. |
3 | * | 3 | * |
4 | * Creation is done via keventd, so that we get a clean environment | 4 | * Creation is done via kthreadd, so that we get a clean environment |
5 | * even if we're invoked from userspace (think modprobe, hotplug cpu, | 5 | * even if we're invoked from userspace (think modprobe, hotplug cpu, |
6 | * etc.). | 6 | * etc.). |
7 | */ | 7 | */ |
@@ -15,24 +15,22 @@ | |||
15 | #include <linux/mutex.h> | 15 | #include <linux/mutex.h> |
16 | #include <asm/semaphore.h> | 16 | #include <asm/semaphore.h> |
17 | 17 | ||
18 | /* | 18 | static DEFINE_SPINLOCK(kthread_create_lock); |
19 | * We dont want to execute off keventd since it might | 19 | static LIST_HEAD(kthread_create_list); |
20 | * hold a semaphore our callers hold too: | 20 | struct task_struct *kthreadd_task; |
21 | */ | ||
22 | static struct workqueue_struct *helper_wq; | ||
23 | 21 | ||
24 | struct kthread_create_info | 22 | struct kthread_create_info |
25 | { | 23 | { |
26 | /* Information passed to kthread() from keventd. */ | 24 | /* Information passed to kthread() from kthreadd. */ |
27 | int (*threadfn)(void *data); | 25 | int (*threadfn)(void *data); |
28 | void *data; | 26 | void *data; |
29 | struct completion started; | 27 | struct completion started; |
30 | 28 | ||
31 | /* Result passed back to kthread_create() from keventd. */ | 29 | /* Result passed back to kthread_create() from kthreadd. */ |
32 | struct task_struct *result; | 30 | struct task_struct *result; |
33 | struct completion done; | 31 | struct completion done; |
34 | 32 | ||
35 | struct work_struct work; | 33 | struct list_head list; |
36 | }; | 34 | }; |
37 | 35 | ||
38 | struct kthread_stop_info | 36 | struct kthread_stop_info |
@@ -60,42 +58,17 @@ int kthread_should_stop(void) | |||
60 | } | 58 | } |
61 | EXPORT_SYMBOL(kthread_should_stop); | 59 | EXPORT_SYMBOL(kthread_should_stop); |
62 | 60 | ||
63 | static void kthread_exit_files(void) | ||
64 | { | ||
65 | struct fs_struct *fs; | ||
66 | struct task_struct *tsk = current; | ||
67 | |||
68 | exit_fs(tsk); /* current->fs->count--; */ | ||
69 | fs = init_task.fs; | ||
70 | tsk->fs = fs; | ||
71 | atomic_inc(&fs->count); | ||
72 | exit_files(tsk); | ||
73 | current->files = init_task.files; | ||
74 | atomic_inc(&tsk->files->count); | ||
75 | } | ||
76 | |||
77 | static int kthread(void *_create) | 61 | static int kthread(void *_create) |
78 | { | 62 | { |
79 | struct kthread_create_info *create = _create; | 63 | struct kthread_create_info *create = _create; |
80 | int (*threadfn)(void *data); | 64 | int (*threadfn)(void *data); |
81 | void *data; | 65 | void *data; |
82 | sigset_t blocked; | ||
83 | int ret = -EINTR; | 66 | int ret = -EINTR; |
84 | 67 | ||
85 | kthread_exit_files(); | 68 | /* Copy data: it's on kthread's stack */ |
86 | |||
87 | /* Copy data: it's on keventd's stack */ | ||
88 | threadfn = create->threadfn; | 69 | threadfn = create->threadfn; |
89 | data = create->data; | 70 | data = create->data; |
90 | 71 | ||
91 | /* Block and flush all signals (in case we're not from keventd). */ | ||
92 | sigfillset(&blocked); | ||
93 | sigprocmask(SIG_BLOCK, &blocked, NULL); | ||
94 | flush_signals(current); | ||
95 | |||
96 | /* By default we can run anywhere, unlike keventd. */ | ||
97 | set_cpus_allowed(current, CPU_MASK_ALL); | ||
98 | |||
99 | /* OK, tell user we're spawned, wait for stop or wakeup */ | 72 | /* OK, tell user we're spawned, wait for stop or wakeup */ |
100 | __set_current_state(TASK_INTERRUPTIBLE); | 73 | __set_current_state(TASK_INTERRUPTIBLE); |
101 | complete(&create->started); | 74 | complete(&create->started); |
@@ -112,11 +85,8 @@ static int kthread(void *_create) | |||
112 | return 0; | 85 | return 0; |
113 | } | 86 | } |
114 | 87 | ||
115 | /* We are keventd: create a thread. */ | 88 | static void create_kthread(struct kthread_create_info *create) |
116 | static void keventd_create_kthread(struct work_struct *work) | ||
117 | { | 89 | { |
118 | struct kthread_create_info *create = | ||
119 | container_of(work, struct kthread_create_info, work); | ||
120 | int pid; | 90 | int pid; |
121 | 91 | ||
122 | /* We want our own signal handler (we take no signals by default). */ | 92 | /* We want our own signal handler (we take no signals by default). */ |
@@ -162,17 +132,14 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), | |||
162 | create.data = data; | 132 | create.data = data; |
163 | init_completion(&create.started); | 133 | init_completion(&create.started); |
164 | init_completion(&create.done); | 134 | init_completion(&create.done); |
165 | INIT_WORK(&create.work, keventd_create_kthread); | 135 | |
166 | 136 | spin_lock(&kthread_create_lock); | |
167 | /* | 137 | list_add_tail(&create.list, &kthread_create_list); |
168 | * The workqueue needs to start up first: | 138 | wake_up_process(kthreadd_task); |
169 | */ | 139 | spin_unlock(&kthread_create_lock); |
170 | if (!helper_wq) | 140 | |
171 | create.work.func(&create.work); | 141 | wait_for_completion(&create.done); |
172 | else { | 142 | |
173 | queue_work(helper_wq, &create.work); | ||
174 | wait_for_completion(&create.done); | ||
175 | } | ||
176 | if (!IS_ERR(create.result)) { | 143 | if (!IS_ERR(create.result)) { |
177 | va_list args; | 144 | va_list args; |
178 | va_start(args, namefmt); | 145 | va_start(args, namefmt); |
@@ -180,7 +147,6 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), | |||
180 | namefmt, args); | 147 | namefmt, args); |
181 | va_end(args); | 148 | va_end(args); |
182 | } | 149 | } |
183 | |||
184 | return create.result; | 150 | return create.result; |
185 | } | 151 | } |
186 | EXPORT_SYMBOL(kthread_create); | 152 | EXPORT_SYMBOL(kthread_create); |
@@ -245,12 +211,47 @@ int kthread_stop(struct task_struct *k) | |||
245 | } | 211 | } |
246 | EXPORT_SYMBOL(kthread_stop); | 212 | EXPORT_SYMBOL(kthread_stop); |
247 | 213 | ||
248 | static __init int helper_init(void) | 214 | |
215 | static __init void kthreadd_setup(void) | ||
249 | { | 216 | { |
250 | helper_wq = create_singlethread_workqueue("kthread"); | 217 | struct task_struct *tsk = current; |
251 | BUG_ON(!helper_wq); | ||
252 | 218 | ||
253 | return 0; | 219 | set_task_comm(tsk, "kthreadd"); |
220 | |||
221 | ignore_signals(tsk); | ||
222 | |||
223 | set_user_nice(tsk, -5); | ||
224 | set_cpus_allowed(tsk, CPU_MASK_ALL); | ||
254 | } | 225 | } |
255 | 226 | ||
256 | core_initcall(helper_init); | 227 | int kthreadd(void *unused) |
228 | { | ||
229 | /* Setup a clean context for our children to inherit. */ | ||
230 | kthreadd_setup(); | ||
231 | |||
232 | current->flags |= PF_NOFREEZE; | ||
233 | |||
234 | for (;;) { | ||
235 | set_current_state(TASK_INTERRUPTIBLE); | ||
236 | if (list_empty(&kthread_create_list)) | ||
237 | schedule(); | ||
238 | __set_current_state(TASK_RUNNING); | ||
239 | |||
240 | spin_lock(&kthread_create_lock); | ||
241 | while (!list_empty(&kthread_create_list)) { | ||
242 | struct kthread_create_info *create; | ||
243 | |||
244 | create = list_entry(kthread_create_list.next, | ||
245 | struct kthread_create_info, list); | ||
246 | list_del_init(&create->list); | ||
247 | spin_unlock(&kthread_create_lock); | ||
248 | |||
249 | create_kthread(create); | ||
250 | |||
251 | spin_lock(&kthread_create_lock); | ||
252 | } | ||
253 | spin_unlock(&kthread_create_lock); | ||
254 | } | ||
255 | |||
256 | return 0; | ||
257 | } | ||
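
For reference, the consumer-facing API is untouched by the keventd-to-kthreadd move: a request like the hypothetical module below is what now lands on kthread_create_list and is serviced by the kthreadd() loop above (names illustrative, module_init/module_exit boilerplate elided):

	#include <linux/err.h>
	#include <linux/init.h>
	#include <linux/kthread.h>
	#include <linux/sched.h>

	static struct task_struct *my_task;

	static int my_thread_fn(void *data)
	{
		while (!kthread_should_stop())
			schedule_timeout_interruptible(HZ);	/* wake once a second */
		return 0;
	}

	static int __init my_init(void)
	{
		my_task = kthread_create(my_thread_fn, NULL, "mythread");
		if (IS_ERR(my_task))
			return PTR_ERR(my_task);
		wake_up_process(my_task);	/* kthread_create() leaves it stopped */
		return 0;
	}

	static void __exit my_exit(void)
	{
		kthread_stop(my_task);		/* waits for my_thread_fn to return */
	}
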
diff --git a/kernel/mutex.c b/kernel/mutex.c index e7cbbb82765b..303eab18484b 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -133,7 +133,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) | |||
133 | 133 | ||
134 | debug_mutex_lock_common(lock, &waiter); | 134 | debug_mutex_lock_common(lock, &waiter); |
135 | mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | 135 | mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); |
136 | debug_mutex_add_waiter(lock, &waiter, task->thread_info); | 136 | debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); |
137 | 137 | ||
138 | /* add waiting tasks to the end of the waitqueue (FIFO): */ | 138 | /* add waiting tasks to the end of the waitqueue (FIFO): */ |
139 | list_add_tail(&waiter.list, &lock->wait_list); | 139 | list_add_tail(&waiter.list, &lock->wait_list); |
@@ -159,7 +159,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) | |||
159 | */ | 159 | */ |
160 | if (unlikely(state == TASK_INTERRUPTIBLE && | 160 | if (unlikely(state == TASK_INTERRUPTIBLE && |
161 | signal_pending(task))) { | 161 | signal_pending(task))) { |
162 | mutex_remove_waiter(lock, &waiter, task->thread_info); | 162 | mutex_remove_waiter(lock, &waiter, task_thread_info(task)); |
163 | mutex_release(&lock->dep_map, 1, _RET_IP_); | 163 | mutex_release(&lock->dep_map, 1, _RET_IP_); |
164 | spin_unlock_mutex(&lock->wait_lock, flags); | 164 | spin_unlock_mutex(&lock->wait_lock, flags); |
165 | 165 | ||
@@ -175,8 +175,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) | |||
175 | } | 175 | } |
176 | 176 | ||
177 | /* got the lock - rejoice! */ | 177 | /* got the lock - rejoice! */ |
178 | mutex_remove_waiter(lock, &waiter, task->thread_info); | 178 | mutex_remove_waiter(lock, &waiter, task_thread_info(task)); |
179 | debug_mutex_set_owner(lock, task->thread_info); | 179 | debug_mutex_set_owner(lock, task_thread_info(task)); |
180 | 180 | ||
181 | /* set it to 0 if there are no waiters left: */ | 181 | /* set it to 0 if there are no waiters left: */ |
182 | if (likely(list_empty(&lock->wait_list))) | 182 | if (likely(list_empty(&lock->wait_list))) |
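
The mutex.c hunks are mechanical: task_thread_info() is an accessor, so callers stop assuming task_struct has a literal ->thread_info field. Roughly what it hides, per my recollection of the headers around this era (a per-arch, per-release detail, not quoted from this tree):

	/* older layout: thread_info pointer stored directly */
	#define task_thread_info(task)	((task)->thread_info)
	/* later layout: field renamed; the accessor keeps callers unchanged */
	#define task_thread_info(task)	((struct thread_info *)(task)->stack)
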
diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 06331374d862..b5f0543ed84d 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c | |||
@@ -30,30 +30,69 @@ char resume_file[256] = CONFIG_PM_STD_PARTITION; | |||
30 | dev_t swsusp_resume_device; | 30 | dev_t swsusp_resume_device; |
31 | sector_t swsusp_resume_block; | 31 | sector_t swsusp_resume_block; |
32 | 32 | ||
33 | enum { | ||
34 | HIBERNATION_INVALID, | ||
35 | HIBERNATION_PLATFORM, | ||
36 | HIBERNATION_TEST, | ||
37 | HIBERNATION_TESTPROC, | ||
38 | HIBERNATION_SHUTDOWN, | ||
39 | HIBERNATION_REBOOT, | ||
40 | /* keep last */ | ||
41 | __HIBERNATION_AFTER_LAST | ||
42 | }; | ||
43 | #define HIBERNATION_MAX (__HIBERNATION_AFTER_LAST-1) | ||
44 | #define HIBERNATION_FIRST (HIBERNATION_INVALID + 1) | ||
45 | |||
46 | static int hibernation_mode = HIBERNATION_SHUTDOWN; | ||
47 | |||
48 | struct hibernation_ops *hibernation_ops; | ||
49 | |||
50 | /** | ||
51 | * hibernation_set_ops - set the global hibernate operations | ||
52 | * @ops: the hibernation operations to use in subsequent hibernation transitions | ||
53 | */ | ||
54 | |||
55 | void hibernation_set_ops(struct hibernation_ops *ops) | ||
56 | { | ||
57 | if (ops && !(ops->prepare && ops->enter && ops->finish)) { | ||
58 | WARN_ON(1); | ||
59 | return; | ||
60 | } | ||
61 | mutex_lock(&pm_mutex); | ||
62 | hibernation_ops = ops; | ||
63 | if (ops) | ||
64 | hibernation_mode = HIBERNATION_PLATFORM; | ||
65 | else if (hibernation_mode == HIBERNATION_PLATFORM) | ||
66 | hibernation_mode = HIBERNATION_SHUTDOWN; | ||
67 | |||
68 | mutex_unlock(&pm_mutex); | ||
69 | } | ||
70 | |||
71 | |||
33 | /** | 72 | /** |
34 | * platform_prepare - prepare the machine for hibernation using the | 73 | * platform_prepare - prepare the machine for hibernation using the |
35 | * platform driver if so configured and return an error code if it fails | 74 | * platform driver if so configured and return an error code if it fails |
36 | */ | 75 | */ |
37 | 76 | ||
38 | static inline int platform_prepare(void) | 77 | static int platform_prepare(void) |
39 | { | 78 | { |
40 | int error = 0; | 79 | return (hibernation_mode == HIBERNATION_PLATFORM && hibernation_ops) ? |
80 | hibernation_ops->prepare() : 0; | ||
81 | } | ||
41 | 82 | ||
42 | switch (pm_disk_mode) { | 83 | /** |
43 | case PM_DISK_TEST: | 84 | * platform_finish - switch the machine to the normal mode of operation |
44 | case PM_DISK_TESTPROC: | 85 | * using the platform driver (must be called after platform_prepare()) |
45 | case PM_DISK_SHUTDOWN: | 86 | */ |
46 | case PM_DISK_REBOOT: | 87 | |
47 | break; | 88 | static void platform_finish(void) |
48 | default: | 89 | { |
49 | if (pm_ops && pm_ops->prepare) | 90 | if (hibernation_mode == HIBERNATION_PLATFORM && hibernation_ops) |
50 | error = pm_ops->prepare(PM_SUSPEND_DISK); | 91 | hibernation_ops->finish(); |
51 | } | ||
52 | return error; | ||
53 | } | 92 | } |
54 | 93 | ||
55 | /** | 94 | /** |
56 | * power_down - Shut machine down for hibernate. | 95 | * power_down - Shut the machine down for hibernation. |
57 | * | 96 | * |
58 | * Use the platform driver, if configured so; otherwise try | 97 | * Use the platform driver, if configured so; otherwise try |
59 | * to power off or reboot. | 98 | * to power off or reboot. |
@@ -61,20 +100,20 @@ static inline int platform_prepare(void) | |||
61 | 100 | ||
62 | static void power_down(void) | 101 | static void power_down(void) |
63 | { | 102 | { |
64 | switch (pm_disk_mode) { | 103 | switch (hibernation_mode) { |
65 | case PM_DISK_TEST: | 104 | case HIBERNATION_TEST: |
66 | case PM_DISK_TESTPROC: | 105 | case HIBERNATION_TESTPROC: |
67 | break; | 106 | break; |
68 | case PM_DISK_SHUTDOWN: | 107 | case HIBERNATION_SHUTDOWN: |
69 | kernel_power_off(); | 108 | kernel_power_off(); |
70 | break; | 109 | break; |
71 | case PM_DISK_REBOOT: | 110 | case HIBERNATION_REBOOT: |
72 | kernel_restart(NULL); | 111 | kernel_restart(NULL); |
73 | break; | 112 | break; |
74 | default: | 113 | case HIBERNATION_PLATFORM: |
75 | if (pm_ops && pm_ops->enter) { | 114 | if (hibernation_ops) { |
76 | kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); | 115 | kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); |
77 | pm_ops->enter(PM_SUSPEND_DISK); | 116 | hibernation_ops->enter(); |
78 | break; | 117 | break; |
79 | } | 118 | } |
80 | } | 119 | } |
@@ -87,20 +126,6 @@ static void power_down(void) | |||
87 | while(1); | 126 | while(1); |
88 | } | 127 | } |
89 | 128 | ||
90 | static inline void platform_finish(void) | ||
91 | { | ||
92 | switch (pm_disk_mode) { | ||
93 | case PM_DISK_TEST: | ||
94 | case PM_DISK_TESTPROC: | ||
95 | case PM_DISK_SHUTDOWN: | ||
96 | case PM_DISK_REBOOT: | ||
97 | break; | ||
98 | default: | ||
99 | if (pm_ops && pm_ops->finish) | ||
100 | pm_ops->finish(PM_SUSPEND_DISK); | ||
101 | } | ||
102 | } | ||
103 | |||
104 | static void unprepare_processes(void) | 129 | static void unprepare_processes(void) |
105 | { | 130 | { |
106 | thaw_processes(); | 131 | thaw_processes(); |
@@ -120,13 +145,10 @@ static int prepare_processes(void) | |||
120 | } | 145 | } |
121 | 146 | ||
122 | /** | 147 | /** |
123 | * pm_suspend_disk - The granpappy of hibernation power management. | 148 | * hibernate - The granpappy of the built-in hibernation management |
124 | * | ||
125 | * If not, then call swsusp to do its thing, then figure out how | ||
126 | * to power down the system. | ||
127 | */ | 149 | */ |
128 | 150 | ||
129 | int pm_suspend_disk(void) | 151 | int hibernate(void) |
130 | { | 152 | { |
131 | int error; | 153 | int error; |
132 | 154 | ||
@@ -143,7 +165,8 @@ int pm_suspend_disk(void) | |||
143 | if (error) | 165 | if (error) |
144 | goto Finish; | 166 | goto Finish; |
145 | 167 | ||
146 | if (pm_disk_mode == PM_DISK_TESTPROC) { | 168 | mutex_lock(&pm_mutex); |
169 | if (hibernation_mode == HIBERNATION_TESTPROC) { | ||
147 | printk("swsusp debug: Waiting for 5 seconds.\n"); | 170 | printk("swsusp debug: Waiting for 5 seconds.\n"); |
148 | mdelay(5000); | 171 | mdelay(5000); |
149 | goto Thaw; | 172 | goto Thaw; |
@@ -168,7 +191,7 @@ int pm_suspend_disk(void) | |||
168 | if (error) | 191 | if (error) |
169 | goto Enable_cpus; | 192 | goto Enable_cpus; |
170 | 193 | ||
171 | if (pm_disk_mode == PM_DISK_TEST) { | 194 | if (hibernation_mode == HIBERNATION_TEST) { |
172 | printk("swsusp debug: Waiting for 5 seconds.\n"); | 195 | printk("swsusp debug: Waiting for 5 seconds.\n"); |
173 | mdelay(5000); | 196 | mdelay(5000); |
174 | goto Enable_cpus; | 197 | goto Enable_cpus; |
@@ -205,6 +228,7 @@ int pm_suspend_disk(void) | |||
205 | device_resume(); | 228 | device_resume(); |
206 | resume_console(); | 229 | resume_console(); |
207 | Thaw: | 230 | Thaw: |
231 | mutex_unlock(&pm_mutex); | ||
208 | unprepare_processes(); | 232 | unprepare_processes(); |
209 | Finish: | 233 | Finish: |
210 | free_basic_memory_bitmaps(); | 234 | free_basic_memory_bitmaps(); |
@@ -220,7 +244,7 @@ int pm_suspend_disk(void) | |||
220 | * Called as a late_initcall (so all devices are discovered and | 244 | * Called as a late_initcall (so all devices are discovered and |
221 | * initialized), we call swsusp to see if we have a saved image or not. | 245 | * initialized), we call swsusp to see if we have a saved image or not. |
222 | * If so, we quiesce devices, then restore the saved image. We will | 246 |
223 | * return above (in pm_suspend_disk() ) if everything goes well. | 247 | * return above (in hibernate()) if everything goes well. |
224 | * Otherwise, we fail gracefully and return to the normally | 248 | * Otherwise, we fail gracefully and return to the normally |
225 | * scheduled program. | 249 | * scheduled program. |
226 | * | 250 | * |
@@ -315,25 +339,26 @@ static int software_resume(void) | |||
315 | late_initcall(software_resume); | 339 | late_initcall(software_resume); |
316 | 340 | ||
317 | 341 | ||
318 | static const char * const pm_disk_modes[] = { | 342 | static const char * const hibernation_modes[] = { |
319 | [PM_DISK_PLATFORM] = "platform", | 343 | [HIBERNATION_PLATFORM] = "platform", |
320 | [PM_DISK_SHUTDOWN] = "shutdown", | 344 | [HIBERNATION_SHUTDOWN] = "shutdown", |
321 | [PM_DISK_REBOOT] = "reboot", | 345 | [HIBERNATION_REBOOT] = "reboot", |
322 | [PM_DISK_TEST] = "test", | 346 | [HIBERNATION_TEST] = "test", |
323 | [PM_DISK_TESTPROC] = "testproc", | 347 | [HIBERNATION_TESTPROC] = "testproc", |
324 | }; | 348 | }; |
325 | 349 | ||
326 | /** | 350 | /** |
327 | * disk - Control suspend-to-disk mode | 351 | * disk - Control hibernation mode |
328 | * | 352 | * |
329 | * Suspend-to-disk can be handled in several ways. We have a few options | 353 | * Suspend-to-disk can be handled in several ways. We have a few options |
330 | * for putting the system to sleep - using the platform driver (e.g. ACPI | 354 | * for putting the system to sleep - using the platform driver (e.g. ACPI |
331 | * or other pm_ops), powering off the system or rebooting the system | 355 | * or other hibernation_ops), powering off the system or rebooting the |
332 | * (for testing) as well as the two test modes. | 356 | * system (for testing) as well as the two test modes. |
333 | * | 357 | * |
334 | * The system can support 'platform', and that is known a priori (and | 358 | * The system can support 'platform', and that is known a priori (and |
335 | * encoded in pm_ops). However, the user may choose 'shutdown' or 'reboot' | 359 | * encoded by the presence of hibernation_ops). However, the user may |
336 | * as alternatives, as well as the test modes 'test' and 'testproc'. | 360 | * choose 'shutdown' or 'reboot' as alternatives, as well as one of the |
361 | * test modes, 'test' or 'testproc'. | ||
337 | * | 362 | * |
338 | * show() will display what the mode is currently set to. | 363 | * show() will display what the mode is currently set to. |
339 | * store() will accept one of | 364 | * store() will accept one of |
@@ -345,7 +370,7 @@ static const char * const pm_disk_modes[] = { | |||
345 | * 'testproc' | 370 | * 'testproc' |
346 | * | 371 | * |
347 | * It will only change to 'platform' if the system | 372 | * It will only change to 'platform' if the system |
348 | * supports it (as determined from pm_ops->pm_disk_mode). | 373 | * supports it (as determined by having hibernation_ops). |
349 | */ | 374 | */ |
350 | 375 | ||
351 | static ssize_t disk_show(struct kset *kset, char *buf) | 376 | static ssize_t disk_show(struct kset *kset, char *buf) |
@@ -353,28 +378,25 @@ static ssize_t disk_show(struct kset *kset, char *buf) | |||
353 | int i; | 378 | int i; |
354 | char *start = buf; | 379 | char *start = buf; |
355 | 380 | ||
356 | for (i = PM_DISK_PLATFORM; i < PM_DISK_MAX; i++) { | 381 | for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { |
357 | if (!pm_disk_modes[i]) | 382 | if (!hibernation_modes[i]) |
358 | continue; | 383 | continue; |
359 | switch (i) { | 384 | switch (i) { |
360 | case PM_DISK_SHUTDOWN: | 385 | case HIBERNATION_SHUTDOWN: |
361 | case PM_DISK_REBOOT: | 386 | case HIBERNATION_REBOOT: |
362 | case PM_DISK_TEST: | 387 | case HIBERNATION_TEST: |
363 | case PM_DISK_TESTPROC: | 388 | case HIBERNATION_TESTPROC: |
364 | break; | 389 | break; |
365 | default: | 390 | case HIBERNATION_PLATFORM: |
366 | if (pm_ops && pm_ops->enter && | 391 | if (hibernation_ops) |
367 | (i == pm_ops->pm_disk_mode)) | ||
368 | break; | 392 | break; |
369 | /* not a valid mode, continue with loop */ | 393 | /* not a valid mode, continue with loop */ |
370 | continue; | 394 | continue; |
371 | } | 395 | } |
372 | if (i == pm_disk_mode) | 396 | if (i == hibernation_mode) |
373 | buf += sprintf(buf, "[%s]", pm_disk_modes[i]); | 397 | buf += sprintf(buf, "[%s] ", hibernation_modes[i]); |
374 | else | 398 | else |
375 | buf += sprintf(buf, "%s", pm_disk_modes[i]); | 399 | buf += sprintf(buf, "%s ", hibernation_modes[i]); |
376 | if (i+1 != PM_DISK_MAX) | ||
377 | buf += sprintf(buf, " "); | ||
378 | } | 400 | } |
379 | buf += sprintf(buf, "\n"); | 401 | buf += sprintf(buf, "\n"); |
380 | return buf-start; | 402 | return buf-start; |
@@ -387,39 +409,38 @@ static ssize_t disk_store(struct kset *kset, const char *buf, size_t n) | |||
387 | int i; | 409 | int i; |
388 | int len; | 410 | int len; |
389 | char *p; | 411 | char *p; |
390 | suspend_disk_method_t mode = 0; | 412 | int mode = HIBERNATION_INVALID; |
391 | 413 | ||
392 | p = memchr(buf, '\n', n); | 414 | p = memchr(buf, '\n', n); |
393 | len = p ? p - buf : n; | 415 | len = p ? p - buf : n; |
394 | 416 | ||
395 | mutex_lock(&pm_mutex); | 417 | mutex_lock(&pm_mutex); |
396 | for (i = PM_DISK_PLATFORM; i < PM_DISK_MAX; i++) { | 418 | for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { |
397 | if (!strncmp(buf, pm_disk_modes[i], len)) { | 419 | if (!strncmp(buf, hibernation_modes[i], len)) { |
398 | mode = i; | 420 | mode = i; |
399 | break; | 421 | break; |
400 | } | 422 | } |
401 | } | 423 | } |
402 | if (mode) { | 424 | if (mode != HIBERNATION_INVALID) { |
403 | switch (mode) { | 425 | switch (mode) { |
404 | case PM_DISK_SHUTDOWN: | 426 | case HIBERNATION_SHUTDOWN: |
405 | case PM_DISK_REBOOT: | 427 | case HIBERNATION_REBOOT: |
406 | case PM_DISK_TEST: | 428 | case HIBERNATION_TEST: |
407 | case PM_DISK_TESTPROC: | 429 | case HIBERNATION_TESTPROC: |
408 | pm_disk_mode = mode; | 430 | hibernation_mode = mode; |
409 | break; | 431 | break; |
410 | default: | 432 | case HIBERNATION_PLATFORM: |
411 | if (pm_ops && pm_ops->enter && | 433 | if (hibernation_ops) |
412 | (mode == pm_ops->pm_disk_mode)) | 434 | hibernation_mode = mode; |
413 | pm_disk_mode = mode; | ||
414 | else | 435 | else |
415 | error = -EINVAL; | 436 | error = -EINVAL; |
416 | } | 437 | } |
417 | } else { | 438 | } else |
418 | error = -EINVAL; | 439 | error = -EINVAL; |
419 | } | ||
420 | 440 | ||
421 | pr_debug("PM: suspend-to-disk mode set to '%s'\n", | 441 | if (!error) |
422 | pm_disk_modes[mode]); | 442 | pr_debug("PM: suspend-to-disk mode set to '%s'\n", |
443 | hibernation_modes[mode]); | ||
423 | mutex_unlock(&pm_mutex); | 444 | mutex_unlock(&pm_mutex); |
424 | return error ? error : n; | 445 | return error ? error : n; |
425 | } | 446 | } |
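
The registration side of the new interface, as a sketch (the acpi_* callbacks are illustrative placeholders; the contract enforced by hibernation_set_ops() above is all three callbacks or none, and registering flips the default mode to 'platform'):

	static struct hibernation_ops acpi_hibernation_ops = {
		.prepare	= acpi_hibernation_prepare,
		.enter		= acpi_hibernation_enter,
		.finish		= acpi_hibernation_finish,
	};

	static int __init acpi_sleep_init(void)
	{
		hibernation_set_ops(&acpi_hibernation_ops);
		return 0;
	}

	static void __exit acpi_sleep_exit(void)
	{
		hibernation_set_ops(NULL);	/* mode falls back to 'shutdown' */
	}
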
diff --git a/kernel/power/main.c b/kernel/power/main.c index f6dda685e7e2..40d56a31245e 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -30,7 +30,6 @@ | |||
30 | DEFINE_MUTEX(pm_mutex); | 30 | DEFINE_MUTEX(pm_mutex); |
31 | 31 | ||
32 | struct pm_ops *pm_ops; | 32 | struct pm_ops *pm_ops; |
33 | suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN; | ||
34 | 33 | ||
35 | /** | 34 | /** |
36 | * pm_set_ops - Set the global power method table. | 35 | * pm_set_ops - Set the global power method table. |
@@ -41,10 +40,6 @@ void pm_set_ops(struct pm_ops * ops) | |||
41 | { | 40 | { |
42 | mutex_lock(&pm_mutex); | 41 | mutex_lock(&pm_mutex); |
43 | pm_ops = ops; | 42 | pm_ops = ops; |
44 | if (ops && ops->pm_disk_mode != PM_DISK_INVALID) { | ||
45 | pm_disk_mode = ops->pm_disk_mode; | ||
46 | } else | ||
47 | pm_disk_mode = PM_DISK_SHUTDOWN; | ||
48 | mutex_unlock(&pm_mutex); | 43 | mutex_unlock(&pm_mutex); |
49 | } | 44 | } |
50 | 45 | ||
@@ -184,24 +179,12 @@ static void suspend_finish(suspend_state_t state) | |||
184 | static const char * const pm_states[PM_SUSPEND_MAX] = { | 179 | static const char * const pm_states[PM_SUSPEND_MAX] = { |
185 | [PM_SUSPEND_STANDBY] = "standby", | 180 | [PM_SUSPEND_STANDBY] = "standby", |
186 | [PM_SUSPEND_MEM] = "mem", | 181 | [PM_SUSPEND_MEM] = "mem", |
187 | [PM_SUSPEND_DISK] = "disk", | ||
188 | }; | 182 | }; |
189 | 183 | ||
190 | static inline int valid_state(suspend_state_t state) | 184 | static inline int valid_state(suspend_state_t state) |
191 | { | 185 | { |
192 | /* Suspend-to-disk does not really need low-level support. | 186 | /* All states need lowlevel support and need to be valid |
193 | * It can work with shutdown/reboot if needed. If it isn't | 187 | * to the lowlevel implementation; no valid callback |
194 | * configured, then it cannot be supported. | ||
195 | */ | ||
196 | if (state == PM_SUSPEND_DISK) | ||
197 | #ifdef CONFIG_SOFTWARE_SUSPEND | ||
198 | return 1; | ||
199 | #else | ||
200 | return 0; | ||
201 | #endif | ||
202 | |||
203 | /* all other states need lowlevel support and need to be | ||
204 | * valid to the lowlevel implementation, no valid callback | ||
205 | * implies that none are valid. */ | 188 | * implies that none are valid. */ |
206 | if (!pm_ops || !pm_ops->valid || !pm_ops->valid(state)) | 189 | if (!pm_ops || !pm_ops->valid || !pm_ops->valid(state)) |
207 | return 0; | 190 | return 0; |
@@ -229,11 +212,6 @@ static int enter_state(suspend_state_t state) | |||
229 | if (!mutex_trylock(&pm_mutex)) | 212 | if (!mutex_trylock(&pm_mutex)) |
230 | return -EBUSY; | 213 | return -EBUSY; |
231 | 214 | ||
232 | if (state == PM_SUSPEND_DISK) { | ||
233 | error = pm_suspend_disk(); | ||
234 | goto Unlock; | ||
235 | } | ||
236 | |||
237 | pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); | 215 | pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); |
238 | if ((error = suspend_prepare(state))) | 216 | if ((error = suspend_prepare(state))) |
239 | goto Unlock; | 217 | goto Unlock; |
@@ -251,7 +229,7 @@ static int enter_state(suspend_state_t state) | |||
251 | 229 | ||
252 | /** | 230 | /** |
253 | * pm_suspend - Externally visible function for suspending system. | 231 | * pm_suspend - Externally visible function for suspending system. |
254 | * @state: Enumarted value of state to enter. | 232 | * @state: Enumerated value of state to enter. |
255 | * | 233 | * |
256 | * Determine whether or not value is within range, get state | 234 | * Determine whether or not value is within range, get state |
257 | * structure, and enter (above). | 235 | * structure, and enter (above). |
@@ -289,7 +267,13 @@ static ssize_t state_show(struct kset *kset, char *buf) | |||
289 | if (pm_states[i] && valid_state(i)) | 267 | if (pm_states[i] && valid_state(i)) |
290 | s += sprintf(s,"%s ", pm_states[i]); | 268 | s += sprintf(s,"%s ", pm_states[i]); |
291 | } | 269 | } |
292 | s += sprintf(s,"\n"); | 270 | #ifdef CONFIG_SOFTWARE_SUSPEND |
271 | s += sprintf(s, "%s\n", "disk"); | ||
272 | #else | ||
273 | if (s != buf) | ||
274 | /* convert the last space to a newline */ | ||
275 | *(s-1) = '\n'; | ||
276 | #endif | ||
293 | return (s - buf); | 277 | return (s - buf); |
294 | } | 278 | } |
295 | 279 | ||
@@ -304,6 +288,12 @@ static ssize_t state_store(struct kset *kset, const char *buf, size_t n) | |||
304 | p = memchr(buf, '\n', n); | 288 | p = memchr(buf, '\n', n); |
305 | len = p ? p - buf : n; | 289 | len = p ? p - buf : n; |
306 | 290 | ||
291 | /* First, check if we are requested to hibernate */ | ||
292 | if (!strncmp(buf, "disk", len)) { | ||
293 | error = hibernate(); | ||
294 | return error ? error : n; | ||
295 | } | ||
296 | |||
307 | for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) { | 297 | for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) { |
308 | if (*s && !strncmp(buf, *s, len)) | 298 | if (*s && !strncmp(buf, *s, len)) |
309 | break; | 299 | break; |
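
From userspace nothing visibly changes: 'disk' leaves pm_states[] but state_store() intercepts it ahead of the table scan and calls hibernate() directly. A sketch of the unchanged user-side request (ordinary C, for illustration only):

	#include <stdio.h>

	/* Ask the kernel to hibernate, exactly as before this patch. */
	static int request_hibernation(void)
	{
		FILE *f = fopen("/sys/power/state", "w");

		if (!f)
			return -1;
		fputs("disk", f);	/* matched before the pm_states[] scan */
		return fclose(f);	/* a hibernate() error surfaces here */
	}
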
diff --git a/kernel/power/power.h b/kernel/power/power.h index 34b43542785a..51381487103f 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
@@ -25,12 +25,7 @@ struct swsusp_info { | |||
25 | */ | 25 | */ |
26 | #define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) | 26 | #define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) |
27 | 27 | ||
28 | extern int pm_suspend_disk(void); | 28 | extern struct hibernation_ops *hibernation_ops; |
29 | #else | ||
30 | static inline int pm_suspend_disk(void) | ||
31 | { | ||
32 | return -EPERM; | ||
33 | } | ||
34 | #endif | 29 | #endif |
35 | 30 | ||
36 | extern int pfn_is_nosave(unsigned long); | 31 | extern int pfn_is_nosave(unsigned long); |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index b7039772b05c..48383ea72290 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -1227,7 +1227,7 @@ asmlinkage int swsusp_save(void) | |||
1227 | nr_copy_pages = nr_pages; | 1227 | nr_copy_pages = nr_pages; |
1228 | nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE); | 1228 | nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE); |
1229 | 1229 | ||
1230 | printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages); | 1230 | printk("swsusp: critical section: done (%d pages copied)\n", nr_pages); |
1231 | 1231 | ||
1232 | return 0; | 1232 | return 0; |
1233 | } | 1233 | } |
diff --git a/kernel/power/user.c b/kernel/power/user.c index 040560d9c312..24d7d78e6f42 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
@@ -130,16 +130,16 @@ static inline int platform_prepare(void) | |||
130 | { | 130 | { |
131 | int error = 0; | 131 | int error = 0; |
132 | 132 | ||
133 | if (pm_ops && pm_ops->prepare) | 133 | if (hibernation_ops) |
134 | error = pm_ops->prepare(PM_SUSPEND_DISK); | 134 | error = hibernation_ops->prepare(); |
135 | 135 | ||
136 | return error; | 136 | return error; |
137 | } | 137 | } |
138 | 138 | ||
139 | static inline void platform_finish(void) | 139 | static inline void platform_finish(void) |
140 | { | 140 | { |
141 | if (pm_ops && pm_ops->finish) | 141 | if (hibernation_ops) |
142 | pm_ops->finish(PM_SUSPEND_DISK); | 142 | hibernation_ops->finish(); |
143 | } | 143 | } |
144 | 144 | ||
145 | static inline int snapshot_suspend(int platform_suspend) | 145 | static inline int snapshot_suspend(int platform_suspend) |
@@ -384,7 +384,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
384 | switch (arg) { | 384 | switch (arg) { |
385 | 385 | ||
386 | case PMOPS_PREPARE: | 386 | case PMOPS_PREPARE: |
387 | if (pm_ops && pm_ops->enter) { | 387 | if (hibernation_ops) { |
388 | data->platform_suspend = 1; | 388 | data->platform_suspend = 1; |
389 | error = 0; | 389 | error = 0; |
390 | } else { | 390 | } else { |
@@ -395,8 +395,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, | |||
395 | case PMOPS_ENTER: | 395 | case PMOPS_ENTER: |
396 | if (data->platform_suspend) { | 396 | if (data->platform_suspend) { |
397 | kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); | 397 | kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK); |
398 | error = pm_ops->enter(PM_SUSPEND_DISK); | 398 | error = hibernation_ops->enter(); |
399 | error = 0; | ||
400 | } | 399 | } |
401 | break; | 400 | break; |
402 | 401 | ||
diff --git a/kernel/profile.c b/kernel/profile.c index 9bfadb248dd8..cc91b9bf759d 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
@@ -340,6 +340,7 @@ static int __devinit profile_cpu_callback(struct notifier_block *info, | |||
340 | 340 | ||
341 | switch (action) { | 341 | switch (action) { |
342 | case CPU_UP_PREPARE: | 342 | case CPU_UP_PREPARE: |
343 | case CPU_UP_PREPARE_FROZEN: | ||
343 | node = cpu_to_node(cpu); | 344 | node = cpu_to_node(cpu); |
344 | per_cpu(cpu_profile_flip, cpu) = 0; | 345 | per_cpu(cpu_profile_flip, cpu) = 0; |
345 | if (!per_cpu(cpu_profile_hits, cpu)[1]) { | 346 | if (!per_cpu(cpu_profile_hits, cpu)[1]) { |
@@ -365,10 +366,13 @@ static int __devinit profile_cpu_callback(struct notifier_block *info, | |||
365 | __free_page(page); | 366 | __free_page(page); |
366 | return NOTIFY_BAD; | 367 | return NOTIFY_BAD; |
367 | case CPU_ONLINE: | 368 | case CPU_ONLINE: |
369 | case CPU_ONLINE_FROZEN: | ||
368 | cpu_set(cpu, prof_cpu_mask); | 370 | cpu_set(cpu, prof_cpu_mask); |
369 | break; | 371 | break; |
370 | case CPU_UP_CANCELED: | 372 | case CPU_UP_CANCELED: |
373 | case CPU_UP_CANCELED_FROZEN: | ||
371 | case CPU_DEAD: | 374 | case CPU_DEAD: |
375 | case CPU_DEAD_FROZEN: | ||
372 | cpu_clear(cpu, prof_cpu_mask); | 376 | cpu_clear(cpu, prof_cpu_mask); |
373 | if (per_cpu(cpu_profile_hits, cpu)[0]) { | 377 | if (per_cpu(cpu_profile_hits, cpu)[0]) { |
374 | page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]); | 378 | page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]); |
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 3554b76da84c..2c2dd8410dc4 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -558,9 +558,11 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
558 | long cpu = (long)hcpu; | 558 | long cpu = (long)hcpu; |
559 | switch (action) { | 559 | switch (action) { |
560 | case CPU_UP_PREPARE: | 560 | case CPU_UP_PREPARE: |
561 | case CPU_UP_PREPARE_FROZEN: | ||
561 | rcu_online_cpu(cpu); | 562 | rcu_online_cpu(cpu); |
562 | break; | 563 | break; |
563 | case CPU_DEAD: | 564 | case CPU_DEAD: |
565 | case CPU_DEAD_FROZEN: | ||
564 | rcu_offline_cpu(cpu); | 566 | rcu_offline_cpu(cpu); |
565 | break; | 567 | break; |
566 | default: | 568 | default: |
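
profile.c and rcupdate.c above show the pattern repeated through the rest of this series: every hotplug event gains a _FROZEN twin, sent when a CPU goes down or comes back while user space is frozen for suspend, and an existing callback just lists both labels. A minimal sketch of a subsystem callback written this way; my_online()/my_offline() are stand-ins:

	#include <linux/cpu.h>
	#include <linux/notifier.h>

	static void my_online(long cpu)  { /* allocate per-cpu state */ }
	static void my_offline(long cpu) { /* free per-cpu state */ }

	static int my_cpu_callback(struct notifier_block *nfb,
				   unsigned long action, void *hcpu)
	{
		long cpu = (long)hcpu;

		switch (action) {
		case CPU_UP_PREPARE:
		case CPU_UP_PREPARE_FROZEN:	/* same work with tasks frozen */
			my_online(cpu);
			break;
		case CPU_DEAD:
		case CPU_DEAD_FROZEN:
			my_offline(cpu);
			break;
		}
		return NOTIFY_OK;
	}
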
diff --git a/kernel/relay.c b/kernel/relay.c index d24395e8b6e5..4311101b0ca7 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
@@ -310,16 +310,13 @@ static struct rchan_callbacks default_channel_callbacks = { | |||
310 | 310 | ||
311 | /** | 311 | /** |
312 | * wakeup_readers - wake up readers waiting on a channel | 312 | * wakeup_readers - wake up readers waiting on a channel |
313 | * @work: work struct that contains the channel buffer | 313 | * @data: contains the channel buffer |
314 | * | 314 | * |
315 | * This is the work function used to defer reader waking. The | 315 | * This is the timer function used to defer reader waking. |
316 | * reason waking is deferred is that calling directly from write | ||
317 | * causes problems if you're writing from say the scheduler. | ||
318 | */ | 316 | */ |
319 | static void wakeup_readers(struct work_struct *work) | 317 | static void wakeup_readers(unsigned long data) |
320 | { | 318 | { |
321 | struct rchan_buf *buf = | 319 | struct rchan_buf *buf = (struct rchan_buf *)data; |
322 | container_of(work, struct rchan_buf, wake_readers.work); | ||
323 | wake_up_interruptible(&buf->read_wait); | 320 | wake_up_interruptible(&buf->read_wait); |
324 | } | 321 | } |
325 | 322 | ||
@@ -337,11 +334,9 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init) | |||
337 | if (init) { | 334 | if (init) { |
338 | init_waitqueue_head(&buf->read_wait); | 335 | init_waitqueue_head(&buf->read_wait); |
339 | kref_init(&buf->kref); | 336 | kref_init(&buf->kref); |
340 | INIT_DELAYED_WORK(&buf->wake_readers, NULL); | 337 | setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf); |
341 | } else { | 338 | } else |
342 | cancel_delayed_work(&buf->wake_readers); | 339 | del_timer_sync(&buf->timer); |
343 | flush_scheduled_work(); | ||
344 | } | ||
345 | 340 | ||
346 | buf->subbufs_produced = 0; | 341 | buf->subbufs_produced = 0; |
347 | buf->subbufs_consumed = 0; | 342 | buf->subbufs_consumed = 0; |
@@ -447,8 +442,7 @@ end: | |||
447 | static void relay_close_buf(struct rchan_buf *buf) | 442 | static void relay_close_buf(struct rchan_buf *buf) |
448 | { | 443 | { |
449 | buf->finalized = 1; | 444 | buf->finalized = 1; |
450 | cancel_delayed_work(&buf->wake_readers); | 445 | del_timer_sync(&buf->timer); |
451 | flush_scheduled_work(); | ||
452 | kref_put(&buf->kref, relay_remove_buf); | 446 | kref_put(&buf->kref, relay_remove_buf); |
453 | } | 447 | } |
454 | 448 | ||
@@ -490,6 +484,7 @@ static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, | |||
490 | 484 | ||
491 | switch(action) { | 485 | switch(action) { |
492 | case CPU_UP_PREPARE: | 486 | case CPU_UP_PREPARE: |
487 | case CPU_UP_PREPARE_FROZEN: | ||
493 | mutex_lock(&relay_channels_mutex); | 488 | mutex_lock(&relay_channels_mutex); |
494 | list_for_each_entry(chan, &relay_channels, list) { | 489 | list_for_each_entry(chan, &relay_channels, list) { |
495 | if (chan->buf[hotcpu]) | 490 | if (chan->buf[hotcpu]) |
@@ -506,6 +501,7 @@ static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, | |||
506 | mutex_unlock(&relay_channels_mutex); | 501 | mutex_unlock(&relay_channels_mutex); |
507 | break; | 502 | break; |
508 | case CPU_DEAD: | 503 | case CPU_DEAD: |
504 | case CPU_DEAD_FROZEN: | ||
509 | /* No need to flush the cpu : will be flushed upon | 505 | /* No need to flush the cpu : will be flushed upon |
510 | * final relay_flush() call. */ | 506 | * final relay_flush() call. */ |
511 | break; | 507 | break; |
@@ -608,11 +604,14 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) | |||
608 | buf->dentry->d_inode->i_size += buf->chan->subbuf_size - | 604 | buf->dentry->d_inode->i_size += buf->chan->subbuf_size - |
609 | buf->padding[old_subbuf]; | 605 | buf->padding[old_subbuf]; |
610 | smp_mb(); | 606 | smp_mb(); |
611 | if (waitqueue_active(&buf->read_wait)) { | 607 | if (waitqueue_active(&buf->read_wait)) |
612 | PREPARE_DELAYED_WORK(&buf->wake_readers, | 608 | /* |
613 | wakeup_readers); | 609 | * Calling wake_up_interruptible() from here |
614 | schedule_delayed_work(&buf->wake_readers, 1); | 610 | * will deadlock if we happen to be logging |
615 | } | 611 | * from the scheduler (trying to re-grab |
612 | * rq->lock), so defer it. | ||
613 | */ | ||
614 | __mod_timer(&buf->timer, jiffies + 1); | ||
616 | } | 615 | } |
617 | 616 | ||
618 | old = buf->data; | 617 | old = buf->data; |
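
The relay conversion above replaces a delayed work item with a plain timer: the wakeup must stay deferred, since waking a reader directly from the write path can deadlock when the write happens under rq->lock in the scheduler, but a timer is cheaper and avoids the flush_scheduled_work() entanglement on teardown. The idiom in isolation, with a hypothetical buffer type:

	#include <linux/jiffies.h>
	#include <linux/timer.h>
	#include <linux/wait.h>

	/* struct my_buf stands in for struct rchan_buf. */
	struct my_buf {
		struct timer_list	timer;
		wait_queue_head_t	read_wait;
	};

	static void my_wakeup(unsigned long data)
	{
		struct my_buf *buf = (struct my_buf *)data;

		wake_up_interruptible(&buf->read_wait);
	}

	static void my_buf_init(struct my_buf *buf)
	{
		init_waitqueue_head(&buf->read_wait);
		setup_timer(&buf->timer, my_wakeup, (unsigned long)buf);
	}

	/* On the write-side hot path, never wake directly: */
	static void my_post_data(struct my_buf *buf)
	{
		mod_timer(&buf->timer, jiffies + 1);	/* wake a tick later */
	}
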
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 180978cb2f75..12879f6c1ec3 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c | |||
@@ -56,7 +56,7 @@ | |||
56 | * state. | 56 | * state. |
57 | */ | 57 | */ |
58 | 58 | ||
59 | static void | 59 | void |
60 | rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner, | 60 | rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner, |
61 | unsigned long mask) | 61 | unsigned long mask) |
62 | { | 62 | { |
@@ -81,29 +81,6 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock) | |||
81 | } | 81 | } |
82 | 82 | ||
83 | /* | 83 | /* |
84 | * We can speed up the acquire/release, if the architecture | ||
85 | * supports cmpxchg and if there's no debugging state to be set up | ||
86 | */ | ||
87 | #if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES) | ||
88 | # define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c) | ||
89 | static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) | ||
90 | { | ||
91 | unsigned long owner, *p = (unsigned long *) &lock->owner; | ||
92 | |||
93 | do { | ||
94 | owner = *p; | ||
95 | } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); | ||
96 | } | ||
97 | #else | ||
98 | # define rt_mutex_cmpxchg(l,c,n) (0) | ||
99 | static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) | ||
100 | { | ||
101 | lock->owner = (struct task_struct *) | ||
102 | ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); | ||
103 | } | ||
104 | #endif | ||
105 | |||
106 | /* | ||
107 | * Calculate task priority from the waiter list priority | 84 | * Calculate task priority from the waiter list priority |
108 | * | 85 | * |
109 | * Return task->normal_prio when the waiter list is empty or when | 86 | * Return task->normal_prio when the waiter list is empty or when |
@@ -123,7 +100,7 @@ int rt_mutex_getprio(struct task_struct *task) | |||
123 | * | 100 | * |
124 | * This can be both boosting and unboosting. task->pi_lock must be held. | 101 | * This can be both boosting and unboosting. task->pi_lock must be held. |
125 | */ | 102 | */ |
126 | static void __rt_mutex_adjust_prio(struct task_struct *task) | 103 | void __rt_mutex_adjust_prio(struct task_struct *task) |
127 | { | 104 | { |
128 | int prio = rt_mutex_getprio(task); | 105 | int prio = rt_mutex_getprio(task); |
129 | 106 | ||
@@ -159,11 +136,11 @@ int max_lock_depth = 1024; | |||
159 | * Decreases task's usage by one - may thus free the task. | 136 | * Decreases task's usage by one - may thus free the task. |
160 | * Returns 0 or -EDEADLK. | 137 | * Returns 0 or -EDEADLK. |
161 | */ | 138 | */ |
162 | static int rt_mutex_adjust_prio_chain(struct task_struct *task, | 139 | int rt_mutex_adjust_prio_chain(struct task_struct *task, |
163 | int deadlock_detect, | 140 | int deadlock_detect, |
164 | struct rt_mutex *orig_lock, | 141 | struct rt_mutex *orig_lock, |
165 | struct rt_mutex_waiter *orig_waiter, | 142 | struct rt_mutex_waiter *orig_waiter, |
166 | struct task_struct *top_task) | 143 | struct task_struct *top_task) |
167 | { | 144 | { |
168 | struct rt_mutex *lock; | 145 | struct rt_mutex *lock; |
169 | struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; | 146 | struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; |
@@ -524,8 +501,8 @@ static void wakeup_next_waiter(struct rt_mutex *lock) | |||
524 | * | 501 | * |
525 | * Must be called with lock->wait_lock held | 502 | * Must be called with lock->wait_lock held |
526 | */ | 503 | */ |
527 | static void remove_waiter(struct rt_mutex *lock, | 504 | void remove_waiter(struct rt_mutex *lock, |
528 | struct rt_mutex_waiter *waiter) | 505 | struct rt_mutex_waiter *waiter) |
529 | { | 506 | { |
530 | int first = (waiter == rt_mutex_top_waiter(lock)); | 507 | int first = (waiter == rt_mutex_top_waiter(lock)); |
531 | struct task_struct *owner = rt_mutex_owner(lock); | 508 | struct task_struct *owner = rt_mutex_owner(lock); |
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h index 9c75856e791e..242ec7ee740b 100644 --- a/kernel/rtmutex_common.h +++ b/kernel/rtmutex_common.h | |||
@@ -113,6 +113,29 @@ static inline unsigned long rt_mutex_owner_pending(struct rt_mutex *lock) | |||
113 | } | 113 | } |
114 | 114 | ||
115 | /* | 115 | /* |
116 | * We can speed up the acquire/release, if the architecture | ||
117 | * supports cmpxchg and if there's no debugging state to be set up | ||
118 | */ | ||
119 | #if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES) | ||
120 | # define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c) | ||
121 | static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) | ||
122 | { | ||
123 | unsigned long owner, *p = (unsigned long *) &lock->owner; | ||
124 | |||
125 | do { | ||
126 | owner = *p; | ||
127 | } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); | ||
128 | } | ||
129 | #else | ||
130 | # define rt_mutex_cmpxchg(l,c,n) (0) | ||
131 | static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) | ||
132 | { | ||
133 | lock->owner = (struct task_struct *) | ||
134 | ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); | ||
135 | } | ||
136 | #endif | ||
137 | |||
138 | /* | ||
116 | * PI-futex support (proxy locking functions, etc.): | 139 | * PI-futex support (proxy locking functions, etc.): |
117 | */ | 140 | */ |
118 | extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); | 141 | extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); |
@@ -120,4 +143,15 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, | |||
120 | struct task_struct *proxy_owner); | 143 | struct task_struct *proxy_owner); |
121 | extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, | 144 | extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, |
122 | struct task_struct *proxy_owner); | 145 | struct task_struct *proxy_owner); |
146 | |||
147 | extern void rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner, | ||
148 | unsigned long mask); | ||
149 | extern void __rt_mutex_adjust_prio(struct task_struct *task); | ||
150 | extern int rt_mutex_adjust_prio_chain(struct task_struct *task, | ||
151 | int deadlock_detect, | ||
152 | struct rt_mutex *orig_lock, | ||
153 | struct rt_mutex_waiter *orig_waiter, | ||
154 | struct task_struct *top_task); | ||
155 | extern void remove_waiter(struct rt_mutex *lock, | ||
156 | struct rt_mutex_waiter *waiter); | ||
123 | #endif | 157 | #endif |
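
rtmutex.c above un-statics rt_mutex_set_owner(), __rt_mutex_adjust_prio(), rt_mutex_adjust_prio_chain() and remove_waiter(), while the cmpxchg fast-path helpers migrate into rtmutex_common.h next to the new extern declarations, presumably so other core code can drive the PI machinery directly. What a user of the shared fast path gains, sketched; the my_* names are hypothetical:

	#include <linux/sched.h>
	#include "rtmutex_common.h"

	/* hypothetical slow path, standing in for the real slowlock code */
	static void my_rt_mutex_slowlock(struct rt_mutex *lock);

	static inline void my_rt_mutex_lock(struct rt_mutex *lock)
	{
		/*
		 * A NULL owner means the lock is free; try to take it in a
		 * single cmpxchg. rt_mutex_cmpxchg() degrades to 0 (always
		 * slow path) when the arch lacks cmpxchg or rtmutex
		 * debugging is enabled.
		 */
		if (likely(rt_mutex_cmpxchg(lock, NULL, current)))
			return;
		my_rt_mutex_slowlock(lock);
	}
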
diff --git a/kernel/sched.c b/kernel/sched.c index 66bd7ff23f18..799d23b4e35d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -305,6 +305,7 @@ struct rq { | |||
305 | }; | 305 | }; |
306 | 306 | ||
307 | static DEFINE_PER_CPU(struct rq, runqueues) ____cacheline_aligned_in_smp; | 307 | static DEFINE_PER_CPU(struct rq, runqueues) ____cacheline_aligned_in_smp; |
308 | static DEFINE_MUTEX(sched_hotcpu_mutex); | ||
308 | 309 | ||
309 | static inline int cpu_of(struct rq *rq) | 310 | static inline int cpu_of(struct rq *rq) |
310 | { | 311 | { |
@@ -4520,13 +4521,13 @@ long sched_setaffinity(pid_t pid, cpumask_t new_mask) | |||
4520 | struct task_struct *p; | 4521 | struct task_struct *p; |
4521 | int retval; | 4522 | int retval; |
4522 | 4523 | ||
4523 | lock_cpu_hotplug(); | 4524 | mutex_lock(&sched_hotcpu_mutex); |
4524 | read_lock(&tasklist_lock); | 4525 | read_lock(&tasklist_lock); |
4525 | 4526 | ||
4526 | p = find_process_by_pid(pid); | 4527 | p = find_process_by_pid(pid); |
4527 | if (!p) { | 4528 | if (!p) { |
4528 | read_unlock(&tasklist_lock); | 4529 | read_unlock(&tasklist_lock); |
4529 | unlock_cpu_hotplug(); | 4530 | mutex_unlock(&sched_hotcpu_mutex); |
4530 | return -ESRCH; | 4531 | return -ESRCH; |
4531 | } | 4532 | } |
4532 | 4533 | ||
@@ -4553,7 +4554,7 @@ long sched_setaffinity(pid_t pid, cpumask_t new_mask) | |||
4553 | 4554 | ||
4554 | out_unlock: | 4555 | out_unlock: |
4555 | put_task_struct(p); | 4556 | put_task_struct(p); |
4556 | unlock_cpu_hotplug(); | 4557 | mutex_unlock(&sched_hotcpu_mutex); |
4557 | return retval; | 4558 | return retval; |
4558 | } | 4559 | } |
4559 | 4560 | ||
@@ -4610,7 +4611,7 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask) | |||
4610 | struct task_struct *p; | 4611 | struct task_struct *p; |
4611 | int retval; | 4612 | int retval; |
4612 | 4613 | ||
4613 | lock_cpu_hotplug(); | 4614 | mutex_lock(&sched_hotcpu_mutex); |
4614 | read_lock(&tasklist_lock); | 4615 | read_lock(&tasklist_lock); |
4615 | 4616 | ||
4616 | retval = -ESRCH; | 4617 | retval = -ESRCH; |
@@ -4626,7 +4627,7 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask) | |||
4626 | 4627 | ||
4627 | out_unlock: | 4628 | out_unlock: |
4628 | read_unlock(&tasklist_lock); | 4629 | read_unlock(&tasklist_lock); |
4629 | unlock_cpu_hotplug(); | 4630 | mutex_unlock(&sched_hotcpu_mutex); |
4630 | if (retval) | 4631 | if (retval) |
4631 | return retval; | 4632 | return retval; |
4632 | 4633 | ||
@@ -5388,7 +5389,12 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
5388 | struct rq *rq; | 5389 | struct rq *rq; |
5389 | 5390 | ||
5390 | switch (action) { | 5391 | switch (action) { |
5392 | case CPU_LOCK_ACQUIRE: | ||
5393 | mutex_lock(&sched_hotcpu_mutex); | ||
5394 | break; | ||
5395 | |||
5391 | case CPU_UP_PREPARE: | 5396 | case CPU_UP_PREPARE: |
5397 | case CPU_UP_PREPARE_FROZEN: | ||
5392 | p = kthread_create(migration_thread, hcpu, "migration/%d",cpu); | 5398 | p = kthread_create(migration_thread, hcpu, "migration/%d",cpu); |
5393 | if (IS_ERR(p)) | 5399 | if (IS_ERR(p)) |
5394 | return NOTIFY_BAD; | 5400 | return NOTIFY_BAD; |
@@ -5402,12 +5408,14 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
5402 | break; | 5408 | break; |
5403 | 5409 | ||
5404 | case CPU_ONLINE: | 5410 | case CPU_ONLINE: |
5411 | case CPU_ONLINE_FROZEN: | ||
5405 | /* Strictly unnecessary, as first user will wake it. */ | 5412 | /* Strictly unnecessary, as first user will wake it. */ |
5406 | wake_up_process(cpu_rq(cpu)->migration_thread); | 5413 | wake_up_process(cpu_rq(cpu)->migration_thread); |
5407 | break; | 5414 | break; |
5408 | 5415 | ||
5409 | #ifdef CONFIG_HOTPLUG_CPU | 5416 | #ifdef CONFIG_HOTPLUG_CPU |
5410 | case CPU_UP_CANCELED: | 5417 | case CPU_UP_CANCELED: |
5418 | case CPU_UP_CANCELED_FROZEN: | ||
5411 | if (!cpu_rq(cpu)->migration_thread) | 5419 | if (!cpu_rq(cpu)->migration_thread) |
5412 | break; | 5420 | break; |
5413 | /* Unbind it from offline cpu so it can run. Fall thru. */ | 5421 | /* Unbind it from offline cpu so it can run. Fall thru. */ |
@@ -5418,6 +5426,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
5418 | break; | 5426 | break; |
5419 | 5427 | ||
5420 | case CPU_DEAD: | 5428 | case CPU_DEAD: |
5429 | case CPU_DEAD_FROZEN: | ||
5421 | migrate_live_tasks(cpu); | 5430 | migrate_live_tasks(cpu); |
5422 | rq = cpu_rq(cpu); | 5431 | rq = cpu_rq(cpu); |
5423 | kthread_stop(rq->migration_thread); | 5432 | kthread_stop(rq->migration_thread); |
@@ -5433,7 +5442,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
5433 | BUG_ON(rq->nr_running != 0); | 5442 | BUG_ON(rq->nr_running != 0); |
5434 | 5443 | ||
5435 | /* No need to migrate the tasks: it was best-effort if | 5444 | /* No need to migrate the tasks: it was best-effort if |
5436 | * they didn't do lock_cpu_hotplug(). Just wake up | 5445 | * they didn't take sched_hotcpu_mutex. Just wake up |
5437 | * the requestors. */ | 5446 | * the requestors. */ |
5438 | spin_lock_irq(&rq->lock); | 5447 | spin_lock_irq(&rq->lock); |
5439 | while (!list_empty(&rq->migration_queue)) { | 5448 | while (!list_empty(&rq->migration_queue)) { |
@@ -5447,6 +5456,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
5447 | spin_unlock_irq(&rq->lock); | 5456 | spin_unlock_irq(&rq->lock); |
5448 | break; | 5457 | break; |
5449 | #endif | 5458 | #endif |
5459 | case CPU_LOCK_RELEASE: | ||
5460 | mutex_unlock(&sched_hotcpu_mutex); | ||
5461 | break; | ||
5450 | } | 5462 | } |
5451 | return NOTIFY_OK; | 5463 | return NOTIFY_OK; |
5452 | } | 5464 | } |
@@ -6822,10 +6834,10 @@ int arch_reinit_sched_domains(void) | |||
6822 | { | 6834 | { |
6823 | int err; | 6835 | int err; |
6824 | 6836 | ||
6825 | lock_cpu_hotplug(); | 6837 | mutex_lock(&sched_hotcpu_mutex); |
6826 | detach_destroy_domains(&cpu_online_map); | 6838 | detach_destroy_domains(&cpu_online_map); |
6827 | err = arch_init_sched_domains(&cpu_online_map); | 6839 | err = arch_init_sched_domains(&cpu_online_map); |
6828 | unlock_cpu_hotplug(); | 6840 | mutex_unlock(&sched_hotcpu_mutex); |
6829 | 6841 | ||
6830 | return err; | 6842 | return err; |
6831 | } | 6843 | } |
@@ -6904,14 +6916,20 @@ static int update_sched_domains(struct notifier_block *nfb, | |||
6904 | { | 6916 | { |
6905 | switch (action) { | 6917 | switch (action) { |
6906 | case CPU_UP_PREPARE: | 6918 | case CPU_UP_PREPARE: |
6919 | case CPU_UP_PREPARE_FROZEN: | ||
6907 | case CPU_DOWN_PREPARE: | 6920 | case CPU_DOWN_PREPARE: |
6921 | case CPU_DOWN_PREPARE_FROZEN: | ||
6908 | detach_destroy_domains(&cpu_online_map); | 6922 | detach_destroy_domains(&cpu_online_map); |
6909 | return NOTIFY_OK; | 6923 | return NOTIFY_OK; |
6910 | 6924 | ||
6911 | case CPU_UP_CANCELED: | 6925 | case CPU_UP_CANCELED: |
6926 | case CPU_UP_CANCELED_FROZEN: | ||
6912 | case CPU_DOWN_FAILED: | 6927 | case CPU_DOWN_FAILED: |
6928 | case CPU_DOWN_FAILED_FROZEN: | ||
6913 | case CPU_ONLINE: | 6929 | case CPU_ONLINE: |
6930 | case CPU_ONLINE_FROZEN: | ||
6914 | case CPU_DEAD: | 6931 | case CPU_DEAD: |
6932 | case CPU_DEAD_FROZEN: | ||
6915 | /* | 6933 | /* |
6916 | * Fall through and re-initialise the domains. | 6934 | * Fall through and re-initialise the domains. |
6917 | */ | 6935 | */ |
@@ -6930,12 +6948,12 @@ void __init sched_init_smp(void) | |||
6930 | { | 6948 | { |
6931 | cpumask_t non_isolated_cpus; | 6949 | cpumask_t non_isolated_cpus; |
6932 | 6950 | ||
6933 | lock_cpu_hotplug(); | 6951 | mutex_lock(&sched_hotcpu_mutex); |
6934 | arch_init_sched_domains(&cpu_online_map); | 6952 | arch_init_sched_domains(&cpu_online_map); |
6935 | cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map); | 6953 | cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map); |
6936 | if (cpus_empty(non_isolated_cpus)) | 6954 | if (cpus_empty(non_isolated_cpus)) |
6937 | cpu_set(smp_processor_id(), non_isolated_cpus); | 6955 | cpu_set(smp_processor_id(), non_isolated_cpus); |
6938 | unlock_cpu_hotplug(); | 6956 | mutex_unlock(&sched_hotcpu_mutex); |
6939 | /* XXX: Theoretical race here - CPU may be hotplugged now */ | 6957 | /* XXX: Theoretical race here - CPU may be hotplugged now */ |
6940 | hotcpu_notifier(update_sched_domains, 0); | 6958 | hotcpu_notifier(update_sched_domains, 0); |
6941 | 6959 | ||
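
Two things change in sched.c above: lock_cpu_hotplug() gives way to a scheduler-private sched_hotcpu_mutex, and migration_call() now takes and drops that mutex on the new CPU_LOCK_ACQUIRE / CPU_LOCK_RELEASE events, which the hotplug core presumably emits around the whole operation so each subsystem can pin its own state for the duration. A hedged sketch of that bracketing; _cpu_down_sketch() is illustrative, not the real kernel/cpu.c:

	#include <linux/cpu.h>
	#include <linux/notifier.h>

	static RAW_NOTIFIER_HEAD(cpu_chain);	/* stands in for the real chain */

	static void _cpu_down_sketch(unsigned int cpu)
	{
		void *hcpu = (void *)(long)cpu;

		raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu);
		raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, hcpu);
		/* ... __cpu_disable(), migrate everything, send CPU_DEAD ... */
		raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu);
	}
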
diff --git a/kernel/signal.c b/kernel/signal.c index 1368e67c8482..2ac3a668d9dd 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -38,125 +38,6 @@ | |||
38 | 38 | ||
39 | static struct kmem_cache *sigqueue_cachep; | 39 | static struct kmem_cache *sigqueue_cachep; |
40 | 40 | ||
41 | /* | ||
42 | * In POSIX a signal is sent either to a specific thread (Linux task) | ||
43 | * or to the process as a whole (Linux thread group). How the signal | ||
44 | * is sent determines whether it's to one thread or the whole group, | ||
45 | * which determines which signal mask(s) are involved in blocking it | ||
46 | * from being delivered until later. When the signal is delivered, | ||
47 | * either it's caught or ignored by a user handler or it has a default | ||
48 | * effect that applies to the whole thread group (POSIX process). | ||
49 | * | ||
50 | * The possible effects an unblocked signal set to SIG_DFL can have are: | ||
51 | * ignore - Nothing Happens | ||
52 | * terminate - kill the process, i.e. all threads in the group, | ||
53 | * similar to exit_group. The group leader (only) reports | ||
54 | * WIFSIGNALED status to its parent. | ||
55 | * coredump - write a core dump file describing all threads using | ||
56 | * the same mm and then kill all those threads | ||
57 | * stop - stop all the threads in the group, i.e. TASK_STOPPED state | ||
58 | * | ||
59 | * SIGKILL and SIGSTOP cannot be caught, blocked, or ignored. | ||
60 | * Other signals when not blocked and set to SIG_DFL behaves as follows. | ||
61 | * The job control signals also have other special effects. | ||
62 | * | ||
63 | * +--------------------+------------------+ | ||
64 | * | POSIX signal | default action | | ||
65 | * +--------------------+------------------+ | ||
66 | * | SIGHUP | terminate | | ||
67 | * | SIGINT | terminate | | ||
68 | * | SIGQUIT | coredump | | ||
69 | * | SIGILL | coredump | | ||
70 | * | SIGTRAP | coredump | | ||
71 | * | SIGABRT/SIGIOT | coredump | | ||
72 | * | SIGBUS | coredump | | ||
73 | * | SIGFPE | coredump | | ||
74 | * | SIGKILL | terminate(+) | | ||
75 | * | SIGUSR1 | terminate | | ||
76 | * | SIGSEGV | coredump | | ||
77 | * | SIGUSR2 | terminate | | ||
78 | * | SIGPIPE | terminate | | ||
79 | * | SIGALRM | terminate | | ||
80 | * | SIGTERM | terminate | | ||
81 | * | SIGCHLD | ignore | | ||
82 | * | SIGCONT | ignore(*) | | ||
83 | * | SIGSTOP | stop(*)(+) | | ||
84 | * | SIGTSTP | stop(*) | | ||
85 | * | SIGTTIN | stop(*) | | ||
86 | * | SIGTTOU | stop(*) | | ||
87 | * | SIGURG | ignore | | ||
88 | * | SIGXCPU | coredump | | ||
89 | * | SIGXFSZ | coredump | | ||
90 | * | SIGVTALRM | terminate | | ||
91 | * | SIGPROF | terminate | | ||
92 | * | SIGPOLL/SIGIO | terminate | | ||
93 | * | SIGSYS/SIGUNUSED | coredump | | ||
94 | * | SIGSTKFLT | terminate | | ||
95 | * | SIGWINCH | ignore | | ||
96 | * | SIGPWR | terminate | | ||
97 | * | SIGRTMIN-SIGRTMAX | terminate | | ||
98 | * +--------------------+------------------+ | ||
99 | * | non-POSIX signal | default action | | ||
100 | * +--------------------+------------------+ | ||
101 | * | SIGEMT | coredump | | ||
102 | * +--------------------+------------------+ | ||
103 | * | ||
104 | * (+) For SIGKILL and SIGSTOP the action is "always", not just "default". | ||
105 | * (*) Special job control effects: | ||
106 | * When SIGCONT is sent, it resumes the process (all threads in the group) | ||
107 | * from TASK_STOPPED state and also clears any pending/queued stop signals | ||
108 | * (any of those marked with "stop(*)"). This happens regardless of blocking, | ||
109 | * catching, or ignoring SIGCONT. When any stop signal is sent, it clears | ||
110 | * any pending/queued SIGCONT signals; this happens regardless of blocking, | ||
111 | * catching, or ignored the stop signal, though (except for SIGSTOP) the | ||
112 | * default action of stopping the process may happen later or never. | ||
113 | */ | ||
114 | |||
115 | #ifdef SIGEMT | ||
116 | #define M_SIGEMT M(SIGEMT) | ||
117 | #else | ||
118 | #define M_SIGEMT 0 | ||
119 | #endif | ||
120 | |||
121 | #if SIGRTMIN > BITS_PER_LONG | ||
122 | #define M(sig) (1ULL << ((sig)-1)) | ||
123 | #else | ||
124 | #define M(sig) (1UL << ((sig)-1)) | ||
125 | #endif | ||
126 | #define T(sig, mask) (M(sig) & (mask)) | ||
127 | |||
128 | #define SIG_KERNEL_ONLY_MASK (\ | ||
129 | M(SIGKILL) | M(SIGSTOP) ) | ||
130 | |||
131 | #define SIG_KERNEL_STOP_MASK (\ | ||
132 | M(SIGSTOP) | M(SIGTSTP) | M(SIGTTIN) | M(SIGTTOU) ) | ||
133 | |||
134 | #define SIG_KERNEL_COREDUMP_MASK (\ | ||
135 | M(SIGQUIT) | M(SIGILL) | M(SIGTRAP) | M(SIGABRT) | \ | ||
136 | M(SIGFPE) | M(SIGSEGV) | M(SIGBUS) | M(SIGSYS) | \ | ||
137 | M(SIGXCPU) | M(SIGXFSZ) | M_SIGEMT ) | ||
138 | |||
139 | #define SIG_KERNEL_IGNORE_MASK (\ | ||
140 | M(SIGCONT) | M(SIGCHLD) | M(SIGWINCH) | M(SIGURG) ) | ||
141 | |||
142 | #define sig_kernel_only(sig) \ | ||
143 | (((sig) < SIGRTMIN) && T(sig, SIG_KERNEL_ONLY_MASK)) | ||
144 | #define sig_kernel_coredump(sig) \ | ||
145 | (((sig) < SIGRTMIN) && T(sig, SIG_KERNEL_COREDUMP_MASK)) | ||
146 | #define sig_kernel_ignore(sig) \ | ||
147 | (((sig) < SIGRTMIN) && T(sig, SIG_KERNEL_IGNORE_MASK)) | ||
148 | #define sig_kernel_stop(sig) \ | ||
149 | (((sig) < SIGRTMIN) && T(sig, SIG_KERNEL_STOP_MASK)) | ||
150 | |||
151 | #define sig_needs_tasklist(sig) ((sig) == SIGCONT) | ||
152 | |||
153 | #define sig_user_defined(t, signr) \ | ||
154 | (((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_DFL) && \ | ||
155 | ((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_IGN)) | ||
156 | |||
157 | #define sig_fatal(t, signr) \ | ||
158 | (!T(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \ | ||
159 | (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL) | ||
160 | 41 | ||
161 | static int sig_ignored(struct task_struct *t, int sig) | 42 | static int sig_ignored(struct task_struct *t, int sig) |
162 | { | 43 | { |
@@ -328,6 +209,16 @@ void flush_signals(struct task_struct *t) | |||
328 | spin_unlock_irqrestore(&t->sighand->siglock, flags); | 209 | spin_unlock_irqrestore(&t->sighand->siglock, flags); |
329 | } | 210 | } |
330 | 211 | ||
212 | void ignore_signals(struct task_struct *t) | ||
213 | { | ||
214 | int i; | ||
215 | |||
216 | for (i = 0; i < _NSIG; ++i) | ||
217 | t->sighand->action[i].sa.sa_handler = SIG_IGN; | ||
218 | |||
219 | flush_signals(t); | ||
220 | } | ||
221 | |||
331 | /* | 222 | /* |
332 | * Flush all handlers for a task. | 223 | * Flush all handlers for a task. |
333 | */ | 224 | */ |
@@ -1032,17 +923,6 @@ void zap_other_threads(struct task_struct *p) | |||
1032 | if (t->exit_state) | 923 | if (t->exit_state) |
1033 | continue; | 924 | continue; |
1034 | 925 | ||
1035 | /* | ||
1036 | * We don't want to notify the parent, since we are | ||
1037 | * killed as part of a thread group due to another | ||
1038 | * thread doing an execve() or similar. So set the | ||
1039 | * exit signal to -1 to allow immediate reaping of | ||
1040 | * the process. But don't detach the thread group | ||
1041 | * leader. | ||
1042 | */ | ||
1043 | if (t != p->group_leader) | ||
1044 | t->exit_signal = -1; | ||
1045 | |||
1046 | /* SIGKILL will be handled before any pending SIGSTOP */ | 926 | /* SIGKILL will be handled before any pending SIGSTOP */ |
1047 | sigaddset(&t->pending.signal, SIGKILL); | 927 | sigaddset(&t->pending.signal, SIGKILL); |
1048 | signal_wake_up(t, 1); | 928 | signal_wake_up(t, 1); |
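
Besides dropping the signal tables and macros (presumably relocated to a header) and the exit_signal fixup in zap_other_threads(), the signal.c hunks add ignore_signals(), which points every handler at SIG_IGN and flushes anything already queued. A hedged usage sketch; the kernel-thread consumer here is hypothetical:

	#include <linux/kthread.h>
	#include <linux/sched.h>

	static int my_kthread(void *unused)
	{
		ignore_signals(current);  /* all handlers -> SIG_IGN, queue flushed */

		while (!kthread_should_stop())
			schedule_timeout_interruptible(HZ);
		return 0;
	}
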
diff --git a/kernel/softirq.c b/kernel/softirq.c index 8b75008e2bd8..0b9886a00e74 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -593,6 +593,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, | |||
593 | 593 | ||
594 | switch (action) { | 594 | switch (action) { |
595 | case CPU_UP_PREPARE: | 595 | case CPU_UP_PREPARE: |
596 | case CPU_UP_PREPARE_FROZEN: | ||
596 | p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); | 597 | p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); |
597 | if (IS_ERR(p)) { | 598 | if (IS_ERR(p)) { |
598 | printk("ksoftirqd for %i failed\n", hotcpu); | 599 | printk("ksoftirqd for %i failed\n", hotcpu); |
@@ -602,16 +603,19 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, | |||
602 | per_cpu(ksoftirqd, hotcpu) = p; | 603 | per_cpu(ksoftirqd, hotcpu) = p; |
603 | break; | 604 | break; |
604 | case CPU_ONLINE: | 605 | case CPU_ONLINE: |
606 | case CPU_ONLINE_FROZEN: | ||
605 | wake_up_process(per_cpu(ksoftirqd, hotcpu)); | 607 | wake_up_process(per_cpu(ksoftirqd, hotcpu)); |
606 | break; | 608 | break; |
607 | #ifdef CONFIG_HOTPLUG_CPU | 609 | #ifdef CONFIG_HOTPLUG_CPU |
608 | case CPU_UP_CANCELED: | 610 | case CPU_UP_CANCELED: |
611 | case CPU_UP_CANCELED_FROZEN: | ||
609 | if (!per_cpu(ksoftirqd, hotcpu)) | 612 | if (!per_cpu(ksoftirqd, hotcpu)) |
610 | break; | 613 | break; |
611 | /* Unbind so it can run. Fall thru. */ | 614 | /* Unbind so it can run. Fall thru. */ |
612 | kthread_bind(per_cpu(ksoftirqd, hotcpu), | 615 | kthread_bind(per_cpu(ksoftirqd, hotcpu), |
613 | any_online_cpu(cpu_online_map)); | 616 | any_online_cpu(cpu_online_map)); |
614 | case CPU_DEAD: | 617 | case CPU_DEAD: |
618 | case CPU_DEAD_FROZEN: | ||
615 | p = per_cpu(ksoftirqd, hotcpu); | 619 | p = per_cpu(ksoftirqd, hotcpu); |
616 | per_cpu(ksoftirqd, hotcpu) = NULL; | 620 | per_cpu(ksoftirqd, hotcpu) = NULL; |
617 | kthread_stop(p); | 621 | kthread_stop(p); |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 8fa7040247ad..0131e296ffb4 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
@@ -146,6 +146,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
146 | 146 | ||
147 | switch (action) { | 147 | switch (action) { |
148 | case CPU_UP_PREPARE: | 148 | case CPU_UP_PREPARE: |
149 | case CPU_UP_PREPARE_FROZEN: | ||
149 | BUG_ON(per_cpu(watchdog_task, hotcpu)); | 150 | BUG_ON(per_cpu(watchdog_task, hotcpu)); |
150 | p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu); | 151 | p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu); |
151 | if (IS_ERR(p)) { | 152 | if (IS_ERR(p)) { |
@@ -157,16 +158,19 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
157 | kthread_bind(p, hotcpu); | 158 | kthread_bind(p, hotcpu); |
158 | break; | 159 | break; |
159 | case CPU_ONLINE: | 160 | case CPU_ONLINE: |
161 | case CPU_ONLINE_FROZEN: | ||
160 | wake_up_process(per_cpu(watchdog_task, hotcpu)); | 162 | wake_up_process(per_cpu(watchdog_task, hotcpu)); |
161 | break; | 163 | break; |
162 | #ifdef CONFIG_HOTPLUG_CPU | 164 | #ifdef CONFIG_HOTPLUG_CPU |
163 | case CPU_UP_CANCELED: | 165 | case CPU_UP_CANCELED: |
166 | case CPU_UP_CANCELED_FROZEN: | ||
164 | if (!per_cpu(watchdog_task, hotcpu)) | 167 | if (!per_cpu(watchdog_task, hotcpu)) |
165 | break; | 168 | break; |
166 | /* Unbind so it can run. Fall thru. */ | 169 | /* Unbind so it can run. Fall thru. */ |
167 | kthread_bind(per_cpu(watchdog_task, hotcpu), | 170 | kthread_bind(per_cpu(watchdog_task, hotcpu), |
168 | any_online_cpu(cpu_online_map)); | 171 | any_online_cpu(cpu_online_map)); |
169 | case CPU_DEAD: | 172 | case CPU_DEAD: |
173 | case CPU_DEAD_FROZEN: | ||
170 | p = per_cpu(watchdog_task, hotcpu); | 174 | p = per_cpu(watchdog_task, hotcpu); |
171 | per_cpu(watchdog_task, hotcpu) = NULL; | 175 | per_cpu(watchdog_task, hotcpu) = NULL; |
172 | kthread_stop(p); | 176 | kthread_stop(p); |
diff --git a/kernel/sys.c b/kernel/sys.c index 0742c938dfa7..cdb7e9457ba6 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -134,19 +134,39 @@ static int notifier_chain_unregister(struct notifier_block **nl, | |||
134 | return -ENOENT; | 134 | return -ENOENT; |
135 | } | 135 | } |
136 | 136 | ||
137 | /** | ||
138 | * notifier_call_chain - Informs the registered notifiers about an event. | ||
139 | * @nl: Pointer to head of the blocking notifier chain | ||
140 | * @val: Value passed unmodified to notifier function | ||
141 | * @v: Pointer passed unmodified to notifier function | ||
142 | * @nr_to_call: Number of notifier functions to be called; pass -1 | ||
143 | * to call every function on the chain. | ||
144 | * @nr_calls: Records the number of notifications sent; pass NULL | ||
145 | * if the count is not needed. | ||
146 | * @returns: notifier_call_chain returns the value returned by the | ||
147 | * last notifier function called. | ||
148 | */ | ||
149 | |||
137 | static int __kprobes notifier_call_chain(struct notifier_block **nl, | 150 | static int __kprobes notifier_call_chain(struct notifier_block **nl, |
138 | unsigned long val, void *v) | 151 | unsigned long val, void *v, |
152 | int nr_to_call, int *nr_calls) | ||
139 | { | 153 | { |
140 | int ret = NOTIFY_DONE; | 154 | int ret = NOTIFY_DONE; |
141 | struct notifier_block *nb, *next_nb; | 155 | struct notifier_block *nb, *next_nb; |
142 | 156 | ||
143 | nb = rcu_dereference(*nl); | 157 | nb = rcu_dereference(*nl); |
144 | while (nb) { | 158 | |
159 | while (nb && nr_to_call) { | ||
145 | next_nb = rcu_dereference(nb->next); | 160 | next_nb = rcu_dereference(nb->next); |
146 | ret = nb->notifier_call(nb, val, v); | 161 | ret = nb->notifier_call(nb, val, v); |
162 | |||
163 | if (nr_calls) | ||
164 | (*nr_calls)++; | ||
165 | |||
147 | if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK) | 166 | if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK) |
148 | break; | 167 | break; |
149 | nb = next_nb; | 168 | nb = next_nb; |
169 | nr_to_call--; | ||
150 | } | 170 | } |
151 | return ret; | 171 | return ret; |
152 | } | 172 | } |
@@ -205,10 +225,12 @@ int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, | |||
205 | EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); | 225 | EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); |
206 | 226 | ||
207 | /** | 227 | /** |
208 | * atomic_notifier_call_chain - Call functions in an atomic notifier chain | 228 | * __atomic_notifier_call_chain - Call functions in an atomic notifier chain |
209 | * @nh: Pointer to head of the atomic notifier chain | 229 | * @nh: Pointer to head of the atomic notifier chain |
210 | * @val: Value passed unmodified to notifier function | 230 | * @val: Value passed unmodified to notifier function |
211 | * @v: Pointer passed unmodified to notifier function | 231 | * @v: Pointer passed unmodified to notifier function |
232 | * @nr_to_call: See the comment for notifier_call_chain. | ||
233 | * @nr_calls: See the comment for notifier_call_chain. | ||
212 | * | 234 | * |
213 | * Calls each function in a notifier chain in turn. The functions | 235 | * Calls each function in a notifier chain in turn. The functions |
214 | * run in an atomic context, so they must not block. | 236 | * run in an atomic context, so they must not block. |
@@ -222,19 +244,27 @@ EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); | |||
222 | * of the last notifier function called. | 244 | * of the last notifier function called. |
223 | */ | 245 | */ |
224 | 246 | ||
225 | int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh, | 247 | int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh, |
226 | unsigned long val, void *v) | 248 | unsigned long val, void *v, |
249 | int nr_to_call, int *nr_calls) | ||
227 | { | 250 | { |
228 | int ret; | 251 | int ret; |
229 | 252 | ||
230 | rcu_read_lock(); | 253 | rcu_read_lock(); |
231 | ret = notifier_call_chain(&nh->head, val, v); | 254 | ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); |
232 | rcu_read_unlock(); | 255 | rcu_read_unlock(); |
233 | return ret; | 256 | return ret; |
234 | } | 257 | } |
235 | 258 | ||
236 | EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); | 259 | EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain); |
260 | |||
261 | int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh, | ||
262 | unsigned long val, void *v) | ||
263 | { | ||
264 | return __atomic_notifier_call_chain(nh, val, v, -1, NULL); | ||
265 | } | ||
237 | 266 | ||
267 | EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); | ||
238 | /* | 268 | /* |
239 | * Blocking notifier chain routines. All access to the chain is | 269 | * Blocking notifier chain routines. All access to the chain is |
240 | * synchronized by an rwsem. | 270 | * synchronized by an rwsem. |
@@ -304,10 +334,12 @@ int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, | |||
304 | EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister); | 334 | EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister); |
305 | 335 | ||
306 | /** | 336 | /** |
307 | * blocking_notifier_call_chain - Call functions in a blocking notifier chain | 337 | * __blocking_notifier_call_chain - Call functions in a blocking notifier chain |
308 | * @nh: Pointer to head of the blocking notifier chain | 338 | * @nh: Pointer to head of the blocking notifier chain |
309 | * @val: Value passed unmodified to notifier function | 339 | * @val: Value passed unmodified to notifier function |
310 | * @v: Pointer passed unmodified to notifier function | 340 | * @v: Pointer passed unmodified to notifier function |
341 | * @nr_to_call: See comment for notifier_call_chain. | ||
342 | * @nr_calls: See comment for notifier_call_chain. | ||
311 | * | 343 | * |
312 | * Calls each function in a notifier chain in turn. The functions | 344 | * Calls each function in a notifier chain in turn. The functions |
313 | * run in a process context, so they are allowed to block. | 345 | * run in a process context, so they are allowed to block. |
@@ -320,8 +352,9 @@ EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister); | |||
320 | * of the last notifier function called. | 352 | * of the last notifier function called. |
321 | */ | 353 | */ |
322 | 354 | ||
323 | int blocking_notifier_call_chain(struct blocking_notifier_head *nh, | 355 | int __blocking_notifier_call_chain(struct blocking_notifier_head *nh, |
324 | unsigned long val, void *v) | 356 | unsigned long val, void *v, |
357 | int nr_to_call, int *nr_calls) | ||
325 | { | 358 | { |
326 | int ret = NOTIFY_DONE; | 359 | int ret = NOTIFY_DONE; |
327 | 360 | ||
@@ -332,12 +365,19 @@ int blocking_notifier_call_chain(struct blocking_notifier_head *nh, | |||
332 | */ | 365 | */ |
333 | if (rcu_dereference(nh->head)) { | 366 | if (rcu_dereference(nh->head)) { |
334 | down_read(&nh->rwsem); | 367 | down_read(&nh->rwsem); |
335 | ret = notifier_call_chain(&nh->head, val, v); | 368 | ret = notifier_call_chain(&nh->head, val, v, nr_to_call, |
369 | nr_calls); | ||
336 | up_read(&nh->rwsem); | 370 | up_read(&nh->rwsem); |
337 | } | 371 | } |
338 | return ret; | 372 | return ret; |
339 | } | 373 | } |
374 | EXPORT_SYMBOL_GPL(__blocking_notifier_call_chain); | ||
340 | 375 | ||
376 | int blocking_notifier_call_chain(struct blocking_notifier_head *nh, | ||
377 | unsigned long val, void *v) | ||
378 | { | ||
379 | return __blocking_notifier_call_chain(nh, val, v, -1, NULL); | ||
380 | } | ||
341 | EXPORT_SYMBOL_GPL(blocking_notifier_call_chain); | 381 | EXPORT_SYMBOL_GPL(blocking_notifier_call_chain); |
342 | 382 | ||
343 | /* | 383 | /* |
@@ -383,10 +423,12 @@ int raw_notifier_chain_unregister(struct raw_notifier_head *nh, | |||
383 | EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister); | 423 | EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister); |
384 | 424 | ||
385 | /** | 425 | /** |
386 | * raw_notifier_call_chain - Call functions in a raw notifier chain | 426 | * __raw_notifier_call_chain - Call functions in a raw notifier chain |
387 | * @nh: Pointer to head of the raw notifier chain | 427 | * @nh: Pointer to head of the raw notifier chain |
388 | * @val: Value passed unmodified to notifier function | 428 | * @val: Value passed unmodified to notifier function |
389 | * @v: Pointer passed unmodified to notifier function | 429 | * @v: Pointer passed unmodified to notifier function |
430 | * @nr_to_call: See comment for notifier_call_chain. | ||
431 | * @nr_calls: See comment for notifier_call_chain | ||
390 | * | 432 | * |
391 | * Calls each function in a notifier chain in turn. The functions | 433 | * Calls each function in a notifier chain in turn. The functions |
392 | * run in an undefined context. | 434 | * run in an undefined context. |
@@ -400,10 +442,19 @@ EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister); | |||
400 | * of the last notifier function called. | 442 | * of the last notifier function called. |
401 | */ | 443 | */ |
402 | 444 | ||
445 | int __raw_notifier_call_chain(struct raw_notifier_head *nh, | ||
446 | unsigned long val, void *v, | ||
447 | int nr_to_call, int *nr_calls) | ||
448 | { | ||
449 | return notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); | ||
450 | } | ||
451 | |||
452 | EXPORT_SYMBOL_GPL(__raw_notifier_call_chain); | ||
453 | |||
403 | int raw_notifier_call_chain(struct raw_notifier_head *nh, | 454 | int raw_notifier_call_chain(struct raw_notifier_head *nh, |
404 | unsigned long val, void *v) | 455 | unsigned long val, void *v) |
405 | { | 456 | { |
406 | return notifier_call_chain(&nh->head, val, v); | 457 | return __raw_notifier_call_chain(nh, val, v, -1, NULL); |
407 | } | 458 | } |
408 | 459 | ||
409 | EXPORT_SYMBOL_GPL(raw_notifier_call_chain); | 460 | EXPORT_SYMBOL_GPL(raw_notifier_call_chain); |
@@ -478,10 +529,12 @@ int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, | |||
478 | EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister); | 529 | EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister); |
479 | 530 | ||
480 | /** | 531 | /** |
481 | * srcu_notifier_call_chain - Call functions in an SRCU notifier chain | 532 | * __srcu_notifier_call_chain - Call functions in an SRCU notifier chain |
482 | * @nh: Pointer to head of the SRCU notifier chain | 533 | * @nh: Pointer to head of the SRCU notifier chain |
483 | * @val: Value passed unmodified to notifier function | 534 | * @val: Value passed unmodified to notifier function |
484 | * @v: Pointer passed unmodified to notifier function | 535 | * @v: Pointer passed unmodified to notifier function |
536 | * @nr_to_call: See comment for notifier_call_chain. | ||
537 | * @nr_calls: See comment for notifier_call_chain | ||
485 | * | 538 | * |
486 | * Calls each function in a notifier chain in turn. The functions | 539 | * Calls each function in a notifier chain in turn. The functions |
487 | * run in a process context, so they are allowed to block. | 540 | * run in a process context, so they are allowed to block. |
@@ -494,18 +547,25 @@ EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister); | |||
494 | * of the last notifier function called. | 547 | * of the last notifier function called. |
495 | */ | 548 | */ |
496 | 549 | ||
497 | int srcu_notifier_call_chain(struct srcu_notifier_head *nh, | 550 | int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, |
498 | unsigned long val, void *v) | 551 | unsigned long val, void *v, |
552 | int nr_to_call, int *nr_calls) | ||
499 | { | 553 | { |
500 | int ret; | 554 | int ret; |
501 | int idx; | 555 | int idx; |
502 | 556 | ||
503 | idx = srcu_read_lock(&nh->srcu); | 557 | idx = srcu_read_lock(&nh->srcu); |
504 | ret = notifier_call_chain(&nh->head, val, v); | 558 | ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); |
505 | srcu_read_unlock(&nh->srcu, idx); | 559 | srcu_read_unlock(&nh->srcu, idx); |
506 | return ret; | 560 | return ret; |
507 | } | 561 | } |
562 | EXPORT_SYMBOL_GPL(__srcu_notifier_call_chain); | ||
508 | 563 | ||
564 | int srcu_notifier_call_chain(struct srcu_notifier_head *nh, | ||
565 | unsigned long val, void *v) | ||
566 | { | ||
567 | return __srcu_notifier_call_chain(nh, val, v, -1, NULL); | ||
568 | } | ||
509 | EXPORT_SYMBOL_GPL(srcu_notifier_call_chain); | 569 | EXPORT_SYMBOL_GPL(srcu_notifier_call_chain); |
510 | 570 | ||
511 | /** | 571 | /** |
@@ -881,7 +941,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user | |||
881 | #ifdef CONFIG_SOFTWARE_SUSPEND | 941 | #ifdef CONFIG_SOFTWARE_SUSPEND |
882 | case LINUX_REBOOT_CMD_SW_SUSPEND: | 942 | case LINUX_REBOOT_CMD_SW_SUSPEND: |
883 | { | 943 | { |
884 | int ret = pm_suspend(PM_SUSPEND_DISK); | 944 | int ret = hibernate(); |
885 | unlock_kernel(); | 945 | unlock_kernel(); |
886 | return ret; | 946 | return ret; |
887 | } | 947 | } |
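
Each notifier flavour above gains a __-prefixed variant taking nr_to_call and nr_calls, with the historical entry points reduced to wrappers passing (-1, NULL). The point of counting calls is rollback: a caller whose "prepare" event is vetoed partway can re-notify exactly the callbacks that ran. A sketch of that pattern, modelled on what the CPU-hotplug rework in this series appears to need; the function itself is illustrative:

	#include <linux/cpu.h>
	#include <linux/notifier.h>

	static int my_down_prepare(struct raw_notifier_head *chain, void *hcpu)
	{
		int nr_calls = 0;
		int ret;

		ret = __raw_notifier_call_chain(chain, CPU_DOWN_PREPARE, hcpu,
						-1, &nr_calls);
		if (ret == NOTIFY_BAD) {
			nr_calls--;	/* the vetoing callback already knows */
			__raw_notifier_call_chain(chain, CPU_DOWN_FAILED, hcpu,
						  nr_calls, NULL);
			return -EINVAL;
		}
		return 0;
	}
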
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f0664bd5011c..4073353abd4f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -77,6 +77,7 @@ extern int sysctl_drop_caches; | |||
77 | extern int percpu_pagelist_fraction; | 77 | extern int percpu_pagelist_fraction; |
78 | extern int compat_log; | 78 | extern int compat_log; |
79 | extern int maps_protect; | 79 | extern int maps_protect; |
80 | extern int sysctl_stat_interval; | ||
80 | 81 | ||
81 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ | 82 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ |
82 | static int maxolduid = 65535; | 83 | static int maxolduid = 65535; |
@@ -857,6 +858,17 @@ static ctl_table vm_table[] = { | |||
857 | .extra2 = &one_hundred, | 858 | .extra2 = &one_hundred, |
858 | }, | 859 | }, |
859 | #endif | 860 | #endif |
861 | #ifdef CONFIG_SMP | ||
862 | { | ||
863 | .ctl_name = CTL_UNNUMBERED, | ||
864 | .procname = "stat_interval", | ||
865 | .data = &sysctl_stat_interval, | ||
866 | .maxlen = sizeof(sysctl_stat_interval), | ||
867 | .mode = 0644, | ||
868 | .proc_handler = &proc_dointvec_jiffies, | ||
869 | .strategy = &sysctl_jiffies, | ||
870 | }, | ||
871 | #endif | ||
860 | #if defined(CONFIG_X86_32) || \ | 872 | #if defined(CONFIG_X86_32) || \ |
861 | (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) | 873 | (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) |
862 | { | 874 | { |
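
The new vm_table entry exposes sysctl_stat_interval (the vmstat refresh period, kept in jiffies) as /proc/sys/vm/stat_interval; proc_dointvec_jiffies converts to and from seconds at the proc boundary. A small userspace sketch, assuming the standard proc mount:

	/* Read the current vmstat update period (reported in seconds). */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/vm/stat_interval", "r");
		int seconds;

		if (f && fscanf(f, "%d", &seconds) == 1)
			printf("vmstat updates every %d s\n", seconds);
		if (f)
			fclose(f);
		return 0;
	}
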
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index fe5c7db24247..3db5c3c460d7 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -74,15 +74,17 @@ static struct clocksource *watchdog; | |||
74 | static struct timer_list watchdog_timer; | 74 | static struct timer_list watchdog_timer; |
75 | static DEFINE_SPINLOCK(watchdog_lock); | 75 | static DEFINE_SPINLOCK(watchdog_lock); |
76 | static cycle_t watchdog_last; | 76 | static cycle_t watchdog_last; |
77 | static int watchdog_resumed; | ||
78 | |||
77 | /* | 79 | /* |
78 | * Interval: 0.5sec Treshold: 0.0625s | 80 | * Interval: 0.5sec Threshold: 0.0625s |
79 | */ | 81 | */ |
80 | #define WATCHDOG_INTERVAL (HZ >> 1) | 82 | #define WATCHDOG_INTERVAL (HZ >> 1) |
81 | #define WATCHDOG_TRESHOLD (NSEC_PER_SEC >> 4) | 83 | #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4) |
82 | 84 | ||
83 | static void clocksource_ratewd(struct clocksource *cs, int64_t delta) | 85 | static void clocksource_ratewd(struct clocksource *cs, int64_t delta) |
84 | { | 86 | { |
85 | if (delta > -WATCHDOG_TRESHOLD && delta < WATCHDOG_TRESHOLD) | 87 | if (delta > -WATCHDOG_THRESHOLD && delta < WATCHDOG_THRESHOLD) |
86 | return; | 88 | return; |
87 | 89 | ||
88 | printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", | 90 | printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", |
@@ -98,15 +100,26 @@ static void clocksource_watchdog(unsigned long data) | |||
98 | struct clocksource *cs, *tmp; | 100 | struct clocksource *cs, *tmp; |
99 | cycle_t csnow, wdnow; | 101 | cycle_t csnow, wdnow; |
100 | int64_t wd_nsec, cs_nsec; | 102 | int64_t wd_nsec, cs_nsec; |
103 | int resumed; | ||
101 | 104 | ||
102 | spin_lock(&watchdog_lock); | 105 | spin_lock(&watchdog_lock); |
103 | 106 | ||
107 | resumed = watchdog_resumed; | ||
108 | if (unlikely(resumed)) | ||
109 | watchdog_resumed = 0; | ||
110 | |||
104 | wdnow = watchdog->read(); | 111 | wdnow = watchdog->read(); |
105 | wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); | 112 | wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); |
106 | watchdog_last = wdnow; | 113 | watchdog_last = wdnow; |
107 | 114 | ||
108 | list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { | 115 | list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { |
109 | csnow = cs->read(); | 116 | csnow = cs->read(); |
117 | |||
118 | if (unlikely(resumed)) { | ||
119 | cs->wd_last = csnow; | ||
120 | continue; | ||
121 | } | ||
122 | |||
110 | /* Initialized ? */ | 123 | /* Initialized ? */ |
111 | if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { | 124 | if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { |
112 | if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && | 125 | if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && |
@@ -136,6 +149,13 @@ static void clocksource_watchdog(unsigned long data) | |||
136 | } | 149 | } |
137 | spin_unlock(&watchdog_lock); | 150 | spin_unlock(&watchdog_lock); |
138 | } | 151 | } |
152 | static void clocksource_resume_watchdog(void) | ||
153 | { | ||
154 | spin_lock(&watchdog_lock); | ||
155 | watchdog_resumed = 1; | ||
156 | spin_unlock(&watchdog_lock); | ||
157 | } | ||
158 | |||
139 | static void clocksource_check_watchdog(struct clocksource *cs) | 159 | static void clocksource_check_watchdog(struct clocksource *cs) |
140 | { | 160 | { |
141 | struct clocksource *cse; | 161 | struct clocksource *cse; |
@@ -182,9 +202,34 @@ static void clocksource_check_watchdog(struct clocksource *cs) | |||
182 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) | 202 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) |
183 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | 203 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; |
184 | } | 204 | } |
205 | |||
206 | static inline void clocksource_resume_watchdog(void) { } | ||
185 | #endif | 207 | #endif |
186 | 208 | ||
187 | /** | 209 | /** |
210 | * clocksource_resume - resume the clocksource(s) | ||
211 | */ | ||
212 | void clocksource_resume(void) | ||
213 | { | ||
214 | struct list_head *tmp; | ||
215 | unsigned long flags; | ||
216 | |||
217 | spin_lock_irqsave(&clocksource_lock, flags); | ||
218 | |||
219 | list_for_each(tmp, &clocksource_list) { | ||
220 | struct clocksource *cs; | ||
221 | |||
222 | cs = list_entry(tmp, struct clocksource, list); | ||
223 | if (cs->resume) | ||
224 | cs->resume(); | ||
225 | } | ||
226 | |||
227 | clocksource_resume_watchdog(); | ||
228 | |||
229 | spin_unlock_irqrestore(&clocksource_lock, flags); | ||
230 | } | ||
231 | |||
232 | /** | ||
188 | * clocksource_get_next - Returns the selected clocksource | 233 | * clocksource_get_next - Returns the selected clocksource |
189 | * | 234 | * |
190 | */ | 235 | */ |
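
clocksource_resume() above gives every registered clocksource a chance to reprogram its hardware after suspend, then sets watchdog_resumed so the next watchdog run only re-seeds wd_last instead of flagging the huge post-resume delta as instability. On the driver side the contract is one new optional callback; a hedged sketch with hypothetical my_* names:

	#include <linux/clocksource.h>

	static void my_clocksource_resume(void)
	{
		/* reprogram the counter the same way the init code did */
	}

	static struct clocksource my_clocksource = {
		.name	= "my_counter",
		.rating	= 200,
		.resume	= my_clocksource_resume,
		/* .read, .mask, .mult, .shift as usual */
	};
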
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index b734ca4bc75e..8bbcfb77f7d2 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
@@ -65,7 +65,7 @@ print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now) | |||
65 | SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); | 65 | SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); |
66 | #endif | 66 | #endif |
67 | SEQ_printf(m, "\n"); | 67 | SEQ_printf(m, "\n"); |
68 | SEQ_printf(m, " # expires at %Ld nsecs [in %Ld nsecs]\n", | 68 | SEQ_printf(m, " # expires at %Lu nsecs [in %Lu nsecs]\n", |
69 | (unsigned long long)ktime_to_ns(timer->expires), | 69 | (unsigned long long)ktime_to_ns(timer->expires), |
70 | (unsigned long long)(ktime_to_ns(timer->expires) - now)); | 70 | (unsigned long long)(ktime_to_ns(timer->expires) - now)); |
71 | } | 71 | } |
@@ -111,14 +111,14 @@ print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) | |||
111 | { | 111 | { |
112 | SEQ_printf(m, " .index: %d\n", | 112 | SEQ_printf(m, " .index: %d\n", |
113 | base->index); | 113 | base->index); |
114 | SEQ_printf(m, " .resolution: %Ld nsecs\n", | 114 | SEQ_printf(m, " .resolution: %Lu nsecs\n", |
115 | (unsigned long long)ktime_to_ns(base->resolution)); | 115 | (unsigned long long)ktime_to_ns(base->resolution)); |
116 | SEQ_printf(m, " .get_time: "); | 116 | SEQ_printf(m, " .get_time: "); |
117 | print_name_offset(m, base->get_time); | 117 | print_name_offset(m, base->get_time); |
118 | SEQ_printf(m, "\n"); | 118 | SEQ_printf(m, "\n"); |
119 | #ifdef CONFIG_HIGH_RES_TIMERS | 119 | #ifdef CONFIG_HIGH_RES_TIMERS |
120 | SEQ_printf(m, " .offset: %Ld nsecs\n", | 120 | SEQ_printf(m, " .offset: %Lu nsecs\n", |
121 | ktime_to_ns(base->offset)); | 121 | (unsigned long long) ktime_to_ns(base->offset)); |
122 | #endif | 122 | #endif |
123 | SEQ_printf(m, "active timers:\n"); | 123 | SEQ_printf(m, "active timers:\n"); |
124 | print_active_timers(m, base, now); | 124 | print_active_timers(m, base, now); |
@@ -135,10 +135,11 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) | |||
135 | print_base(m, cpu_base->clock_base + i, now); | 135 | print_base(m, cpu_base->clock_base + i, now); |
136 | } | 136 | } |
137 | #define P(x) \ | 137 | #define P(x) \ |
138 | SEQ_printf(m, " .%-15s: %Ld\n", #x, (u64)(cpu_base->x)) | 138 | SEQ_printf(m, " .%-15s: %Lu\n", #x, \ |
139 | (unsigned long long)(cpu_base->x)) | ||
139 | #define P_ns(x) \ | 140 | #define P_ns(x) \ |
140 | SEQ_printf(m, " .%-15s: %Ld nsecs\n", #x, \ | 141 | SEQ_printf(m, " .%-15s: %Lu nsecs\n", #x, \ |
141 | (u64)(ktime_to_ns(cpu_base->x))) | 142 | (unsigned long long)(ktime_to_ns(cpu_base->x))) |
142 | 143 | ||
143 | #ifdef CONFIG_HIGH_RES_TIMERS | 144 | #ifdef CONFIG_HIGH_RES_TIMERS |
144 | P_ns(expires_next); | 145 | P_ns(expires_next); |
@@ -150,10 +151,11 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) | |||
150 | 151 | ||
151 | #ifdef CONFIG_TICK_ONESHOT | 152 | #ifdef CONFIG_TICK_ONESHOT |
152 | # define P(x) \ | 153 | # define P(x) \ |
153 | SEQ_printf(m, " .%-15s: %Ld\n", #x, (u64)(ts->x)) | 154 | SEQ_printf(m, " .%-15s: %Lu\n", #x, \ |
155 | (unsigned long long)(ts->x)) | ||
154 | # define P_ns(x) \ | 156 | # define P_ns(x) \ |
155 | SEQ_printf(m, " .%-15s: %Ld nsecs\n", #x, \ | 157 | SEQ_printf(m, " .%-15s: %Lu nsecs\n", #x, \ |
156 | (u64)(ktime_to_ns(ts->x))) | 158 | (unsigned long long)(ktime_to_ns(ts->x))) |
157 | { | 159 | { |
158 | struct tick_sched *ts = tick_get_tick_sched(cpu); | 160 | struct tick_sched *ts = tick_get_tick_sched(cpu); |
159 | P(nohz_mode); | 161 | P(nohz_mode); |
@@ -167,7 +169,8 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) | |||
167 | P(last_jiffies); | 169 | P(last_jiffies); |
168 | P(next_jiffies); | 170 | P(next_jiffies); |
169 | P_ns(idle_expires); | 171 | P_ns(idle_expires); |
170 | SEQ_printf(m, "jiffies: %Ld\n", (u64)jiffies); | 172 | SEQ_printf(m, "jiffies: %Lu\n", |
173 | (unsigned long long)jiffies); | ||
171 | } | 174 | } |
172 | #endif | 175 | #endif |
173 | 176 | ||
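
The timer_list.c hunks above swap %Ld for %Lu and add explicit casts: these values are unsigned, and u64 is unsigned long rather than unsigned long long on some 64-bit architectures, so an uncast u64 does not reliably match a %Lu conversion. The resulting idiom:

	#include <linux/kernel.h>
	#include <linux/ktime.h>

	/* Always cast a u64 before handing it to %Lu/%llu. */
	static void print_expiry(ktime_t expires)
	{
		printk(" # expires at %Lu nsecs\n",
		       (unsigned long long)ktime_to_ns(expires));
	}
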
diff --git a/kernel/timer.c b/kernel/timer.c index 7a6448340f90..59a28b1752f8 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -92,24 +92,24 @@ static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases; | |||
92 | /* Functions below help us manage 'deferrable' flag */ | 92 | /* Functions below help us manage 'deferrable' flag */ |
93 | static inline unsigned int tbase_get_deferrable(tvec_base_t *base) | 93 | static inline unsigned int tbase_get_deferrable(tvec_base_t *base) |
94 | { | 94 | { |
95 | return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG); | 95 | return (unsigned int)((unsigned long)base & TBASE_DEFERRABLE_FLAG); |
96 | } | 96 | } |
97 | 97 | ||
98 | static inline tvec_base_t *tbase_get_base(tvec_base_t *base) | 98 | static inline tvec_base_t *tbase_get_base(tvec_base_t *base) |
99 | { | 99 | { |
100 | return ((tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG)); | 100 | return (tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG); |
101 | } | 101 | } |
102 | 102 | ||
103 | static inline void timer_set_deferrable(struct timer_list *timer) | 103 | static inline void timer_set_deferrable(struct timer_list *timer) |
104 | { | 104 | { |
105 | timer->base = ((tvec_base_t *)((unsigned long)(timer->base) | | 105 | timer->base = (tvec_base_t *)((unsigned long)timer->base | |
106 | TBASE_DEFERRABLE_FLAG)); | 106 | TBASE_DEFERRABLE_FLAG); |
107 | } | 107 | } |
108 | 108 | ||
109 | static inline void | 109 | static inline void |
110 | timer_set_base(struct timer_list *timer, tvec_base_t *new_base) | 110 | timer_set_base(struct timer_list *timer, tvec_base_t *new_base) |
111 | { | 111 | { |
112 | timer->base = (tvec_base_t *)((unsigned long)(new_base) | | 112 | timer->base = (tvec_base_t *)((unsigned long)new_base | |
113 | tbase_get_deferrable(timer->base)); | 113 | tbase_get_deferrable(timer->base)); |
114 | } | 114 | } |
115 | 115 | ||
@@ -1293,11 +1293,13 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self, | |||
1293 | long cpu = (long)hcpu; | 1293 | long cpu = (long)hcpu; |
1294 | switch(action) { | 1294 | switch(action) { |
1295 | case CPU_UP_PREPARE: | 1295 | case CPU_UP_PREPARE: |
1296 | case CPU_UP_PREPARE_FROZEN: | ||
1296 | if (init_timers_cpu(cpu) < 0) | 1297 | if (init_timers_cpu(cpu) < 0) |
1297 | return NOTIFY_BAD; | 1298 | return NOTIFY_BAD; |
1298 | break; | 1299 | break; |
1299 | #ifdef CONFIG_HOTPLUG_CPU | 1300 | #ifdef CONFIG_HOTPLUG_CPU |
1300 | case CPU_DEAD: | 1301 | case CPU_DEAD: |
1302 | case CPU_DEAD_FROZEN: | ||
1301 | migrate_timers(cpu); | 1303 | migrate_timers(cpu); |
1302 | break; | 1304 | break; |
1303 | #endif | 1305 | #endif |
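CPU_UP_PREPARE_FROZEN and CPU_DEAD_FROZEN are the same events with an extra "tasks are frozen for suspend" bit set, so the new case labels simply fall through to the existing handlers. The alternative style, used by the workqueue callback later in this patch, masks the bit off once with action &= ~CPU_TASKS_FROZEN. A small sketch of that masking style — the constant values below only mimic the kernel's encoding and are not authoritative:

#include <stdio.h>

/* Illustrative values only: in the kernel the _FROZEN events are
 * the base event with the CPU_TASKS_FROZEN bit or'ed in. */
#define CPU_TASKS_FROZEN	0x0010
#define CPU_UP_PREPARE		0x0003
#define CPU_UP_PREPARE_FROZEN	(CPU_UP_PREPARE | CPU_TASKS_FROZEN)

static void cpu_notify(unsigned long action)
{
	int frozen = !!(action & CPU_TASKS_FROZEN);

	switch (action & ~CPU_TASKS_FROZEN) {	/* one case covers both */
	case CPU_UP_PREPARE:
		printf("up-prepare, frozen=%d\n", frozen);
		break;
	}
}

int main(void)
{
	cpu_notify(CPU_UP_PREPARE);
	cpu_notify(CPU_UP_PREPARE_FROZEN);
	return 0;
}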
@@ -1497,6 +1499,8 @@ unregister_time_interpolator(struct time_interpolator *ti) | |||
1497 | prev = &curr->next; | 1499 | prev = &curr->next; |
1498 | } | 1500 | } |
1499 | 1501 | ||
1502 | clocksource_resume(); | ||
1503 | |||
1500 | write_seqlock_irqsave(&xtime_lock, flags); | 1504 | write_seqlock_irqsave(&xtime_lock, flags); |
1501 | if (ti == time_interpolator) { | 1505 | if (ti == time_interpolator) { |
1502 | /* we lost the best time-interpolator: */ | 1506 | /* we lost the best time-interpolator: */ |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b6fa5e63085d..fb56fedd5c02 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -36,30 +36,20 @@ | |||
36 | /* | 36 | /* |
37 | * The per-CPU workqueue (if single thread, we always use the first | 37 | * The per-CPU workqueue (if single thread, we always use the first |
38 | * possible cpu). | 38 | * possible cpu). |
39 | * | ||
40 | * The sequence counters are for flush_scheduled_work(). It wants to wait | ||
41 | * until all currently-scheduled works are completed, but it doesn't | ||
42 | * want to be livelocked by new, incoming ones. So it waits until | ||
43 | * remove_sequence is >= the insert_sequence which pertained when | ||
44 | * flush_scheduled_work() was called. | ||
45 | */ | 39 | */ |
46 | struct cpu_workqueue_struct { | 40 | struct cpu_workqueue_struct { |
47 | 41 | ||
48 | spinlock_t lock; | 42 | spinlock_t lock; |
49 | 43 | ||
50 | long remove_sequence; /* Least-recently added (next to run) */ | ||
51 | long insert_sequence; /* Next to add */ | ||
52 | |||
53 | struct list_head worklist; | 44 | struct list_head worklist; |
54 | wait_queue_head_t more_work; | 45 | wait_queue_head_t more_work; |
55 | wait_queue_head_t work_done; | 46 | struct work_struct *current_work; |
56 | 47 | ||
57 | struct workqueue_struct *wq; | 48 | struct workqueue_struct *wq; |
58 | struct task_struct *thread; | 49 | struct task_struct *thread; |
50 | int should_stop; | ||
59 | 51 | ||
60 | int run_depth; /* Detect run_workqueue() recursion depth */ | 52 | int run_depth; /* Detect run_workqueue() recursion depth */ |
61 | |||
62 | int freezeable; /* Freeze the thread during suspend */ | ||
63 | } ____cacheline_aligned; | 53 | } ____cacheline_aligned; |
64 | 54 | ||
65 | /* | 55 | /* |
@@ -68,8 +58,10 @@ struct cpu_workqueue_struct { | |||
68 | */ | 58 | */ |
69 | struct workqueue_struct { | 59 | struct workqueue_struct { |
70 | struct cpu_workqueue_struct *cpu_wq; | 60 | struct cpu_workqueue_struct *cpu_wq; |
61 | struct list_head list; | ||
71 | const char *name; | 62 | const char *name; |
72 | struct list_head list; /* Empty if single thread */ | 63 | int singlethread; |
64 | int freezeable; /* Freeze threads during suspend */ | ||
73 | }; | 65 | }; |
74 | 66 | ||
75 | /* All the per-cpu workqueues on the system, for hotplug cpu to add/remove | 67 | /* All the per-cpu workqueues on the system, for hotplug cpu to add/remove |
@@ -77,106 +69,68 @@ struct workqueue_struct { | |||
77 | static DEFINE_MUTEX(workqueue_mutex); | 69 | static DEFINE_MUTEX(workqueue_mutex); |
78 | static LIST_HEAD(workqueues); | 70 | static LIST_HEAD(workqueues); |
79 | 71 | ||
80 | static int singlethread_cpu; | 72 | static int singlethread_cpu __read_mostly; |
73 | static cpumask_t cpu_singlethread_map __read_mostly; | ||
74 | /* optimization, we could use cpu_possible_map */ | ||
75 | static cpumask_t cpu_populated_map __read_mostly; | ||
81 | 76 | ||
82 | /* If it's single threaded, it isn't in the list of workqueues. */ | 77 | /* If it's single threaded, it isn't in the list of workqueues. */ |
83 | static inline int is_single_threaded(struct workqueue_struct *wq) | 78 | static inline int is_single_threaded(struct workqueue_struct *wq) |
84 | { | 79 | { |
85 | return list_empty(&wq->list); | 80 | return wq->singlethread; |
81 | } | ||
82 | |||
83 | static const cpumask_t *wq_cpu_map(struct workqueue_struct *wq) | ||
84 | { | ||
85 | return is_single_threaded(wq) | ||
86 | ? &cpu_singlethread_map : &cpu_populated_map; | ||
87 | } | ||
88 | |||
89 | static | ||
90 | struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu) | ||
91 | { | ||
92 | if (unlikely(is_single_threaded(wq))) | ||
93 | cpu = singlethread_cpu; | ||
94 | return per_cpu_ptr(wq->cpu_wq, cpu); | ||
86 | } | 95 | } |
87 | 96 | ||
88 | /* | 97 | /* |
89 | * Set the workqueue on which a work item is to be run | 98 | * Set the workqueue on which a work item is to be run |
90 | * - Must *only* be called if the pending flag is set | 99 | * - Must *only* be called if the pending flag is set |
91 | */ | 100 | */ |
92 | static inline void set_wq_data(struct work_struct *work, void *wq) | 101 | static inline void set_wq_data(struct work_struct *work, |
102 | struct cpu_workqueue_struct *cwq) | ||
93 | { | 103 | { |
94 | unsigned long new; | 104 | unsigned long new; |
95 | 105 | ||
96 | BUG_ON(!work_pending(work)); | 106 | BUG_ON(!work_pending(work)); |
97 | 107 | ||
98 | new = (unsigned long) wq | (1UL << WORK_STRUCT_PENDING); | 108 | new = (unsigned long) cwq | (1UL << WORK_STRUCT_PENDING); |
99 | new |= WORK_STRUCT_FLAG_MASK & *work_data_bits(work); | 109 | new |= WORK_STRUCT_FLAG_MASK & *work_data_bits(work); |
100 | atomic_long_set(&work->data, new); | 110 | atomic_long_set(&work->data, new); |
101 | } | 111 | } |
102 | 112 | ||
103 | static inline void *get_wq_data(struct work_struct *work) | 113 | static inline |
114 | struct cpu_workqueue_struct *get_wq_data(struct work_struct *work) | ||
104 | { | 115 | { |
105 | return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK); | 116 | return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK); |
106 | } | 117 | } |
107 | 118 | ||
108 | static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) | 119 | static void insert_work(struct cpu_workqueue_struct *cwq, |
120 | struct work_struct *work, int tail) | ||
109 | { | 121 | { |
110 | int ret = 0; | 122 | set_wq_data(work, cwq); |
111 | unsigned long flags; | ||
112 | |||
113 | spin_lock_irqsave(&cwq->lock, flags); | ||
114 | /* | 123 | /* |
115 | * We need to re-validate the work info after we've gotten | 124 | * Ensure that we get the right work->data if we see the |
116 | * the cpu_workqueue lock. We can run the work now iff: | 125 | * result of list_add() below, see try_to_grab_pending(). |
117 | * | ||
118 | * - the wq_data still matches the cpu_workqueue_struct | ||
119 | * - AND the work is still marked pending | ||
120 | * - AND the work is still on a list (which will be this | ||
121 | * workqueue_struct list) | ||
122 | * | ||
123 | * All these conditions are important, because we | ||
124 | * need to protect against the work being run right | ||
125 | * now on another CPU (all but the last one might be | ||
126 | * true if it's currently running and has not been | ||
127 | * released yet, for example). | ||
128 | */ | 126 | */ |
129 | if (get_wq_data(work) == cwq | 127 | smp_wmb(); |
130 | && work_pending(work) | 128 | if (tail) |
131 | && !list_empty(&work->entry)) { | 129 | list_add_tail(&work->entry, &cwq->worklist); |
132 | work_func_t f = work->func; | 130 | else |
133 | list_del_init(&work->entry); | 131 | list_add(&work->entry, &cwq->worklist); |
134 | spin_unlock_irqrestore(&cwq->lock, flags); | 132 | wake_up(&cwq->more_work); |
135 | |||
136 | if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work))) | ||
137 | work_release(work); | ||
138 | f(work); | ||
139 | |||
140 | spin_lock_irqsave(&cwq->lock, flags); | ||
141 | cwq->remove_sequence++; | ||
142 | wake_up(&cwq->work_done); | ||
143 | ret = 1; | ||
144 | } | ||
145 | spin_unlock_irqrestore(&cwq->lock, flags); | ||
146 | return ret; | ||
147 | } | ||
148 | |||
149 | /** | ||
150 | * run_scheduled_work - run scheduled work synchronously | ||
151 | * @work: work to run | ||
152 | * | ||
153 | * This checks if the work was pending, and runs it | ||
154 | * synchronously if so. It returns a boolean to indicate | ||
155 | * whether it had any scheduled work to run or not. | ||
156 | * | ||
157 | * NOTE! This _only_ works for normal work_structs. You | ||
158 | * CANNOT use this for delayed work, because the wq data | ||
159 | * for delayed work will not point properly to the per- | ||
160 | * CPU workqueue struct, but will change! | ||
161 | */ | ||
162 | int fastcall run_scheduled_work(struct work_struct *work) | ||
163 | { | ||
164 | for (;;) { | ||
165 | struct cpu_workqueue_struct *cwq; | ||
166 | |||
167 | if (!work_pending(work)) | ||
168 | return 0; | ||
169 | if (list_empty(&work->entry)) | ||
170 | return 0; | ||
171 | /* NOTE! This depends intimately on __queue_work! */ | ||
172 | cwq = get_wq_data(work); | ||
173 | if (!cwq) | ||
174 | return 0; | ||
175 | if (__run_work(cwq, work)) | ||
176 | return 1; | ||
177 | } | ||
178 | } | 133 | } |
179 | EXPORT_SYMBOL(run_scheduled_work); | ||
180 | 134 | ||
181 | /* Preempt must be disabled. */ | 135 | /* Preempt must be disabled. */ |
182 | static void __queue_work(struct cpu_workqueue_struct *cwq, | 136 | static void __queue_work(struct cpu_workqueue_struct *cwq, |
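insert_work() stores the cwq into work->data and only then, after smp_wmb(), links the work into the list; try_to_grab_pending() further down performs the mirror-image smp_rmb() before trusting work->data. A runnable userspace analog of that publish/consume ordering, using C11 fences in place of the kernel barriers (compile with cc -pthread; all names are illustrative):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static int payload;			/* plays the role of work->data */
static atomic_int published;		/* plays the role of the list add */

static void *producer(void *unused)
{
	payload = 42;					/* set_wq_data() */
	atomic_thread_fence(memory_order_release);	/* ~ smp_wmb()   */
	atomic_store_explicit(&published, 1, memory_order_relaxed);
	return NULL;
}

static void *consumer(void *unused)
{
	while (!atomic_load_explicit(&published, memory_order_relaxed))
		;					/* saw the list_add */
	atomic_thread_fence(memory_order_acquire);	/* ~ smp_rmb()   */
	printf("payload=%d\n", payload);		/* must print 42 */
	return NULL;
}

int main(void)
{
	pthread_t p, c;

	pthread_create(&c, NULL, consumer, NULL);
	pthread_create(&p, NULL, producer, NULL);
	pthread_join(p, NULL);
	pthread_join(c, NULL);
	return 0;
}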
@@ -185,10 +139,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, | |||
185 | unsigned long flags; | 139 | unsigned long flags; |
186 | 140 | ||
187 | spin_lock_irqsave(&cwq->lock, flags); | 141 | spin_lock_irqsave(&cwq->lock, flags); |
188 | set_wq_data(work, cwq); | 142 | insert_work(cwq, work, 1); |
189 | list_add_tail(&work->entry, &cwq->worklist); | ||
190 | cwq->insert_sequence++; | ||
191 | wake_up(&cwq->more_work); | ||
192 | spin_unlock_irqrestore(&cwq->lock, flags); | 143 | spin_unlock_irqrestore(&cwq->lock, flags); |
193 | } | 144 | } |
194 | 145 | ||
@@ -204,16 +155,14 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, | |||
204 | */ | 155 | */ |
205 | int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work) | 156 | int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work) |
206 | { | 157 | { |
207 | int ret = 0, cpu = get_cpu(); | 158 | int ret = 0; |
208 | 159 | ||
209 | if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { | 160 | if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { |
210 | if (unlikely(is_single_threaded(wq))) | ||
211 | cpu = singlethread_cpu; | ||
212 | BUG_ON(!list_empty(&work->entry)); | 161 | BUG_ON(!list_empty(&work->entry)); |
213 | __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work); | 162 | __queue_work(wq_per_cpu(wq, get_cpu()), work); |
163 | put_cpu(); | ||
214 | ret = 1; | 164 | ret = 1; |
215 | } | 165 | } |
216 | put_cpu(); | ||
217 | return ret; | 166 | return ret; |
218 | } | 167 | } |
219 | EXPORT_SYMBOL_GPL(queue_work); | 168 | EXPORT_SYMBOL_GPL(queue_work); |
@@ -221,13 +170,10 @@ EXPORT_SYMBOL_GPL(queue_work); | |||
221 | void delayed_work_timer_fn(unsigned long __data) | 170 | void delayed_work_timer_fn(unsigned long __data) |
222 | { | 171 | { |
223 | struct delayed_work *dwork = (struct delayed_work *)__data; | 172 | struct delayed_work *dwork = (struct delayed_work *)__data; |
224 | struct workqueue_struct *wq = get_wq_data(&dwork->work); | 173 | struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work); |
225 | int cpu = smp_processor_id(); | 174 | struct workqueue_struct *wq = cwq->wq; |
226 | 175 | ||
227 | if (unlikely(is_single_threaded(wq))) | 176 | __queue_work(wq_per_cpu(wq, smp_processor_id()), &dwork->work); |
228 | cpu = singlethread_cpu; | ||
229 | |||
230 | __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), &dwork->work); | ||
231 | } | 177 | } |
232 | 178 | ||
233 | /** | 179 | /** |
@@ -241,27 +187,11 @@ void delayed_work_timer_fn(unsigned long __data) | |||
241 | int fastcall queue_delayed_work(struct workqueue_struct *wq, | 187 | int fastcall queue_delayed_work(struct workqueue_struct *wq, |
242 | struct delayed_work *dwork, unsigned long delay) | 188 | struct delayed_work *dwork, unsigned long delay) |
243 | { | 189 | { |
244 | int ret = 0; | 190 | timer_stats_timer_set_start_info(&dwork->timer); |
245 | struct timer_list *timer = &dwork->timer; | ||
246 | struct work_struct *work = &dwork->work; | ||
247 | |||
248 | timer_stats_timer_set_start_info(timer); | ||
249 | if (delay == 0) | 191 | if (delay == 0) |
250 | return queue_work(wq, work); | 192 | return queue_work(wq, &dwork->work); |
251 | |||
252 | if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { | ||
253 | BUG_ON(timer_pending(timer)); | ||
254 | BUG_ON(!list_empty(&work->entry)); | ||
255 | 193 | ||
256 | /* This stores wq for the moment, for the timer_fn */ | 194 | return queue_delayed_work_on(-1, wq, dwork, delay); |
257 | set_wq_data(work, wq); | ||
258 | timer->expires = jiffies + delay; | ||
259 | timer->data = (unsigned long)dwork; | ||
260 | timer->function = delayed_work_timer_fn; | ||
261 | add_timer(timer); | ||
262 | ret = 1; | ||
263 | } | ||
264 | return ret; | ||
265 | } | 195 | } |
266 | EXPORT_SYMBOL_GPL(queue_delayed_work); | 196 | EXPORT_SYMBOL_GPL(queue_delayed_work); |
267 | 197 | ||
@@ -285,12 +215,16 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, | |||
285 | BUG_ON(timer_pending(timer)); | 215 | BUG_ON(timer_pending(timer)); |
286 | BUG_ON(!list_empty(&work->entry)); | 216 | BUG_ON(!list_empty(&work->entry)); |
287 | 217 | ||
288 | /* This stores wq for the moment, for the timer_fn */ | 218 | /* This stores cwq for the moment, for the timer_fn */ |
289 | set_wq_data(work, wq); | 219 | set_wq_data(work, wq_per_cpu(wq, raw_smp_processor_id())); |
290 | timer->expires = jiffies + delay; | 220 | timer->expires = jiffies + delay; |
291 | timer->data = (unsigned long)dwork; | 221 | timer->data = (unsigned long)dwork; |
292 | timer->function = delayed_work_timer_fn; | 222 | timer->function = delayed_work_timer_fn; |
293 | add_timer_on(timer, cpu); | 223 | |
224 | if (unlikely(cpu >= 0)) | ||
225 | add_timer_on(timer, cpu); | ||
226 | else | ||
227 | add_timer(timer); | ||
294 | ret = 1; | 228 | ret = 1; |
295 | } | 229 | } |
296 | return ret; | 230 | return ret; |
@@ -299,13 +233,7 @@ EXPORT_SYMBOL_GPL(queue_delayed_work_on); | |||
299 | 233 | ||
300 | static void run_workqueue(struct cpu_workqueue_struct *cwq) | 234 | static void run_workqueue(struct cpu_workqueue_struct *cwq) |
301 | { | 235 | { |
302 | unsigned long flags; | 236 | spin_lock_irq(&cwq->lock); |
303 | |||
304 | /* | ||
305 | * Keep taking off work from the queue until | ||
306 | * done. | ||
307 | */ | ||
308 | spin_lock_irqsave(&cwq->lock, flags); | ||
309 | cwq->run_depth++; | 237 | cwq->run_depth++; |
310 | if (cwq->run_depth > 3) { | 238 | if (cwq->run_depth > 3) { |
311 | /* morton gets to eat his hat */ | 239 | /* morton gets to eat his hat */ |
@@ -318,12 +246,12 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) | |||
318 | struct work_struct, entry); | 246 | struct work_struct, entry); |
319 | work_func_t f = work->func; | 247 | work_func_t f = work->func; |
320 | 248 | ||
249 | cwq->current_work = work; | ||
321 | list_del_init(cwq->worklist.next); | 250 | list_del_init(cwq->worklist.next); |
322 | spin_unlock_irqrestore(&cwq->lock, flags); | 251 | spin_unlock_irq(&cwq->lock); |
323 | 252 | ||
324 | BUG_ON(get_wq_data(work) != cwq); | 253 | BUG_ON(get_wq_data(work) != cwq); |
325 | if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work))) | 254 | work_clear_pending(work); |
326 | work_release(work); | ||
327 | f(work); | 255 | f(work); |
328 | 256 | ||
329 | if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { | 257 | if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { |
@@ -337,63 +265,81 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) | |||
337 | dump_stack(); | 265 | dump_stack(); |
338 | } | 266 | } |
339 | 267 | ||
340 | spin_lock_irqsave(&cwq->lock, flags); | 268 | spin_lock_irq(&cwq->lock); |
341 | cwq->remove_sequence++; | 269 | cwq->current_work = NULL; |
342 | wake_up(&cwq->work_done); | ||
343 | } | 270 | } |
344 | cwq->run_depth--; | 271 | cwq->run_depth--; |
345 | spin_unlock_irqrestore(&cwq->lock, flags); | 272 | spin_unlock_irq(&cwq->lock); |
273 | } | ||
274 | |||
275 | /* | ||
276 | * NOTE: the caller must not touch *cwq if this func returns true | ||
277 | */ | ||
278 | static int cwq_should_stop(struct cpu_workqueue_struct *cwq) | ||
279 | { | ||
280 | int should_stop = cwq->should_stop; | ||
281 | |||
282 | if (unlikely(should_stop)) { | ||
283 | spin_lock_irq(&cwq->lock); | ||
284 | should_stop = cwq->should_stop && list_empty(&cwq->worklist); | ||
285 | if (should_stop) | ||
286 | cwq->thread = NULL; | ||
287 | spin_unlock_irq(&cwq->lock); | ||
288 | } | ||
289 | |||
290 | return should_stop; | ||
346 | } | 291 | } |
347 | 292 | ||
348 | static int worker_thread(void *__cwq) | 293 | static int worker_thread(void *__cwq) |
349 | { | 294 | { |
350 | struct cpu_workqueue_struct *cwq = __cwq; | 295 | struct cpu_workqueue_struct *cwq = __cwq; |
351 | DECLARE_WAITQUEUE(wait, current); | 296 | DEFINE_WAIT(wait); |
352 | struct k_sigaction sa; | ||
353 | sigset_t blocked; | ||
354 | 297 | ||
355 | if (!cwq->freezeable) | 298 | if (!cwq->wq->freezeable) |
356 | current->flags |= PF_NOFREEZE; | 299 | current->flags |= PF_NOFREEZE; |
357 | 300 | ||
358 | set_user_nice(current, -5); | 301 | set_user_nice(current, -5); |
359 | 302 | ||
360 | /* Block and flush all signals */ | 303 | for (;;) { |
361 | sigfillset(&blocked); | 304 | prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE); |
362 | sigprocmask(SIG_BLOCK, &blocked, NULL); | 305 | if (!freezing(current) && !cwq->should_stop |
363 | flush_signals(current); | 306 | && list_empty(&cwq->worklist)) |
364 | 307 | schedule(); | |
365 | /* | 308 | finish_wait(&cwq->more_work, &wait); |
366 | * We inherited MPOL_INTERLEAVE from the booting kernel. | ||
367 | * Set MPOL_DEFAULT to insure node local allocations. | ||
368 | */ | ||
369 | numa_default_policy(); | ||
370 | |||
371 | /* SIG_IGN makes children autoreap: see do_notify_parent(). */ | ||
372 | sa.sa.sa_handler = SIG_IGN; | ||
373 | sa.sa.sa_flags = 0; | ||
374 | siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD)); | ||
375 | do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0); | ||
376 | 309 | ||
377 | set_current_state(TASK_INTERRUPTIBLE); | 310 | try_to_freeze(); |
378 | while (!kthread_should_stop()) { | ||
379 | if (cwq->freezeable) | ||
380 | try_to_freeze(); | ||
381 | 311 | ||
382 | add_wait_queue(&cwq->more_work, &wait); | 312 | if (cwq_should_stop(cwq)) |
383 | if (list_empty(&cwq->worklist)) | 313 | break; |
384 | schedule(); | ||
385 | else | ||
386 | __set_current_state(TASK_RUNNING); | ||
387 | remove_wait_queue(&cwq->more_work, &wait); | ||
388 | 314 | ||
389 | if (!list_empty(&cwq->worklist)) | 315 | run_workqueue(cwq); |
390 | run_workqueue(cwq); | ||
391 | set_current_state(TASK_INTERRUPTIBLE); | ||
392 | } | 316 | } |
393 | __set_current_state(TASK_RUNNING); | 317 | |
394 | return 0; | 318 | return 0; |
395 | } | 319 | } |
396 | 320 | ||
321 | struct wq_barrier { | ||
322 | struct work_struct work; | ||
323 | struct completion done; | ||
324 | }; | ||
325 | |||
326 | static void wq_barrier_func(struct work_struct *work) | ||
327 | { | ||
328 | struct wq_barrier *barr = container_of(work, struct wq_barrier, work); | ||
329 | complete(&barr->done); | ||
330 | } | ||
331 | |||
332 | static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, | ||
333 | struct wq_barrier *barr, int tail) | ||
334 | { | ||
335 | INIT_WORK(&barr->work, wq_barrier_func); | ||
336 | __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work)); | ||
337 | |||
338 | init_completion(&barr->done); | ||
339 | |||
340 | insert_work(cwq, &barr->work, tail); | ||
341 | } | ||
342 | |||
397 | static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) | 343 | static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) |
398 | { | 344 | { |
399 | if (cwq->thread == current) { | 345 | if (cwq->thread == current) { |
@@ -403,21 +349,18 @@ static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) | |||
403 | */ | 349 | */ |
404 | run_workqueue(cwq); | 350 | run_workqueue(cwq); |
405 | } else { | 351 | } else { |
406 | DEFINE_WAIT(wait); | 352 | struct wq_barrier barr; |
407 | long sequence_needed; | 353 | int active = 0; |
408 | 354 | ||
409 | spin_lock_irq(&cwq->lock); | 355 | spin_lock_irq(&cwq->lock); |
410 | sequence_needed = cwq->insert_sequence; | 356 | if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) { |
411 | 357 | insert_wq_barrier(cwq, &barr, 1); | |
412 | while (sequence_needed - cwq->remove_sequence > 0) { | 358 | active = 1; |
413 | prepare_to_wait(&cwq->work_done, &wait, | ||
414 | TASK_UNINTERRUPTIBLE); | ||
415 | spin_unlock_irq(&cwq->lock); | ||
416 | schedule(); | ||
417 | spin_lock_irq(&cwq->lock); | ||
418 | } | 359 | } |
419 | finish_wait(&cwq->work_done, &wait); | ||
420 | spin_unlock_irq(&cwq->lock); | 360 | spin_unlock_irq(&cwq->lock); |
361 | |||
362 | if (active) | ||
363 | wait_for_completion(&barr.done); | ||
421 | } | 364 | } |
422 | } | 365 | } |
423 | 366 | ||
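This hunk carries the heart of the rewrite: instead of comparing insert/remove sequence counters, flushing now queues a wq_barrier behind whatever is already pending and sleeps on its completion, so a flusher waits for exactly the work queued ahead of it and cannot be livelocked by new arrivals. A self-contained POSIX-threads analog of the flush-by-sentinel idea (illustrative throughout, not kernel API; compile with cc -pthread):

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

typedef void (*work_fn)(void *);
struct work { work_fn fn; void *arg; };

#define QSIZE 16
static struct work queue[QSIZE];
static int head, tail;			/* protected by lock */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t more_work = PTHREAD_COND_INITIALIZER;

static void queue_work(work_fn fn, void *arg)
{
	pthread_mutex_lock(&lock);
	queue[tail++ % QSIZE] = (struct work){ fn, arg };
	pthread_cond_signal(&more_work);
	pthread_mutex_unlock(&lock);
}

static void *worker(void *unused)
{
	for (;;) {
		struct work w;

		pthread_mutex_lock(&lock);
		while (head == tail)
			pthread_cond_wait(&more_work, &lock);
		w = queue[head++ % QSIZE];
		pthread_mutex_unlock(&lock);
		w.fn(w.arg);		/* run with the lock dropped */
	}
	return NULL;
}

static void barrier_fn(void *arg)	/* wq_barrier_func analog */
{
	sem_post(arg);			/* complete(&barr->done) analog */
}

static void real_work(void *arg) { puts("work ran"); }

int main(void)
{
	pthread_t t;
	sem_t done;

	sem_init(&done, 0, 0);
	pthread_create(&t, NULL, worker, NULL);
	queue_work(real_work, NULL);
	queue_work(barrier_fn, &done);	/* insert_wq_barrier analog   */
	sem_wait(&done);		/* wait_for_completion analog */
	puts("flushed: everything queued before the barrier is done");
	return 0;
}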
@@ -428,151 +371,145 @@ static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) | |||
428 | * Forces execution of the workqueue and blocks until its completion. | 371 | * Forces execution of the workqueue and blocks until its completion. |
429 | * This is typically used in driver shutdown handlers. | 372 | * This is typically used in driver shutdown handlers. |
430 | * | 373 | * |
431 | * This function will sample each workqueue's current insert_sequence number and | 374 | * We sleep until all works which were queued on entry have been handled, |
432 | * will sleep until the head sequence is greater than or equal to that. This | 375 | * but we are not livelocked by new incoming ones. |
433 | * means that we sleep until all works which were queued on entry have been | ||
434 | * handled, but we are not livelocked by new incoming ones. | ||
435 | * | 376 | * |
436 | * This function used to run the workqueues itself. Now we just wait for the | 377 | * This function used to run the workqueues itself. Now we just wait for the |
437 | * helper threads to do it. | 378 | * helper threads to do it. |
438 | */ | 379 | */ |
439 | void fastcall flush_workqueue(struct workqueue_struct *wq) | 380 | void fastcall flush_workqueue(struct workqueue_struct *wq) |
440 | { | 381 | { |
382 | const cpumask_t *cpu_map = wq_cpu_map(wq); | ||
383 | int cpu; | ||
384 | |||
441 | might_sleep(); | 385 | might_sleep(); |
386 | for_each_cpu_mask(cpu, *cpu_map) | ||
387 | flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu)); | ||
388 | } | ||
389 | EXPORT_SYMBOL_GPL(flush_workqueue); | ||
442 | 390 | ||
443 | if (is_single_threaded(wq)) { | 391 | /* |
444 | /* Always use first cpu's area. */ | 392 | * Upon a successful return, the caller "owns" WORK_STRUCT_PENDING bit, |
445 | flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, singlethread_cpu)); | 393 | * so this work can't be re-armed in any way. |
446 | } else { | 394 | */ |
447 | int cpu; | 395 | static int try_to_grab_pending(struct work_struct *work) |
396 | { | ||
397 | struct cpu_workqueue_struct *cwq; | ||
398 | int ret = 0; | ||
448 | 399 | ||
449 | mutex_lock(&workqueue_mutex); | 400 | if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) |
450 | for_each_online_cpu(cpu) | 401 | return 1; |
451 | flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu)); | 402 | |
452 | mutex_unlock(&workqueue_mutex); | 403 | /* |
404 | * The queueing is in progress, or it is already queued. Try to | ||
405 | * steal it from ->worklist without clearing WORK_STRUCT_PENDING. | ||
406 | */ | ||
407 | |||
408 | cwq = get_wq_data(work); | ||
409 | if (!cwq) | ||
410 | return ret; | ||
411 | |||
412 | spin_lock_irq(&cwq->lock); | ||
413 | if (!list_empty(&work->entry)) { | ||
414 | /* | ||
415 | * This work is queued, but perhaps we locked the wrong cwq. | ||
416 | * In that case we must see the new value after rmb(), see | ||
417 | * insert_work()->wmb(). | ||
418 | */ | ||
419 | smp_rmb(); | ||
420 | if (cwq == get_wq_data(work)) { | ||
421 | list_del_init(&work->entry); | ||
422 | ret = 1; | ||
423 | } | ||
453 | } | 424 | } |
425 | spin_unlock_irq(&cwq->lock); | ||
426 | |||
427 | return ret; | ||
454 | } | 428 | } |
455 | EXPORT_SYMBOL_GPL(flush_workqueue); | ||
456 | 429 | ||
457 | static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, | 430 | static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq, |
458 | int cpu, int freezeable) | 431 | struct work_struct *work) |
459 | { | 432 | { |
460 | struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); | 433 | struct wq_barrier barr; |
461 | struct task_struct *p; | 434 | int running = 0; |
462 | 435 | ||
463 | spin_lock_init(&cwq->lock); | 436 | spin_lock_irq(&cwq->lock); |
464 | cwq->wq = wq; | 437 | if (unlikely(cwq->current_work == work)) { |
465 | cwq->thread = NULL; | 438 | insert_wq_barrier(cwq, &barr, 0); |
466 | cwq->insert_sequence = 0; | 439 | running = 1; |
467 | cwq->remove_sequence = 0; | 440 | } |
468 | cwq->freezeable = freezeable; | 441 | spin_unlock_irq(&cwq->lock); |
469 | INIT_LIST_HEAD(&cwq->worklist); | ||
470 | init_waitqueue_head(&cwq->more_work); | ||
471 | init_waitqueue_head(&cwq->work_done); | ||
472 | 442 | ||
473 | if (is_single_threaded(wq)) | 443 | if (unlikely(running)) |
474 | p = kthread_create(worker_thread, cwq, "%s", wq->name); | 444 | wait_for_completion(&barr.done); |
475 | else | ||
476 | p = kthread_create(worker_thread, cwq, "%s/%d", wq->name, cpu); | ||
477 | if (IS_ERR(p)) | ||
478 | return NULL; | ||
479 | cwq->thread = p; | ||
480 | return p; | ||
481 | } | 445 | } |
482 | 446 | ||
483 | struct workqueue_struct *__create_workqueue(const char *name, | 447 | static void wait_on_work(struct work_struct *work) |
484 | int singlethread, int freezeable) | ||
485 | { | 448 | { |
486 | int cpu, destroy = 0; | 449 | struct cpu_workqueue_struct *cwq; |
487 | struct workqueue_struct *wq; | 450 | struct workqueue_struct *wq; |
488 | struct task_struct *p; | 451 | const cpumask_t *cpu_map; |
452 | int cpu; | ||
489 | 453 | ||
490 | wq = kzalloc(sizeof(*wq), GFP_KERNEL); | 454 | might_sleep(); |
491 | if (!wq) | ||
492 | return NULL; | ||
493 | 455 | ||
494 | wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct); | 456 | cwq = get_wq_data(work); |
495 | if (!wq->cpu_wq) { | 457 | if (!cwq) |
496 | kfree(wq); | 458 | return; |
497 | return NULL; | ||
498 | } | ||
499 | 459 | ||
500 | wq->name = name; | 460 | wq = cwq->wq; |
501 | mutex_lock(&workqueue_mutex); | 461 | cpu_map = wq_cpu_map(wq); |
502 | if (singlethread) { | ||
503 | INIT_LIST_HEAD(&wq->list); | ||
504 | p = create_workqueue_thread(wq, singlethread_cpu, freezeable); | ||
505 | if (!p) | ||
506 | destroy = 1; | ||
507 | else | ||
508 | wake_up_process(p); | ||
509 | } else { | ||
510 | list_add(&wq->list, &workqueues); | ||
511 | for_each_online_cpu(cpu) { | ||
512 | p = create_workqueue_thread(wq, cpu, freezeable); | ||
513 | if (p) { | ||
514 | kthread_bind(p, cpu); | ||
515 | wake_up_process(p); | ||
516 | } else | ||
517 | destroy = 1; | ||
518 | } | ||
519 | } | ||
520 | mutex_unlock(&workqueue_mutex); | ||
521 | 462 | ||
522 | /* | 463 | for_each_cpu_mask(cpu, *cpu_map) |
523 | * Was there any error during startup? If yes then clean up: | 464 | wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work); |
524 | */ | ||
525 | if (destroy) { | ||
526 | destroy_workqueue(wq); | ||
527 | wq = NULL; | ||
528 | } | ||
529 | return wq; | ||
530 | } | 465 | } |
531 | EXPORT_SYMBOL_GPL(__create_workqueue); | ||
532 | 466 | ||
533 | static void cleanup_workqueue_thread(struct workqueue_struct *wq, int cpu) | 467 | /** |
468 | * cancel_work_sync - block until a work_struct's callback has terminated | ||
469 | * @work: the work which is to be flushed | ||
470 | * | ||
471 | * cancel_work_sync() will cancel the work if it is queued. If the work's | ||
472 | * callback appears to be running, cancel_work_sync() will block until it | ||
473 | * has completed. | ||
474 | * | ||
475 | * It is possible to use this function if the work re-queues itself. It can | ||
476 | * cancel the work even if it migrates to another workqueue, however in that | ||
477 | * case it only guarantees that work->func() has completed on the last queued | ||
478 | * workqueue. | ||
479 | * | ||
480 | * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not | ||
481 | * pending, otherwise it goes into a busy-wait loop until the timer expires. | ||
482 | * | ||
483 | * The caller must ensure that workqueue_struct on which this work was last | ||
484 | * queued can't be destroyed before this function returns. | ||
485 | */ | ||
486 | void cancel_work_sync(struct work_struct *work) | ||
534 | { | 487 | { |
535 | struct cpu_workqueue_struct *cwq; | 488 | while (!try_to_grab_pending(work)) |
536 | unsigned long flags; | 489 | cpu_relax(); |
537 | struct task_struct *p; | 490 | wait_on_work(work); |
538 | 491 | work_clear_pending(work); | |
539 | cwq = per_cpu_ptr(wq->cpu_wq, cpu); | ||
540 | spin_lock_irqsave(&cwq->lock, flags); | ||
541 | p = cwq->thread; | ||
542 | cwq->thread = NULL; | ||
543 | spin_unlock_irqrestore(&cwq->lock, flags); | ||
544 | if (p) | ||
545 | kthread_stop(p); | ||
546 | } | 492 | } |
493 | EXPORT_SYMBOL_GPL(cancel_work_sync); | ||
547 | 494 | ||
548 | /** | 495 | /** |
549 | * destroy_workqueue - safely terminate a workqueue | 496 | * cancel_rearming_delayed_work - reliably kill off a delayed work. |
550 | * @wq: target workqueue | 497 | * @dwork: the delayed work struct |
551 | * | 498 | * |
552 | * Safely destroy a workqueue. All work currently pending will be done first. | 499 | * It is possible to use this function if @dwork rearms itself via queue_work() |
500 | * or queue_delayed_work(). See also the comment for cancel_work_sync(). | ||
553 | */ | 501 | */ |
554 | void destroy_workqueue(struct workqueue_struct *wq) | 502 | void cancel_rearming_delayed_work(struct delayed_work *dwork) |
555 | { | 503 | { |
556 | int cpu; | 504 | while (!del_timer(&dwork->timer) && |
557 | 505 | !try_to_grab_pending(&dwork->work)) | |
558 | flush_workqueue(wq); | 506 | cpu_relax(); |
559 | 507 | wait_on_work(&dwork->work); | |
560 | /* We don't need the distraction of CPUs appearing and vanishing. */ | 508 | work_clear_pending(&dwork->work); |
561 | mutex_lock(&workqueue_mutex); | ||
562 | if (is_single_threaded(wq)) | ||
563 | cleanup_workqueue_thread(wq, singlethread_cpu); | ||
564 | else { | ||
565 | for_each_online_cpu(cpu) | ||
566 | cleanup_workqueue_thread(wq, cpu); | ||
567 | list_del(&wq->list); | ||
568 | } | ||
569 | mutex_unlock(&workqueue_mutex); | ||
570 | free_percpu(wq->cpu_wq); | ||
571 | kfree(wq); | ||
572 | } | 509 | } |
573 | EXPORT_SYMBOL_GPL(destroy_workqueue); | 510 | EXPORT_SYMBOL(cancel_rearming_delayed_work); |
574 | 511 | ||
575 | static struct workqueue_struct *keventd_wq; | 512 | static struct workqueue_struct *keventd_wq __read_mostly; |
576 | 513 | ||
577 | /** | 514 | /** |
578 | * schedule_work - put work task in global workqueue | 515 | * schedule_work - put work task in global workqueue |
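The cancel helpers above replace the old "flush until it sticks" idiom: grab the pending bit (stealing the work off its list if need be), then wait out any instance that is already running. A hedged sketch of the typical cancel_work_sync() caller — struct my_dev and its teardown are illustrative, only cancel_work_sync() itself is from this patch:

#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_dev {
	struct work_struct irq_work;
	/* ... */
};

static void my_dev_remove(struct my_dev *dev)
{
	/*
	 * After this returns, the handler is neither queued nor
	 * running on any CPU, so freeing dev cannot race with it.
	 */
	cancel_work_sync(&dev->irq_work);
	kfree(dev);
}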
@@ -638,7 +575,7 @@ int schedule_on_each_cpu(work_func_t func) | |||
638 | if (!works) | 575 | if (!works) |
639 | return -ENOMEM; | 576 | return -ENOMEM; |
640 | 577 | ||
641 | mutex_lock(&workqueue_mutex); | 578 | preempt_disable(); /* CPU hotplug */ |
642 | for_each_online_cpu(cpu) { | 579 | for_each_online_cpu(cpu) { |
643 | struct work_struct *work = per_cpu_ptr(works, cpu); | 580 | struct work_struct *work = per_cpu_ptr(works, cpu); |
644 | 581 | ||
@@ -646,7 +583,7 @@ int schedule_on_each_cpu(work_func_t func) | |||
646 | set_bit(WORK_STRUCT_PENDING, work_data_bits(work)); | 583 | set_bit(WORK_STRUCT_PENDING, work_data_bits(work)); |
647 | __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work); | 584 | __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work); |
648 | } | 585 | } |
649 | mutex_unlock(&workqueue_mutex); | 586 | preempt_enable(); |
650 | flush_workqueue(keventd_wq); | 587 | flush_workqueue(keventd_wq); |
651 | free_percpu(works); | 588 | free_percpu(works); |
652 | return 0; | 589 | return 0; |
@@ -659,29 +596,6 @@ void flush_scheduled_work(void) | |||
659 | EXPORT_SYMBOL(flush_scheduled_work); | 596 | EXPORT_SYMBOL(flush_scheduled_work); |
660 | 597 | ||
661 | /** | 598 | /** |
662 | * cancel_rearming_delayed_workqueue - reliably kill off a delayed work whose handler rearms the delayed work. | ||
663 | * @wq: the controlling workqueue structure | ||
664 | * @dwork: the delayed work struct | ||
665 | */ | ||
666 | void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, | ||
667 | struct delayed_work *dwork) | ||
668 | { | ||
669 | while (!cancel_delayed_work(dwork)) | ||
670 | flush_workqueue(wq); | ||
671 | } | ||
672 | EXPORT_SYMBOL(cancel_rearming_delayed_workqueue); | ||
673 | |||
674 | /** | ||
675 | * cancel_rearming_delayed_work - reliably kill off a delayed keventd work whose handler rearms the delayed work. | ||
676 | * @dwork: the delayed work struct | ||
677 | */ | ||
678 | void cancel_rearming_delayed_work(struct delayed_work *dwork) | ||
679 | { | ||
680 | cancel_rearming_delayed_workqueue(keventd_wq, dwork); | ||
681 | } | ||
682 | EXPORT_SYMBOL(cancel_rearming_delayed_work); | ||
683 | |||
684 | /** | ||
685 | * execute_in_process_context - reliably execute the routine with user context | 599 | * execute_in_process_context - reliably execute the routine with user context |
686 | * @fn: the function to execute | 600 | * @fn: the function to execute |
687 | * @ew: guaranteed storage for the execute work structure (must | 601 | * @ew: guaranteed storage for the execute work structure (must |
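With try_to_grab_pending() doing the heavy lifting, cancel_rearming_delayed_work() no longer needs to know which workqueue the work was queued on, which is why the wq-taking variant above could be deleted outright. A hedged kernel-style sketch of the surviving interface in use (poll_fn and its body are illustrative):

#include <linux/workqueue.h>

static void poll_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(poll_work, poll_fn);

static void poll_fn(struct work_struct *work)
{
	/* ... sample the hardware ... */
	schedule_delayed_work(&poll_work, HZ);	/* rearm ourselves */
}

static void stop_polling(void)
{
	/*
	 * Kills the pending timer and/or queued work, then waits
	 * for a running poll_fn, so it cannot rearm behind us.
	 */
	cancel_rearming_delayed_work(&poll_work);
}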
@@ -728,94 +642,209 @@ int current_is_keventd(void) | |||
728 | 642 | ||
729 | } | 643 | } |
730 | 644 | ||
731 | /* Take the work from this (downed) CPU. */ | 645 | static struct cpu_workqueue_struct * |
732 | static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) | 646 | init_cpu_workqueue(struct workqueue_struct *wq, int cpu) |
733 | { | 647 | { |
734 | struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); | 648 | struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); |
735 | struct list_head list; | ||
736 | struct work_struct *work; | ||
737 | 649 | ||
738 | spin_lock_irq(&cwq->lock); | 650 | cwq->wq = wq; |
739 | list_replace_init(&cwq->worklist, &list); | 651 | spin_lock_init(&cwq->lock); |
652 | INIT_LIST_HEAD(&cwq->worklist); | ||
653 | init_waitqueue_head(&cwq->more_work); | ||
654 | |||
655 | return cwq; | ||
656 | } | ||
657 | |||
658 | static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) | ||
659 | { | ||
660 | struct workqueue_struct *wq = cwq->wq; | ||
661 | const char *fmt = is_single_threaded(wq) ? "%s" : "%s/%d"; | ||
662 | struct task_struct *p; | ||
663 | |||
664 | p = kthread_create(worker_thread, cwq, fmt, wq->name, cpu); | ||
665 | /* | ||
666 | * Nobody can add the work_struct to this cwq, | ||
667 | * if (caller is __create_workqueue) | ||
668 | * nobody should see this wq | ||
669 | * else // caller is CPU_UP_PREPARE | ||
670 | * cpu is not on cpu_online_map | ||
671 | * so we can abort safely. | ||
672 | */ | ||
673 | if (IS_ERR(p)) | ||
674 | return PTR_ERR(p); | ||
675 | |||
676 | cwq->thread = p; | ||
677 | cwq->should_stop = 0; | ||
678 | |||
679 | return 0; | ||
680 | } | ||
681 | |||
682 | static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) | ||
683 | { | ||
684 | struct task_struct *p = cwq->thread; | ||
740 | 685 | ||
741 | while (!list_empty(&list)) { | 686 | if (p != NULL) { |
742 | printk("Taking work for %s\n", wq->name); | 687 | if (cpu >= 0) |
743 | work = list_entry(list.next,struct work_struct,entry); | 688 | kthread_bind(p, cpu); |
744 | list_del(&work->entry); | 689 | wake_up_process(p); |
745 | __queue_work(per_cpu_ptr(wq->cpu_wq, smp_processor_id()), work); | ||
746 | } | 690 | } |
747 | spin_unlock_irq(&cwq->lock); | ||
748 | } | 691 | } |
749 | 692 | ||
750 | /* We're holding the cpucontrol mutex here */ | 693 | struct workqueue_struct *__create_workqueue(const char *name, |
751 | static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, | 694 | int singlethread, int freezeable) |
752 | unsigned long action, | ||
753 | void *hcpu) | ||
754 | { | 695 | { |
755 | unsigned int hotcpu = (unsigned long)hcpu; | ||
756 | struct workqueue_struct *wq; | 696 | struct workqueue_struct *wq; |
697 | struct cpu_workqueue_struct *cwq; | ||
698 | int err = 0, cpu; | ||
757 | 699 | ||
758 | switch (action) { | 700 | wq = kzalloc(sizeof(*wq), GFP_KERNEL); |
759 | case CPU_UP_PREPARE: | 701 | if (!wq) |
760 | mutex_lock(&workqueue_mutex); | 702 | return NULL; |
761 | /* Create a new workqueue thread for it. */ | ||
762 | list_for_each_entry(wq, &workqueues, list) { | ||
763 | if (!create_workqueue_thread(wq, hotcpu, 0)) { | ||
764 | printk("workqueue for %i failed\n", hotcpu); | ||
765 | return NOTIFY_BAD; | ||
766 | } | ||
767 | } | ||
768 | break; | ||
769 | 703 | ||
770 | case CPU_ONLINE: | 704 | wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct); |
771 | /* Kick off worker threads. */ | 705 | if (!wq->cpu_wq) { |
772 | list_for_each_entry(wq, &workqueues, list) { | 706 | kfree(wq); |
773 | struct cpu_workqueue_struct *cwq; | 707 | return NULL; |
708 | } | ||
774 | 709 | ||
775 | cwq = per_cpu_ptr(wq->cpu_wq, hotcpu); | 710 | wq->name = name; |
776 | kthread_bind(cwq->thread, hotcpu); | 711 | wq->singlethread = singlethread; |
777 | wake_up_process(cwq->thread); | 712 | wq->freezeable = freezeable; |
778 | } | 713 | INIT_LIST_HEAD(&wq->list); |
779 | mutex_unlock(&workqueue_mutex); | ||
780 | break; | ||
781 | 714 | ||
782 | case CPU_UP_CANCELED: | 715 | if (singlethread) { |
783 | list_for_each_entry(wq, &workqueues, list) { | 716 | cwq = init_cpu_workqueue(wq, singlethread_cpu); |
784 | if (!per_cpu_ptr(wq->cpu_wq, hotcpu)->thread) | 717 | err = create_workqueue_thread(cwq, singlethread_cpu); |
718 | start_workqueue_thread(cwq, -1); | ||
719 | } else { | ||
720 | mutex_lock(&workqueue_mutex); | ||
721 | list_add(&wq->list, &workqueues); | ||
722 | |||
723 | for_each_possible_cpu(cpu) { | ||
724 | cwq = init_cpu_workqueue(wq, cpu); | ||
725 | if (err || !cpu_online(cpu)) | ||
785 | continue; | 726 | continue; |
786 | /* Unbind so it can run. */ | 727 | err = create_workqueue_thread(cwq, cpu); |
787 | kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread, | 728 | start_workqueue_thread(cwq, cpu); |
788 | any_online_cpu(cpu_online_map)); | ||
789 | cleanup_workqueue_thread(wq, hotcpu); | ||
790 | } | 729 | } |
791 | mutex_unlock(&workqueue_mutex); | 730 | mutex_unlock(&workqueue_mutex); |
792 | break; | 731 | } |
732 | |||
733 | if (err) { | ||
734 | destroy_workqueue(wq); | ||
735 | wq = NULL; | ||
736 | } | ||
737 | return wq; | ||
738 | } | ||
739 | EXPORT_SYMBOL_GPL(__create_workqueue); | ||
740 | |||
741 | static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) | ||
742 | { | ||
743 | struct wq_barrier barr; | ||
744 | int alive = 0; | ||
745 | |||
746 | spin_lock_irq(&cwq->lock); | ||
747 | if (cwq->thread != NULL) { | ||
748 | insert_wq_barrier(cwq, &barr, 1); | ||
749 | cwq->should_stop = 1; | ||
750 | alive = 1; | ||
751 | } | ||
752 | spin_unlock_irq(&cwq->lock); | ||
753 | |||
754 | if (alive) { | ||
755 | wait_for_completion(&barr.done); | ||
793 | 756 | ||
794 | case CPU_DOWN_PREPARE: | 757 | while (unlikely(cwq->thread != NULL)) |
758 | cpu_relax(); | ||
759 | /* | ||
760 | * Wait until cwq->thread unlocks cwq->lock, | ||
761 | * it won't touch *cwq after that. | ||
762 | */ | ||
763 | smp_rmb(); | ||
764 | spin_unlock_wait(&cwq->lock); | ||
765 | } | ||
766 | } | ||
767 | |||
768 | /** | ||
769 | * destroy_workqueue - safely terminate a workqueue | ||
770 | * @wq: target workqueue | ||
771 | * | ||
772 | * Safely destroy a workqueue. All work currently pending will be done first. | ||
773 | */ | ||
774 | void destroy_workqueue(struct workqueue_struct *wq) | ||
775 | { | ||
776 | const cpumask_t *cpu_map = wq_cpu_map(wq); | ||
777 | struct cpu_workqueue_struct *cwq; | ||
778 | int cpu; | ||
779 | |||
780 | mutex_lock(&workqueue_mutex); | ||
781 | list_del(&wq->list); | ||
782 | mutex_unlock(&workqueue_mutex); | ||
783 | |||
784 | for_each_cpu_mask(cpu, *cpu_map) { | ||
785 | cwq = per_cpu_ptr(wq->cpu_wq, cpu); | ||
786 | cleanup_workqueue_thread(cwq, cpu); | ||
787 | } | ||
788 | |||
789 | free_percpu(wq->cpu_wq); | ||
790 | kfree(wq); | ||
791 | } | ||
792 | EXPORT_SYMBOL_GPL(destroy_workqueue); | ||
793 | |||
794 | static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, | ||
795 | unsigned long action, | ||
796 | void *hcpu) | ||
797 | { | ||
798 | unsigned int cpu = (unsigned long)hcpu; | ||
799 | struct cpu_workqueue_struct *cwq; | ||
800 | struct workqueue_struct *wq; | ||
801 | |||
802 | action &= ~CPU_TASKS_FROZEN; | ||
803 | |||
804 | switch (action) { | ||
805 | case CPU_LOCK_ACQUIRE: | ||
795 | mutex_lock(&workqueue_mutex); | 806 | mutex_lock(&workqueue_mutex); |
796 | break; | 807 | return NOTIFY_OK; |
797 | 808 | ||
798 | case CPU_DOWN_FAILED: | 809 | case CPU_LOCK_RELEASE: |
799 | mutex_unlock(&workqueue_mutex); | 810 | mutex_unlock(&workqueue_mutex); |
800 | break; | 811 | return NOTIFY_OK; |
801 | 812 | ||
802 | case CPU_DEAD: | 813 | case CPU_UP_PREPARE: |
803 | list_for_each_entry(wq, &workqueues, list) | 814 | cpu_set(cpu, cpu_populated_map); |
804 | cleanup_workqueue_thread(wq, hotcpu); | 815 | } |
805 | list_for_each_entry(wq, &workqueues, list) | 816 | |
806 | take_over_work(wq, hotcpu); | 817 | list_for_each_entry(wq, &workqueues, list) { |
807 | mutex_unlock(&workqueue_mutex); | 818 | cwq = per_cpu_ptr(wq->cpu_wq, cpu); |
808 | break; | 819 | |
820 | switch (action) { | ||
821 | case CPU_UP_PREPARE: | ||
822 | if (!create_workqueue_thread(cwq, cpu)) | ||
823 | break; | ||
824 | printk(KERN_ERR "workqueue for %i failed\n", cpu); | ||
825 | return NOTIFY_BAD; | ||
826 | |||
827 | case CPU_ONLINE: | ||
828 | start_workqueue_thread(cwq, cpu); | ||
829 | break; | ||
830 | |||
831 | case CPU_UP_CANCELED: | ||
832 | start_workqueue_thread(cwq, -1); | ||
833 | case CPU_DEAD: | ||
834 | cleanup_workqueue_thread(cwq, cpu); | ||
835 | break; | ||
836 | } | ||
809 | } | 837 | } |
810 | 838 | ||
811 | return NOTIFY_OK; | 839 | return NOTIFY_OK; |
812 | } | 840 | } |
813 | 841 | ||
814 | void init_workqueues(void) | 842 | void __init init_workqueues(void) |
815 | { | 843 | { |
844 | cpu_populated_map = cpu_online_map; | ||
816 | singlethread_cpu = first_cpu(cpu_possible_map); | 845 | singlethread_cpu = first_cpu(cpu_possible_map); |
846 | cpu_singlethread_map = cpumask_of_cpu(singlethread_cpu); | ||
817 | hotcpu_notifier(workqueue_cpu_callback, 0); | 847 | hotcpu_notifier(workqueue_cpu_callback, 0); |
818 | keventd_wq = create_workqueue("events"); | 848 | keventd_wq = create_workqueue("events"); |
819 | BUG_ON(!keventd_wq); | 849 | BUG_ON(!keventd_wq); |
820 | } | 850 | } |
821 | |||
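The rewritten lifecycle initializes a cwq for every possible CPU up front, spawns threads only for online ones, and leaves the hotplug callback above to create and reap threads as CPUs come and go; the external API is unchanged. A hedged kernel-style sketch of a typical consumer (only the workqueue calls are real, the rest is illustrative):

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>

static void my_work_fn(struct work_struct *work)
{
	printk(KERN_INFO "my_work ran in process context\n");
}

static DECLARE_WORK(my_work, my_work_fn);

static int __init my_init(void)
{
	struct workqueue_struct *wq;

	wq = create_workqueue("mywq");	/* one thread per online CPU */
	if (!wq)
		return -ENOMEM;

	queue_work(wq, &my_work);
	flush_workqueue(wq);	/* barrier-based flush from this patch */
	destroy_workqueue(wq);	/* flushes, stops threads, frees */
	return 0;
}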
diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d69ddbe43865..402eb4eb6b23 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c | |||
@@ -1004,7 +1004,7 @@ static int radix_tree_callback(struct notifier_block *nfb, | |||
1004 | struct radix_tree_preload *rtp; | 1004 | struct radix_tree_preload *rtp; |
1005 | 1005 | ||
1006 | /* Free per-cpu pool of preloaded nodes */ | 1006 | /* Free per-cpu pool of preloaded nodes */ |
1007 | if (action == CPU_DEAD) { | 1007 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { |
1008 | rtp = &per_cpu(radix_tree_preloads, cpu); | 1008 | rtp = &per_cpu(radix_tree_preloads, cpu); |
1009 | while (rtp->nr) { | 1009 | while (rtp->nr) { |
1010 | kmem_cache_free(radix_tree_node_cachep, | 1010 | kmem_cache_free(radix_tree_node_cachep, |
diff --git a/mm/filemap.c b/mm/filemap.c index 9cbf4fea4a59..9e56fd158fa3 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -750,6 +750,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, | |||
750 | read_unlock_irq(&mapping->tree_lock); | 750 | read_unlock_irq(&mapping->tree_lock); |
751 | return i; | 751 | return i; |
752 | } | 752 | } |
753 | EXPORT_SYMBOL(find_get_pages_contig); | ||
753 | 754 | ||
754 | /** | 755 | /** |
755 | * find_get_pages_tag - find and return pages that match @tag | 756 | * find_get_pages_tag - find and return pages that match @tag |
@@ -778,6 +779,7 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, | |||
778 | read_unlock_irq(&mapping->tree_lock); | 779 | read_unlock_irq(&mapping->tree_lock); |
779 | return ret; | 780 | return ret; |
780 | } | 781 | } |
782 | EXPORT_SYMBOL(find_get_pages_tag); | ||
781 | 783 | ||
782 | /** | 784 | /** |
783 | * grab_cache_page_nowait - returns locked page at given index in given cache | 785 | * grab_cache_page_nowait - returns locked page at given index in given cache |
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index cbb335813ec0..1b49dab9b25d 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c | |||
@@ -434,7 +434,6 @@ xip_truncate_page(struct address_space *mapping, loff_t from) | |||
434 | unsigned blocksize; | 434 | unsigned blocksize; |
435 | unsigned length; | 435 | unsigned length; |
436 | struct page *page; | 436 | struct page *page; |
437 | void *kaddr; | ||
438 | 437 | ||
439 | BUG_ON(!mapping->a_ops->get_xip_page); | 438 | BUG_ON(!mapping->a_ops->get_xip_page); |
440 | 439 | ||
@@ -458,11 +457,7 @@ xip_truncate_page(struct address_space *mapping, loff_t from) | |||
458 | else | 457 | else |
459 | return PTR_ERR(page); | 458 | return PTR_ERR(page); |
460 | } | 459 | } |
461 | kaddr = kmap_atomic(page, KM_USER0); | 460 | zero_user_page(page, offset, length, KM_USER0); |
462 | memset(kaddr + offset, 0, length); | ||
463 | kunmap_atomic(kaddr, KM_USER0); | ||
464 | |||
465 | flush_dcache_page(page); | ||
466 | return 0; | 461 | return 0; |
467 | } | 462 | } |
468 | EXPORT_SYMBOL_GPL(xip_truncate_page); | 463 | EXPORT_SYMBOL_GPL(xip_truncate_page); |
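zero_user_page() collapses the removed kmap_atomic/memset/kunmap_atomic/flush_dcache_page sequence into a single call. Reconstructed from exactly the lines deleted here, its presumed shape is (a sketch, not necessarily the helper's actual definition):

#include <linux/highmem.h>
#include <linux/string.h>

static inline void zero_user_page(struct page *page, unsigned offset,
				  unsigned size, enum km_type km)
{
	void *kaddr = kmap_atomic(page, km);

	memset(kaddr + offset, 0, size);
	kunmap_atomic(kaddr, km);
	flush_dcache_page(page);
}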
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 36db012b38dd..eb7180db3033 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -140,6 +140,8 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, | |||
140 | return page; | 140 | return page; |
141 | 141 | ||
142 | fail: | 142 | fail: |
143 | if (vma->vm_flags & VM_MAYSHARE) | ||
144 | resv_huge_pages++; | ||
143 | spin_unlock(&hugetlb_lock); | 145 | spin_unlock(&hugetlb_lock); |
144 | return NULL; | 146 | return NULL; |
145 | } | 147 | } |
@@ -172,6 +174,17 @@ static int __init hugetlb_setup(char *s) | |||
172 | } | 174 | } |
173 | __setup("hugepages=", hugetlb_setup); | 175 | __setup("hugepages=", hugetlb_setup); |
174 | 176 | ||
177 | static unsigned int cpuset_mems_nr(unsigned int *array) | ||
178 | { | ||
179 | int node; | ||
180 | unsigned int nr = 0; | ||
181 | |||
182 | for_each_node_mask(node, cpuset_current_mems_allowed) | ||
183 | nr += array[node]; | ||
184 | |||
185 | return nr; | ||
186 | } | ||
187 | |||
175 | #ifdef CONFIG_SYSCTL | 188 | #ifdef CONFIG_SYSCTL |
176 | static void update_and_free_page(struct page *page) | 189 | static void update_and_free_page(struct page *page) |
177 | { | 190 | { |
@@ -817,6 +830,26 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to) | |||
817 | chg = region_chg(&inode->i_mapping->private_list, from, to); | 830 | chg = region_chg(&inode->i_mapping->private_list, from, to); |
818 | if (chg < 0) | 831 | if (chg < 0) |
819 | return chg; | 832 | return chg; |
833 | /* | ||
834 | * When cpuset is configured, it breaks the strict hugetlb page | ||
835 | * reservation, as the accounting is done on a global variable. Such | ||
836 | * a reservation is completely rubbish in the presence of cpusets | ||
837 | * because the reservation is not checked against page availability | ||
838 | * for the current cpuset. An application can still be OOM-killed by | ||
839 | * the kernel for lack of free hugetlb pages in the cpuset the task | ||
840 | * is in. Enforcing strict accounting with cpusets is almost | ||
841 | * impossible (or too ugly) because cpusets are so fluid that tasks | ||
842 | * or memory nodes can be dynamically moved between them. | ||
843 | * | ||
844 | * Changing the semantics of shared hugetlb mappings under cpusets is | ||
845 | * undesirable. However, in order to preserve some of the semantics, | ||
846 | * we fall back to checking the current free page availability as a | ||
847 | * best attempt, hopefully minimizing the impact of the semantic | ||
848 | * change that cpusets bring. | ||
849 | */ | ||
850 | if (chg > cpuset_mems_nr(free_huge_pages_node)) | ||
851 | return -ENOMEM; | ||
852 | |||
820 | ret = hugetlb_acct_memory(chg); | 853 | ret = hugetlb_acct_memory(chg); |
821 | if (ret < 0) | 854 | if (ret < 0) |
822 | return ret; | 855 | return ret; |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6fd0b7455b0b..f9b5d6d5f4d6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -691,43 +691,26 @@ static void __init setup_nr_node_ids(void) {} | |||
691 | 691 | ||
692 | #ifdef CONFIG_NUMA | 692 | #ifdef CONFIG_NUMA |
693 | /* | 693 | /* |
694 | * Called from the slab reaper to drain pagesets on a particular node that | 694 | * Called from the vmstat counter updater to drain pagesets of this |
695 | * belongs to the currently executing processor. | 695 | * currently executing processor on remote nodes after they have |
696 | * expired. | ||
697 | * | ||
696 | * Note that this function must be called with the thread pinned to | 698 | * Note that this function must be called with the thread pinned to |
697 | * a single processor. | 699 | * a single processor. |
698 | */ | 700 | */ |
699 | void drain_node_pages(int nodeid) | 701 | void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) |
700 | { | 702 | { |
701 | int i; | ||
702 | enum zone_type z; | ||
703 | unsigned long flags; | 703 | unsigned long flags; |
704 | int to_drain; | ||
704 | 705 | ||
705 | for (z = 0; z < MAX_NR_ZONES; z++) { | 706 | local_irq_save(flags); |
706 | struct zone *zone = NODE_DATA(nodeid)->node_zones + z; | 707 | if (pcp->count >= pcp->batch) |
707 | struct per_cpu_pageset *pset; | 708 | to_drain = pcp->batch; |
708 | 709 | else | |
709 | if (!populated_zone(zone)) | 710 | to_drain = pcp->count; |
710 | continue; | 711 | free_pages_bulk(zone, to_drain, &pcp->list, 0); |
711 | 712 | pcp->count -= to_drain; | |
712 | pset = zone_pcp(zone, smp_processor_id()); | 713 | local_irq_restore(flags); |
713 | for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { | ||
714 | struct per_cpu_pages *pcp; | ||
715 | |||
716 | pcp = &pset->pcp[i]; | ||
717 | if (pcp->count) { | ||
718 | int to_drain; | ||
719 | |||
720 | local_irq_save(flags); | ||
721 | if (pcp->count >= pcp->batch) | ||
722 | to_drain = pcp->batch; | ||
723 | else | ||
724 | to_drain = pcp->count; | ||
725 | free_pages_bulk(zone, to_drain, &pcp->list, 0); | ||
726 | pcp->count -= to_drain; | ||
727 | local_irq_restore(flags); | ||
728 | } | ||
729 | } | ||
730 | } | ||
731 | } | 714 | } |
732 | #endif | 715 | #endif |
733 | 716 | ||
@@ -2148,11 +2131,14 @@ static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, | |||
2148 | 2131 | ||
2149 | switch (action) { | 2132 | switch (action) { |
2150 | case CPU_UP_PREPARE: | 2133 | case CPU_UP_PREPARE: |
2134 | case CPU_UP_PREPARE_FROZEN: | ||
2151 | if (process_zones(cpu)) | 2135 | if (process_zones(cpu)) |
2152 | ret = NOTIFY_BAD; | 2136 | ret = NOTIFY_BAD; |
2153 | break; | 2137 | break; |
2154 | case CPU_UP_CANCELED: | 2138 | case CPU_UP_CANCELED: |
2139 | case CPU_UP_CANCELED_FROZEN: | ||
2155 | case CPU_DEAD: | 2140 | case CPU_DEAD: |
2141 | case CPU_DEAD_FROZEN: | ||
2156 | free_zone_pagesets(cpu); | 2142 | free_zone_pagesets(cpu); |
2157 | break; | 2143 | break; |
2158 | default: | 2144 | default: |
@@ -3012,7 +2998,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self, | |||
3012 | { | 2998 | { |
3013 | int cpu = (unsigned long)hcpu; | 2999 | int cpu = (unsigned long)hcpu; |
3014 | 3000 | ||
3015 | if (action == CPU_DEAD) { | 3001 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { |
3016 | local_irq_disable(); | 3002 | local_irq_disable(); |
3017 | __drain_pages(cpu); | 3003 | __drain_pages(cpu); |
3018 | vm_events_fold_cpu(cpu); | 3004 | vm_events_fold_cpu(cpu); |
diff --git a/mm/slab.c b/mm/slab.c --- a/mm/slab.c +++ b/mm/slab.c | |||
@@ -928,12 +928,6 @@ static void next_reap_node(void) | |||
928 | { | 928 | { |
929 | int node = __get_cpu_var(reap_node); | 929 | int node = __get_cpu_var(reap_node); |
930 | 930 | ||
931 | /* | ||
932 | * Also drain per cpu pages on remote zones | ||
933 | */ | ||
934 | if (node != numa_node_id()) | ||
935 | drain_node_pages(node); | ||
936 | |||
937 | node = next_node(node, node_online_map); | 931 | node = next_node(node, node_online_map); |
938 | if (unlikely(node >= MAX_NUMNODES)) | 932 | if (unlikely(node >= MAX_NUMNODES)) |
939 | node = first_node(node_online_map); | 933 | node = first_node(node_online_map); |
@@ -1186,8 +1180,11 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, | |||
1186 | int memsize = sizeof(struct kmem_list3); | 1180 | int memsize = sizeof(struct kmem_list3); |
1187 | 1181 | ||
1188 | switch (action) { | 1182 | switch (action) { |
1189 | case CPU_UP_PREPARE: | 1183 | case CPU_LOCK_ACQUIRE: |
1190 | mutex_lock(&cache_chain_mutex); | 1184 | mutex_lock(&cache_chain_mutex); |
1185 | break; | ||
1186 | case CPU_UP_PREPARE: | ||
1187 | case CPU_UP_PREPARE_FROZEN: | ||
1191 | /* | 1188 | /* |
1192 | * We need to do this right in the beginning since | 1189 | * We need to do this right in the beginning since |
1193 | * alloc_arraycache's are going to use this list. | 1190 | * alloc_arraycache's are going to use this list. |
@@ -1274,17 +1271,28 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, | |||
1274 | } | 1271 | } |
1275 | break; | 1272 | break; |
1276 | case CPU_ONLINE: | 1273 | case CPU_ONLINE: |
1277 | mutex_unlock(&cache_chain_mutex); | 1274 | case CPU_ONLINE_FROZEN: |
1278 | start_cpu_timer(cpu); | 1275 | start_cpu_timer(cpu); |
1279 | break; | 1276 | break; |
1280 | #ifdef CONFIG_HOTPLUG_CPU | 1277 | #ifdef CONFIG_HOTPLUG_CPU |
1281 | case CPU_DOWN_PREPARE: | 1278 | case CPU_DOWN_PREPARE: |
1282 | mutex_lock(&cache_chain_mutex); | 1279 | case CPU_DOWN_PREPARE_FROZEN: |
1283 | break; | 1280 | /* |
1284 | case CPU_DOWN_FAILED: | 1281 | * Shutdown cache reaper. Note that the cache_chain_mutex is |
1285 | mutex_unlock(&cache_chain_mutex); | 1282 | * held so that if cache_reap() is invoked it cannot do |
1286 | break; | 1283 | * anything expensive but will only modify reap_work |
1284 | * and reschedule the timer. | ||
1285 | */ | ||
1286 | cancel_rearming_delayed_work(&per_cpu(reap_work, cpu)); | ||
1287 | /* Now the cache_reaper is guaranteed to be not running. */ | ||
1288 | per_cpu(reap_work, cpu).work.func = NULL; | ||
1289 | break; | ||
1290 | case CPU_DOWN_FAILED: | ||
1291 | case CPU_DOWN_FAILED_FROZEN: | ||
1292 | start_cpu_timer(cpu); | ||
1293 | break; | ||
1287 | case CPU_DEAD: | 1294 | case CPU_DEAD: |
1295 | case CPU_DEAD_FROZEN: | ||
1288 | /* | 1296 | /* |
1289 | * Even if all the cpus of a node are down, we don't free the | 1297 | * Even if all the cpus of a node are down, we don't free the |
1290 | * kmem_list3 of any cache. This is to avoid a race between | 1298 | * kmem_list3 of any cache. This is to avoid a race between |
@@ -1296,6 +1304,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, | |||
1296 | /* fall thru */ | 1304 | /* fall thru */ |
1297 | #endif | 1305 | #endif |
1298 | case CPU_UP_CANCELED: | 1306 | case CPU_UP_CANCELED: |
1307 | case CPU_UP_CANCELED_FROZEN: | ||
1299 | list_for_each_entry(cachep, &cache_chain, next) { | 1308 | list_for_each_entry(cachep, &cache_chain, next) { |
1300 | struct array_cache *nc; | 1309 | struct array_cache *nc; |
1301 | struct array_cache *shared; | 1310 | struct array_cache *shared; |
@@ -1354,6 +1363,8 @@ free_array_cache: | |||
1354 | continue; | 1363 | continue; |
1355 | drain_freelist(cachep, l3, l3->free_objects); | 1364 | drain_freelist(cachep, l3, l3->free_objects); |
1356 | } | 1365 | } |
1366 | break; | ||
1367 | case CPU_LOCK_RELEASE: | ||
1357 | mutex_unlock(&cache_chain_mutex); | 1368 | mutex_unlock(&cache_chain_mutex); |
1358 | break; | 1369 | break; |
1359 | } | 1370 | } |
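The hunk above moves the cache_chain_mutex acquisition out of the individual UP/DOWN cases and into the new CPU_LOCK_ACQUIRE/CPU_LOCK_RELEASE events, which bracket the whole hotplug transition, and it teaches each case to also accept its _FROZEN variant. A minimal sketch of the same pattern for some other subsystem (my_mutex, my_online_cpu() and my_offline_cpu() are hypothetical names, not part of this patch):

	static int __cpuinit my_callback(struct notifier_block *nfb,
					 unsigned long action, void *hcpu)
	{
		long cpu = (long)hcpu;

		switch (action) {
		case CPU_LOCK_ACQUIRE:
			/* Taken once, before any transition event fires */
			mutex_lock(&my_mutex);
			break;
		case CPU_ONLINE:
		case CPU_ONLINE_FROZEN:	/* same event during suspend/resume */
			my_online_cpu(cpu);
			break;
		case CPU_DEAD:
		case CPU_DEAD_FROZEN:
			my_offline_cpu(cpu);
			break;
		case CPU_LOCK_RELEASE:
			/* Dropped once, after the transition completes */
			mutex_unlock(&my_mutex);
			break;
		}
		return NOTIFY_OK;
	}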
@@ -3742,7 +3753,6 @@ EXPORT_SYMBOL(__kmalloc); | |||
3742 | 3753 | ||
3743 | /** | 3754 | /** |
3744 | * krealloc - reallocate memory. The contents will remain unchanged. | 3755 | * krealloc - reallocate memory. The contents will remain unchanged. |
3745 | * | ||
3746 | * @p: object to reallocate memory for. | 3756 | * @p: object to reallocate memory for. |
3747 | * @new_size: how many bytes of memory are required. | 3757 | * @new_size: how many bytes of memory are required. |
3748 | * @flags: the type of memory to allocate. | 3758 | * @flags: the type of memory to allocate. |
@@ -4140,7 +4150,6 @@ next: | |||
4140 | check_irq_on(); | 4150 | check_irq_on(); |
4141 | mutex_unlock(&cache_chain_mutex); | 4151 | mutex_unlock(&cache_chain_mutex); |
4142 | next_reap_node(); | 4152 | next_reap_node(); |
4143 | refresh_cpu_vm_stats(smp_processor_id()); | ||
4144 | out: | 4153 | out: |
4145 | /* Set up the next iteration */ | 4154 | /* Set up the next iteration */ |
4146 | schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC)); | 4155 | schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC)); |
@@ -66,11 +66,11 @@ | |||
66 | * SLUB assigns one slab for allocation to each processor. | 66 | * SLUB assigns one slab for allocation to each processor. |
67 | * Allocations only occur from these slabs called cpu slabs. | 67 | * Allocations only occur from these slabs called cpu slabs. |
68 | * | 68 | * |
69 | * Slabs with free elements are kept on a partial list. | 69 | * Slabs with free elements are kept on a partial list and during regular |
70 | * There is no list for full slabs. If an object in a full slab is | 70 | * operations no list for full slabs is used. If an object in a full slab is |
71 | * freed then the slab will show up again on the partial lists. | 71 | * freed then the slab will show up again on the partial lists. |
72 | * Otherwise there is no need to track full slabs unless we have to | 72 | * We track full slabs for debugging purposes though because otherwise we |
73 | * track full slabs for debugging purposes. | 73 | * cannot scan all objects. |
74 | * | 74 | * |
75 | * Slabs are freed when they become empty. Teardown and setup is | 75 | * Slabs are freed when they become empty. Teardown and setup is |
76 | * minimal so we rely on the page allocators per cpu caches for | 76 | * minimal so we rely on the page allocators per cpu caches for |
@@ -87,13 +87,36 @@ | |||
87 | * the fast path. | 87 | * the fast path. |
88 | */ | 88 | */ |
89 | 89 | ||
90 | static inline int SlabDebug(struct page *page) | ||
91 | { | ||
92 | #ifdef CONFIG_SLUB_DEBUG | ||
93 | return PageError(page); | ||
94 | #else | ||
95 | return 0; | ||
96 | #endif | ||
97 | } | ||
98 | |||
99 | static inline void SetSlabDebug(struct page *page) | ||
100 | { | ||
101 | #ifdef CONFIG_SLUB_DEBUG | ||
102 | SetPageError(page); | ||
103 | #endif | ||
104 | } | ||
105 | |||
106 | static inline void ClearSlabDebug(struct page *page) | ||
107 | { | ||
108 | #ifdef CONFIG_SLUB_DEBUG | ||
109 | ClearPageError(page); | ||
110 | #endif | ||
111 | } | ||
112 | |||
90 | /* | 113 | /* |
91 | * Issues still to be resolved: | 114 | * Issues still to be resolved: |
92 | * | 115 | * |
93 | * - The per cpu array is updated for each new slab and and is a remote | 116 | * - The per cpu array is updated for each new slab and is a remote |
94 | * cacheline for most nodes. This could become a bouncing cacheline given | 117 | * cacheline for most nodes. This could become a bouncing cacheline given |
95 | * enough frequent updates. There are 16 pointers in a cacheline.so at | 118 | * enough frequent updates. There are 16 pointers in a cacheline, so at |
96 | * max 16 cpus could compete. Likely okay. | 119 | * max 16 cpus could compete for the cacheline, which may be okay. |
97 | * | 120 | * |
98 | * - Support PAGE_ALLOC_DEBUG. Should be easy to do. | 121 | * - Support PAGE_ALLOC_DEBUG. Should be easy to do. |
99 | * | 122 | * |
@@ -137,6 +160,7 @@ | |||
137 | 160 | ||
138 | #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \ | 161 | #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \ |
139 | SLAB_POISON | SLAB_STORE_USER) | 162 | SLAB_POISON | SLAB_STORE_USER) |
163 | |||
140 | /* | 164 | /* |
141 | * Set of flags that will prevent slab merging | 165 | * Set of flags that will prevent slab merging |
142 | */ | 166 | */ |
@@ -157,6 +181,11 @@ | |||
157 | /* Internal SLUB flags */ | 181 | /* Internal SLUB flags */ |
158 | #define __OBJECT_POISON 0x80000000 /* Poison object */ | 182 | #define __OBJECT_POISON 0x80000000 /* Poison object */ |
159 | 183 | ||
184 | /* Not all arches define cache_line_size */ | ||
185 | #ifndef cache_line_size | ||
186 | #define cache_line_size() L1_CACHE_BYTES | ||
187 | #endif | ||
188 | |||
160 | static int kmem_size = sizeof(struct kmem_cache); | 189 | static int kmem_size = sizeof(struct kmem_cache); |
161 | 190 | ||
162 | #ifdef CONFIG_SMP | 191 | #ifdef CONFIG_SMP |
@@ -166,7 +195,7 @@ static struct notifier_block slab_notifier; | |||
166 | static enum { | 195 | static enum { |
167 | DOWN, /* No slab functionality available */ | 196 | DOWN, /* No slab functionality available */ |
168 | PARTIAL, /* kmem_cache_open() works but kmalloc does not */ | 197 | PARTIAL, /* kmem_cache_open() works but kmalloc does not */ |
169 | UP, /* Everything works */ | 198 | UP, /* Everything works but does not show up in sysfs */ |
170 | SYSFS /* Sysfs up */ | 199 | SYSFS /* Sysfs up */ |
171 | } slab_state = DOWN; | 200 | } slab_state = DOWN; |
172 | 201 | ||
@@ -174,7 +203,19 @@ static enum { | |||
174 | static DECLARE_RWSEM(slub_lock); | 203 | static DECLARE_RWSEM(slub_lock); |
175 | LIST_HEAD(slab_caches); | 204 | LIST_HEAD(slab_caches); |
176 | 205 | ||
177 | #ifdef CONFIG_SYSFS | 206 | /* |
207 | * Tracking user of a slab. | ||
208 | */ | ||
209 | struct track { | ||
210 | void *addr; /* Called from address */ | ||
211 | int cpu; /* Was running on cpu */ | ||
212 | int pid; /* Pid context */ | ||
213 | unsigned long when; /* When did the operation occur */ | ||
214 | }; | ||
215 | |||
216 | enum track_item { TRACK_ALLOC, TRACK_FREE }; | ||
217 | |||
218 | #if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG) | ||
178 | static int sysfs_slab_add(struct kmem_cache *); | 219 | static int sysfs_slab_add(struct kmem_cache *); |
179 | static int sysfs_slab_alias(struct kmem_cache *, const char *); | 220 | static int sysfs_slab_alias(struct kmem_cache *, const char *); |
180 | static void sysfs_slab_remove(struct kmem_cache *); | 221 | static void sysfs_slab_remove(struct kmem_cache *); |
@@ -202,6 +243,63 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) | |||
202 | #endif | 243 | #endif |
203 | } | 244 | } |
204 | 245 | ||
246 | static inline int check_valid_pointer(struct kmem_cache *s, | ||
247 | struct page *page, const void *object) | ||
248 | { | ||
249 | void *base; | ||
250 | |||
251 | if (!object) | ||
252 | return 1; | ||
253 | |||
254 | base = page_address(page); | ||
255 | if (object < base || object >= base + s->objects * s->size || | ||
256 | (object - base) % s->size) { | ||
257 | return 0; | ||
258 | } | ||
259 | |||
260 | return 1; | ||
261 | } | ||
262 | |||
263 | /* | ||
264 | * Slow version of get and set free pointer. | ||
265 | * | ||
266 | * This version requires touching the cache lines of kmem_cache which | ||
267 | * we avoid to do in the fast alloc free paths. There we obtain the offset | ||
268 | * from the page struct. | ||
269 | */ | ||
270 | static inline void *get_freepointer(struct kmem_cache *s, void *object) | ||
271 | { | ||
272 | return *(void **)(object + s->offset); | ||
273 | } | ||
274 | |||
275 | static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) | ||
276 | { | ||
277 | *(void **)(object + s->offset) = fp; | ||
278 | } | ||
279 | |||
280 | /* Loop over all objects in a slab */ | ||
281 | #define for_each_object(__p, __s, __addr) \ | ||
282 | for (__p = (__addr); __p < (__addr) + (__s)->objects * (__s)->size;\ | ||
283 | __p += (__s)->size) | ||
284 | |||
285 | /* Scan freelist */ | ||
286 | #define for_each_free_object(__p, __s, __free) \ | ||
287 | for (__p = (__free); __p; __p = get_freepointer((__s), __p)) | ||
288 | |||
289 | /* Determine object index from a given position */ | ||
290 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) | ||
291 | { | ||
292 | return (p - addr) / s->size; | ||
293 | } | ||
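As a sketch of how these helpers combine, a hypothetical debug routine (count_free() is illustrative only and not part of this patch; the caller must hold the slab lock so the freelist cannot change underneath it):

	static int count_free(struct kmem_cache *s, struct page *page)
	{
		void *p;
		int nr = 0;

		/* Walk the freelist that is chained through the free objects */
		for_each_free_object(p, s, page->freelist)
			nr++;
		return nr;
	}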
294 | |||
295 | #ifdef CONFIG_SLUB_DEBUG | ||
296 | /* | ||
297 | * Debug settings: | ||
298 | */ | ||
299 | static int slub_debug; | ||
300 | |||
301 | static char *slub_debug_slabs; | ||
302 | |||
205 | /* | 303 | /* |
206 | * Object debugging | 304 | * Object debugging |
207 | */ | 305 | */ |
@@ -237,35 +335,6 @@ static void print_section(char *text, u8 *addr, unsigned int length) | |||
237 | } | 335 | } |
238 | } | 336 | } |
239 | 337 | ||
240 | /* | ||
241 | * Slow version of get and set free pointer. | ||
242 | * | ||
243 | * This requires touching the cache lines of kmem_cache. | ||
244 | * The offset can also be obtained from the page. In that | ||
245 | * case it is in the cacheline that we already need to touch. | ||
246 | */ | ||
247 | static void *get_freepointer(struct kmem_cache *s, void *object) | ||
248 | { | ||
249 | return *(void **)(object + s->offset); | ||
250 | } | ||
251 | |||
252 | static void set_freepointer(struct kmem_cache *s, void *object, void *fp) | ||
253 | { | ||
254 | *(void **)(object + s->offset) = fp; | ||
255 | } | ||
256 | |||
257 | /* | ||
258 | * Tracking user of a slab. | ||
259 | */ | ||
260 | struct track { | ||
261 | void *addr; /* Called from address */ | ||
262 | int cpu; /* Was running on cpu */ | ||
263 | int pid; /* Pid context */ | ||
264 | unsigned long when; /* When did the operation occur */ | ||
265 | }; | ||
266 | |||
267 | enum track_item { TRACK_ALLOC, TRACK_FREE }; | ||
268 | |||
269 | static struct track *get_track(struct kmem_cache *s, void *object, | 338 | static struct track *get_track(struct kmem_cache *s, void *object, |
270 | enum track_item alloc) | 339 | enum track_item alloc) |
271 | { | 340 | { |
@@ -400,24 +469,6 @@ static int check_bytes(u8 *start, unsigned int value, unsigned int bytes) | |||
400 | return 1; | 469 | return 1; |
401 | } | 470 | } |
402 | 471 | ||
403 | |||
404 | static int check_valid_pointer(struct kmem_cache *s, struct page *page, | ||
405 | void *object) | ||
406 | { | ||
407 | void *base; | ||
408 | |||
409 | if (!object) | ||
410 | return 1; | ||
411 | |||
412 | base = page_address(page); | ||
413 | if (object < base || object >= base + s->objects * s->size || | ||
414 | (object - base) % s->size) { | ||
415 | return 0; | ||
416 | } | ||
417 | |||
418 | return 1; | ||
419 | } | ||
420 | |||
421 | /* | 472 | /* |
422 | * Object layout: | 473 | * Object layout: |
423 | * | 474 | * |
@@ -425,26 +476,34 @@ static int check_valid_pointer(struct kmem_cache *s, struct page *page, | |||
425 | * Bytes of the object to be managed. | 476 | * Bytes of the object to be managed. |
426 | * If the freepointer may overlay the object then the free | 477 | * If the freepointer may overlay the object then the free |
427 | * pointer is the first word of the object. | 478 | * pointer is the first word of the object. |
479 | * | ||
428 | * Poisoning uses 0x6b (POISON_FREE) and the last byte is | 480 | * Poisoning uses 0x6b (POISON_FREE) and the last byte is |
429 | * 0xa5 (POISON_END) | 481 | * 0xa5 (POISON_END) |
430 | * | 482 | * |
431 | * object + s->objsize | 483 | * object + s->objsize |
432 | * Padding to reach word boundary. This is also used for Redzoning. | 484 | * Padding to reach word boundary. This is also used for Redzoning. |
433 | * Padding is extended to word size if Redzoning is enabled | 485 | * Padding is extended by another word if Redzoning is enabled and |
434 | * and objsize == inuse. | 486 | * objsize == inuse. |
487 | * | ||
435 | * We fill with 0xbb (RED_INACTIVE) for inactive objects and with | 488 | * We fill with 0xbb (RED_INACTIVE) for inactive objects and with |
436 | * 0xcc (RED_ACTIVE) for objects in use. | 489 | * 0xcc (RED_ACTIVE) for objects in use. |
437 | * | 490 | * |
438 | * object + s->inuse | 491 | * object + s->inuse |
492 | * Meta data starts here. | ||
493 | * | ||
439 | * A. Free pointer (if we cannot overwrite object on free) | 494 | * A. Free pointer (if we cannot overwrite object on free) |
440 | * B. Tracking data for SLAB_STORE_USER | 495 | * B. Tracking data for SLAB_STORE_USER |
441 | * C. Padding to reach required alignment boundary | 496 | * C. Padding to reach required alignment boundary or at minimum |
442 | * Padding is done using 0x5a (POISON_INUSE) | 497 | * one word if debugging is on to be able to detect writes |
498 | * before the word boundary. | ||
499 | * | ||
500 | * Padding is done using 0x5a (POISON_INUSE) | ||
443 | * | 501 | * |
444 | * object + s->size | 502 | * object + s->size |
503 | * Nothing is used beyond s->size. | ||
445 | * | 504 | * |
446 | * If slabcaches are merged then the objsize and inuse boundaries are to | 505 | * If slabcaches are merged then the objsize and inuse boundaries are mostly |
447 | * be ignored. And therefore no slab options that rely on these boundaries | 506 | * ignored. And therefore no slab options that rely on these boundaries |
448 | * may be used with merged slabcaches. | 507 | * may be used with merged slabcaches. |
449 | */ | 508 | */ |
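To make the layout concrete, here is one hypothetical example (all offsets are illustrative, not taken from the patch): a 24-byte object on a 64-bit machine with SLAB_RED_ZONE, SLAB_POISON and SLAB_STORE_USER enabled could end up as

	object + 0	24 payload bytes; 0x6b poison with a trailing 0xa5 while free
	object + 24	one red zone word (0xbb when inactive, 0xcc when in use)
	object + 32	free pointer (poisoning forbids overlaying it on the object)
	object + 40	two struct track records for SLAB_STORE_USER
	object + 88	0x5a padding up to s->size, where the next object begins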
450 | 509 | ||
@@ -570,8 +629,7 @@ static int check_object(struct kmem_cache *s, struct page *page, | |||
570 | /* | 629 | /* |
571 | * No choice but to zap it and thus loose the remainder | 630 | * No choice but to zap it and thus lose the remainder |
572 | * of the free objects in this slab. May cause | 631 | * of the free objects in this slab. May cause |
573 | * another error because the object count maybe | 632 | * another error because the object count is now wrong. |
574 | * wrong now. | ||
575 | */ | 633 | */ |
576 | set_freepointer(s, p, NULL); | 634 | set_freepointer(s, p, NULL); |
577 | return 0; | 635 | return 0; |
@@ -611,9 +669,8 @@ static int check_slab(struct kmem_cache *s, struct page *page) | |||
611 | } | 669 | } |
612 | 670 | ||
613 | /* | 671 | /* |
614 | * Determine if a certain object on a page is on the freelist and | 672 | * Determine if a certain object on a page is on the freelist. Must hold the |
615 | * therefore free. Must hold the slab lock for cpu slabs to | 673 | * slab lock to guarantee that the chains are in a consistent state. |
616 | * guarantee that the chains are consistent. | ||
617 | */ | 674 | */ |
618 | static int on_freelist(struct kmem_cache *s, struct page *page, void *search) | 675 | static int on_freelist(struct kmem_cache *s, struct page *page, void *search) |
619 | { | 676 | { |
@@ -659,7 +716,7 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) | |||
659 | } | 716 | } |
660 | 717 | ||
661 | /* | 718 | /* |
662 | * Tracking of fully allocated slabs for debugging | 719 | * Tracking of fully allocated slabs for debugging purposes. |
663 | */ | 720 | */ |
664 | static void add_full(struct kmem_cache_node *n, struct page *page) | 721 | static void add_full(struct kmem_cache_node *n, struct page *page) |
665 | { | 722 | { |
@@ -710,7 +767,7 @@ bad: | |||
710 | /* | 767 | /* |
711 | * If this is a slab page then lets do the best we can | 768 | * If this is a slab page then lets do the best we can |
712 | * to avoid issues in the future. Marking all objects | 769 | * to avoid issues in the future. Marking all objects |
713 | * as used avoids touching the remainder. | 770 | * as used avoids touching the remaining objects. |
714 | */ | 771 | */ |
715 | printk(KERN_ERR "@@@ SLUB: %s slab 0x%p. Marking all objects used.\n", | 772 | printk(KERN_ERR "@@@ SLUB: %s slab 0x%p. Marking all objects used.\n", |
716 | s->name, page); | 773 | s->name, page); |
@@ -764,6 +821,113 @@ fail: | |||
764 | return 0; | 821 | return 0; |
765 | } | 822 | } |
766 | 823 | ||
824 | static void trace(struct kmem_cache *s, struct page *page, void *object, int alloc) | ||
825 | { | ||
826 | if (s->flags & SLAB_TRACE) { | ||
827 | printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n", | ||
828 | s->name, | ||
829 | alloc ? "alloc" : "free", | ||
830 | object, page->inuse, | ||
831 | page->freelist); | ||
832 | |||
833 | if (!alloc) | ||
834 | print_section("Object", (void *)object, s->objsize); | ||
835 | |||
836 | dump_stack(); | ||
837 | } | ||
838 | } | ||
839 | |||
840 | static int __init setup_slub_debug(char *str) | ||
841 | { | ||
842 | if (!str || *str != '=') | ||
843 | slub_debug = DEBUG_DEFAULT_FLAGS; | ||
844 | else { | ||
845 | str++; | ||
846 | if (*str == 0 || *str == ',') | ||
847 | slub_debug = DEBUG_DEFAULT_FLAGS; | ||
848 | else | ||
849 | for (; *str && *str != ','; str++) | ||
850 | switch (*str) { | ||
851 | case 'f' : case 'F' : | ||
852 | slub_debug |= SLAB_DEBUG_FREE; | ||
853 | break; | ||
854 | case 'z' : case 'Z' : | ||
855 | slub_debug |= SLAB_RED_ZONE; | ||
856 | break; | ||
857 | case 'p' : case 'P' : | ||
858 | slub_debug |= SLAB_POISON; | ||
859 | break; | ||
860 | case 'u' : case 'U' : | ||
861 | slub_debug |= SLAB_STORE_USER; | ||
862 | break; | ||
863 | case 't' : case 'T' : | ||
864 | slub_debug |= SLAB_TRACE; | ||
865 | break; | ||
866 | default: | ||
867 | printk(KERN_ERR "slub_debug option '%c' " | ||
868 | "unknown. skipped\n",*str); | ||
869 | } | ||
870 | } | ||
871 | |||
872 | if (*str == ',') | ||
873 | slub_debug_slabs = str + 1; | ||
874 | return 1; | ||
875 | } | ||
876 | |||
877 | __setup("slub_debug", setup_slub_debug); | ||
878 | |||
879 | static void kmem_cache_open_debug_check(struct kmem_cache *s) | ||
880 | { | ||
881 | /* | ||
882 | * The page->offset field is only 16 bits wide. This is an offset | ||
883 | * in units of words from the beginning of an object. If the slab | ||
884 | * size is bigger than that, we cannot move the free pointer behind the | ||
885 | * object anymore. | ||
886 | * | ||
887 | * On 32 bit platforms the limit is 256k. On 64bit platforms | ||
888 | * the limit is 512k. | ||
889 | * | ||
890 | * Debugging or ctor/dtors may create a need to move the free | ||
891 | * pointer. Fail if this happens. | ||
892 | */ | ||
893 | if (s->size >= 65535 * sizeof(void *)) { | ||
894 | BUG_ON(s->flags & (SLAB_RED_ZONE | SLAB_POISON | | ||
895 | SLAB_STORE_USER | SLAB_DESTROY_BY_RCU)); | ||
896 | BUG_ON(s->ctor || s->dtor); | ||
897 | } | ||
898 | else | ||
899 | /* | ||
900 | * Enable debugging if selected on the kernel commandline. | ||
901 | */ | ||
902 | if (slub_debug && (!slub_debug_slabs || | ||
903 | strncmp(slub_debug_slabs, s->name, | ||
904 | strlen(slub_debug_slabs)) == 0)) | ||
905 | s->flags |= slub_debug; | ||
906 | } | ||
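The limits quoted in the comment above follow directly from the 16-bit page->offset field, which counts in words:

	65535 words * 4 bytes/word = 262140 bytes, roughly 256k (32-bit)
	65535 words * 8 bytes/word = 524280 bytes, roughly 512k (64-bit)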
907 | #else | ||
908 | |||
909 | static inline int alloc_object_checks(struct kmem_cache *s, | ||
910 | struct page *page, void *object) { return 0; } | ||
911 | |||
912 | static inline int free_object_checks(struct kmem_cache *s, | ||
913 | struct page *page, void *object) { return 0; } | ||
914 | |||
915 | static inline void add_full(struct kmem_cache_node *n, struct page *page) {} | ||
916 | static inline void remove_full(struct kmem_cache *s, struct page *page) {} | ||
917 | static inline void trace(struct kmem_cache *s, struct page *page, | ||
918 | void *object, int alloc) {} | ||
919 | static inline void init_object(struct kmem_cache *s, | ||
920 | void *object, int active) {} | ||
921 | static inline void init_tracking(struct kmem_cache *s, void *object) {} | ||
922 | static inline int slab_pad_check(struct kmem_cache *s, struct page *page) | ||
923 | { return 1; } | ||
924 | static inline int check_object(struct kmem_cache *s, struct page *page, | ||
925 | void *object, int active) { return 1; } | ||
926 | static inline void set_track(struct kmem_cache *s, void *object, | ||
927 | enum track_item alloc, void *addr) {} | ||
928 | static inline void kmem_cache_open_debug_check(struct kmem_cache *s) {} | ||
929 | #define slub_debug 0 | ||
930 | #endif | ||
767 | /* | 931 | /* |
768 | * Slab allocation and freeing | 932 | * Slab allocation and freeing |
769 | */ | 933 | */ |
@@ -797,7 +961,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
797 | static void setup_object(struct kmem_cache *s, struct page *page, | 961 | static void setup_object(struct kmem_cache *s, struct page *page, |
798 | void *object) | 962 | void *object) |
799 | { | 963 | { |
800 | if (PageError(page)) { | 964 | if (SlabDebug(page)) { |
801 | init_object(s, object, 0); | 965 | init_object(s, object, 0); |
802 | init_tracking(s, object); | 966 | init_tracking(s, object); |
803 | } | 967 | } |
@@ -832,7 +996,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
832 | page->flags |= 1 << PG_slab; | 996 | page->flags |= 1 << PG_slab; |
833 | if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON | | 997 | if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON | |
834 | SLAB_STORE_USER | SLAB_TRACE)) | 998 | SLAB_STORE_USER | SLAB_TRACE)) |
835 | page->flags |= 1 << PG_error; | 999 | SetSlabDebug(page); |
836 | 1000 | ||
837 | start = page_address(page); | 1001 | start = page_address(page); |
838 | end = start + s->objects * s->size; | 1002 | end = start + s->objects * s->size; |
@@ -841,7 +1005,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
841 | memset(start, POISON_INUSE, PAGE_SIZE << s->order); | 1005 | memset(start, POISON_INUSE, PAGE_SIZE << s->order); |
842 | 1006 | ||
843 | last = start; | 1007 | last = start; |
844 | for (p = start + s->size; p < end; p += s->size) { | 1008 | for_each_object(p, s, start) { |
845 | setup_object(s, page, last); | 1009 | setup_object(s, page, last); |
846 | set_freepointer(s, last, p); | 1010 | set_freepointer(s, last, p); |
847 | last = p; | 1011 | last = p; |
@@ -861,13 +1025,11 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
861 | { | 1025 | { |
862 | int pages = 1 << s->order; | 1026 | int pages = 1 << s->order; |
863 | 1027 | ||
864 | if (unlikely(PageError(page) || s->dtor)) { | 1028 | if (unlikely(SlabDebug(page) || s->dtor)) { |
865 | void *start = page_address(page); | ||
866 | void *end = start + (pages << PAGE_SHIFT); | ||
867 | void *p; | 1029 | void *p; |
868 | 1030 | ||
869 | slab_pad_check(s, page); | 1031 | slab_pad_check(s, page); |
870 | for (p = start; p <= end - s->size; p += s->size) { | 1032 | for_each_object(p, s, page_address(page)) { |
871 | if (s->dtor) | 1033 | if (s->dtor) |
872 | s->dtor(p, s, 0); | 1034 | s->dtor(p, s, 0); |
873 | check_object(s, page, p, 0); | 1035 | check_object(s, page, p, 0); |
@@ -910,7 +1072,8 @@ static void discard_slab(struct kmem_cache *s, struct page *page) | |||
910 | 1072 | ||
911 | atomic_long_dec(&n->nr_slabs); | 1073 | atomic_long_dec(&n->nr_slabs); |
912 | reset_page_mapcount(page); | 1074 | reset_page_mapcount(page); |
913 | page->flags &= ~(1 << PG_slab | 1 << PG_error); | 1075 | ClearSlabDebug(page); |
1076 | __ClearPageSlab(page); | ||
914 | free_slab(s, page); | 1077 | free_slab(s, page); |
915 | } | 1078 | } |
916 | 1079 | ||
@@ -966,9 +1129,9 @@ static void remove_partial(struct kmem_cache *s, | |||
966 | } | 1129 | } |
967 | 1130 | ||
968 | /* | 1131 | /* |
969 | * Lock page and remove it from the partial list | 1132 | * Lock slab and remove from the partial list. |
970 | * | 1133 | * |
971 | * Must hold list_lock | 1134 | * Must hold list_lock. |
972 | */ | 1135 | */ |
973 | static int lock_and_del_slab(struct kmem_cache_node *n, struct page *page) | 1136 | static int lock_and_del_slab(struct kmem_cache_node *n, struct page *page) |
974 | { | 1137 | { |
@@ -981,7 +1144,7 @@ static int lock_and_del_slab(struct kmem_cache_node *n, struct page *page) | |||
981 | } | 1144 | } |
982 | 1145 | ||
983 | /* | 1146 | /* |
984 | * Try to get a partial slab from a specific node | 1147 | * Try to allocate a partial slab from a specific node. |
985 | */ | 1148 | */ |
986 | static struct page *get_partial_node(struct kmem_cache_node *n) | 1149 | static struct page *get_partial_node(struct kmem_cache_node *n) |
987 | { | 1150 | { |
@@ -990,7 +1153,8 @@ static struct page *get_partial_node(struct kmem_cache_node *n) | |||
990 | /* | 1153 | /* |
991 | * Racy check. If we mistakenly see no partial slabs then we | 1154 | * Racy check. If we mistakenly see no partial slabs then we |
992 | * just allocate an empty slab. If we mistakenly try to get a | 1155 | * just allocate an empty slab. If we mistakenly try to get a |
993 | * partial slab then get_partials() will return NULL. | 1156 | * partial slab and there is none available then get_partials() |
1157 | * will return NULL. | ||
994 | */ | 1158 | */ |
995 | if (!n || !n->nr_partial) | 1159 | if (!n || !n->nr_partial) |
996 | return NULL; | 1160 | return NULL; |
@@ -1006,8 +1170,7 @@ out: | |||
1006 | } | 1170 | } |
1007 | 1171 | ||
1008 | /* | 1172 | /* |
1009 | * Get a page from somewhere. Search in increasing NUMA | 1173 | * Get a page from somewhere. Search in increasing NUMA distances. |
1010 | * distances. | ||
1011 | */ | 1174 | */ |
1012 | static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) | 1175 | static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) |
1013 | { | 1176 | { |
@@ -1017,24 +1180,22 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) | |||
1017 | struct page *page; | 1180 | struct page *page; |
1018 | 1181 | ||
1019 | /* | 1182 | /* |
1020 | * The defrag ratio allows to configure the tradeoffs between | 1183 | * The defrag ratio allows a configuration of the tradeoffs between |
1021 | * inter node defragmentation and node local allocations. | 1184 | * inter node defragmentation and node local allocations. A lower |
1022 | * A lower defrag_ratio increases the tendency to do local | 1185 | * defrag_ratio increases the tendency to do local allocations |
1023 | * allocations instead of scanning throught the partial | 1186 | * instead of attempting to obtain partial slabs from other nodes. |
1024 | * lists on other nodes. | ||
1025 | * | ||
1026 | * If defrag_ratio is set to 0 then kmalloc() always | ||
1027 | * returns node local objects. If its higher then kmalloc() | ||
1028 | * may return off node objects in order to avoid fragmentation. | ||
1029 | * | 1187 | * |
1030 | * A higher ratio means slabs may be taken from other nodes | 1188 | * If the defrag_ratio is set to 0 then kmalloc() always |
1031 | * thus reducing the number of partial slabs on those nodes. | 1189 | * returns node local objects. If the ratio is higher then kmalloc() |
1190 | * may return off node objects because partial slabs are obtained | ||
1191 | * from other nodes and filled up. | ||
1032 | * | 1192 | * |
1033 | * If /sys/slab/xx/defrag_ratio is set to 100 (which makes | 1193 | * If /sys/slab/xx/defrag_ratio is set to 100 (which makes |
1034 | * defrag_ratio = 1000) then every (well almost) allocation | 1194 | * defrag_ratio = 1000) then every (well almost) allocation will |
1035 | * will first attempt to defrag slab caches on other nodes. This | 1195 | * first attempt to defrag slab caches on other nodes. This means |
1036 | * means scanning over all nodes to look for partial slabs which | 1196 | * scanning over all nodes to look for partial slabs which may be |
1037 | * may be a bit expensive to do on every slab allocation. | 1197 | * expensive if we do it every time we are trying to find a slab |
1198 | * with available objects. | ||
1038 | */ | 1199 | */ |
1039 | if (!s->defrag_ratio || get_cycles() % 1024 > s->defrag_ratio) | 1200 | if (!s->defrag_ratio || get_cycles() % 1024 > s->defrag_ratio) |
1040 | return NULL; | 1201 | return NULL; |
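A worked example of the check above: the internal value is ten times the sysfs one, so /sys/slab/xx/defrag_ratio = 100 yields s->defrag_ratio = 1000, and get_cycles() % 1024 exceeds 1000 for only about 23 of the 1024 possible values. Roughly 98% of allocations therefore go on to scan remote partial lists, while the default of 0 makes the function return NULL every time.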
@@ -1087,18 +1248,19 @@ static void putback_slab(struct kmem_cache *s, struct page *page) | |||
1087 | 1248 | ||
1088 | if (page->freelist) | 1249 | if (page->freelist) |
1089 | add_partial(n, page); | 1250 | add_partial(n, page); |
1090 | else if (PageError(page) && (s->flags & SLAB_STORE_USER)) | 1251 | else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER)) |
1091 | add_full(n, page); | 1252 | add_full(n, page); |
1092 | slab_unlock(page); | 1253 | slab_unlock(page); |
1093 | 1254 | ||
1094 | } else { | 1255 | } else { |
1095 | if (n->nr_partial < MIN_PARTIAL) { | 1256 | if (n->nr_partial < MIN_PARTIAL) { |
1096 | /* | 1257 | /* |
1097 | * Adding an empty page to the partial slabs in order | 1258 | * Adding an empty slab to the partial slabs in order |
1098 | * to avoid page allocator overhead. This page needs to | 1259 | * to avoid page allocator overhead. This slab needs |
1099 | * come after all the others that are not fully empty | 1260 | * to come after the other slabs with objects in |
1100 | * in order to make sure that we do maximum | 1261 | * order to fill them up. That way the size of the |
1101 | * defragmentation. | 1262 | * partial list stays small. kmem_cache_shrink can |
1263 | * reclaim empty slabs from the partial list. | ||
1102 | */ | 1264 | */ |
1103 | add_partial_tail(n, page); | 1265 | add_partial_tail(n, page); |
1104 | slab_unlock(page); | 1266 | slab_unlock(page); |
@@ -1166,11 +1328,11 @@ static void flush_all(struct kmem_cache *s) | |||
1166 | * 1. The page struct | 1328 | * 1. The page struct |
1167 | * 2. The first cacheline of the object to be allocated. | 1329 | * 2. The first cacheline of the object to be allocated. |
1168 | * | 1330 | * |
1169 | * The only cache lines that are read (apart from code) is the | 1331 | * The only other cache lines that are read (apart from code) is the |
1170 | * per cpu array in the kmem_cache struct. | 1332 | * per cpu array in the kmem_cache struct. |
1171 | * | 1333 | * |
1172 | * Fastpath is not possible if we need to get a new slab or have | 1334 | * Fastpath is not possible if we need to get a new slab or have |
1173 | * debugging enabled (which means all slabs are marked with PageError) | 1335 | * debugging enabled (which means all slabs are marked with SlabDebug) |
1174 | */ | 1336 | */ |
1175 | static void *slab_alloc(struct kmem_cache *s, | 1337 | static void *slab_alloc(struct kmem_cache *s, |
1176 | gfp_t gfpflags, int node, void *addr) | 1338 | gfp_t gfpflags, int node, void *addr) |
@@ -1193,7 +1355,7 @@ redo: | |||
1193 | object = page->freelist; | 1355 | object = page->freelist; |
1194 | if (unlikely(!object)) | 1356 | if (unlikely(!object)) |
1195 | goto another_slab; | 1357 | goto another_slab; |
1196 | if (unlikely(PageError(page))) | 1358 | if (unlikely(SlabDebug(page))) |
1197 | goto debug; | 1359 | goto debug; |
1198 | 1360 | ||
1199 | have_object: | 1361 | have_object: |
@@ -1220,9 +1382,11 @@ have_slab: | |||
1220 | cpu = smp_processor_id(); | 1382 | cpu = smp_processor_id(); |
1221 | if (s->cpu_slab[cpu]) { | 1383 | if (s->cpu_slab[cpu]) { |
1222 | /* | 1384 | /* |
1223 | * Someone else populated the cpu_slab while we enabled | 1385 | * Someone else populated the cpu_slab while we |
1224 | * interrupts, or we have got scheduled on another cpu. | 1386 | * enabled interrupts, or we have gotten scheduled |
1225 | * The page may not be on the requested node. | 1387 | * on another cpu. The page may not be on the |
1388 | * requested node even if __GFP_THISNODE was | ||
1389 | * specified. So we need to recheck. | ||
1226 | */ | 1390 | */ |
1227 | if (node == -1 || | 1391 | if (node == -1 || |
1228 | page_to_nid(s->cpu_slab[cpu]) == node) { | 1392 | page_to_nid(s->cpu_slab[cpu]) == node) { |
@@ -1235,7 +1399,7 @@ have_slab: | |||
1235 | slab_lock(page); | 1399 | slab_lock(page); |
1236 | goto redo; | 1400 | goto redo; |
1237 | } | 1401 | } |
1238 | /* Dump the current slab */ | 1402 | /* New slab does not fit our expectations */ |
1239 | flush_slab(s, s->cpu_slab[cpu], cpu); | 1403 | flush_slab(s, s->cpu_slab[cpu], cpu); |
1240 | } | 1404 | } |
1241 | slab_lock(page); | 1405 | slab_lock(page); |
@@ -1248,12 +1412,7 @@ debug: | |||
1248 | goto another_slab; | 1412 | goto another_slab; |
1249 | if (s->flags & SLAB_STORE_USER) | 1413 | if (s->flags & SLAB_STORE_USER) |
1250 | set_track(s, object, TRACK_ALLOC, addr); | 1414 | set_track(s, object, TRACK_ALLOC, addr); |
1251 | if (s->flags & SLAB_TRACE) { | 1415 | trace(s, page, object, 1); |
1252 | printk(KERN_INFO "TRACE %s alloc 0x%p inuse=%d fp=0x%p\n", | ||
1253 | s->name, object, page->inuse, | ||
1254 | page->freelist); | ||
1255 | dump_stack(); | ||
1256 | } | ||
1257 | init_object(s, object, 1); | 1416 | init_object(s, object, 1); |
1258 | goto have_object; | 1417 | goto have_object; |
1259 | } | 1418 | } |
@@ -1276,7 +1435,8 @@ EXPORT_SYMBOL(kmem_cache_alloc_node); | |||
1276 | * The fastpath only writes the cacheline of the page struct and the first | 1435 | * The fastpath only writes the cacheline of the page struct and the first |
1277 | * cacheline of the object. | 1436 | * cacheline of the object. |
1278 | * | 1437 | * |
1279 | * No special cachelines need to be read | 1438 | * We read the cpu_slab cacheline to check if the slab is the per cpu |
1439 | * slab for this processor. | ||
1280 | */ | 1440 | */ |
1281 | static void slab_free(struct kmem_cache *s, struct page *page, | 1441 | static void slab_free(struct kmem_cache *s, struct page *page, |
1282 | void *x, void *addr) | 1442 | void *x, void *addr) |
@@ -1288,7 +1448,7 @@ static void slab_free(struct kmem_cache *s, struct page *page, | |||
1288 | local_irq_save(flags); | 1448 | local_irq_save(flags); |
1289 | slab_lock(page); | 1449 | slab_lock(page); |
1290 | 1450 | ||
1291 | if (unlikely(PageError(page))) | 1451 | if (unlikely(SlabDebug(page))) |
1292 | goto debug; | 1452 | goto debug; |
1293 | checks_ok: | 1453 | checks_ok: |
1294 | prior = object[page->offset] = page->freelist; | 1454 | prior = object[page->offset] = page->freelist; |
@@ -1321,7 +1481,7 @@ out_unlock: | |||
1321 | slab_empty: | 1481 | slab_empty: |
1322 | if (prior) | 1482 | if (prior) |
1323 | /* | 1483 | /* |
1324 | * Slab on the partial list. | 1484 | * Slab still on the partial list. |
1325 | */ | 1485 | */ |
1326 | remove_partial(s, page); | 1486 | remove_partial(s, page); |
1327 | 1487 | ||
@@ -1337,13 +1497,7 @@ debug: | |||
1337 | remove_full(s, page); | 1497 | remove_full(s, page); |
1338 | if (s->flags & SLAB_STORE_USER) | 1498 | if (s->flags & SLAB_STORE_USER) |
1339 | set_track(s, x, TRACK_FREE, addr); | 1499 | set_track(s, x, TRACK_FREE, addr); |
1340 | if (s->flags & SLAB_TRACE) { | 1500 | trace(s, page, object, 0); |
1341 | printk(KERN_INFO "TRACE %s free 0x%p inuse=%d fp=0x%p\n", | ||
1342 | s->name, object, page->inuse, | ||
1343 | page->freelist); | ||
1344 | print_section("Object", (void *)object, s->objsize); | ||
1345 | dump_stack(); | ||
1346 | } | ||
1347 | init_object(s, object, 0); | 1501 | init_object(s, object, 0); |
1348 | goto checks_ok; | 1502 | goto checks_ok; |
1349 | } | 1503 | } |
@@ -1370,22 +1524,16 @@ static struct page *get_object_page(const void *x) | |||
1370 | } | 1524 | } |
1371 | 1525 | ||
1372 | /* | 1526 | /* |
1373 | * kmem_cache_open produces objects aligned at "size" and the first object | 1527 | * Object placement in a slab is made very easy because we always start at |
1374 | * is placed at offset 0 in the slab (We have no metainformation on the | 1528 | * offset 0. If we tune the size of the object to the alignment then we can |
1375 | * slab, all slabs are in essence "off slab"). | 1529 | * get the required alignment by putting one properly sized object after |
1376 | * | 1530 | * another. |
1377 | * In order to get the desired alignment one just needs to align the | ||
1378 | * size. | ||
1379 | * | 1531 | * |
1380 | * Notice that the allocation order determines the sizes of the per cpu | 1532 | * Notice that the allocation order determines the sizes of the per cpu |
1381 | * caches. Each processor has always one slab available for allocations. | 1533 | * caches. Each processor has always one slab available for allocations. |
1382 | * Increasing the allocation order reduces the number of times that slabs | 1534 | * Increasing the allocation order reduces the number of times that slabs |
1383 | * must be moved on and off the partial lists and therefore may influence | 1535 | * must be moved on and off the partial lists and is therefore a factor in |
1384 | * locking overhead. | 1536 | * locking overhead. |
1385 | * | ||
1386 | * The offset is used to relocate the free list link in each object. It is | ||
1387 | * therefore possible to move the free list link behind the object. This | ||
1388 | * is necessary for RCU to work properly and also useful for debugging. | ||
1389 | */ | 1537 | */ |
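A quick worked example of the sizing trick described above (numbers are illustrative): a cache whose objects need 52 bytes with 8-byte alignment rounds s->size up to 56, so object k sits at offset 56 * k and automatically inherits the alignment of the slab base, with no per-object metadata in between.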
1390 | 1538 | ||
1391 | /* | 1539 | /* |
@@ -1396,76 +1544,110 @@ static struct page *get_object_page(const void *x) | |||
1396 | */ | 1544 | */ |
1397 | static int slub_min_order; | 1545 | static int slub_min_order; |
1398 | static int slub_max_order = DEFAULT_MAX_ORDER; | 1546 | static int slub_max_order = DEFAULT_MAX_ORDER; |
1399 | |||
1400 | /* | ||
1401 | * Minimum number of objects per slab. This is necessary in order to | ||
1402 | * reduce locking overhead. Similar to the queue size in SLAB. | ||
1403 | */ | ||
1404 | static int slub_min_objects = DEFAULT_MIN_OBJECTS; | 1547 | static int slub_min_objects = DEFAULT_MIN_OBJECTS; |
1405 | 1548 | ||
1406 | /* | 1549 | /* |
1407 | * Merge control. If this is set then no merging of slab caches will occur. | 1550 | * Merge control. If this is set then no merging of slab caches will occur. |
1551 | * (Could be removed. This was introduced to pacify the merge skeptics.) | ||
1408 | */ | 1552 | */ |
1409 | static int slub_nomerge; | 1553 | static int slub_nomerge; |
1410 | 1554 | ||
1411 | /* | 1555 | /* |
1412 | * Debug settings: | ||
1413 | */ | ||
1414 | static int slub_debug; | ||
1415 | |||
1416 | static char *slub_debug_slabs; | ||
1417 | |||
1418 | /* | ||
1419 | * Calculate the order of allocation given an slab object size. | 1556 | * Calculate the order of allocation given a slab object size. |
1420 | * | 1557 | * |
1421 | * The order of allocation has significant impact on other elements | 1558 | * The order of allocation has significant impact on performance and other |
1422 | * of the system. Generally order 0 allocations should be preferred | 1559 | * system components. Generally order 0 allocations should be preferred since |
1423 | * since they do not cause fragmentation in the page allocator. Larger | 1560 | * order 0 does not cause fragmentation in the page allocator. Larger objects |
1424 | * objects may have problems with order 0 because there may be too much | 1561 | * can be problematic to put into order 0 slabs because there may be too much |
1425 | * space left unused in a slab. We go to a higher order if more than 1/8th | 1562 | * unused space left. We go to a higher order if more than 1/8th of the slab |
1426 | * of the slab would be wasted. | 1563 | * would be wasted. |
1427 | * | 1564 | * |
1428 | * In order to reach satisfactory performance we must ensure that | 1565 | * In order to reach satisfactory performance we must ensure that a minimum |
1429 | * a minimum number of objects is in one slab. Otherwise we may | 1566 | * number of objects is in one slab. Otherwise we may generate too much |
1430 | * generate too much activity on the partial lists. This is less a | 1567 | * activity on the partial lists which requires taking the list_lock. This is |
1431 | * concern for large slabs though. slub_max_order specifies the order | 1568 | * less a concern for large slabs though which are rarely used. |
1432 | * where we begin to stop considering the number of objects in a slab. | ||
1433 | * | 1569 | * |
1434 | * Higher order allocations also allow the placement of more objects | 1570 | * slub_max_order specifies the order where we begin to stop considering the |
1435 | * in a slab and thereby reduce object handling overhead. If the user | 1571 | * number of objects in a slab as critical. If we reach slub_max_order then |
1436 | * has requested a higher mininum order then we start with that one | 1572 | * we try to keep the page order as low as possible. So we accept more waste |
1437 | * instead of zero. | 1573 | * of space in favor of a small page order. |
1574 | * | ||
1575 | * Higher order allocations also allow the placement of more objects in a | ||
1576 | * slab and thereby reduce object handling overhead. If the user has | ||
1577 | * requested a higher mininum order then we start with that one instead of | ||
1578 | * the smallest order which will fit the object. | ||
1438 | */ | 1579 | */ |
1439 | static int calculate_order(int size) | 1580 | static inline int slab_order(int size, int min_objects, |
1581 | int max_order, int fract_leftover) | ||
1440 | { | 1582 | { |
1441 | int order; | 1583 | int order; |
1442 | int rem; | 1584 | int rem; |
1443 | 1585 | ||
1444 | for (order = max(slub_min_order, fls(size - 1) - PAGE_SHIFT); | 1586 | for (order = max(slub_min_order, |
1445 | order < MAX_ORDER; order++) { | 1587 | fls(min_objects * size - 1) - PAGE_SHIFT); |
1446 | unsigned long slab_size = PAGE_SIZE << order; | 1588 | order <= max_order; order++) { |
1447 | 1589 | ||
1448 | if (slub_max_order > order && | 1590 | unsigned long slab_size = PAGE_SIZE << order; |
1449 | slab_size < slub_min_objects * size) | ||
1450 | continue; | ||
1451 | 1591 | ||
1452 | if (slab_size < size) | 1592 | if (slab_size < min_objects * size) |
1453 | continue; | 1593 | continue; |
1454 | 1594 | ||
1455 | rem = slab_size % size; | 1595 | rem = slab_size % size; |
1456 | 1596 | ||
1457 | if (rem <= (PAGE_SIZE << order) / 8) | 1597 | if (rem <= slab_size / fract_leftover) |
1458 | break; | 1598 | break; |
1459 | 1599 | ||
1460 | } | 1600 | } |
1461 | if (order >= MAX_ORDER) | 1601 | |
1462 | return -E2BIG; | ||
1463 | return order; | 1602 | return order; |
1464 | } | 1603 | } |
1465 | 1604 | ||
1605 | static inline int calculate_order(int size) | ||
1606 | { | ||
1607 | int order; | ||
1608 | int min_objects; | ||
1609 | int fraction; | ||
1610 | |||
1611 | /* | ||
1612 | * Attempt to find the best configuration for a slab. This | ||
1613 | * works by first attempting to generate a layout with | ||
1614 | * the best configuration and backing off gradually. | ||
1615 | * | ||
1616 | * First we reduce the acceptable waste in a slab. Then | ||
1617 | * we reduce the minimum objects required in a slab. | ||
1618 | */ | ||
1619 | min_objects = slub_min_objects; | ||
1620 | while (min_objects > 1) { | ||
1621 | fraction = 8; | ||
1622 | while (fraction >= 4) { | ||
1623 | order = slab_order(size, min_objects, | ||
1624 | slub_max_order, fraction); | ||
1625 | if (order <= slub_max_order) | ||
1626 | return order; | ||
1627 | fraction /= 2; | ||
1628 | } | ||
1629 | min_objects /= 2; | ||
1630 | } | ||
1631 | |||
1632 | /* | ||
1633 | * We were unable to place multiple objects in a slab. Now | ||
1634 | * lets see if we can place a single object there. | ||
1635 | */ | ||
1636 | order = slab_order(size, 1, slub_max_order, 1); | ||
1637 | if (order <= slub_max_order) | ||
1638 | return order; | ||
1639 | |||
1640 | /* | ||
1641 | * Doh, this slab cannot be placed using slub_max_order. | ||
1642 | */ | ||
1643 | order = slab_order(size, 1, MAX_ORDER, 1); | ||
1644 | if (order <= MAX_ORDER) | ||
1645 | return order; | ||
1646 | return -ENOSYS; | ||
1647 | } | ||
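A worked pass through this search, with illustrative inputs (4 KiB pages, slub_min_objects = 4, slub_max_order = 1, object size 700 bytes): slab_order(700, 4, 1, 8) starts at order 0 because fls(4 * 700 - 1) - PAGE_SHIFT = 12 - 12 = 0, then

	order 0: 4096 bytes hold 5 objects, rem = 4096 - 5 * 700 = 596 > 4096 / 8 = 512, too wasteful
	order 1: 8192 bytes hold 11 objects, rem = 8192 - 11 * 700 = 492 <= 8192 / 8 = 1024, accepted

so calculate_order() returns 1 on its first iteration, before any of the fallback steps are needed.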
1648 | |||
1466 | /* | 1649 | /* |
1467 | * Function to figure out which alignment to use from the | 1650 | * Figure out what the alignment of the objects will be. |
1468 | * various ways of specifying it. | ||
1469 | */ | 1651 | */ |
1470 | static unsigned long calculate_alignment(unsigned long flags, | 1652 | static unsigned long calculate_alignment(unsigned long flags, |
1471 | unsigned long align, unsigned long size) | 1653 | unsigned long align, unsigned long size) |
@@ -1480,8 +1662,8 @@ static unsigned long calculate_alignment(unsigned long flags, | |||
1480 | * then use it. | 1662 | * then use it. |
1481 | */ | 1663 | */ |
1482 | if ((flags & SLAB_HWCACHE_ALIGN) && | 1664 | if ((flags & SLAB_HWCACHE_ALIGN) && |
1483 | size > L1_CACHE_BYTES / 2) | 1665 | size > cache_line_size() / 2) |
1484 | return max_t(unsigned long, align, L1_CACHE_BYTES); | 1666 | return max_t(unsigned long, align, cache_line_size()); |
1485 | 1667 | ||
1486 | if (align < ARCH_SLAB_MINALIGN) | 1668 | if (align < ARCH_SLAB_MINALIGN) |
1487 | return ARCH_SLAB_MINALIGN; | 1669 | return ARCH_SLAB_MINALIGN; |
@@ -1619,22 +1801,23 @@ static int calculate_sizes(struct kmem_cache *s) | |||
1619 | */ | 1801 | */ |
1620 | size = ALIGN(size, sizeof(void *)); | 1802 | size = ALIGN(size, sizeof(void *)); |
1621 | 1803 | ||
1804 | #ifdef CONFIG_SLUB_DEBUG | ||
1622 | /* | 1805 | /* |
1623 | * If we are redzoning then check if there is some space between the | 1806 | * If we are Redzoning then check if there is some space between the |
1624 | * end of the object and the free pointer. If not then add an | 1807 | * end of the object and the free pointer. If not then add an |
1625 | * additional word, so that we can establish a redzone between | 1808 | * additional word to have some bytes to store Redzone information. |
1626 | * the object and the freepointer to be able to check for overwrites. | ||
1627 | */ | 1809 | */ |
1628 | if ((flags & SLAB_RED_ZONE) && size == s->objsize) | 1810 | if ((flags & SLAB_RED_ZONE) && size == s->objsize) |
1629 | size += sizeof(void *); | 1811 | size += sizeof(void *); |
1812 | #endif | ||
1630 | 1813 | ||
1631 | /* | 1814 | /* |
1632 | * With that we have determined how much of the slab is in actual | 1815 | * With that we have determined the number of bytes in actual use |
1633 | * use by the object. This is the potential offset to the free | 1816 | * by the object. This is the potential offset to the free pointer. |
1634 | * pointer. | ||
1635 | */ | 1817 | */ |
1636 | s->inuse = size; | 1818 | s->inuse = size; |
1637 | 1819 | ||
1820 | #ifdef CONFIG_SLUB_DEBUG | ||
1638 | if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || | 1821 | if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || |
1639 | s->ctor || s->dtor)) { | 1822 | s->ctor || s->dtor)) { |
1640 | /* | 1823 | /* |
@@ -1656,7 +1839,7 @@ static int calculate_sizes(struct kmem_cache *s) | |||
1656 | */ | 1839 | */ |
1657 | size += 2 * sizeof(struct track); | 1840 | size += 2 * sizeof(struct track); |
1658 | 1841 | ||
1659 | if (flags & DEBUG_DEFAULT_FLAGS) | 1842 | if (flags & SLAB_RED_ZONE) |
1660 | /* | 1843 | /* |
1661 | * Add some empty padding so that we can catch | 1844 | * Add some empty padding so that we can catch |
1662 | * overwrites from earlier objects rather than let | 1845 | * overwrites from earlier objects rather than let |
@@ -1665,10 +1848,12 @@ static int calculate_sizes(struct kmem_cache *s) | |||
1665 | * of the object. | 1848 | * of the object. |
1666 | */ | 1849 | */ |
1667 | size += sizeof(void *); | 1850 | size += sizeof(void *); |
1851 | #endif | ||
1852 | |||
1668 | /* | 1853 | /* |
1669 | * Determine the alignment based on various parameters that the | 1854 | * Determine the alignment based on various parameters that the |
1670 | * user specified (this is unecessarily complex due to the attempt | 1855 | * user specified and the dynamic determination of cache line size |
1671 | * to be compatible with SLAB. Should be cleaned up some day). | 1856 | * on bootup. |
1672 | */ | 1857 | */ |
1673 | align = calculate_alignment(flags, align, s->objsize); | 1858 | align = calculate_alignment(flags, align, s->objsize); |
1674 | 1859 | ||
@@ -1700,23 +1885,6 @@ static int calculate_sizes(struct kmem_cache *s) | |||
1700 | 1885 | ||
1701 | } | 1886 | } |
1702 | 1887 | ||
1703 | static int __init finish_bootstrap(void) | ||
1704 | { | ||
1705 | struct list_head *h; | ||
1706 | int err; | ||
1707 | |||
1708 | slab_state = SYSFS; | ||
1709 | |||
1710 | list_for_each(h, &slab_caches) { | ||
1711 | struct kmem_cache *s = | ||
1712 | container_of(h, struct kmem_cache, list); | ||
1713 | |||
1714 | err = sysfs_slab_add(s); | ||
1715 | BUG_ON(err); | ||
1716 | } | ||
1717 | return 0; | ||
1718 | } | ||
1719 | |||
1720 | static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, | 1888 | static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, |
1721 | const char *name, size_t size, | 1889 | const char *name, size_t size, |
1722 | size_t align, unsigned long flags, | 1890 | size_t align, unsigned long flags, |
@@ -1730,32 +1898,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, | |||
1730 | s->objsize = size; | 1898 | s->objsize = size; |
1731 | s->flags = flags; | 1899 | s->flags = flags; |
1732 | s->align = align; | 1900 | s->align = align; |
1733 | 1901 | kmem_cache_open_debug_check(s); | |
1734 | /* | ||
1735 | * The page->offset field is only 16 bit wide. This is an offset | ||
1736 | * in units of words from the beginning of an object. If the slab | ||
1737 | * size is bigger then we cannot move the free pointer behind the | ||
1738 | * object anymore. | ||
1739 | * | ||
1740 | * On 32 bit platforms the limit is 256k. On 64bit platforms | ||
1741 | * the limit is 512k. | ||
1742 | * | ||
1743 | * Debugging or ctor/dtors may create a need to move the free | ||
1744 | * pointer. Fail if this happens. | ||
1745 | */ | ||
1746 | if (s->size >= 65535 * sizeof(void *)) { | ||
1747 | BUG_ON(flags & (SLAB_RED_ZONE | SLAB_POISON | | ||
1748 | SLAB_STORE_USER | SLAB_DESTROY_BY_RCU)); | ||
1749 | BUG_ON(ctor || dtor); | ||
1750 | } | ||
1751 | else | ||
1752 | /* | ||
1753 | * Enable debugging if selected on the kernel commandline. | ||
1754 | */ | ||
1755 | if (slub_debug && (!slub_debug_slabs || | ||
1756 | strncmp(slub_debug_slabs, name, | ||
1757 | strlen(slub_debug_slabs)) == 0)) | ||
1758 | s->flags |= slub_debug; | ||
1759 | 1902 | ||
1760 | if (!calculate_sizes(s)) | 1903 | if (!calculate_sizes(s)) |
1761 | goto error; | 1904 | goto error; |
@@ -1783,7 +1926,6 @@ EXPORT_SYMBOL(kmem_cache_open); | |||
1783 | int kmem_ptr_validate(struct kmem_cache *s, const void *object) | 1926 | int kmem_ptr_validate(struct kmem_cache *s, const void *object) |
1784 | { | 1927 | { |
1785 | struct page * page; | 1928 | struct page * page; |
1786 | void *addr; | ||
1787 | 1929 | ||
1788 | page = get_object_page(object); | 1930 | page = get_object_page(object); |
1789 | 1931 | ||
@@ -1791,13 +1933,7 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object) | |||
1791 | /* No slab or wrong slab */ | 1933 | /* No slab or wrong slab */ |
1792 | return 0; | 1934 | return 0; |
1793 | 1935 | ||
1794 | addr = page_address(page); | 1936 | if (!check_valid_pointer(s, page, object)) |
1795 | if (object < addr || object >= addr + s->objects * s->size) | ||
1796 | /* Out of bounds */ | ||
1797 | return 0; | ||
1798 | |||
1799 | if ((object - addr) % s->size) | ||
1800 | /* Improperly aligned */ | ||
1801 | return 0; | 1937 | return 0; |
1802 | 1938 | ||
1803 | /* | 1939 | /* |
@@ -1826,7 +1962,8 @@ const char *kmem_cache_name(struct kmem_cache *s) | |||
1826 | EXPORT_SYMBOL(kmem_cache_name); | 1962 | EXPORT_SYMBOL(kmem_cache_name); |
1827 | 1963 | ||
1828 | /* | 1964 | /* |
1829 | * Attempt to free all slabs on a node | 1965 | * Attempt to free all slabs on a node. Return the number of slabs we |
1966 | * were unable to free. | ||
1830 | */ | 1967 | */ |
1831 | static int free_list(struct kmem_cache *s, struct kmem_cache_node *n, | 1968 | static int free_list(struct kmem_cache *s, struct kmem_cache_node *n, |
1832 | struct list_head *list) | 1969 | struct list_head *list) |
@@ -1847,7 +1984,7 @@ static int free_list(struct kmem_cache *s, struct kmem_cache_node *n, | |||
1847 | } | 1984 | } |
1848 | 1985 | ||
1849 | /* | 1986 | /* |
1850 | * Release all resources used by slab cache | 1987 | * Release all resources used by a slab cache. |
1851 | */ | 1988 | */ |
1852 | static int kmem_cache_close(struct kmem_cache *s) | 1989 | static int kmem_cache_close(struct kmem_cache *s) |
1853 | { | 1990 | { |
@@ -1932,45 +2069,6 @@ static int __init setup_slub_nomerge(char *str) | |||
1932 | 2069 | ||
1933 | __setup("slub_nomerge", setup_slub_nomerge); | 2070 | __setup("slub_nomerge", setup_slub_nomerge); |
1934 | 2071 | ||
1935 | static int __init setup_slub_debug(char *str) | ||
1936 | { | ||
1937 | if (!str || *str != '=') | ||
1938 | slub_debug = DEBUG_DEFAULT_FLAGS; | ||
1939 | else { | ||
1940 | str++; | ||
1941 | if (*str == 0 || *str == ',') | ||
1942 | slub_debug = DEBUG_DEFAULT_FLAGS; | ||
1943 | else | ||
1944 | for( ;*str && *str != ','; str++) | ||
1945 | switch (*str) { | ||
1946 | case 'f' : case 'F' : | ||
1947 | slub_debug |= SLAB_DEBUG_FREE; | ||
1948 | break; | ||
1949 | case 'z' : case 'Z' : | ||
1950 | slub_debug |= SLAB_RED_ZONE; | ||
1951 | break; | ||
1952 | case 'p' : case 'P' : | ||
1953 | slub_debug |= SLAB_POISON; | ||
1954 | break; | ||
1955 | case 'u' : case 'U' : | ||
1956 | slub_debug |= SLAB_STORE_USER; | ||
1957 | break; | ||
1958 | case 't' : case 'T' : | ||
1959 | slub_debug |= SLAB_TRACE; | ||
1960 | break; | ||
1961 | default: | ||
1962 | printk(KERN_ERR "slub_debug option '%c' " | ||
1963 | "unknown. skipped\n",*str); | ||
1964 | } | ||
1965 | } | ||
1966 | |||
1967 | if (*str == ',') | ||
1968 | slub_debug_slabs = str + 1; | ||
1969 | return 1; | ||
1970 | } | ||
1971 | |||
1972 | __setup("slub_debug", setup_slub_debug); | ||
1973 | |||
1974 | static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s, | 2072 | static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s, |
1975 | const char *name, int size, gfp_t gfp_flags) | 2073 | const char *name, int size, gfp_t gfp_flags) |
1976 | { | 2074 | { |
@@ -2108,13 +2206,14 @@ void kfree(const void *x) | |||
2108 | EXPORT_SYMBOL(kfree); | 2206 | EXPORT_SYMBOL(kfree); |
2109 | 2207 | ||
2110 | /* | 2208 | /* |
2111 | * kmem_cache_shrink removes empty slabs from the partial lists | 2209 | * kmem_cache_shrink removes empty slabs from the partial lists and sorts |
2112 | * and then sorts the partially allocated slabs by the number | 2210 | * the remaining slabs by the number of items in use. The slabs with the |
2113 | * of items in use. The slabs with the most items in use | 2211 | * most items in use come first. New allocations will then fill those up |
2114 | * come first. New allocations will remove these from the | 2212 | * and thus they can be removed from the partial lists. |
2115 | * partial list because they are full. The slabs with the | 2213 | * |
2116 | * least items are placed last. If it happens that the objects | 2214 | * The slabs with the least items are placed last. This results in them |
2117 | * are freed then the page can be returned to the page allocator. | 2215 | * being allocated from last, increasing the chance that the last objects |
2216 | * are freed in them. | ||
2118 | */ | 2217 | */ |
2119 | int kmem_cache_shrink(struct kmem_cache *s) | 2218 | int kmem_cache_shrink(struct kmem_cache *s) |
2120 | { | 2219 | { |
@@ -2143,12 +2242,10 @@ int kmem_cache_shrink(struct kmem_cache *s) | |||
2143 | spin_lock_irqsave(&n->list_lock, flags); | 2242 | spin_lock_irqsave(&n->list_lock, flags); |
2144 | 2243 | ||
2145 | /* | 2244 | /* |
2146 | * Build lists indexed by the items in use in | 2245 | * Build lists indexed by the items in use in each slab. |
2147 | * each slab or free slabs if empty. | ||
2148 | * | 2246 | * |
2149 | * Note that concurrent frees may occur while | 2247 | * Note that concurrent frees may occur while we hold the |
2150 | * we hold the list_lock. page->inuse here is | 2248 | * list_lock. page->inuse here is the upper limit. |
2151 | * the upper limit. | ||
2152 | */ | 2249 | */ |
2153 | list_for_each_entry_safe(page, t, &n->partial, lru) { | 2250 | list_for_each_entry_safe(page, t, &n->partial, lru) { |
2154 | if (!page->inuse && slab_trylock(page)) { | 2251 | if (!page->inuse && slab_trylock(page)) { |
@@ -2172,8 +2269,8 @@ int kmem_cache_shrink(struct kmem_cache *s) | |||
2172 | goto out; | 2269 | goto out; |
2173 | 2270 | ||
2174 | /* | 2271 | /* |
2175 | * Rebuild the partial list with the slabs filled up | 2272 | * Rebuild the partial list with the slabs filled up most |
2176 | * most first and the least used slabs at the end. | 2273 | * first and the least used slabs at the end. |
2177 | */ | 2274 | */ |
2178 | for (i = s->objects - 1; i >= 0; i--) | 2275 | for (i = s->objects - 1; i >= 0; i--) |
2179 | list_splice(slabs_by_inuse + i, n->partial.prev); | 2276 | list_splice(slabs_by_inuse + i, n->partial.prev); |
@@ -2189,7 +2286,6 @@ EXPORT_SYMBOL(kmem_cache_shrink); | |||
2189 | 2286 | ||
2190 | /** | 2287 | /** |
2191 | * krealloc - reallocate memory. The contents will remain unchanged. | 2288 | * krealloc - reallocate memory. The contents will remain unchanged. |
2192 | * | ||
2193 | * @p: object to reallocate memory for. | 2289 | * @p: object to reallocate memory for. |
2194 | * @new_size: how many bytes of memory are required. | 2290 | * @new_size: how many bytes of memory are required. |
2195 | * @flags: the type of memory to allocate. | 2291 | * @flags: the type of memory to allocate. |
@@ -2201,9 +2297,8 @@ EXPORT_SYMBOL(kmem_cache_shrink); | |||
2201 | */ | 2297 | */ |
2202 | void *krealloc(const void *p, size_t new_size, gfp_t flags) | 2298 | void *krealloc(const void *p, size_t new_size, gfp_t flags) |
2203 | { | 2299 | { |
2204 | struct kmem_cache *new_cache; | ||
2205 | void *ret; | 2300 | void *ret; |
2206 | struct page *page; | 2301 | size_t ks; |
2207 | 2302 | ||
2208 | if (unlikely(!p)) | 2303 | if (unlikely(!p)) |
2209 | return kmalloc(new_size, flags); | 2304 | return kmalloc(new_size, flags); |
@@ -2213,19 +2308,13 @@ void *krealloc(const void *p, size_t new_size, gfp_t flags) | |||
2213 | return NULL; | 2308 | return NULL; |
2214 | } | 2309 | } |
2215 | 2310 | ||
2216 | page = virt_to_head_page(p); | 2311 | ks = ksize(p); |
2217 | 2312 | if (ks >= new_size) | |
2218 | new_cache = get_slab(new_size, flags); | ||
2219 | |||
2220 | /* | ||
2221 | * If new size fits in the current cache, bail out. | ||
2222 | */ | ||
2223 | if (likely(page->slab == new_cache)) | ||
2224 | return (void *)p; | 2313 | return (void *)p; |
2225 | 2314 | ||
2226 | ret = kmalloc(new_size, flags); | 2315 | ret = kmalloc(new_size, flags); |
2227 | if (ret) { | 2316 | if (ret) { |
2228 | memcpy(ret, p, min(new_size, ksize(p))); | 2317 | memcpy(ret, p, min(new_size, ks)); |
2229 | kfree(p); | 2318 | kfree(p); |
2230 | } | 2319 | } |
2231 | return ret; | 2320 | return ret; |
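The krealloc() rewrite replaces the old cache-identity test (the virt_to_head_page()/get_slab() comparison) with a size test: ksize() reports how much the existing allocation can actually hold, and if that already covers new_size the original pointer is returned unchanged. A userspace model of the same flow, assuming glibc's malloc_usable_size() as a stand-in for ksize():

	/* Userspace sketch of the new krealloc() flow: reuse the allocation
	 * when its usable size already covers the request.
	 * malloc_usable_size() is a glibc extension (assumption). */
	#include <malloc.h>
	#include <stdlib.h>
	#include <string.h>

	void *krealloc_sketch(const void *p, size_t new_size)
	{
		void *ret;
		size_t ks;

		if (!p)
			return malloc(new_size);
		if (new_size == 0) {
			free((void *)p);
			return NULL;
		}
		ks = malloc_usable_size((void *)p);	/* ksize(p) in the kernel */
		if (ks >= new_size)			/* current object is big enough */
			return (void *)p;

		ret = malloc(new_size);
		if (ret) {
			memcpy(ret, p, ks < new_size ? ks : new_size);
			free((void *)p);
		}
		return ret;
	}

Note the copy now reuses the cached ks value (min(new_size, ks)) instead of calling ksize() a second time.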
@@ -2243,7 +2332,7 @@ void __init kmem_cache_init(void) | |||
2243 | #ifdef CONFIG_NUMA | 2332 | #ifdef CONFIG_NUMA |
2244 | /* | 2333 | /* |
2245 | * Must first have the slab cache available for the allocations of the | 2334 | * Must first have the slab cache available for the allocations of the |
2246 | * struct kmalloc_cache_node's. There is special bootstrap code in | 2335 | * struct kmem_cache_node's. There is special bootstrap code in |
2247 | * kmem_cache_open for slab_state == DOWN. | 2336 | * kmem_cache_open for slab_state == DOWN. |
2248 | */ | 2337 | */ |
2249 | create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", | 2338 | create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", |
@@ -2280,7 +2369,7 @@ void __init kmem_cache_init(void) | |||
2280 | 2369 | ||
2281 | printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," | 2370 | printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," |
2282 | " Processors=%d, Nodes=%d\n", | 2371 | " Processors=%d, Nodes=%d\n", |
2283 | KMALLOC_SHIFT_HIGH, L1_CACHE_BYTES, | 2372 | KMALLOC_SHIFT_HIGH, cache_line_size(), |
2284 | slub_min_order, slub_max_order, slub_min_objects, | 2373 | slub_min_order, slub_max_order, slub_min_objects, |
2285 | nr_cpu_ids, nr_node_ids); | 2374 | nr_cpu_ids, nr_node_ids); |
2286 | } | 2375 | } |
@@ -2415,8 +2504,8 @@ static void for_all_slabs(void (*func)(struct kmem_cache *, int), int cpu) | |||
2415 | } | 2504 | } |
2416 | 2505 | ||
2417 | /* | 2506 | /* |
2418 | * Use the cpu notifier to insure that the slab are flushed | 2507 | * Use the cpu notifier to ensure that the cpu slabs are flushed when
2419 | * when necessary. | 2508 | * necessary. |
2420 | */ | 2509 | */ |
2421 | static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, | 2510 | static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, |
2422 | unsigned long action, void *hcpu) | 2511 | unsigned long action, void *hcpu) |
@@ -2425,7 +2514,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, | |||
2425 | 2514 | ||
2426 | switch (action) { | 2515 | switch (action) { |
2427 | case CPU_UP_CANCELED: | 2516 | case CPU_UP_CANCELED: |
2517 | case CPU_UP_CANCELED_FROZEN: | ||
2428 | case CPU_DEAD: | 2518 | case CPU_DEAD: |
2519 | case CPU_DEAD_FROZEN: | ||
2429 | for_all_slabs(__flush_cpu_slab, cpu); | 2520 | for_all_slabs(__flush_cpu_slab, cpu); |
2430 | break; | 2521 | break; |
2431 | default: | 2522 | default: |
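This hunk shows the pattern applied throughout the rest of this patch (iucv, vmscan, vmstat, net/core below): every CPU notifier case gains its *_FROZEN twin, because hotplug events delivered while tasks are frozen for suspend arrive as CPU_DEAD_FROZEN and friends rather than the plain event. A sketch of the dispatch shape; the event values and flush_cpu() here are illustrative:

	/* Treat the frozen variant of each hotplug event like the plain one. */
	enum cpu_event {
		CPU_UP_CANCELED, CPU_UP_CANCELED_FROZEN,
		CPU_DEAD, CPU_DEAD_FROZEN,
		CPU_OTHER,
	};

	static void flush_cpu(int cpu) { (void)cpu; }	/* for_all_slabs(__flush_cpu_slab, cpu) */

	static int cpuup_callback(enum cpu_event action, int cpu)
	{
		switch (action) {
		case CPU_UP_CANCELED:
		case CPU_UP_CANCELED_FROZEN:	/* same teardown during suspend */
		case CPU_DEAD:
		case CPU_DEAD_FROZEN:
			flush_cpu(cpu);
			break;
		default:
			break;
		}
		return 0;
	}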
@@ -2439,153 +2530,6 @@ static struct notifier_block __cpuinitdata slab_notifier = | |||
2439 | 2530 | ||
2440 | #endif | 2531 | #endif |
2441 | 2532 | ||
2442 | #ifdef CONFIG_NUMA | ||
2443 | |||
2444 | /***************************************************************** | ||
2445 | * Generic reaper used to support the page allocator | ||
2446 | * (the cpu slabs are reaped by a per slab workqueue). | ||
2447 | * | ||
2448 | * Maybe move this to the page allocator? | ||
2449 | ****************************************************************/ | ||
2450 | |||
2451 | static DEFINE_PER_CPU(unsigned long, reap_node); | ||
2452 | |||
2453 | static void init_reap_node(int cpu) | ||
2454 | { | ||
2455 | int node; | ||
2456 | |||
2457 | node = next_node(cpu_to_node(cpu), node_online_map); | ||
2458 | if (node == MAX_NUMNODES) | ||
2459 | node = first_node(node_online_map); | ||
2460 | |||
2461 | __get_cpu_var(reap_node) = node; | ||
2462 | } | ||
2463 | |||
2464 | static void next_reap_node(void) | ||
2465 | { | ||
2466 | int node = __get_cpu_var(reap_node); | ||
2467 | |||
2468 | /* | ||
2469 | * Also drain per cpu pages on remote zones | ||
2470 | */ | ||
2471 | if (node != numa_node_id()) | ||
2472 | drain_node_pages(node); | ||
2473 | |||
2474 | node = next_node(node, node_online_map); | ||
2475 | if (unlikely(node >= MAX_NUMNODES)) | ||
2476 | node = first_node(node_online_map); | ||
2477 | __get_cpu_var(reap_node) = node; | ||
2478 | } | ||
2479 | #else | ||
2480 | #define init_reap_node(cpu) do { } while (0) | ||
2481 | #define next_reap_node(void) do { } while (0) | ||
2482 | #endif | ||
2483 | |||
2484 | #define REAPTIMEOUT_CPUC (2*HZ) | ||
2485 | |||
2486 | #ifdef CONFIG_SMP | ||
2487 | static DEFINE_PER_CPU(struct delayed_work, reap_work); | ||
2488 | |||
2489 | static void cache_reap(struct work_struct *unused) | ||
2490 | { | ||
2491 | next_reap_node(); | ||
2492 | refresh_cpu_vm_stats(smp_processor_id()); | ||
2493 | schedule_delayed_work(&__get_cpu_var(reap_work), | ||
2494 | REAPTIMEOUT_CPUC); | ||
2495 | } | ||
2496 | |||
2497 | static void __devinit start_cpu_timer(int cpu) | ||
2498 | { | ||
2499 | struct delayed_work *reap_work = &per_cpu(reap_work, cpu); | ||
2500 | |||
2501 | /* | ||
2502 | * When this gets called from do_initcalls via cpucache_init(), | ||
2503 | * init_workqueues() has already run, so keventd will be setup | ||
2504 | * at that time. | ||
2505 | */ | ||
2506 | if (keventd_up() && reap_work->work.func == NULL) { | ||
2507 | init_reap_node(cpu); | ||
2508 | INIT_DELAYED_WORK(reap_work, cache_reap); | ||
2509 | schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu); | ||
2510 | } | ||
2511 | } | ||
2512 | |||
2513 | static int __init cpucache_init(void) | ||
2514 | { | ||
2515 | int cpu; | ||
2516 | |||
2517 | /* | ||
2518 | * Register the timers that drain pcp pages and update vm statistics | ||
2519 | */ | ||
2520 | for_each_online_cpu(cpu) | ||
2521 | start_cpu_timer(cpu); | ||
2522 | return 0; | ||
2523 | } | ||
2524 | __initcall(cpucache_init); | ||
2525 | #endif | ||
2526 | |||
2527 | #ifdef SLUB_RESILIENCY_TEST | ||
2528 | static unsigned long validate_slab_cache(struct kmem_cache *s); | ||
2529 | |||
2530 | static void resiliency_test(void) | ||
2531 | { | ||
2532 | u8 *p; | ||
2533 | |||
2534 | printk(KERN_ERR "SLUB resiliency testing\n"); | ||
2535 | printk(KERN_ERR "-----------------------\n"); | ||
2536 | printk(KERN_ERR "A. Corruption after allocation\n"); | ||
2537 | |||
2538 | p = kzalloc(16, GFP_KERNEL); | ||
2539 | p[16] = 0x12; | ||
2540 | printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer" | ||
2541 | " 0x12->0x%p\n\n", p + 16); | ||
2542 | |||
2543 | validate_slab_cache(kmalloc_caches + 4); | ||
2544 | |||
2545 | /* Hmmm... The next two are dangerous */ | ||
2546 | p = kzalloc(32, GFP_KERNEL); | ||
2547 | p[32 + sizeof(void *)] = 0x34; | ||
2548 | printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" | ||
2549 | " 0x34 -> -0x%p\n", p); | ||
2550 | printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n"); | ||
2551 | |||
2552 | validate_slab_cache(kmalloc_caches + 5); | ||
2553 | p = kzalloc(64, GFP_KERNEL); | ||
2554 | p += 64 + (get_cycles() & 0xff) * sizeof(void *); | ||
2555 | *p = 0x56; | ||
2556 | printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", | ||
2557 | p); | ||
2558 | printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n"); | ||
2559 | validate_slab_cache(kmalloc_caches + 6); | ||
2560 | |||
2561 | printk(KERN_ERR "\nB. Corruption after free\n"); | ||
2562 | p = kzalloc(128, GFP_KERNEL); | ||
2563 | kfree(p); | ||
2564 | *p = 0x78; | ||
2565 | printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p); | ||
2566 | validate_slab_cache(kmalloc_caches + 7); | ||
2567 | |||
2568 | p = kzalloc(256, GFP_KERNEL); | ||
2569 | kfree(p); | ||
2570 | p[50] = 0x9a; | ||
2571 | printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p); | ||
2572 | validate_slab_cache(kmalloc_caches + 8); | ||
2573 | |||
2574 | p = kzalloc(512, GFP_KERNEL); | ||
2575 | kfree(p); | ||
2576 | p[512] = 0xab; | ||
2577 | printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p); | ||
2578 | validate_slab_cache(kmalloc_caches + 9); | ||
2579 | } | ||
2580 | #else | ||
2581 | static void resiliency_test(void) {}; | ||
2582 | #endif | ||
2583 | |||
2584 | /* | ||
2585 | * These are not as efficient as kmalloc for the non debug case. | ||
2586 | * We do not have the page struct available so we have to touch one | ||
2587 | * cacheline in struct kmem_cache to check slab flags. | ||
2588 | */ | ||
2589 | void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) | 2533 | void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) |
2590 | { | 2534 | { |
2591 | struct kmem_cache *s = get_slab(size, gfpflags); | 2535 | struct kmem_cache *s = get_slab(size, gfpflags); |
@@ -2607,13 +2551,12 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, | |||
2607 | return slab_alloc(s, gfpflags, node, caller); | 2551 | return slab_alloc(s, gfpflags, node, caller); |
2608 | } | 2552 | } |
2609 | 2553 | ||
2610 | #ifdef CONFIG_SYSFS | 2554 | #if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG) |
2611 | |||
2612 | static int validate_slab(struct kmem_cache *s, struct page *page) | 2555 | static int validate_slab(struct kmem_cache *s, struct page *page) |
2613 | { | 2556 | { |
2614 | void *p; | 2557 | void *p; |
2615 | void *addr = page_address(page); | 2558 | void *addr = page_address(page); |
2616 | unsigned long map[BITS_TO_LONGS(s->objects)]; | 2559 | DECLARE_BITMAP(map, s->objects); |
2617 | 2560 | ||
2618 | if (!check_slab(s, page) || | 2561 | if (!check_slab(s, page) || |
2619 | !on_freelist(s, page, NULL)) | 2562 | !on_freelist(s, page, NULL)) |
@@ -2622,14 +2565,14 @@ static int validate_slab(struct kmem_cache *s, struct page *page) | |||
2622 | /* Now we know that a valid freelist exists */ | 2565 | /* Now we know that a valid freelist exists */ |
2623 | bitmap_zero(map, s->objects); | 2566 | bitmap_zero(map, s->objects); |
2624 | 2567 | ||
2625 | for(p = page->freelist; p; p = get_freepointer(s, p)) { | 2568 | for_each_free_object(p, s, page->freelist) { |
2626 | set_bit((p - addr) / s->size, map); | 2569 | set_bit(slab_index(p, s, addr), map); |
2627 | if (!check_object(s, page, p, 0)) | 2570 | if (!check_object(s, page, p, 0)) |
2628 | return 0; | 2571 | return 0; |
2629 | } | 2572 | } |
2630 | 2573 | ||
2631 | for(p = addr; p < addr + s->objects * s->size; p += s->size) | 2574 | for_each_object(p, s, addr) |
2632 | if (!test_bit((p - addr) / s->size, map)) | 2575 | if (!test_bit(slab_index(p, s, addr), map)) |
2633 | if (!check_object(s, page, p, 1)) | 2576 | if (!check_object(s, page, p, 1)) |
2634 | return 0; | 2577 | return 0; |
2635 | return 1; | 2578 | return 1; |
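validate_slab() now uses DECLARE_BITMAP() (avoiding the open-coded array of longs) and the new for_each_free_object()/slab_index() helpers in place of raw pointer arithmetic. The check itself is a two-pass walk: mark every object reachable from the freelist, then verify each slot as either free or allocated. A toy userspace sketch of those two passes; the types and the NULL-terminated freelist array are simplifications (the kernel chains free pointers inside the objects):

	#include <stdbool.h>
	#include <stddef.h>

	#define MAX_OBJECTS 64

	struct toy_slab {
		char *addr;		/* page_address(page) */
		size_t size;		/* object size */
		int objects;		/* objects per slab */
		void **freelist;	/* NULL-terminated toy freelist */
	};

	static int slab_index(void *p, struct toy_slab *s)
	{
		return (int)(((char *)p - s->addr) / s->size);
	}

	static bool validate_slab(struct toy_slab *s,
				  bool (*check)(void *obj, bool allocated))
	{
		unsigned char map[MAX_OBJECTS] = { 0 };	/* DECLARE_BITMAP(map, s->objects) */

		for (void **fp = s->freelist; *fp; fp++) {	/* for_each_free_object() */
			map[slab_index(*fp, s)] = 1;
			if (!check(*fp, false))
				return false;
		}
		for (int i = 0; i < s->objects; i++)		/* for_each_object() */
			if (!map[i] && !check(s->addr + (size_t)i * s->size, true))
				return false;
		return true;
	}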
@@ -2645,12 +2588,12 @@ static void validate_slab_slab(struct kmem_cache *s, struct page *page) | |||
2645 | s->name, page); | 2588 | s->name, page); |
2646 | 2589 | ||
2647 | if (s->flags & DEBUG_DEFAULT_FLAGS) { | 2590 | if (s->flags & DEBUG_DEFAULT_FLAGS) { |
2648 | if (!PageError(page)) | 2591 | if (!SlabDebug(page)) |
2649 | printk(KERN_ERR "SLUB %s: PageError not set " | 2592 | printk(KERN_ERR "SLUB %s: SlabDebug not set " |
2650 | "on slab 0x%p\n", s->name, page); | 2593 | "on slab 0x%p\n", s->name, page); |
2651 | } else { | 2594 | } else { |
2652 | if (PageError(page)) | 2595 | if (SlabDebug(page)) |
2653 | printk(KERN_ERR "SLUB %s: PageError set on " | 2596 | printk(KERN_ERR "SLUB %s: SlabDebug set on " |
2654 | "slab 0x%p\n", s->name, page); | 2597 | "slab 0x%p\n", s->name, page); |
2655 | } | 2598 | } |
2656 | } | 2599 | } |
@@ -2702,14 +2645,76 @@ static unsigned long validate_slab_cache(struct kmem_cache *s) | |||
2702 | return count; | 2645 | return count; |
2703 | } | 2646 | } |
2704 | 2647 | ||
2648 | #ifdef SLUB_RESILIENCY_TEST | ||
2649 | static void resiliency_test(void) | ||
2650 | { | ||
2651 | u8 *p; | ||
2652 | |||
2653 | printk(KERN_ERR "SLUB resiliency testing\n"); | ||
2654 | printk(KERN_ERR "-----------------------\n"); | ||
2655 | printk(KERN_ERR "A. Corruption after allocation\n"); | ||
2656 | |||
2657 | p = kzalloc(16, GFP_KERNEL); | ||
2658 | p[16] = 0x12; | ||
2659 | printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer" | ||
2660 | " 0x12->0x%p\n\n", p + 16); | ||
2661 | |||
2662 | validate_slab_cache(kmalloc_caches + 4); | ||
2663 | |||
2664 | /* Hmmm... The next two are dangerous */ | ||
2665 | p = kzalloc(32, GFP_KERNEL); | ||
2666 | p[32 + sizeof(void *)] = 0x34; | ||
2667 | printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" | ||
2668 | " 0x34 -> -0x%p\n", p); | ||
2669 | printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n"); | ||
2670 | |||
2671 | validate_slab_cache(kmalloc_caches + 5); | ||
2672 | p = kzalloc(64, GFP_KERNEL); | ||
2673 | p += 64 + (get_cycles() & 0xff) * sizeof(void *); | ||
2674 | *p = 0x56; | ||
2675 | printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", | ||
2676 | p); | ||
2677 | printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n"); | ||
2678 | validate_slab_cache(kmalloc_caches + 6); | ||
2679 | |||
2680 | printk(KERN_ERR "\nB. Corruption after free\n"); | ||
2681 | p = kzalloc(128, GFP_KERNEL); | ||
2682 | kfree(p); | ||
2683 | *p = 0x78; | ||
2684 | printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p); | ||
2685 | validate_slab_cache(kmalloc_caches + 7); | ||
2686 | |||
2687 | p = kzalloc(256, GFP_KERNEL); | ||
2688 | kfree(p); | ||
2689 | p[50] = 0x9a; | ||
2690 | printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p); | ||
2691 | validate_slab_cache(kmalloc_caches + 8); | ||
2692 | |||
2693 | p = kzalloc(512, GFP_KERNEL); | ||
2694 | kfree(p); | ||
2695 | p[512] = 0xab; | ||
2696 | printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p); | ||
2697 | validate_slab_cache(kmalloc_caches + 9); | ||
2698 | } | ||
2699 | #else | ||
2700 | static void resiliency_test(void) {}; | ||
2701 | #endif | ||
2702 | |||
2705 | /* | 2703 | /* |
2706 | * Generate lists of locations where slabcache objects are allocated | 2704 | * Generate lists of code addresses where slabcache objects are allocated |
2707 | * and freed. | 2705 | * and freed. |
2708 | */ | 2706 | */ |
2709 | 2707 | ||
2710 | struct location { | 2708 | struct location { |
2711 | unsigned long count; | 2709 | unsigned long count; |
2712 | void *addr; | 2710 | void *addr; |
2711 | long long sum_time; | ||
2712 | long min_time; | ||
2713 | long max_time; | ||
2714 | long min_pid; | ||
2715 | long max_pid; | ||
2716 | cpumask_t cpus; | ||
2717 | nodemask_t nodes; | ||
2713 | }; | 2718 | }; |
2714 | 2719 | ||
2715 | struct loc_track { | 2720 | struct loc_track { |
@@ -2750,11 +2755,12 @@ static int alloc_loc_track(struct loc_track *t, unsigned long max) | |||
2750 | } | 2755 | } |
2751 | 2756 | ||
2752 | static int add_location(struct loc_track *t, struct kmem_cache *s, | 2757 | static int add_location(struct loc_track *t, struct kmem_cache *s, |
2753 | void *addr) | 2758 | const struct track *track) |
2754 | { | 2759 | { |
2755 | long start, end, pos; | 2760 | long start, end, pos; |
2756 | struct location *l; | 2761 | struct location *l; |
2757 | void *caddr; | 2762 | void *caddr; |
2763 | unsigned long age = jiffies - track->when; | ||
2758 | 2764 | ||
2759 | start = -1; | 2765 | start = -1; |
2760 | end = t->count; | 2766 | end = t->count; |
@@ -2770,19 +2776,36 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, | |||
2770 | break; | 2776 | break; |
2771 | 2777 | ||
2772 | caddr = t->loc[pos].addr; | 2778 | caddr = t->loc[pos].addr; |
2773 | if (addr == caddr) { | 2779 | if (track->addr == caddr) { |
2774 | t->loc[pos].count++; | 2780 | |
2781 | l = &t->loc[pos]; | ||
2782 | l->count++; | ||
2783 | if (track->when) { | ||
2784 | l->sum_time += age; | ||
2785 | if (age < l->min_time) | ||
2786 | l->min_time = age; | ||
2787 | if (age > l->max_time) | ||
2788 | l->max_time = age; | ||
2789 | |||
2790 | if (track->pid < l->min_pid) | ||
2791 | l->min_pid = track->pid; | ||
2792 | if (track->pid > l->max_pid) | ||
2793 | l->max_pid = track->pid; | ||
2794 | |||
2795 | cpu_set(track->cpu, l->cpus); | ||
2796 | } | ||
2797 | node_set(page_to_nid(virt_to_page(track)), l->nodes); | ||
2775 | return 1; | 2798 | return 1; |
2776 | } | 2799 | } |
2777 | 2800 | ||
2778 | if (addr < caddr) | 2801 | if (track->addr < caddr) |
2779 | end = pos; | 2802 | end = pos; |
2780 | else | 2803 | else |
2781 | start = pos; | 2804 | start = pos; |
2782 | } | 2805 | } |
2783 | 2806 | ||
2784 | /* | 2807 | /* |
2785 | * Not found. Insert new tracking element | 2808 | * Not found. Insert new tracking element. |
2786 | */ | 2809 | */ |
2787 | if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max)) | 2810 | if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max)) |
2788 | return 0; | 2811 | return 0; |
@@ -2793,7 +2816,16 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, | |||
2793 | (t->count - pos) * sizeof(struct location)); | 2816 | (t->count - pos) * sizeof(struct location)); |
2794 | t->count++; | 2817 | t->count++; |
2795 | l->count = 1; | 2818 | l->count = 1; |
2796 | l->addr = addr; | 2819 | l->addr = track->addr; |
2820 | l->sum_time = age; | ||
2821 | l->min_time = age; | ||
2822 | l->max_time = age; | ||
2823 | l->min_pid = track->pid; | ||
2824 | l->max_pid = track->pid; | ||
2825 | cpus_clear(l->cpus); | ||
2826 | cpu_set(track->cpu, l->cpus); | ||
2827 | nodes_clear(l->nodes); | ||
2828 | node_set(page_to_nid(virt_to_page(track)), l->nodes); | ||
2797 | return 1; | 2829 | return 1; |
2798 | } | 2830 | } |
2799 | 2831 | ||
@@ -2801,19 +2833,16 @@ static void process_slab(struct loc_track *t, struct kmem_cache *s, | |||
2801 | struct page *page, enum track_item alloc) | 2833 | struct page *page, enum track_item alloc) |
2802 | { | 2834 | { |
2803 | void *addr = page_address(page); | 2835 | void *addr = page_address(page); |
2804 | unsigned long map[BITS_TO_LONGS(s->objects)]; | 2836 | DECLARE_BITMAP(map, s->objects); |
2805 | void *p; | 2837 | void *p; |
2806 | 2838 | ||
2807 | bitmap_zero(map, s->objects); | 2839 | bitmap_zero(map, s->objects); |
2808 | for (p = page->freelist; p; p = get_freepointer(s, p)) | 2840 | for_each_free_object(p, s, page->freelist) |
2809 | set_bit((p - addr) / s->size, map); | 2841 | set_bit(slab_index(p, s, addr), map); |
2810 | |||
2811 | for (p = addr; p < addr + s->objects * s->size; p += s->size) | ||
2812 | if (!test_bit((p - addr) / s->size, map)) { | ||
2813 | void *addr = get_track(s, p, alloc)->addr; | ||
2814 | 2842 | ||
2815 | add_location(t, s, addr); | 2843 | for_each_object(p, s, addr) |
2816 | } | 2844 | if (!test_bit(slab_index(p, s, addr), map)) |
2845 | add_location(t, s, get_track(s, p, alloc)); | ||
2817 | } | 2846 | } |
2818 | 2847 | ||
2819 | static int list_locations(struct kmem_cache *s, char *buf, | 2848 | static int list_locations(struct kmem_cache *s, char *buf, |
@@ -2847,15 +2876,47 @@ static int list_locations(struct kmem_cache *s, char *buf, | |||
2847 | } | 2876 | } |
2848 | 2877 | ||
2849 | for (i = 0; i < t.count; i++) { | 2878 | for (i = 0; i < t.count; i++) { |
2850 | void *addr = t.loc[i].addr; | 2879 | struct location *l = &t.loc[i]; |
2851 | 2880 | ||
2852 | if (n > PAGE_SIZE - 100) | 2881 | if (n > PAGE_SIZE - 100) |
2853 | break; | 2882 | break; |
2854 | n += sprintf(buf + n, "%7ld ", t.loc[i].count); | 2883 | n += sprintf(buf + n, "%7ld ", l->count); |
2855 | if (addr) | 2884 | |
2856 | n += sprint_symbol(buf + n, (unsigned long)t.loc[i].addr); | 2885 | if (l->addr) |
2886 | n += sprint_symbol(buf + n, (unsigned long)l->addr); | ||
2857 | else | 2887 | else |
2858 | n += sprintf(buf + n, "<not-available>"); | 2888 | n += sprintf(buf + n, "<not-available>"); |
2889 | |||
2890 | if (l->sum_time != l->min_time) { | ||
2891 | unsigned long remainder; | ||
2892 | |||
2893 | n += sprintf(buf + n, " age=%ld/%ld/%ld", | ||
2894 | l->min_time, | ||
2895 | div_long_long_rem(l->sum_time, l->count, &remainder), | ||
2896 | l->max_time); | ||
2897 | } else | ||
2898 | n += sprintf(buf + n, " age=%ld", | ||
2899 | l->min_time); | ||
2900 | |||
2901 | if (l->min_pid != l->max_pid) | ||
2902 | n += sprintf(buf + n, " pid=%ld-%ld", | ||
2903 | l->min_pid, l->max_pid); | ||
2904 | else | ||
2905 | n += sprintf(buf + n, " pid=%ld", | ||
2906 | l->min_pid); | ||
2907 | |||
2908 | if (num_online_cpus() > 1 && !cpus_empty(l->cpus)) { | ||
2909 | n += sprintf(buf + n, " cpus="); | ||
2910 | n += cpulist_scnprintf(buf + n, PAGE_SIZE - n - 50, | ||
2911 | l->cpus); | ||
2912 | } | ||
2913 | |||
2914 | if (num_online_nodes() > 1 && !nodes_empty(l->nodes)) { | ||
2915 | n += sprintf(buf + n, " nodes="); | ||
2916 | n += nodelist_scnprintf(buf + n, PAGE_SIZE - n - 50, | ||
2917 | l->nodes); | ||
2918 | } | ||
2919 | |||
2859 | n += sprintf(buf + n, "\n"); | 2920 | n += sprintf(buf + n, "\n"); |
2860 | } | 2921 | } |
2861 | 2922 | ||
@@ -3491,6 +3552,7 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name) | |||
3491 | 3552 | ||
3492 | static int __init slab_sysfs_init(void) | 3553 | static int __init slab_sysfs_init(void) |
3493 | { | 3554 | { |
3555 | struct list_head *h; | ||
3494 | int err; | 3556 | int err; |
3495 | 3557 | ||
3496 | err = subsystem_register(&slab_subsys); | 3558 | err = subsystem_register(&slab_subsys); |
@@ -3499,7 +3561,15 @@ static int __init slab_sysfs_init(void) | |||
3499 | return -ENOSYS; | 3561 | return -ENOSYS; |
3500 | } | 3562 | } |
3501 | 3563 | ||
3502 | finish_bootstrap(); | 3564 | slab_state = SYSFS; |
3565 | |||
3566 | list_for_each(h, &slab_caches) { | ||
3567 | struct kmem_cache *s = | ||
3568 | container_of(h, struct kmem_cache, list); | ||
3569 | |||
3570 | err = sysfs_slab_add(s); | ||
3571 | BUG_ON(err); | ||
3572 | } | ||
3503 | 3573 | ||
3504 | while (alias_list) { | 3574 | while (alias_list) { |
3505 | struct saved_alias *al = alias_list; | 3575 | struct saved_alias *al = alias_list; |
@@ -3515,6 +3585,4 @@ static int __init slab_sysfs_init(void) | |||
3515 | } | 3585 | } |
3516 | 3586 | ||
3517 | __initcall(slab_sysfs_init); | 3587 | __initcall(slab_sysfs_init); |
3518 | #else | ||
3519 | __initcall(finish_bootstrap); | ||
3520 | #endif | 3588 | #endif |
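slab_sysfs_init() drops the old finish_bootstrap() call: it flips slab_state to SYSFS and then registers every cache that was created before sysfs existed, straight off the slab_caches list. The same register-the-backlog bootstrap shape in a sketch (register_one() is an illustrative stand-in for sysfs_slab_add()):

	#include <assert.h>
	#include <stddef.h>

	enum state { DOWN, UP, SYSFS };
	static enum state slab_state = UP;

	struct cache { const char *name; struct cache *next; };
	static struct cache *slab_caches;	/* backlog built during early boot */

	static int register_one(struct cache *c) { (void)c; return 0; }

	static void slab_sysfs_init(void)
	{
		slab_state = SYSFS;	/* caches created from now on register directly */
		for (struct cache *c = slab_caches; c; c = c->next)
			assert(register_one(c) == 0);	/* BUG_ON(err) in the patch */
	}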
diff --git a/mm/swap.c b/mm/swap.c --- a/mm/swap.c +++ b/mm/swap.c | |||
@@ -488,7 +488,7 @@ static int cpu_swap_callback(struct notifier_block *nfb, | |||
488 | long *committed; | 488 | long *committed; |
489 | 489 | ||
490 | committed = &per_cpu(committed_space, (long)hcpu); | 490 | committed = &per_cpu(committed_space, (long)hcpu); |
491 | if (action == CPU_DEAD) { | 491 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { |
492 | atomic_add(*committed, &vm_committed_space); | 492 | atomic_add(*committed, &vm_committed_space); |
493 | *committed = 0; | 493 | *committed = 0; |
494 | __lru_add_drain((long)hcpu); | 494 | __lru_add_drain((long)hcpu); |
diff --git a/mm/truncate.c b/mm/truncate.c index 0f4b6d18ab0e..4fbe1a2da5fb 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/swap.h> | 12 | #include <linux/swap.h> |
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/pagemap.h> | 14 | #include <linux/pagemap.h> |
15 | #include <linux/highmem.h> | ||
15 | #include <linux/pagevec.h> | 16 | #include <linux/pagevec.h> |
16 | #include <linux/task_io_accounting_ops.h> | 17 | #include <linux/task_io_accounting_ops.h> |
17 | #include <linux/buffer_head.h> /* grr. try_to_release_page, | 18 | #include <linux/buffer_head.h> /* grr. try_to_release_page, |
@@ -46,7 +47,7 @@ void do_invalidatepage(struct page *page, unsigned long offset) | |||
46 | 47 | ||
47 | static inline void truncate_partial_page(struct page *page, unsigned partial) | 48 | static inline void truncate_partial_page(struct page *page, unsigned partial) |
48 | { | 49 | { |
49 | memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial); | 50 | zero_user_page(page, partial, PAGE_CACHE_SIZE - partial, KM_USER0); |
50 | if (PagePrivate(page)) | 51 | if (PagePrivate(page)) |
51 | do_invalidatepage(page, partial); | 52 | do_invalidatepage(page, partial); |
52 | } | 53 | } |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 1c8e75a1cfcd..1be5a6376ef0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1528,7 +1528,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb, | |||
1528 | pg_data_t *pgdat; | 1528 | pg_data_t *pgdat; |
1529 | cpumask_t mask; | 1529 | cpumask_t mask; |
1530 | 1530 | ||
1531 | if (action == CPU_ONLINE) { | 1531 | if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) { |
1532 | for_each_online_pgdat(pgdat) { | 1532 | for_each_online_pgdat(pgdat) { |
1533 | mask = node_to_cpumask(pgdat->node_id); | 1533 | mask = node_to_cpumask(pgdat->node_id); |
1534 | if (any_online_cpu(mask) != NR_CPUS) | 1534 | if (any_online_cpu(mask) != NR_CPUS) |
diff --git a/mm/vmstat.c b/mm/vmstat.c index 6c488d6ac425..9832d9a41d8c 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
@@ -281,6 +281,17 @@ EXPORT_SYMBOL(dec_zone_page_state); | |||
281 | 281 | ||
282 | /* | 282 | /* |
283 | * Update the zone counters for one cpu. | 283 | * Update the zone counters for one cpu. |
284 | * | ||
285 | * Note that refresh_cpu_vm_stats strives to only access | ||
286 | * node local memory. The per cpu pagesets on remote zones are placed | ||
287 | * in the memory local to the processor using that pageset. So the | ||
288 | * loop over all zones will access a series of cachelines local to | ||
289 | * the processor. | ||
290 | * | ||
291 | * The call to zone_page_state_add updates the cachelines with the | ||
292 | * statistics in the remote zone struct as well as the global cachelines | ||
293 | * with the global counters. These could cause remote node cache line | ||
294 | * bouncing and will have to be only done when necessary. | ||
284 | */ | 295 | */ |
285 | void refresh_cpu_vm_stats(int cpu) | 296 | void refresh_cpu_vm_stats(int cpu) |
286 | { | 297 | { |
@@ -289,21 +300,54 @@ void refresh_cpu_vm_stats(int cpu) | |||
289 | unsigned long flags; | 300 | unsigned long flags; |
290 | 301 | ||
291 | for_each_zone(zone) { | 302 | for_each_zone(zone) { |
292 | struct per_cpu_pageset *pcp; | 303 | struct per_cpu_pageset *p; |
293 | 304 | ||
294 | if (!populated_zone(zone)) | 305 | if (!populated_zone(zone)) |
295 | continue; | 306 | continue; |
296 | 307 | ||
297 | pcp = zone_pcp(zone, cpu); | 308 | p = zone_pcp(zone, cpu); |
298 | 309 | ||
299 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | 310 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) |
300 | if (pcp->vm_stat_diff[i]) { | 311 | if (p->vm_stat_diff[i]) { |
301 | local_irq_save(flags); | 312 | local_irq_save(flags); |
302 | zone_page_state_add(pcp->vm_stat_diff[i], | 313 | zone_page_state_add(p->vm_stat_diff[i], |
303 | zone, i); | 314 | zone, i); |
304 | pcp->vm_stat_diff[i] = 0; | 315 | p->vm_stat_diff[i] = 0; |
316 | #ifdef CONFIG_NUMA | ||
317 | /* 3 seconds idle till flush */ | ||
318 | p->expire = 3; | ||
319 | #endif | ||
305 | local_irq_restore(flags); | 320 | local_irq_restore(flags); |
306 | } | 321 | } |
322 | #ifdef CONFIG_NUMA | ||
323 | /* | ||
324 | * Deal with draining the remote pageset of this | ||
325 | * processor | ||
326 | * | ||
327 | * Check if there are pages remaining in this pageset | ||
328 | * if not then there is nothing to expire. | ||
329 | */ | ||
330 | if (!p->expire || (!p->pcp[0].count && !p->pcp[1].count)) | ||
331 | continue; | ||
332 | |||
333 | /* | ||
334 | * We never drain zones local to this processor. | ||
335 | */ | ||
336 | if (zone_to_nid(zone) == numa_node_id()) { | ||
337 | p->expire = 0; | ||
338 | continue; | ||
339 | } | ||
340 | |||
341 | p->expire--; | ||
342 | if (p->expire) | ||
343 | continue; | ||
344 | |||
345 | if (p->pcp[0].count) | ||
346 | drain_zone_pages(zone, p->pcp + 0); | ||
347 | |||
348 | if (p->pcp[1].count) | ||
349 | drain_zone_pages(zone, p->pcp + 1); | ||
350 | #endif | ||
307 | } | 351 | } |
308 | } | 352 | } |
309 | 353 | ||
@@ -640,6 +684,24 @@ const struct seq_operations vmstat_op = { | |||
640 | #endif /* CONFIG_PROC_FS */ | 684 | #endif /* CONFIG_PROC_FS */ |
641 | 685 | ||
642 | #ifdef CONFIG_SMP | 686 | #ifdef CONFIG_SMP |
687 | static DEFINE_PER_CPU(struct delayed_work, vmstat_work); | ||
688 | int sysctl_stat_interval __read_mostly = HZ; | ||
689 | |||
690 | static void vmstat_update(struct work_struct *w) | ||
691 | { | ||
692 | refresh_cpu_vm_stats(smp_processor_id()); | ||
693 | schedule_delayed_work(&__get_cpu_var(vmstat_work), | ||
694 | sysctl_stat_interval); | ||
695 | } | ||
696 | |||
697 | static void __devinit start_cpu_timer(int cpu) | ||
698 | { | ||
699 | struct delayed_work *vmstat_work = &per_cpu(vmstat_work, cpu); | ||
700 | |||
701 | INIT_DELAYED_WORK(vmstat_work, vmstat_update); | ||
702 | schedule_delayed_work_on(cpu, vmstat_work, HZ + cpu); | ||
703 | } | ||
704 | |||
643 | /* | 705 | /* |
644 | * Use the cpu notifier to insure that the thresholds are recalculated | 706 | * Use the cpu notifier to insure that the thresholds are recalculated |
645 | * when necessary. | 707 | * when necessary. |
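This is where the reap timer deleted from slub.c lands: vmstat_update() is a per-CPU delayed work that refreshes that CPU's counters and re-queues itself every sysctl_stat_interval, and start_cpu_timer() staggers the first run by HZ + cpu so the timers do not all fire in the same tick. A userspace model of the self-rearming, staggered per-CPU job, one thread per "cpu" (compile with -pthread):

	#include <pthread.h>
	#include <stdio.h>
	#include <unistd.h>

	#define NCPUS 4

	static void *vmstat_update(void *arg)		/* the delayed-work body */
	{
		long cpu = (long)arg;

		usleep(100000 * (useconds_t)cpu);	/* stagger, like HZ + cpu */
		for (int pass = 0; pass < 3; pass++) {
			printf("cpu %ld: refresh_cpu_vm_stats()\n", cpu);
			sleep(1);			/* re-queue after the interval */
		}
		return NULL;
	}

	int main(void)
	{
		pthread_t tid[NCPUS];

		for (long cpu = 0; cpu < NCPUS; cpu++)	/* start_cpu_timer() per cpu */
			pthread_create(&tid[cpu], NULL, vmstat_update, (void *)cpu);
		for (int i = 0; i < NCPUS; i++)
			pthread_join(tid[i], NULL);
		return 0;
	}

The notifier cases in the next hunk then manage this timer across hotplug: start it on CPU_ONLINE and CPU_DOWN_FAILED, cancel it on CPU_DOWN_PREPARE.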
@@ -648,10 +710,24 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb, | |||
648 | unsigned long action, | 710 | unsigned long action, |
649 | void *hcpu) | 711 | void *hcpu) |
650 | { | 712 | { |
713 | long cpu = (long)hcpu; | ||
714 | |||
651 | switch (action) { | 715 | switch (action) { |
652 | case CPU_UP_PREPARE: | 716 | case CPU_ONLINE: |
653 | case CPU_UP_CANCELED: | 717 | case CPU_ONLINE_FROZEN: |
718 | start_cpu_timer(cpu); | ||
719 | break; | ||
720 | case CPU_DOWN_PREPARE: | ||
721 | case CPU_DOWN_PREPARE_FROZEN: | ||
722 | cancel_rearming_delayed_work(&per_cpu(vmstat_work, cpu)); | ||
723 | per_cpu(vmstat_work, cpu).work.func = NULL; | ||
724 | break; | ||
725 | case CPU_DOWN_FAILED: | ||
726 | case CPU_DOWN_FAILED_FROZEN: | ||
727 | start_cpu_timer(cpu); | ||
728 | break; | ||
654 | case CPU_DEAD: | 729 | case CPU_DEAD: |
730 | case CPU_DEAD_FROZEN: | ||
655 | refresh_zone_stat_thresholds(); | 731 | refresh_zone_stat_thresholds(); |
656 | break; | 732 | break; |
657 | default: | 733 | default: |
@@ -665,8 +741,13 @@ static struct notifier_block __cpuinitdata vmstat_notifier = | |||
665 | 741 | ||
666 | int __init setup_vmstat(void) | 742 | int __init setup_vmstat(void) |
667 | { | 743 | { |
744 | int cpu; | ||
745 | |||
668 | refresh_zone_stat_thresholds(); | 746 | refresh_zone_stat_thresholds(); |
669 | register_cpu_notifier(&vmstat_notifier); | 747 | register_cpu_notifier(&vmstat_notifier); |
748 | |||
749 | for_each_online_cpu(cpu) | ||
750 | start_cpu_timer(cpu); | ||
670 | return 0; | 751 | return 0; |
671 | } | 752 | } |
672 | module_init(setup_vmstat) | 753 | module_init(setup_vmstat) |
diff --git a/net/core/dev.c b/net/core/dev.c index 4317c1be4d3f..8301e2ac747f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -3450,7 +3450,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |||
3450 | unsigned int cpu, oldcpu = (unsigned long)ocpu; | 3450 | unsigned int cpu, oldcpu = (unsigned long)ocpu; |
3451 | struct softnet_data *sd, *oldsd; | 3451 | struct softnet_data *sd, *oldsd; |
3452 | 3452 | ||
3453 | if (action != CPU_DEAD) | 3453 | if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) |
3454 | return NOTIFY_OK; | 3454 | return NOTIFY_OK; |
3455 | 3455 | ||
3456 | local_irq_disable(); | 3456 | local_irq_disable(); |
diff --git a/net/core/flow.c b/net/core/flow.c index 5d25697920b1..051430545a05 100644 --- a/net/core/flow.c +++ b/net/core/flow.c | |||
@@ -338,7 +338,7 @@ static int flow_cache_cpu(struct notifier_block *nfb, | |||
338 | unsigned long action, | 338 | unsigned long action, |
339 | void *hcpu) | 339 | void *hcpu) |
340 | { | 340 | { |
341 | if (action == CPU_DEAD) | 341 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) |
342 | __flow_cache_shrink((unsigned long)hcpu, 0); | 342 | __flow_cache_shrink((unsigned long)hcpu, 0); |
343 | return NOTIFY_OK; | 343 | return NOTIFY_OK; |
344 | } | 344 | } |
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index b3050a6817e7..68fe1d4d0210 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c | |||
@@ -2387,6 +2387,7 @@ void ip_vs_control_cleanup(void) | |||
2387 | EnterFunction(2); | 2387 | EnterFunction(2); |
2388 | ip_vs_trash_cleanup(); | 2388 | ip_vs_trash_cleanup(); |
2389 | cancel_rearming_delayed_work(&defense_work); | 2389 | cancel_rearming_delayed_work(&defense_work); |
2390 | cancel_work_sync(&defense_work.work); | ||
2390 | ip_vs_kill_estimator(&ip_vs_stats); | 2391 | ip_vs_kill_estimator(&ip_vs_stats); |
2391 | unregister_sysctl_table(sysctl_header); | 2392 | unregister_sysctl_table(sysctl_header); |
2392 | proc_net_remove("ip_vs_stats"); | 2393 | proc_net_remove("ip_vs_stats"); |
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index fb3faf72e850..b7333061016d 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c | |||
@@ -556,6 +556,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, | |||
556 | 556 | ||
557 | switch (action) { | 557 | switch (action) { |
558 | case CPU_UP_PREPARE: | 558 | case CPU_UP_PREPARE: |
559 | case CPU_UP_PREPARE_FROZEN: | ||
559 | if (!percpu_populate(iucv_irq_data, | 560 | if (!percpu_populate(iucv_irq_data, |
560 | sizeof(struct iucv_irq_data), | 561 | sizeof(struct iucv_irq_data), |
561 | GFP_KERNEL|GFP_DMA, cpu)) | 562 | GFP_KERNEL|GFP_DMA, cpu)) |
@@ -567,15 +568,20 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, | |||
567 | } | 568 | } |
568 | break; | 569 | break; |
569 | case CPU_UP_CANCELED: | 570 | case CPU_UP_CANCELED: |
571 | case CPU_UP_CANCELED_FROZEN: | ||
570 | case CPU_DEAD: | 572 | case CPU_DEAD: |
573 | case CPU_DEAD_FROZEN: | ||
571 | percpu_depopulate(iucv_param, cpu); | 574 | percpu_depopulate(iucv_param, cpu); |
572 | percpu_depopulate(iucv_irq_data, cpu); | 575 | percpu_depopulate(iucv_irq_data, cpu); |
573 | break; | 576 | break; |
574 | case CPU_ONLINE: | 577 | case CPU_ONLINE: |
578 | case CPU_ONLINE_FROZEN: | ||
575 | case CPU_DOWN_FAILED: | 579 | case CPU_DOWN_FAILED: |
580 | case CPU_DOWN_FAILED_FROZEN: | ||
576 | smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu); | 581 | smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu); |
577 | break; | 582 | break; |
578 | case CPU_DOWN_PREPARE: | 583 | case CPU_DOWN_PREPARE: |
584 | case CPU_DOWN_PREPARE_FROZEN: | ||
579 | cpumask = iucv_buffer_cpumask; | 585 | cpumask = iucv_buffer_cpumask; |
580 | cpu_clear(cpu, cpumask); | 586 | cpu_clear(cpu, cpumask); |
581 | if (cpus_empty(cpumask)) | 587 | if (cpus_empty(cpumask)) |
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index db298b501c81..099a983797da 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c | |||
@@ -924,6 +924,7 @@ static inline int | |||
924 | gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip) | 924 | gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip) |
925 | { | 925 | { |
926 | struct rsc *rsci; | 926 | struct rsc *rsci; |
927 | int rc; | ||
927 | 928 | ||
928 | if (rsip->major_status != GSS_S_COMPLETE) | 929 | if (rsip->major_status != GSS_S_COMPLETE) |
929 | return gss_write_null_verf(rqstp); | 930 | return gss_write_null_verf(rqstp); |
@@ -932,7 +933,9 @@ gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip) | |||
932 | rsip->major_status = GSS_S_NO_CONTEXT; | 933 | rsip->major_status = GSS_S_NO_CONTEXT; |
933 | return gss_write_null_verf(rqstp); | 934 | return gss_write_null_verf(rqstp); |
934 | } | 935 | } |
935 | return gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN); | 936 | rc = gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN); |
937 | cache_put(&rsci->h, &rsc_cache); | ||
938 | return rc; | ||
936 | } | 939 | } |
937 | 940 | ||
938 | /* | 941 | /* |
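The added rc/cache_put() pair in gss_write_init_verf() fixes a reference leak: the rsci lookup returns a cache entry with its refcount held, and the old "return gss_write_verf(...)" exited without ever dropping it. The general capture-then-release shape, with toy stand-ins for the cache calls:

	struct obj { int refcount; };

	static struct obj the_obj = { 1 };

	static struct obj *lookup_get(void)	/* lookup returning a held reference */
	{
		the_obj.refcount++;
		return &the_obj;
	}

	static void put(struct obj *o)		/* cache_put() in the patch */
	{
		o->refcount--;
	}

	static int use(struct obj *o)		/* gss_write_verf() in the patch */
	{
		return o ? 0 : -1;
	}

	int do_verf(void)
	{
		struct obj *o = lookup_get();
		int rc;

		if (!o)
			return -1;
		rc = use(o);	/* old code: return use(o); -- leaks the reference */
		put(o);		/* drop it on every exit path past the lookup */
		return rc;
	}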
@@ -1089,6 +1092,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) | |||
1089 | } | 1092 | } |
1090 | goto complete; | 1093 | goto complete; |
1091 | case RPC_GSS_PROC_DESTROY: | 1094 | case RPC_GSS_PROC_DESTROY: |
1095 | if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) | ||
1096 | goto auth_err; | ||
1092 | set_bit(CACHE_NEGATIVE, &rsci->h.flags); | 1097 | set_bit(CACHE_NEGATIVE, &rsci->h.flags); |
1093 | if (resv->iov_len + 4 > PAGE_SIZE) | 1098 | if (resv->iov_len + 4 > PAGE_SIZE) |
1094 | goto drop; | 1099 | goto drop; |
@@ -1196,13 +1201,7 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp) | |||
1196 | if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, | 1201 | if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, |
1197 | integ_len)) | 1202 | integ_len)) |
1198 | BUG(); | 1203 | BUG(); |
1199 | if (resbuf->page_len == 0 | 1204 | if (resbuf->tail[0].iov_base == NULL) { |
1200 | && resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE | ||
1201 | < PAGE_SIZE) { | ||
1202 | BUG_ON(resbuf->tail[0].iov_len); | ||
1203 | /* Use head for everything */ | ||
1204 | resv = &resbuf->head[0]; | ||
1205 | } else if (resbuf->tail[0].iov_base == NULL) { | ||
1206 | if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE) | 1205 | if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE) |
1207 | goto out_err; | 1206 | goto out_err; |
1208 | resbuf->tail[0].iov_base = resbuf->head[0].iov_base | 1207 | resbuf->tail[0].iov_base = resbuf->head[0].iov_base |
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index ad39b47e05bc..a2f1893bde53 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c | |||
@@ -845,6 +845,8 @@ init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) | |||
845 | 845 | ||
846 | int register_rpc_pipefs(void) | 846 | int register_rpc_pipefs(void) |
847 | { | 847 | { |
848 | int err; | ||
849 | |||
848 | rpc_inode_cachep = kmem_cache_create("rpc_inode_cache", | 850 | rpc_inode_cachep = kmem_cache_create("rpc_inode_cache", |
849 | sizeof(struct rpc_inode), | 851 | sizeof(struct rpc_inode), |
850 | 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| | 852 | 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| |
@@ -852,7 +854,12 @@ int register_rpc_pipefs(void) | |||
852 | init_once, NULL); | 854 | init_once, NULL); |
853 | if (!rpc_inode_cachep) | 855 | if (!rpc_inode_cachep) |
854 | return -ENOMEM; | 856 | return -ENOMEM; |
855 | register_filesystem(&rpc_pipe_fs_type); | 857 | err = register_filesystem(&rpc_pipe_fs_type); |
858 | if (err) { | ||
859 | kmem_cache_destroy(rpc_inode_cachep); | ||
860 | return err; | ||
861 | } | ||
862 | |||
856 | return 0; | 863 | return 0; |
857 | } | 864 | } |
858 | 865 | ||
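register_rpc_pipefs() now checks register_filesystem() and destroys the inode cache it just created when that fails; the init_sunrpc() hunk that follows applies the same discipline, keeping the real errno from rpc_init_mempool() (the old "err = rpc_init_mempool() != 0" collapsed every failure to 1) and unregistering pipefs on that path. A sketch of the reverse-order unwinding, with illustrative stub helpers:

	static int fail_at;	/* test knob: which step fails */

	static int create_cache(void)   { return fail_at == 1 ? -1 : 0; }
	static void destroy_cache(void) { }
	static int register_fs(void)    { return fail_at == 2 ? -1 : 0; }
	static void unregister_fs(void) { }
	static int init_mempool(void)   { return fail_at == 3 ? -12 : 0; }	/* -ENOMEM */

	int init_subsystem(void)
	{
		int err = create_cache();

		if (err)
			return err;
		err = register_fs();
		if (err) {
			destroy_cache();	/* undo step 1 */
			return err;
		}
		err = init_mempool();		/* keep the errno, not "!= 0" */
		if (err) {
			unregister_fs();	/* undo step 2 */
			destroy_cache();
			return err;
		}
		return 0;
	}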
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 43ecf62f12ef..0d35bc796d00 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c | |||
@@ -146,9 +146,11 @@ init_sunrpc(void) | |||
146 | int err = register_rpc_pipefs(); | 146 | int err = register_rpc_pipefs(); |
147 | if (err) | 147 | if (err) |
148 | goto out; | 148 | goto out; |
149 | err = rpc_init_mempool() != 0; | 149 | err = rpc_init_mempool(); |
150 | if (err) | 150 | if (err) { |
151 | unregister_rpc_pipefs(); | ||
151 | goto out; | 152 | goto out; |
153 | } | ||
152 | #ifdef RPC_DEBUG | 154 | #ifdef RPC_DEBUG |
153 | rpc_register_sysctl(); | 155 | rpc_register_sysctl(); |
154 | #endif | 156 | #endif |
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b7503c103ae8..e673ef993904 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c | |||
@@ -907,7 +907,7 @@ svc_process(struct svc_rqst *rqstp) | |||
907 | * better idea of reply size | 907 | * better idea of reply size |
908 | */ | 908 | */ |
909 | if (procp->pc_xdrressize) | 909 | if (procp->pc_xdrressize) |
910 | svc_reserve(rqstp, procp->pc_xdrressize<<2); | 910 | svc_reserve_auth(rqstp, procp->pc_xdrressize<<2); |
911 | 911 | ||
912 | /* Call the function that processes the request. */ | 912 | /* Call the function that processes the request. */ |
913 | if (!versp->vs_dispatch) { | 913 | if (!versp->vs_dispatch) { |
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 2bd23ea2aa8b..07dcd20cbee4 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c | |||
@@ -385,7 +385,7 @@ ip_map_cached_get(struct svc_rqst *rqstp) | |||
385 | { | 385 | { |
386 | struct ip_map *ipm; | 386 | struct ip_map *ipm; |
387 | struct svc_sock *svsk = rqstp->rq_sock; | 387 | struct svc_sock *svsk = rqstp->rq_sock; |
388 | spin_lock_bh(&svsk->sk_defer_lock); | 388 | spin_lock(&svsk->sk_lock); |
389 | ipm = svsk->sk_info_authunix; | 389 | ipm = svsk->sk_info_authunix; |
390 | if (ipm != NULL) { | 390 | if (ipm != NULL) { |
391 | if (!cache_valid(&ipm->h)) { | 391 | if (!cache_valid(&ipm->h)) { |
@@ -395,13 +395,13 @@ ip_map_cached_get(struct svc_rqst *rqstp) | |||
395 | * same IP address. | 395 | * same IP address. |
396 | */ | 396 | */ |
397 | svsk->sk_info_authunix = NULL; | 397 | svsk->sk_info_authunix = NULL; |
398 | spin_unlock_bh(&svsk->sk_defer_lock); | 398 | spin_unlock(&svsk->sk_lock); |
399 | cache_put(&ipm->h, &ip_map_cache); | 399 | cache_put(&ipm->h, &ip_map_cache); |
400 | return NULL; | 400 | return NULL; |
401 | } | 401 | } |
402 | cache_get(&ipm->h); | 402 | cache_get(&ipm->h); |
403 | } | 403 | } |
404 | spin_unlock_bh(&svsk->sk_defer_lock); | 404 | spin_unlock(&svsk->sk_lock); |
405 | return ipm; | 405 | return ipm; |
406 | } | 406 | } |
407 | 407 | ||
@@ -410,14 +410,14 @@ ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) | |||
410 | { | 410 | { |
411 | struct svc_sock *svsk = rqstp->rq_sock; | 411 | struct svc_sock *svsk = rqstp->rq_sock; |
412 | 412 | ||
413 | spin_lock_bh(&svsk->sk_defer_lock); | 413 | spin_lock(&svsk->sk_lock); |
414 | if (svsk->sk_sock->type == SOCK_STREAM && | 414 | if (svsk->sk_sock->type == SOCK_STREAM && |
415 | svsk->sk_info_authunix == NULL) { | 415 | svsk->sk_info_authunix == NULL) { |
416 | /* newly cached, keep the reference */ | 416 | /* newly cached, keep the reference */ |
417 | svsk->sk_info_authunix = ipm; | 417 | svsk->sk_info_authunix = ipm; |
418 | ipm = NULL; | 418 | ipm = NULL; |
419 | } | 419 | } |
420 | spin_unlock_bh(&svsk->sk_defer_lock); | 420 | spin_unlock(&svsk->sk_lock); |
421 | if (ipm) | 421 | if (ipm) |
422 | cache_put(&ipm->h, &ip_map_cache); | 422 | cache_put(&ipm->h, &ip_map_cache); |
423 | } | 423 | } |
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 22f61aee4824..5baf48de2558 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
@@ -53,7 +53,8 @@ | |||
53 | * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. | 53 | * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. |
54 | * when both need to be taken (rare), svc_serv->sv_lock is first. | 54 | * when both need to be taken (rare), svc_serv->sv_lock is first. |
55 | * BKL protects svc_serv->sv_nrthread. | 55 | * BKL protects svc_serv->sv_nrthread. |
56 | * svc_sock->sk_defer_lock protects the svc_sock->sk_deferred list | 56 | * svc_sock->sk_lock protects the svc_sock->sk_deferred list |
57 | * and the ->sk_info_authunix cache. | ||
57 | * svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply. | 58 | * svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply. |
58 | * | 59 | * |
59 | * Some flags can be set to certain values at any time | 60 | * Some flags can be set to certain values at any time |
@@ -787,15 +788,20 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
787 | } | 788 | } |
788 | 789 | ||
789 | clear_bit(SK_DATA, &svsk->sk_flags); | 790 | clear_bit(SK_DATA, &svsk->sk_flags); |
790 | while ((err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, | 791 | skb = NULL; |
791 | 0, 0, MSG_PEEK | MSG_DONTWAIT)) < 0 || | 792 | err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, |
792 | (skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) { | 793 | 0, 0, MSG_PEEK | MSG_DONTWAIT); |
793 | if (err == -EAGAIN) { | 794 | if (err >= 0) |
794 | svc_sock_received(svsk); | 795 | skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err); |
795 | return err; | 796 | |
797 | if (skb == NULL) { | ||
798 | if (err != -EAGAIN) { | ||
799 | /* possibly an icmp error */ | ||
800 | dprintk("svc: recvfrom returned error %d\n", -err); | ||
801 | set_bit(SK_DATA, &svsk->sk_flags); | ||
796 | } | 802 | } |
797 | /* possibly an icmp error */ | 803 | svc_sock_received(svsk); |
798 | dprintk("svc: recvfrom returned error %d\n", -err); | 804 | return -EAGAIN; |
799 | } | 805 | } |
800 | rqstp->rq_addrlen = sizeof(rqstp->rq_addr); | 806 | rqstp->rq_addrlen = sizeof(rqstp->rq_addr); |
801 | if (skb->tstamp.tv64 == 0) { | 807 | if (skb->tstamp.tv64 == 0) { |
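svc_udp_recvfrom() loses the while loop whose condition mixed two calls and their side effects: the peek and the datagram dequeue become sequential, every failure funnels into one "skb == NULL" block, and that block always re-queues the socket and returns -EAGAIN, setting SK_DATA again first when the error was something other than EAGAIN (e.g. a pending ICMP error). A sketch of the straightened control flow with stub helpers:

	#include <errno.h>
	#include <stddef.h>

	struct skb { int len; };

	static int have_data;			/* test knob */
	static struct skb one = { 100 };

	static int peek(void)			/* kernel_recvmsg(..., MSG_PEEK | MSG_DONTWAIT) */
	{
		return have_data ? 0 : -EAGAIN;
	}

	static struct skb *dequeue(int *err)	/* skb_recv_datagram() */
	{
		*err = have_data ? 0 : -EAGAIN;
		return have_data ? &one : NULL;
	}

	static void mark_data_pending(void) { }	/* set_bit(SK_DATA, ...) */
	static void sock_received(void) { }	/* svc_sock_received() */

	int recvfrom_sketch(void)
	{
		struct skb *skb = NULL;
		int err = peek();

		if (err >= 0)
			skb = dequeue(&err);

		if (skb == NULL) {
			if (err != -EAGAIN)
				mark_data_pending();	/* possibly an icmp error: retry */
			sock_received();
			return -EAGAIN;
		}
		/* ... process skb ... */
		return skb->len;
	}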
@@ -1633,7 +1639,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, | |||
1633 | svsk->sk_server = serv; | 1639 | svsk->sk_server = serv; |
1634 | atomic_set(&svsk->sk_inuse, 1); | 1640 | atomic_set(&svsk->sk_inuse, 1); |
1635 | svsk->sk_lastrecv = get_seconds(); | 1641 | svsk->sk_lastrecv = get_seconds(); |
1636 | spin_lock_init(&svsk->sk_defer_lock); | 1642 | spin_lock_init(&svsk->sk_lock); |
1637 | INIT_LIST_HEAD(&svsk->sk_deferred); | 1643 | INIT_LIST_HEAD(&svsk->sk_deferred); |
1638 | INIT_LIST_HEAD(&svsk->sk_ready); | 1644 | INIT_LIST_HEAD(&svsk->sk_ready); |
1639 | mutex_init(&svsk->sk_mutex); | 1645 | mutex_init(&svsk->sk_mutex); |
@@ -1857,9 +1863,9 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many) | |||
1857 | dprintk("revisit queued\n"); | 1863 | dprintk("revisit queued\n"); |
1858 | svsk = dr->svsk; | 1864 | svsk = dr->svsk; |
1859 | dr->svsk = NULL; | 1865 | dr->svsk = NULL; |
1860 | spin_lock_bh(&svsk->sk_defer_lock); | 1866 | spin_lock(&svsk->sk_lock); |
1861 | list_add(&dr->handle.recent, &svsk->sk_deferred); | 1867 | list_add(&dr->handle.recent, &svsk->sk_deferred); |
1862 | spin_unlock_bh(&svsk->sk_defer_lock); | 1868 | spin_unlock(&svsk->sk_lock); |
1863 | set_bit(SK_DEFERRED, &svsk->sk_flags); | 1869 | set_bit(SK_DEFERRED, &svsk->sk_flags); |
1864 | svc_sock_enqueue(svsk); | 1870 | svc_sock_enqueue(svsk); |
1865 | svc_sock_put(svsk); | 1871 | svc_sock_put(svsk); |
@@ -1925,7 +1931,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk) | |||
1925 | 1931 | ||
1926 | if (!test_bit(SK_DEFERRED, &svsk->sk_flags)) | 1932 | if (!test_bit(SK_DEFERRED, &svsk->sk_flags)) |
1927 | return NULL; | 1933 | return NULL; |
1928 | spin_lock_bh(&svsk->sk_defer_lock); | 1934 | spin_lock(&svsk->sk_lock); |
1929 | clear_bit(SK_DEFERRED, &svsk->sk_flags); | 1935 | clear_bit(SK_DEFERRED, &svsk->sk_flags); |
1930 | if (!list_empty(&svsk->sk_deferred)) { | 1936 | if (!list_empty(&svsk->sk_deferred)) { |
1931 | dr = list_entry(svsk->sk_deferred.next, | 1937 | dr = list_entry(svsk->sk_deferred.next, |
@@ -1934,6 +1940,6 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk) | |||
1934 | list_del_init(&dr->handle.recent); | 1940 | list_del_init(&dr->handle.recent); |
1935 | set_bit(SK_DEFERRED, &svsk->sk_flags); | 1941 | set_bit(SK_DEFERRED, &svsk->sk_flags); |
1936 | } | 1942 | } |
1937 | spin_unlock_bh(&svsk->sk_defer_lock); | 1943 | spin_unlock(&svsk->sk_lock); |
1938 | return dr; | 1944 | return dr; |
1939 | } | 1945 | } |
diff --git a/scripts/kernel-doc b/scripts/kernel-doc index a325a0c890dc..e5bf649e516a 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc | |||
@@ -337,6 +337,7 @@ sub get_kernel_version() { | |||
337 | } | 337 | } |
338 | return $version; | 338 | return $version; |
339 | } | 339 | } |
340 | my $kernelversion = get_kernel_version(); | ||
340 | 341 | ||
341 | # generate a sequence of code that will splice in highlighting information | 342 | # generate a sequence of code that will splice in highlighting information |
342 | # using the s// operator. | 343 | # using the s// operator. |
@@ -610,7 +611,7 @@ sub output_function_xml(%) { | |||
610 | print "<refmeta>\n"; | 611 | print "<refmeta>\n"; |
611 | print " <refentrytitle><phrase>".$args{'function'}."</phrase></refentrytitle>\n"; | 612 | print " <refentrytitle><phrase>".$args{'function'}."</phrase></refentrytitle>\n"; |
612 | print " <manvolnum>9</manvolnum>\n"; | 613 | print " <manvolnum>9</manvolnum>\n"; |
613 | print " <refmiscinfo class=\"version\">" . get_kernel_version() . "</refmiscinfo>\n"; | 614 | print " <refmiscinfo class=\"version\">" . $kernelversion . "</refmiscinfo>\n"; |
614 | print "</refmeta>\n"; | 615 | print "</refmeta>\n"; |
615 | print "<refnamediv>\n"; | 616 | print "<refnamediv>\n"; |
616 | print " <refname>".$args{'function'}."</refname>\n"; | 617 | print " <refname>".$args{'function'}."</refname>\n"; |
@@ -687,7 +688,7 @@ sub output_struct_xml(%) { | |||
687 | print "<refmeta>\n"; | 688 | print "<refmeta>\n"; |
688 | print " <refentrytitle><phrase>".$args{'type'}." ".$args{'struct'}."</phrase></refentrytitle>\n"; | 689 | print " <refentrytitle><phrase>".$args{'type'}." ".$args{'struct'}."</phrase></refentrytitle>\n"; |
689 | print " <manvolnum>9</manvolnum>\n"; | 690 | print " <manvolnum>9</manvolnum>\n"; |
690 | print " <refmiscinfo class=\"version\">" . get_kernel_version() . "</refmiscinfo>\n"; | 691 | print " <refmiscinfo class=\"version\">" . $kernelversion . "</refmiscinfo>\n"; |
691 | print "</refmeta>\n"; | 692 | print "</refmeta>\n"; |
692 | print "<refnamediv>\n"; | 693 | print "<refnamediv>\n"; |
693 | print " <refname>".$args{'type'}." ".$args{'struct'}."</refname>\n"; | 694 | print " <refname>".$args{'type'}." ".$args{'struct'}."</refname>\n"; |
@@ -772,7 +773,7 @@ sub output_enum_xml(%) { | |||
772 | print "<refmeta>\n"; | 773 | print "<refmeta>\n"; |
773 | print " <refentrytitle><phrase>enum ".$args{'enum'}."</phrase></refentrytitle>\n"; | 774 | print " <refentrytitle><phrase>enum ".$args{'enum'}."</phrase></refentrytitle>\n"; |
774 | print " <manvolnum>9</manvolnum>\n"; | 775 | print " <manvolnum>9</manvolnum>\n"; |
775 | print " <refmiscinfo class=\"version\">" . get_kernel_version() . "</refmiscinfo>\n"; | 776 | print " <refmiscinfo class=\"version\">" . $kernelversion . "</refmiscinfo>\n"; |
776 | print "</refmeta>\n"; | 777 | print "</refmeta>\n"; |
777 | print "<refnamediv>\n"; | 778 | print "<refnamediv>\n"; |
778 | print " <refname>enum ".$args{'enum'}."</refname>\n"; | 779 | print " <refname>enum ".$args{'enum'}."</refname>\n"; |
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 480e18b00aa6..113dc77b9f60 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c | |||
@@ -1343,6 +1343,7 @@ static void add_header(struct buffer *b, struct module *mod) | |||
1343 | buf_printf(b, "#ifdef CONFIG_MODULE_UNLOAD\n" | 1343 | buf_printf(b, "#ifdef CONFIG_MODULE_UNLOAD\n" |
1344 | " .exit = cleanup_module,\n" | 1344 | " .exit = cleanup_module,\n" |
1345 | "#endif\n"); | 1345 | "#endif\n"); |
1346 | buf_printf(b, " .arch = MODULE_ARCH_INIT,\n"); | ||
1346 | buf_printf(b, "};\n"); | 1347 | buf_printf(b, "};\n"); |
1347 | } | 1348 | } |
1348 | 1349 | ||
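The one-line modpost change means every generated <module>.mod.c now initializes the arch-specific part of struct module via MODULE_ARCH_INIT (an empty initializer on most architectures, real per-module setup where an architecture needs it). Roughly the fragment add_header() emits after this patch, abbreviated; the surrounding fields vary with config:

	/* excerpt of generated <module>.mod.c (abbreviated sketch) */
	struct module __this_module
	__attribute__((section(".gnu.linkonce.this_module"))) = {
		.name = KBUILD_MODNAME,
		.init = init_module,
	#ifdef CONFIG_MODULE_UNLOAD
		.exit = cleanup_module,
	#endif
		.arch = MODULE_ARCH_INIT,	/* the new line */
	};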