diff options
author | David S. Miller <davem@davemloft.net> | 2009-11-19 01:19:03 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-11-19 01:19:03 -0500 |
commit | 3505d1a9fd65e2d3e00827857b6795d9d8983658 (patch) | |
tree | 941cfafdb57c427bb6b7ebf6354ee93b2a3693b5 /Documentation | |
parent | dfef948ed2ba69cf041840b5e860d6b4e16fa0b1 (diff) | |
parent | 66b00a7c93ec782d118d2c03bd599cfd041e80a1 (diff) |
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
Conflicts:
drivers/net/sfc/sfe4001.c
drivers/net/wireless/libertas/cmd.c
drivers/staging/Kconfig
drivers/staging/Makefile
drivers/staging/rtl8187se/Kconfig
drivers/staging/rtl8192e/Kconfig
Diffstat (limited to 'Documentation')
25 files changed, 937 insertions, 355 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-usb_host b/Documentation/ABI/testing/sysfs-class-uwb_rc-wusbhc index 46b66ad1f1b4..4e8106f7cfd9 100644 --- a/Documentation/ABI/testing/sysfs-class-usb_host +++ b/Documentation/ABI/testing/sysfs-class-uwb_rc-wusbhc | |||
@@ -1,4 +1,4 @@ | |||
1 | What: /sys/class/usb_host/usb_hostN/wusb_chid | 1 | What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_chid |
2 | Date: July 2008 | 2 | Date: July 2008 |
3 | KernelVersion: 2.6.27 | 3 | KernelVersion: 2.6.27 |
4 | Contact: David Vrabel <david.vrabel@csr.com> | 4 | Contact: David Vrabel <david.vrabel@csr.com> |
@@ -9,7 +9,7 @@ Description: | |||
9 | 9 | ||
10 | Set an all zero CHID to stop the host controller. | 10 | Set an all zero CHID to stop the host controller. |
11 | 11 | ||
12 | What: /sys/class/usb_host/usb_hostN/wusb_trust_timeout | 12 | What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_trust_timeout |
13 | Date: July 2008 | 13 | Date: July 2008 |
14 | KernelVersion: 2.6.27 | 14 | KernelVersion: 2.6.27 |
15 | Contact: David Vrabel <david.vrabel@csr.com> | 15 | Contact: David Vrabel <david.vrabel@csr.com> |
diff --git a/Documentation/ABI/testing/sysfs-devices-cache_disable b/Documentation/ABI/testing/sysfs-devices-cache_disable deleted file mode 100644 index 175bb4f70512..000000000000 --- a/Documentation/ABI/testing/sysfs-devices-cache_disable +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X | ||
2 | Date: August 2008 | ||
3 | KernelVersion: 2.6.27 | ||
4 | Contact: mark.langsdorf@amd.com | ||
5 | Description: These files exist in every cpu's cache index directories. | ||
6 | There are currently 2 cache_disable_# files in each | ||
7 | directory. Reading from these files on a supported | ||
8 | processor will return that cache disable index value | ||
9 | for that processor and node. Writing to one of these | ||
10 | files will cause the specified cache index to be disabled. | ||
11 | |||
12 | Currently, only AMD Family 10h Processors support cache index | ||
13 | disable, and only for their L3 caches. See the BIOS and | ||
14 | Kernel Developer's Guide at | ||
15 | http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116-Public-GH-BKDG_3.20_2-4-09.pdf | ||
16 | for formatting information and other details on the | ||
17 | cache index disable. | ||
18 | Users: joachim.deguara@amd.com | ||
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu new file mode 100644 index 000000000000..a703b9e9aeb9 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu | |||
@@ -0,0 +1,156 @@ | |||
1 | What: /sys/devices/system/cpu/ | ||
2 | Date: pre-git history | ||
3 | Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> | ||
4 | Description: | ||
5 | A collection of both global and individual CPU attributes | ||
6 | |||
7 | Individual CPU attributes are contained in subdirectories | ||
8 | named by the kernel's logical CPU number, e.g.: | ||
9 | |||
10 | /sys/devices/system/cpu/cpu#/ | ||
11 | |||
12 | What: /sys/devices/system/cpu/sched_mc_power_savings | ||
13 | /sys/devices/system/cpu/sched_smt_power_savings | ||
14 | Date: June 2006 | ||
15 | Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> | ||
16 | Description: Discover and adjust the kernel's multi-core scheduler support. | ||
17 | |||
18 | Possible values are: | ||
19 | |||
20 | 0 - No power saving load balance (default value) | ||
21 | 1 - Fill one thread/core/package first for long running threads | ||
22 | 2 - Also bias task wakeups to semi-idle cpu package for power | ||
23 | savings | ||
24 | |||
25 | sched_mc_power_savings is dependent upon SCHED_MC, which is | ||
26 | itself architecture dependent. | ||
27 | |||
28 | sched_smt_power_savings is dependent upon SCHED_SMT, which | ||
29 | is itself architecture dependent. | ||
30 | |||
31 | The two files are independent of each other. It is possible | ||
32 | that one file may be present without the other. | ||
33 | |||
34 | Introduced by git commit 5c45bf27. | ||
35 | |||
36 | |||
37 | What: /sys/devices/system/cpu/kernel_max | ||
38 | /sys/devices/system/cpu/offline | ||
39 | /sys/devices/system/cpu/online | ||
40 | /sys/devices/system/cpu/possible | ||
41 | /sys/devices/system/cpu/present | ||
42 | Date: December 2008 | ||
43 | Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> | ||
44 | Description: CPU topology files that describe kernel limits related to | ||
45 | hotplug. Briefly: | ||
46 | |||
47 | kernel_max: the maximum cpu index allowed by the kernel | ||
48 | configuration. | ||
49 | |||
50 | offline: cpus that are not online because they have been | ||
51 | HOTPLUGGED off or exceed the limit of cpus allowed by the | ||
52 | kernel configuration (kernel_max above). | ||
53 | |||
54 | online: cpus that are online and being scheduled. | ||
55 | |||
56 | possible: cpus that have been allocated resources and can be | ||
57 | brought online if they are present. | ||
58 | |||
59 | present: cpus that have been identified as being present in | ||
60 | the system. | ||
61 | |||
62 | See Documentation/cputopology.txt for more information. | ||
63 | |||
64 | |||
65 | |||
66 | What: /sys/devices/system/cpu/cpu#/node | ||
67 | Date: October 2009 | ||
68 | Contact: Linux memory management mailing list <linux-mm@kvack.org> | ||
69 | Description: Discover NUMA node a CPU belongs to | ||
70 | |||
71 | When CONFIG_NUMA is enabled, a symbolic link that points | ||
72 | to the corresponding NUMA node directory. | ||
73 | |||
74 | For example, the following symlink is created for cpu42 | ||
75 | in NUMA node 2: | ||
76 | |||
77 | /sys/devices/system/cpu/cpu42/node2 -> ../../node/node2 | ||
78 | |||
79 | |||
80 | What: /sys/devices/system/cpu/cpu#/topology/core_id | ||
81 | /sys/devices/system/cpu/cpu#/topology/core_siblings | ||
82 | /sys/devices/system/cpu/cpu#/topology/core_siblings_list | ||
83 | /sys/devices/system/cpu/cpu#/topology/physical_package_id | ||
84 | /sys/devices/system/cpu/cpu#/topology/thread_siblings | ||
85 | /sys/devices/system/cpu/cpu#/topology/thread_siblings_list | ||
86 | Date: December 2008 | ||
87 | Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> | ||
88 | Description: CPU topology files that describe a logical CPU's relationship | ||
89 | to other cores and threads in the same physical package. | ||
90 | |||
91 | One cpu# directory is created per logical CPU in the system, | ||
92 | e.g. /sys/devices/system/cpu/cpu42/. | ||
93 | |||
94 | Briefly, the files above are: | ||
95 | |||
96 | core_id: the CPU core ID of cpu#. Typically it is the | ||
97 | hardware platform's identifier (rather than the kernel's). | ||
98 | The actual value is architecture and platform dependent. | ||
99 | |||
100 | core_siblings: internal kernel map of cpu#'s hardware threads | ||
101 | within the same physical_package_id. | ||
102 | |||
103 | core_siblings_list: human-readable list of the logical CPU | ||
104 | numbers within the same physical_package_id as cpu#. | ||
105 | |||
106 | physical_package_id: physical package id of cpu#. Typically | ||
107 | corresponds to a physical socket number, but the actual value | ||
108 | is architecture and platform dependent. | ||
109 | |||
110 | thread_siblings: internal kernel map of cpu#'s hardware | ||
111 | threads within the same core as cpu# | ||
112 | |||
113 | thread_siblings_list: human-readable list of cpu#'s hardware | ||
114 | threads within the same core as cpu# | ||
115 | |||
116 | See Documentation/cputopology.txt for more information. | ||
117 | |||
118 | |||
119 | What: /sys/devices/system/cpu/cpuidle/current_driver | ||
120 | /sys/devices/system/cpu/cpuidle/current_governor_ro | ||
121 | Date: September 2007 | ||
122 | Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> | ||
123 | Description: Discover cpuidle policy and mechanism | ||
124 | |||
125 | Various CPUs today support multiple idle levels that are | ||
126 | differentiated by varying exit latencies and power | ||
127 | consumption during idle. | ||
128 | |||
129 | Idle policy (governor) is differentiated from idle mechanism | ||
130 | (driver) | ||
131 | |||
132 | current_driver: displays current idle mechanism | ||
133 | |||
134 | current_governor_ro: displays current idle policy | ||
135 | |||
136 | See files in Documentation/cpuidle/ for more information. | ||
137 | |||
138 | |||
139 | What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X | ||
140 | Date: August 2008 | ||
141 | KernelVersion: 2.6.27 | ||
142 | Contact: mark.langsdorf@amd.com | ||
143 | Description: These files exist in every cpu's cache index directories. | ||
144 | There are currently 2 cache_disable_# files in each | ||
145 | directory. Reading from these files on a supported | ||
146 | processor will return that cache disable index value | ||
147 | for that processor and node. Writing to one of these | ||
148 | files will cause the specified cache index to be disabled. | ||
149 | |||
150 | Currently, only AMD Family 10h Processors support cache index | ||
151 | disable, and only for their L3 caches. See the BIOS and | ||
152 | Kernel Developer's Guide at | ||
153 | http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116-Public-GH-BKDG_3.20_2-4-09.pdf | ||
154 | for formatting information and other details on the | ||
155 | cache index disable. | ||
156 | Users: joachim.deguara@amd.com | ||
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 455d4e6d346d..0b33bfe7dde9 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt | |||
@@ -227,7 +227,14 @@ as the path relative to the root of the cgroup file system. | |||
227 | Each cgroup is represented by a directory in the cgroup file system | 227 | Each cgroup is represented by a directory in the cgroup file system |
228 | containing the following files describing that cgroup: | 228 | containing the following files describing that cgroup: |
229 | 229 | ||
230 | - tasks: list of tasks (by pid) attached to that cgroup | 230 | - tasks: list of tasks (by pid) attached to that cgroup. This list |
231 | is not guaranteed to be sorted. Writing a thread id into this file | ||
232 | moves the thread into this cgroup. | ||
233 | - cgroup.procs: list of tgids in the cgroup. This list is not | ||
234 | guaranteed to be sorted or free of duplicate tgids, and userspace | ||
235 | should sort/uniquify the list if this property is required. | ||
236 | Writing a tgid into this file moves all threads with that tgid into | ||
237 | this cgroup. | ||
231 | - notify_on_release flag: run the release agent on exit? | 238 | - notify_on_release flag: run the release agent on exit? |
232 | - release_agent: the path to use for release notifications (this file | 239 | - release_agent: the path to use for release notifications (this file |
233 | exists in the top cgroup only) | 240 | exists in the top cgroup only) |
@@ -374,7 +381,7 @@ Now you want to do something with this cgroup. | |||
374 | 381 | ||
375 | In this directory you can find several files: | 382 | In this directory you can find several files: |
376 | # ls | 383 | # ls |
377 | notify_on_release tasks | 384 | cgroup.procs notify_on_release tasks |
378 | (plus whatever files added by the attached subsystems) | 385 | (plus whatever files added by the attached subsystems) |
379 | 386 | ||
380 | Now attach your shell to this cgroup: | 387 | Now attach your shell to this cgroup: |
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt index b41f3e58aefa..f1c5c4bccd3e 100644 --- a/Documentation/cputopology.txt +++ b/Documentation/cputopology.txt | |||
@@ -1,15 +1,28 @@ | |||
1 | 1 | ||
2 | Export cpu topology info via sysfs. Items (attributes) are similar | 2 | Export CPU topology info via sysfs. Items (attributes) are similar |
3 | to /proc/cpuinfo. | 3 | to /proc/cpuinfo. |
4 | 4 | ||
5 | 1) /sys/devices/system/cpu/cpuX/topology/physical_package_id: | 5 | 1) /sys/devices/system/cpu/cpuX/topology/physical_package_id: |
6 | represent the physical package id of cpu X; | 6 | |
7 | physical package id of cpuX. Typically corresponds to a physical | ||
8 | socket number, but the actual value is architecture and platform | ||
9 | dependent. | ||
10 | |||
7 | 2) /sys/devices/system/cpu/cpuX/topology/core_id: | 11 | 2) /sys/devices/system/cpu/cpuX/topology/core_id: |
8 | represent the cpu core id to cpu X; | 12 | |
13 | the CPU core ID of cpuX. Typically it is the hardware platform's | ||
14 | identifier (rather than the kernel's). The actual value is | ||
15 | architecture and platform dependent. | ||
16 | |||
9 | 3) /sys/devices/system/cpu/cpuX/topology/thread_siblings: | 17 | 3) /sys/devices/system/cpu/cpuX/topology/thread_siblings: |
10 | represent the thread siblings to cpu X in the same core; | 18 | |
19 | internal kernel map of cpuX's hardware threads within the same | ||
20 | core as cpuX | ||
21 | |||
11 | 4) /sys/devices/system/cpu/cpuX/topology/core_siblings: | 22 | 4) /sys/devices/system/cpu/cpuX/topology/core_siblings: |
12 | represent the thread siblings to cpu X in the same physical package; | 23 | |
24 | internal kernel map of cpuX's hardware threads within the same | ||
25 | physical_package_id. | ||
13 | 26 | ||
14 | To implement it in an architecture-neutral way, a new source file, | 27 | To implement it in an architecture-neutral way, a new source file, |
15 | drivers/base/topology.c, is to export the 4 attributes. | 28 | drivers/base/topology.c, is to export the 4 attributes. |
@@ -32,32 +45,32 @@ not defined by include/asm-XXX/topology.h: | |||
32 | 3) thread_siblings: just the given CPU | 45 | 3) thread_siblings: just the given CPU |
33 | 4) core_siblings: just the given CPU | 46 | 4) core_siblings: just the given CPU |
34 | 47 | ||
35 | Additionally, cpu topology information is provided under | 48 | Additionally, CPU topology information is provided under |
36 | /sys/devices/system/cpu and includes these files. The internal | 49 | /sys/devices/system/cpu and includes these files. The internal |
37 | source for the output is in brackets ("[]"). | 50 | source for the output is in brackets ("[]"). |
38 | 51 | ||
39 | kernel_max: the maximum cpu index allowed by the kernel configuration. | 52 | kernel_max: the maximum CPU index allowed by the kernel configuration. |
40 | [NR_CPUS-1] | 53 | [NR_CPUS-1] |
41 | 54 | ||
42 | offline: cpus that are not online because they have been | 55 | offline: CPUs that are not online because they have been |
43 | HOTPLUGGED off (see cpu-hotplug.txt) or exceed the limit | 56 | HOTPLUGGED off (see cpu-hotplug.txt) or exceed the limit |
44 | of cpus allowed by the kernel configuration (kernel_max | 57 | of CPUs allowed by the kernel configuration (kernel_max |
45 | above). [~cpu_online_mask + cpus >= NR_CPUS] | 58 | above). [~cpu_online_mask + cpus >= NR_CPUS] |
46 | 59 | ||
47 | online: cpus that are online and being scheduled [cpu_online_mask] | 60 | online: CPUs that are online and being scheduled [cpu_online_mask] |
48 | 61 | ||
49 | possible: cpus that have been allocated resources and can be | 62 | possible: CPUs that have been allocated resources and can be |
50 | brought online if they are present. [cpu_possible_mask] | 63 | brought online if they are present. [cpu_possible_mask] |
51 | 64 | ||
52 | present: cpus that have been identified as being present in the | 65 | present: CPUs that have been identified as being present in the |
53 | system. [cpu_present_mask] | 66 | system. [cpu_present_mask] |
54 | 67 | ||
55 | The format for the above output is compatible with cpulist_parse() | 68 | The format for the above output is compatible with cpulist_parse() |
56 | [see <linux/cpumask.h>]. Some examples follow. | 69 | [see <linux/cpumask.h>]. Some examples follow. |
57 | 70 | ||
58 | In this example, there are 64 cpus in the system but cpus 32-63 exceed | 71 | In this example, there are 64 CPUs in the system but cpus 32-63 exceed |
59 | the kernel max which is limited to 0..31 by the NR_CPUS config option | 72 | the kernel max which is limited to 0..31 by the NR_CPUS config option |
60 | being 32. Note also that cpus 2 and 4-31 are not online but could be | 73 | being 32. Note also that CPUs 2 and 4-31 are not online but could be |
61 | brought online as they are both present and possible. | 74 | brought online as they are both present and possible. |
62 | 75 | ||
63 | kernel_max: 31 | 76 | kernel_max: 31 |
@@ -67,8 +80,8 @@ brought online as they are both present and possible. | |||
67 | present: 0-31 | 80 | present: 0-31 |
68 | 81 | ||
69 | In this example, the NR_CPUS config option is 128, but the kernel was | 82 | In this example, the NR_CPUS config option is 128, but the kernel was |
70 | started with possible_cpus=144. There are 4 cpus in the system and cpu2 | 83 | started with possible_cpus=144. There are 4 CPUs in the system and cpu2 |
71 | was manually taken offline (and is the only cpu that can be brought | 84 | was manually taken offline (and is the only CPU that can be brought |
72 | online.) | 85 | online.) |
73 | 86 | ||
74 | kernel_max: 127 | 87 | kernel_max: 127 |
@@ -78,4 +91,4 @@ online.) | |||
78 | present: 0-3 | 91 | present: 0-3 |
79 | 92 | ||
80 | See cpu-hotplug.txt for the possible_cpus=NUM kernel start parameter | 93 | See cpu-hotplug.txt for the possible_cpus=NUM kernel start parameter |
81 | as well as more information on the various cpumask's. | 94 | as well as more information on the various cpumasks. |
diff --git a/Documentation/debugging-via-ohci1394.txt b/Documentation/debugging-via-ohci1394.txt index 59a91e5c6909..611f5a5499b1 100644 --- a/Documentation/debugging-via-ohci1394.txt +++ b/Documentation/debugging-via-ohci1394.txt | |||
@@ -64,14 +64,14 @@ be used to view the printk buffer of a remote machine, even with live update. | |||
64 | 64 | ||
65 | Bernhard Kaindl enhanced firescope to support accessing 64-bit machines | 65 | Bernhard Kaindl enhanced firescope to support accessing 64-bit machines |
66 | from 32-bit firescope and vice versa: | 66 | from 32-bit firescope and vice versa: |
67 | - ftp://ftp.suse.de/private/bk/firewire/tools/firescope-0.2.2.tar.bz2 | 67 | - http://halobates.de/firewire/firescope-0.2.2.tar.bz2 |
68 | 68 | ||
69 | and he implemented fast system dump (alpha version - read README.txt): | 69 | and he implemented fast system dump (alpha version - read README.txt): |
70 | - ftp://ftp.suse.de/private/bk/firewire/tools/firedump-0.1.tar.bz2 | 70 | - http://halobates.de/firewire/firedump-0.1.tar.bz2 |
71 | 71 | ||
72 | There is also a gdb proxy for firewire which allows to use gdb to access | 72 | There is also a gdb proxy for firewire which allows to use gdb to access |
73 | data which can be referenced from symbols found by gdb in vmlinux: | 73 | data which can be referenced from symbols found by gdb in vmlinux: |
74 | - ftp://ftp.suse.de/private/bk/firewire/tools/fireproxy-0.33.tar.bz2 | 74 | - http://halobates.de/firewire/fireproxy-0.33.tar.bz2 |
75 | 75 | ||
76 | The latest version of this gdb proxy (fireproxy-0.34) can communicate (not | 76 | The latest version of this gdb proxy (fireproxy-0.34) can communicate (not |
77 | yet stable) with kgdb over a memory-based communication module (kgdbom). | 77 | yet stable) with kgdb over a memory-based communication module (kgdbom). |
@@ -178,7 +178,7 @@ Step-by-step instructions for using firescope with early OHCI initialization: | |||
178 | 178 | ||
179 | Notes | 179 | Notes |
180 | ----- | 180 | ----- |
181 | Documentation and specifications: ftp://ftp.suse.de/private/bk/firewire/docs | 181 | Documentation and specifications: http://halobates.de/firewire/ |
182 | 182 | ||
183 | FireWire is a trademark of Apple Inc. - for more information please refer to: | 183 | FireWire is a trademark of Apple Inc. - for more information please refer to: |
184 | http://en.wikipedia.org/wiki/FireWire | 184 | http://en.wikipedia.org/wiki/FireWire |
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 89a47b5aff07..bc693fffabe0 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -418,6 +418,14 @@ When: 2.6.33 | |||
418 | Why: Should be implemented in userspace, policy daemon. | 418 | Why: Should be implemented in userspace, policy daemon. |
419 | Who: Johannes Berg <johannes@sipsolutions.net> | 419 | Who: Johannes Berg <johannes@sipsolutions.net> |
420 | 420 | ||
421 | --------------------------- | ||
422 | |||
423 | What: CONFIG_INOTIFY | ||
424 | When: 2.6.33 | ||
425 | Why: last user (audit) will be converted to the newer more generic | ||
426 | and more easily maintained fsnotify subsystem | ||
427 | Who: Eric Paris <eparis@redhat.com> | ||
428 | |||
421 | ---------------------------- | 429 | ---------------------------- |
422 | 430 | ||
423 | What: lock_policy_rwsem_* and unlock_policy_rwsem_* will not be | 431 | What: lock_policy_rwsem_* and unlock_policy_rwsem_* will not be |
@@ -451,3 +459,33 @@ Why: OSS sound_core grabs all legacy minors (0-255) of SOUND_MAJOR | |||
451 | will also allow making ALSA OSS emulation independent of | 459 | will also allow making ALSA OSS emulation independent of |
452 | sound_core. The dependency will be broken then too. | 460 | sound_core. The dependency will be broken then too. |
453 | Who: Tejun Heo <tj@kernel.org> | 461 | Who: Tejun Heo <tj@kernel.org> |
462 | |||
463 | ---------------------------- | ||
464 | |||
465 | What: Support for VMware's guest paravirtualization technique [VMI] will be | ||
466 | dropped. | ||
467 | When: 2.6.37 or earlier. | ||
468 | Why: With the recent innovations in CPU hardware acceleration technologies | ||
469 | from Intel and AMD, VMware ran a few experiments to compare these | ||
470 | techniques to guest paravirtualization technique on VMware's platform. | ||
471 | These hardware assisted virtualization techniques have outperformed the | ||
472 | performance benefits provided by VMI in most of the workloads. VMware | ||
473 | expects that these hardware features will be ubiquitous in a couple of | ||
474 | years, as a result, VMware has started a phased retirement of this | ||
475 | feature from the hypervisor. We will be removing this feature from the | ||
476 | Kernel too. Right now we are targeting 2.6.37 but can retire earlier if | ||
477 | technical reasons (read opportunity to remove major chunk of pvops) | ||
478 | arise. | ||
479 | |||
480 | Please note that VMI has always been an optimization and non-VMI kernels | ||
481 | still work fine on VMware's platform. | ||
482 | Latest versions of VMware's product which support VMI are, | ||
483 | Workstation 7.0 and VSphere 4.0 on ESX side, future maintenance | ||
484 | releases for these products will continue supporting VMI. | ||
485 | |||
486 | For more details about VMI retirement take a look at this, | ||
487 | http://blogs.vmware.com/guestosguide/2009/09/vmi-retirement.html | ||
488 | |||
489 | Who: Alok N Kataria <akataria@vmware.com> | ||
490 | |||
491 | ---------------------------- | ||
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt index 570f9bd9be2b..05d5cf1d743f 100644 --- a/Documentation/filesystems/ext3.txt +++ b/Documentation/filesystems/ext3.txt | |||
@@ -123,10 +123,18 @@ resuid=n The user ID which may use the reserved blocks. | |||
123 | 123 | ||
124 | sb=n Use alternate superblock at this location. | 124 | sb=n Use alternate superblock at this location. |
125 | 125 | ||
126 | quota | 126 | quota These options are ignored by the filesystem. They |
127 | noquota | 127 | noquota are used only by quota tools to recognize volumes |
128 | grpquota | 128 | grpquota where quota should be turned on. See documentation |
129 | usrquota | 129 | usrquota in the quota-tools package for more details |
130 | (http://sourceforge.net/projects/linuxquota). | ||
131 | |||
132 | jqfmt=<quota type> These options tell filesystem details about quota | ||
133 | usrjquota=<file> so that quota information can be properly updated | ||
134 | grpjquota=<file> during journal replay. They replace the above | ||
135 | quota options. See documentation in the quota-tools | ||
136 | package for more details | ||
137 | (http://sourceforge.net/projects/linuxquota). | ||
130 | 138 | ||
131 | bh (*) ext3 associates buffer heads to data pages to | 139 | bh (*) ext3 associates buffer heads to data pages to |
132 | nobh (a) cache disk block mapping information | 140 | nobh (a) cache disk block mapping information |
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index bf4f4b7e11b3..6d94e0696f8c 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt | |||
@@ -134,9 +134,15 @@ ro Mount filesystem read only. Note that ext4 will | |||
134 | mount options "ro,noload" can be used to prevent | 134 | mount options "ro,noload" can be used to prevent |
135 | writes to the filesystem. | 135 | writes to the filesystem. |
136 | 136 | ||
137 | journal_checksum Enable checksumming of the journal transactions. | ||
138 | This will allow the recovery code in e2fsck and the | ||
139 | kernel to detect corruption in the journal. It is a | ||
140 | compatible change and will be ignored by older kernels. | ||
141 | |||
137 | journal_async_commit Commit block can be written to disk without waiting | 142 | journal_async_commit Commit block can be written to disk without waiting |
138 | for descriptor blocks. If enabled older kernels cannot | 143 | for descriptor blocks. If enabled older kernels cannot |
139 | mount the device. | 144 | mount the device. This will enable 'journal_checksum' |
145 | internally. | ||
140 | 146 | ||
141 | journal=update Update the ext4 file system's journal to the current | 147 | journal=update Update the ext4 file system's journal to the current |
142 | format. | 148 | format. |
diff --git a/Documentation/flexible-arrays.txt b/Documentation/flexible-arrays.txt index 84eb26808dee..cb8a3a00cc92 100644 --- a/Documentation/flexible-arrays.txt +++ b/Documentation/flexible-arrays.txt | |||
@@ -1,5 +1,5 @@ | |||
1 | Using flexible arrays in the kernel | 1 | Using flexible arrays in the kernel |
2 | Last updated for 2.6.31 | 2 | Last updated for 2.6.32 |
3 | Jonathan Corbet <corbet@lwn.net> | 3 | Jonathan Corbet <corbet@lwn.net> |
4 | 4 | ||
5 | Large contiguous memory allocations can be unreliable in the Linux kernel. | 5 | Large contiguous memory allocations can be unreliable in the Linux kernel. |
@@ -40,6 +40,13 @@ argument is passed directly to the internal memory allocation calls. With | |||
40 | the current code, using flags to ask for high memory is likely to lead to | 40 | the current code, using flags to ask for high memory is likely to lead to |
41 | notably unpleasant side effects. | 41 | notably unpleasant side effects. |
42 | 42 | ||
43 | It is also possible to define flexible arrays at compile time with: | ||
44 | |||
45 | DEFINE_FLEX_ARRAY(name, element_size, total); | ||
46 | |||
47 | This macro will result in a definition of an array with the given name; the | ||
48 | element size and total will be checked for validity at compile time. | ||
49 | |||
43 | Storing data into a flexible array is accomplished with a call to: | 50 | Storing data into a flexible array is accomplished with a call to: |
44 | 51 | ||
45 | int flex_array_put(struct flex_array *array, unsigned int element_nr, | 52 | int flex_array_put(struct flex_array *array, unsigned int element_nr, |
@@ -76,16 +83,30 @@ particular element has never been allocated. | |||
76 | Note that it is possible to get back a valid pointer for an element which | 83 | Note that it is possible to get back a valid pointer for an element which |
77 | has never been stored in the array. Memory for array elements is allocated | 84 | has never been stored in the array. Memory for array elements is allocated |
78 | one page at a time; a single allocation could provide memory for several | 85 | one page at a time; a single allocation could provide memory for several |
79 | adjacent elements. The flexible array code does not know if a specific | 86 | adjacent elements. Flexible array elements are normally initialized to the |
80 | element has been written; it only knows if the associated memory is | 87 | value FLEX_ARRAY_FREE (defined as 0x6c in <linux/poison.h>), so errors |
81 | present. So a flex_array_get() call on an element which was never stored | 88 | involving that number probably result from use of unstored array entries. |
82 | in the array has the potential to return a pointer to random data. If the | 89 | Note that, if array elements are allocated with __GFP_ZERO, they will be |
83 | caller does not have a separate way to know which elements were actually | 90 | initialized to zero and this poisoning will not happen. |
84 | stored, it might be wise, at least, to add GFP_ZERO to the flags argument | 91 | |
85 | to ensure that all elements are zeroed. | 92 | Individual elements in the array can be cleared with: |
86 | 93 | ||
87 | There is no way to remove a single element from the array. It is possible, | 94 | int flex_array_clear(struct flex_array *array, unsigned int element_nr); |
88 | though, to remove all elements with a call to: | 95 | |
96 | This function will set the given element to FLEX_ARRAY_FREE and return | ||
97 | zero. If storage for the indicated element is not allocated for the array, | ||
98 | flex_array_clear() will return -EINVAL instead. Note that clearing an | ||
99 | element does not release the storage associated with it; to reduce the | ||
100 | allocated size of an array, call: | ||
101 | |||
102 | int flex_array_shrink(struct flex_array *array); | ||
103 | |||
104 | The return value will be the number of pages of memory actually freed. | ||
105 | This function works by scanning the array for pages containing nothing but | ||
106 | FLEX_ARRAY_FREE bytes, so (1) it can be expensive, and (2) it will not work | ||
107 | if the array's pages are allocated with __GFP_ZERO. | ||
108 | |||
109 | It is possible to remove all elements of an array with a call to: | ||
89 | 110 | ||
90 | void flex_array_free_parts(struct flex_array *array); | 111 | void flex_array_free_parts(struct flex_array *array); |
91 | 112 | ||
diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface index dcbd502c8792..82def883361b 100644 --- a/Documentation/hwmon/sysfs-interface +++ b/Documentation/hwmon/sysfs-interface | |||
@@ -353,10 +353,20 @@ power[1-*]_average Average power use | |||
353 | Unit: microWatt | 353 | Unit: microWatt |
354 | RO | 354 | RO |
355 | 355 | ||
356 | power[1-*]_average_interval Power use averaging interval | 356 | power[1-*]_average_interval Power use averaging interval. A poll |
357 | notification is sent to this file if the | ||
358 | hardware changes the averaging interval. | ||
357 | Unit: milliseconds | 359 | Unit: milliseconds |
358 | RW | 360 | RW |
359 | 361 | ||
362 | power[1-*]_average_interval_max Maximum power use averaging interval | ||
363 | Unit: milliseconds | ||
364 | RO | ||
365 | |||
366 | power[1-*]_average_interval_min Minimum power use averaging interval | ||
367 | Unit: milliseconds | ||
368 | RO | ||
369 | |||
360 | power[1-*]_average_highest Historical average maximum power use | 370 | power[1-*]_average_highest Historical average maximum power use |
361 | Unit: microWatt | 371 | Unit: microWatt |
362 | RO | 372 | RO |
@@ -365,6 +375,18 @@ power[1-*]_average_lowest Historical average minimum power use | |||
365 | Unit: microWatt | 375 | Unit: microWatt |
366 | RO | 376 | RO |
367 | 377 | ||
378 | power[1-*]_average_max A poll notification is sent to | ||
379 | power[1-*]_average when power use | ||
380 | rises above this value. | ||
381 | Unit: microWatt | ||
382 | RW | ||
383 | |||
384 | power[1-*]_average_min A poll notification is sent to | ||
385 | power[1-*]_average when power use | ||
386 | sinks below this value. | ||
387 | Unit: microWatt | ||
388 | RW | ||
389 | |||
368 | power[1-*]_input Instantaneous power use | 390 | power[1-*]_input Instantaneous power use |
369 | Unit: microWatt | 391 | Unit: microWatt |
370 | RO | 392 | RO |
@@ -381,6 +403,39 @@ power[1-*]_reset_history Reset input_highest, input_lowest, | |||
381 | average_highest and average_lowest. | 403 | average_highest and average_lowest. |
382 | WO | 404 | WO |
383 | 405 | ||
406 | power[1-*]_accuracy Accuracy of the power meter. | ||
407 | Unit: Percent | ||
408 | RO | ||
409 | |||
410 | power[1-*]_alarm 1 if the system is drawing more power than the | ||
411 | cap allows; 0 otherwise. A poll notification is | ||
412 | sent to this file when the power use exceeds the | ||
413 | cap. This file only appears if the cap is known | ||
414 | to be enforced by hardware. | ||
415 | RO | ||
416 | |||
417 | power[1-*]_cap If power use rises above this limit, the | ||
418 | system should take action to reduce power use. | ||
419 | A poll notification is sent to this file if the | ||
420 | cap is changed by the hardware. The *_cap | ||
421 | files only appear if the cap is known to be | ||
422 | enforced by hardware. | ||
423 | Unit: microWatt | ||
424 | RW | ||
425 | |||
426 | power[1-*]_cap_hyst Margin of hysteresis built around capping and | ||
427 | notification. | ||
428 | Unit: microWatt | ||
429 | RW | ||
430 | |||
431 | power[1-*]_cap_max Maximum cap that can be set. | ||
432 | Unit: microWatt | ||
433 | RO | ||
434 | |||
435 | power[1-*]_cap_min Minimum cap that can be set. | ||
436 | Unit: microWatt | ||
437 | RO | ||
438 | |||
384 | ********** | 439 | ********** |
385 | * Energy * | 440 | * Energy * |
386 | ********** | 441 | ********** |
diff --git a/Documentation/i2c/busses/i2c-piix4 b/Documentation/i2c/busses/i2c-piix4 index c5b37c570554..ac540c71c7eb 100644 --- a/Documentation/i2c/busses/i2c-piix4 +++ b/Documentation/i2c/busses/i2c-piix4 | |||
@@ -8,7 +8,7 @@ Supported adapters: | |||
8 | Datasheet: Only available via NDA from ServerWorks | 8 | Datasheet: Only available via NDA from ServerWorks |
9 | * ATI IXP200, IXP300, IXP400, SB600, SB700 and SB800 southbridges | 9 | * ATI IXP200, IXP300, IXP400, SB600, SB700 and SB800 southbridges |
10 | Datasheet: Not publicly available | 10 | Datasheet: Not publicly available |
11 | * AMD SB900 | 11 | * AMD Hudson-2 |
12 | Datasheet: Not publicly available | 12 | Datasheet: Not publicly available |
13 | * Standard Microsystems (SMSC) SLC90E66 (Victory66) southbridge | 13 | * Standard Microsystems (SMSC) SLC90E66 (Victory66) southbridge |
14 | Datasheet: Publicly available at the SMSC website http://www.smsc.com | 14 | Datasheet: Publicly available at the SMSC website http://www.smsc.com |
diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt index 744687dd195b..8a366959f5cc 100644 --- a/Documentation/infiniband/user_mad.txt +++ b/Documentation/infiniband/user_mad.txt | |||
@@ -128,8 +128,8 @@ Setting IsSM Capability Bit | |||
128 | To create the appropriate character device files automatically with | 128 | To create the appropriate character device files automatically with |
129 | udev, a rule like | 129 | udev, a rule like |
130 | 130 | ||
131 | KERNEL="umad*", NAME="infiniband/%k" | 131 | KERNEL=="umad*", NAME="infiniband/%k" |
132 | KERNEL="issm*", NAME="infiniband/%k" | 132 | KERNEL=="issm*", NAME="infiniband/%k" |
133 | 133 | ||
134 | can be used. This will create device nodes named | 134 | can be used. This will create device nodes named |
135 | 135 | ||
diff --git a/Documentation/infiniband/user_verbs.txt b/Documentation/infiniband/user_verbs.txt index f847501e50b5..afe3f8da9018 100644 --- a/Documentation/infiniband/user_verbs.txt +++ b/Documentation/infiniband/user_verbs.txt | |||
@@ -58,7 +58,7 @@ Memory pinning | |||
58 | To create the appropriate character device files automatically with | 58 | To create the appropriate character device files automatically with |
59 | udev, a rule like | 59 | udev, a rule like |
60 | 60 | ||
61 | KERNEL="uverbs*", NAME="infiniband/%k" | 61 | KERNEL=="uverbs*", NAME="infiniband/%k" |
62 | 62 | ||
63 | can be used. This will create device nodes named | 63 | can be used. This will create device nodes named |
64 | 64 | ||
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 02df20be7764..52c34b4f567e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -671,6 +671,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
671 | earlyprintk= [X86,SH,BLACKFIN] | 671 | earlyprintk= [X86,SH,BLACKFIN] |
672 | earlyprintk=vga | 672 | earlyprintk=vga |
673 | earlyprintk=serial[,ttySn[,baudrate]] | 673 | earlyprintk=serial[,ttySn[,baudrate]] |
674 | earlyprintk=ttySn[,baudrate] | ||
674 | earlyprintk=dbgp[debugController#] | 675 | earlyprintk=dbgp[debugController#] |
675 | 676 | ||
676 | Append ",keep" to not disable it when the real console | 677 | Append ",keep" to not disable it when the real console |
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index ba9373f82ab5..098de5bce00a 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -42,7 +42,6 @@ | |||
42 | #include <signal.h> | 42 | #include <signal.h> |
43 | #include "linux/lguest_launcher.h" | 43 | #include "linux/lguest_launcher.h" |
44 | #include "linux/virtio_config.h" | 44 | #include "linux/virtio_config.h" |
45 | #include <linux/virtio_ids.h> | ||
46 | #include "linux/virtio_net.h" | 45 | #include "linux/virtio_net.h" |
47 | #include "linux/virtio_blk.h" | 46 | #include "linux/virtio_blk.h" |
48 | #include "linux/virtio_console.h" | 47 | #include "linux/virtio_console.h" |
diff --git a/Documentation/scsi/hptiop.txt b/Documentation/scsi/hptiop.txt index a6eb4add1be6..9605179711f4 100644 --- a/Documentation/scsi/hptiop.txt +++ b/Documentation/scsi/hptiop.txt | |||
@@ -3,6 +3,25 @@ HIGHPOINT ROCKETRAID 3xxx/4xxx ADAPTER DRIVER (hptiop) | |||
3 | Controller Register Map | 3 | Controller Register Map |
4 | ------------------------- | 4 | ------------------------- |
5 | 5 | ||
6 | For RR44xx Intel IOP based adapters, the controller IOP is accessed via PCI BAR0 and BAR2: | ||
7 | |||
8 | BAR0 offset Register | ||
9 | 0x11C5C Link Interface IRQ Set | ||
10 | 0x11C60 Link Interface IRQ Clear | ||
11 | |||
12 | BAR2 offset Register | ||
13 | 0x10 Inbound Message Register 0 | ||
14 | 0x14 Inbound Message Register 1 | ||
15 | 0x18 Outbound Message Register 0 | ||
16 | 0x1C Outbound Message Register 1 | ||
17 | 0x20 Inbound Doorbell Register | ||
18 | 0x24 Inbound Interrupt Status Register | ||
19 | 0x28 Inbound Interrupt Mask Register | ||
20 | 0x30 Outbound Interrupt Status Register | ||
21 | 0x34 Outbound Interrupt Mask Register | ||
22 | 0x40 Inbound Queue Port | ||
23 | 0x44 Outbound Queue Port | ||
24 | |||
6 | For Intel IOP based adapters, the controller IOP is accessed via PCI BAR0: | 25 | For Intel IOP based adapters, the controller IOP is accessed via PCI BAR0: |
7 | 26 | ||
8 | BAR0 offset Register | 27 | BAR0 offset Register |
@@ -93,7 +112,7 @@ The driver exposes following sysfs attributes: | |||
93 | 112 | ||
94 | 113 | ||
95 | ----------------------------------------------------------------------------- | 114 | ----------------------------------------------------------------------------- |
96 | Copyright (C) 2006-2007 HighPoint Technologies, Inc. All Rights Reserved. | 115 | Copyright (C) 2006-2009 HighPoint Technologies, Inc. All Rights Reserved. |
97 | 116 | ||
98 | This file is distributed in the hope that it will be useful, | 117 | This file is distributed in the hope that it will be useful, |
99 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 118 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 1c8eb4518ce0..fd9a2f67edf2 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt | |||
@@ -522,7 +522,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
522 | pcm_devs - Number of PCM devices assigned to each card | 522 | pcm_devs - Number of PCM devices assigned to each card |
523 | (default = 1, up to 4) | 523 | (default = 1, up to 4) |
524 | pcm_substreams - Number of PCM substreams assigned to each PCM | 524 | pcm_substreams - Number of PCM substreams assigned to each PCM |
525 | (default = 8, up to 16) | 525 | (default = 8, up to 128) |
526 | hrtimer - Use hrtimer (=1, default) or system timer (=0) | 526 | hrtimer - Use hrtimer (=1, default) or system timer (=0) |
527 | fake_buffer - Fake buffer allocations (default = 1) | 527 | fake_buffer - Fake buffer allocations (default = 1) |
528 | 528 | ||
diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt index 75fddb40f416..4c7f9aee5c4e 100644 --- a/Documentation/sound/alsa/HD-Audio-Models.txt +++ b/Documentation/sound/alsa/HD-Audio-Models.txt | |||
@@ -359,6 +359,7 @@ STAC9227/9228/9229/927x | |||
359 | 5stack-no-fp D965 5stack without front panel | 359 | 5stack-no-fp D965 5stack without front panel |
360 | dell-3stack Dell Dimension E520 | 360 | dell-3stack Dell Dimension E520 |
361 | dell-bios Fixes with Dell BIOS setup | 361 | dell-bios Fixes with Dell BIOS setup |
362 | volknob Fixes with volume-knob widget 0x24 | ||
362 | auto BIOS setup (default) | 363 | auto BIOS setup (default) |
363 | 364 | ||
364 | STAC92HD71B* | 365 | STAC92HD71B* |
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt index 70d68ce8640a..a87dc277a5ca 100644 --- a/Documentation/thermal/sysfs-api.txt +++ b/Documentation/thermal/sysfs-api.txt | |||
@@ -1,5 +1,5 @@ | |||
1 | Generic Thermal Sysfs driver How To | 1 | Generic Thermal Sysfs driver How To |
2 | ========================= | 2 | =================================== |
3 | 3 | ||
4 | Written by Sujith Thomas <sujith.thomas@intel.com>, Zhang Rui <rui.zhang@intel.com> | 4 | Written by Sujith Thomas <sujith.thomas@intel.com>, Zhang Rui <rui.zhang@intel.com> |
5 | 5 | ||
@@ -10,20 +10,20 @@ Copyright (c) 2008 Intel Corporation | |||
10 | 10 | ||
11 | 0. Introduction | 11 | 0. Introduction |
12 | 12 | ||
13 | The generic thermal sysfs provides a set of interfaces for thermal zone devices (sensors) | 13 | The generic thermal sysfs provides a set of interfaces for thermal zone |
14 | and thermal cooling devices (fan, processor...) to register with the thermal management | 14 | devices (sensors) and thermal cooling devices (fan, processor...) to register |
15 | solution and to be a part of it. | 15 | with the thermal management solution and to be a part of it. |
16 | 16 | ||
17 | This how-to focuses on enabling new thermal zone and cooling devices to participate | 17 | This how-to focuses on enabling new thermal zone and cooling devices to |
18 | in thermal management. | 18 | participate in thermal management. |
19 | This solution is platform independent and any type of thermal zone devices and | 19 | This solution is platform independent and any type of thermal zone devices |
20 | cooling devices should be able to make use of the infrastructure. | 20 | and cooling devices should be able to make use of the infrastructure. |
21 | 21 | ||
22 | The main task of the thermal sysfs driver is to expose thermal zone attributes as well | 22 | The main task of the thermal sysfs driver is to expose thermal zone attributes |
23 | as cooling device attributes to the user space. | 23 | as well as cooling device attributes to the user space. |
24 | An intelligent thermal management application can make decisions based on inputs | 24 | An intelligent thermal management application can make decisions based on |
25 | from thermal zone attributes (the current temperature and trip point temperature) | 25 | inputs from thermal zone attributes (the current temperature and trip point |
26 | and throttle appropriate devices. | 26 | temperature) and throttle appropriate devices. |
27 | 27 | ||
28 | [0-*] denotes any positive number starting from 0 | 28 | [0-*] denotes any positive number starting from 0 |
29 | [1-*] denotes any positive number starting from 1 | 29 | [1-*] denotes any positive number starting from 1 |
@@ -31,77 +31,77 @@ and throttle appropriate devices. | |||
31 | 1. thermal sysfs driver interface functions | 31 | 1. thermal sysfs driver interface functions |
32 | 32 | ||
33 | 1.1 thermal zone device interface | 33 | 1.1 thermal zone device interface |
34 | 1.1.1 struct thermal_zone_device *thermal_zone_device_register(char *name, int trips, | 34 | 1.1.1 struct thermal_zone_device *thermal_zone_device_register(char *name, |
35 | void *devdata, struct thermal_zone_device_ops *ops) | 35 | int trips, void *devdata, struct thermal_zone_device_ops *ops) |
36 | 36 | ||
37 | This interface function adds a new thermal zone device (sensor) to | 37 | This interface function adds a new thermal zone device (sensor) to |
38 | /sys/class/thermal folder as thermal_zone[0-*]. | 38 | /sys/class/thermal folder as thermal_zone[0-*]. It tries to bind all the |
39 | It tries to bind all the thermal cooling devices registered at the same time. | 39 | thermal cooling devices registered at the same time. |
40 | 40 | ||
41 | name: the thermal zone name. | 41 | name: the thermal zone name. |
42 | trips: the total number of trip points this thermal zone supports. | 42 | trips: the total number of trip points this thermal zone supports. |
43 | devdata: device private data | 43 | devdata: device private data |
44 | ops: thermal zone device call-backs. | 44 | ops: thermal zone device call-backs. |
45 | .bind: bind the thermal zone device with a thermal cooling device. | 45 | .bind: bind the thermal zone device with a thermal cooling device. |
46 | .unbind: unbind the thermal zone device with a thermal cooling device. | 46 | .unbind: unbind the thermal zone device with a thermal cooling device. |
47 | .get_temp: get the current temperature of the thermal zone. | 47 | .get_temp: get the current temperature of the thermal zone. |
48 | .get_mode: get the current mode (user/kernel) of the thermal zone. | 48 | .get_mode: get the current mode (user/kernel) of the thermal zone. |
49 | "kernel" means thermal management is done in kernel. | 49 | - "kernel" means thermal management is done in kernel. |
50 | "user" will prevent kernel thermal driver actions upon trip points | 50 | - "user" will prevent kernel thermal driver actions upon trip points |
51 | so that user applications can take charge of thermal management. | 51 | so that user applications can take charge of thermal management. |
52 | .set_mode: set the mode (user/kernel) of the thermal zone. | 52 | .set_mode: set the mode (user/kernel) of the thermal zone. |
53 | .get_trip_type: get the type of certain trip point. | 53 | .get_trip_type: get the type of certain trip point. |
54 | .get_trip_temp: get the temperature above which the certain trip point | 54 | .get_trip_temp: get the temperature above which the certain trip point |
55 | will be fired. | 55 | will be fired. |
56 | 56 | ||
57 | 1.1.2 void thermal_zone_device_unregister(struct thermal_zone_device *tz) | 57 | 1.1.2 void thermal_zone_device_unregister(struct thermal_zone_device *tz) |
58 | 58 | ||
59 | This interface function removes the thermal zone device. | 59 | This interface function removes the thermal zone device. |
60 | It deletes the corresponding entry form /sys/class/thermal folder and unbind all | 60 | It deletes the corresponding entry from /sys/class/thermal folder and |
61 | the thermal cooling devices it uses. | 61 | unbinds all the thermal cooling devices it uses. |
62 | 62 | ||
63 | 1.2 thermal cooling device interface | 63 | 1.2 thermal cooling device interface |
64 | 1.2.1 struct thermal_cooling_device *thermal_cooling_device_register(char *name, | 64 | 1.2.1 struct thermal_cooling_device *thermal_cooling_device_register(char *name, |
65 | void *devdata, struct thermal_cooling_device_ops *) | 65 | void *devdata, struct thermal_cooling_device_ops *) |
66 | 66 | ||
67 | This interface function adds a new thermal cooling device (fan/processor/...) to | 67 | This interface function adds a new thermal cooling device (fan/processor/...) |
68 | /sys/class/thermal/ folder as cooling_device[0-*]. | 68 | to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself |
69 | It tries to bind itself to all the thermal zone devices register at the same time. | 69 | to all the thermal zone devices registered at the same time. |
70 | name: the cooling device name. | 70 | name: the cooling device name. |
71 | devdata: device private data. | 71 | devdata: device private data. |
72 | ops: thermal cooling devices call-backs. | 72 | ops: thermal cooling devices call-backs. |
73 | .get_max_state: get the Maximum throttle state of the cooling device. | 73 | .get_max_state: get the Maximum throttle state of the cooling device. |
74 | .get_cur_state: get the Current throttle state of the cooling device. | 74 | .get_cur_state: get the Current throttle state of the cooling device. |
75 | .set_cur_state: set the Current throttle state of the cooling device. | 75 | .set_cur_state: set the Current throttle state of the cooling device. |
76 | 76 | ||
77 | 1.2.2 void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev) | 77 | 1.2.2 void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev) |
78 | 78 | ||
79 | This interface function remove the thermal cooling device. | 79 | This interface function remove the thermal cooling device. |
80 | It deletes the corresponding entry form /sys/class/thermal folder and unbind | 80 | It deletes the corresponding entry from /sys/class/thermal folder and |
81 | itself from all the thermal zone devices using it. | 81 | unbinds itself from all the thermal zone devices using it. |
82 | 82 | ||
83 | 1.3 interface for binding a thermal zone device with a thermal cooling device | 83 | 1.3 interface for binding a thermal zone device with a thermal cooling device |
84 | 1.3.1 int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, | 84 | 1.3.1 int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, |
85 | int trip, struct thermal_cooling_device *cdev); | 85 | int trip, struct thermal_cooling_device *cdev); |
86 | 86 | ||
87 | This interface function bind a thermal cooling device to the certain trip point | 87 | This interface function binds a thermal cooling device to the certain trip |
88 | of a thermal zone device. | 88 | point of a thermal zone device. |
89 | This function is usually called in the thermal zone device .bind callback. | 89 | This function is usually called in the thermal zone device .bind callback. |
90 | tz: the thermal zone device | 90 | tz: the thermal zone device |
91 | cdev: thermal cooling device | 91 | cdev: thermal cooling device |
92 | trip: indicates which trip point the cooling devices is associated with | 92 | trip: indicates which trip point the cooling devices is associated with |
93 | in this thermal zone. | 93 | in this thermal zone. |
94 | 94 | ||
95 | 1.3.2 int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz, | 95 | 1.3.2 int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz, |
96 | int trip, struct thermal_cooling_device *cdev); | 96 | int trip, struct thermal_cooling_device *cdev); |
97 | 97 | ||
98 | This interface function unbind a thermal cooling device from the certain trip point | 98 | This interface function unbinds a thermal cooling device from the certain |
99 | of a thermal zone device. | 99 | trip point of a thermal zone device. This function is usually called in |
100 | This function is usually called in the thermal zone device .unbind callback. | 100 | the thermal zone device .unbind callback. |
101 | tz: the thermal zone device | 101 | tz: the thermal zone device |
102 | cdev: thermal cooling device | 102 | cdev: thermal cooling device |
103 | trip: indicates which trip point the cooling devices is associated with | 103 | trip: indicates which trip point the cooling devices is associated with |
104 | in this thermal zone. | 104 | in this thermal zone. |
105 | 105 | ||
106 | 2. sysfs attributes structure | 106 | 2. sysfs attributes structure |
107 | 107 | ||
@@ -114,153 +114,166 @@ if hwmon is compiled in or built as a module. | |||
114 | 114 | ||
115 | Thermal zone device sys I/F, created once it's registered: | 115 | Thermal zone device sys I/F, created once it's registered: |
116 | /sys/class/thermal/thermal_zone[0-*]: | 116 | /sys/class/thermal/thermal_zone[0-*]: |
117 | |-----type: Type of the thermal zone | 117 | |---type: Type of the thermal zone |
118 | |-----temp: Current temperature | 118 | |---temp: Current temperature |
119 | |-----mode: Working mode of the thermal zone | 119 | |---mode: Working mode of the thermal zone |
120 | |-----trip_point_[0-*]_temp: Trip point temperature | 120 | |---trip_point_[0-*]_temp: Trip point temperature |
121 | |-----trip_point_[0-*]_type: Trip point type | 121 | |---trip_point_[0-*]_type: Trip point type |
122 | 122 | ||
123 | Thermal cooling device sys I/F, created once it's registered: | 123 | Thermal cooling device sys I/F, created once it's registered: |
124 | /sys/class/thermal/cooling_device[0-*]: | 124 | /sys/class/thermal/cooling_device[0-*]: |
125 | |-----type : Type of the cooling device(processor/fan/...) | 125 | |---type: Type of the cooling device(processor/fan/...) |
126 | |-----max_state: Maximum cooling state of the cooling device | 126 | |---max_state: Maximum cooling state of the cooling device |
127 | |-----cur_state: Current cooling state of the cooling device | 127 | |---cur_state: Current cooling state of the cooling device |
128 | 128 | ||
129 | 129 | ||
130 | These two dynamic attributes are created/removed in pairs. | 130 | The next two dynamic attributes are created/removed in pairs. They represent |
131 | They represent the relationship between a thermal zone and its associated cooling device. | 131 | the relationship between a thermal zone and its associated cooling device. |
132 | They are created/removed for each | 132 | They are created/removed for each successful execution of |
133 | thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device successful execution. | 133 | thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device. |
134 | 134 | ||
135 | /sys/class/thermal/thermal_zone[0-*] | 135 | /sys/class/thermal/thermal_zone[0-*]: |
136 | |-----cdev[0-*]: The [0-*]th cooling device in the current thermal zone | 136 | |---cdev[0-*]: [0-*]th cooling device in current thermal zone |
137 | |-----cdev[0-*]_trip_point: Trip point that cdev[0-*] is associated with | 137 | |---cdev[0-*]_trip_point: Trip point that cdev[0-*] is associated with |
138 | 138 | ||
139 | Besides the thermal zone device sysfs I/F and cooling device sysfs I/F, | 139 | Besides the thermal zone device sysfs I/F and cooling device sysfs I/F, |
140 | the generic thermal driver also creates a hwmon sysfs I/F for each _type_ of | 140 | the generic thermal driver also creates a hwmon sysfs I/F for each _type_ |
141 | thermal zone device. E.g. the generic thermal driver registers one hwmon class device | 141 | of thermal zone device. E.g. the generic thermal driver registers one hwmon |
142 | and build the associated hwmon sysfs I/F for all the registered ACPI thermal zones. | 142 | class device and build the associated hwmon sysfs I/F for all the registered |
143 | ACPI thermal zones. | ||
144 | |||
143 | /sys/class/hwmon/hwmon[0-*]: | 145 | /sys/class/hwmon/hwmon[0-*]: |
144 | |-----name: The type of the thermal zone devices. | 146 | |---name: The type of the thermal zone devices |
145 | |-----temp[1-*]_input: The current temperature of thermal zone [1-*]. | 147 | |---temp[1-*]_input: The current temperature of thermal zone [1-*] |
146 | |-----temp[1-*]_critical: The critical trip point of thermal zone [1-*]. | 148 | |---temp[1-*]_critical: The critical trip point of thermal zone [1-*] |
149 | |||
147 | Please read Documentation/hwmon/sysfs-interface for additional information. | 150 | Please read Documentation/hwmon/sysfs-interface for additional information. |
148 | 151 | ||
149 | *************************** | 152 | *************************** |
150 | * Thermal zone attributes * | 153 | * Thermal zone attributes * |
151 | *************************** | 154 | *************************** |
152 | 155 | ||
153 | type Strings which represent the thermal zone type. | 156 | type |
154 | This is given by thermal zone driver as part of registration. | 157 | Strings which represent the thermal zone type. |
155 | Eg: "acpitz" indicates it's an ACPI thermal device. | 158 | This is given by thermal zone driver as part of registration. |
156 | In order to keep it consistent with hwmon sys attribute, | 159 | E.g: "acpitz" indicates it's an ACPI thermal device. |
157 | this should be a short, lowercase string, | 160 | In order to keep it consistent with hwmon sys attribute, this should |
158 | not containing spaces nor dashes. | 161 | be a short, lowercase string, not containing spaces nor dashes. |
159 | RO | 162 | RO, Required |
160 | Required | 163 | |
161 | 164 | temp | |
162 | temp Current temperature as reported by thermal zone (sensor) | 165 | Current temperature as reported by thermal zone (sensor). |
163 | Unit: millidegree Celsius | 166 | Unit: millidegree Celsius |
164 | RO | 167 | RO, Required |
165 | Required | 168 | |
166 | 169 | mode | |
167 | mode One of the predefined values in [kernel, user] | 170 | One of the predefined values in [kernel, user]. |
168 | This file gives information about the algorithm | 171 | This file gives information about the algorithm that is currently |
169 | that is currently managing the thermal zone. | 172 | managing the thermal zone. It can be either default kernel based |
170 | It can be either default kernel based algorithm | 173 | algorithm or user space application. |
171 | or user space application. | 174 | kernel = Thermal management in kernel thermal zone driver. |
172 | RW | 175 | user = Preventing kernel thermal zone driver actions upon |
173 | Optional | 176 | trip points so that user application can take full |
174 | kernel = Thermal management in kernel thermal zone driver. | 177 | charge of the thermal management. |
175 | user = Preventing kernel thermal zone driver actions upon | 178 | RW, Optional |
176 | trip points so that user application can take full | 179 | |
177 | charge of the thermal management. | 180 | trip_point_[0-*]_temp |
178 | 181 | The temperature above which trip point will be fired. | |
179 | trip_point_[0-*]_temp The temperature above which trip point will be fired | 182 | Unit: millidegree Celsius |
180 | Unit: millidegree Celsius | 183 | RO, Optional |
181 | RO | 184 | |
182 | Optional | 185 | trip_point_[0-*]_type |
183 | 186 | Strings which indicate the type of the trip point. | |
184 | trip_point_[0-*]_type Strings which indicate the type of the trip point | 187 | E.g. it can be one of critical, hot, passive, active[0-*] for ACPI |
185 | E.g. it can be one of critical, hot, passive, | 188 | thermal zone. |
186 | active[0-*] for ACPI thermal zone. | 189 | RO, Optional |
187 | RO | 190 | |
188 | Optional | 191 | cdev[0-*] |
189 | 192 | Sysfs link to the thermal cooling device node where the sys I/F | |
190 | cdev[0-*] Sysfs link to the thermal cooling device node where the sys I/F | 193 | for cooling device throttling control represents. |
191 | for cooling device throttling control represents. | 194 | RO, Optional |
192 | RO | 195 | |
193 | Optional | 196 | cdev[0-*]_trip_point |
194 | 197 | The trip point with which cdev[0-*] is associated in this thermal | |
195 | cdev[0-*]_trip_point The trip point with which cdev[0-*] is associated in this thermal zone | 198 | zone; -1 means the cooling device is not associated with any trip |
196 | -1 means the cooling device is not associated with any trip point. | 199 | point. |
197 | RO | 200 | RO, Optional |
198 | Optional | 201 | |
199 | 202 | passive | |
200 | ****************************** | 203 | Attribute is only present for zones in which the passive cooling |
201 | * Cooling device attributes * | 204 | policy is not supported by native thermal driver. Default is zero |
202 | ****************************** | 205 | and can be set to a temperature (in millidegrees) to enable a |
203 | 206 | passive trip point for the zone. Activation is done by polling with | |
204 | type String which represents the type of device | 207 | an interval of 1 second. |
205 | eg: For generic ACPI: this should be "Fan", | 208 | Unit: millidegrees Celsius |
206 | "Processor" or "LCD" | 209 | RW, Optional |
207 | eg. For memory controller device on intel_menlow platform: | 210 | |
208 | this should be "Memory controller" | 211 | ***************************** |
209 | RO | 212 | * Cooling device attributes * |
210 | Required | 213 | ***************************** |
211 | 214 | ||
212 | max_state The maximum permissible cooling state of this cooling device. | 215 | type |
213 | RO | 216 | String which represents the type of device, e.g: |
214 | Required | 217 | - for generic ACPI: should be "Fan", "Processor" or "LCD" |
215 | 218 | - for memory controller device on intel_menlow platform: | |
216 | cur_state The current cooling state of this cooling device. | 219 | should be "Memory controller". |
217 | the value can any integer numbers between 0 and max_state, | 220 | RO, Required |
218 | cur_state == 0 means no cooling | 221 | |
219 | cur_state == max_state means the maximum cooling. | 222 | max_state |
220 | RW | 223 | The maximum permissible cooling state of this cooling device. |
221 | Required | 224 | RO, Required |
225 | |||
226 | cur_state | ||
227 | The current cooling state of this cooling device. | ||
228 | The value can be any integer number between 0 and max_state: | ||
229 | - cur_state == 0 means no cooling | ||
230 | - cur_state == max_state means the maximum cooling. | ||
231 | RW, Required | ||
222 | 232 | ||
223 | 3. A simple implementation | 233 | 3. A simple implementation |
224 | 234 | ||
225 | ACPI thermal zone may support multiple trip points like critical/hot/passive/active. | 235 | ACPI thermal zone may support multiple trip points like critical, hot, |
226 | If an ACPI thermal zone supports critical, passive, active[0] and active[1] at the same time, | 236 | passive, active. If an ACPI thermal zone supports critical, passive, |
227 | it may register itself as a thermal_zone_device (thermal_zone1) with 4 trip points in all. | 237 | active[0] and active[1] at the same time, it may register itself as a |
228 | It has one processor and one fan, which are both registered as thermal_cooling_device. | 238 | thermal_zone_device (thermal_zone1) with 4 trip points in all. |
229 | If the processor is listed in _PSL method, and the fan is listed in _AL0 method, | 239 | It has one processor and one fan, which are both registered as |
230 | the sys I/F structure will be built like this: | 240 | thermal_cooling_device. |
241 | |||
242 | If the processor is listed in _PSL method, and the fan is listed in _AL0 | ||
243 | method, the sys I/F structure will be built like this: | ||
231 | 244 | ||
232 | /sys/class/thermal: | 245 | /sys/class/thermal: |
233 | 246 | ||
234 | |thermal_zone1: | 247 | |thermal_zone1: |
235 | |-----type: acpitz | 248 | |---type: acpitz |
236 | |-----temp: 37000 | 249 | |---temp: 37000 |
237 | |-----mode: kernel | 250 | |---mode: kernel |
238 | |-----trip_point_0_temp: 100000 | 251 | |---trip_point_0_temp: 100000 |
239 | |-----trip_point_0_type: critical | 252 | |---trip_point_0_type: critical |
240 | |-----trip_point_1_temp: 80000 | 253 | |---trip_point_1_temp: 80000 |
241 | |-----trip_point_1_type: passive | 254 | |---trip_point_1_type: passive |
242 | |-----trip_point_2_temp: 70000 | 255 | |---trip_point_2_temp: 70000 |
243 | |-----trip_point_2_type: active0 | 256 | |---trip_point_2_type: active0 |
244 | |-----trip_point_3_temp: 60000 | 257 | |---trip_point_3_temp: 60000 |
245 | |-----trip_point_3_type: active1 | 258 | |---trip_point_3_type: active1 |
246 | |-----cdev0: --->/sys/class/thermal/cooling_device0 | 259 | |---cdev0: --->/sys/class/thermal/cooling_device0 |
247 | |-----cdev0_trip_point: 1 /* cdev0 can be used for passive */ | 260 | |---cdev0_trip_point: 1 /* cdev0 can be used for passive */ |
248 | |-----cdev1: --->/sys/class/thermal/cooling_device3 | 261 | |---cdev1: --->/sys/class/thermal/cooling_device3 |
249 | |-----cdev1_trip_point: 2 /* cdev1 can be used for active[0]*/ | 262 | |---cdev1_trip_point: 2 /* cdev1 can be used for active[0]*/ |
250 | 263 | ||
251 | |cooling_device0: | 264 | |cooling_device0: |
252 | |-----type: Processor | 265 | |---type: Processor |
253 | |-----max_state: 8 | 266 | |---max_state: 8 |
254 | |-----cur_state: 0 | 267 | |---cur_state: 0 |
255 | 268 | ||
256 | |cooling_device3: | 269 | |cooling_device3: |
257 | |-----type: Fan | 270 | |---type: Fan |
258 | |-----max_state: 2 | 271 | |---max_state: 2 |
259 | |-----cur_state: 0 | 272 | |---cur_state: 0 |
260 | 273 | ||
261 | /sys/class/hwmon: | 274 | /sys/class/hwmon: |
262 | 275 | ||
263 | |hwmon0: | 276 | |hwmon0: |
264 | |-----name: acpitz | 277 | |---name: acpitz |
265 | |-----temp1_input: 37000 | 278 | |---temp1_input: 37000 |
266 | |-----temp1_crit: 100000 | 279 | |---temp1_crit: 100000 |
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 957b22fde2df..8179692fbb90 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt | |||
@@ -1231,6 +1231,7 @@ something like this simple program: | |||
1231 | #include <sys/stat.h> | 1231 | #include <sys/stat.h> |
1232 | #include <fcntl.h> | 1232 | #include <fcntl.h> |
1233 | #include <unistd.h> | 1233 | #include <unistd.h> |
1234 | #include <string.h> | ||
1234 | 1235 | ||
1235 | #define _STR(x) #x | 1236 | #define _STR(x) #x |
1236 | #define STR(x) _STR(x) | 1237 | #define STR(x) _STR(x) |
@@ -1265,6 +1266,7 @@ const char *find_debugfs(void) | |||
1265 | return NULL; | 1266 | return NULL; |
1266 | } | 1267 | } |
1267 | 1268 | ||
1269 | strcat(debugfs, "/tracing/"); | ||
1268 | debugfs_found = 1; | 1270 | debugfs_found = 1; |
1269 | 1271 | ||
1270 | return debugfs; | 1272 | return debugfs; |
diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt new file mode 100644 index 000000000000..3ffadf8da61f --- /dev/null +++ b/Documentation/vm/hwpoison.txt | |||
@@ -0,0 +1,136 @@ | |||
1 | What is hwpoison? | ||
2 | |||
3 | Upcoming Intel CPUs have support for recovering from some memory errors | ||
4 | (``MCA recovery''). This requires the OS to declare a page "poisoned", | ||
5 | kill the processes associated with it and avoid using it in the future. | ||
6 | |||
7 | This patchkit implements the necessary infrastructure in the VM. | ||
8 | |||
9 | To quote the overview comment: | ||
10 | |||
11 | * High level machine check handler. Handles pages reported by the | ||
12 | * hardware as being corrupted usually due to a 2bit ECC memory or cache | ||
13 | * failure. | ||
14 | * | ||
15 | * This focusses on pages detected as corrupted in the background. | ||
16 | * When the current CPU tries to consume corruption the currently | ||
17 | * running process can just be killed directly instead. This implies | ||
18 | * that if the error cannot be handled for some reason it's safe to | ||
19 | * just ignore it because no corruption has been consumed yet. Instead | ||
20 | * when that happens another machine check will happen. | ||
21 | * | ||
22 | * Handles page cache pages in various states. The tricky part | ||
23 | * here is that we can access any page asynchronous to other VM | ||
24 | * users, because memory failures could happen anytime and anywhere, | ||
25 | * possibly violating some of their assumptions. This is why this code | ||
26 | * has to be extremely careful. Generally it tries to use normal locking | ||
27 | * rules, as in get the standard locks, even if that means the | ||
28 | * error handling takes potentially a long time. | ||
29 | * | ||
30 | * Some of the operations here are somewhat inefficient and have non | ||
31 | * linear algorithmic complexity, because the data structures have not | ||
32 | * been optimized for this case. This is in particular the case | ||
33 | * for the mapping from a vma to a process. Since this case is expected | ||
34 | * to be rare we hope we can get away with this. | ||
35 | |||
36 | The code consists of the high level handler in mm/memory-failure.c, | ||
37 | a new page poison bit and various checks in the VM to handle poisoned | ||
38 | pages. | ||
39 | |||
40 | The main target right now is KVM guests, but it works for all kinds | ||
41 | of applications. KVM support requires a recent qemu-kvm release. | ||
42 | |||
43 | For the KVM use there was need for a new signal type so that | ||
44 | KVM can inject the machine check into the guest with the proper | ||
45 | address. This in theory allows other applications to handle | ||
46 | memory failures too. The expectation is that nearly all applications | ||
47 | won't do that, but some very specialized ones might. | ||
48 | |||
49 | --- | ||
50 | |||
51 | There are two (actually three) modi memory failure recovery can be in: | ||
52 | |||
53 | vm.memory_failure_recovery sysctl set to zero: | ||
54 | All memory failures cause a panic. Do not attempt recovery. | ||
55 | (on x86 this can be also affected by the tolerant level of the | ||
56 | MCE subsystem) | ||
57 | |||
58 | early kill | ||
59 | (can be controlled globally and per process) | ||
60 | Send SIGBUS to the application as soon as the error is detected. | ||
61 | This allows applications that can handle memory errors to recover in a | ||
62 | gentle way (e.g. drop the affected object). | ||
63 | This is the mode used by KVM qemu. | ||
64 | |||
65 | late kill | ||
66 | Send SIGBUS when the application runs into the corrupted page. | ||
67 | This is best for memory error unaware applications and is the default. | ||
68 | Note some pages are always handled as late kill. | ||
69 | |||
70 | --- | ||
71 | |||
72 | User control: | ||
73 | |||
74 | vm.memory_failure_recovery | ||
75 | See sysctl.txt | ||
76 | |||
77 | vm.memory_failure_early_kill | ||
78 | Enable early kill mode globally | ||
79 | |||
80 | PR_MCE_KILL | ||
81 | Set early/late kill mode/revert to system default | ||
82 | arg1: PR_MCE_KILL_CLEAR: Revert to system default | ||
83 | arg1: PR_MCE_KILL_SET: arg2 defines thread specific mode | ||
84 | PR_MCE_KILL_EARLY: Early kill | ||
85 | PR_MCE_KILL_LATE: Late kill | ||
86 | PR_MCE_KILL_DEFAULT: Use system global default | ||
87 | PR_MCE_KILL_GET | ||
88 | return current mode | ||
89 | |||
90 | |||
91 | --- | ||
92 | |||
93 | Testing: | ||
94 | |||
95 | madvise(MADV_POISON, ....) | ||
96 | (as root) | ||
97 | Poison a page in the process for testing | ||
98 | |||
99 | |||
100 | hwpoison-inject module through debugfs | ||
101 | /sys/debug/hwpoison/corrupt-pfn | ||
102 | |||
103 | Inject hwpoison fault at PFN echoed into this file | ||
104 | |||
105 | |||
106 | Architecture specific MCE injector | ||
107 | |||
108 | x86 has mce-inject, mce-test | ||
109 | |||
110 | Some portable hwpoison test programs in mce-test, see below. | ||
111 | |||
112 | --- | ||
113 | |||
114 | References: | ||
115 | |||
116 | http://halobates.de/mce-lc09-2.pdf | ||
117 | Overview presentation from LinuxCon 09 | ||
118 | |||
119 | git://git.kernel.org/pub/scm/utils/cpu/mce/mce-test.git | ||
120 | Test suite (hwpoison specific portable tests in tsrc) | ||
121 | |||
122 | git://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git | ||
123 | x86 specific injector | ||
124 | |||
125 | |||
126 | --- | ||
127 | |||
128 | Limitations: | ||
129 | |||
130 | - Not all page types are supported and never will be. Most kernel internal | ||
131 | objects cannot be recovered, only LRU pages for now. | ||
132 | - Right now hugepage support is missing. | ||
133 | |||
134 | --- | ||
135 | Andi Kleen, Oct 2009 | ||
136 | |||
diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt index 72a22f65960e..262d8e6793a3 100644 --- a/Documentation/vm/ksm.txt +++ b/Documentation/vm/ksm.txt | |||
@@ -52,15 +52,15 @@ The KSM daemon is controlled by sysfs files in /sys/kernel/mm/ksm/, | |||
52 | readable by all but writable only by root: | 52 | readable by all but writable only by root: |
53 | 53 | ||
54 | max_kernel_pages - set to maximum number of kernel pages that KSM may use | 54 | max_kernel_pages - set to maximum number of kernel pages that KSM may use |
55 | e.g. "echo 2000 > /sys/kernel/mm/ksm/max_kernel_pages" | 55 | e.g. "echo 100000 > /sys/kernel/mm/ksm/max_kernel_pages" |
56 | Value 0 imposes no limit on the kernel pages KSM may use; | 56 | Value 0 imposes no limit on the kernel pages KSM may use; |
57 | but note that any process using MADV_MERGEABLE can cause | 57 | but note that any process using MADV_MERGEABLE can cause |
58 | KSM to allocate these pages, unswappable until it exits. | 58 | KSM to allocate these pages, unswappable until it exits. |
59 | Default: 2000 (chosen for demonstration purposes) | 59 | Default: quarter of memory (chosen to not pin too much) |
60 | 60 | ||
61 | pages_to_scan - how many present pages to scan before ksmd goes to sleep | 61 | pages_to_scan - how many present pages to scan before ksmd goes to sleep |
62 | e.g. "echo 200 > /sys/kernel/mm/ksm/pages_to_scan" | 62 | e.g. "echo 100 > /sys/kernel/mm/ksm/pages_to_scan" |
63 | Default: 200 (chosen for demonstration purposes) | 63 | Default: 100 (chosen for demonstration purposes) |
64 | 64 | ||
65 | sleep_millisecs - how many milliseconds ksmd should sleep before next scan | 65 | sleep_millisecs - how many milliseconds ksmd should sleep before next scan |
66 | e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs" | 66 | e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs" |
@@ -70,7 +70,8 @@ run - set 0 to stop ksmd from running but keep merged pages, | |||
70 | set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run", | 70 | set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run", |
71 | set 2 to stop ksmd and unmerge all pages currently merged, | 71 | set 2 to stop ksmd and unmerge all pages currently merged, |
72 | but leave mergeable areas registered for next run | 72 | but leave mergeable areas registered for next run |
73 | Default: 1 (for immediate use by apps which register) | 73 | Default: 0 (must be changed to 1 to activate KSM, |
74 | except if CONFIG_SYSFS is disabled) | ||
74 | 75 | ||
75 | The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/: | 76 | The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/: |
76 | 77 | ||
@@ -86,4 +87,4 @@ pages_volatile embraces several different kinds of activity, but a high | |||
86 | proportion there would also indicate poor use of madvise MADV_MERGEABLE. | 87 | proportion there would also indicate poor use of madvise MADV_MERGEABLE. |
87 | 88 | ||
88 | Izik Eidus, | 89 | Izik Eidus, |
89 | Hugh Dickins, 30 July 2009 | 90 | Hugh Dickins, 24 Sept 2009 |
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c index fa1a30d9e9d5..4793c6aac733 100644 --- a/Documentation/vm/page-types.c +++ b/Documentation/vm/page-types.c | |||
@@ -2,7 +2,10 @@ | |||
2 | * page-types: Tool for querying page flags | 2 | * page-types: Tool for querying page flags |
3 | * | 3 | * |
4 | * Copyright (C) 2009 Intel corporation | 4 | * Copyright (C) 2009 Intel corporation |
5 | * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com> | 5 | * |
6 | * Authors: Wu Fengguang <fengguang.wu@intel.com> | ||
7 | * | ||
8 | * Released under the General Public License (GPL). | ||
6 | */ | 9 | */ |
7 | 10 | ||
8 | #define _LARGEFILE64_SOURCE | 11 | #define _LARGEFILE64_SOURCE |
@@ -69,7 +72,9 @@ | |||
69 | #define KPF_COMPOUND_TAIL 16 | 72 | #define KPF_COMPOUND_TAIL 16 |
70 | #define KPF_HUGE 17 | 73 | #define KPF_HUGE 17 |
71 | #define KPF_UNEVICTABLE 18 | 74 | #define KPF_UNEVICTABLE 18 |
75 | #define KPF_HWPOISON 19 | ||
72 | #define KPF_NOPAGE 20 | 76 | #define KPF_NOPAGE 20 |
77 | #define KPF_KSM 21 | ||
73 | 78 | ||
74 | /* [32-] kernel hacking assistances */ | 79 | /* [32-] kernel hacking assistances */ |
75 | #define KPF_RESERVED 32 | 80 | #define KPF_RESERVED 32 |
@@ -116,7 +121,9 @@ static char *page_flag_names[] = { | |||
116 | [KPF_COMPOUND_TAIL] = "T:compound_tail", | 121 | [KPF_COMPOUND_TAIL] = "T:compound_tail", |
117 | [KPF_HUGE] = "G:huge", | 122 | [KPF_HUGE] = "G:huge", |
118 | [KPF_UNEVICTABLE] = "u:unevictable", | 123 | [KPF_UNEVICTABLE] = "u:unevictable", |
124 | [KPF_HWPOISON] = "X:hwpoison", | ||
119 | [KPF_NOPAGE] = "n:nopage", | 125 | [KPF_NOPAGE] = "n:nopage", |
126 | [KPF_KSM] = "x:ksm", | ||
120 | 127 | ||
121 | [KPF_RESERVED] = "r:reserved", | 128 | [KPF_RESERVED] = "r:reserved", |
122 | [KPF_MLOCKED] = "m:mlocked", | 129 | [KPF_MLOCKED] = "m:mlocked", |
@@ -152,9 +159,6 @@ static unsigned long opt_size[MAX_ADDR_RANGES]; | |||
152 | static int nr_vmas; | 159 | static int nr_vmas; |
153 | static unsigned long pg_start[MAX_VMAS]; | 160 | static unsigned long pg_start[MAX_VMAS]; |
154 | static unsigned long pg_end[MAX_VMAS]; | 161 | static unsigned long pg_end[MAX_VMAS]; |
155 | static unsigned long voffset; | ||
156 | |||
157 | static int pagemap_fd; | ||
158 | 162 | ||
159 | #define MAX_BIT_FILTERS 64 | 163 | #define MAX_BIT_FILTERS 64 |
160 | static int nr_bit_filters; | 164 | static int nr_bit_filters; |
@@ -163,9 +167,16 @@ static uint64_t opt_bits[MAX_BIT_FILTERS]; | |||
163 | 167 | ||
164 | static int page_size; | 168 | static int page_size; |
165 | 169 | ||
166 | #define PAGES_BATCH (64 << 10) /* 64k pages */ | 170 | static int pagemap_fd; |
167 | static int kpageflags_fd; | 171 | static int kpageflags_fd; |
168 | 172 | ||
173 | static int opt_hwpoison; | ||
174 | static int opt_unpoison; | ||
175 | |||
176 | static char *hwpoison_debug_fs = "/debug/hwpoison"; | ||
177 | static int hwpoison_inject_fd; | ||
178 | static int hwpoison_forget_fd; | ||
179 | |||
169 | #define HASH_SHIFT 13 | 180 | #define HASH_SHIFT 13 |
170 | #define HASH_SIZE (1 << HASH_SHIFT) | 181 | #define HASH_SIZE (1 << HASH_SHIFT) |
171 | #define HASH_MASK (HASH_SIZE - 1) | 182 | #define HASH_MASK (HASH_SIZE - 1) |
@@ -207,6 +218,74 @@ static void fatal(const char *x, ...) | |||
207 | exit(EXIT_FAILURE); | 218 | exit(EXIT_FAILURE); |
208 | } | 219 | } |
209 | 220 | ||
221 | static int checked_open(const char *pathname, int flags) | ||
222 | { | ||
223 | int fd = open(pathname, flags); | ||
224 | |||
225 | if (fd < 0) { | ||
226 | perror(pathname); | ||
227 | exit(EXIT_FAILURE); | ||
228 | } | ||
229 | |||
230 | return fd; | ||
231 | } | ||
232 | |||
233 | /* | ||
234 | * pagemap/kpageflags routines | ||
235 | */ | ||
236 | |||
237 | static unsigned long do_u64_read(int fd, char *name, | ||
238 | uint64_t *buf, | ||
239 | unsigned long index, | ||
240 | unsigned long count) | ||
241 | { | ||
242 | long bytes; | ||
243 | |||
244 | if (index > ULONG_MAX / 8) | ||
245 | fatal("index overflow: %lu\n", index); | ||
246 | |||
247 | if (lseek(fd, index * 8, SEEK_SET) < 0) { | ||
248 | perror(name); | ||
249 | exit(EXIT_FAILURE); | ||
250 | } | ||
251 | |||
252 | bytes = read(fd, buf, count * 8); | ||
253 | if (bytes < 0) { | ||
254 | perror(name); | ||
255 | exit(EXIT_FAILURE); | ||
256 | } | ||
257 | if (bytes % 8) | ||
258 | fatal("partial read: %lu bytes\n", bytes); | ||
259 | |||
260 | return bytes / 8; | ||
261 | } | ||
262 | |||
263 | static unsigned long kpageflags_read(uint64_t *buf, | ||
264 | unsigned long index, | ||
265 | unsigned long pages) | ||
266 | { | ||
267 | return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages); | ||
268 | } | ||
269 | |||
270 | static unsigned long pagemap_read(uint64_t *buf, | ||
271 | unsigned long index, | ||
272 | unsigned long pages) | ||
273 | { | ||
274 | return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages); | ||
275 | } | ||
276 | |||
277 | static unsigned long pagemap_pfn(uint64_t val) | ||
278 | { | ||
279 | unsigned long pfn; | ||
280 | |||
281 | if (val & PM_PRESENT) | ||
282 | pfn = PM_PFRAME(val); | ||
283 | else | ||
284 | pfn = 0; | ||
285 | |||
286 | return pfn; | ||
287 | } | ||
288 | |||
210 | 289 | ||
211 | /* | 290 | /* |
212 | * page flag names | 291 | * page flag names |
@@ -255,7 +334,8 @@ static char *page_flag_longname(uint64_t flags) | |||
255 | * page list and summary | 334 | * page list and summary |
256 | */ | 335 | */ |
257 | 336 | ||
258 | static void show_page_range(unsigned long offset, uint64_t flags) | 337 | static void show_page_range(unsigned long voffset, |
338 | unsigned long offset, uint64_t flags) | ||
259 | { | 339 | { |
260 | static uint64_t flags0; | 340 | static uint64_t flags0; |
261 | static unsigned long voff; | 341 | static unsigned long voff; |
@@ -281,7 +361,8 @@ static void show_page_range(unsigned long offset, uint64_t flags) | |||
281 | count = 1; | 361 | count = 1; |
282 | } | 362 | } |
283 | 363 | ||
284 | static void show_page(unsigned long offset, uint64_t flags) | 364 | static void show_page(unsigned long voffset, |
365 | unsigned long offset, uint64_t flags) | ||
285 | { | 366 | { |
286 | if (opt_pid) | 367 | if (opt_pid) |
287 | printf("%lx\t", voffset); | 368 | printf("%lx\t", voffset); |
@@ -362,6 +443,62 @@ static uint64_t well_known_flags(uint64_t flags) | |||
362 | return flags; | 443 | return flags; |
363 | } | 444 | } |
364 | 445 | ||
446 | static uint64_t kpageflags_flags(uint64_t flags) | ||
447 | { | ||
448 | flags = expand_overloaded_flags(flags); | ||
449 | |||
450 | if (!opt_raw) | ||
451 | flags = well_known_flags(flags); | ||
452 | |||
453 | return flags; | ||
454 | } | ||
455 | |||
456 | /* | ||
457 | * page actions | ||
458 | */ | ||
459 | |||
460 | static void prepare_hwpoison_fd(void) | ||
461 | { | ||
462 | char buf[100]; | ||
463 | |||
464 | if (opt_hwpoison && !hwpoison_inject_fd) { | ||
465 | sprintf(buf, "%s/corrupt-pfn", hwpoison_debug_fs); | ||
466 | hwpoison_inject_fd = checked_open(buf, O_WRONLY); | ||
467 | } | ||
468 | |||
469 | if (opt_unpoison && !hwpoison_forget_fd) { | ||
470 | sprintf(buf, "%s/renew-pfn", hwpoison_debug_fs); | ||
471 | hwpoison_forget_fd = checked_open(buf, O_WRONLY); | ||
472 | } | ||
473 | } | ||
474 | |||
475 | static int hwpoison_page(unsigned long offset) | ||
476 | { | ||
477 | char buf[100]; | ||
478 | int len; | ||
479 | |||
480 | len = sprintf(buf, "0x%lx\n", offset); | ||
481 | len = write(hwpoison_inject_fd, buf, len); | ||
482 | if (len < 0) { | ||
483 | perror("hwpoison inject"); | ||
484 | return len; | ||
485 | } | ||
486 | return 0; | ||
487 | } | ||
488 | |||
489 | static int unpoison_page(unsigned long offset) | ||
490 | { | ||
491 | char buf[100]; | ||
492 | int len; | ||
493 | |||
494 | len = sprintf(buf, "0x%lx\n", offset); | ||
495 | len = write(hwpoison_forget_fd, buf, len); | ||
496 | if (len < 0) { | ||
497 | perror("hwpoison forget"); | ||
498 | return len; | ||
499 | } | ||
500 | return 0; | ||
501 | } | ||
365 | 502 | ||
366 | /* | 503 | /* |
367 | * page frame walker | 504 | * page frame walker |
@@ -394,104 +531,83 @@ static int hash_slot(uint64_t flags) | |||
394 | exit(EXIT_FAILURE); | 531 | exit(EXIT_FAILURE); |
395 | } | 532 | } |
396 | 533 | ||
397 | static void add_page(unsigned long offset, uint64_t flags) | 534 | static void add_page(unsigned long voffset, |
535 | unsigned long offset, uint64_t flags) | ||
398 | { | 536 | { |
399 | flags = expand_overloaded_flags(flags); | 537 | flags = kpageflags_flags(flags); |
400 | |||
401 | if (!opt_raw) | ||
402 | flags = well_known_flags(flags); | ||
403 | 538 | ||
404 | if (!bit_mask_ok(flags)) | 539 | if (!bit_mask_ok(flags)) |
405 | return; | 540 | return; |
406 | 541 | ||
542 | if (opt_hwpoison) | ||
543 | hwpoison_page(offset); | ||
544 | if (opt_unpoison) | ||
545 | unpoison_page(offset); | ||
546 | |||
407 | if (opt_list == 1) | 547 | if (opt_list == 1) |
408 | show_page_range(offset, flags); | 548 | show_page_range(voffset, offset, flags); |
409 | else if (opt_list == 2) | 549 | else if (opt_list == 2) |
410 | show_page(offset, flags); | 550 | show_page(voffset, offset, flags); |
411 | 551 | ||
412 | nr_pages[hash_slot(flags)]++; | 552 | nr_pages[hash_slot(flags)]++; |
413 | total_pages++; | 553 | total_pages++; |
414 | } | 554 | } |
415 | 555 | ||
416 | static void walk_pfn(unsigned long index, unsigned long count) | 556 | #define KPAGEFLAGS_BATCH (64 << 10) /* 64k pages */ |
557 | static void walk_pfn(unsigned long voffset, | ||
558 | unsigned long index, | ||
559 | unsigned long count) | ||
417 | { | 560 | { |
561 | uint64_t buf[KPAGEFLAGS_BATCH]; | ||
418 | unsigned long batch; | 562 | unsigned long batch; |
419 | unsigned long n; | 563 | unsigned long pages; |
420 | unsigned long i; | 564 | unsigned long i; |
421 | 565 | ||
422 | if (index > ULONG_MAX / KPF_BYTES) | ||
423 | fatal("index overflow: %lu\n", index); | ||
424 | |||
425 | lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET); | ||
426 | |||
427 | while (count) { | 566 | while (count) { |
428 | uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH]; | 567 | batch = min_t(unsigned long, count, KPAGEFLAGS_BATCH); |
429 | 568 | pages = kpageflags_read(buf, index, batch); | |
430 | batch = min_t(unsigned long, count, PAGES_BATCH); | 569 | if (pages == 0) |
431 | n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES); | ||
432 | if (n == 0) | ||
433 | break; | 570 | break; |
434 | if (n < 0) { | ||
435 | perror(PROC_KPAGEFLAGS); | ||
436 | exit(EXIT_FAILURE); | ||
437 | } | ||
438 | 571 | ||
439 | if (n % KPF_BYTES != 0) | 572 | for (i = 0; i < pages; i++) |
440 | fatal("partial read: %lu bytes\n", n); | 573 | add_page(voffset + i, index + i, buf[i]); |
441 | n = n / KPF_BYTES; | ||
442 | 574 | ||
443 | for (i = 0; i < n; i++) | 575 | index += pages; |
444 | add_page(index + i, kpageflags_buf[i]); | 576 | count -= pages; |
445 | |||
446 | index += batch; | ||
447 | count -= batch; | ||
448 | } | 577 | } |
449 | } | 578 | } |
450 | 579 | ||
451 | 580 | #define PAGEMAP_BATCH (64 << 10) | |
452 | #define PAGEMAP_BATCH 4096 | 581 | static void walk_vma(unsigned long index, unsigned long count) |
453 | static unsigned long task_pfn(unsigned long pgoff) | ||
454 | { | 582 | { |
455 | static uint64_t buf[PAGEMAP_BATCH]; | 583 | uint64_t buf[PAGEMAP_BATCH]; |
456 | static unsigned long start; | 584 | unsigned long batch; |
457 | static long count; | 585 | unsigned long pages; |
458 | uint64_t pfn; | 586 | unsigned long pfn; |
587 | unsigned long i; | ||
459 | 588 | ||
460 | if (pgoff < start || pgoff >= start + count) { | 589 | while (count) { |
461 | if (lseek64(pagemap_fd, | 590 | batch = min_t(unsigned long, count, PAGEMAP_BATCH); |
462 | (uint64_t)pgoff * PM_ENTRY_BYTES, | 591 | pages = pagemap_read(buf, index, batch); |
463 | SEEK_SET) < 0) { | 592 | if (pages == 0) |
464 | perror("pagemap seek"); | 593 | break; |
465 | exit(EXIT_FAILURE); | ||
466 | } | ||
467 | count = read(pagemap_fd, buf, sizeof(buf)); | ||
468 | if (count == 0) | ||
469 | return 0; | ||
470 | if (count < 0) { | ||
471 | perror("pagemap read"); | ||
472 | exit(EXIT_FAILURE); | ||
473 | } | ||
474 | if (count % PM_ENTRY_BYTES) { | ||
475 | fatal("pagemap read not aligned.\n"); | ||
476 | exit(EXIT_FAILURE); | ||
477 | } | ||
478 | count /= PM_ENTRY_BYTES; | ||
479 | start = pgoff; | ||
480 | } | ||
481 | 594 | ||
482 | pfn = buf[pgoff - start]; | 595 | for (i = 0; i < pages; i++) { |
483 | if (pfn & PM_PRESENT) | 596 | pfn = pagemap_pfn(buf[i]); |
484 | pfn = PM_PFRAME(pfn); | 597 | if (pfn) |
485 | else | 598 | walk_pfn(index + i, pfn, 1); |
486 | pfn = 0; | 599 | } |
487 | 600 | ||
488 | return pfn; | 601 | index += pages; |
602 | count -= pages; | ||
603 | } | ||
489 | } | 604 | } |
490 | 605 | ||
491 | static void walk_task(unsigned long index, unsigned long count) | 606 | static void walk_task(unsigned long index, unsigned long count) |
492 | { | 607 | { |
493 | int i = 0; | ||
494 | const unsigned long end = index + count; | 608 | const unsigned long end = index + count; |
609 | unsigned long start; | ||
610 | int i = 0; | ||
495 | 611 | ||
496 | while (index < end) { | 612 | while (index < end) { |
497 | 613 | ||
@@ -501,15 +617,11 @@ static void walk_task(unsigned long index, unsigned long count) | |||
501 | if (pg_start[i] >= end) | 617 | if (pg_start[i] >= end) |
502 | return; | 618 | return; |
503 | 619 | ||
504 | voffset = max_t(unsigned long, pg_start[i], index); | 620 | start = max_t(unsigned long, pg_start[i], index); |
505 | index = min_t(unsigned long, pg_end[i], end); | 621 | index = min_t(unsigned long, pg_end[i], end); |
506 | 622 | ||
507 | assert(voffset < index); | 623 | assert(start < index); |
508 | for (; voffset < index; voffset++) { | 624 | walk_vma(start, index - start); |
509 | unsigned long pfn = task_pfn(voffset); | ||
510 | if (pfn) | ||
511 | walk_pfn(pfn, 1); | ||
512 | } | ||
513 | } | 625 | } |
514 | } | 626 | } |
515 | 627 | ||
@@ -527,18 +639,14 @@ static void walk_addr_ranges(void) | |||
527 | { | 639 | { |
528 | int i; | 640 | int i; |
529 | 641 | ||
530 | kpageflags_fd = open(PROC_KPAGEFLAGS, O_RDONLY); | 642 | kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY); |
531 | if (kpageflags_fd < 0) { | ||
532 | perror(PROC_KPAGEFLAGS); | ||
533 | exit(EXIT_FAILURE); | ||
534 | } | ||
535 | 643 | ||
536 | if (!nr_addr_ranges) | 644 | if (!nr_addr_ranges) |
537 | add_addr_range(0, ULONG_MAX); | 645 | add_addr_range(0, ULONG_MAX); |
538 | 646 | ||
539 | for (i = 0; i < nr_addr_ranges; i++) | 647 | for (i = 0; i < nr_addr_ranges; i++) |
540 | if (!opt_pid) | 648 | if (!opt_pid) |
541 | walk_pfn(opt_offset[i], opt_size[i]); | 649 | walk_pfn(0, opt_offset[i], opt_size[i]); |
542 | else | 650 | else |
543 | walk_task(opt_offset[i], opt_size[i]); | 651 | walk_task(opt_offset[i], opt_size[i]); |
544 | 652 | ||
@@ -575,6 +683,8 @@ static void usage(void) | |||
575 | " -l|--list Show page details in ranges\n" | 683 | " -l|--list Show page details in ranges\n" |
576 | " -L|--list-each Show page details one by one\n" | 684 | " -L|--list-each Show page details one by one\n" |
577 | " -N|--no-summary Don't show summay info\n" | 685 | " -N|--no-summary Don't show summay info\n" |
686 | " -X|--hwpoison hwpoison pages\n" | ||
687 | " -x|--unpoison unpoison pages\n" | ||
578 | " -h|--help Show this usage message\n" | 688 | " -h|--help Show this usage message\n" |
579 | "addr-spec:\n" | 689 | "addr-spec:\n" |
580 | " N one page at offset N (unit: pages)\n" | 690 | " N one page at offset N (unit: pages)\n" |
@@ -624,11 +734,7 @@ static void parse_pid(const char *str) | |||
624 | opt_pid = parse_number(str); | 734 | opt_pid = parse_number(str); |
625 | 735 | ||
626 | sprintf(buf, "/proc/%d/pagemap", opt_pid); | 736 | sprintf(buf, "/proc/%d/pagemap", opt_pid); |
627 | pagemap_fd = open(buf, O_RDONLY); | 737 | pagemap_fd = checked_open(buf, O_RDONLY); |
628 | if (pagemap_fd < 0) { | ||
629 | perror(buf); | ||
630 | exit(EXIT_FAILURE); | ||
631 | } | ||
632 | 738 | ||
633 | sprintf(buf, "/proc/%d/maps", opt_pid); | 739 | sprintf(buf, "/proc/%d/maps", opt_pid); |
634 | file = fopen(buf, "r"); | 740 | file = fopen(buf, "r"); |
@@ -788,6 +894,8 @@ static struct option opts[] = { | |||
788 | { "list" , 0, NULL, 'l' }, | 894 | { "list" , 0, NULL, 'l' }, |
789 | { "list-each" , 0, NULL, 'L' }, | 895 | { "list-each" , 0, NULL, 'L' }, |
790 | { "no-summary", 0, NULL, 'N' }, | 896 | { "no-summary", 0, NULL, 'N' }, |
897 | { "hwpoison" , 0, NULL, 'X' }, | ||
898 | { "unpoison" , 0, NULL, 'x' }, | ||
791 | { "help" , 0, NULL, 'h' }, | 899 | { "help" , 0, NULL, 'h' }, |
792 | { NULL , 0, NULL, 0 } | 900 | { NULL , 0, NULL, 0 } |
793 | }; | 901 | }; |
@@ -799,7 +907,7 @@ int main(int argc, char *argv[]) | |||
799 | page_size = getpagesize(); | 907 | page_size = getpagesize(); |
800 | 908 | ||
801 | while ((c = getopt_long(argc, argv, | 909 | while ((c = getopt_long(argc, argv, |
802 | "rp:f:a:b:lLNh", opts, NULL)) != -1) { | 910 | "rp:f:a:b:lLNXxh", opts, NULL)) != -1) { |
803 | switch (c) { | 911 | switch (c) { |
804 | case 'r': | 912 | case 'r': |
805 | opt_raw = 1; | 913 | opt_raw = 1; |
@@ -825,6 +933,14 @@ int main(int argc, char *argv[]) | |||
825 | case 'N': | 933 | case 'N': |
826 | opt_no_summary = 1; | 934 | opt_no_summary = 1; |
827 | break; | 935 | break; |
936 | case 'X': | ||
937 | opt_hwpoison = 1; | ||
938 | prepare_hwpoison_fd(); | ||
939 | break; | ||
940 | case 'x': | ||
941 | opt_unpoison = 1; | ||
942 | prepare_hwpoison_fd(); | ||
943 | break; | ||
828 | case 'h': | 944 | case 'h': |
829 | usage(); | 945 | usage(); |
830 | exit(0); | 946 | exit(0); |
@@ -844,7 +960,7 @@ int main(int argc, char *argv[]) | |||
844 | walk_addr_ranges(); | 960 | walk_addr_ranges(); |
845 | 961 | ||
846 | if (opt_list == 1) | 962 | if (opt_list == 1) |
847 | show_page_range(0, 0); /* drain the buffer */ | 963 | show_page_range(0, 0, 0); /* drain the buffer */ |
848 | 964 | ||
849 | if (opt_no_summary) | 965 | if (opt_no_summary) |
850 | return 0; | 966 | return 0; |
diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt index 600a304a828c..df09b9650a81 100644 --- a/Documentation/vm/pagemap.txt +++ b/Documentation/vm/pagemap.txt | |||
@@ -57,7 +57,9 @@ There are three components to pagemap: | |||
57 | 16. COMPOUND_TAIL | 57 | 16. COMPOUND_TAIL |
58 | 17. HUGE | 58 | 17. HUGE |
59 | 18. UNEVICTABLE | 59 | 18. UNEVICTABLE |
60 | 19. HWPOISON | ||
60 | 20. NOPAGE | 61 | 20. NOPAGE |
62 | 21. KSM | ||
61 | 63 | ||
62 | Short descriptions to the page flags: | 64 | Short descriptions to the page flags: |
63 | 65 | ||
@@ -86,9 +88,15 @@ Short descriptions to the page flags: | |||
86 | 17. HUGE | 88 | 17. HUGE |
87 | this is an integral part of a HugeTLB page | 89 | this is an integral part of a HugeTLB page |
88 | 90 | ||
91 | 19. HWPOISON | ||
92 | hardware detected memory corruption on this page: don't touch the data! | ||
93 | |||
89 | 20. NOPAGE | 94 | 20. NOPAGE |
90 | no page frame exists at the requested address | 95 | no page frame exists at the requested address |
91 | 96 | ||
97 | 21. KSM | ||
98 | identical memory pages dynamically shared between one or more processes | ||
99 | |||
92 | [IO related page flags] | 100 | [IO related page flags] |
93 | 1. ERROR IO error occurred | 101 | 1. ERROR IO error occurred |
94 | 3. UPTODATE page has up-to-date data | 102 | 3. UPTODATE page has up-to-date data |