diff options
author | Jens Axboe <jens.axboe@oracle.com> | 2009-11-03 15:14:39 -0500 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2009-11-03 15:14:39 -0500 |
commit | 2058297d2d045cb57138c33b87cfabcc80e65186 (patch) | |
tree | 7ccffd0e162cbd7471f643561e79f23abb989a62 /Documentation | |
parent | 150e6c67f4bf6ab51e62defc41bd19a2eefe5709 (diff) | |
parent | 4b27e1bb442e964903f8a3fa6bdf33a602dc0941 (diff) |
Merge branch 'for-linus' into for-2.6.33
Conflicts:
block/cfq-iosched.c
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'Documentation')
22 files changed, 800 insertions, 180 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-usb_host b/Documentation/ABI/testing/sysfs-class-uwb_rc-wusbhc index 46b66ad1f1b4..4e8106f7cfd9 100644 --- a/Documentation/ABI/testing/sysfs-class-usb_host +++ b/Documentation/ABI/testing/sysfs-class-uwb_rc-wusbhc | |||
@@ -1,4 +1,4 @@ | |||
1 | What: /sys/class/usb_host/usb_hostN/wusb_chid | 1 | What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_chid |
2 | Date: July 2008 | 2 | Date: July 2008 |
3 | KernelVersion: 2.6.27 | 3 | KernelVersion: 2.6.27 |
4 | Contact: David Vrabel <david.vrabel@csr.com> | 4 | Contact: David Vrabel <david.vrabel@csr.com> |
@@ -9,7 +9,7 @@ Description: | |||
9 | 9 | ||
10 | Set an all zero CHID to stop the host controller. | 10 | Set an all zero CHID to stop the host controller. |
11 | 11 | ||
12 | What: /sys/class/usb_host/usb_hostN/wusb_trust_timeout | 12 | What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_trust_timeout |
13 | Date: July 2008 | 13 | Date: July 2008 |
14 | KernelVersion: 2.6.27 | 14 | KernelVersion: 2.6.27 |
15 | Contact: David Vrabel <david.vrabel@csr.com> | 15 | Contact: David Vrabel <david.vrabel@csr.com> |
diff --git a/Documentation/ABI/testing/sysfs-devices-cache_disable b/Documentation/ABI/testing/sysfs-devices-cache_disable deleted file mode 100644 index 175bb4f70512..000000000000 --- a/Documentation/ABI/testing/sysfs-devices-cache_disable +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X | ||
2 | Date: August 2008 | ||
3 | KernelVersion: 2.6.27 | ||
4 | Contact: mark.langsdorf@amd.com | ||
5 | Description: These files exist in every cpu's cache index directories. | ||
6 | There are currently 2 cache_disable_# files in each | ||
7 | directory. Reading from these files on a supported | ||
8 | processor will return that cache disable index value | ||
9 | for that processor and node. Writing to one of these | ||
10 | files will cause the specified cache index to be disabled. | |
11 | |||
12 | Currently, only AMD Family 10h Processors support cache index | ||
13 | disable, and only for their L3 caches. See the BIOS and | ||
14 | Kernel Developer's Guide at | ||
15 | http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116-Public-GH-BKDG_3.20_2-4-09.pdf | ||
16 | for formatting information and other details on the | ||
17 | cache index disable. | ||
18 | Users: joachim.deguara@amd.com | ||
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu new file mode 100644 index 000000000000..a703b9e9aeb9 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu | |||
@@ -0,0 +1,156 @@ | |||
1 | What: /sys/devices/system/cpu/ | ||
2 | Date: pre-git history | ||
3 | Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> | ||
4 | Description: | ||
5 | A collection of both global and individual CPU attributes | ||
6 | |||
7 | Individual CPU attributes are contained in subdirectories | ||
8 | named by the kernel's logical CPU number, e.g.: | ||
9 | |||
10 | /sys/devices/system/cpu/cpu#/ | ||
11 | |||
12 | What: /sys/devices/system/cpu/sched_mc_power_savings | ||
13 | /sys/devices/system/cpu/sched_smt_power_savings | ||
14 | Date: June 2006 | ||
15 | Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> | ||
16 | Description: Discover and adjust the kernel's multi-core scheduler support. | ||
17 | |||
18 | Possible values are: | ||
19 | |||
20 | 0 - No power saving load balance (default value) | ||
21 | 1 - Fill one thread/core/package first for long running threads | ||
22 | 2 - Also bias task wakeups to semi-idle cpu package for power | ||
23 | savings | ||
24 | |||
25 | sched_mc_power_savings is dependent upon SCHED_MC, which is | ||
26 | itself architecture dependent. | ||
27 | |||
28 | sched_smt_power_savings is dependent upon SCHED_SMT, which | ||
29 | is itself architecture dependent. | ||
30 | |||
31 | The two files are independent of each other. It is possible | ||
32 | that one file may be present without the other. | ||
33 | |||
34 | Introduced by git commit 5c45bf27. | ||
35 | |||
36 | |||
37 | What: /sys/devices/system/cpu/kernel_max | ||
38 | /sys/devices/system/cpu/offline | ||
39 | /sys/devices/system/cpu/online | ||
40 | /sys/devices/system/cpu/possible | ||
41 | /sys/devices/system/cpu/present | ||
42 | Date: December 2008 | ||
43 | Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> | ||
44 | Description: CPU topology files that describe kernel limits related to | ||
45 | hotplug. Briefly: | ||
46 | |||
47 | kernel_max: the maximum cpu index allowed by the kernel | ||
48 | configuration. | ||
49 | |||
50 | offline: cpus that are not online because they have been | ||
51 | HOTPLUGGED off or exceed the limit of cpus allowed by the | ||
52 | kernel configuration (kernel_max above). | ||
53 | |||
54 | online: cpus that are online and being scheduled. | ||
55 | |||
56 | possible: cpus that have been allocated resources and can be | ||
57 | brought online if they are present. | ||
58 | |||
59 | present: cpus that have been identified as being present in | ||
60 | the system. | ||
61 | |||
62 | See Documentation/cputopology.txt for more information. | ||
63 | |||
64 | |||
65 | |||
66 | What: /sys/devices/system/cpu/cpu#/node | ||
67 | Date: October 2009 | ||
68 | Contact: Linux memory management mailing list <linux-mm@kvack.org> | ||
69 | Description: Discover NUMA node a CPU belongs to | ||
70 | |||
71 | When CONFIG_NUMA is enabled, a symbolic link that points | ||
72 | to the corresponding NUMA node directory. | ||
73 | |||
74 | For example, the following symlink is created for cpu42 | ||
75 | in NUMA node 2: | ||
76 | |||
77 | /sys/devices/system/cpu/cpu42/node2 -> ../../node/node2 | ||
78 | |||
79 | |||
80 | What: /sys/devices/system/cpu/cpu#/topology/core_id | ||
81 | /sys/devices/system/cpu/cpu#/topology/core_siblings | ||
82 | /sys/devices/system/cpu/cpu#/topology/core_siblings_list | ||
83 | /sys/devices/system/cpu/cpu#/topology/physical_package_id | ||
84 | /sys/devices/system/cpu/cpu#/topology/thread_siblings | ||
85 | /sys/devices/system/cpu/cpu#/topology/thread_siblings_list | ||
86 | Date: December 2008 | ||
87 | Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> | ||
88 | Description: CPU topology files that describe a logical CPU's relationship | ||
89 | to other cores and threads in the same physical package. | ||
90 | |||
91 | One cpu# directory is created per logical CPU in the system, | ||
92 | e.g. /sys/devices/system/cpu/cpu42/. | ||
93 | |||
94 | Briefly, the files above are: | ||
95 | |||
96 | core_id: the CPU core ID of cpu#. Typically it is the | ||
97 | hardware platform's identifier (rather than the kernel's). | ||
98 | The actual value is architecture and platform dependent. | ||
99 | |||
100 | core_siblings: internal kernel map of cpu#'s hardware threads | ||
101 | within the same physical_package_id. | ||
102 | |||
103 | core_siblings_list: human-readable list of the logical CPU | ||
104 | numbers within the same physical_package_id as cpu#. | ||
105 | |||
106 | physical_package_id: physical package id of cpu#. Typically | ||
107 | corresponds to a physical socket number, but the actual value | ||
108 | is architecture and platform dependent. | ||
109 | |||
110 | thread_siblings: internal kernel map of cpu#'s hardware | |
111 | threads within the same core as cpu# | ||
112 | |||
113 | thread_siblings_list: human-readable list of cpu#'s hardware | ||
114 | threads within the same core as cpu# | ||
115 | |||
116 | See Documentation/cputopology.txt for more information. | ||
117 | |||
118 | |||
119 | What: /sys/devices/system/cpu/cpuidle/current_driver | ||
120 | /sys/devices/system/cpu/cpuidle/current_governor_ro | |
121 | Date: September 2007 | ||
122 | Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> | ||
123 | Description: Discover cpuidle policy and mechanism | ||
124 | |||
125 | Various CPUs today support multiple idle levels that are | ||
126 | differentiated by varying exit latencies and power | ||
127 | consumption during idle. | ||
128 | |||
129 | Idle policy (governor) is differentiated from idle mechanism | ||
130 | (driver) | ||
131 | |||
132 | current_driver: displays current idle mechanism | ||
133 | |||
134 | current_governor_ro: displays current idle policy | ||
135 | |||
136 | See files in Documentation/cpuidle/ for more information. | ||
137 | |||
138 | |||
139 | What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X | ||
140 | Date: August 2008 | ||
141 | KernelVersion: 2.6.27 | ||
142 | Contact: mark.langsdorf@amd.com | ||
143 | Description: These files exist in every cpu's cache index directories. | ||
144 | There are currently 2 cache_disable_# files in each | ||
145 | directory. Reading from these files on a supported | ||
146 | processor will return that cache disable index value | ||
147 | for that processor and node. Writing to one of these | ||
148 | files will cause the specified cache index to be disabled. | |
149 | |||
150 | Currently, only AMD Family 10h Processors support cache index | ||
151 | disable, and only for their L3 caches. See the BIOS and | ||
152 | Kernel Developer's Guide at | ||
153 | http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116-Public-GH-BKDG_3.20_2-4-09.pdf | ||
154 | for formatting information and other details on the | ||
155 | cache index disable. | ||
156 | Users: joachim.deguara@amd.com | ||
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 455d4e6d346d..0b33bfe7dde9 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt | |||
@@ -227,7 +227,14 @@ as the path relative to the root of the cgroup file system. | |||
227 | Each cgroup is represented by a directory in the cgroup file system | 227 | Each cgroup is represented by a directory in the cgroup file system |
228 | containing the following files describing that cgroup: | 228 | containing the following files describing that cgroup: |
229 | 229 | ||
230 | - tasks: list of tasks (by pid) attached to that cgroup | 230 | - tasks: list of tasks (by pid) attached to that cgroup. This list |
231 | is not guaranteed to be sorted. Writing a thread id into this file | ||
232 | moves the thread into this cgroup. | ||
233 | - cgroup.procs: list of tgids in the cgroup. This list is not | ||
234 | guaranteed to be sorted or free of duplicate tgids, and userspace | ||
235 | should sort/uniquify the list if this property is required. | ||
236 | Writing a tgid into this file moves all threads with that tgid into | ||
237 | this cgroup. | ||
231 | - notify_on_release flag: run the release agent on exit? | 238 | - notify_on_release flag: run the release agent on exit? |
232 | - release_agent: the path to use for release notifications (this file | 239 | - release_agent: the path to use for release notifications (this file |
233 | exists in the top cgroup only) | 240 | exists in the top cgroup only) |
@@ -374,7 +381,7 @@ Now you want to do something with this cgroup. | |||
374 | 381 | ||
375 | In this directory you can find several files: | 382 | In this directory you can find several files: |
376 | # ls | 383 | # ls |
377 | notify_on_release tasks | 384 | cgroup.procs notify_on_release tasks |
378 | (plus whatever files are added by the attached subsystems) | 385 | (plus whatever files are added by the attached subsystems) |
379 | 386 | ||
380 | Now attach your shell to this cgroup: | 387 | Now attach your shell to this cgroup: |
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt index b41f3e58aefa..f1c5c4bccd3e 100644 --- a/Documentation/cputopology.txt +++ b/Documentation/cputopology.txt | |||
@@ -1,15 +1,28 @@ | |||
1 | 1 | ||
2 | Export cpu topology info via sysfs. Items (attributes) are similar | 2 | Export CPU topology info via sysfs. Items (attributes) are similar |
3 | to /proc/cpuinfo. | 3 | to /proc/cpuinfo. |
4 | 4 | ||
5 | 1) /sys/devices/system/cpu/cpuX/topology/physical_package_id: | 5 | 1) /sys/devices/system/cpu/cpuX/topology/physical_package_id: |
6 | represent the physical package id of cpu X; | 6 | |
7 | physical package id of cpuX. Typically corresponds to a physical | ||
8 | socket number, but the actual value is architecture and platform | ||
9 | dependent. | ||
10 | |||
7 | 2) /sys/devices/system/cpu/cpuX/topology/core_id: | 11 | 2) /sys/devices/system/cpu/cpuX/topology/core_id: |
8 | represent the cpu core id to cpu X; | 12 | |
13 | the CPU core ID of cpuX. Typically it is the hardware platform's | ||
14 | identifier (rather than the kernel's). The actual value is | ||
15 | architecture and platform dependent. | ||
16 | |||
9 | 3) /sys/devices/system/cpu/cpuX/topology/thread_siblings: | 17 | 3) /sys/devices/system/cpu/cpuX/topology/thread_siblings: |
10 | represent the thread siblings to cpu X in the same core; | 18 | |
19 | internal kernel map of cpuX's hardware threads within the same | |
20 | core as cpuX | ||
21 | |||
11 | 4) /sys/devices/system/cpu/cpuX/topology/core_siblings: | 22 | 4) /sys/devices/system/cpu/cpuX/topology/core_siblings: |
12 | represent the thread siblings to cpu X in the same physical package; | 23 | |
24 | internal kernel map of cpuX's hardware threads within the same | ||
25 | physical_package_id. | ||
13 | 26 | ||
14 | To implement it in an architecture-neutral way, a new source file, | 27 | To implement it in an architecture-neutral way, a new source file, |
15 | drivers/base/topology.c, is to export the 4 attributes. | 28 | drivers/base/topology.c, is to export the 4 attributes. |
@@ -32,32 +45,32 @@ not defined by include/asm-XXX/topology.h: | |||
32 | 3) thread_siblings: just the given CPU | 45 | 3) thread_siblings: just the given CPU |
33 | 4) core_siblings: just the given CPU | 46 | 4) core_siblings: just the given CPU |
34 | 47 | ||
35 | Additionally, cpu topology information is provided under | 48 | Additionally, CPU topology information is provided under |
36 | /sys/devices/system/cpu and includes these files. The internal | 49 | /sys/devices/system/cpu and includes these files. The internal |
37 | source for the output is in brackets ("[]"). | 50 | source for the output is in brackets ("[]"). |
38 | 51 | ||
39 | kernel_max: the maximum cpu index allowed by the kernel configuration. | 52 | kernel_max: the maximum CPU index allowed by the kernel configuration. |
40 | [NR_CPUS-1] | 53 | [NR_CPUS-1] |
41 | 54 | ||
42 | offline: cpus that are not online because they have been | 55 | offline: CPUs that are not online because they have been |
43 | HOTPLUGGED off (see cpu-hotplug.txt) or exceed the limit | 56 | HOTPLUGGED off (see cpu-hotplug.txt) or exceed the limit |
44 | of cpus allowed by the kernel configuration (kernel_max | 57 | of CPUs allowed by the kernel configuration (kernel_max |
45 | above). [~cpu_online_mask + cpus >= NR_CPUS] | 58 | above). [~cpu_online_mask + cpus >= NR_CPUS] |
46 | 59 | ||
47 | online: cpus that are online and being scheduled [cpu_online_mask] | 60 | online: CPUs that are online and being scheduled [cpu_online_mask] |
48 | 61 | ||
49 | possible: cpus that have been allocated resources and can be | 62 | possible: CPUs that have been allocated resources and can be |
50 | brought online if they are present. [cpu_possible_mask] | 63 | brought online if they are present. [cpu_possible_mask] |
51 | 64 | ||
52 | present: cpus that have been identified as being present in the | 65 | present: CPUs that have been identified as being present in the |
53 | system. [cpu_present_mask] | 66 | system. [cpu_present_mask] |
54 | 67 | ||
55 | The format for the above output is compatible with cpulist_parse() | 68 | The format for the above output is compatible with cpulist_parse() |
56 | [see <linux/cpumask.h>]. Some examples follow. | 69 | [see <linux/cpumask.h>]. Some examples follow. |
57 | 70 | ||
58 | In this example, there are 64 cpus in the system but cpus 32-63 exceed | 71 | In this example, there are 64 CPUs in the system but CPUs 32-63 exceed |
59 | the kernel max which is limited to 0..31 by the NR_CPUS config option | 72 | the kernel max which is limited to 0..31 by the NR_CPUS config option |
60 | being 32. Note also that cpus 2 and 4-31 are not online but could be | 73 | being 32. Note also that CPUs 2 and 4-31 are not online but could be |
61 | brought online as they are both present and possible. | 74 | brought online as they are both present and possible. |
62 | 75 | ||
63 | kernel_max: 31 | 76 | kernel_max: 31 |
@@ -67,8 +80,8 @@ brought online as they are both present and possible. | |||
67 | present: 0-31 | 80 | present: 0-31 |
68 | 81 | ||
69 | In this example, the NR_CPUS config option is 128, but the kernel was | 82 | In this example, the NR_CPUS config option is 128, but the kernel was |
70 | started with possible_cpus=144. There are 4 cpus in the system and cpu2 | 83 | started with possible_cpus=144. There are 4 CPUs in the system and cpu2 |
71 | was manually taken offline (and is the only cpu that can be brought | 84 | was manually taken offline (and is the only CPU that can be brought |
72 | online.) | 85 | online.) |
73 | 86 | ||
74 | kernel_max: 127 | 87 | kernel_max: 127 |
@@ -78,4 +91,4 @@ online.) | |||
78 | present: 0-3 | 91 | present: 0-3 |
79 | 92 | ||
80 | See cpu-hotplug.txt for the possible_cpus=NUM kernel start parameter | 93 | See cpu-hotplug.txt for the possible_cpus=NUM kernel start parameter |
81 | as well as more information on the various cpumask's. | 94 | as well as more information on the various cpumasks. |
diff --git a/Documentation/debugging-via-ohci1394.txt b/Documentation/debugging-via-ohci1394.txt index 59a91e5c6909..611f5a5499b1 100644 --- a/Documentation/debugging-via-ohci1394.txt +++ b/Documentation/debugging-via-ohci1394.txt | |||
@@ -64,14 +64,14 @@ be used to view the printk buffer of a remote machine, even with live update. | |||
64 | 64 | ||
65 | Bernhard Kaindl enhanced firescope to support accessing 64-bit machines | 65 | Bernhard Kaindl enhanced firescope to support accessing 64-bit machines |
66 | from 32-bit firescope and vice versa: | 66 | from 32-bit firescope and vice versa: |
67 | - ftp://ftp.suse.de/private/bk/firewire/tools/firescope-0.2.2.tar.bz2 | 67 | - http://halobates.de/firewire/firescope-0.2.2.tar.bz2 |
68 | 68 | ||
69 | and he implemented fast system dump (alpha version - read README.txt): | 69 | and he implemented fast system dump (alpha version - read README.txt): |
70 | - ftp://ftp.suse.de/private/bk/firewire/tools/firedump-0.1.tar.bz2 | 70 | - http://halobates.de/firewire/firedump-0.1.tar.bz2 |
71 | 71 | ||
72 | There is also a gdb proxy for firewire which allows using gdb to access | 72 | There is also a gdb proxy for firewire which allows using gdb to access |
73 | data which can be referenced from symbols found by gdb in vmlinux: | 73 | data which can be referenced from symbols found by gdb in vmlinux: |
74 | - ftp://ftp.suse.de/private/bk/firewire/tools/fireproxy-0.33.tar.bz2 | 74 | - http://halobates.de/firewire/fireproxy-0.33.tar.bz2 |
75 | 75 | ||
76 | The latest version of this gdb proxy (fireproxy-0.34) can communicate (not | 76 | The latest version of this gdb proxy (fireproxy-0.34) can communicate (not |
77 | yet stable) with kgdb over a memory-based communication module (kgdbom). | 77 | yet stable) with kgdb over a memory-based communication module (kgdbom). |
@@ -178,7 +178,7 @@ Step-by-step instructions for using firescope with early OHCI initialization: | |||
178 | 178 | ||
179 | Notes | 179 | Notes |
180 | ----- | 180 | ----- |
181 | Documentation and specifications: ftp://ftp.suse.de/private/bk/firewire/docs | 181 | Documentation and specifications: http://halobates.de/firewire/ |
182 | 182 | ||
183 | FireWire is a trademark of Apple Inc. - for more information please refer to: | 183 | FireWire is a trademark of Apple Inc. - for more information please refer to: |
184 | http://en.wikipedia.org/wiki/FireWire | 184 | http://en.wikipedia.org/wiki/FireWire |
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 89a47b5aff07..bc693fffabe0 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -418,6 +418,14 @@ When: 2.6.33 | |||
418 | Why: Should be implemented in userspace, policy daemon. | 418 | Why: Should be implemented in userspace, policy daemon. |
419 | Who: Johannes Berg <johannes@sipsolutions.net> | 419 | Who: Johannes Berg <johannes@sipsolutions.net> |
420 | 420 | ||
421 | --------------------------- | ||
422 | |||
423 | What: CONFIG_INOTIFY | ||
424 | When: 2.6.33 | ||
425 | Why: last user (audit) will be converted to the newer more generic | ||
426 | and more easily maintained fsnotify subsystem | ||
427 | Who: Eric Paris <eparis@redhat.com> | ||
428 | |||
421 | ---------------------------- | 429 | ---------------------------- |
422 | 430 | ||
423 | What: lock_policy_rwsem_* and unlock_policy_rwsem_* will not be | 431 | What: lock_policy_rwsem_* and unlock_policy_rwsem_* will not be |
@@ -451,3 +459,33 @@ Why: OSS sound_core grabs all legacy minors (0-255) of SOUND_MAJOR | |||
451 | will also allow making ALSA OSS emulation independent of | 459 | will also allow making ALSA OSS emulation independent of |
452 | sound_core. The dependency will be broken then too. | 460 | sound_core. The dependency will be broken then too. |
453 | Who: Tejun Heo <tj@kernel.org> | 461 | Who: Tejun Heo <tj@kernel.org> |
462 | |||
463 | ---------------------------- | ||
464 | |||
465 | What: Support for VMware's guest paravirtualization technique [VMI] will be | |
466 | dropped. | ||
467 | When: 2.6.37 or earlier. | ||
468 | Why: With the recent innovations in CPU hardware acceleration technologies | ||
469 | from Intel and AMD, VMware ran a few experiments to compare these | ||
470 | techniques to guest paravirtualization technique on VMware's platform. | ||
471 | These hardware assisted virtualization techniques have outperformed the | ||
472 | performance benefits provided by VMI in most of the workloads. VMware | ||
473 | expects that these hardware features will be ubiquitous in a couple of | ||
474 | years, as a result, VMware has started a phased retirement of this | ||
475 | feature from the hypervisor. We will be removing this feature from the | ||
476 | Kernel too. Right now we are targeting 2.6.37 but can retire earlier if | ||
477 | technical reasons (read opportunity to remove major chunk of pvops) | ||
478 | arise. | ||
479 | |||
480 | Please note that VMI has always been an optimization and non-VMI kernels | ||
481 | still work fine on VMware's platform. | ||
482 | Latest versions of VMware's product which support VMI are, | ||
483 | Workstation 7.0 and VSphere 4.0 on ESX side, future maintenance | |
484 | releases for these products will continue supporting VMI. | ||
485 | |||
486 | For more details about VMI retirement take a look at this, | ||
487 | http://blogs.vmware.com/guestosguide/2009/09/vmi-retirement.html | ||
488 | |||
489 | Who: Alok N Kataria <akataria@vmware.com> | ||
490 | |||
491 | ---------------------------- | ||
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt index 570f9bd9be2b..05d5cf1d743f 100644 --- a/Documentation/filesystems/ext3.txt +++ b/Documentation/filesystems/ext3.txt | |||
@@ -123,10 +123,18 @@ resuid=n The user ID which may use the reserved blocks. | |||
123 | 123 | ||
124 | sb=n Use alternate superblock at this location. | 124 | sb=n Use alternate superblock at this location. |
125 | 125 | ||
126 | quota | 126 | quota These options are ignored by the filesystem. They |
127 | noquota | 127 | noquota are used only by quota tools to recognize volumes |
128 | grpquota | 128 | grpquota where quota should be turned on. See documentation |
129 | usrquota | 129 | usrquota in the quota-tools package for more details |
130 | (http://sourceforge.net/projects/linuxquota). | ||
131 | |||
132 | jqfmt=<quota type> These options tell filesystem details about quota | ||
133 | usrjquota=<file> so that quota information can be properly updated | ||
134 | grpjquota=<file> during journal replay. They replace the above | ||
135 | quota options. See documentation in the quota-tools | ||
136 | package for more details | ||
137 | (http://sourceforge.net/projects/linuxquota). | ||
130 | 138 | ||
131 | bh (*) ext3 associates buffer heads to data pages to | 139 | bh (*) ext3 associates buffer heads to data pages to |
132 | nobh (a) cache disk block mapping information | 140 | nobh (a) cache disk block mapping information |
diff --git a/Documentation/flexible-arrays.txt b/Documentation/flexible-arrays.txt index 84eb26808dee..cb8a3a00cc92 100644 --- a/Documentation/flexible-arrays.txt +++ b/Documentation/flexible-arrays.txt | |||
@@ -1,5 +1,5 @@ | |||
1 | Using flexible arrays in the kernel | 1 | Using flexible arrays in the kernel |
2 | Last updated for 2.6.31 | 2 | Last updated for 2.6.32 |
3 | Jonathan Corbet <corbet@lwn.net> | 3 | Jonathan Corbet <corbet@lwn.net> |
4 | 4 | ||
5 | Large contiguous memory allocations can be unreliable in the Linux kernel. | 5 | Large contiguous memory allocations can be unreliable in the Linux kernel. |
@@ -40,6 +40,13 @@ argument is passed directly to the internal memory allocation calls. With | |||
40 | the current code, using flags to ask for high memory is likely to lead to | 40 | the current code, using flags to ask for high memory is likely to lead to |
41 | notably unpleasant side effects. | 41 | notably unpleasant side effects. |
42 | 42 | ||
43 | It is also possible to define flexible arrays at compile time with: | ||
44 | |||
45 | DEFINE_FLEX_ARRAY(name, element_size, total); | ||
46 | |||
47 | This macro will result in a definition of an array with the given name; the | ||
48 | element size and total will be checked for validity at compile time. | ||
49 | |||
43 | Storing data into a flexible array is accomplished with a call to: | 50 | Storing data into a flexible array is accomplished with a call to: |
44 | 51 | ||
45 | int flex_array_put(struct flex_array *array, unsigned int element_nr, | 52 | int flex_array_put(struct flex_array *array, unsigned int element_nr, |
@@ -76,16 +83,30 @@ particular element has never been allocated. | |||
76 | Note that it is possible to get back a valid pointer for an element which | 83 | Note that it is possible to get back a valid pointer for an element which |
77 | has never been stored in the array. Memory for array elements is allocated | 84 | has never been stored in the array. Memory for array elements is allocated |
78 | one page at a time; a single allocation could provide memory for several | 85 | one page at a time; a single allocation could provide memory for several |
79 | adjacent elements. The flexible array code does not know if a specific | 86 | adjacent elements. Flexible array elements are normally initialized to the |
80 | element has been written; it only knows if the associated memory is | 87 | value FLEX_ARRAY_FREE (defined as 0x6c in <linux/poison.h>), so errors |
81 | present. So a flex_array_get() call on an element which was never stored | 88 | involving that number probably result from use of unstored array entries. |
82 | in the array has the potential to return a pointer to random data. If the | 89 | Note that, if array elements are allocated with __GFP_ZERO, they will be |
83 | caller does not have a separate way to know which elements were actually | 90 | initialized to zero and this poisoning will not happen. |
84 | stored, it might be wise, at least, to add GFP_ZERO to the flags argument | 91 | |
85 | to ensure that all elements are zeroed. | 92 | Individual elements in the array can be cleared with: |
86 | 93 | ||
87 | There is no way to remove a single element from the array. It is possible, | 94 | int flex_array_clear(struct flex_array *array, unsigned int element_nr); |
88 | though, to remove all elements with a call to: | 95 | |
96 | This function will set the given element to FLEX_ARRAY_FREE and return | ||
97 | zero. If storage for the indicated element is not allocated for the array, | ||
98 | flex_array_clear() will return -EINVAL instead. Note that clearing an | ||
99 | element does not release the storage associated with it; to reduce the | ||
100 | allocated size of an array, call: | ||
101 | |||
102 | int flex_array_shrink(struct flex_array *array); | ||
103 | |||
104 | The return value will be the number of pages of memory actually freed. | ||
105 | This function works by scanning the array for pages containing nothing but | ||
106 | FLEX_ARRAY_FREE bytes, so (1) it can be expensive, and (2) it will not work | ||
107 | if the array's pages are allocated with __GFP_ZERO. | ||
108 | |||
109 | It is possible to remove all elements of an array with a call to: | ||
89 | 110 | ||
90 | void flex_array_free_parts(struct flex_array *array); | 111 | void flex_array_free_parts(struct flex_array *array); |
91 | 112 | ||
diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface index dcbd502c8792..82def883361b 100644 --- a/Documentation/hwmon/sysfs-interface +++ b/Documentation/hwmon/sysfs-interface | |||
@@ -353,10 +353,20 @@ power[1-*]_average Average power use | |||
353 | Unit: microWatt | 353 | Unit: microWatt |
354 | RO | 354 | RO |
355 | 355 | ||
356 | power[1-*]_average_interval Power use averaging interval | 356 | power[1-*]_average_interval Power use averaging interval. A poll |
357 | notification is sent to this file if the | ||
358 | hardware changes the averaging interval. | ||
357 | Unit: milliseconds | 359 | Unit: milliseconds |
358 | RW | 360 | RW |
359 | 361 | ||
362 | power[1-*]_average_interval_max Maximum power use averaging interval | ||
363 | Unit: milliseconds | ||
364 | RO | ||
365 | |||
366 | power[1-*]_average_interval_min Minimum power use averaging interval | ||
367 | Unit: milliseconds | ||
368 | RO | ||
369 | |||
360 | power[1-*]_average_highest Historical average maximum power use | 370 | power[1-*]_average_highest Historical average maximum power use |
361 | Unit: microWatt | 371 | Unit: microWatt |
362 | RO | 372 | RO |
@@ -365,6 +375,18 @@ power[1-*]_average_lowest Historical average minimum power use | |||
365 | Unit: microWatt | 375 | Unit: microWatt |
366 | RO | 376 | RO |
367 | 377 | ||
378 | power[1-*]_average_max A poll notification is sent to | ||
379 | power[1-*]_average when power use | ||
380 | rises above this value. | ||
381 | Unit: microWatt | ||
382 | RW | ||
383 | |||
384 | power[1-*]_average_min A poll notification is sent to | ||
385 | power[1-*]_average when power use | ||
386 | sinks below this value. | ||
387 | Unit: microWatt | ||
388 | RW | ||
389 | |||
368 | power[1-*]_input Instantaneous power use | 390 | power[1-*]_input Instantaneous power use |
369 | Unit: microWatt | 391 | Unit: microWatt |
370 | RO | 392 | RO |
@@ -381,6 +403,39 @@ power[1-*]_reset_history Reset input_highest, input_lowest, | |||
381 | average_highest and average_lowest. | 403 | average_highest and average_lowest. |
382 | WO | 404 | WO |
383 | 405 | ||
406 | power[1-*]_accuracy Accuracy of the power meter. | ||
407 | Unit: Percent | ||
408 | RO | ||
409 | |||
410 | power[1-*]_alarm 1 if the system is drawing more power than the | ||
411 | cap allows; 0 otherwise. A poll notification is | ||
412 | sent to this file when the power use exceeds the | ||
413 | cap. This file only appears if the cap is known | ||
414 | to be enforced by hardware. | ||
415 | RO | ||
416 | |||
417 | power[1-*]_cap If power use rises above this limit, the | ||
418 | system should take action to reduce power use. | ||
419 | A poll notification is sent to this file if the | ||
420 | cap is changed by the hardware. The *_cap | ||
421 | files only appear if the cap is known to be | ||
422 | enforced by hardware. | ||
423 | Unit: microWatt | ||
424 | RW | ||
425 | |||
426 | power[1-*]_cap_hyst Margin of hysteresis built around capping and | ||
427 | notification. | ||
428 | Unit: microWatt | ||
429 | RW | ||
430 | |||
431 | power[1-*]_cap_max Maximum cap that can be set. | ||
432 | Unit: microWatt | ||
433 | RO | ||
434 | |||
435 | power[1-*]_cap_min Minimum cap that can be set. | ||
436 | Unit: microWatt | ||
437 | RO | ||
438 | |||
384 | ********** | 439 | ********** |
385 | * Energy * | 440 | * Energy * |
386 | ********** | 441 | ********** |
diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt index 744687dd195b..8a366959f5cc 100644 --- a/Documentation/infiniband/user_mad.txt +++ b/Documentation/infiniband/user_mad.txt | |||
@@ -128,8 +128,8 @@ Setting IsSM Capability Bit | |||
128 | To create the appropriate character device files automatically with | 128 | To create the appropriate character device files automatically with |
129 | udev, a rule like | 129 | udev, a rule like |
130 | 130 | ||
131 | KERNEL="umad*", NAME="infiniband/%k" | 131 | KERNEL=="umad*", NAME="infiniband/%k" |
132 | KERNEL="issm*", NAME="infiniband/%k" | 132 | KERNEL=="issm*", NAME="infiniband/%k" |
133 | 133 | ||
134 | can be used. This will create device nodes named | 134 | can be used. This will create device nodes named |
135 | 135 | ||
diff --git a/Documentation/infiniband/user_verbs.txt b/Documentation/infiniband/user_verbs.txt index f847501e50b5..afe3f8da9018 100644 --- a/Documentation/infiniband/user_verbs.txt +++ b/Documentation/infiniband/user_verbs.txt | |||
@@ -58,7 +58,7 @@ Memory pinning | |||
58 | To create the appropriate character device files automatically with | 58 | To create the appropriate character device files automatically with |
59 | udev, a rule like | 59 | udev, a rule like |
60 | 60 | ||
61 | KERNEL="uverbs*", NAME="infiniband/%k" | 61 | KERNEL=="uverbs*", NAME="infiniband/%k" |
62 | 62 | ||
63 | can be used. This will create device nodes named | 63 | can be used. This will create device nodes named |
64 | 64 | ||
diff --git a/Documentation/isdn/INTERFACE.CAPI b/Documentation/isdn/INTERFACE.CAPI index 686e107923ec..5fe8de5cc727 100644 --- a/Documentation/isdn/INTERFACE.CAPI +++ b/Documentation/isdn/INTERFACE.CAPI | |||
@@ -60,10 +60,9 @@ open() operation on regular files or character devices. | |||
60 | 60 | ||
61 | After a successful return from register_appl(), CAPI messages from the | 61 | After a successful return from register_appl(), CAPI messages from the |
62 | application may be passed to the driver for the device via calls to the | 62 | application may be passed to the driver for the device via calls to the |
63 | send_message() callback function. The CAPI message to send is stored in the | 63 | send_message() callback function. Conversely, the driver may call Kernel |
64 | data portion of an skb. Conversely, the driver may call Kernel CAPI's | 64 | CAPI's capi_ctr_handle_message() function to pass a received CAPI message to |
65 | capi_ctr_handle_message() function to pass a received CAPI message to Kernel | 65 | Kernel CAPI for forwarding to an application, specifying its ApplID. |
66 | CAPI for forwarding to an application, specifying its ApplID. | ||
67 | 66 | ||
68 | Deregistration requests (CAPI operation CAPI_RELEASE) from applications are | 67 | Deregistration requests (CAPI operation CAPI_RELEASE) from applications are |
69 | forwarded as calls to the release_appl() callback function, passing the same | 68 | forwarded as calls to the release_appl() callback function, passing the same |
@@ -142,6 +141,7 @@ u16 (*send_message)(struct capi_ctr *ctrlr, struct sk_buff *skb) | |||
142 | to accepting or queueing the message. Errors occurring during the | 141 | to accepting or queueing the message. Errors occurring during the |
143 | actual processing of the message should be signaled with an | 142 | actual processing of the message should be signaled with an |
144 | appropriate reply message. | 143 | appropriate reply message. |
144 | May be called in process or interrupt context. | ||
145 | Calls to this function are not serialized by Kernel CAPI, ie. it must | 145 | Calls to this function are not serialized by Kernel CAPI, ie. it must |
146 | be prepared to be re-entered. | 146 | be prepared to be re-entered. |
147 | 147 | ||
@@ -154,7 +154,8 @@ read_proc_t *ctr_read_proc | |||
154 | system entry, /proc/capi/controllers/<n>; will be called with a | 154 | system entry, /proc/capi/controllers/<n>; will be called with a |
155 | pointer to the device's capi_ctr structure as the last (data) argument | 155 | pointer to the device's capi_ctr structure as the last (data) argument |
156 | 156 | ||
157 | Note: Callback functions are never called in interrupt context. | 157 | Note: Callback functions except send_message() are never called in interrupt |
158 | context. | ||
158 | 159 | ||
159 | - to be filled in before calling capi_ctr_ready(): | 160 | - to be filled in before calling capi_ctr_ready(): |
160 | 161 | ||
@@ -171,14 +172,40 @@ u8 serial[CAPI_SERIAL_LEN] | |||
171 | value to return for CAPI_GET_SERIAL | 172 | value to return for CAPI_GET_SERIAL |
172 | 173 | ||
173 | 174 | ||
174 | 4.3 The _cmsg Structure | 175 | 4.3 SKBs |
176 | |||
177 | CAPI messages are passed between Kernel CAPI and the driver via send_message() | ||
178 | and capi_ctr_handle_message(), stored in the data portion of a socket buffer | ||
179 | (skb). Each skb contains a single CAPI message coded according to the CAPI 2.0 | ||
180 | standard. | ||
181 | |||
182 | For the data transfer messages, DATA_B3_REQ and DATA_B3_IND, the actual | ||
183 | payload data immediately follows the CAPI message itself within the same skb. | ||
184 | The Data and Data64 parameters are not used for processing. The Data64 | ||
185 | parameter may be omitted by setting the length field of the CAPI message to 22 | ||
186 | instead of 30. | ||
187 | |||
188 | |||
189 | 4.4 The _cmsg Structure | ||
175 | 190 | ||
176 | (declared in <linux/isdn/capiutil.h>) | 191 | (declared in <linux/isdn/capiutil.h>) |
177 | 192 | ||
178 | The _cmsg structure stores the contents of a CAPI 2.0 message in an easily | 193 | The _cmsg structure stores the contents of a CAPI 2.0 message in an easily |
179 | accessible form. It contains members for all possible CAPI 2.0 parameters, of | 194 | accessible form. It contains members for all possible CAPI 2.0 parameters, |
180 | which only those appearing in the message type currently being processed are | 195 | including subparameters of the Additional Info and B Protocol structured |
181 | actually used. Unused members should be set to zero. | 196 | parameters, with the following exceptions: |
197 | |||
198 | * second Calling party number (CONNECT_IND) | ||
199 | |||
200 | * Data64 (DATA_B3_REQ and DATA_B3_IND) | ||
201 | |||
202 | * Sending complete (subparameter of Additional Info, CONNECT_REQ and INFO_REQ) | ||
203 | |||
204 | * Global Configuration (subparameter of B Protocol, CONNECT_REQ, CONNECT_RESP | ||
205 | and SELECT_B_PROTOCOL_REQ) | ||
206 | |||
207 | Only those parameters appearing in the message type currently being processed | ||
208 | are actually used. Unused members should be set to zero. | ||
182 | 209 | ||
183 | Members are named after the CAPI 2.0 standard names of the parameters they | 210 | Members are named after the CAPI 2.0 standard names of the parameters they |
184 | represent. See <linux/isdn/capiutil.h> for the exact spelling. Member data | 211 | represent. See <linux/isdn/capiutil.h> for the exact spelling. Member data |
@@ -190,18 +217,19 @@ u16 for CAPI parameters of type 'word' | |||
190 | 217 | ||
191 | u32 for CAPI parameters of type 'dword' | 218 | u32 for CAPI parameters of type 'dword' |
192 | 219 | ||
193 | _cstruct for CAPI parameters of type 'struct' not containing any | 220 | _cstruct for CAPI parameters of type 'struct' |
194 | variably-sized (struct) subparameters (eg. 'Called Party Number') | ||
195 | The member is a pointer to a buffer containing the parameter in | 221 | The member is a pointer to a buffer containing the parameter in |
196 | CAPI encoding (length + content). It may also be NULL, which will | 222 | CAPI encoding (length + content). It may also be NULL, which will |
197 | be taken to represent an empty (zero length) parameter. | 223 | be taken to represent an empty (zero length) parameter. |
224 | Subparameters are stored in encoded form within the content part. | ||
198 | 225 | ||
199 | _cmstruct for CAPI parameters of type 'struct' containing 'struct' | 226 | _cmstruct alternative representation for CAPI parameters of type 'struct' |
200 | subparameters ('Additional Info' and 'B Protocol') | 227 | (used only for the 'Additional Info' and 'B Protocol' parameters) |
201 | The representation is a single byte containing one of the values: | 228 | The representation is a single byte containing one of the values: |
202 | CAPI_DEFAULT: the parameter is empty | 229 | CAPI_DEFAULT: The parameter is empty/absent. |
203 | CAPI_COMPOSE: the values of the subparameters are stored | 230 | CAPI_COMPOSE: The parameter is present. |
204 | individually in the corresponding _cmsg structure members | 231 | Subparameter values are stored individually in the corresponding |
232 | _cmsg structure members. | ||
205 | 233 | ||
206 | Functions capi_cmsg2message() and capi_message2cmsg() are provided to convert | 234 | Functions capi_cmsg2message() and capi_message2cmsg() are provided to convert |
207 | messages between their transport encoding described in the CAPI 2.0 standard | 235 | messages between their transport encoding described in the CAPI 2.0 standard |
@@ -297,3 +325,26 @@ char *capi_cmd2str(u8 Command, u8 Subcommand) | |||
297 | be NULL if the command/subcommand is not one of those defined in the | 325 | be NULL if the command/subcommand is not one of those defined in the |
298 | CAPI 2.0 standard. | 326 | CAPI 2.0 standard. |
299 | 327 | ||
328 | |||
329 | 7. Debugging | ||
330 | |||
331 | The module kernelcapi has a module parameter showcapimsgs controlling some | ||
332 | debugging output produced by the module. It can only be set when the module is | ||
333 | loaded, via a parameter "showcapimsgs=<n>" to the modprobe command, either on | ||
334 | the command line or in the configuration file. | ||
335 | |||
336 | If the lowest bit of showcapimsgs is set, kernelcapi logs controller and | ||
337 | application up and down events. | ||
338 | |||
339 | In addition, every registered CAPI controller has an associated traceflag | ||
340 | parameter controlling how CAPI messages sent from and to the controller are | ||
341 | logged. The traceflag parameter is initialized with the value of the | ||
342 | showcapimsgs parameter when the controller is registered, but can later be | ||
343 | changed via the MANUFACTURER_REQ command KCAPI_CMD_TRACE. | ||
344 | |||
345 | If the value of traceflag is non-zero, CAPI messages are logged. | ||
346 | DATA_B3 messages are only logged if the value of traceflag is > 2. | ||
347 | |||
348 | If the lowest bit of traceflag is set, only the command/subcommand and message | ||
349 | length are logged. Otherwise, kernelcapi logs a readable representation of | ||
350 | the entire message. | ||
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6fa7292947e5..9107b387e91f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -671,6 +671,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
671 | earlyprintk= [X86,SH,BLACKFIN] | 671 | earlyprintk= [X86,SH,BLACKFIN] |
672 | earlyprintk=vga | 672 | earlyprintk=vga |
673 | earlyprintk=serial[,ttySn[,baudrate]] | 673 | earlyprintk=serial[,ttySn[,baudrate]] |
674 | earlyprintk=ttySn[,baudrate] | ||
674 | earlyprintk=dbgp[debugController#] | 675 | earlyprintk=dbgp[debugController#] |
675 | 676 | ||
676 | Append ",keep" to not disable it when the real console | 677 | Append ",keep" to not disable it when the real console |
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index ba9373f82ab5..098de5bce00a 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -42,7 +42,6 @@ | |||
42 | #include <signal.h> | 42 | #include <signal.h> |
43 | #include "linux/lguest_launcher.h" | 43 | #include "linux/lguest_launcher.h" |
44 | #include "linux/virtio_config.h" | 44 | #include "linux/virtio_config.h" |
45 | #include <linux/virtio_ids.h> | ||
46 | #include "linux/virtio_net.h" | 45 | #include "linux/virtio_net.h" |
47 | #include "linux/virtio_blk.h" | 46 | #include "linux/virtio_blk.h" |
48 | #include "linux/virtio_console.h" | 47 | #include "linux/virtio_console.h" |
diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt index c6cf4a3c16e0..61bb645d50e0 100644 --- a/Documentation/networking/pktgen.txt +++ b/Documentation/networking/pktgen.txt | |||
@@ -90,6 +90,11 @@ Examples: | |||
90 | pgset "dstmac 00:00:00:00:00:00" sets MAC destination address | 90 | pgset "dstmac 00:00:00:00:00:00" sets MAC destination address |
91 | pgset "srcmac 00:00:00:00:00:00" sets MAC source address | 91 | pgset "srcmac 00:00:00:00:00:00" sets MAC source address |
92 | 92 | ||
93 | pgset "queue_map_min 0" Sets the min value of tx queue interval | ||
94 | pgset "queue_map_max 7" Sets the max value of tx queue interval, for multiqueue devices | ||
95 | To select queue 1 of a given device, | ||
96 | use queue_map_min=1 and queue_map_max=1 | ||
97 | |||
93 | pgset "src_mac_count 1" Sets the number of MACs we'll range through. | 98 | pgset "src_mac_count 1" Sets the number of MACs we'll range through. |
94 | The 'minimum' MAC is what you set with srcmac. | 99 | The 'minimum' MAC is what you set with srcmac. |
95 | 100 | ||
@@ -101,6 +106,9 @@ Examples: | |||
101 | IPDST_RND, UDPSRC_RND, | 106 | IPDST_RND, UDPSRC_RND, |
102 | UDPDST_RND, MACSRC_RND, MACDST_RND | 107 | UDPDST_RND, MACSRC_RND, MACDST_RND |
103 | MPLS_RND, VID_RND, SVID_RND | 108 | MPLS_RND, VID_RND, SVID_RND |
109 | QUEUE_MAP_RND # queue map random | ||
110 | QUEUE_MAP_CPU # queue map mirrors smp_processor_id() | ||
111 | |||
104 | 112 | ||
105 | pgset "udp_src_min 9" set UDP source port min, If < udp_src_max, then | 113 | pgset "udp_src_min 9" set UDP source port min, If < udp_src_max, then |
106 | cycle through the port range. | 114 | cycle through the port range. |
diff --git a/Documentation/scsi/hptiop.txt b/Documentation/scsi/hptiop.txt index a6eb4add1be6..9605179711f4 100644 --- a/Documentation/scsi/hptiop.txt +++ b/Documentation/scsi/hptiop.txt | |||
@@ -3,6 +3,25 @@ HIGHPOINT ROCKETRAID 3xxx/4xxx ADAPTER DRIVER (hptiop) | |||
3 | Controller Register Map | 3 | Controller Register Map |
4 | ------------------------- | 4 | ------------------------- |
5 | 5 | ||
6 | For RR44xx Intel IOP based adapters, the controller IOP is accessed via PCI BAR0 and BAR2: | ||
7 | |||
8 | BAR0 offset Register | ||
9 | 0x11C5C Link Interface IRQ Set | ||
10 | 0x11C60 Link Interface IRQ Clear | ||
11 | |||
12 | BAR2 offset Register | ||
13 | 0x10 Inbound Message Register 0 | ||
14 | 0x14 Inbound Message Register 1 | ||
15 | 0x18 Outbound Message Register 0 | ||
16 | 0x1C Outbound Message Register 1 | ||
17 | 0x20 Inbound Doorbell Register | ||
18 | 0x24 Inbound Interrupt Status Register | ||
19 | 0x28 Inbound Interrupt Mask Register | ||
20 | 0x30 Outbound Interrupt Status Register | ||
21 | 0x34 Outbound Interrupt Mask Register | ||
22 | 0x40 Inbound Queue Port | ||
23 | 0x44 Outbound Queue Port | ||
24 | |||
6 | For Intel IOP based adapters, the controller IOP is accessed via PCI BAR0: | 25 | For Intel IOP based adapters, the controller IOP is accessed via PCI BAR0: |
7 | 26 | ||
8 | BAR0 offset Register | 27 | BAR0 offset Register |
@@ -93,7 +112,7 @@ The driver exposes following sysfs attributes: | |||
93 | 112 | ||
94 | 113 | ||
95 | ----------------------------------------------------------------------------- | 114 | ----------------------------------------------------------------------------- |
96 | Copyright (C) 2006-2007 HighPoint Technologies, Inc. All Rights Reserved. | 115 | Copyright (C) 2006-2009 HighPoint Technologies, Inc. All Rights Reserved. |
97 | 116 | ||
98 | This file is distributed in the hope that it will be useful, | 117 | This file is distributed in the hope that it will be useful, |
99 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 118 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt index 75fddb40f416..4c7f9aee5c4e 100644 --- a/Documentation/sound/alsa/HD-Audio-Models.txt +++ b/Documentation/sound/alsa/HD-Audio-Models.txt | |||
@@ -359,6 +359,7 @@ STAC9227/9228/9229/927x | |||
359 | 5stack-no-fp D965 5stack without front panel | 359 | 5stack-no-fp D965 5stack without front panel |
360 | dell-3stack Dell Dimension E520 | 360 | dell-3stack Dell Dimension E520 |
361 | dell-bios Fixes with Dell BIOS setup | 361 | dell-bios Fixes with Dell BIOS setup |
362 | volknob Fixes with volume-knob widget 0x24 | ||
362 | auto BIOS setup (default) | 363 | auto BIOS setup (default) |
363 | 364 | ||
364 | STAC92HD71B* | 365 | STAC92HD71B* |
diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt new file mode 100644 index 000000000000..3ffadf8da61f --- /dev/null +++ b/Documentation/vm/hwpoison.txt | |||
@@ -0,0 +1,136 @@ | |||
1 | What is hwpoison? | ||
2 | |||
3 | Upcoming Intel CPUs have support for recovering from some memory errors | ||
4 | (``MCA recovery''). This requires the OS to declare a page "poisoned", | ||
5 | kill the processes associated with it and avoid using it in the future. | ||
6 | |||
7 | This patchkit implements the necessary infrastructure in the VM. | ||
8 | |||
9 | To quote the overview comment: | ||
10 | |||
11 | * High level machine check handler. Handles pages reported by the | ||
12 | * hardware as being corrupted usually due to a 2bit ECC memory or cache | ||
13 | * failure. | ||
14 | * | ||
15 | * This focusses on pages detected as corrupted in the background. | ||
16 | * When the current CPU tries to consume corruption the currently | ||
17 | * running process can just be killed directly instead. This implies | ||
18 | * that if the error cannot be handled for some reason it's safe to | ||
19 | * just ignore it because no corruption has been consumed yet. Instead | ||
20 | * when that happens another machine check will happen. | ||
21 | * | ||
22 | * Handles page cache pages in various states. The tricky part | ||
23 | * here is that we can access any page asynchronous to other VM | ||
24 | * users, because memory failures could happen anytime and anywhere, | ||
25 | * possibly violating some of their assumptions. This is why this code | ||
26 | * has to be extremely careful. Generally it tries to use normal locking | ||
27 | * rules, as in get the standard locks, even if that means the | ||
28 | * error handling takes potentially a long time. | ||
29 | * | ||
30 | * Some of the operations here are somewhat inefficient and have non | ||
31 | * linear algorithmic complexity, because the data structures have not | ||
32 | * been optimized for this case. This is in particular the case | ||
33 | * for the mapping from a vma to a process. Since this case is expected | ||
34 | * to be rare we hope we can get away with this. | ||
35 | |||
36 | The code consists of the high level handler in mm/memory-failure.c, | ||
37 | a new page poison bit and various checks in the VM to handle poisoned | ||
38 | pages. | ||
39 | |||
40 | The main target right now is KVM guests, but it works for all kinds | ||
41 | of applications. KVM support requires a recent qemu-kvm release. | ||
42 | |||
43 | For the KVM use there was need for a new signal type so that | ||
44 | KVM can inject the machine check into the guest with the proper | ||
45 | address. This in theory allows other applications to handle | ||
46 | memory failures too. The expectation is that nearly all applications | ||
47 | won't do that, but some very specialized ones might. | ||
48 | |||
49 | --- | ||
50 | |||
51 | There are two (actually three) modes memory failure recovery can be in: | ||
52 | |||
53 | vm.memory_failure_recovery sysctl set to zero: | ||
54 | All memory failures cause a panic. Do not attempt recovery. | ||
55 | (on x86 this can be also affected by the tolerant level of the | ||
56 | MCE subsystem) | ||
57 | |||
58 | early kill | ||
59 | (can be controlled globally and per process) | ||
60 | Send SIGBUS to the application as soon as the error is detected | ||
61 | This allows applications that can process memory errors to do so in a | ||
62 | gentle way (e.g. drop affected object) | ||
63 | This is the mode used by KVM qemu. | ||
64 | |||
65 | late kill | ||
66 | Send SIGBUS when the application runs into the corrupted page. | ||
67 | This is best for memory error unaware applications and is the default. | ||
68 | Note some pages are always handled as late kill. | ||
69 | |||
70 | --- | ||
71 | |||
72 | User control: | ||
73 | |||
74 | vm.memory_failure_recovery | ||
75 | See sysctl.txt | ||
76 | |||
77 | vm.memory_failure_early_kill | ||
78 | Enable early kill mode globally | ||
79 | |||
80 | PR_MCE_KILL | ||
81 | Set early/late kill mode/revert to system default | ||
82 | arg1: PR_MCE_KILL_CLEAR: Revert to system default | ||
83 | arg1: PR_MCE_KILL_SET: arg2 defines thread specific mode | ||
84 | PR_MCE_KILL_EARLY: Early kill | ||
85 | PR_MCE_KILL_LATE: Late kill | ||
86 | PR_MCE_KILL_DEFAULT: Use system global default | ||
87 | PR_MCE_KILL_GET | ||
88 | return current mode | ||
89 | |||
90 | |||
91 | --- | ||
92 | |||
93 | Testing: | ||
94 | |||
95 | madvise(MADV_POISON, ....) | ||
96 | (as root) | ||
97 | Poison a page in the process for testing | ||
98 | |||
99 | |||
100 | hwpoison-inject module through debugfs | ||
101 | /sys/debug/hwpoison/corrupt-pfn | ||
102 | |||
103 | Inject hwpoison fault at PFN echoed into this file | ||
104 | |||
105 | |||
106 | Architecture specific MCE injector | ||
107 | |||
108 | x86 has mce-inject, mce-test | ||
109 | |||
110 | Some portable hwpoison test programs in mce-test, see below. | ||
111 | |||
112 | --- | ||
113 | |||
114 | References: | ||
115 | |||
116 | http://halobates.de/mce-lc09-2.pdf | ||
117 | Overview presentation from LinuxCon 09 | ||
118 | |||
119 | git://git.kernel.org/pub/scm/utils/cpu/mce/mce-test.git | ||
120 | Test suite (hwpoison specific portable tests in tsrc) | ||
121 | |||
122 | git://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git | ||
123 | x86 specific injector | ||
124 | |||
125 | |||
126 | --- | ||
127 | |||
128 | Limitations: | ||
129 | |||
130 | - Not all page types are supported and never will be. Most kernel internal | ||
131 | objects cannot be recovered, only LRU pages for now. | ||
132 | - Right now hugepage support is missing. | ||
133 | |||
134 | --- | ||
135 | Andi Kleen, Oct 2009 | ||
136 | |||
diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt index 72a22f65960e..262d8e6793a3 100644 --- a/Documentation/vm/ksm.txt +++ b/Documentation/vm/ksm.txt | |||
@@ -52,15 +52,15 @@ The KSM daemon is controlled by sysfs files in /sys/kernel/mm/ksm/, | |||
52 | readable by all but writable only by root: | 52 | readable by all but writable only by root: |
53 | 53 | ||
54 | max_kernel_pages - set to maximum number of kernel pages that KSM may use | 54 | max_kernel_pages - set to maximum number of kernel pages that KSM may use |
55 | e.g. "echo 2000 > /sys/kernel/mm/ksm/max_kernel_pages" | 55 | e.g. "echo 100000 > /sys/kernel/mm/ksm/max_kernel_pages" |
56 | Value 0 imposes no limit on the kernel pages KSM may use; | 56 | Value 0 imposes no limit on the kernel pages KSM may use; |
57 | but note that any process using MADV_MERGEABLE can cause | 57 | but note that any process using MADV_MERGEABLE can cause |
58 | KSM to allocate these pages, unswappable until it exits. | 58 | KSM to allocate these pages, unswappable until it exits. |
59 | Default: 2000 (chosen for demonstration purposes) | 59 | Default: quarter of memory (chosen to not pin too much) |
60 | 60 | ||
61 | pages_to_scan - how many present pages to scan before ksmd goes to sleep | 61 | pages_to_scan - how many present pages to scan before ksmd goes to sleep |
62 | e.g. "echo 200 > /sys/kernel/mm/ksm/pages_to_scan" | 62 | e.g. "echo 100 > /sys/kernel/mm/ksm/pages_to_scan" |
63 | Default: 200 (chosen for demonstration purposes) | 63 | Default: 100 (chosen for demonstration purposes) |
64 | 64 | ||
65 | sleep_millisecs - how many milliseconds ksmd should sleep before next scan | 65 | sleep_millisecs - how many milliseconds ksmd should sleep before next scan |
66 | e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs" | 66 | e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs" |
@@ -70,7 +70,8 @@ run - set 0 to stop ksmd from running but keep merged pages, | |||
70 | set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run", | 70 | set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run", |
71 | set 2 to stop ksmd and unmerge all pages currently merged, | 71 | set 2 to stop ksmd and unmerge all pages currently merged, |
72 | but leave mergeable areas registered for next run | 72 | but leave mergeable areas registered for next run |
73 | Default: 1 (for immediate use by apps which register) | 73 | Default: 0 (must be changed to 1 to activate KSM, |
74 | except if CONFIG_SYSFS is disabled) | ||
74 | 75 | ||
75 | The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/: | 76 | The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/: |
76 | 77 | ||
@@ -86,4 +87,4 @@ pages_volatile embraces several different kinds of activity, but a high | |||
86 | proportion there would also indicate poor use of madvise MADV_MERGEABLE. | 87 | proportion there would also indicate poor use of madvise MADV_MERGEABLE. |
87 | 88 | ||
88 | Izik Eidus, | 89 | Izik Eidus, |
89 | Hugh Dickins, 30 July 2009 | 90 | Hugh Dickins, 24 Sept 2009 |
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c index fa1a30d9e9d5..3ec4f2a22585 100644 --- a/Documentation/vm/page-types.c +++ b/Documentation/vm/page-types.c | |||
@@ -2,7 +2,10 @@ | |||
2 | * page-types: Tool for querying page flags | 2 | * page-types: Tool for querying page flags |
3 | * | 3 | * |
4 | * Copyright (C) 2009 Intel corporation | 4 | * Copyright (C) 2009 Intel corporation |
5 | * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com> | 5 | * |
6 | * Authors: Wu Fengguang <fengguang.wu@intel.com> | ||
7 | * | ||
8 | * Released under the General Public License (GPL). | ||
6 | */ | 9 | */ |
7 | 10 | ||
8 | #define _LARGEFILE64_SOURCE | 11 | #define _LARGEFILE64_SOURCE |
@@ -69,7 +72,9 @@ | |||
69 | #define KPF_COMPOUND_TAIL 16 | 72 | #define KPF_COMPOUND_TAIL 16 |
70 | #define KPF_HUGE 17 | 73 | #define KPF_HUGE 17 |
71 | #define KPF_UNEVICTABLE 18 | 74 | #define KPF_UNEVICTABLE 18 |
75 | #define KPF_HWPOISON 19 | ||
72 | #define KPF_NOPAGE 20 | 76 | #define KPF_NOPAGE 20 |
77 | #define KPF_KSM 21 | ||
73 | 78 | ||
74 | /* [32-] kernel hacking assistances */ | 79 | /* [32-] kernel hacking assistances */ |
75 | #define KPF_RESERVED 32 | 80 | #define KPF_RESERVED 32 |
@@ -116,7 +121,9 @@ static char *page_flag_names[] = { | |||
116 | [KPF_COMPOUND_TAIL] = "T:compound_tail", | 121 | [KPF_COMPOUND_TAIL] = "T:compound_tail", |
117 | [KPF_HUGE] = "G:huge", | 122 | [KPF_HUGE] = "G:huge", |
118 | [KPF_UNEVICTABLE] = "u:unevictable", | 123 | [KPF_UNEVICTABLE] = "u:unevictable", |
124 | [KPF_HWPOISON] = "X:hwpoison", | ||
119 | [KPF_NOPAGE] = "n:nopage", | 125 | [KPF_NOPAGE] = "n:nopage", |
126 | [KPF_KSM] = "x:ksm", | ||
120 | 127 | ||
121 | [KPF_RESERVED] = "r:reserved", | 128 | [KPF_RESERVED] = "r:reserved", |
122 | [KPF_MLOCKED] = "m:mlocked", | 129 | [KPF_MLOCKED] = "m:mlocked", |
@@ -152,9 +159,6 @@ static unsigned long opt_size[MAX_ADDR_RANGES]; | |||
152 | static int nr_vmas; | 159 | static int nr_vmas; |
153 | static unsigned long pg_start[MAX_VMAS]; | 160 | static unsigned long pg_start[MAX_VMAS]; |
154 | static unsigned long pg_end[MAX_VMAS]; | 161 | static unsigned long pg_end[MAX_VMAS]; |
155 | static unsigned long voffset; | ||
156 | |||
157 | static int pagemap_fd; | ||
158 | 162 | ||
159 | #define MAX_BIT_FILTERS 64 | 163 | #define MAX_BIT_FILTERS 64 |
160 | static int nr_bit_filters; | 164 | static int nr_bit_filters; |
@@ -163,9 +167,16 @@ static uint64_t opt_bits[MAX_BIT_FILTERS]; | |||
163 | 167 | ||
164 | static int page_size; | 168 | static int page_size; |
165 | 169 | ||
166 | #define PAGES_BATCH (64 << 10) /* 64k pages */ | 170 | static int pagemap_fd; |
167 | static int kpageflags_fd; | 171 | static int kpageflags_fd; |
168 | 172 | ||
173 | static int opt_hwpoison; | ||
174 | static int opt_unpoison; | ||
175 | |||
176 | static char *hwpoison_debug_fs = "/debug/hwpoison"; | ||
177 | static int hwpoison_inject_fd; | ||
178 | static int hwpoison_forget_fd; | ||
179 | |||
169 | #define HASH_SHIFT 13 | 180 | #define HASH_SHIFT 13 |
170 | #define HASH_SIZE (1 << HASH_SHIFT) | 181 | #define HASH_SIZE (1 << HASH_SHIFT) |
171 | #define HASH_MASK (HASH_SIZE - 1) | 182 | #define HASH_MASK (HASH_SIZE - 1) |
@@ -207,6 +218,74 @@ static void fatal(const char *x, ...) | |||
207 | exit(EXIT_FAILURE); | 218 | exit(EXIT_FAILURE); |
208 | } | 219 | } |
209 | 220 | ||
221 | int checked_open(const char *pathname, int flags) | ||
222 | { | ||
223 | int fd = open(pathname, flags); | ||
224 | |||
225 | if (fd < 0) { | ||
226 | perror(pathname); | ||
227 | exit(EXIT_FAILURE); | ||
228 | } | ||
229 | |||
230 | return fd; | ||
231 | } | ||
232 | |||
233 | /* | ||
234 | * pagemap/kpageflags routines | ||
235 | */ | ||
236 | |||
237 | static unsigned long do_u64_read(int fd, char *name, | ||
238 | uint64_t *buf, | ||
239 | unsigned long index, | ||
240 | unsigned long count) | ||
241 | { | ||
242 | long bytes; | ||
243 | |||
244 | if (index > ULONG_MAX / 8) | ||
245 | fatal("index overflow: %lu\n", index); | ||
246 | |||
247 | if (lseek(fd, index * 8, SEEK_SET) < 0) { | ||
248 | perror(name); | ||
249 | exit(EXIT_FAILURE); | ||
250 | } | ||
251 | |||
252 | bytes = read(fd, buf, count * 8); | ||
253 | if (bytes < 0) { | ||
254 | perror(name); | ||
255 | exit(EXIT_FAILURE); | ||
256 | } | ||
257 | if (bytes % 8) | ||
258 | fatal("partial read: %lu bytes\n", bytes); | ||
259 | |||
260 | return bytes / 8; | ||
261 | } | ||
262 | |||
263 | static unsigned long kpageflags_read(uint64_t *buf, | ||
264 | unsigned long index, | ||
265 | unsigned long pages) | ||
266 | { | ||
267 | return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages); | ||
268 | } | ||
269 | |||
270 | static unsigned long pagemap_read(uint64_t *buf, | ||
271 | unsigned long index, | ||
272 | unsigned long pages) | ||
273 | { | ||
274 | return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages); | ||
275 | } | ||
276 | |||
277 | static unsigned long pagemap_pfn(uint64_t val) | ||
278 | { | ||
279 | unsigned long pfn; | ||
280 | |||
281 | if (val & PM_PRESENT) | ||
282 | pfn = PM_PFRAME(val); | ||
283 | else | ||
284 | pfn = 0; | ||
285 | |||
286 | return pfn; | ||
287 | } | ||
288 | |||
210 | 289 | ||
211 | /* | 290 | /* |
212 | * page flag names | 291 | * page flag names |
@@ -255,7 +334,8 @@ static char *page_flag_longname(uint64_t flags) | |||
255 | * page list and summary | 334 | * page list and summary |
256 | */ | 335 | */ |
257 | 336 | ||
258 | static void show_page_range(unsigned long offset, uint64_t flags) | 337 | static void show_page_range(unsigned long voffset, |
338 | unsigned long offset, uint64_t flags) | ||
259 | { | 339 | { |
260 | static uint64_t flags0; | 340 | static uint64_t flags0; |
261 | static unsigned long voff; | 341 | static unsigned long voff; |
@@ -281,7 +361,8 @@ static void show_page_range(unsigned long offset, uint64_t flags) | |||
281 | count = 1; | 361 | count = 1; |
282 | } | 362 | } |
283 | 363 | ||
284 | static void show_page(unsigned long offset, uint64_t flags) | 364 | static void show_page(unsigned long voffset, |
365 | unsigned long offset, uint64_t flags) | ||
285 | { | 366 | { |
286 | if (opt_pid) | 367 | if (opt_pid) |
287 | printf("%lx\t", voffset); | 368 | printf("%lx\t", voffset); |
@@ -362,6 +443,62 @@ static uint64_t well_known_flags(uint64_t flags) | |||
362 | return flags; | 443 | return flags; |
363 | } | 444 | } |
364 | 445 | ||
446 | static uint64_t kpageflags_flags(uint64_t flags) | ||
447 | { | ||
448 | flags = expand_overloaded_flags(flags); | ||
449 | |||
450 | if (!opt_raw) | ||
451 | flags = well_known_flags(flags); | ||
452 | |||
453 | return flags; | ||
454 | } | ||
455 | |||
456 | /* | ||
457 | * page actions | ||
458 | */ | ||
459 | |||
460 | static void prepare_hwpoison_fd(void) | ||
461 | { | ||
462 | char buf[100]; | ||
463 | |||
464 | if (opt_hwpoison && !hwpoison_inject_fd) { | ||
465 | sprintf(buf, "%s/corrupt-pfn", hwpoison_debug_fs); | ||
466 | hwpoison_inject_fd = checked_open(buf, O_WRONLY); | ||
467 | } | ||
468 | |||
469 | if (opt_unpoison && !hwpoison_forget_fd) { | ||
470 | sprintf(buf, "%s/renew-pfn", hwpoison_debug_fs); | ||
471 | hwpoison_forget_fd = checked_open(buf, O_WRONLY); | ||
472 | } | ||
473 | } | ||
474 | |||
475 | static int hwpoison_page(unsigned long offset) | ||
476 | { | ||
477 | char buf[100]; | ||
478 | int len; | ||
479 | |||
480 | len = sprintf(buf, "0x%lx\n", offset); | ||
481 | len = write(hwpoison_inject_fd, buf, len); | ||
482 | if (len < 0) { | ||
483 | perror("hwpoison inject"); | ||
484 | return len; | ||
485 | } | ||
486 | return 0; | ||
487 | } | ||
488 | |||
489 | static int unpoison_page(unsigned long offset) | ||
490 | { | ||
491 | char buf[100]; | ||
492 | int len; | ||
493 | |||
494 | len = sprintf(buf, "0x%lx\n", offset); | ||
495 | len = write(hwpoison_forget_fd, buf, len); | ||
496 | if (len < 0) { | ||
497 | perror("hwpoison forget"); | ||
498 | return len; | ||
499 | } | ||
500 | return 0; | ||
501 | } | ||
365 | 502 | ||
366 | /* | 503 | /* |
367 | * page frame walker | 504 | * page frame walker |
@@ -394,104 +531,83 @@ static int hash_slot(uint64_t flags) | |||
394 | exit(EXIT_FAILURE); | 531 | exit(EXIT_FAILURE); |
395 | } | 532 | } |
396 | 533 | ||
397 | static void add_page(unsigned long offset, uint64_t flags) | 534 | static void add_page(unsigned long voffset, |
535 | unsigned long offset, uint64_t flags) | ||
398 | { | 536 | { |
399 | flags = expand_overloaded_flags(flags); | 537 | flags = kpageflags_flags(flags); |
400 | |||
401 | if (!opt_raw) | ||
402 | flags = well_known_flags(flags); | ||
403 | 538 | ||
404 | if (!bit_mask_ok(flags)) | 539 | if (!bit_mask_ok(flags)) |
405 | return; | 540 | return; |
406 | 541 | ||
542 | if (opt_hwpoison) | ||
543 | hwpoison_page(offset); | ||
544 | if (opt_unpoison) | ||
545 | unpoison_page(offset); | ||
546 | |||
407 | if (opt_list == 1) | 547 | if (opt_list == 1) |
408 | show_page_range(offset, flags); | 548 | show_page_range(voffset, offset, flags); |
409 | else if (opt_list == 2) | 549 | else if (opt_list == 2) |
410 | show_page(offset, flags); | 550 | show_page(voffset, offset, flags); |
411 | 551 | ||
412 | nr_pages[hash_slot(flags)]++; | 552 | nr_pages[hash_slot(flags)]++; |
413 | total_pages++; | 553 | total_pages++; |
414 | } | 554 | } |
415 | 555 | ||
416 | static void walk_pfn(unsigned long index, unsigned long count) | 556 | #define KPAGEFLAGS_BATCH (64 << 10) /* 64k pages */ |
557 | static void walk_pfn(unsigned long voffset, | ||
558 | unsigned long index, | ||
559 | unsigned long count) | ||
417 | { | 560 | { |
561 | uint64_t buf[KPAGEFLAGS_BATCH]; | ||
418 | unsigned long batch; | 562 | unsigned long batch; |
419 | unsigned long n; | 563 | unsigned long pages; |
420 | unsigned long i; | 564 | unsigned long i; |
421 | 565 | ||
422 | if (index > ULONG_MAX / KPF_BYTES) | ||
423 | fatal("index overflow: %lu\n", index); | ||
424 | |||
425 | lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET); | ||
426 | |||
427 | while (count) { | 566 | while (count) { |
428 | uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH]; | 567 | batch = min_t(unsigned long, count, KPAGEFLAGS_BATCH); |
429 | 568 | pages = kpageflags_read(buf, index, batch); | |
430 | batch = min_t(unsigned long, count, PAGES_BATCH); | 569 | if (pages == 0) |
431 | n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES); | ||
432 | if (n == 0) | ||
433 | break; | 570 | break; |
434 | if (n < 0) { | ||
435 | perror(PROC_KPAGEFLAGS); | ||
436 | exit(EXIT_FAILURE); | ||
437 | } | ||
438 | 571 | ||
439 | if (n % KPF_BYTES != 0) | 572 | for (i = 0; i < pages; i++) |
440 | fatal("partial read: %lu bytes\n", n); | 573 | add_page(voffset + i, index + i, buf[i]); |
441 | n = n / KPF_BYTES; | ||
442 | 574 | ||
443 | for (i = 0; i < n; i++) | 575 | index += pages; |
444 | add_page(index + i, kpageflags_buf[i]); | 576 | count -= pages; |
445 | |||
446 | index += batch; | ||
447 | count -= batch; | ||
448 | } | 577 | } |
449 | } | 578 | } |
450 | 579 | ||
451 | 580 | #define PAGEMAP_BATCH (64 << 10) | |
452 | #define PAGEMAP_BATCH 4096 | 581 | static void walk_vma(unsigned long index, unsigned long count) |
453 | static unsigned long task_pfn(unsigned long pgoff) | ||
454 | { | 582 | { |
455 | static uint64_t buf[PAGEMAP_BATCH]; | 583 | uint64_t buf[PAGEMAP_BATCH]; |
456 | static unsigned long start; | 584 | unsigned long batch; |
457 | static long count; | 585 | unsigned long pages; |
458 | uint64_t pfn; | 586 | unsigned long pfn; |
587 | unsigned long i; | ||
459 | 588 | ||
460 | if (pgoff < start || pgoff >= start + count) { | 589 | while (count) { |
461 | if (lseek64(pagemap_fd, | 590 | batch = min_t(unsigned long, count, PAGEMAP_BATCH); |
462 | (uint64_t)pgoff * PM_ENTRY_BYTES, | 591 | pages = pagemap_read(buf, index, batch); |
463 | SEEK_SET) < 0) { | 592 | if (pages == 0) |
464 | perror("pagemap seek"); | 593 | break; |
465 | exit(EXIT_FAILURE); | ||
466 | } | ||
467 | count = read(pagemap_fd, buf, sizeof(buf)); | ||
468 | if (count == 0) | ||
469 | return 0; | ||
470 | if (count < 0) { | ||
471 | perror("pagemap read"); | ||
472 | exit(EXIT_FAILURE); | ||
473 | } | ||
474 | if (count % PM_ENTRY_BYTES) { | ||
475 | fatal("pagemap read not aligned.\n"); | ||
476 | exit(EXIT_FAILURE); | ||
477 | } | ||
478 | count /= PM_ENTRY_BYTES; | ||
479 | start = pgoff; | ||
480 | } | ||
481 | 594 | ||
482 | pfn = buf[pgoff - start]; | 595 | for (i = 0; i < pages; i++) { |
483 | if (pfn & PM_PRESENT) | 596 | pfn = pagemap_pfn(buf[i]); |
484 | pfn = PM_PFRAME(pfn); | 597 | if (pfn) |
485 | else | 598 | walk_pfn(index + i, pfn, 1); |
486 | pfn = 0; | 599 | } |
487 | 600 | ||
488 | return pfn; | 601 | index += pages; |
602 | count -= pages; | ||
603 | } | ||
489 | } | 604 | } |
490 | 605 | ||
491 | static void walk_task(unsigned long index, unsigned long count) | 606 | static void walk_task(unsigned long index, unsigned long count) |
492 | { | 607 | { |
493 | int i = 0; | ||
494 | const unsigned long end = index + count; | 608 | const unsigned long end = index + count; |
609 | unsigned long start; | ||
610 | int i = 0; | ||
495 | 611 | ||
496 | while (index < end) { | 612 | while (index < end) { |
497 | 613 | ||
@@ -501,15 +617,11 @@ static void walk_task(unsigned long index, unsigned long count) | |||
501 | if (pg_start[i] >= end) | 617 | if (pg_start[i] >= end) |
502 | return; | 618 | return; |
503 | 619 | ||
504 | voffset = max_t(unsigned long, pg_start[i], index); | 620 | start = max_t(unsigned long, pg_start[i], index); |
505 | index = min_t(unsigned long, pg_end[i], end); | 621 | index = min_t(unsigned long, pg_end[i], end); |
506 | 622 | ||
507 | assert(voffset < index); | 623 | assert(start < index); |
508 | for (; voffset < index; voffset++) { | 624 | walk_vma(start, index - start); |
509 | unsigned long pfn = task_pfn(voffset); | ||
510 | if (pfn) | ||
511 | walk_pfn(pfn, 1); | ||
512 | } | ||
513 | } | 625 | } |
514 | } | 626 | } |
515 | 627 | ||
@@ -527,18 +639,14 @@ static void walk_addr_ranges(void) | |||
527 | { | 639 | { |
528 | int i; | 640 | int i; |
529 | 641 | ||
530 | kpageflags_fd = open(PROC_KPAGEFLAGS, O_RDONLY); | 642 | kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY); |
531 | if (kpageflags_fd < 0) { | ||
532 | perror(PROC_KPAGEFLAGS); | ||
533 | exit(EXIT_FAILURE); | ||
534 | } | ||
535 | 643 | ||
536 | if (!nr_addr_ranges) | 644 | if (!nr_addr_ranges) |
537 | add_addr_range(0, ULONG_MAX); | 645 | add_addr_range(0, ULONG_MAX); |
538 | 646 | ||
539 | for (i = 0; i < nr_addr_ranges; i++) | 647 | for (i = 0; i < nr_addr_ranges; i++) |
540 | if (!opt_pid) | 648 | if (!opt_pid) |
541 | walk_pfn(opt_offset[i], opt_size[i]); | 649 | walk_pfn(0, opt_offset[i], opt_size[i]); |
542 | else | 650 | else |
543 | walk_task(opt_offset[i], opt_size[i]); | 651 | walk_task(opt_offset[i], opt_size[i]); |
544 | 652 | ||
@@ -575,6 +683,8 @@ static void usage(void) | |||
575 | " -l|--list Show page details in ranges\n" | 683 | " -l|--list Show page details in ranges\n" |
576 | " -L|--list-each Show page details one by one\n" | 684 | " -L|--list-each Show page details one by one\n" |
577 | " -N|--no-summary Don't show summay info\n" | 685 | " -N|--no-summary Don't show summay info\n" |
686 | " -X|--hwpoison hwpoison pages\n" | ||
687 | " -x|--unpoison unpoison pages\n" | ||
578 | " -h|--help Show this usage message\n" | 688 | " -h|--help Show this usage message\n" |
579 | "addr-spec:\n" | 689 | "addr-spec:\n" |
580 | " N one page at offset N (unit: pages)\n" | 690 | " N one page at offset N (unit: pages)\n" |
@@ -624,11 +734,7 @@ static void parse_pid(const char *str) | |||
624 | opt_pid = parse_number(str); | 734 | opt_pid = parse_number(str); |
625 | 735 | ||
626 | sprintf(buf, "/proc/%d/pagemap", opt_pid); | 736 | sprintf(buf, "/proc/%d/pagemap", opt_pid); |
627 | pagemap_fd = open(buf, O_RDONLY); | 737 | pagemap_fd = checked_open(buf, O_RDONLY); |
628 | if (pagemap_fd < 0) { | ||
629 | perror(buf); | ||
630 | exit(EXIT_FAILURE); | ||
631 | } | ||
632 | 738 | ||
633 | sprintf(buf, "/proc/%d/maps", opt_pid); | 739 | sprintf(buf, "/proc/%d/maps", opt_pid); |
634 | file = fopen(buf, "r"); | 740 | file = fopen(buf, "r"); |
@@ -788,6 +894,8 @@ static struct option opts[] = { | |||
788 | { "list" , 0, NULL, 'l' }, | 894 | { "list" , 0, NULL, 'l' }, |
789 | { "list-each" , 0, NULL, 'L' }, | 895 | { "list-each" , 0, NULL, 'L' }, |
790 | { "no-summary", 0, NULL, 'N' }, | 896 | { "no-summary", 0, NULL, 'N' }, |
897 | { "hwpoison" , 0, NULL, 'X' }, | ||
898 | { "unpoison" , 0, NULL, 'x' }, | ||
791 | { "help" , 0, NULL, 'h' }, | 899 | { "help" , 0, NULL, 'h' }, |
792 | { NULL , 0, NULL, 0 } | 900 | { NULL , 0, NULL, 0 } |
793 | }; | 901 | }; |
@@ -799,7 +907,7 @@ int main(int argc, char *argv[]) | |||
799 | page_size = getpagesize(); | 907 | page_size = getpagesize(); |
800 | 908 | ||
801 | while ((c = getopt_long(argc, argv, | 909 | while ((c = getopt_long(argc, argv, |
802 | "rp:f:a:b:lLNh", opts, NULL)) != -1) { | 910 | "rp:f:a:b:lLNXxh", opts, NULL)) != -1) { |
803 | switch (c) { | 911 | switch (c) { |
804 | case 'r': | 912 | case 'r': |
805 | opt_raw = 1; | 913 | opt_raw = 1; |
@@ -825,6 +933,14 @@ int main(int argc, char *argv[]) | |||
825 | case 'N': | 933 | case 'N': |
826 | opt_no_summary = 1; | 934 | opt_no_summary = 1; |
827 | break; | 935 | break; |
936 | case 'X': | ||
937 | opt_hwpoison = 1; | ||
938 | prepare_hwpoison_fd(); | ||
939 | break; | ||
940 | case 'x': | ||
941 | opt_unpoison = 1; | ||
942 | prepare_hwpoison_fd(); | ||
943 | break; | ||
828 | case 'h': | 944 | case 'h': |
829 | usage(); | 945 | usage(); |
830 | exit(0); | 946 | exit(0); |
@@ -844,7 +960,7 @@ int main(int argc, char *argv[]) | |||
844 | walk_addr_ranges(); | 960 | walk_addr_ranges(); |
845 | 961 | ||
846 | if (opt_list == 1) | 962 | if (opt_list == 1) |
847 | show_page_range(0, 0); /* drain the buffer */ | 963 | show_page_range(0, 0, 0); /* drain the buffer */ |
848 | 964 | ||
849 | if (opt_no_summary) | 965 | if (opt_no_summary) |
850 | return 0; | 966 | return 0; |
diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt index 600a304a828c..df09b9650a81 100644 --- a/Documentation/vm/pagemap.txt +++ b/Documentation/vm/pagemap.txt | |||
@@ -57,7 +57,9 @@ There are three components to pagemap: | |||
57 | 16. COMPOUND_TAIL | 57 | 16. COMPOUND_TAIL |
58 | 16. HUGE | 58 | 16. HUGE |
59 | 18. UNEVICTABLE | 59 | 18. UNEVICTABLE |
60 | 19. HWPOISON | ||
60 | 20. NOPAGE | 61 | 20. NOPAGE |
62 | 21. KSM | ||
61 | 63 | ||
62 | Short descriptions to the page flags: | 64 | Short descriptions to the page flags: |
63 | 65 | ||
@@ -86,9 +88,15 @@ Short descriptions to the page flags: | |||
86 | 17. HUGE | 88 | 17. HUGE |
87 | this is an integral part of a HugeTLB page | 89 | this is an integral part of a HugeTLB page |
88 | 90 | ||
91 | 19. HWPOISON | ||
92 | hardware detected memory corruption on this page: don't touch the data! | ||
93 | |||
89 | 20. NOPAGE | 94 | 20. NOPAGE |
90 | no page frame exists at the requested address | 95 | no page frame exists at the requested address |
91 | 96 | ||
97 | 21. KSM | ||
98 | identical memory pages dynamically shared between one or more processes | ||
99 | |||
92 | [IO related page flags] | 100 | [IO related page flags] |
93 | 1. ERROR IO error occurred | 101 | 1. ERROR IO error occurred |
94 | 3. UPTODATE page has up-to-date data | 102 | 3. UPTODATE page has up-to-date data |