aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/cgroups/cgroups.txt11
-rw-r--r--Documentation/filesystems/ext3.txt16
-rw-r--r--Documentation/infiniband/user_mad.txt4
-rw-r--r--Documentation/infiniband/user_verbs.txt2
-rw-r--r--Documentation/isdn/INTERFACE.CAPI83
-rw-r--r--Documentation/kernel-parameters.txt1
-rw-r--r--Documentation/networking/pktgen.txt8
-rw-r--r--Documentation/scsi/hptiop.txt21
-rw-r--r--Documentation/vm/ksm.txt13
-rw-r--r--Documentation/vm/page-types.c304
-rw-r--r--Documentation/vm/pagemap.txt8
11 files changed, 345 insertions, 126 deletions
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 455d4e6d346d..0b33bfe7dde9 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -227,7 +227,14 @@ as the path relative to the root of the cgroup file system.
227Each cgroup is represented by a directory in the cgroup file system 227Each cgroup is represented by a directory in the cgroup file system
228containing the following files describing that cgroup: 228containing the following files describing that cgroup:
229 229
230 - tasks: list of tasks (by pid) attached to that cgroup 230 - tasks: list of tasks (by pid) attached to that cgroup. This list
231 is not guaranteed to be sorted. Writing a thread id into this file
232 moves the thread into this cgroup.
233 - cgroup.procs: list of tgids in the cgroup. This list is not
234 guaranteed to be sorted or free of duplicate tgids, and userspace
235 should sort/uniquify the list if this property is required.
236 Writing a tgid into this file moves all threads with that tgid into
237 this cgroup.
231 - notify_on_release flag: run the release agent on exit? 238 - notify_on_release flag: run the release agent on exit?
232 - release_agent: the path to use for release notifications (this file 239 - release_agent: the path to use for release notifications (this file
233 exists in the top cgroup only) 240 exists in the top cgroup only)
@@ -374,7 +381,7 @@ Now you want to do something with this cgroup.
374 381
375In this directory you can find several files: 382In this directory you can find several files:
376# ls 383# ls
377notify_on_release tasks 384cgroup.procs notify_on_release tasks
378(plus whatever files added by the attached subsystems) 385(plus whatever files added by the attached subsystems)
379 386
380Now attach your shell to this cgroup: 387Now attach your shell to this cgroup:
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt
index 570f9bd9be2b..05d5cf1d743f 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.txt
@@ -123,10 +123,18 @@ resuid=n The user ID which may use the reserved blocks.
123 123
124sb=n Use alternate superblock at this location. 124sb=n Use alternate superblock at this location.
125 125
126quota 126quota These options are ignored by the filesystem. They
127noquota 127noquota are used only by quota tools to recognize volumes
128grpquota 128grpquota where quota should be turned on. See documentation
129usrquota 129usrquota in the quota-tools package for more details
130 (http://sourceforge.net/projects/linuxquota).
131
132jqfmt=<quota type> These options tell filesystem details about quota
133usrjquota=<file> so that quota information can be properly updated
134grpjquota=<file> during journal replay. They replace the above
135 quota options. See documentation in the quota-tools
136 package for more details
137 (http://sourceforge.net/projects/linuxquota).
130 138
131bh (*) ext3 associates buffer heads to data pages to 139bh (*) ext3 associates buffer heads to data pages to
132nobh (a) cache disk block mapping information 140nobh (a) cache disk block mapping information
diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt
index 744687dd195b..8a366959f5cc 100644
--- a/Documentation/infiniband/user_mad.txt
+++ b/Documentation/infiniband/user_mad.txt
@@ -128,8 +128,8 @@ Setting IsSM Capability Bit
128 To create the appropriate character device files automatically with 128 To create the appropriate character device files automatically with
129 udev, a rule like 129 udev, a rule like
130 130
131 KERNEL="umad*", NAME="infiniband/%k" 131 KERNEL=="umad*", NAME="infiniband/%k"
132 KERNEL="issm*", NAME="infiniband/%k" 132 KERNEL=="issm*", NAME="infiniband/%k"
133 133
134 can be used. This will create device nodes named 134 can be used. This will create device nodes named
135 135
diff --git a/Documentation/infiniband/user_verbs.txt b/Documentation/infiniband/user_verbs.txt
index f847501e50b5..afe3f8da9018 100644
--- a/Documentation/infiniband/user_verbs.txt
+++ b/Documentation/infiniband/user_verbs.txt
@@ -58,7 +58,7 @@ Memory pinning
58 To create the appropriate character device files automatically with 58 To create the appropriate character device files automatically with
59 udev, a rule like 59 udev, a rule like
60 60
61 KERNEL="uverbs*", NAME="infiniband/%k" 61 KERNEL=="uverbs*", NAME="infiniband/%k"
62 62
63 can be used. This will create device nodes named 63 can be used. This will create device nodes named
64 64
diff --git a/Documentation/isdn/INTERFACE.CAPI b/Documentation/isdn/INTERFACE.CAPI
index 686e107923ec..5fe8de5cc727 100644
--- a/Documentation/isdn/INTERFACE.CAPI
+++ b/Documentation/isdn/INTERFACE.CAPI
@@ -60,10 +60,9 @@ open() operation on regular files or character devices.
60 60
61After a successful return from register_appl(), CAPI messages from the 61After a successful return from register_appl(), CAPI messages from the
62application may be passed to the driver for the device via calls to the 62application may be passed to the driver for the device via calls to the
63send_message() callback function. The CAPI message to send is stored in the 63send_message() callback function. Conversely, the driver may call Kernel
64data portion of an skb. Conversely, the driver may call Kernel CAPI's 64CAPI's capi_ctr_handle_message() function to pass a received CAPI message to
65capi_ctr_handle_message() function to pass a received CAPI message to Kernel 65Kernel CAPI for forwarding to an application, specifying its ApplID.
66CAPI for forwarding to an application, specifying its ApplID.
67 66
68Deregistration requests (CAPI operation CAPI_RELEASE) from applications are 67Deregistration requests (CAPI operation CAPI_RELEASE) from applications are
69forwarded as calls to the release_appl() callback function, passing the same 68forwarded as calls to the release_appl() callback function, passing the same
@@ -142,6 +141,7 @@ u16 (*send_message)(struct capi_ctr *ctrlr, struct sk_buff *skb)
142 to accepting or queueing the message. Errors occurring during the 141 to accepting or queueing the message. Errors occurring during the
143 actual processing of the message should be signaled with an 142 actual processing of the message should be signaled with an
144 appropriate reply message. 143 appropriate reply message.
144 May be called in process or interrupt context.
145 Calls to this function are not serialized by Kernel CAPI, ie. it must 145 Calls to this function are not serialized by Kernel CAPI, ie. it must
146 be prepared to be re-entered. 146 be prepared to be re-entered.
147 147
@@ -154,7 +154,8 @@ read_proc_t *ctr_read_proc
154 system entry, /proc/capi/controllers/<n>; will be called with a 154 system entry, /proc/capi/controllers/<n>; will be called with a
155 pointer to the device's capi_ctr structure as the last (data) argument 155 pointer to the device's capi_ctr structure as the last (data) argument
156 156
157Note: Callback functions are never called in interrupt context. 157Note: Callback functions except send_message() are never called in interrupt
158context.
158 159
159- to be filled in before calling capi_ctr_ready(): 160- to be filled in before calling capi_ctr_ready():
160 161
@@ -171,14 +172,40 @@ u8 serial[CAPI_SERIAL_LEN]
171 value to return for CAPI_GET_SERIAL 172 value to return for CAPI_GET_SERIAL
172 173
173 174
1744.3 The _cmsg Structure 1754.3 SKBs
176
177CAPI messages are passed between Kernel CAPI and the driver via send_message()
178and capi_ctr_handle_message(), stored in the data portion of a socket buffer
179(skb). Each skb contains a single CAPI message coded according to the CAPI 2.0
180standard.
181
182For the data transfer messages, DATA_B3_REQ and DATA_B3_IND, the actual
183payload data immediately follows the CAPI message itself within the same skb.
184The Data and Data64 parameters are not used for processing. The Data64
185parameter may be omitted by setting the length field of the CAPI message to 22
186instead of 30.
187
188
1894.4 The _cmsg Structure
175 190
176(declared in <linux/isdn/capiutil.h>) 191(declared in <linux/isdn/capiutil.h>)
177 192
178The _cmsg structure stores the contents of a CAPI 2.0 message in an easily 193The _cmsg structure stores the contents of a CAPI 2.0 message in an easily
179accessible form. It contains members for all possible CAPI 2.0 parameters, of 194accessible form. It contains members for all possible CAPI 2.0 parameters,
180which only those appearing in the message type currently being processed are 195including subparameters of the Additional Info and B Protocol structured
181actually used. Unused members should be set to zero. 196parameters, with the following exceptions:
197
198* second Calling party number (CONNECT_IND)
199
200* Data64 (DATA_B3_REQ and DATA_B3_IND)
201
202* Sending complete (subparameter of Additional Info, CONNECT_REQ and INFO_REQ)
203
204* Global Configuration (subparameter of B Protocol, CONNECT_REQ, CONNECT_RESP
205 and SELECT_B_PROTOCOL_REQ)
206
207Only those parameters appearing in the message type currently being processed
208are actually used. Unused members should be set to zero.
182 209
183Members are named after the CAPI 2.0 standard names of the parameters they 210Members are named after the CAPI 2.0 standard names of the parameters they
184represent. See <linux/isdn/capiutil.h> for the exact spelling. Member data 211represent. See <linux/isdn/capiutil.h> for the exact spelling. Member data
@@ -190,18 +217,19 @@ u16 for CAPI parameters of type 'word'
190 217
191u32 for CAPI parameters of type 'dword' 218u32 for CAPI parameters of type 'dword'
192 219
193_cstruct for CAPI parameters of type 'struct' not containing any 220_cstruct for CAPI parameters of type 'struct'
194 variably-sized (struct) subparameters (eg. 'Called Party Number')
195 The member is a pointer to a buffer containing the parameter in 221 The member is a pointer to a buffer containing the parameter in
196 CAPI encoding (length + content). It may also be NULL, which will 222 CAPI encoding (length + content). It may also be NULL, which will
197 be taken to represent an empty (zero length) parameter. 223 be taken to represent an empty (zero length) parameter.
224 Subparameters are stored in encoded form within the content part.
198 225
199_cmstruct for CAPI parameters of type 'struct' containing 'struct' 226_cmstruct alternative representation for CAPI parameters of type 'struct'
200 subparameters ('Additional Info' and 'B Protocol') 227 (used only for the 'Additional Info' and 'B Protocol' parameters)
201 The representation is a single byte containing one of the values: 228 The representation is a single byte containing one of the values:
202 CAPI_DEFAULT: the parameter is empty 229 CAPI_DEFAULT: The parameter is empty/absent.
203 CAPI_COMPOSE: the values of the subparameters are stored 230 CAPI_COMPOSE: The parameter is present.
204 individually in the corresponding _cmsg structure members 231 Subparameter values are stored individually in the corresponding
232 _cmsg structure members.
205 233
206Functions capi_cmsg2message() and capi_message2cmsg() are provided to convert 234Functions capi_cmsg2message() and capi_message2cmsg() are provided to convert
207messages between their transport encoding described in the CAPI 2.0 standard 235messages between their transport encoding described in the CAPI 2.0 standard
@@ -297,3 +325,26 @@ char *capi_cmd2str(u8 Command, u8 Subcommand)
297 be NULL if the command/subcommand is not one of those defined in the 325 be NULL if the command/subcommand is not one of those defined in the
298 CAPI 2.0 standard. 326 CAPI 2.0 standard.
299 327
328
3297. Debugging
330
331The module kernelcapi has a module parameter showcapimsgs controlling some
332debugging output produced by the module. It can only be set when the module is
333loaded, via a parameter "showcapimsgs=<n>" to the modprobe command, either on
334the command line or in the configuration file.
335
336If the lowest bit of showcapimsgs is set, kernelcapi logs controller and
337application up and down events.
338
339In addition, every registered CAPI controller has an associated traceflag
340parameter controlling how CAPI messages sent from and to tha controller are
341logged. The traceflag parameter is initialized with the value of the
342showcapimsgs parameter when the controller is registered, but can later be
343changed via the MANUFACTURER_REQ command KCAPI_CMD_TRACE.
344
345If the value of traceflag is non-zero, CAPI messages are logged.
346DATA_B3 messages are only logged if the value of traceflag is > 2.
347
348If the lowest bit of traceflag is set, only the command/subcommand and message
349length are logged. Otherwise, kernelcapi logs a readable representation of
350the entire message.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 6fa7292947e5..9107b387e91f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -671,6 +671,7 @@ and is between 256 and 4096 characters. It is defined in the file
671 earlyprintk= [X86,SH,BLACKFIN] 671 earlyprintk= [X86,SH,BLACKFIN]
672 earlyprintk=vga 672 earlyprintk=vga
673 earlyprintk=serial[,ttySn[,baudrate]] 673 earlyprintk=serial[,ttySn[,baudrate]]
674 earlyprintk=ttySn[,baudrate]
674 earlyprintk=dbgp[debugController#] 675 earlyprintk=dbgp[debugController#]
675 676
676 Append ",keep" to not disable it when the real console 677 Append ",keep" to not disable it when the real console
diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt
index c6cf4a3c16e0..61bb645d50e0 100644
--- a/Documentation/networking/pktgen.txt
+++ b/Documentation/networking/pktgen.txt
@@ -90,6 +90,11 @@ Examples:
90 pgset "dstmac 00:00:00:00:00:00" sets MAC destination address 90 pgset "dstmac 00:00:00:00:00:00" sets MAC destination address
91 pgset "srcmac 00:00:00:00:00:00" sets MAC source address 91 pgset "srcmac 00:00:00:00:00:00" sets MAC source address
92 92
93 pgset "queue_map_min 0" Sets the min value of tx queue interval
94 pgset "queue_map_max 7" Sets the max value of tx queue interval, for multiqueue devices
95 To select queue 1 of a given device,
96 use queue_map_min=1 and queue_map_max=1
97
93 pgset "src_mac_count 1" Sets the number of MACs we'll range through. 98 pgset "src_mac_count 1" Sets the number of MACs we'll range through.
94 The 'minimum' MAC is what you set with srcmac. 99 The 'minimum' MAC is what you set with srcmac.
95 100
@@ -101,6 +106,9 @@ Examples:
101 IPDST_RND, UDPSRC_RND, 106 IPDST_RND, UDPSRC_RND,
102 UDPDST_RND, MACSRC_RND, MACDST_RND 107 UDPDST_RND, MACSRC_RND, MACDST_RND
103 MPLS_RND, VID_RND, SVID_RND 108 MPLS_RND, VID_RND, SVID_RND
109 QUEUE_MAP_RND # queue map random
110 QUEUE_MAP_CPU # queue map mirrors smp_processor_id()
111
104 112
105 pgset "udp_src_min 9" set UDP source port min, If < udp_src_max, then 113 pgset "udp_src_min 9" set UDP source port min, If < udp_src_max, then
106 cycle through the port range. 114 cycle through the port range.
diff --git a/Documentation/scsi/hptiop.txt b/Documentation/scsi/hptiop.txt
index a6eb4add1be6..9605179711f4 100644
--- a/Documentation/scsi/hptiop.txt
+++ b/Documentation/scsi/hptiop.txt
@@ -3,6 +3,25 @@ HIGHPOINT ROCKETRAID 3xxx/4xxx ADAPTER DRIVER (hptiop)
3Controller Register Map 3Controller Register Map
4------------------------- 4-------------------------
5 5
6For RR44xx Intel IOP based adapters, the controller IOP is accessed via PCI BAR0 and BAR2:
7
8 BAR0 offset Register
9 0x11C5C Link Interface IRQ Set
10 0x11C60 Link Interface IRQ Clear
11
12 BAR2 offset Register
13 0x10 Inbound Message Register 0
14 0x14 Inbound Message Register 1
15 0x18 Outbound Message Register 0
16 0x1C Outbound Message Register 1
17 0x20 Inbound Doorbell Register
18 0x24 Inbound Interrupt Status Register
19 0x28 Inbound Interrupt Mask Register
20 0x30 Outbound Interrupt Status Register
21 0x34 Outbound Interrupt Mask Register
22 0x40 Inbound Queue Port
23 0x44 Outbound Queue Port
24
6For Intel IOP based adapters, the controller IOP is accessed via PCI BAR0: 25For Intel IOP based adapters, the controller IOP is accessed via PCI BAR0:
7 26
8 BAR0 offset Register 27 BAR0 offset Register
@@ -93,7 +112,7 @@ The driver exposes following sysfs attributes:
93 112
94 113
95----------------------------------------------------------------------------- 114-----------------------------------------------------------------------------
96Copyright (C) 2006-2007 HighPoint Technologies, Inc. All Rights Reserved. 115Copyright (C) 2006-2009 HighPoint Technologies, Inc. All Rights Reserved.
97 116
98 This file is distributed in the hope that it will be useful, 117 This file is distributed in the hope that it will be useful,
99 but WITHOUT ANY WARRANTY; without even the implied warranty of 118 but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt
index 72a22f65960e..262d8e6793a3 100644
--- a/Documentation/vm/ksm.txt
+++ b/Documentation/vm/ksm.txt
@@ -52,15 +52,15 @@ The KSM daemon is controlled by sysfs files in /sys/kernel/mm/ksm/,
52readable by all but writable only by root: 52readable by all but writable only by root:
53 53
54max_kernel_pages - set to maximum number of kernel pages that KSM may use 54max_kernel_pages - set to maximum number of kernel pages that KSM may use
55 e.g. "echo 2000 > /sys/kernel/mm/ksm/max_kernel_pages" 55 e.g. "echo 100000 > /sys/kernel/mm/ksm/max_kernel_pages"
56 Value 0 imposes no limit on the kernel pages KSM may use; 56 Value 0 imposes no limit on the kernel pages KSM may use;
57 but note that any process using MADV_MERGEABLE can cause 57 but note that any process using MADV_MERGEABLE can cause
58 KSM to allocate these pages, unswappable until it exits. 58 KSM to allocate these pages, unswappable until it exits.
59 Default: 2000 (chosen for demonstration purposes) 59 Default: quarter of memory (chosen to not pin too much)
60 60
61pages_to_scan - how many present pages to scan before ksmd goes to sleep 61pages_to_scan - how many present pages to scan before ksmd goes to sleep
62 e.g. "echo 200 > /sys/kernel/mm/ksm/pages_to_scan" 62 e.g. "echo 100 > /sys/kernel/mm/ksm/pages_to_scan"
63 Default: 200 (chosen for demonstration purposes) 63 Default: 100 (chosen for demonstration purposes)
64 64
65sleep_millisecs - how many milliseconds ksmd should sleep before next scan 65sleep_millisecs - how many milliseconds ksmd should sleep before next scan
66 e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs" 66 e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs"
@@ -70,7 +70,8 @@ run - set 0 to stop ksmd from running but keep merged pages,
70 set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run", 70 set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run",
71 set 2 to stop ksmd and unmerge all pages currently merged, 71 set 2 to stop ksmd and unmerge all pages currently merged,
72 but leave mergeable areas registered for next run 72 but leave mergeable areas registered for next run
73 Default: 1 (for immediate use by apps which register) 73 Default: 0 (must be changed to 1 to activate KSM,
74 except if CONFIG_SYSFS is disabled)
74 75
75The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/: 76The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/:
76 77
@@ -86,4 +87,4 @@ pages_volatile embraces several different kinds of activity, but a high
86proportion there would also indicate poor use of madvise MADV_MERGEABLE. 87proportion there would also indicate poor use of madvise MADV_MERGEABLE.
87 88
88Izik Eidus, 89Izik Eidus,
89Hugh Dickins, 30 July 2009 90Hugh Dickins, 24 Sept 2009
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c
index fa1a30d9e9d5..3ec4f2a22585 100644
--- a/Documentation/vm/page-types.c
+++ b/Documentation/vm/page-types.c
@@ -2,7 +2,10 @@
2 * page-types: Tool for querying page flags 2 * page-types: Tool for querying page flags
3 * 3 *
4 * Copyright (C) 2009 Intel corporation 4 * Copyright (C) 2009 Intel corporation
5 * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com> 5 *
6 * Authors: Wu Fengguang <fengguang.wu@intel.com>
7 *
8 * Released under the General Public License (GPL).
6 */ 9 */
7 10
8#define _LARGEFILE64_SOURCE 11#define _LARGEFILE64_SOURCE
@@ -69,7 +72,9 @@
69#define KPF_COMPOUND_TAIL 16 72#define KPF_COMPOUND_TAIL 16
70#define KPF_HUGE 17 73#define KPF_HUGE 17
71#define KPF_UNEVICTABLE 18 74#define KPF_UNEVICTABLE 18
75#define KPF_HWPOISON 19
72#define KPF_NOPAGE 20 76#define KPF_NOPAGE 20
77#define KPF_KSM 21
73 78
74/* [32-] kernel hacking assistances */ 79/* [32-] kernel hacking assistances */
75#define KPF_RESERVED 32 80#define KPF_RESERVED 32
@@ -116,7 +121,9 @@ static char *page_flag_names[] = {
116 [KPF_COMPOUND_TAIL] = "T:compound_tail", 121 [KPF_COMPOUND_TAIL] = "T:compound_tail",
117 [KPF_HUGE] = "G:huge", 122 [KPF_HUGE] = "G:huge",
118 [KPF_UNEVICTABLE] = "u:unevictable", 123 [KPF_UNEVICTABLE] = "u:unevictable",
124 [KPF_HWPOISON] = "X:hwpoison",
119 [KPF_NOPAGE] = "n:nopage", 125 [KPF_NOPAGE] = "n:nopage",
126 [KPF_KSM] = "x:ksm",
120 127
121 [KPF_RESERVED] = "r:reserved", 128 [KPF_RESERVED] = "r:reserved",
122 [KPF_MLOCKED] = "m:mlocked", 129 [KPF_MLOCKED] = "m:mlocked",
@@ -152,9 +159,6 @@ static unsigned long opt_size[MAX_ADDR_RANGES];
152static int nr_vmas; 159static int nr_vmas;
153static unsigned long pg_start[MAX_VMAS]; 160static unsigned long pg_start[MAX_VMAS];
154static unsigned long pg_end[MAX_VMAS]; 161static unsigned long pg_end[MAX_VMAS];
155static unsigned long voffset;
156
157static int pagemap_fd;
158 162
159#define MAX_BIT_FILTERS 64 163#define MAX_BIT_FILTERS 64
160static int nr_bit_filters; 164static int nr_bit_filters;
@@ -163,9 +167,16 @@ static uint64_t opt_bits[MAX_BIT_FILTERS];
163 167
164static int page_size; 168static int page_size;
165 169
166#define PAGES_BATCH (64 << 10) /* 64k pages */ 170static int pagemap_fd;
167static int kpageflags_fd; 171static int kpageflags_fd;
168 172
173static int opt_hwpoison;
174static int opt_unpoison;
175
176static char *hwpoison_debug_fs = "/debug/hwpoison";
177static int hwpoison_inject_fd;
178static int hwpoison_forget_fd;
179
169#define HASH_SHIFT 13 180#define HASH_SHIFT 13
170#define HASH_SIZE (1 << HASH_SHIFT) 181#define HASH_SIZE (1 << HASH_SHIFT)
171#define HASH_MASK (HASH_SIZE - 1) 182#define HASH_MASK (HASH_SIZE - 1)
@@ -207,6 +218,74 @@ static void fatal(const char *x, ...)
207 exit(EXIT_FAILURE); 218 exit(EXIT_FAILURE);
208} 219}
209 220
221int checked_open(const char *pathname, int flags)
222{
223 int fd = open(pathname, flags);
224
225 if (fd < 0) {
226 perror(pathname);
227 exit(EXIT_FAILURE);
228 }
229
230 return fd;
231}
232
233/*
234 * pagemap/kpageflags routines
235 */
236
237static unsigned long do_u64_read(int fd, char *name,
238 uint64_t *buf,
239 unsigned long index,
240 unsigned long count)
241{
242 long bytes;
243
244 if (index > ULONG_MAX / 8)
245 fatal("index overflow: %lu\n", index);
246
247 if (lseek(fd, index * 8, SEEK_SET) < 0) {
248 perror(name);
249 exit(EXIT_FAILURE);
250 }
251
252 bytes = read(fd, buf, count * 8);
253 if (bytes < 0) {
254 perror(name);
255 exit(EXIT_FAILURE);
256 }
257 if (bytes % 8)
258 fatal("partial read: %lu bytes\n", bytes);
259
260 return bytes / 8;
261}
262
263static unsigned long kpageflags_read(uint64_t *buf,
264 unsigned long index,
265 unsigned long pages)
266{
267 return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages);
268}
269
270static unsigned long pagemap_read(uint64_t *buf,
271 unsigned long index,
272 unsigned long pages)
273{
274 return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages);
275}
276
277static unsigned long pagemap_pfn(uint64_t val)
278{
279 unsigned long pfn;
280
281 if (val & PM_PRESENT)
282 pfn = PM_PFRAME(val);
283 else
284 pfn = 0;
285
286 return pfn;
287}
288
210 289
211/* 290/*
212 * page flag names 291 * page flag names
@@ -255,7 +334,8 @@ static char *page_flag_longname(uint64_t flags)
255 * page list and summary 334 * page list and summary
256 */ 335 */
257 336
258static void show_page_range(unsigned long offset, uint64_t flags) 337static void show_page_range(unsigned long voffset,
338 unsigned long offset, uint64_t flags)
259{ 339{
260 static uint64_t flags0; 340 static uint64_t flags0;
261 static unsigned long voff; 341 static unsigned long voff;
@@ -281,7 +361,8 @@ static void show_page_range(unsigned long offset, uint64_t flags)
281 count = 1; 361 count = 1;
282} 362}
283 363
284static void show_page(unsigned long offset, uint64_t flags) 364static void show_page(unsigned long voffset,
365 unsigned long offset, uint64_t flags)
285{ 366{
286 if (opt_pid) 367 if (opt_pid)
287 printf("%lx\t", voffset); 368 printf("%lx\t", voffset);
@@ -362,6 +443,62 @@ static uint64_t well_known_flags(uint64_t flags)
362 return flags; 443 return flags;
363} 444}
364 445
446static uint64_t kpageflags_flags(uint64_t flags)
447{
448 flags = expand_overloaded_flags(flags);
449
450 if (!opt_raw)
451 flags = well_known_flags(flags);
452
453 return flags;
454}
455
456/*
457 * page actions
458 */
459
460static void prepare_hwpoison_fd(void)
461{
462 char buf[100];
463
464 if (opt_hwpoison && !hwpoison_inject_fd) {
465 sprintf(buf, "%s/corrupt-pfn", hwpoison_debug_fs);
466 hwpoison_inject_fd = checked_open(buf, O_WRONLY);
467 }
468
469 if (opt_unpoison && !hwpoison_forget_fd) {
470 sprintf(buf, "%s/renew-pfn", hwpoison_debug_fs);
471 hwpoison_forget_fd = checked_open(buf, O_WRONLY);
472 }
473}
474
475static int hwpoison_page(unsigned long offset)
476{
477 char buf[100];
478 int len;
479
480 len = sprintf(buf, "0x%lx\n", offset);
481 len = write(hwpoison_inject_fd, buf, len);
482 if (len < 0) {
483 perror("hwpoison inject");
484 return len;
485 }
486 return 0;
487}
488
489static int unpoison_page(unsigned long offset)
490{
491 char buf[100];
492 int len;
493
494 len = sprintf(buf, "0x%lx\n", offset);
495 len = write(hwpoison_forget_fd, buf, len);
496 if (len < 0) {
497 perror("hwpoison forget");
498 return len;
499 }
500 return 0;
501}
365 502
366/* 503/*
367 * page frame walker 504 * page frame walker
@@ -394,104 +531,83 @@ static int hash_slot(uint64_t flags)
394 exit(EXIT_FAILURE); 531 exit(EXIT_FAILURE);
395} 532}
396 533
397static void add_page(unsigned long offset, uint64_t flags) 534static void add_page(unsigned long voffset,
535 unsigned long offset, uint64_t flags)
398{ 536{
399 flags = expand_overloaded_flags(flags); 537 flags = kpageflags_flags(flags);
400
401 if (!opt_raw)
402 flags = well_known_flags(flags);
403 538
404 if (!bit_mask_ok(flags)) 539 if (!bit_mask_ok(flags))
405 return; 540 return;
406 541
542 if (opt_hwpoison)
543 hwpoison_page(offset);
544 if (opt_unpoison)
545 unpoison_page(offset);
546
407 if (opt_list == 1) 547 if (opt_list == 1)
408 show_page_range(offset, flags); 548 show_page_range(voffset, offset, flags);
409 else if (opt_list == 2) 549 else if (opt_list == 2)
410 show_page(offset, flags); 550 show_page(voffset, offset, flags);
411 551
412 nr_pages[hash_slot(flags)]++; 552 nr_pages[hash_slot(flags)]++;
413 total_pages++; 553 total_pages++;
414} 554}
415 555
416static void walk_pfn(unsigned long index, unsigned long count) 556#define KPAGEFLAGS_BATCH (64 << 10) /* 64k pages */
557static void walk_pfn(unsigned long voffset,
558 unsigned long index,
559 unsigned long count)
417{ 560{
561 uint64_t buf[KPAGEFLAGS_BATCH];
418 unsigned long batch; 562 unsigned long batch;
419 unsigned long n; 563 unsigned long pages;
420 unsigned long i; 564 unsigned long i;
421 565
422 if (index > ULONG_MAX / KPF_BYTES)
423 fatal("index overflow: %lu\n", index);
424
425 lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET);
426
427 while (count) { 566 while (count) {
428 uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH]; 567 batch = min_t(unsigned long, count, KPAGEFLAGS_BATCH);
429 568 pages = kpageflags_read(buf, index, batch);
430 batch = min_t(unsigned long, count, PAGES_BATCH); 569 if (pages == 0)
431 n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES);
432 if (n == 0)
433 break; 570 break;
434 if (n < 0) {
435 perror(PROC_KPAGEFLAGS);
436 exit(EXIT_FAILURE);
437 }
438 571
439 if (n % KPF_BYTES != 0) 572 for (i = 0; i < pages; i++)
440 fatal("partial read: %lu bytes\n", n); 573 add_page(voffset + i, index + i, buf[i]);
441 n = n / KPF_BYTES;
442 574
443 for (i = 0; i < n; i++) 575 index += pages;
444 add_page(index + i, kpageflags_buf[i]); 576 count -= pages;
445
446 index += batch;
447 count -= batch;
448 } 577 }
449} 578}
450 579
451 580#define PAGEMAP_BATCH (64 << 10)
452#define PAGEMAP_BATCH 4096 581static void walk_vma(unsigned long index, unsigned long count)
453static unsigned long task_pfn(unsigned long pgoff)
454{ 582{
455 static uint64_t buf[PAGEMAP_BATCH]; 583 uint64_t buf[PAGEMAP_BATCH];
456 static unsigned long start; 584 unsigned long batch;
457 static long count; 585 unsigned long pages;
458 uint64_t pfn; 586 unsigned long pfn;
587 unsigned long i;
459 588
460 if (pgoff < start || pgoff >= start + count) { 589 while (count) {
461 if (lseek64(pagemap_fd, 590 batch = min_t(unsigned long, count, PAGEMAP_BATCH);
462 (uint64_t)pgoff * PM_ENTRY_BYTES, 591 pages = pagemap_read(buf, index, batch);
463 SEEK_SET) < 0) { 592 if (pages == 0)
464 perror("pagemap seek"); 593 break;
465 exit(EXIT_FAILURE);
466 }
467 count = read(pagemap_fd, buf, sizeof(buf));
468 if (count == 0)
469 return 0;
470 if (count < 0) {
471 perror("pagemap read");
472 exit(EXIT_FAILURE);
473 }
474 if (count % PM_ENTRY_BYTES) {
475 fatal("pagemap read not aligned.\n");
476 exit(EXIT_FAILURE);
477 }
478 count /= PM_ENTRY_BYTES;
479 start = pgoff;
480 }
481 594
482 pfn = buf[pgoff - start]; 595 for (i = 0; i < pages; i++) {
483 if (pfn & PM_PRESENT) 596 pfn = pagemap_pfn(buf[i]);
484 pfn = PM_PFRAME(pfn); 597 if (pfn)
485 else 598 walk_pfn(index + i, pfn, 1);
486 pfn = 0; 599 }
487 600
488 return pfn; 601 index += pages;
602 count -= pages;
603 }
489} 604}
490 605
491static void walk_task(unsigned long index, unsigned long count) 606static void walk_task(unsigned long index, unsigned long count)
492{ 607{
493 int i = 0;
494 const unsigned long end = index + count; 608 const unsigned long end = index + count;
609 unsigned long start;
610 int i = 0;
495 611
496 while (index < end) { 612 while (index < end) {
497 613
@@ -501,15 +617,11 @@ static void walk_task(unsigned long index, unsigned long count)
501 if (pg_start[i] >= end) 617 if (pg_start[i] >= end)
502 return; 618 return;
503 619
504 voffset = max_t(unsigned long, pg_start[i], index); 620 start = max_t(unsigned long, pg_start[i], index);
505 index = min_t(unsigned long, pg_end[i], end); 621 index = min_t(unsigned long, pg_end[i], end);
506 622
507 assert(voffset < index); 623 assert(start < index);
508 for (; voffset < index; voffset++) { 624 walk_vma(start, index - start);
509 unsigned long pfn = task_pfn(voffset);
510 if (pfn)
511 walk_pfn(pfn, 1);
512 }
513 } 625 }
514} 626}
515 627
@@ -527,18 +639,14 @@ static void walk_addr_ranges(void)
527{ 639{
528 int i; 640 int i;
529 641
530 kpageflags_fd = open(PROC_KPAGEFLAGS, O_RDONLY); 642 kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY);
531 if (kpageflags_fd < 0) {
532 perror(PROC_KPAGEFLAGS);
533 exit(EXIT_FAILURE);
534 }
535 643
536 if (!nr_addr_ranges) 644 if (!nr_addr_ranges)
537 add_addr_range(0, ULONG_MAX); 645 add_addr_range(0, ULONG_MAX);
538 646
539 for (i = 0; i < nr_addr_ranges; i++) 647 for (i = 0; i < nr_addr_ranges; i++)
540 if (!opt_pid) 648 if (!opt_pid)
541 walk_pfn(opt_offset[i], opt_size[i]); 649 walk_pfn(0, opt_offset[i], opt_size[i]);
542 else 650 else
543 walk_task(opt_offset[i], opt_size[i]); 651 walk_task(opt_offset[i], opt_size[i]);
544 652
@@ -575,6 +683,8 @@ static void usage(void)
575" -l|--list Show page details in ranges\n" 683" -l|--list Show page details in ranges\n"
576" -L|--list-each Show page details one by one\n" 684" -L|--list-each Show page details one by one\n"
577" -N|--no-summary Don't show summay info\n" 685" -N|--no-summary Don't show summay info\n"
686" -X|--hwpoison hwpoison pages\n"
687" -x|--unpoison unpoison pages\n"
578" -h|--help Show this usage message\n" 688" -h|--help Show this usage message\n"
579"addr-spec:\n" 689"addr-spec:\n"
580" N one page at offset N (unit: pages)\n" 690" N one page at offset N (unit: pages)\n"
@@ -624,11 +734,7 @@ static void parse_pid(const char *str)
624 opt_pid = parse_number(str); 734 opt_pid = parse_number(str);
625 735
626 sprintf(buf, "/proc/%d/pagemap", opt_pid); 736 sprintf(buf, "/proc/%d/pagemap", opt_pid);
627 pagemap_fd = open(buf, O_RDONLY); 737 pagemap_fd = checked_open(buf, O_RDONLY);
628 if (pagemap_fd < 0) {
629 perror(buf);
630 exit(EXIT_FAILURE);
631 }
632 738
633 sprintf(buf, "/proc/%d/maps", opt_pid); 739 sprintf(buf, "/proc/%d/maps", opt_pid);
634 file = fopen(buf, "r"); 740 file = fopen(buf, "r");
@@ -788,6 +894,8 @@ static struct option opts[] = {
788 { "list" , 0, NULL, 'l' }, 894 { "list" , 0, NULL, 'l' },
789 { "list-each" , 0, NULL, 'L' }, 895 { "list-each" , 0, NULL, 'L' },
790 { "no-summary", 0, NULL, 'N' }, 896 { "no-summary", 0, NULL, 'N' },
897 { "hwpoison" , 0, NULL, 'X' },
898 { "unpoison" , 0, NULL, 'x' },
791 { "help" , 0, NULL, 'h' }, 899 { "help" , 0, NULL, 'h' },
792 { NULL , 0, NULL, 0 } 900 { NULL , 0, NULL, 0 }
793}; 901};
@@ -799,7 +907,7 @@ int main(int argc, char *argv[])
799 page_size = getpagesize(); 907 page_size = getpagesize();
800 908
801 while ((c = getopt_long(argc, argv, 909 while ((c = getopt_long(argc, argv,
802 "rp:f:a:b:lLNh", opts, NULL)) != -1) { 910 "rp:f:a:b:lLNXxh", opts, NULL)) != -1) {
803 switch (c) { 911 switch (c) {
804 case 'r': 912 case 'r':
805 opt_raw = 1; 913 opt_raw = 1;
@@ -825,6 +933,14 @@ int main(int argc, char *argv[])
825 case 'N': 933 case 'N':
826 opt_no_summary = 1; 934 opt_no_summary = 1;
827 break; 935 break;
936 case 'X':
937 opt_hwpoison = 1;
938 prepare_hwpoison_fd();
939 break;
940 case 'x':
941 opt_unpoison = 1;
942 prepare_hwpoison_fd();
943 break;
828 case 'h': 944 case 'h':
829 usage(); 945 usage();
830 exit(0); 946 exit(0);
@@ -844,7 +960,7 @@ int main(int argc, char *argv[])
844 walk_addr_ranges(); 960 walk_addr_ranges();
845 961
846 if (opt_list == 1) 962 if (opt_list == 1)
847 show_page_range(0, 0); /* drain the buffer */ 963 show_page_range(0, 0, 0); /* drain the buffer */
848 964
849 if (opt_no_summary) 965 if (opt_no_summary)
850 return 0; 966 return 0;
diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt
index 600a304a828c..df09b9650a81 100644
--- a/Documentation/vm/pagemap.txt
+++ b/Documentation/vm/pagemap.txt
@@ -57,7 +57,9 @@ There are three components to pagemap:
57 16. COMPOUND_TAIL 57 16. COMPOUND_TAIL
58 16. HUGE 58 16. HUGE
59 18. UNEVICTABLE 59 18. UNEVICTABLE
60 19. HWPOISON
60 20. NOPAGE 61 20. NOPAGE
62 21. KSM
61 63
62Short descriptions to the page flags: 64Short descriptions to the page flags:
63 65
@@ -86,9 +88,15 @@ Short descriptions to the page flags:
8617. HUGE 8817. HUGE
87 this is an integral part of a HugeTLB page 89 this is an integral part of a HugeTLB page
88 90
9119. HWPOISON
92 hardware detected memory corruption on this page: don't touch the data!
93
8920. NOPAGE 9420. NOPAGE
90 no page frame exists at the requested address 95 no page frame exists at the requested address
91 96
9721. KSM
98 identical memory pages dynamically shared between one or more processes
99
92 [IO related page flags] 100 [IO related page flags]
93 1. ERROR IO error occurred 101 1. ERROR IO error occurred
94 3. UPTODATE page has up-to-date data 102 3. UPTODATE page has up-to-date data