aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/auxdisplay/cfag12864b-example.c1
-rw-r--r--Documentation/cgroups/cgroups.txt32
-rw-r--r--Documentation/cgroups/memory.txt41
-rw-r--r--Documentation/crypto/async-tx-api.txt75
-rw-r--r--Documentation/filesystems/sharedsubtree.txt220
-rw-r--r--Documentation/filesystems/vfs.txt7
-rw-r--r--Documentation/ioctl/ioctl-number.txt1
-rw-r--r--Documentation/sysctl/fs.txt17
-rw-r--r--Documentation/sysctl/kernel.txt22
-rw-r--r--Documentation/sysctl/vm.txt41
-rw-r--r--Documentation/vm/.gitignore1
-rw-r--r--Documentation/vm/page-types.c200
12 files changed, 417 insertions, 241 deletions
diff --git a/Documentation/auxdisplay/cfag12864b-example.c b/Documentation/auxdisplay/cfag12864b-example.c
index 1d2c010bae12..e7823ffb1ca0 100644
--- a/Documentation/auxdisplay/cfag12864b-example.c
+++ b/Documentation/auxdisplay/cfag12864b-example.c
@@ -194,7 +194,6 @@ static void cfag12864b_blit(void)
194 */ 194 */
195 195
196#include <stdio.h> 196#include <stdio.h>
197#include <string.h>
198 197
199#define EXAMPLES 6 198#define EXAMPLES 6
200 199
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 6eb1a97e88ce..455d4e6d346d 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -408,6 +408,26 @@ You can attach the current shell task by echoing 0:
408 408
409# echo 0 > tasks 409# echo 0 > tasks
410 410
4112.3 Mounting hierarchies by name
412--------------------------------
413
414Passing the name=<x> option when mounting a cgroups hierarchy
415associates the given name with the hierarchy. This can be used when
416mounting a pre-existing hierarchy, in order to refer to it by name
417rather than by its set of active subsystems. Each hierarchy is either
418nameless, or has a unique name.
419
420The name should match [\w.-]+
421
422When passing a name=<x> option for a new hierarchy, you need to
423specify subsystems manually; the legacy behaviour of mounting all
424subsystems when none are explicitly specified is not supported when
425you give a hierarchy a name.
426
427The name of the hierarchy appears as part of the hierarchy description
428in /proc/mounts and /proc/<pid>/cgroups.
429
430
4113. Kernel API 4313. Kernel API
412============= 432=============
413 433
@@ -501,7 +521,7 @@ rmdir() will fail with it. From this behavior, pre_destroy() can be
501called multiple times against a cgroup. 521called multiple times against a cgroup.
502 522
503int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, 523int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
504 struct task_struct *task) 524 struct task_struct *task, bool threadgroup)
505(cgroup_mutex held by caller) 525(cgroup_mutex held by caller)
506 526
507Called prior to moving a task into a cgroup; if the subsystem 527Called prior to moving a task into a cgroup; if the subsystem
@@ -509,14 +529,20 @@ returns an error, this will abort the attach operation. If a NULL
509task is passed, then a successful result indicates that *any* 529task is passed, then a successful result indicates that *any*
510unspecified task can be moved into the cgroup. Note that this isn't 530unspecified task can be moved into the cgroup. Note that this isn't
511called on a fork. If this method returns 0 (success) then this should 531called on a fork. If this method returns 0 (success) then this should
512remain valid while the caller holds cgroup_mutex. 532remain valid while the caller holds cgroup_mutex. If threadgroup is
533true, then a successful result indicates that all threads in the given
534thread's threadgroup can be moved together.
513 535
514void attach(struct cgroup_subsys *ss, struct cgroup *cgrp, 536void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
515 struct cgroup *old_cgrp, struct task_struct *task) 537 struct cgroup *old_cgrp, struct task_struct *task,
538 bool threadgroup)
516(cgroup_mutex held by caller) 539(cgroup_mutex held by caller)
517 540
518Called after the task has been attached to the cgroup, to allow any 541Called after the task has been attached to the cgroup, to allow any
519post-attachment activity that requires memory allocations or blocking. 542post-attachment activity that requires memory allocations or blocking.
543If threadgroup is true, the subsystem should take care of all threads
544in the specified thread's threadgroup. Currently does not support any
545subsystem that might need the old_cgrp for every thread in the group.
520 546
521void fork(struct cgroup_subsys *ss, struct task_struct *task) 547void fork(struct cgroup_subsys *ss, struct task_struct *task)
522 548
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 23d1262c0775..b871f2552b45 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -179,6 +179,9 @@ The reclaim algorithm has not been modified for cgroups, except that
179pages that are selected for reclaiming come from the per cgroup LRU 179pages that are selected for reclaiming come from the per cgroup LRU
180list. 180list.
181 181
182NOTE: Reclaim does not work for the root cgroup, since we cannot set any
183limits on the root cgroup.
184
1822. Locking 1852. Locking
183 186
184The memory controller uses the following hierarchy 187The memory controller uses the following hierarchy
@@ -210,6 +213,7 @@ We can alter the memory limit:
210NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, 213NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
211mega or gigabytes. 214mega or gigabytes.
212NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited). 215NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited).
216NOTE: We cannot set limits on the root cgroup any more.
213 217
214# cat /cgroups/0/memory.limit_in_bytes 218# cat /cgroups/0/memory.limit_in_bytes
2154194304 2194194304
@@ -375,7 +379,42 @@ cgroups created below it.
375 379
376NOTE2: This feature can be enabled/disabled per subtree. 380NOTE2: This feature can be enabled/disabled per subtree.
377 381
3787. TODO 3827. Soft limits
383
384Soft limits allow for greater sharing of memory. The idea behind soft limits
385is to allow control groups to use as much of the memory as needed, provided
386
387a. There is no memory contention
388b. They do not exceed their hard limit
389
390When the system detects memory contention or low memory, control groups
391are pushed back to their soft limits. If the soft limit of each control
392group is very high, they are pushed back as much as possible to make
393sure that one control group does not starve the others of memory.
394
395Please note that soft limits is a best effort feature, it comes with
396no guarantees, but it does its best to make sure that when memory is
397heavily contended for, memory is allocated based on the soft limit
398hints/setup. Currently soft limit based reclaim is setup such that
399it gets invoked from balance_pgdat (kswapd).
400
4017.1 Interface
402
403Soft limits can be setup by using the following commands (in this example we
404assume a soft limit of 256 megabytes)
405
406# echo 256M > memory.soft_limit_in_bytes
407
408If we want to change this to 1G, we can at any time use
409
410# echo 1G > memory.soft_limit_in_bytes
411
412NOTE1: Soft limits take effect over a long period of time, since they involve
413 reclaiming memory for balancing between memory cgroups
414NOTE2: It is recommended to set the soft limit always below the hard limit,
415 otherwise the hard limit will take precedence.
416
4178. TODO
379 418
3801. Add support for accounting huge pages (as a separate controller) 4191. Add support for accounting huge pages (as a separate controller)
3812. Make per-cgroup scanner reclaim not-shared pages first 4202. Make per-cgroup scanner reclaim not-shared pages first
diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt
index 9f59fcbf5d82..ba046b8fa92f 100644
--- a/Documentation/crypto/async-tx-api.txt
+++ b/Documentation/crypto/async-tx-api.txt
@@ -54,20 +54,23 @@ features surfaced as a result:
54 54
553.1 General format of the API: 553.1 General format of the API:
56struct dma_async_tx_descriptor * 56struct dma_async_tx_descriptor *
57async_<operation>(<op specific parameters>, 57async_<operation>(<op specific parameters>, struct async_submit_ctl *submit)
58 enum async_tx_flags flags,
59 struct dma_async_tx_descriptor *dependency,
60 dma_async_tx_callback callback_routine,
61 void *callback_parameter);
62 58
633.2 Supported operations: 593.2 Supported operations:
64memcpy - memory copy between a source and a destination buffer 60memcpy - memory copy between a source and a destination buffer
65memset - fill a destination buffer with a byte value 61memset - fill a destination buffer with a byte value
66xor - xor a series of source buffers and write the result to a 62xor - xor a series of source buffers and write the result to a
67 destination buffer 63 destination buffer
68xor_zero_sum - xor a series of source buffers and set a flag if the 64xor_val - xor a series of source buffers and set a flag if the
69 result is zero. The implementation attempts to prevent 65 result is zero. The implementation attempts to prevent
70 writes to memory 66 writes to memory
67pq - generate the p+q (raid6 syndrome) from a series of source buffers
68pq_val - validate that a p and/or q buffer are in sync with a given series of
69 sources
70datap - (raid6_datap_recov) recover a raid6 data block and the p block
71 from the given sources
722data - (raid6_2data_recov) recover 2 raid6 data blocks from the given
73 sources
71 74
723.3 Descriptor management: 753.3 Descriptor management:
73The return value is non-NULL and points to a 'descriptor' when the operation 76The return value is non-NULL and points to a 'descriptor' when the operation
@@ -80,8 +83,8 @@ acknowledged by the application before the offload engine driver is allowed to
80recycle (or free) the descriptor. A descriptor can be acked by one of the 83recycle (or free) the descriptor. A descriptor can be acked by one of the
81following methods: 84following methods:
821/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted 851/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted
832/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent 862/ submitting an unacknowledged descriptor as a dependency to another
84 descriptor of a new operation. 87 async_tx call will implicitly set the acknowledged state.
853/ calling async_tx_ack() on the descriptor. 883/ calling async_tx_ack() on the descriptor.
86 89
873.4 When does the operation execute? 903.4 When does the operation execute?
@@ -119,30 +122,42 @@ of an operation.
119Perform a xor->copy->xor operation where each operation depends on the 122Perform a xor->copy->xor operation where each operation depends on the
120result from the previous operation: 123result from the previous operation:
121 124
122void complete_xor_copy_xor(void *param) 125void callback(void *param)
123{ 126{
124 printk("complete\n"); 127 struct completion *cmp = param;
128
129 complete(cmp);
125} 130}
126 131
127int run_xor_copy_xor(struct page **xor_srcs, 132void run_xor_copy_xor(struct page **xor_srcs,
128 int xor_src_cnt, 133 int xor_src_cnt,
129 struct page *xor_dest, 134 struct page *xor_dest,
130 size_t xor_len, 135 size_t xor_len,
131 struct page *copy_src, 136 struct page *copy_src,
132 struct page *copy_dest, 137 struct page *copy_dest,
133 size_t copy_len) 138 size_t copy_len)
134{ 139{
135 struct dma_async_tx_descriptor *tx; 140 struct dma_async_tx_descriptor *tx;
141 addr_conv_t addr_conv[xor_src_cnt];
142 struct async_submit_ctl submit;
143 addr_conv_t addr_conv[NDISKS];
144 struct completion cmp;
145
146 init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL,
147 addr_conv);
148 tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
136 149
137 tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, 150 submit.depend_tx = tx;
138 ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL); 151 tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit);
139 tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, 152
140 ASYNC_TX_DEP_ACK, tx, NULL, NULL); 153 init_completion(&cmp);
141 tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, 154 init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx,
142 ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, 155 callback, &cmp, addr_conv);
143 tx, complete_xor_copy_xor, NULL); 156 tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
144 157
145 async_tx_issue_pending_all(); 158 async_tx_issue_pending_all();
159
160 wait_for_completion(&cmp);
146} 161}
147 162
148See include/linux/async_tx.h for more information on the flags. See the 163See include/linux/async_tx.h for more information on the flags. See the
diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt
index 736540045dc7..23a181074f94 100644
--- a/Documentation/filesystems/sharedsubtree.txt
+++ b/Documentation/filesystems/sharedsubtree.txt
@@ -4,7 +4,7 @@ Shared Subtrees
4Contents: 4Contents:
5 1) Overview 5 1) Overview
6 2) Features 6 2) Features
7 3) smount command 7 3) Setting mount states
8 4) Use-case 8 4) Use-case
9 5) Detailed semantics 9 5) Detailed semantics
10 6) Quiz 10 6) Quiz
@@ -41,14 +41,14 @@ replicas continue to be exactly same.
41 41
42 Here is an example: 42 Here is an example:
43 43
44 Lets say /mnt has a mount that is shared. 44 Let's say /mnt has a mount that is shared.
45 mount --make-shared /mnt 45 mount --make-shared /mnt
46 46
47 note: mount command does not yet support the --make-shared flag. 47 Note: mount(8) command now supports the --make-shared flag,
48 I have included a small C program which does the same by executing 48 so the sample 'smount' program is no longer needed and has been
49 'smount /mnt shared' 49 removed.
50 50
51 #mount --bind /mnt /tmp 51 # mount --bind /mnt /tmp
52 The above command replicates the mount at /mnt to the mountpoint /tmp 52 The above command replicates the mount at /mnt to the mountpoint /tmp
53 and the contents of both the mounts remain identical. 53 and the contents of both the mounts remain identical.
54 54
@@ -58,8 +58,8 @@ replicas continue to be exactly same.
58 #ls /tmp 58 #ls /tmp
59 a b c 59 a b c
60 60
61 Now lets say we mount a device at /tmp/a 61 Now let's say we mount a device at /tmp/a
62 #mount /dev/sd0 /tmp/a 62 # mount /dev/sd0 /tmp/a
63 63
64 #ls /tmp/a 64 #ls /tmp/a
65 t1 t2 t2 65 t1 t2 t2
@@ -80,21 +80,20 @@ replicas continue to be exactly same.
80 80
81 Here is an example: 81 Here is an example:
82 82
83 Lets say /mnt has a mount which is shared. 83 Let's say /mnt has a mount which is shared.
84 #mount --make-shared /mnt 84 # mount --make-shared /mnt
85 85
86 Lets bind mount /mnt to /tmp 86 Let's bind mount /mnt to /tmp
87 #mount --bind /mnt /tmp 87 # mount --bind /mnt /tmp
88 88
89 the new mount at /tmp becomes a shared mount and it is a replica of 89 the new mount at /tmp becomes a shared mount and it is a replica of
90 the mount at /mnt. 90 the mount at /mnt.
91 91
92 Now lets make the mount at /tmp; a slave of /mnt 92 Now let's make the mount at /tmp a slave of /mnt
93 #mount --make-slave /tmp 93 # mount --make-slave /tmp
94 [or smount /tmp slave]
95 94
96 lets mount /dev/sd0 on /mnt/a 95 let's mount /dev/sd0 on /mnt/a
97 #mount /dev/sd0 /mnt/a 96 # mount /dev/sd0 /mnt/a
98 97
99 #ls /mnt/a 98 #ls /mnt/a
100 t1 t2 t3 99 t1 t2 t3
@@ -104,9 +103,9 @@ replicas continue to be exactly same.
104 103
105 Note the mount event has propagated to the mount at /tmp 104 Note the mount event has propagated to the mount at /tmp
106 105
107 However lets see what happens if we mount something on the mount at /tmp 106 However let's see what happens if we mount something on the mount at /tmp
108 107
109 #mount /dev/sd1 /tmp/b 108 # mount /dev/sd1 /tmp/b
110 109
111 #ls /tmp/b 110 #ls /tmp/b
112 s1 s2 s3 111 s1 s2 s3
@@ -124,12 +123,11 @@ replicas continue to be exactly same.
124 123
1252d) A unbindable mount is a unbindable private mount 1242d) A unbindable mount is a unbindable private mount
126 125
127 lets say we have a mount at /mnt and we make is unbindable 126 let's say we have a mount at /mnt and we make it unbindable
128 127
129 #mount --make-unbindable /mnt 128 # mount --make-unbindable /mnt
130 [ smount /mnt unbindable ]
131 129
132 Lets try to bind mount this mount somewhere else. 130 Let's try to bind mount this mount somewhere else.
133 # mount --bind /mnt /tmp 131 # mount --bind /mnt /tmp
134 mount: wrong fs type, bad option, bad superblock on /mnt, 132 mount: wrong fs type, bad option, bad superblock on /mnt,
135 or too many mounted file systems 133 or too many mounted file systems
@@ -137,149 +135,15 @@ replicas continue to be exactly same.
137 Binding a unbindable mount is a invalid operation. 135 Binding a unbindable mount is a invalid operation.
138 136
139 137
1403) smount command 1383) Setting mount states
141 139
142 Currently the mount command is not aware of shared subtree features. 140 The mount command (util-linux package) can be used to set mount
143 Work is in progress to add the support in mount ( util-linux package ). 141 states:
144 Till then use the following program.
145 142
146 ------------------------------------------------------------------------ 143 mount --make-shared mountpoint
147 // 144 mount --make-slave mountpoint
148 //this code was developed my Miklos Szeredi <miklos@szeredi.hu> 145 mount --make-private mountpoint
149 //and modified by Ram Pai <linuxram@us.ibm.com> 146 mount --make-unbindable mountpoint
150 // sample usage:
151 // smount /tmp shared
152 //
153 #include <stdio.h>
154 #include <stdlib.h>
155 #include <unistd.h>
156 #include <string.h>
157 #include <sys/mount.h>
158 #include <sys/fsuid.h>
159
160 #ifndef MS_REC
161 #define MS_REC 0x4000 /* 16384: Recursive loopback */
162 #endif
163
164 #ifndef MS_SHARED
165 #define MS_SHARED 1<<20 /* Shared */
166 #endif
167
168 #ifndef MS_PRIVATE
169 #define MS_PRIVATE 1<<18 /* Private */
170 #endif
171
172 #ifndef MS_SLAVE
173 #define MS_SLAVE 1<<19 /* Slave */
174 #endif
175
176 #ifndef MS_UNBINDABLE
177 #define MS_UNBINDABLE 1<<17 /* Unbindable */
178 #endif
179
180 int main(int argc, char *argv[])
181 {
182 int type;
183 if(argc != 3) {
184 fprintf(stderr, "usage: %s dir "
185 "<rshared|rslave|rprivate|runbindable|shared|slave"
186 "|private|unbindable>\n" , argv[0]);
187 return 1;
188 }
189
190 fprintf(stdout, "%s %s %s\n", argv[0], argv[1], argv[2]);
191
192 if (strcmp(argv[2],"rshared")==0)
193 type=(MS_SHARED|MS_REC);
194 else if (strcmp(argv[2],"rslave")==0)
195 type=(MS_SLAVE|MS_REC);
196 else if (strcmp(argv[2],"rprivate")==0)
197 type=(MS_PRIVATE|MS_REC);
198 else if (strcmp(argv[2],"runbindable")==0)
199 type=(MS_UNBINDABLE|MS_REC);
200 else if (strcmp(argv[2],"shared")==0)
201 type=MS_SHARED;
202 else if (strcmp(argv[2],"slave")==0)
203 type=MS_SLAVE;
204 else if (strcmp(argv[2],"private")==0)
205 type=MS_PRIVATE;
206 else if (strcmp(argv[2],"unbindable")==0)
207 type=MS_UNBINDABLE;
208 else {
209 fprintf(stderr, "invalid operation: %s\n", argv[2]);
210 return 1;
211 }
212 setfsuid(getuid());
213
214 if(mount("", argv[1], "dontcare", type, "") == -1) {
215 perror("mount");
216 return 1;
217 }
218 return 0;
219 }
220 -----------------------------------------------------------------------
221
222 Copy the above code snippet into smount.c
223 gcc -o smount smount.c
224
225
226 (i) To mark all the mounts under /mnt as shared execute the following
227 command:
228
229 smount /mnt rshared
230 the corresponding syntax planned for mount command is
231 mount --make-rshared /mnt
232
233 just to mark a mount /mnt as shared, execute the following
234 command:
235 smount /mnt shared
236 the corresponding syntax planned for mount command is
237 mount --make-shared /mnt
238
239 (ii) To mark all the shared mounts under /mnt as slave execute the
240 following
241
242 command:
243 smount /mnt rslave
244 the corresponding syntax planned for mount command is
245 mount --make-rslave /mnt
246
247 just to mark a mount /mnt as slave, execute the following
248 command:
249 smount /mnt slave
250 the corresponding syntax planned for mount command is
251 mount --make-slave /mnt
252
253 (iii) To mark all the mounts under /mnt as private execute the
254 following command:
255
256 smount /mnt rprivate
257 the corresponding syntax planned for mount command is
258 mount --make-rprivate /mnt
259
260 just to mark a mount /mnt as private, execute the following
261 command:
262 smount /mnt private
263 the corresponding syntax planned for mount command is
264 mount --make-private /mnt
265
266 NOTE: by default all the mounts are created as private. But if
267 you want to change some shared/slave/unbindable mount as
268 private at a later point in time, this command can help.
269
270 (iv) To mark all the mounts under /mnt as unbindable execute the
271 following
272
273 command:
274 smount /mnt runbindable
275 the corresponding syntax planned for mount command is
276 mount --make-runbindable /mnt
277
278 just to mark a mount /mnt as unbindable, execute the following
279 command:
280 smount /mnt unbindable
281 the corresponding syntax planned for mount command is
282 mount --make-unbindable /mnt
283 147
284 148
2854) Use cases 1494) Use cases
@@ -350,7 +214,7 @@ replicas continue to be exactly same.
350 mount --rbind / /view/v3 214 mount --rbind / /view/v3
351 mount --rbind / /view/v4 215 mount --rbind / /view/v4
352 216
353 and if /usr has a versioning filesystem mounted, than that 217 and if /usr has a versioning filesystem mounted, then that
354 mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and 218 mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and
355 /view/v4/usr too 219 /view/v4/usr too
356 220
@@ -390,7 +254,7 @@ replicas continue to be exactly same.
390 254
391 For example: 255 For example:
392 mount --make-shared /mnt 256 mount --make-shared /mnt
393 mount --bin /mnt /tmp 257 mount --bind /mnt /tmp
394 258
395 The mount at /mnt and that at /tmp are both shared and belong 259 The mount at /mnt and that at /tmp are both shared and belong
396 to the same peer group. Anything mounted or unmounted under 260 to the same peer group. Anything mounted or unmounted under
@@ -558,7 +422,7 @@ replicas continue to be exactly same.
558 then the subtree under the unbindable mount is pruned in the new 422 then the subtree under the unbindable mount is pruned in the new
559 location. 423 location.
560 424
561 eg: lets say we have the following mount tree. 425 eg: let's say we have the following mount tree.
562 426
563 A 427 A
564 / \ 428 / \
@@ -566,7 +430,7 @@ replicas continue to be exactly same.
566 / \ / \ 430 / \ / \
567 D E F G 431 D E F G
568 432
569 Lets say all the mount except the mount C in the tree are 433 Let's say all the mounts except the mount C in the tree are
570 of a type other than unbindable. 434 of a type other than unbindable.
571 435
572 If this tree is rbound to say Z 436 If this tree is rbound to say Z
@@ -683,13 +547,13 @@ replicas continue to be exactly same.
683 'b' on mounts that receive propagation from mount 'B' and does not have 547 'b' on mounts that receive propagation from mount 'B' and does not have
684 sub-mounts within them are unmounted. 548 sub-mounts within them are unmounted.
685 549
686 Example: Lets say 'B1', 'B2', 'B3' are shared mounts that propagate to 550 Example: Let's say 'B1', 'B2', 'B3' are shared mounts that propagate to
687 each other. 551 each other.
688 552
689 lets say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount 553 let's say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount
690 'B1', 'B2' and 'B3' respectively. 554 'B1', 'B2' and 'B3' respectively.
691 555
692 lets say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on 556 let's say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on
693 mount 'B1', 'B2' and 'B3' respectively. 557 mount 'B1', 'B2' and 'B3' respectively.
694 558
695 if 'C1' is unmounted, all the mounts that are most-recently-mounted on 559 if 'C1' is unmounted, all the mounts that are most-recently-mounted on
@@ -710,7 +574,7 @@ replicas continue to be exactly same.
710 A cloned namespace contains all the mounts as that of the parent 574 A cloned namespace contains all the mounts as that of the parent
711 namespace. 575 namespace.
712 576
713 Lets say 'A' and 'B' are the corresponding mounts in the parent and the 577 Let's say 'A' and 'B' are the corresponding mounts in the parent and the
714 child namespace. 578 child namespace.
715 579
716 If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to 580 If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to
@@ -759,11 +623,11 @@ replicas continue to be exactly same.
759 mount --make-slave /mnt 623 mount --make-slave /mnt
760 624
761 At this point we have the first mount at /tmp and 625 At this point we have the first mount at /tmp and
762 its root dentry is 1. Lets call this mount 'A' 626 its root dentry is 1. Let's call this mount 'A'
763 And then we have a second mount at /tmp1 with root 627 And then we have a second mount at /tmp1 with root
764 dentry 2. Lets call this mount 'B' 628 dentry 2. Let's call this mount 'B'
765 Next we have a third mount at /mnt with root dentry 629 Next we have a third mount at /mnt with root dentry
766 mnt. Lets call this mount 'C' 630 mnt. Let's call this mount 'C'
767 631
768 'B' is the slave of 'A' and 'C' is a slave of 'B' 632 'B' is the slave of 'A' and 'C' is a slave of 'B'
769 A -> B -> C 633 A -> B -> C
@@ -794,7 +658,7 @@ replicas continue to be exactly same.
794 658
795 Q3 Why is unbindable mount needed? 659 Q3 Why is unbindable mount needed?
796 660
797 Lets say we want to replicate the mount tree at multiple 661 Let's say we want to replicate the mount tree at multiple
798 locations within the same subtree. 662 locations within the same subtree.
799 663
800 if one rbind mounts a tree within the same subtree 'n' times 664 if one rbind mounts a tree within the same subtree 'n' times
@@ -803,7 +667,7 @@ replicas continue to be exactly same.
803 mounts. Here is a example. 667 mounts. Here is a example.
804 668
805 step 1: 669 step 1:
806 lets say the root tree has just two directories with 670 let's say the root tree has just two directories with
807 one vfsmount. 671 one vfsmount.
808 root 672 root
809 / \ 673 / \
@@ -875,7 +739,7 @@ replicas continue to be exactly same.
875 Unclonable mounts come in handy here. 739 Unclonable mounts come in handy here.
876 740
877 step 1: 741 step 1:
878 lets say the root tree has just two directories with 742 let's say the root tree has just two directories with
879 one vfsmount. 743 one vfsmount.
880 root 744 root
881 / \ 745 / \
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index f49eecf2e573..623f094c9d8d 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -536,6 +536,7 @@ struct address_space_operations {
536 /* migrate the contents of a page to the specified target */ 536 /* migrate the contents of a page to the specified target */
537 int (*migratepage) (struct page *, struct page *); 537 int (*migratepage) (struct page *, struct page *);
538 int (*launder_page) (struct page *); 538 int (*launder_page) (struct page *);
539 int (*error_remove_page) (struct mapping *mapping, struct page *page);
539}; 540};
540 541
541 writepage: called by the VM to write a dirty page to backing store. 542 writepage: called by the VM to write a dirty page to backing store.
@@ -694,6 +695,12 @@ struct address_space_operations {
694 prevent redirtying the page, it is kept locked during the whole 695 prevent redirtying the page, it is kept locked during the whole
695 operation. 696 operation.
696 697
698 error_remove_page: normally set to generic_error_remove_page if truncation
699 is ok for this address space. Used for memory failure handling.
700 Setting this implies you deal with pages going away under you,
701 unless you have them locked or reference counts increased.
702
703
697The File Object 704The File Object
698=============== 705===============
699 706
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index aafca0a8f66a..947374977ca5 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -135,6 +135,7 @@ Code Seq# Include File Comments
135 <http://mikonos.dia.unisa.it/tcfs> 135 <http://mikonos.dia.unisa.it/tcfs>
136'l' 40-7F linux/udf_fs_i.h in development: 136'l' 40-7F linux/udf_fs_i.h in development:
137 <http://sourceforge.net/projects/linux-udf/> 137 <http://sourceforge.net/projects/linux-udf/>
138'm' 00-09 linux/mmtimer.h
138'm' all linux/mtio.h conflict! 139'm' all linux/mtio.h conflict!
139'm' all linux/soundcard.h conflict! 140'm' all linux/soundcard.h conflict!
140'm' all linux/synclink.h conflict! 141'm' all linux/synclink.h conflict!
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 1458448436cc..62682500878a 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -96,13 +96,16 @@ handles that the Linux kernel will allocate. When you get lots
96of error messages about running out of file handles, you might 96of error messages about running out of file handles, you might
97want to increase this limit. 97want to increase this limit.
98 98
99The three values in file-nr denote the number of allocated 99Historically, the three values in file-nr denoted the number of
100file handles, the number of unused file handles and the maximum 100allocated file handles, the number of allocated but unused file
101number of file handles. When the allocated file handles come 101handles, and the maximum number of file handles. Linux 2.6 always
102close to the maximum, but the number of unused file handles is 102reports 0 as the number of free file handles -- this is not an
103significantly greater than 0, you've encountered a peak in your 103error, it just means that the number of allocated file handles
104usage of file handles and you don't need to increase the maximum. 104exactly matches the number of used file handles.
105 105
106Attempts to allocate more file descriptors than file-max are
107reported with printk; look for "VFS: file-max limit <number>
108reached".
106============================================================== 109==============================================================
107 110
108nr_open: 111nr_open:
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index b3d8b4922740..a028b92001ed 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -22,6 +22,7 @@ show up in /proc/sys/kernel:
22- callhome [ S390 only ] 22- callhome [ S390 only ]
23- auto_msgmni 23- auto_msgmni
24- core_pattern 24- core_pattern
25- core_pipe_limit
25- core_uses_pid 26- core_uses_pid
26- ctrl-alt-del 27- ctrl-alt-del
27- dentry-state 28- dentry-state
@@ -135,6 +136,27 @@ core_pattern is used to specify a core dumpfile pattern name.
135 136
136============================================================== 137==============================================================
137 138
139core_pipe_limit:
140
141This sysctl is only applicable when core_pattern is configured to pipe core
142files to a user space helper (when the first character of core_pattern is a '|',
143see above). When collecting cores via a pipe to an application, it is
144occasionally useful for the collecting application to gather data about the
145crashing process from its /proc/pid directory. In order to do this safely, the
146kernel must wait for the collecting process to exit, so as not to remove the
147crashing process's proc files prematurely. This in turn creates the possibility
148that a misbehaving userspace collecting process can block the reaping of a
149crashed process simply by never exiting. This sysctl defends against that. It
150defines how many concurrent crashing processes may be piped to user space
151applications in parallel. If this value is exceeded, then those crashing
152processes above that value are noted via the kernel log and their cores are
153skipped. 0 is a special value, indicating that unlimited processes may be
154captured in parallel, but that no waiting will take place (i.e. the collecting
155process is not guaranteed access to /proc/<crashing pid>/). This value defaults
156to 0.
157
158==============================================================
159
138core_uses_pid: 160core_uses_pid:
139 161
140The default coredump filename is "core". By setting 162The default coredump filename is "core". By setting
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index e6fb1ec2744b..a6e360d2055c 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/vm:
32- legacy_va_layout 32- legacy_va_layout
33- lowmem_reserve_ratio 33- lowmem_reserve_ratio
34- max_map_count 34- max_map_count
35- memory_failure_early_kill
36- memory_failure_recovery
35- min_free_kbytes 37- min_free_kbytes
36- min_slab_ratio 38- min_slab_ratio
37- min_unmapped_ratio 39- min_unmapped_ratio
@@ -53,7 +55,6 @@ Currently, these files are in /proc/sys/vm:
53- vfs_cache_pressure 55- vfs_cache_pressure
54- zone_reclaim_mode 56- zone_reclaim_mode
55 57
56
57============================================================== 58==============================================================
58 59
59block_dump 60block_dump
@@ -275,6 +276,44 @@ e.g., up to one or two maps per allocation.
275 276
276The default value is 65536. 277The default value is 65536.
277 278
279=============================================================
280
281memory_failure_early_kill:
282
283Control how to kill processes when an uncorrected memory error (typically
284a 2-bit error in a memory module) is detected in the background by hardware
285that cannot be handled by the kernel. In some cases (like the page
286still having a valid copy on disk) the kernel will handle the failure
287transparently without affecting any applications. But if there is
288no other up-to-date copy of the data, it will kill processes to prevent any
289data corruption from propagating.
290
2911: Kill all processes that have the corrupted and not reloadable page mapped
292as soon as the corruption is detected. Note this is not supported
293for a few types of pages, like kernel internally allocated data or
294the swap cache, but works for the majority of user pages.
295
2960: Only unmap the corrupted page from all processes and only kill a process
297that tries to access it.
298
299The kill is done using a catchable SIGBUS with BUS_MCEERR_AO, so processes can
300handle this if they want to.
301
302This is only active on architectures/platforms with advanced machine
303check handling and depends on the hardware capabilities.
304
305Applications can override this setting individually with the PR_MCE_KILL prctl.
306
307==============================================================
308
309memory_failure_recovery
310
311Enable memory failure recovery (when supported by the platform).
312
3131: Attempt recovery.
314
3150: Always panic on a memory failure.
316
278============================================================== 317==============================================================
279 318
280min_free_kbytes: 319min_free_kbytes:
diff --git a/Documentation/vm/.gitignore b/Documentation/vm/.gitignore
index 33e8a023df02..09b164a5700f 100644
--- a/Documentation/vm/.gitignore
+++ b/Documentation/vm/.gitignore
@@ -1 +1,2 @@
1page-types
1slabinfo 2slabinfo
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c
index 3eda8ea00852..fa1a30d9e9d5 100644
--- a/Documentation/vm/page-types.c
+++ b/Documentation/vm/page-types.c
@@ -5,6 +5,7 @@
5 * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com> 5 * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com>
6 */ 6 */
7 7
8#define _LARGEFILE64_SOURCE
8#include <stdio.h> 9#include <stdio.h>
9#include <stdlib.h> 10#include <stdlib.h>
10#include <unistd.h> 11#include <unistd.h>
@@ -13,12 +14,33 @@
13#include <string.h> 14#include <string.h>
14#include <getopt.h> 15#include <getopt.h>
15#include <limits.h> 16#include <limits.h>
17#include <assert.h>
16#include <sys/types.h> 18#include <sys/types.h>
17#include <sys/errno.h> 19#include <sys/errno.h>
18#include <sys/fcntl.h> 20#include <sys/fcntl.h>
19 21
20 22
21/* 23/*
24 * pagemap kernel ABI bits
25 */
26
27#define PM_ENTRY_BYTES sizeof(uint64_t)
28#define PM_STATUS_BITS 3
29#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
30#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
31#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
32#define PM_PSHIFT_BITS 6
33#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
34#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
35#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
36#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
37#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
38
39#define PM_PRESENT PM_STATUS(4LL)
40#define PM_SWAP PM_STATUS(2LL)
41
42
43/*
22 * kernel page flags 44 * kernel page flags
23 */ 45 */
24 46
@@ -126,6 +148,14 @@ static int nr_addr_ranges;
126static unsigned long opt_offset[MAX_ADDR_RANGES]; 148static unsigned long opt_offset[MAX_ADDR_RANGES];
127static unsigned long opt_size[MAX_ADDR_RANGES]; 149static unsigned long opt_size[MAX_ADDR_RANGES];
128 150
151#define MAX_VMAS 10240
152static int nr_vmas;
153static unsigned long pg_start[MAX_VMAS];
154static unsigned long pg_end[MAX_VMAS];
155static unsigned long voffset;
156
157static int pagemap_fd;
158
129#define MAX_BIT_FILTERS 64 159#define MAX_BIT_FILTERS 64
130static int nr_bit_filters; 160static int nr_bit_filters;
131static uint64_t opt_mask[MAX_BIT_FILTERS]; 161static uint64_t opt_mask[MAX_BIT_FILTERS];
@@ -135,7 +165,6 @@ static int page_size;
135 165
136#define PAGES_BATCH (64 << 10) /* 64k pages */ 166#define PAGES_BATCH (64 << 10) /* 64k pages */
137static int kpageflags_fd; 167static int kpageflags_fd;
138static uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH];
139 168
140#define HASH_SHIFT 13 169#define HASH_SHIFT 13
141#define HASH_SIZE (1 << HASH_SHIFT) 170#define HASH_SIZE (1 << HASH_SHIFT)
@@ -158,6 +187,11 @@ static uint64_t page_flags[HASH_SIZE];
158 type __min2 = (y); \ 187 type __min2 = (y); \
159 __min1 < __min2 ? __min1 : __min2; }) 188 __min1 < __min2 ? __min1 : __min2; })
160 189
190#define max_t(type, x, y) ({ \
191 type __max1 = (x); \
192 type __max2 = (y); \
193 __max1 > __max2 ? __max1 : __max2; })
194
161static unsigned long pages2mb(unsigned long pages) 195static unsigned long pages2mb(unsigned long pages)
162{ 196{
163 return (pages * page_size) >> 20; 197 return (pages * page_size) >> 20;
@@ -224,26 +258,34 @@ static char *page_flag_longname(uint64_t flags)
224static void show_page_range(unsigned long offset, uint64_t flags) 258static void show_page_range(unsigned long offset, uint64_t flags)
225{ 259{
226 static uint64_t flags0; 260 static uint64_t flags0;
261 static unsigned long voff;
227 static unsigned long index; 262 static unsigned long index;
228 static unsigned long count; 263 static unsigned long count;
229 264
230 if (flags == flags0 && offset == index + count) { 265 if (flags == flags0 && offset == index + count &&
266 (!opt_pid || voffset == voff + count)) {
231 count++; 267 count++;
232 return; 268 return;
233 } 269 }
234 270
235 if (count) 271 if (count) {
236 printf("%lu\t%lu\t%s\n", 272 if (opt_pid)
273 printf("%lx\t", voff);
274 printf("%lx\t%lx\t%s\n",
237 index, count, page_flag_name(flags0)); 275 index, count, page_flag_name(flags0));
276 }
238 277
239 flags0 = flags; 278 flags0 = flags;
240 index = offset; 279 index = offset;
280 voff = voffset;
241 count = 1; 281 count = 1;
242} 282}
243 283
244static void show_page(unsigned long offset, uint64_t flags) 284static void show_page(unsigned long offset, uint64_t flags)
245{ 285{
246 printf("%lu\t%s\n", offset, page_flag_name(flags)); 286 if (opt_pid)
287 printf("%lx\t", voffset);
288 printf("%lx\t%s\n", offset, page_flag_name(flags));
247} 289}
248 290
249static void show_summary(void) 291static void show_summary(void)
@@ -383,6 +425,8 @@ static void walk_pfn(unsigned long index, unsigned long count)
383 lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET); 425 lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET);
384 426
385 while (count) { 427 while (count) {
428 uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH];
429
386 batch = min_t(unsigned long, count, PAGES_BATCH); 430 batch = min_t(unsigned long, count, PAGES_BATCH);
387 n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES); 431 n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES);
388 if (n == 0) 432 if (n == 0)
@@ -404,6 +448,81 @@ static void walk_pfn(unsigned long index, unsigned long count)
404 } 448 }
405} 449}
406 450
451
452#define PAGEMAP_BATCH 4096
453static unsigned long task_pfn(unsigned long pgoff)
454{
455 static uint64_t buf[PAGEMAP_BATCH];
456 static unsigned long start;
457 static long count;
458 uint64_t pfn;
459
460 if (pgoff < start || pgoff >= start + count) {
461 if (lseek64(pagemap_fd,
462 (uint64_t)pgoff * PM_ENTRY_BYTES,
463 SEEK_SET) < 0) {
464 perror("pagemap seek");
465 exit(EXIT_FAILURE);
466 }
467 count = read(pagemap_fd, buf, sizeof(buf));
468 if (count == 0)
469 return 0;
470 if (count < 0) {
471 perror("pagemap read");
472 exit(EXIT_FAILURE);
473 }
474 if (count % PM_ENTRY_BYTES) {
475 fatal("pagemap read not aligned.\n");
476 exit(EXIT_FAILURE);
477 }
478 count /= PM_ENTRY_BYTES;
479 start = pgoff;
480 }
481
482 pfn = buf[pgoff - start];
483 if (pfn & PM_PRESENT)
484 pfn = PM_PFRAME(pfn);
485 else
486 pfn = 0;
487
488 return pfn;
489}
490
491static void walk_task(unsigned long index, unsigned long count)
492{
493 int i = 0;
494 const unsigned long end = index + count;
495
496 while (index < end) {
497
498 while (pg_end[i] <= index)
499 if (++i >= nr_vmas)
500 return;
501 if (pg_start[i] >= end)
502 return;
503
504 voffset = max_t(unsigned long, pg_start[i], index);
505 index = min_t(unsigned long, pg_end[i], end);
506
507 assert(voffset < index);
508 for (; voffset < index; voffset++) {
509 unsigned long pfn = task_pfn(voffset);
510 if (pfn)
511 walk_pfn(pfn, 1);
512 }
513 }
514}
515
516static void add_addr_range(unsigned long offset, unsigned long size)
517{
518 if (nr_addr_ranges >= MAX_ADDR_RANGES)
519 fatal("too many addr ranges\n");
520
521 opt_offset[nr_addr_ranges] = offset;
522 opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset);
523 nr_addr_ranges++;
524}
525
407static void walk_addr_ranges(void) 526static void walk_addr_ranges(void)
408{ 527{
409 int i; 528 int i;
@@ -415,10 +534,13 @@ static void walk_addr_ranges(void)
415 } 534 }
416 535
417 if (!nr_addr_ranges) 536 if (!nr_addr_ranges)
418 walk_pfn(0, ULONG_MAX); 537 add_addr_range(0, ULONG_MAX);
419 538
420 for (i = 0; i < nr_addr_ranges; i++) 539 for (i = 0; i < nr_addr_ranges; i++)
421 walk_pfn(opt_offset[i], opt_size[i]); 540 if (!opt_pid)
541 walk_pfn(opt_offset[i], opt_size[i]);
542 else
543 walk_task(opt_offset[i], opt_size[i]);
422 544
423 close(kpageflags_fd); 545 close(kpageflags_fd);
424} 546}
@@ -446,8 +568,8 @@ static void usage(void)
446" -r|--raw Raw mode, for kernel developers\n" 568" -r|--raw Raw mode, for kernel developers\n"
447" -a|--addr addr-spec Walk a range of pages\n" 569" -a|--addr addr-spec Walk a range of pages\n"
448" -b|--bits bits-spec Walk pages with specified bits\n" 570" -b|--bits bits-spec Walk pages with specified bits\n"
449#if 0 /* planned features */
450" -p|--pid pid Walk process address space\n" 571" -p|--pid pid Walk process address space\n"
572#if 0 /* planned features */
451" -f|--file filename Walk file address space\n" 573" -f|--file filename Walk file address space\n"
452#endif 574#endif
453" -l|--list Show page details in ranges\n" 575" -l|--list Show page details in ranges\n"
@@ -459,7 +581,7 @@ static void usage(void)
459" N+M pages range from N to N+M-1\n" 581" N+M pages range from N to N+M-1\n"
460" N,M pages range from N to M-1\n" 582" N,M pages range from N to M-1\n"
461" N, pages range from N to end\n" 583" N, pages range from N to end\n"
462" ,M pages range from 0 to M\n" 584" ,M pages range from 0 to M-1\n"
463"bits-spec:\n" 585"bits-spec:\n"
464" bit1,bit2 (flags & (bit1|bit2)) != 0\n" 586" bit1,bit2 (flags & (bit1|bit2)) != 0\n"
465" bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n" 587" bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n"
@@ -496,21 +618,57 @@ static unsigned long long parse_number(const char *str)
496 618
497static void parse_pid(const char *str) 619static void parse_pid(const char *str)
498{ 620{
621 FILE *file;
622 char buf[5000];
623
499 opt_pid = parse_number(str); 624 opt_pid = parse_number(str);
500}
501 625
502static void parse_file(const char *name) 626 sprintf(buf, "/proc/%d/pagemap", opt_pid);
503{ 627 pagemap_fd = open(buf, O_RDONLY);
628 if (pagemap_fd < 0) {
629 perror(buf);
630 exit(EXIT_FAILURE);
631 }
632
633 sprintf(buf, "/proc/%d/maps", opt_pid);
634 file = fopen(buf, "r");
635 if (!file) {
636 perror(buf);
637 exit(EXIT_FAILURE);
638 }
639
640 while (fgets(buf, sizeof(buf), file) != NULL) {
641 unsigned long vm_start;
642 unsigned long vm_end;
643 unsigned long long pgoff;
644 int major, minor;
645 char r, w, x, s;
646 unsigned long ino;
647 int n;
648
649 n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu",
650 &vm_start,
651 &vm_end,
652 &r, &w, &x, &s,
653 &pgoff,
654 &major, &minor,
655 &ino);
656 if (n < 10) {
657 fprintf(stderr, "unexpected line: %s\n", buf);
658 continue;
659 }
660 pg_start[nr_vmas] = vm_start / page_size;
661 pg_end[nr_vmas] = vm_end / page_size;
662 if (++nr_vmas >= MAX_VMAS) {
663 fprintf(stderr, "too many VMAs\n");
664 break;
665 }
666 }
667 fclose(file);
504} 668}
505 669
506static void add_addr_range(unsigned long offset, unsigned long size) 670static void parse_file(const char *name)
507{ 671{
508 if (nr_addr_ranges >= MAX_ADDR_RANGES)
509 fatal("too much addr ranges\n");
510
511 opt_offset[nr_addr_ranges] = offset;
512 opt_size[nr_addr_ranges] = size;
513 nr_addr_ranges++;
514} 672}
515 673
516static void parse_addr_range(const char *optarg) 674static void parse_addr_range(const char *optarg)
@@ -676,8 +834,10 @@ int main(int argc, char *argv[])
676 } 834 }
677 } 835 }
678 836
837 if (opt_list && opt_pid)
838 printf("voffset\t");
679 if (opt_list == 1) 839 if (opt_list == 1)
680 printf("offset\tcount\tflags\n"); 840 printf("offset\tlen\tflags\n");
681 if (opt_list == 2) 841 if (opt_list == 2)
682 printf("offset\tflags\n"); 842 printf("offset\tflags\n");
683 843