aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation/vm
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/vm')
-rw-r--r--Documentation/vm/.gitignore1
-rw-r--r--Documentation/vm/00-INDEX4
-rw-r--r--Documentation/vm/hugetlbpage.txt147
-rw-r--r--Documentation/vm/ksm.txt89
-rw-r--r--Documentation/vm/locking2
-rw-r--r--Documentation/vm/map_hugetlb.c77
-rw-r--r--Documentation/vm/page-types.c248
-rw-r--r--Documentation/vm/slabinfo.c68
8 files changed, 505 insertions, 131 deletions
diff --git a/Documentation/vm/.gitignore b/Documentation/vm/.gitignore
index 33e8a023df02..09b164a5700f 100644
--- a/Documentation/vm/.gitignore
+++ b/Documentation/vm/.gitignore
@@ -1 +1,2 @@
1page-types
1slabinfo 2slabinfo
diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX
index 2f77ced35df7..e57d6a9dd32b 100644
--- a/Documentation/vm/00-INDEX
+++ b/Documentation/vm/00-INDEX
@@ -6,6 +6,8 @@ balance
6 - various information on memory balancing. 6 - various information on memory balancing.
7hugetlbpage.txt 7hugetlbpage.txt
8 - a brief summary of hugetlbpage support in the Linux kernel. 8 - a brief summary of hugetlbpage support in the Linux kernel.
9ksm.txt
10 - how to use the Kernel Samepage Merging feature.
9locking 11locking
10 - info on how locking and synchronization is done in the Linux vm code. 12 - info on how locking and synchronization is done in the Linux vm code.
11numa 13numa
@@ -20,3 +22,5 @@ slabinfo.c
20 - source code for a tool to get reports about slabs. 22 - source code for a tool to get reports about slabs.
21slub.txt 23slub.txt
22 - a short users guide for SLUB. 24 - a short users guide for SLUB.
25map_hugetlb.c
26 - an example program that uses the MAP_HUGETLB mmap flag.
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
index ea8714fcc3ad..82a7bd1800b2 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/vm/hugetlbpage.txt
@@ -18,13 +18,13 @@ First the Linux kernel needs to be built with the CONFIG_HUGETLBFS
18automatically when CONFIG_HUGETLBFS is selected) configuration 18automatically when CONFIG_HUGETLBFS is selected) configuration
19options. 19options.
20 20
21The kernel built with hugepage support should show the number of configured 21The kernel built with huge page support should show the number of configured
22hugepages in the system by running the "cat /proc/meminfo" command. 22huge pages in the system by running the "cat /proc/meminfo" command.
23 23
24/proc/meminfo also provides information about the total number of hugetlb 24/proc/meminfo also provides information about the total number of hugetlb
25pages configured in the kernel. It also displays information about the 25pages configured in the kernel. It also displays information about the
26number of free hugetlb pages at any time. It also displays information about 26number of free hugetlb pages at any time. It also displays information about
27the configured hugepage size - this is needed for generating the proper 27the configured huge page size - this is needed for generating the proper
28alignment and size of the arguments to the above system calls. 28alignment and size of the arguments to the above system calls.
29 29
30The output of "cat /proc/meminfo" will have lines like: 30The output of "cat /proc/meminfo" will have lines like:
@@ -37,25 +37,27 @@ HugePages_Surp: yyy
37Hugepagesize: zzz kB 37Hugepagesize: zzz kB
38 38
39where: 39where:
40HugePages_Total is the size of the pool of hugepages. 40HugePages_Total is the size of the pool of huge pages.
41HugePages_Free is the number of hugepages in the pool that are not yet 41HugePages_Free is the number of huge pages in the pool that are not yet
42allocated. 42 allocated.
43HugePages_Rsvd is short for "reserved," and is the number of hugepages 43HugePages_Rsvd is short for "reserved," and is the number of huge pages for
44for which a commitment to allocate from the pool has been made, but no 44 which a commitment to allocate from the pool has been made,
45allocation has yet been made. It's vaguely analogous to overcommit. 45 but no allocation has yet been made. Reserved huge pages
46HugePages_Surp is short for "surplus," and is the number of hugepages in 46 guarantee that an application will be able to allocate a
47the pool above the value in /proc/sys/vm/nr_hugepages. The maximum 47 huge page from the pool of huge pages at fault time.
48number of surplus hugepages is controlled by 48HugePages_Surp is short for "surplus," and is the number of huge pages in
49/proc/sys/vm/nr_overcommit_hugepages. 49 the pool above the value in /proc/sys/vm/nr_hugepages. The
50 maximum number of surplus huge pages is controlled by
51 /proc/sys/vm/nr_overcommit_hugepages.
50 52
51/proc/filesystems should also show a filesystem of type "hugetlbfs" configured 53/proc/filesystems should also show a filesystem of type "hugetlbfs" configured
52in the kernel. 54in the kernel.
53 55
54/proc/sys/vm/nr_hugepages indicates the current number of configured hugetlb 56/proc/sys/vm/nr_hugepages indicates the current number of configured hugetlb
55pages in the kernel. Super user can dynamically request more (or free some 57pages in the kernel. Super user can dynamically request more (or free some
56pre-configured) hugepages. 58pre-configured) huge pages.
57The allocation (or deallocation) of hugetlb pages is possible only if there are 59The allocation (or deallocation) of hugetlb pages is possible only if there are
58enough physically contiguous free pages in system (freeing of hugepages is 60enough physically contiguous free pages in system (freeing of huge pages is
59possible only if there are enough hugetlb pages free that can be transferred 61possible only if there are enough hugetlb pages free that can be transferred
60back to regular memory pool). 62back to regular memory pool).
61 63
@@ -67,43 +69,82 @@ use either the mmap system call or shared memory system calls to start using
67the huge pages. It is required that the system administrator preallocate 69the huge pages. It is required that the system administrator preallocate
68enough memory for huge page purposes. 70enough memory for huge page purposes.
69 71
70Use the following command to dynamically allocate/deallocate hugepages: 72The administrator can preallocate huge pages on the kernel boot command line by
73specifying the "hugepages=N" parameter, where 'N' = the number of huge pages
74requested. This is the most reliable method for preallocating huge pages as
75memory has not yet become fragmented.
76
77Some platforms support multiple huge page sizes. To preallocate huge pages
78of a specific size, one must precede the huge pages boot command parameters
79with a huge page size selection parameter "hugepagesz=<size>". <size> must
80be specified in bytes with optional scale suffix [kKmMgG]. The default huge
81page size may be selected with the "default_hugepagesz=<size>" boot parameter.
82
83/proc/sys/vm/nr_hugepages indicates the current number of configured [default
84size] hugetlb pages in the kernel. Super user can dynamically request more
85(or free some pre-configured) huge pages.
86
87Use the following command to dynamically allocate/deallocate default sized
88huge pages:
71 89
72 echo 20 > /proc/sys/vm/nr_hugepages 90 echo 20 > /proc/sys/vm/nr_hugepages
73 91
74This command will try to configure 20 hugepages in the system. The success 92This command will try to configure 20 default sized huge pages in the system.
75or failure of allocation depends on the amount of physically contiguous 93On a NUMA platform, the kernel will attempt to distribute the huge page pool
76memory that is preset in system at this time. System administrators may want 94over all the on-line nodes. These huge pages, allocated when nr_hugepages
77to put this command in one of the local rc init files. This will enable the 95is increased, are called "persistent huge pages".
78kernel to request huge pages early in the boot process (when the possibility 96
79of getting physical contiguous pages is still very high). In either 97The success or failure of huge page allocation depends on the amount of
80case, administrators will want to verify the number of hugepages actually 98physically contiguous memory that is present in system at the time of the
81allocated by checking the sysctl or meminfo. 99allocation attempt. If the kernel is unable to allocate huge pages from
82 100some nodes in a NUMA system, it will attempt to make up the difference by
83/proc/sys/vm/nr_overcommit_hugepages indicates how large the pool of 101allocating extra pages on other nodes with sufficient available contiguous
84hugepages can grow, if more hugepages than /proc/sys/vm/nr_hugepages are 102memory, if any.
85requested by applications. echo'ing any non-zero value into this file 103
86indicates that the hugetlb subsystem is allowed to try to obtain 104System administrators may want to put this command in one of the local rc init
87hugepages from the buddy allocator, if the normal pool is exhausted. As 105files. This will enable the kernel to request huge pages early in the boot
88these surplus hugepages go out of use, they are freed back to the buddy 106process when the possibility of getting physical contiguous pages is still
107very high. Administrators can verify the number of huge pages actually
108allocated by checking the sysctl or meminfo. To check the per node
109distribution of huge pages in a NUMA system, use:
110
111 cat /sys/devices/system/node/node*/meminfo | fgrep Huge
112
113/proc/sys/vm/nr_overcommit_hugepages specifies how large the pool of
114huge pages can grow, if more huge pages than /proc/sys/vm/nr_hugepages are
115requested by applications. Writing any non-zero value into this file
116indicates that the hugetlb subsystem is allowed to try to obtain "surplus"
117huge pages from the buddy allocator, when the normal pool is exhausted. As
118these surplus huge pages go out of use, they are freed back to the buddy
89allocator. 119allocator.
90 120
121When increasing the huge page pool size via nr_hugepages, any surplus
122pages will first be promoted to persistent huge pages. Then, additional
123huge pages will be allocated, if necessary and if possible, to fulfill
124the new huge page pool size.
125
126The administrator may shrink the pool of preallocated huge pages for
127the default huge page size by setting the nr_hugepages sysctl to a
128smaller value. The kernel will attempt to balance the freeing of huge pages
129across all on-line nodes. Any free huge pages on the selected nodes will
130be freed back to the buddy allocator.
131
91Caveat: Shrinking the pool via nr_hugepages such that it becomes less 132Caveat: Shrinking the pool via nr_hugepages such that it becomes less
92than the number of hugepages in use will convert the balance to surplus 133than the number of huge pages in use will convert the balance to surplus
93huge pages even if it would exceed the overcommit value. As long as 134huge pages even if it would exceed the overcommit value. As long as
94this condition holds, however, no more surplus huge pages will be 135this condition holds, however, no more surplus huge pages will be
95allowed on the system until one of the two sysctls are increased 136allowed on the system until one of the two sysctls are increased
96sufficiently, or the surplus huge pages go out of use and are freed. 137sufficiently, or the surplus huge pages go out of use and are freed.
97 138
98With support for multiple hugepage pools at run-time available, much of 139With support for multiple huge page pools at run-time available, much of
99the hugepage userspace interface has been duplicated in sysfs. The above 140the huge page userspace interface has been duplicated in sysfs. The above
100information applies to the default hugepage size (which will be 141information applies to the default huge page size which will be
101controlled by the proc interfaces for backwards compatibility). The root 142controlled by the /proc interfaces for backwards compatibility. The root
102hugepage control directory is 143huge page control directory in sysfs is:
103 144
104 /sys/kernel/mm/hugepages 145 /sys/kernel/mm/hugepages
105 146
106For each hugepage size supported by the running kernel, a subdirectory 147For each huge page size supported by the running kernel, a subdirectory
107will exist, of the form 148will exist, of the form
108 149
109 hugepages-${size}kB 150 hugepages-${size}kB
@@ -116,9 +157,9 @@ Inside each of these directories, the same set of files will exist:
116 resv_hugepages 157 resv_hugepages
117 surplus_hugepages 158 surplus_hugepages
118 159
119which function as described above for the default hugepage-sized case. 160which function as described above for the default huge page-sized case.
120 161
121If the user applications are going to request hugepages using mmap system 162If the user applications are going to request huge pages using mmap system
122call, then it is required that system administrator mount a file system of 163call, then it is required that system administrator mount a file system of
123type hugetlbfs: 164type hugetlbfs:
124 165
@@ -127,7 +168,7 @@ type hugetlbfs:
127 none /mnt/huge 168 none /mnt/huge
128 169
129This command mounts a (pseudo) filesystem of type hugetlbfs on the directory 170This command mounts a (pseudo) filesystem of type hugetlbfs on the directory
130/mnt/huge. Any files created on /mnt/huge uses hugepages. The uid and gid 171/mnt/huge. Any files created on /mnt/huge uses huge pages. The uid and gid
131options sets the owner and group of the root of the file system. By default 172options sets the owner and group of the root of the file system. By default
132the uid and gid of the current process are taken. The mode option sets the 173the uid and gid of the current process are taken. The mode option sets the
133mode of root of file system to value & 0777. This value is given in octal. 174mode of root of file system to value & 0777. This value is given in octal.
@@ -146,24 +187,26 @@ Regular chown, chgrp, and chmod commands (with right permissions) could be
146used to change the file attributes on hugetlbfs. 187used to change the file attributes on hugetlbfs.
147 188
148Also, it is important to note that no such mount command is required if the 189Also, it is important to note that no such mount command is required if the
149applications are going to use only shmat/shmget system calls. Users who 190applications are going to use only shmat/shmget system calls or mmap with
150wish to use hugetlb page via shared memory segment should be a member of 191MAP_HUGETLB. Users who wish to use hugetlb page via shared memory segment
151a supplementary group and system admin needs to configure that gid into 192should be a member of a supplementary group and system admin needs to
152/proc/sys/vm/hugetlb_shm_group. It is possible for same or different 193configure that gid into /proc/sys/vm/hugetlb_shm_group. It is possible for
153applications to use any combination of mmaps and shm* calls, though the 194same or different applications to use any combination of mmaps and shm*
154mount of filesystem will be required for using mmap calls. 195calls, though the mount of filesystem will be required for using mmap calls
196without MAP_HUGETLB. For an example of how to use mmap with MAP_HUGETLB see
197map_hugetlb.c.
155 198
156******************************************************************* 199*******************************************************************
157 200
158/* 201/*
159 * Example of using hugepage memory in a user application using Sys V shared 202 * Example of using huge page memory in a user application using Sys V shared
160 * memory system calls. In this example the app is requesting 256MB of 203 * memory system calls. In this example the app is requesting 256MB of
161 * memory that is backed by huge pages. The application uses the flag 204 * memory that is backed by huge pages. The application uses the flag
162 * SHM_HUGETLB in the shmget system call to inform the kernel that it is 205 * SHM_HUGETLB in the shmget system call to inform the kernel that it is
163 * requesting hugepages. 206 * requesting huge pages.
164 * 207 *
165 * For the ia64 architecture, the Linux kernel reserves Region number 4 for 208 * For the ia64 architecture, the Linux kernel reserves Region number 4 for
166 * hugepages. That means the addresses starting with 0x800000... will need 209 * huge pages. That means the addresses starting with 0x800000... will need
167 * to be specified. Specifying a fixed address is not required on ppc64, 210 * to be specified. Specifying a fixed address is not required on ppc64,
168 * i386 or x86_64. 211 * i386 or x86_64.
169 * 212 *
@@ -252,14 +295,14 @@ int main(void)
252******************************************************************* 295*******************************************************************
253 296
254/* 297/*
255 * Example of using hugepage memory in a user application using the mmap 298 * Example of using huge page memory in a user application using the mmap
256 * system call. Before running this application, make sure that the 299 * system call. Before running this application, make sure that the
257 * administrator has mounted the hugetlbfs filesystem (on some directory 300 * administrator has mounted the hugetlbfs filesystem (on some directory
258 * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this 301 * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this
259 * example, the app is requesting memory of size 256MB that is backed by 302 * example, the app is requesting memory of size 256MB that is backed by
260 * huge pages. 303 * huge pages.
261 * 304 *
262 * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages. 305 * For ia64 architecture, Linux kernel reserves Region number 4 for huge pages.
263 * That means the addresses starting with 0x800000... will need to be 306 * That means the addresses starting with 0x800000... will need to be
264 * specified. Specifying a fixed address is not required on ppc64, i386 307 * specified. Specifying a fixed address is not required on ppc64, i386
265 * or x86_64. 308 * or x86_64.
diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt
new file mode 100644
index 000000000000..72a22f65960e
--- /dev/null
+++ b/Documentation/vm/ksm.txt
@@ -0,0 +1,89 @@
1How to use the Kernel Samepage Merging feature
2----------------------------------------------
3
4KSM is a memory-saving de-duplication feature, enabled by CONFIG_KSM=y,
5added to the Linux kernel in 2.6.32. See mm/ksm.c for its implementation,
6and http://lwn.net/Articles/306704/ and http://lwn.net/Articles/330589/
7
8The KSM daemon ksmd periodically scans those areas of user memory which
9have been registered with it, looking for pages of identical content which
10can be replaced by a single write-protected page (which is automatically
11copied if a process later wants to update its content).
12
13KSM was originally developed for use with KVM (where it was known as
14Kernel Shared Memory), to fit more virtual machines into physical memory,
15by sharing the data common between them. But it can be useful to any
16application which generates many instances of the same data.
17
18KSM only merges anonymous (private) pages, never pagecache (file) pages.
19KSM's merged pages are at present locked into kernel memory for as long
20as they are shared: so cannot be swapped out like the user pages they
21replace (but swapping KSM pages should follow soon in a later release).
22
23KSM only operates on those areas of address space which an application
24has advised to be likely candidates for merging, by using the madvise(2)
25system call: int madvise(addr, length, MADV_MERGEABLE).
26
27The app may call int madvise(addr, length, MADV_UNMERGEABLE) to cancel
28that advice and restore unshared pages: whereupon KSM unmerges whatever
29it merged in that range. Note: this unmerging call may suddenly require
30more memory than is available - possibly failing with EAGAIN, but more
31probably arousing the Out-Of-Memory killer.
32
33If KSM is not configured into the running kernel, madvise MADV_MERGEABLE
34and MADV_UNMERGEABLE simply fail with EINVAL. If the running kernel was
35built with CONFIG_KSM=y, those calls will normally succeed: even if the
36KSM daemon is not currently running, MADV_MERGEABLE still registers
37the range for whenever the KSM daemon is started; even if the range
38cannot contain any pages which KSM could actually merge; even if
39MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
40
41Like other madvise calls, they are intended for use on mapped areas of
42the user address space: they will report ENOMEM if the specified range
43includes unmapped gaps (though working on the intervening mapped areas),
44and might fail with EAGAIN if not enough memory for internal structures.
45
46Applications should be considerate in their use of MADV_MERGEABLE,
47restricting its use to areas likely to benefit. KSM's scans may use
48a lot of processing power, and its kernel-resident pages are a limited
49resource. Some installations will disable KSM for these reasons.
50
51The KSM daemon is controlled by sysfs files in /sys/kernel/mm/ksm/,
52readable by all but writable only by root:
53
54max_kernel_pages - set to maximum number of kernel pages that KSM may use
55 e.g. "echo 2000 > /sys/kernel/mm/ksm/max_kernel_pages"
56 Value 0 imposes no limit on the kernel pages KSM may use;
57 but note that any process using MADV_MERGEABLE can cause
58 KSM to allocate these pages, unswappable until it exits.
59 Default: 2000 (chosen for demonstration purposes)
60
61pages_to_scan - how many present pages to scan before ksmd goes to sleep
62 e.g. "echo 200 > /sys/kernel/mm/ksm/pages_to_scan"
63 Default: 200 (chosen for demonstration purposes)
64
65sleep_millisecs - how many milliseconds ksmd should sleep before next scan
66 e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs"
67 Default: 20 (chosen for demonstration purposes)
68
69run - set 0 to stop ksmd from running but keep merged pages,
70 set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run",
71 set 2 to stop ksmd and unmerge all pages currently merged,
72 but leave mergeable areas registered for next run
73 Default: 1 (for immediate use by apps which register)
74
75The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/:
76
77pages_shared - how many shared unswappable kernel pages KSM is using
78pages_sharing - how many more sites are sharing them i.e. how much saved
79pages_unshared - how many pages unique but repeatedly checked for merging
80pages_volatile - how many pages changing too fast to be placed in a tree
81full_scans - how many times all mergeable areas have been scanned
82
83A high ratio of pages_sharing to pages_shared indicates good sharing, but
84a high ratio of pages_unshared to pages_sharing indicates wasted effort.
85pages_volatile embraces several different kinds of activity, but a high
86proportion there would also indicate poor use of madvise MADV_MERGEABLE.
87
88Izik Eidus,
89Hugh Dickins, 30 July 2009
diff --git a/Documentation/vm/locking b/Documentation/vm/locking
index f366fa956179..25fadb448760 100644
--- a/Documentation/vm/locking
+++ b/Documentation/vm/locking
@@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the
80mm start up ... this is a loose form of stability on mm_users. For 80mm start up ... this is a loose form of stability on mm_users. For
81example, it is used in copy_mm to protect against a racing tlb_gather_mmu 81example, it is used in copy_mm to protect against a racing tlb_gather_mmu
82single address space optimization, so that the zap_page_range (from 82single address space optimization, so that the zap_page_range (from
83vmtruncate) does not lose sending ipi's to cloned threads that might 83truncate) does not lose sending ipi's to cloned threads that might
84be spawned underneath it and go to user mode to drag in pte's into tlbs. 84be spawned underneath it and go to user mode to drag in pte's into tlbs.
85 85
86swap_lock 86swap_lock
diff --git a/Documentation/vm/map_hugetlb.c b/Documentation/vm/map_hugetlb.c
new file mode 100644
index 000000000000..e2bdae37f499
--- /dev/null
+++ b/Documentation/vm/map_hugetlb.c
@@ -0,0 +1,77 @@
1/*
2 * Example of using hugepage memory in a user application using the mmap
3 * system call with MAP_HUGETLB flag. Before running this program make
4 * sure the administrator has allocated enough default sized huge pages
5 * to cover the 256 MB allocation.
6 *
7 * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages.
8 * That means the addresses starting with 0x800000... will need to be
9 * specified. Specifying a fixed address is not required on ppc64, i386
10 * or x86_64.
11 */
12#include <stdlib.h>
13#include <stdio.h>
14#include <unistd.h>
15#include <sys/mman.h>
16#include <fcntl.h>
17
18#define LENGTH (256UL*1024*1024)
19#define PROTECTION (PROT_READ | PROT_WRITE)
20
21#ifndef MAP_HUGETLB
22#define MAP_HUGETLB 0x40
23#endif
24
25/* Only ia64 requires this */
26#ifdef __ia64__
27#define ADDR (void *)(0x8000000000000000UL)
28#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED)
29#else
30#define ADDR (void *)(0x0UL)
31#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
32#endif
33
34void check_bytes(char *addr)
35{
36 printf("First hex is %x\n", *((unsigned int *)addr));
37}
38
39void write_bytes(char *addr)
40{
41 unsigned long i;
42
43 for (i = 0; i < LENGTH; i++)
44 *(addr + i) = (char)i;
45}
46
47void read_bytes(char *addr)
48{
49 unsigned long i;
50
51 check_bytes(addr);
52 for (i = 0; i < LENGTH; i++)
53 if (*(addr + i) != (char)i) {
54 printf("Mismatch at %lu\n", i);
55 break;
56 }
57}
58
59int main(void)
60{
61 void *addr;
62
63 addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0);
64 if (addr == MAP_FAILED) {
65 perror("mmap");
66 exit(1);
67 }
68
69 printf("Returned address is %p\n", addr);
70 check_bytes(addr);
71 write_bytes(addr);
72 read_bytes(addr);
73
74 munmap(addr, LENGTH);
75
76 return 0;
77}
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c
index 0833f44ba16b..fa1a30d9e9d5 100644
--- a/Documentation/vm/page-types.c
+++ b/Documentation/vm/page-types.c
@@ -5,6 +5,7 @@
5 * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com> 5 * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com>
6 */ 6 */
7 7
8#define _LARGEFILE64_SOURCE
8#include <stdio.h> 9#include <stdio.h>
9#include <stdlib.h> 10#include <stdlib.h>
10#include <unistd.h> 11#include <unistd.h>
@@ -13,12 +14,33 @@
13#include <string.h> 14#include <string.h>
14#include <getopt.h> 15#include <getopt.h>
15#include <limits.h> 16#include <limits.h>
17#include <assert.h>
16#include <sys/types.h> 18#include <sys/types.h>
17#include <sys/errno.h> 19#include <sys/errno.h>
18#include <sys/fcntl.h> 20#include <sys/fcntl.h>
19 21
20 22
21/* 23/*
24 * pagemap kernel ABI bits
25 */
26
27#define PM_ENTRY_BYTES sizeof(uint64_t)
28#define PM_STATUS_BITS 3
29#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
30#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
31#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
32#define PM_PSHIFT_BITS 6
33#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
34#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
35#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
36#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
37#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
38
39#define PM_PRESENT PM_STATUS(4LL)
40#define PM_SWAP PM_STATUS(2LL)
41
42
43/*
22 * kernel page flags 44 * kernel page flags
23 */ 45 */
24 46
@@ -126,6 +148,14 @@ static int nr_addr_ranges;
126static unsigned long opt_offset[MAX_ADDR_RANGES]; 148static unsigned long opt_offset[MAX_ADDR_RANGES];
127static unsigned long opt_size[MAX_ADDR_RANGES]; 149static unsigned long opt_size[MAX_ADDR_RANGES];
128 150
151#define MAX_VMAS 10240
152static int nr_vmas;
153static unsigned long pg_start[MAX_VMAS];
154static unsigned long pg_end[MAX_VMAS];
155static unsigned long voffset;
156
157static int pagemap_fd;
158
129#define MAX_BIT_FILTERS 64 159#define MAX_BIT_FILTERS 64
130static int nr_bit_filters; 160static int nr_bit_filters;
131static uint64_t opt_mask[MAX_BIT_FILTERS]; 161static uint64_t opt_mask[MAX_BIT_FILTERS];
@@ -135,7 +165,6 @@ static int page_size;
135 165
136#define PAGES_BATCH (64 << 10) /* 64k pages */ 166#define PAGES_BATCH (64 << 10) /* 64k pages */
137static int kpageflags_fd; 167static int kpageflags_fd;
138static uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH];
139 168
140#define HASH_SHIFT 13 169#define HASH_SHIFT 13
141#define HASH_SIZE (1 << HASH_SHIFT) 170#define HASH_SIZE (1 << HASH_SHIFT)
@@ -158,12 +187,17 @@ static uint64_t page_flags[HASH_SIZE];
158 type __min2 = (y); \ 187 type __min2 = (y); \
159 __min1 < __min2 ? __min1 : __min2; }) 188 __min1 < __min2 ? __min1 : __min2; })
160 189
161unsigned long pages2mb(unsigned long pages) 190#define max_t(type, x, y) ({ \
191 type __max1 = (x); \
192 type __max2 = (y); \
193 __max1 > __max2 ? __max1 : __max2; })
194
195static unsigned long pages2mb(unsigned long pages)
162{ 196{
163 return (pages * page_size) >> 20; 197 return (pages * page_size) >> 20;
164} 198}
165 199
166void fatal(const char *x, ...) 200static void fatal(const char *x, ...)
167{ 201{
168 va_list ap; 202 va_list ap;
169 203
@@ -178,7 +212,7 @@ void fatal(const char *x, ...)
178 * page flag names 212 * page flag names
179 */ 213 */
180 214
181char *page_flag_name(uint64_t flags) 215static char *page_flag_name(uint64_t flags)
182{ 216{
183 static char buf[65]; 217 static char buf[65];
184 int present; 218 int present;
@@ -197,7 +231,7 @@ char *page_flag_name(uint64_t flags)
197 return buf; 231 return buf;
198} 232}
199 233
200char *page_flag_longname(uint64_t flags) 234static char *page_flag_longname(uint64_t flags)
201{ 235{
202 static char buf[1024]; 236 static char buf[1024];
203 int i, n; 237 int i, n;
@@ -221,32 +255,40 @@ char *page_flag_longname(uint64_t flags)
221 * page list and summary 255 * page list and summary
222 */ 256 */
223 257
224void show_page_range(unsigned long offset, uint64_t flags) 258static void show_page_range(unsigned long offset, uint64_t flags)
225{ 259{
226 static uint64_t flags0; 260 static uint64_t flags0;
261 static unsigned long voff;
227 static unsigned long index; 262 static unsigned long index;
228 static unsigned long count; 263 static unsigned long count;
229 264
230 if (flags == flags0 && offset == index + count) { 265 if (flags == flags0 && offset == index + count &&
266 (!opt_pid || voffset == voff + count)) {
231 count++; 267 count++;
232 return; 268 return;
233 } 269 }
234 270
235 if (count) 271 if (count) {
236 printf("%lu\t%lu\t%s\n", 272 if (opt_pid)
273 printf("%lx\t", voff);
274 printf("%lx\t%lx\t%s\n",
237 index, count, page_flag_name(flags0)); 275 index, count, page_flag_name(flags0));
276 }
238 277
239 flags0 = flags; 278 flags0 = flags;
240 index = offset; 279 index = offset;
280 voff = voffset;
241 count = 1; 281 count = 1;
242} 282}
243 283
244void show_page(unsigned long offset, uint64_t flags) 284static void show_page(unsigned long offset, uint64_t flags)
245{ 285{
246 printf("%lu\t%s\n", offset, page_flag_name(flags)); 286 if (opt_pid)
287 printf("%lx\t", voffset);
288 printf("%lx\t%s\n", offset, page_flag_name(flags));
247} 289}
248 290
249void show_summary(void) 291static void show_summary(void)
250{ 292{
251 int i; 293 int i;
252 294
@@ -272,7 +314,7 @@ void show_summary(void)
272 * page flag filters 314 * page flag filters
273 */ 315 */
274 316
275int bit_mask_ok(uint64_t flags) 317static int bit_mask_ok(uint64_t flags)
276{ 318{
277 int i; 319 int i;
278 320
@@ -289,7 +331,7 @@ int bit_mask_ok(uint64_t flags)
289 return 1; 331 return 1;
290} 332}
291 333
292uint64_t expand_overloaded_flags(uint64_t flags) 334static uint64_t expand_overloaded_flags(uint64_t flags)
293{ 335{
294 /* SLOB/SLUB overload several page flags */ 336 /* SLOB/SLUB overload several page flags */
295 if (flags & BIT(SLAB)) { 337 if (flags & BIT(SLAB)) {
@@ -308,7 +350,7 @@ uint64_t expand_overloaded_flags(uint64_t flags)
308 return flags; 350 return flags;
309} 351}
310 352
311uint64_t well_known_flags(uint64_t flags) 353static uint64_t well_known_flags(uint64_t flags)
312{ 354{
313 /* hide flags intended only for kernel hacker */ 355 /* hide flags intended only for kernel hacker */
314 flags &= ~KPF_HACKERS_BITS; 356 flags &= ~KPF_HACKERS_BITS;
@@ -325,7 +367,7 @@ uint64_t well_known_flags(uint64_t flags)
325 * page frame walker 367 * page frame walker
326 */ 368 */
327 369
328int hash_slot(uint64_t flags) 370static int hash_slot(uint64_t flags)
329{ 371{
330 int k = HASH_KEY(flags); 372 int k = HASH_KEY(flags);
331 int i; 373 int i;
@@ -352,7 +394,7 @@ int hash_slot(uint64_t flags)
352 exit(EXIT_FAILURE); 394 exit(EXIT_FAILURE);
353} 395}
354 396
355void add_page(unsigned long offset, uint64_t flags) 397static void add_page(unsigned long offset, uint64_t flags)
356{ 398{
357 flags = expand_overloaded_flags(flags); 399 flags = expand_overloaded_flags(flags);
358 400
@@ -371,7 +413,7 @@ void add_page(unsigned long offset, uint64_t flags)
371 total_pages++; 413 total_pages++;
372} 414}
373 415
374void walk_pfn(unsigned long index, unsigned long count) 416static void walk_pfn(unsigned long index, unsigned long count)
375{ 417{
376 unsigned long batch; 418 unsigned long batch;
377 unsigned long n; 419 unsigned long n;
@@ -383,6 +425,8 @@ void walk_pfn(unsigned long index, unsigned long count)
383 lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET); 425 lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET);
384 426
385 while (count) { 427 while (count) {
428 uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH];
429
386 batch = min_t(unsigned long, count, PAGES_BATCH); 430 batch = min_t(unsigned long, count, PAGES_BATCH);
387 n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES); 431 n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES);
388 if (n == 0) 432 if (n == 0)
@@ -404,7 +448,82 @@ void walk_pfn(unsigned long index, unsigned long count)
404 } 448 }
405} 449}
406 450
407void walk_addr_ranges(void) 451
452#define PAGEMAP_BATCH 4096
453static unsigned long task_pfn(unsigned long pgoff)
454{
455 static uint64_t buf[PAGEMAP_BATCH];
456 static unsigned long start;
457 static long count;
458 uint64_t pfn;
459
460 if (pgoff < start || pgoff >= start + count) {
461 if (lseek64(pagemap_fd,
462 (uint64_t)pgoff * PM_ENTRY_BYTES,
463 SEEK_SET) < 0) {
464 perror("pagemap seek");
465 exit(EXIT_FAILURE);
466 }
467 count = read(pagemap_fd, buf, sizeof(buf));
468 if (count == 0)
469 return 0;
470 if (count < 0) {
471 perror("pagemap read");
472 exit(EXIT_FAILURE);
473 }
474 if (count % PM_ENTRY_BYTES) {
475 fatal("pagemap read not aligned.\n");
476 exit(EXIT_FAILURE);
477 }
478 count /= PM_ENTRY_BYTES;
479 start = pgoff;
480 }
481
482 pfn = buf[pgoff - start];
483 if (pfn & PM_PRESENT)
484 pfn = PM_PFRAME(pfn);
485 else
486 pfn = 0;
487
488 return pfn;
489}
490
491static void walk_task(unsigned long index, unsigned long count)
492{
493 int i = 0;
494 const unsigned long end = index + count;
495
496 while (index < end) {
497
498 while (pg_end[i] <= index)
499 if (++i >= nr_vmas)
500 return;
501 if (pg_start[i] >= end)
502 return;
503
504 voffset = max_t(unsigned long, pg_start[i], index);
505 index = min_t(unsigned long, pg_end[i], end);
506
507 assert(voffset < index);
508 for (; voffset < index; voffset++) {
509 unsigned long pfn = task_pfn(voffset);
510 if (pfn)
511 walk_pfn(pfn, 1);
512 }
513 }
514}
515
516static void add_addr_range(unsigned long offset, unsigned long size)
517{
518 if (nr_addr_ranges >= MAX_ADDR_RANGES)
519 fatal("too many addr ranges\n");
520
521 opt_offset[nr_addr_ranges] = offset;
522 opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset);
523 nr_addr_ranges++;
524}
525
526static void walk_addr_ranges(void)
408{ 527{
409 int i; 528 int i;
410 529
@@ -415,10 +534,13 @@ void walk_addr_ranges(void)
415 } 534 }
416 535
417 if (!nr_addr_ranges) 536 if (!nr_addr_ranges)
418 walk_pfn(0, ULONG_MAX); 537 add_addr_range(0, ULONG_MAX);
419 538
420 for (i = 0; i < nr_addr_ranges; i++) 539 for (i = 0; i < nr_addr_ranges; i++)
421 walk_pfn(opt_offset[i], opt_size[i]); 540 if (!opt_pid)
541 walk_pfn(opt_offset[i], opt_size[i]);
542 else
543 walk_task(opt_offset[i], opt_size[i]);
422 544
423 close(kpageflags_fd); 545 close(kpageflags_fd);
424} 546}
@@ -428,7 +550,7 @@ void walk_addr_ranges(void)
428 * user interface 550 * user interface
429 */ 551 */
430 552
431const char *page_flag_type(uint64_t flag) 553static const char *page_flag_type(uint64_t flag)
432{ 554{
433 if (flag & KPF_HACKERS_BITS) 555 if (flag & KPF_HACKERS_BITS)
434 return "(r)"; 556 return "(r)";
@@ -437,7 +559,7 @@ const char *page_flag_type(uint64_t flag)
437 return " "; 559 return " ";
438} 560}
439 561
440void usage(void) 562static void usage(void)
441{ 563{
442 int i, j; 564 int i, j;
443 565
@@ -446,8 +568,8 @@ void usage(void)
446" -r|--raw Raw mode, for kernel developers\n" 568" -r|--raw Raw mode, for kernel developers\n"
447" -a|--addr addr-spec Walk a range of pages\n" 569" -a|--addr addr-spec Walk a range of pages\n"
448" -b|--bits bits-spec Walk pages with specified bits\n" 570" -b|--bits bits-spec Walk pages with specified bits\n"
449#if 0 /* planned features */
450" -p|--pid pid Walk process address space\n" 571" -p|--pid pid Walk process address space\n"
572#if 0 /* planned features */
451" -f|--file filename Walk file address space\n" 573" -f|--file filename Walk file address space\n"
452#endif 574#endif
453" -l|--list Show page details in ranges\n" 575" -l|--list Show page details in ranges\n"
@@ -459,7 +581,7 @@ void usage(void)
459" N+M pages range from N to N+M-1\n" 581" N+M pages range from N to N+M-1\n"
460" N,M pages range from N to M-1\n" 582" N,M pages range from N to M-1\n"
461" N, pages range from N to end\n" 583" N, pages range from N to end\n"
462" ,M pages range from 0 to M\n" 584" ,M pages range from 0 to M-1\n"
463"bits-spec:\n" 585"bits-spec:\n"
464" bit1,bit2 (flags & (bit1|bit2)) != 0\n" 586" bit1,bit2 (flags & (bit1|bit2)) != 0\n"
465" bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n" 587" bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n"
@@ -482,7 +604,7 @@ void usage(void)
482 "(r) raw mode bits (o) overloaded bits\n"); 604 "(r) raw mode bits (o) overloaded bits\n");
483} 605}
484 606
485unsigned long long parse_number(const char *str) 607static unsigned long long parse_number(const char *str)
486{ 608{
487 unsigned long long n; 609 unsigned long long n;
488 610
@@ -494,26 +616,62 @@ unsigned long long parse_number(const char *str)
494 return n; 616 return n;
495} 617}
496 618
497void parse_pid(const char *str) 619static void parse_pid(const char *str)
498{ 620{
621 FILE *file;
622 char buf[5000];
623
499 opt_pid = parse_number(str); 624 opt_pid = parse_number(str);
500}
501 625
502void parse_file(const char *name) 626 sprintf(buf, "/proc/%d/pagemap", opt_pid);
503{ 627 pagemap_fd = open(buf, O_RDONLY);
628 if (pagemap_fd < 0) {
629 perror(buf);
630 exit(EXIT_FAILURE);
631 }
632
633 sprintf(buf, "/proc/%d/maps", opt_pid);
634 file = fopen(buf, "r");
635 if (!file) {
636 perror(buf);
637 exit(EXIT_FAILURE);
638 }
639
640 while (fgets(buf, sizeof(buf), file) != NULL) {
641 unsigned long vm_start;
642 unsigned long vm_end;
643 unsigned long long pgoff;
644 int major, minor;
645 char r, w, x, s;
646 unsigned long ino;
647 int n;
648
649 n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu",
650 &vm_start,
651 &vm_end,
652 &r, &w, &x, &s,
653 &pgoff,
654 &major, &minor,
655 &ino);
656 if (n < 10) {
657 fprintf(stderr, "unexpected line: %s\n", buf);
658 continue;
659 }
660 pg_start[nr_vmas] = vm_start / page_size;
661 pg_end[nr_vmas] = vm_end / page_size;
662 if (++nr_vmas >= MAX_VMAS) {
663 fprintf(stderr, "too many VMAs\n");
664 break;
665 }
666 }
667 fclose(file);
504} 668}
505 669
506void add_addr_range(unsigned long offset, unsigned long size) 670static void parse_file(const char *name)
507{ 671{
508 if (nr_addr_ranges >= MAX_ADDR_RANGES)
509 fatal("too much addr ranges\n");
510
511 opt_offset[nr_addr_ranges] = offset;
512 opt_size[nr_addr_ranges] = size;
513 nr_addr_ranges++;
514} 672}
515 673
516void parse_addr_range(const char *optarg) 674static void parse_addr_range(const char *optarg)
517{ 675{
518 unsigned long offset; 676 unsigned long offset;
519 unsigned long size; 677 unsigned long size;
@@ -547,7 +705,7 @@ void parse_addr_range(const char *optarg)
547 add_addr_range(offset, size); 705 add_addr_range(offset, size);
548} 706}
549 707
550void add_bits_filter(uint64_t mask, uint64_t bits) 708static void add_bits_filter(uint64_t mask, uint64_t bits)
551{ 709{
552 if (nr_bit_filters >= MAX_BIT_FILTERS) 710 if (nr_bit_filters >= MAX_BIT_FILTERS)
553 fatal("too much bit filters\n"); 711 fatal("too much bit filters\n");
@@ -557,7 +715,7 @@ void add_bits_filter(uint64_t mask, uint64_t bits)
557 nr_bit_filters++; 715 nr_bit_filters++;
558} 716}
559 717
560uint64_t parse_flag_name(const char *str, int len) 718static uint64_t parse_flag_name(const char *str, int len)
561{ 719{
562 int i; 720 int i;
563 721
@@ -577,7 +735,7 @@ uint64_t parse_flag_name(const char *str, int len)
577 return parse_number(str); 735 return parse_number(str);
578} 736}
579 737
580uint64_t parse_flag_names(const char *str, int all) 738static uint64_t parse_flag_names(const char *str, int all)
581{ 739{
582 const char *p = str; 740 const char *p = str;
583 uint64_t flags = 0; 741 uint64_t flags = 0;
@@ -596,7 +754,7 @@ uint64_t parse_flag_names(const char *str, int all)
596 return flags; 754 return flags;
597} 755}
598 756
599void parse_bits_mask(const char *optarg) 757static void parse_bits_mask(const char *optarg)
600{ 758{
601 uint64_t mask; 759 uint64_t mask;
602 uint64_t bits; 760 uint64_t bits;
@@ -621,7 +779,7 @@ void parse_bits_mask(const char *optarg)
621} 779}
622 780
623 781
624struct option opts[] = { 782static struct option opts[] = {
625 { "raw" , 0, NULL, 'r' }, 783 { "raw" , 0, NULL, 'r' },
626 { "pid" , 1, NULL, 'p' }, 784 { "pid" , 1, NULL, 'p' },
627 { "file" , 1, NULL, 'f' }, 785 { "file" , 1, NULL, 'f' },
@@ -676,8 +834,10 @@ int main(int argc, char *argv[])
676 } 834 }
677 } 835 }
678 836
837 if (opt_list && opt_pid)
838 printf("voffset\t");
679 if (opt_list == 1) 839 if (opt_list == 1)
680 printf("offset\tcount\tflags\n"); 840 printf("offset\tlen\tflags\n");
681 if (opt_list == 2) 841 if (opt_list == 2)
682 printf("offset\tflags\n"); 842 printf("offset\tflags\n");
683 843
diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c
index df3227605d59..92e729f4b676 100644
--- a/Documentation/vm/slabinfo.c
+++ b/Documentation/vm/slabinfo.c
@@ -87,7 +87,7 @@ int page_size;
87 87
88regex_t pattern; 88regex_t pattern;
89 89
90void fatal(const char *x, ...) 90static void fatal(const char *x, ...)
91{ 91{
92 va_list ap; 92 va_list ap;
93 93
@@ -97,7 +97,7 @@ void fatal(const char *x, ...)
97 exit(EXIT_FAILURE); 97 exit(EXIT_FAILURE);
98} 98}
99 99
100void usage(void) 100static void usage(void)
101{ 101{
102 printf("slabinfo 5/7/2007. (c) 2007 sgi.\n\n" 102 printf("slabinfo 5/7/2007. (c) 2007 sgi.\n\n"
103 "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" 103 "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n"
@@ -131,7 +131,7 @@ void usage(void)
131 ); 131 );
132} 132}
133 133
134unsigned long read_obj(const char *name) 134static unsigned long read_obj(const char *name)
135{ 135{
136 FILE *f = fopen(name, "r"); 136 FILE *f = fopen(name, "r");
137 137
@@ -151,7 +151,7 @@ unsigned long read_obj(const char *name)
151/* 151/*
152 * Get the contents of an attribute 152 * Get the contents of an attribute
153 */ 153 */
154unsigned long get_obj(const char *name) 154static unsigned long get_obj(const char *name)
155{ 155{
156 if (!read_obj(name)) 156 if (!read_obj(name))
157 return 0; 157 return 0;
@@ -159,7 +159,7 @@ unsigned long get_obj(const char *name)
159 return atol(buffer); 159 return atol(buffer);
160} 160}
161 161
162unsigned long get_obj_and_str(const char *name, char **x) 162static unsigned long get_obj_and_str(const char *name, char **x)
163{ 163{
164 unsigned long result = 0; 164 unsigned long result = 0;
165 char *p; 165 char *p;
@@ -178,7 +178,7 @@ unsigned long get_obj_and_str(const char *name, char **x)
178 return result; 178 return result;
179} 179}
180 180
181void set_obj(struct slabinfo *s, const char *name, int n) 181static void set_obj(struct slabinfo *s, const char *name, int n)
182{ 182{
183 char x[100]; 183 char x[100];
184 FILE *f; 184 FILE *f;
@@ -192,7 +192,7 @@ void set_obj(struct slabinfo *s, const char *name, int n)
192 fclose(f); 192 fclose(f);
193} 193}
194 194
195unsigned long read_slab_obj(struct slabinfo *s, const char *name) 195static unsigned long read_slab_obj(struct slabinfo *s, const char *name)
196{ 196{
197 char x[100]; 197 char x[100];
198 FILE *f; 198 FILE *f;
@@ -215,7 +215,7 @@ unsigned long read_slab_obj(struct slabinfo *s, const char *name)
215/* 215/*
216 * Put a size string together 216 * Put a size string together
217 */ 217 */
218int store_size(char *buffer, unsigned long value) 218static int store_size(char *buffer, unsigned long value)
219{ 219{
220 unsigned long divisor = 1; 220 unsigned long divisor = 1;
221 char trailer = 0; 221 char trailer = 0;
@@ -247,7 +247,7 @@ int store_size(char *buffer, unsigned long value)
247 return n; 247 return n;
248} 248}
249 249
250void decode_numa_list(int *numa, char *t) 250static void decode_numa_list(int *numa, char *t)
251{ 251{
252 int node; 252 int node;
253 int nr; 253 int nr;
@@ -272,7 +272,7 @@ void decode_numa_list(int *numa, char *t)
272 } 272 }
273} 273}
274 274
275void slab_validate(struct slabinfo *s) 275static void slab_validate(struct slabinfo *s)
276{ 276{
277 if (strcmp(s->name, "*") == 0) 277 if (strcmp(s->name, "*") == 0)
278 return; 278 return;
@@ -280,7 +280,7 @@ void slab_validate(struct slabinfo *s)
280 set_obj(s, "validate", 1); 280 set_obj(s, "validate", 1);
281} 281}
282 282
283void slab_shrink(struct slabinfo *s) 283static void slab_shrink(struct slabinfo *s)
284{ 284{
285 if (strcmp(s->name, "*") == 0) 285 if (strcmp(s->name, "*") == 0)
286 return; 286 return;
@@ -290,7 +290,7 @@ void slab_shrink(struct slabinfo *s)
290 290
291int line = 0; 291int line = 0;
292 292
293void first_line(void) 293static void first_line(void)
294{ 294{
295 if (show_activity) 295 if (show_activity)
296 printf("Name Objects Alloc Free %%Fast Fallb O\n"); 296 printf("Name Objects Alloc Free %%Fast Fallb O\n");
@@ -302,7 +302,7 @@ void first_line(void)
302/* 302/*
303 * Find the shortest alias of a slab 303 * Find the shortest alias of a slab
304 */ 304 */
305struct aliasinfo *find_one_alias(struct slabinfo *find) 305static struct aliasinfo *find_one_alias(struct slabinfo *find)
306{ 306{
307 struct aliasinfo *a; 307 struct aliasinfo *a;
308 struct aliasinfo *best = NULL; 308 struct aliasinfo *best = NULL;
@@ -318,18 +318,18 @@ struct aliasinfo *find_one_alias(struct slabinfo *find)
318 return best; 318 return best;
319} 319}
320 320
321unsigned long slab_size(struct slabinfo *s) 321static unsigned long slab_size(struct slabinfo *s)
322{ 322{
323 return s->slabs * (page_size << s->order); 323 return s->slabs * (page_size << s->order);
324} 324}
325 325
326unsigned long slab_activity(struct slabinfo *s) 326static unsigned long slab_activity(struct slabinfo *s)
327{ 327{
328 return s->alloc_fastpath + s->free_fastpath + 328 return s->alloc_fastpath + s->free_fastpath +
329 s->alloc_slowpath + s->free_slowpath; 329 s->alloc_slowpath + s->free_slowpath;
330} 330}
331 331
332void slab_numa(struct slabinfo *s, int mode) 332static void slab_numa(struct slabinfo *s, int mode)
333{ 333{
334 int node; 334 int node;
335 335
@@ -374,7 +374,7 @@ void slab_numa(struct slabinfo *s, int mode)
374 line++; 374 line++;
375} 375}
376 376
377void show_tracking(struct slabinfo *s) 377static void show_tracking(struct slabinfo *s)
378{ 378{
379 printf("\n%s: Kernel object allocation\n", s->name); 379 printf("\n%s: Kernel object allocation\n", s->name);
380 printf("-----------------------------------------------------------------------\n"); 380 printf("-----------------------------------------------------------------------\n");
@@ -392,7 +392,7 @@ void show_tracking(struct slabinfo *s)
392 392
393} 393}
394 394
395void ops(struct slabinfo *s) 395static void ops(struct slabinfo *s)
396{ 396{
397 if (strcmp(s->name, "*") == 0) 397 if (strcmp(s->name, "*") == 0)
398 return; 398 return;
@@ -405,14 +405,14 @@ void ops(struct slabinfo *s)
405 printf("\n%s has no kmem_cache operations\n", s->name); 405 printf("\n%s has no kmem_cache operations\n", s->name);
406} 406}
407 407
408const char *onoff(int x) 408static const char *onoff(int x)
409{ 409{
410 if (x) 410 if (x)
411 return "On "; 411 return "On ";
412 return "Off"; 412 return "Off";
413} 413}
414 414
415void slab_stats(struct slabinfo *s) 415static void slab_stats(struct slabinfo *s)
416{ 416{
417 unsigned long total_alloc; 417 unsigned long total_alloc;
418 unsigned long total_free; 418 unsigned long total_free;
@@ -477,7 +477,7 @@ void slab_stats(struct slabinfo *s)
477 s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total); 477 s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total);
478} 478}
479 479
480void report(struct slabinfo *s) 480static void report(struct slabinfo *s)
481{ 481{
482 if (strcmp(s->name, "*") == 0) 482 if (strcmp(s->name, "*") == 0)
483 return; 483 return;
@@ -518,7 +518,7 @@ void report(struct slabinfo *s)
518 slab_stats(s); 518 slab_stats(s);
519} 519}
520 520
521void slabcache(struct slabinfo *s) 521static void slabcache(struct slabinfo *s)
522{ 522{
523 char size_str[20]; 523 char size_str[20];
524 char dist_str[40]; 524 char dist_str[40];
@@ -593,7 +593,7 @@ void slabcache(struct slabinfo *s)
593/* 593/*
594 * Analyze debug options. Return false if something is amiss. 594 * Analyze debug options. Return false if something is amiss.
595 */ 595 */
596int debug_opt_scan(char *opt) 596static int debug_opt_scan(char *opt)
597{ 597{
598 if (!opt || !opt[0] || strcmp(opt, "-") == 0) 598 if (!opt || !opt[0] || strcmp(opt, "-") == 0)
599 return 1; 599 return 1;
@@ -642,7 +642,7 @@ int debug_opt_scan(char *opt)
642 return 1; 642 return 1;
643} 643}
644 644
645int slab_empty(struct slabinfo *s) 645static int slab_empty(struct slabinfo *s)
646{ 646{
647 if (s->objects > 0) 647 if (s->objects > 0)
648 return 0; 648 return 0;
@@ -657,7 +657,7 @@ int slab_empty(struct slabinfo *s)
657 return 1; 657 return 1;
658} 658}
659 659
660void slab_debug(struct slabinfo *s) 660static void slab_debug(struct slabinfo *s)
661{ 661{
662 if (strcmp(s->name, "*") == 0) 662 if (strcmp(s->name, "*") == 0)
663 return; 663 return;
@@ -717,7 +717,7 @@ void slab_debug(struct slabinfo *s)
717 set_obj(s, "trace", 1); 717 set_obj(s, "trace", 1);
718} 718}
719 719
720void totals(void) 720static void totals(void)
721{ 721{
722 struct slabinfo *s; 722 struct slabinfo *s;
723 723
@@ -976,7 +976,7 @@ void totals(void)
976 b1, b2, b3); 976 b1, b2, b3);
977} 977}
978 978
979void sort_slabs(void) 979static void sort_slabs(void)
980{ 980{
981 struct slabinfo *s1,*s2; 981 struct slabinfo *s1,*s2;
982 982
@@ -1005,7 +1005,7 @@ void sort_slabs(void)
1005 } 1005 }
1006} 1006}
1007 1007
1008void sort_aliases(void) 1008static void sort_aliases(void)
1009{ 1009{
1010 struct aliasinfo *a1,*a2; 1010 struct aliasinfo *a1,*a2;
1011 1011
@@ -1030,7 +1030,7 @@ void sort_aliases(void)
1030 } 1030 }
1031} 1031}
1032 1032
1033void link_slabs(void) 1033static void link_slabs(void)
1034{ 1034{
1035 struct aliasinfo *a; 1035 struct aliasinfo *a;
1036 struct slabinfo *s; 1036 struct slabinfo *s;
@@ -1048,7 +1048,7 @@ void link_slabs(void)
1048 } 1048 }
1049} 1049}
1050 1050
1051void alias(void) 1051static void alias(void)
1052{ 1052{
1053 struct aliasinfo *a; 1053 struct aliasinfo *a;
1054 char *active = NULL; 1054 char *active = NULL;
@@ -1079,7 +1079,7 @@ void alias(void)
1079} 1079}
1080 1080
1081 1081
1082void rename_slabs(void) 1082static void rename_slabs(void)
1083{ 1083{
1084 struct slabinfo *s; 1084 struct slabinfo *s;
1085 struct aliasinfo *a; 1085 struct aliasinfo *a;
@@ -1102,12 +1102,12 @@ void rename_slabs(void)
1102 } 1102 }
1103} 1103}
1104 1104
1105int slab_mismatch(char *slab) 1105static int slab_mismatch(char *slab)
1106{ 1106{
1107 return regexec(&pattern, slab, 0, NULL, 0); 1107 return regexec(&pattern, slab, 0, NULL, 0);
1108} 1108}
1109 1109
1110void read_slab_dir(void) 1110static void read_slab_dir(void)
1111{ 1111{
1112 DIR *dir; 1112 DIR *dir;
1113 struct dirent *de; 1113 struct dirent *de;
@@ -1209,7 +1209,7 @@ void read_slab_dir(void)
1209 fatal("Too many aliases\n"); 1209 fatal("Too many aliases\n");
1210} 1210}
1211 1211
1212void output_slabs(void) 1212static void output_slabs(void)
1213{ 1213{
1214 struct slabinfo *slab; 1214 struct slabinfo *slab;
1215 1215