diff options
author | Andrew Morton <akpm@osdl.org> | 2006-01-08 04:00:39 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-01-08 23:12:40 -0500 |
commit | 9d0243bca345d5ce25d3f4b74b7facb3a6df1232 (patch) | |
tree | a3a0a763bf83a483282dc1c3caab587941a98fc2 | |
parent | bec6b0c89b234090681a4516e20ac5debe3e7c59 (diff) |
[PATCH] drop-pagecache
Add /proc/sys/vm/drop_caches. When written to, this will cause the kernel to
discard as much pagecache and/or reclaimable slab objects as it can. This
operation requires root permissions.
It won't drop dirty data, so the user should run `sync' first.
Caveats:
a) Holds inode_lock for exorbitant amounts of time.
b) Needs to be taught about NUMA nodes: propagate these all the way through
so the discarding can be controlled on a per-node basis.
This is a debugging feature: useful for getting consistent results between
filesystem benchmarks. We could possibly put it under a config option, but
it's less than 300 bytes.
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | Documentation/filesystems/proc.txt | 17 | ||||
-rw-r--r-- | Documentation/sysctl/vm.txt | 3 | ||||
-rw-r--r-- | fs/Makefile | 2 | ||||
-rw-r--r-- | fs/drop_caches.c | 68 | ||||
-rw-r--r-- | include/linux/mm.h | 7 | ||||
-rw-r--r-- | include/linux/sysctl.h | 1 | ||||
-rw-r--r-- | kernel/sysctl.c | 10 | ||||
-rw-r--r-- | mm/truncate.c | 1 | ||||
-rw-r--r-- | mm/vmscan.c | 3 |
9 files changed, 107 insertions, 5 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index d4773565ea2f..a4dcf42c2fd9 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -1302,6 +1302,23 @@ VM has token based thrashing control mechanism and uses the token to prevent | |||
1302 | unnecessary page faults in thrashing situation. The unit of the value is | 1302 | unnecessary page faults in thrashing situation. The unit of the value is |
1303 | second. The value would be useful to tune thrashing behavior. | 1303 | second. The value would be useful to tune thrashing behavior. |
1304 | 1304 | ||
1305 | drop_caches | ||
1306 | ----------- | ||
1307 | |||
1308 | Writing to this will cause the kernel to drop clean caches, dentries and | ||
1309 | inodes from memory, causing that memory to become free. | ||
1310 | |||
1311 | To free pagecache: | ||
1312 | echo 1 > /proc/sys/vm/drop_caches | ||
1313 | To free dentries and inodes: | ||
1314 | echo 2 > /proc/sys/vm/drop_caches | ||
1315 | To free pagecache, dentries and inodes: | ||
1316 | echo 3 > /proc/sys/vm/drop_caches | ||
1317 | |||
1318 | As this is a non-destructive operation and dirty objects are not freeable, the | ||
1319 | user should run `sync' first. | ||
1320 | |||
1321 | |||
1305 | 2.5 /proc/sys/dev - Device specific parameters | 1322 | 2.5 /proc/sys/dev - Device specific parameters |
1306 | ---------------------------------------------- | 1323 | ---------------------------------------------- |
1307 | 1324 | ||
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 2f1aae32a5d9..89ba1a42a17d 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt | |||
@@ -26,12 +26,13 @@ Currently, these files are in /proc/sys/vm: | |||
26 | - min_free_kbytes | 26 | - min_free_kbytes |
27 | - laptop_mode | 27 | - laptop_mode |
28 | - block_dump | 28 | - block_dump |
29 | - drop_caches | ||
29 | 30 | ||
30 | ============================================================== | 31 | ============================================================== |
31 | 32 | ||
32 | dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, | 33 | dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, |
33 | dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode, | 34 | dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode, |
34 | block_dump, swap_token_timeout: | 35 | block_dump, swap_token_timeout, drop_caches: |
35 | 36 | ||
36 | See Documentation/filesystems/proc.txt | 37 | See Documentation/filesystems/proc.txt |
37 | 38 | ||
diff --git a/fs/Makefile b/fs/Makefile index 73676111ebbe..35e9aec608e4 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ | |||
10 | ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ | 10 | ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ |
11 | attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ | 11 | attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ |
12 | seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ | 12 | seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ |
13 | ioprio.o pnode.o | 13 | ioprio.o pnode.o drop_caches.o |
14 | 14 | ||
15 | obj-$(CONFIG_INOTIFY) += inotify.o | 15 | obj-$(CONFIG_INOTIFY) += inotify.o |
16 | obj-$(CONFIG_EPOLL) += eventpoll.o | 16 | obj-$(CONFIG_EPOLL) += eventpoll.o |
diff --git a/fs/drop_caches.c b/fs/drop_caches.c new file mode 100644 index 000000000000..4e4762389bdc --- /dev/null +++ b/fs/drop_caches.c | |||
@@ -0,0 +1,68 @@ | |||
1 | /* | ||
2 | * Implement the manual drop-all-pagecache function | ||
3 | */ | ||
4 | |||
5 | #include <linux/kernel.h> | ||
6 | #include <linux/mm.h> | ||
7 | #include <linux/fs.h> | ||
8 | #include <linux/writeback.h> | ||
9 | #include <linux/sysctl.h> | ||
10 | #include <linux/gfp.h> | ||
11 | |||
12 | /* A global variable is a bit ugly, but it keeps the code simple */ | ||
13 | int sysctl_drop_caches; | ||
14 | |||
15 | static void drop_pagecache_sb(struct super_block *sb) | ||
16 | { | ||
17 | struct inode *inode; | ||
18 | |||
19 | spin_lock(&inode_lock); | ||
20 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | ||
21 | if (inode->i_state & (I_FREEING|I_WILL_FREE)) | ||
22 | continue; | ||
23 | invalidate_inode_pages(inode->i_mapping); | ||
24 | } | ||
25 | spin_unlock(&inode_lock); | ||
26 | } | ||
27 | |||
28 | void drop_pagecache(void) | ||
29 | { | ||
30 | struct super_block *sb; | ||
31 | |||
32 | spin_lock(&sb_lock); | ||
33 | restart: | ||
34 | list_for_each_entry(sb, &super_blocks, s_list) { | ||
35 | sb->s_count++; | ||
36 | spin_unlock(&sb_lock); | ||
37 | down_read(&sb->s_umount); | ||
38 | if (sb->s_root) | ||
39 | drop_pagecache_sb(sb); | ||
40 | up_read(&sb->s_umount); | ||
41 | spin_lock(&sb_lock); | ||
42 | if (__put_super_and_need_restart(sb)) | ||
43 | goto restart; | ||
44 | } | ||
45 | spin_unlock(&sb_lock); | ||
46 | } | ||
47 | |||
48 | void drop_slab(void) | ||
49 | { | ||
50 | int nr_objects; | ||
51 | |||
52 | do { | ||
53 | nr_objects = shrink_slab(1000, GFP_KERNEL, 1000); | ||
54 | } while (nr_objects > 10); | ||
55 | } | ||
56 | |||
57 | int drop_caches_sysctl_handler(ctl_table *table, int write, | ||
58 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | ||
59 | { | ||
60 | proc_dointvec_minmax(table, write, file, buffer, length, ppos); | ||
61 | if (write) { | ||
62 | if (sysctl_drop_caches & 1) | ||
63 | drop_pagecache(); | ||
64 | if (sysctl_drop_caches & 2) | ||
65 | drop_slab(); | ||
66 | } | ||
67 | return 0; | ||
68 | } | ||
diff --git a/include/linux/mm.h b/include/linux/mm.h index bc01fff3aa01..83c651f25188 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1036,5 +1036,12 @@ int in_gate_area_no_task(unsigned long addr); | |||
1036 | /* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */ | 1036 | /* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */ |
1037 | #define OOM_DISABLE -17 | 1037 | #define OOM_DISABLE -17 |
1038 | 1038 | ||
1039 | int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, | ||
1040 | void __user *, size_t *, loff_t *); | ||
1041 | int shrink_slab(unsigned long scanned, gfp_t gfp_mask, | ||
1042 | unsigned long lru_pages); | ||
1043 | void drop_pagecache(void); | ||
1044 | void drop_slab(void); | ||
1045 | |||
1039 | #endif /* __KERNEL__ */ | 1046 | #endif /* __KERNEL__ */ |
1040 | #endif /* _LINUX_MM_H */ | 1047 | #endif /* _LINUX_MM_H */ |
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index a9b80fc7f0f3..4cd267fe87ec 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h | |||
@@ -180,6 +180,7 @@ enum | |||
180 | VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */ | 180 | VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */ |
181 | VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */ | 181 | VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */ |
182 | VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ | 182 | VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ |
183 | VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ | ||
183 | }; | 184 | }; |
184 | 185 | ||
185 | 186 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a85047bb5739..8dcf6fd5b0f9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -68,6 +68,7 @@ extern int min_free_kbytes; | |||
68 | extern int printk_ratelimit_jiffies; | 68 | extern int printk_ratelimit_jiffies; |
69 | extern int printk_ratelimit_burst; | 69 | extern int printk_ratelimit_burst; |
70 | extern int pid_max_min, pid_max_max; | 70 | extern int pid_max_min, pid_max_max; |
71 | extern int sysctl_drop_caches; | ||
71 | 72 | ||
72 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) | 73 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) |
73 | int unknown_nmi_panic; | 74 | int unknown_nmi_panic; |
@@ -775,6 +776,15 @@ static ctl_table vm_table[] = { | |||
775 | .strategy = &sysctl_intvec, | 776 | .strategy = &sysctl_intvec, |
776 | }, | 777 | }, |
777 | { | 778 | { |
779 | .ctl_name = VM_DROP_PAGECACHE, | ||
780 | .procname = "drop_caches", | ||
781 | .data = &sysctl_drop_caches, | ||
782 | .maxlen = sizeof(int), | ||
783 | .mode = 0644, | ||
784 | .proc_handler = drop_caches_sysctl_handler, | ||
785 | .strategy = &sysctl_intvec, | ||
786 | }, | ||
787 | { | ||
778 | .ctl_name = VM_MIN_FREE_KBYTES, | 788 | .ctl_name = VM_MIN_FREE_KBYTES, |
779 | .procname = "min_free_kbytes", | 789 | .procname = "min_free_kbytes", |
780 | .data = &min_free_kbytes, | 790 | .data = &min_free_kbytes, |
diff --git a/mm/truncate.c b/mm/truncate.c index 7dee32745901..b1a463d0fe71 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -249,7 +249,6 @@ unlock: | |||
249 | break; | 249 | break; |
250 | } | 250 | } |
251 | pagevec_release(&pvec); | 251 | pagevec_release(&pvec); |
252 | cond_resched(); | ||
253 | } | 252 | } |
254 | return ret; | 253 | return ret; |
255 | } | 254 | } |
diff --git a/mm/vmscan.c b/mm/vmscan.c index be8235fb1939..428c5801d4b4 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -180,8 +180,7 @@ EXPORT_SYMBOL(remove_shrinker); | |||
180 | * | 180 | * |
181 | * Returns the number of slab objects which we shrunk. | 181 | * Returns the number of slab objects which we shrunk. |
182 | */ | 182 | */ |
183 | static int shrink_slab(unsigned long scanned, gfp_t gfp_mask, | 183 | int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages) |
184 | unsigned long lru_pages) | ||
185 | { | 184 | { |
186 | struct shrinker *shrinker; | 185 | struct shrinker *shrinker; |
187 | int ret = 0; | 186 | int ret = 0; |