aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Morton <akpm@osdl.org>2006-01-08 04:00:39 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-01-08 23:12:40 -0500
commit9d0243bca345d5ce25d3f4b74b7facb3a6df1232 (patch)
treea3a0a763bf83a483282dc1c3caab587941a98fc2
parentbec6b0c89b234090681a4516e20ac5debe3e7c59 (diff)
[PATCH] drop-pagecache
Add /proc/sys/vm/drop_caches. When written to, this will cause the kernel to discard as much pagecache and/or reclaimable slab objects as it can. This operation requires root permissions. It won't drop dirty data, so the user should run `sync' first. Caveats: a) Holds inode_lock for exorbitant amounts of time. b) Needs to be taught about NUMA nodes: propagate these all the way through so the discarding can be controlled on a per-node basis. This is a debugging feature: useful for getting consistent results between filesystem benchmarks. We could possibly put it under a config option, but it's less than 300 bytes. Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--Documentation/filesystems/proc.txt17
-rw-r--r--Documentation/sysctl/vm.txt3
-rw-r--r--fs/Makefile2
-rw-r--r--fs/drop_caches.c68
-rw-r--r--include/linux/mm.h7
-rw-r--r--include/linux/sysctl.h1
-rw-r--r--kernel/sysctl.c10
-rw-r--r--mm/truncate.c1
-rw-r--r--mm/vmscan.c3
9 files changed, 107 insertions, 5 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index d4773565ea2f..a4dcf42c2fd9 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1302,6 +1302,23 @@ VM has token based thrashing control mechanism and uses the token to prevent
1302unnecessary page faults in thrashing situation. The unit of the value is 1302unnecessary page faults in thrashing situation. The unit of the value is
1303second. The value would be useful to tune thrashing behavior. 1303second. The value would be useful to tune thrashing behavior.
1304 1304
1305drop_caches
1306-----------
1307
1308Writing to this will cause the kernel to drop clean caches, dentries and
1309inodes from memory, causing that memory to become free.
1310
1311To free pagecache:
1312 echo 1 > /proc/sys/vm/drop_caches
1313To free dentries and inodes:
1314 echo 2 > /proc/sys/vm/drop_caches
1315To free pagecache, dentries and inodes:
1316 echo 3 > /proc/sys/vm/drop_caches
1317
1318As this is a non-destructive operation and dirty objects are not freeable, the
1319user should run `sync' first.
1320
1321
13052.5 /proc/sys/dev - Device specific parameters 13222.5 /proc/sys/dev - Device specific parameters
1306---------------------------------------------- 1323----------------------------------------------
1307 1324
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 2f1aae32a5d9..89ba1a42a17d 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -26,12 +26,13 @@ Currently, these files are in /proc/sys/vm:
26- min_free_kbytes 26- min_free_kbytes
27- laptop_mode 27- laptop_mode
28- block_dump 28- block_dump
29- drop_caches
29 30
30============================================================== 31==============================================================
31 32
32dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, 33dirty_ratio, dirty_background_ratio, dirty_expire_centisecs,
33dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode, 34dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode,
34block_dump, swap_token_timeout: 35block_dump, swap_token_timeout, drop_caches:
35 36
36See Documentation/filesystems/proc.txt 37See Documentation/filesystems/proc.txt
37 38
diff --git a/fs/Makefile b/fs/Makefile
index 73676111ebbe..35e9aec608e4 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \
10 ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ 10 ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
11 attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ 11 attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
12 seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ 12 seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
13 ioprio.o pnode.o 13 ioprio.o pnode.o drop_caches.o
14 14
15obj-$(CONFIG_INOTIFY) += inotify.o 15obj-$(CONFIG_INOTIFY) += inotify.o
16obj-$(CONFIG_EPOLL) += eventpoll.o 16obj-$(CONFIG_EPOLL) += eventpoll.o
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
new file mode 100644
index 000000000000..4e4762389bdc
--- /dev/null
+++ b/fs/drop_caches.c
@@ -0,0 +1,68 @@
1/*
2 * Implement the manual drop-all-pagecache function
3 */
4
5#include <linux/kernel.h>
6#include <linux/mm.h>
7#include <linux/fs.h>
8#include <linux/writeback.h>
9#include <linux/sysctl.h>
10#include <linux/gfp.h>
11
12/*
 * A global variable is a bit ugly, but it keeps the code simple.
 *
 * Holds the last value written to the drop_caches sysctl.  The handler
 * in this file tests it as a bitmask: value 1 requests drop_pagecache(),
 * value 2 requests drop_slab(), and 3 requests both.
 */
13int sysctl_drop_caches;
14
15static void drop_pagecache_sb(struct super_block *sb)
16{
17 struct inode *inode;
18
19 spin_lock(&inode_lock);
20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
21 if (inode->i_state & (I_FREEING|I_WILL_FREE))
22 continue;
23 invalidate_inode_pages(inode->i_mapping);
24 }
25 spin_unlock(&inode_lock);
26}
27
/*
 * Call drop_pagecache_sb() on every entry of the super_blocks list that
 * has a non-NULL s_root.
 *
 * sb_lock is held while stepping through the list and released around
 * the per-superblock work, which runs with sb->s_umount held for read.
 */
28void drop_pagecache(void)
29{
30 struct super_block *sb;
31
32 spin_lock(&sb_lock);
33restart:
34 list_for_each_entry(sb, &super_blocks, s_list) {
 /* Bump s_count while still under sb_lock, before the lock is dropped. */
35 sb->s_count++;
36 spin_unlock(&sb_lock);
37 down_read(&sb->s_umount);
 /* Superblocks without a root dentry are skipped. */
38 if (sb->s_root)
39 drop_pagecache_sb(sb);
40 up_read(&sb->s_umount);
41 spin_lock(&sb_lock);
 /*
  * Give back the count taken above.  A non-zero return restarts the
  * walk from the list head -- presumably because the current entry can
  * no longer be used to continue the iteration; confirm against
  * __put_super_and_need_restart().
  */
42 if (__put_super_and_need_restart(sb))
43 goto restart;
44 }
45 spin_unlock(&sb_lock);
46}
47
48void drop_slab(void)
49{
50 int nr_objects;
51
52 do {
53 nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
54 } while (nr_objects > 10);
55}
56
57int drop_caches_sysctl_handler(ctl_table *table, int write,
58 struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
59{
60 proc_dointvec_minmax(table, write, file, buffer, length, ppos);
61 if (write) {
62 if (sysctl_drop_caches & 1)
63 drop_pagecache();
64 if (sysctl_drop_caches & 2)
65 drop_slab();
66 }
67 return 0;
68}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bc01fff3aa01..83c651f25188 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1036,5 +1036,12 @@ int in_gate_area_no_task(unsigned long addr);
1036/* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */ 1036/* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */
1037#define OOM_DISABLE -17 1037#define OOM_DISABLE -17
1038 1038
/*
 * Declarations for the drop_caches sysctl: the proc handler and the two
 * drop helpers are defined in fs/drop_caches.c; shrink_slab() is defined
 * in mm/vmscan.c and is called by drop_slab().
 */
1039int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
1040 void __user *, size_t *, loff_t *);
1041int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
1042 unsigned long lru_pages);
1043void drop_pagecache(void);
1044void drop_slab(void);
1045
1039#endif /* __KERNEL__ */ 1046#endif /* __KERNEL__ */
1040#endif /* _LINUX_MM_H */ 1047#endif /* _LINUX_MM_H */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index a9b80fc7f0f3..4cd267fe87ec 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -180,6 +180,7 @@ enum
180 VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */ 180 VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */
181 VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */ 181 VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */
182 VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ 182 VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
183 VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */
183}; 184};
184 185
185 186
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a85047bb5739..8dcf6fd5b0f9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -68,6 +68,7 @@ extern int min_free_kbytes;
68extern int printk_ratelimit_jiffies; 68extern int printk_ratelimit_jiffies;
69extern int printk_ratelimit_burst; 69extern int printk_ratelimit_burst;
70extern int pid_max_min, pid_max_max; 70extern int pid_max_min, pid_max_max;
71extern int sysctl_drop_caches;
71 72
72#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) 73#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
73int unknown_nmi_panic; 74int unknown_nmi_panic;
@@ -775,6 +776,15 @@ static ctl_table vm_table[] = {
775 .strategy = &sysctl_intvec, 776 .strategy = &sysctl_intvec,
776 }, 777 },
777 { 778 {
779 .ctl_name = VM_DROP_PAGECACHE,
780 .procname = "drop_caches",
781 .data = &sysctl_drop_caches,
782 .maxlen = sizeof(int),
783 .mode = 0644,
784 .proc_handler = drop_caches_sysctl_handler,
785 .strategy = &sysctl_intvec,
786 },
787 {
778 .ctl_name = VM_MIN_FREE_KBYTES, 788 .ctl_name = VM_MIN_FREE_KBYTES,
779 .procname = "min_free_kbytes", 789 .procname = "min_free_kbytes",
780 .data = &min_free_kbytes, 790 .data = &min_free_kbytes,
diff --git a/mm/truncate.c b/mm/truncate.c
index 7dee32745901..b1a463d0fe71 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -249,7 +249,6 @@ unlock:
249 break; 249 break;
250 } 250 }
251 pagevec_release(&pvec); 251 pagevec_release(&pvec);
252 cond_resched();
253 } 252 }
254 return ret; 253 return ret;
255} 254}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index be8235fb1939..428c5801d4b4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -180,8 +180,7 @@ EXPORT_SYMBOL(remove_shrinker);
180 * 180 *
181 * Returns the number of slab objects which we shrunk. 181 * Returns the number of slab objects which we shrunk.
182 */ 182 */
183static int shrink_slab(unsigned long scanned, gfp_t gfp_mask, 183int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages)
184 unsigned long lru_pages)
185{ 184{
186 struct shrinker *shrinker; 185 struct shrinker *shrinker;
187 int ret = 0; 186 int ret = 0;