diff options
| author | Andrew Morton <akpm@osdl.org> | 2006-01-08 04:00:39 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-01-08 23:12:40 -0500 |
| commit | 9d0243bca345d5ce25d3f4b74b7facb3a6df1232 (patch) | |
| tree | a3a0a763bf83a483282dc1c3caab587941a98fc2 | |
| parent | bec6b0c89b234090681a4516e20ac5debe3e7c59 (diff) | |
[PATCH] drop-pagecache
Add /proc/sys/vm/drop_caches. When written to, this will cause the kernel to
discard as much pagecache and/or reclaimable slab objects as it can. This
operation requires root permissions.
It won't drop dirty data, so the user should run `sync' first.
Caveats:
a) Holds inode_lock for exorbitant amounts of time.
b) Needs to be taught about NUMA nodes: propagate these all the way through
so the discarding can be controlled on a per-node basis.
This is a debugging feature: useful for getting consistent results between
filesystem benchmarks. We could possibly put it under a config option, but
it's less than 300 bytes.
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
| -rw-r--r-- | Documentation/filesystems/proc.txt | 17 | ||||
| -rw-r--r-- | Documentation/sysctl/vm.txt | 3 | ||||
| -rw-r--r-- | fs/Makefile | 2 | ||||
| -rw-r--r-- | fs/drop_caches.c | 68 | ||||
| -rw-r--r-- | include/linux/mm.h | 7 | ||||
| -rw-r--r-- | include/linux/sysctl.h | 1 | ||||
| -rw-r--r-- | kernel/sysctl.c | 10 | ||||
| -rw-r--r-- | mm/truncate.c | 1 | ||||
| -rw-r--r-- | mm/vmscan.c | 3 |
9 files changed, 107 insertions, 5 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index d4773565ea2f..a4dcf42c2fd9 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
| @@ -1302,6 +1302,23 @@ VM has token based thrashing control mechanism and uses the token to prevent | |||
| 1302 | unnecessary page faults in thrashing situation. The unit of the value is | 1302 | unnecessary page faults in thrashing situation. The unit of the value is |
| 1303 | second. The value would be useful to tune thrashing behavior. | 1303 | second. The value would be useful to tune thrashing behavior. |
| 1304 | 1304 | ||
| 1305 | drop_caches | ||
| 1306 | ----------- | ||
| 1307 | |||
| 1308 | Writing to this will cause the kernel to drop clean caches, dentries and | ||
| 1309 | inodes from memory, causing that memory to become free. | ||
| 1310 | |||
| 1311 | To free pagecache: | ||
| 1312 | echo 1 > /proc/sys/vm/drop_caches | ||
| 1313 | To free dentries and inodes: | ||
| 1314 | echo 2 > /proc/sys/vm/drop_caches | ||
| 1315 | To free pagecache, dentries and inodes: | ||
| 1316 | echo 3 > /proc/sys/vm/drop_caches | ||
| 1317 | |||
| 1318 | As this is a non-destructive operation and dirty objects are not freeable, the | ||
| 1319 | user should run `sync' first. | ||
| 1320 | |||
| 1321 | |||
| 1305 | 2.5 /proc/sys/dev - Device specific parameters | 1322 | 2.5 /proc/sys/dev - Device specific parameters |
| 1306 | ---------------------------------------------- | 1323 | ---------------------------------------------- |
| 1307 | 1324 | ||
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 2f1aae32a5d9..89ba1a42a17d 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt | |||
| @@ -26,12 +26,13 @@ Currently, these files are in /proc/sys/vm: | |||
| 26 | - min_free_kbytes | 26 | - min_free_kbytes |
| 27 | - laptop_mode | 27 | - laptop_mode |
| 28 | - block_dump | 28 | - block_dump |
| 29 | - drop_caches | ||
| 29 | 30 | ||
| 30 | ============================================================== | 31 | ============================================================== |
| 31 | 32 | ||
| 32 | dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, | 33 | dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, |
| 33 | dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode, | 34 | dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode, |
| 34 | block_dump, swap_token_timeout: | 35 | block_dump, swap_token_timeout, drop_caches: |
| 35 | 36 | ||
| 36 | See Documentation/filesystems/proc.txt | 37 | See Documentation/filesystems/proc.txt |
| 37 | 38 | ||
diff --git a/fs/Makefile b/fs/Makefile index 73676111ebbe..35e9aec608e4 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
| @@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ | |||
| 10 | ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ | 10 | ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ |
| 11 | attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ | 11 | attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ |
| 12 | seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ | 12 | seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ |
| 13 | ioprio.o pnode.o | 13 | ioprio.o pnode.o drop_caches.o |
| 14 | 14 | ||
| 15 | obj-$(CONFIG_INOTIFY) += inotify.o | 15 | obj-$(CONFIG_INOTIFY) += inotify.o |
| 16 | obj-$(CONFIG_EPOLL) += eventpoll.o | 16 | obj-$(CONFIG_EPOLL) += eventpoll.o |
diff --git a/fs/drop_caches.c b/fs/drop_caches.c new file mode 100644 index 000000000000..4e4762389bdc --- /dev/null +++ b/fs/drop_caches.c | |||
| @@ -0,0 +1,68 @@ | |||
| 1 | /* | ||
| 2 | * Implement the manual drop-all-pagecache function | ||
| 3 | */ | ||
| 4 | |||
| 5 | #include <linux/kernel.h> | ||
| 6 | #include <linux/mm.h> | ||
| 7 | #include <linux/fs.h> | ||
| 8 | #include <linux/writeback.h> | ||
| 9 | #include <linux/sysctl.h> | ||
| 10 | #include <linux/gfp.h> | ||
| 11 | |||
| 12 | /* A global variable is a bit ugly, but it keeps the code simple */ | ||
| 13 | int sysctl_drop_caches; | ||
| 14 | |||
| 15 | static void drop_pagecache_sb(struct super_block *sb) | ||
| 16 | { | ||
| 17 | struct inode *inode; | ||
| 18 | |||
| 19 | spin_lock(&inode_lock); | ||
| 20 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | ||
| 21 | if (inode->i_state & (I_FREEING|I_WILL_FREE)) | ||
| 22 | continue; | ||
| 23 | invalidate_inode_pages(inode->i_mapping); | ||
| 24 | } | ||
| 25 | spin_unlock(&inode_lock); | ||
| 26 | } | ||
| 27 | |||
| 28 | void drop_pagecache(void) | ||
| 29 | { | ||
| 30 | struct super_block *sb; | ||
| 31 | |||
| 32 | spin_lock(&sb_lock); | ||
| 33 | restart: | ||
| 34 | list_for_each_entry(sb, &super_blocks, s_list) { | ||
| 35 | sb->s_count++; | ||
| 36 | spin_unlock(&sb_lock); | ||
| 37 | down_read(&sb->s_umount); | ||
| 38 | if (sb->s_root) | ||
| 39 | drop_pagecache_sb(sb); | ||
| 40 | up_read(&sb->s_umount); | ||
| 41 | spin_lock(&sb_lock); | ||
| 42 | if (__put_super_and_need_restart(sb)) | ||
| 43 | goto restart; | ||
| 44 | } | ||
| 45 | spin_unlock(&sb_lock); | ||
| 46 | } | ||
| 47 | |||
| 48 | void drop_slab(void) | ||
| 49 | { | ||
| 50 | int nr_objects; | ||
| 51 | |||
| 52 | do { | ||
| 53 | nr_objects = shrink_slab(1000, GFP_KERNEL, 1000); | ||
| 54 | } while (nr_objects > 10); | ||
| 55 | } | ||
| 56 | |||
| 57 | int drop_caches_sysctl_handler(ctl_table *table, int write, | ||
| 58 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | ||
| 59 | { | ||
| 60 | proc_dointvec_minmax(table, write, file, buffer, length, ppos); | ||
| 61 | if (write) { | ||
| 62 | if (sysctl_drop_caches & 1) | ||
| 63 | drop_pagecache(); | ||
| 64 | if (sysctl_drop_caches & 2) | ||
| 65 | drop_slab(); | ||
| 66 | } | ||
| 67 | return 0; | ||
| 68 | } | ||
diff --git a/include/linux/mm.h b/include/linux/mm.h index bc01fff3aa01..83c651f25188 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
| @@ -1036,5 +1036,12 @@ int in_gate_area_no_task(unsigned long addr); | |||
| 1036 | /* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */ | 1036 | /* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */ |
| 1037 | #define OOM_DISABLE -17 | 1037 | #define OOM_DISABLE -17 |
| 1038 | 1038 | ||
| 1039 | int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, | ||
| 1040 | void __user *, size_t *, loff_t *); | ||
| 1041 | int shrink_slab(unsigned long scanned, gfp_t gfp_mask, | ||
| 1042 | unsigned long lru_pages); | ||
| 1043 | void drop_pagecache(void); | ||
| 1044 | void drop_slab(void); | ||
| 1045 | |||
| 1039 | #endif /* __KERNEL__ */ | 1046 | #endif /* __KERNEL__ */ |
| 1040 | #endif /* _LINUX_MM_H */ | 1047 | #endif /* _LINUX_MM_H */ |
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index a9b80fc7f0f3..4cd267fe87ec 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h | |||
| @@ -180,6 +180,7 @@ enum | |||
| 180 | VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */ | 180 | VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */ |
| 181 | VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */ | 181 | VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */ |
| 182 | VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ | 182 | VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ |
| 183 | VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ | ||
| 183 | }; | 184 | }; |
| 184 | 185 | ||
| 185 | 186 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a85047bb5739..8dcf6fd5b0f9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -68,6 +68,7 @@ extern int min_free_kbytes; | |||
| 68 | extern int printk_ratelimit_jiffies; | 68 | extern int printk_ratelimit_jiffies; |
| 69 | extern int printk_ratelimit_burst; | 69 | extern int printk_ratelimit_burst; |
| 70 | extern int pid_max_min, pid_max_max; | 70 | extern int pid_max_min, pid_max_max; |
| 71 | extern int sysctl_drop_caches; | ||
| 71 | 72 | ||
| 72 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) | 73 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) |
| 73 | int unknown_nmi_panic; | 74 | int unknown_nmi_panic; |
| @@ -775,6 +776,15 @@ static ctl_table vm_table[] = { | |||
| 775 | .strategy = &sysctl_intvec, | 776 | .strategy = &sysctl_intvec, |
| 776 | }, | 777 | }, |
| 777 | { | 778 | { |
| 779 | .ctl_name = VM_DROP_PAGECACHE, | ||
| 780 | .procname = "drop_caches", | ||
| 781 | .data = &sysctl_drop_caches, | ||
| 782 | .maxlen = sizeof(int), | ||
| 783 | .mode = 0644, | ||
| 784 | .proc_handler = drop_caches_sysctl_handler, | ||
| 785 | .strategy = &sysctl_intvec, | ||
| 786 | }, | ||
| 787 | { | ||
| 778 | .ctl_name = VM_MIN_FREE_KBYTES, | 788 | .ctl_name = VM_MIN_FREE_KBYTES, |
| 779 | .procname = "min_free_kbytes", | 789 | .procname = "min_free_kbytes", |
| 780 | .data = &min_free_kbytes, | 790 | .data = &min_free_kbytes, |
diff --git a/mm/truncate.c b/mm/truncate.c index 7dee32745901..b1a463d0fe71 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
| @@ -249,7 +249,6 @@ unlock: | |||
| 249 | break; | 249 | break; |
| 250 | } | 250 | } |
| 251 | pagevec_release(&pvec); | 251 | pagevec_release(&pvec); |
| 252 | cond_resched(); | ||
| 253 | } | 252 | } |
| 254 | return ret; | 253 | return ret; |
| 255 | } | 254 | } |
diff --git a/mm/vmscan.c b/mm/vmscan.c index be8235fb1939..428c5801d4b4 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
| @@ -180,8 +180,7 @@ EXPORT_SYMBOL(remove_shrinker); | |||
| 180 | * | 180 | * |
| 181 | * Returns the number of slab objects which we shrunk. | 181 | * Returns the number of slab objects which we shrunk. |
| 182 | */ | 182 | */ |
| 183 | static int shrink_slab(unsigned long scanned, gfp_t gfp_mask, | 183 | int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages) |
| 184 | unsigned long lru_pages) | ||
| 185 | { | 184 | { |
| 186 | struct shrinker *shrinker; | 185 | struct shrinker *shrinker; |
| 187 | int ret = 0; | 186 | int ret = 0; |
