about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Shailabh Nagar <nagar@watson.ibm.com> 2006-07-14 03:24:37 -0400
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-07-15 00:53:56 -0400
commit: 0ff922452df86f3e9a2c6f705c4588ec62d096a7 (patch)
tree: ac84041bfb63f12d0e2db733c46b2cd2438b4882
parent: ca74e92b4698276b6696f15a801759f50944f387 (diff)
[PATCH] per-task-delay-accounting: sync block I/O and swapin delay collection
Unlike earlier iterations of the delay accounting patches, delays are now collected only for the actual I/O waits, rather than trying to cover the delays seen in the I/O submission paths.

Account separately for block I/O delays incurred as a result of swapin page faults, whose frequency can be affected by the task's/process's rss limit. Hence swapin delays can act as feedback for rss limit changes, independent of I/O priority changes.

Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
Cc: Jes Sorensen <jes@sgi.com>
Cc: Peter Chubb <peterc@gelato.unsw.edu.au>
Cc: Erich Focht <efocht@ess.nec.de>
Cc: Levent Serinol <lserinol@gmail.com>
Cc: Jay Lan <jlan@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--  include/linux/delayacct.h  25
-rw-r--r--  include/linux/sched.h      13
-rw-r--r--  kernel/delayacct.c         19
-rw-r--r--  kernel/sched.c              5
-rw-r--r--  mm/memory.c                 4
5 files changed, 66 insertions, 0 deletions
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 9572cfa1f129..0ecbf9aad8e1 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -19,6 +19,13 @@
19 19
20#include <linux/sched.h> 20#include <linux/sched.h>
21 21
22/*
23 * Per-task flags relevant to delay accounting
24 * maintained privately to avoid exhausting similar flags in sched.h:PF_*
25 * Used to set current->delays->flags
26 */
27#define DELAYACCT_PF_SWAPIN 0x00000001 /* I am doing a swapin */
28
22#ifdef CONFIG_TASK_DELAY_ACCT 29#ifdef CONFIG_TASK_DELAY_ACCT
23 30
24extern int delayacct_on; /* Delay accounting turned on/off */ 31extern int delayacct_on; /* Delay accounting turned on/off */
@@ -26,6 +33,8 @@ extern kmem_cache_t *delayacct_cache;
26extern void delayacct_init(void); 33extern void delayacct_init(void);
27extern void __delayacct_tsk_init(struct task_struct *); 34extern void __delayacct_tsk_init(struct task_struct *);
28extern void __delayacct_tsk_exit(struct task_struct *); 35extern void __delayacct_tsk_exit(struct task_struct *);
36extern void __delayacct_blkio_start(void);
37extern void __delayacct_blkio_end(void);
29 38
30static inline void delayacct_set_flag(int flag) 39static inline void delayacct_set_flag(int flag)
31{ 40{
@@ -53,6 +62,18 @@ static inline void delayacct_tsk_exit(struct task_struct *tsk)
53 __delayacct_tsk_exit(tsk); 62 __delayacct_tsk_exit(tsk);
54} 63}
55 64
65static inline void delayacct_blkio_start(void)
66{
67 if (current->delays)
68 __delayacct_blkio_start();
69}
70
71static inline void delayacct_blkio_end(void)
72{
73 if (current->delays)
74 __delayacct_blkio_end();
75}
76
56#else 77#else
57static inline void delayacct_set_flag(int flag) 78static inline void delayacct_set_flag(int flag)
58{} 79{}
@@ -64,6 +85,10 @@ static inline void delayacct_tsk_init(struct task_struct *tsk)
64{} 85{}
65static inline void delayacct_tsk_exit(struct task_struct *tsk) 86static inline void delayacct_tsk_exit(struct task_struct *tsk)
66{} 87{}
88static inline void delayacct_blkio_start(void)
89{}
90static inline void delayacct_blkio_end(void)
91{}
67#endif /* CONFIG_TASK_DELAY_ACCT */ 92#endif /* CONFIG_TASK_DELAY_ACCT */
68 93
69#endif 94#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7a54e62763c5..2f43f1fb7de7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -566,6 +566,19 @@ struct task_delay_info {
566 * Atomicity of updates to XXX_delay, XXX_count protected by 566 * Atomicity of updates to XXX_delay, XXX_count protected by
567 * single lock above (split into XXX_lock if contention is an issue). 567 * single lock above (split into XXX_lock if contention is an issue).
568 */ 568 */
569
570 /*
571 * XXX_count is incremented on every XXX operation, the delay
572 * associated with the operation is added to XXX_delay.
573 * XXX_delay contains the accumulated delay time in nanoseconds.
574 */
575 struct timespec blkio_start, blkio_end; /* Shared by blkio, swapin */
576 u64 blkio_delay; /* wait for sync block io completion */
577 u64 swapin_delay; /* wait for swapin block io completion */
578 u32 blkio_count; /* total count of the number of sync block */
579 /* io operations performed */
580 u32 swapin_count; /* total count of the number of swapin block */
581 /* io operations performed */
569}; 582};
570#endif 583#endif
571 584
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index fbf7f2284952..3546b0800f9f 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -85,3 +85,22 @@ static void delayacct_end(struct timespec *start, struct timespec *end,
85 spin_unlock(&current->delays->lock); 85 spin_unlock(&current->delays->lock);
86} 86}
87 87
88void __delayacct_blkio_start(void)
89{
90 delayacct_start(&current->delays->blkio_start);
91}
92
93void __delayacct_blkio_end(void)
94{
95 if (current->delays->flags & DELAYACCT_PF_SWAPIN)
96 /* Swapin block I/O */
97 delayacct_end(&current->delays->blkio_start,
98 &current->delays->blkio_end,
99 &current->delays->swapin_delay,
100 &current->delays->swapin_count);
101 else /* Other block I/O */
102 delayacct_end(&current->delays->blkio_start,
103 &current->delays->blkio_end,
104 &current->delays->blkio_delay,
105 &current->delays->blkio_count);
106}
diff --git a/kernel/sched.c b/kernel/sched.c
index e9a0b61f12ab..9d42cbfc4f8b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -51,6 +51,7 @@
51#include <linux/times.h> 51#include <linux/times.h>
52#include <linux/acct.h> 52#include <linux/acct.h>
53#include <linux/kprobes.h> 53#include <linux/kprobes.h>
54#include <linux/delayacct.h>
54#include <asm/tlb.h> 55#include <asm/tlb.h>
55 56
56#include <asm/unistd.h> 57#include <asm/unistd.h>
@@ -4534,9 +4535,11 @@ void __sched io_schedule(void)
4534{ 4535{
4535 struct rq *rq = &__raw_get_cpu_var(runqueues); 4536 struct rq *rq = &__raw_get_cpu_var(runqueues);
4536 4537
4538 delayacct_blkio_start();
4537 atomic_inc(&rq->nr_iowait); 4539 atomic_inc(&rq->nr_iowait);
4538 schedule(); 4540 schedule();
4539 atomic_dec(&rq->nr_iowait); 4541 atomic_dec(&rq->nr_iowait);
4542 delayacct_blkio_end();
4540} 4543}
4541EXPORT_SYMBOL(io_schedule); 4544EXPORT_SYMBOL(io_schedule);
4542 4545
@@ -4545,9 +4548,11 @@ long __sched io_schedule_timeout(long timeout)
4545 struct rq *rq = &__raw_get_cpu_var(runqueues); 4548 struct rq *rq = &__raw_get_cpu_var(runqueues);
4546 long ret; 4549 long ret;
4547 4550
4551 delayacct_blkio_start();
4548 atomic_inc(&rq->nr_iowait); 4552 atomic_inc(&rq->nr_iowait);
4549 ret = schedule_timeout(timeout); 4553 ret = schedule_timeout(timeout);
4550 atomic_dec(&rq->nr_iowait); 4554 atomic_dec(&rq->nr_iowait);
4555 delayacct_blkio_end();
4551 return ret; 4556 return ret;
4552} 4557}
4553 4558
diff --git a/mm/memory.c b/mm/memory.c
index de8bc85dc8f3..109e9866237e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -47,6 +47,7 @@
47#include <linux/pagemap.h> 47#include <linux/pagemap.h>
48#include <linux/rmap.h> 48#include <linux/rmap.h>
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/delayacct.h>
50#include <linux/init.h> 51#include <linux/init.h>
51 52
52#include <asm/pgalloc.h> 53#include <asm/pgalloc.h>
@@ -1934,6 +1935,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
1934 migration_entry_wait(mm, pmd, address); 1935 migration_entry_wait(mm, pmd, address);
1935 goto out; 1936 goto out;
1936 } 1937 }
1938 delayacct_set_flag(DELAYACCT_PF_SWAPIN);
1937 page = lookup_swap_cache(entry); 1939 page = lookup_swap_cache(entry);
1938 if (!page) { 1940 if (!page) {
1939 swapin_readahead(entry, address, vma); 1941 swapin_readahead(entry, address, vma);
@@ -1946,6 +1948,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
1946 page_table = pte_offset_map_lock(mm, pmd, address, &ptl); 1948 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
1947 if (likely(pte_same(*page_table, orig_pte))) 1949 if (likely(pte_same(*page_table, orig_pte)))
1948 ret = VM_FAULT_OOM; 1950 ret = VM_FAULT_OOM;
1951 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
1949 goto unlock; 1952 goto unlock;
1950 } 1953 }
1951 1954
@@ -1955,6 +1958,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
1955 grab_swap_token(); 1958 grab_swap_token();
1956 } 1959 }
1957 1960
1961 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
1958 mark_page_accessed(page); 1962 mark_page_accessed(page);
1959 lock_page(page); 1963 lock_page(page);
1960 1964