diff options
author | Shailabh Nagar <nagar@watson.ibm.com> | 2006-07-14 03:24:37 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-07-15 00:53:56 -0400 |
commit | 0ff922452df86f3e9a2c6f705c4588ec62d096a7 (patch) | |
tree | ac84041bfb63f12d0e2db733c46b2cd2438b4882 | |
parent | ca74e92b4698276b6696f15a801759f50944f387 (diff) |
[PATCH] per-task-delay-accounting: sync block I/O and swapin delay collection
Unlike earlier iterations of the delay accounting patches, now delays are only
collected for the actual I/O waits rather than trying to cover the delays seen
in I/O submission paths.
Account separately for block I/O delays incurred as a result of swapin page
faults whose frequency can be affected by the task/process' rss limit. Hence
swapin delays can act as feedback for rss limit changes independent of I/O
priority changes.
Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
Cc: Jes Sorensen <jes@sgi.com>
Cc: Peter Chubb <peterc@gelato.unsw.edu.au>
Cc: Erich Focht <efocht@ess.nec.de>
Cc: Levent Serinol <lserinol@gmail.com>
Cc: Jay Lan <jlan@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | include/linux/delayacct.h | 25 | ||||
-rw-r--r-- | include/linux/sched.h | 13 | ||||
-rw-r--r-- | kernel/delayacct.c | 19 | ||||
-rw-r--r-- | kernel/sched.c | 5 | ||||
-rw-r--r-- | mm/memory.c | 4 |
5 files changed, 66 insertions, 0 deletions
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 9572cfa1f129..0ecbf9aad8e1 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h | |||
@@ -19,6 +19,13 @@ | |||
19 | 19 | ||
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | 21 | ||
22 | /* | ||
23 | * Per-task flags relevant to delay accounting | ||
24 | * maintained privately to avoid exhausting similar flags in sched.h:PF_* | ||
25 | * Used to set current->delays->flags | ||
26 | */ | ||
27 | #define DELAYACCT_PF_SWAPIN 0x00000001 /* I am doing a swapin */ | ||
28 | |||
22 | #ifdef CONFIG_TASK_DELAY_ACCT | 29 | #ifdef CONFIG_TASK_DELAY_ACCT |
23 | 30 | ||
24 | extern int delayacct_on; /* Delay accounting turned on/off */ | 31 | extern int delayacct_on; /* Delay accounting turned on/off */ |
@@ -26,6 +33,8 @@ extern kmem_cache_t *delayacct_cache; | |||
26 | extern void delayacct_init(void); | 33 | extern void delayacct_init(void); |
27 | extern void __delayacct_tsk_init(struct task_struct *); | 34 | extern void __delayacct_tsk_init(struct task_struct *); |
28 | extern void __delayacct_tsk_exit(struct task_struct *); | 35 | extern void __delayacct_tsk_exit(struct task_struct *); |
36 | extern void __delayacct_blkio_start(void); | ||
37 | extern void __delayacct_blkio_end(void); | ||
29 | 38 | ||
30 | static inline void delayacct_set_flag(int flag) | 39 | static inline void delayacct_set_flag(int flag) |
31 | { | 40 | { |
@@ -53,6 +62,18 @@ static inline void delayacct_tsk_exit(struct task_struct *tsk) | |||
53 | __delayacct_tsk_exit(tsk); | 62 | __delayacct_tsk_exit(tsk); |
54 | } | 63 | } |
55 | 64 | ||
65 | static inline void delayacct_blkio_start(void) | ||
66 | { | ||
67 | if (current->delays) | ||
68 | __delayacct_blkio_start(); | ||
69 | } | ||
70 | |||
71 | static inline void delayacct_blkio_end(void) | ||
72 | { | ||
73 | if (current->delays) | ||
74 | __delayacct_blkio_end(); | ||
75 | } | ||
76 | |||
56 | #else | 77 | #else |
57 | static inline void delayacct_set_flag(int flag) | 78 | static inline void delayacct_set_flag(int flag) |
58 | {} | 79 | {} |
@@ -64,6 +85,10 @@ static inline void delayacct_tsk_init(struct task_struct *tsk) | |||
64 | {} | 85 | {} |
65 | static inline void delayacct_tsk_exit(struct task_struct *tsk) | 86 | static inline void delayacct_tsk_exit(struct task_struct *tsk) |
66 | {} | 87 | {} |
88 | static inline void delayacct_blkio_start(void) | ||
89 | {} | ||
90 | static inline void delayacct_blkio_end(void) | ||
91 | {} | ||
67 | #endif /* CONFIG_TASK_DELAY_ACCT */ | 92 | #endif /* CONFIG_TASK_DELAY_ACCT */ |
68 | 93 | ||
69 | #endif | 94 | #endif |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 7a54e62763c5..2f43f1fb7de7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -566,6 +566,19 @@ struct task_delay_info { | |||
566 | * Atomicity of updates to XXX_delay, XXX_count protected by | 566 | * Atomicity of updates to XXX_delay, XXX_count protected by |
567 | * single lock above (split into XXX_lock if contention is an issue). | 567 | * single lock above (split into XXX_lock if contention is an issue). |
568 | */ | 568 | */ |
569 | |||
570 | /* | ||
571 | * XXX_count is incremented on every XXX operation, the delay | ||
572 | * associated with the operation is added to XXX_delay. | ||
573 | * XXX_delay contains the accumulated delay time in nanoseconds. | ||
574 | */ | ||
575 | struct timespec blkio_start, blkio_end; /* Shared by blkio, swapin */ | ||
576 | u64 blkio_delay; /* wait for sync block io completion */ | ||
577 | u64 swapin_delay; /* wait for swapin block io completion */ | ||
578 | u32 blkio_count; /* total count of the number of sync block */ | ||
579 | /* io operations performed */ | ||
580 | u32 swapin_count; /* total count of the number of swapin block */ | ||
581 | /* io operations performed */ | ||
569 | }; | 582 | }; |
570 | #endif | 583 | #endif |
571 | 584 | ||
diff --git a/kernel/delayacct.c b/kernel/delayacct.c index fbf7f2284952..3546b0800f9f 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c | |||
@@ -85,3 +85,22 @@ static void delayacct_end(struct timespec *start, struct timespec *end, | |||
85 | spin_unlock(&current->delays->lock); | 85 | spin_unlock(&current->delays->lock); |
86 | } | 86 | } |
87 | 87 | ||
88 | void __delayacct_blkio_start(void) | ||
89 | { | ||
90 | delayacct_start(&current->delays->blkio_start); | ||
91 | } | ||
92 | |||
93 | void __delayacct_blkio_end(void) | ||
94 | { | ||
95 | if (current->delays->flags & DELAYACCT_PF_SWAPIN) | ||
96 | /* Swapin block I/O */ | ||
97 | delayacct_end(&current->delays->blkio_start, | ||
98 | &current->delays->blkio_end, | ||
99 | &current->delays->swapin_delay, | ||
100 | &current->delays->swapin_count); | ||
101 | else /* Other block I/O */ | ||
102 | delayacct_end(&current->delays->blkio_start, | ||
103 | &current->delays->blkio_end, | ||
104 | &current->delays->blkio_delay, | ||
105 | &current->delays->blkio_count); | ||
106 | } | ||
diff --git a/kernel/sched.c b/kernel/sched.c index e9a0b61f12ab..9d42cbfc4f8b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <linux/times.h> | 51 | #include <linux/times.h> |
52 | #include <linux/acct.h> | 52 | #include <linux/acct.h> |
53 | #include <linux/kprobes.h> | 53 | #include <linux/kprobes.h> |
54 | #include <linux/delayacct.h> | ||
54 | #include <asm/tlb.h> | 55 | #include <asm/tlb.h> |
55 | 56 | ||
56 | #include <asm/unistd.h> | 57 | #include <asm/unistd.h> |
@@ -4534,9 +4535,11 @@ void __sched io_schedule(void) | |||
4534 | { | 4535 | { |
4535 | struct rq *rq = &__raw_get_cpu_var(runqueues); | 4536 | struct rq *rq = &__raw_get_cpu_var(runqueues); |
4536 | 4537 | ||
4538 | delayacct_blkio_start(); | ||
4537 | atomic_inc(&rq->nr_iowait); | 4539 | atomic_inc(&rq->nr_iowait); |
4538 | schedule(); | 4540 | schedule(); |
4539 | atomic_dec(&rq->nr_iowait); | 4541 | atomic_dec(&rq->nr_iowait); |
4542 | delayacct_blkio_end(); | ||
4540 | } | 4543 | } |
4541 | EXPORT_SYMBOL(io_schedule); | 4544 | EXPORT_SYMBOL(io_schedule); |
4542 | 4545 | ||
@@ -4545,9 +4548,11 @@ long __sched io_schedule_timeout(long timeout) | |||
4545 | struct rq *rq = &__raw_get_cpu_var(runqueues); | 4548 | struct rq *rq = &__raw_get_cpu_var(runqueues); |
4546 | long ret; | 4549 | long ret; |
4547 | 4550 | ||
4551 | delayacct_blkio_start(); | ||
4548 | atomic_inc(&rq->nr_iowait); | 4552 | atomic_inc(&rq->nr_iowait); |
4549 | ret = schedule_timeout(timeout); | 4553 | ret = schedule_timeout(timeout); |
4550 | atomic_dec(&rq->nr_iowait); | 4554 | atomic_dec(&rq->nr_iowait); |
4555 | delayacct_blkio_end(); | ||
4551 | return ret; | 4556 | return ret; |
4552 | } | 4557 | } |
4553 | 4558 | ||
diff --git a/mm/memory.c b/mm/memory.c index de8bc85dc8f3..109e9866237e 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/pagemap.h> | 47 | #include <linux/pagemap.h> |
48 | #include <linux/rmap.h> | 48 | #include <linux/rmap.h> |
49 | #include <linux/module.h> | 49 | #include <linux/module.h> |
50 | #include <linux/delayacct.h> | ||
50 | #include <linux/init.h> | 51 | #include <linux/init.h> |
51 | 52 | ||
52 | #include <asm/pgalloc.h> | 53 | #include <asm/pgalloc.h> |
@@ -1934,6 +1935,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1934 | migration_entry_wait(mm, pmd, address); | 1935 | migration_entry_wait(mm, pmd, address); |
1935 | goto out; | 1936 | goto out; |
1936 | } | 1937 | } |
1938 | delayacct_set_flag(DELAYACCT_PF_SWAPIN); | ||
1937 | page = lookup_swap_cache(entry); | 1939 | page = lookup_swap_cache(entry); |
1938 | if (!page) { | 1940 | if (!page) { |
1939 | swapin_readahead(entry, address, vma); | 1941 | swapin_readahead(entry, address, vma); |
@@ -1946,6 +1948,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1946 | page_table = pte_offset_map_lock(mm, pmd, address, &ptl); | 1948 | page_table = pte_offset_map_lock(mm, pmd, address, &ptl); |
1947 | if (likely(pte_same(*page_table, orig_pte))) | 1949 | if (likely(pte_same(*page_table, orig_pte))) |
1948 | ret = VM_FAULT_OOM; | 1950 | ret = VM_FAULT_OOM; |
1951 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); | ||
1949 | goto unlock; | 1952 | goto unlock; |
1950 | } | 1953 | } |
1951 | 1954 | ||
@@ -1955,6 +1958,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1955 | grab_swap_token(); | 1958 | grab_swap_token(); |
1956 | } | 1959 | } |
1957 | 1960 | ||
1961 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); | ||
1958 | mark_page_accessed(page); | 1962 | mark_page_accessed(page); |
1959 | lock_page(page); | 1963 | lock_page(page); |
1960 | 1964 | ||