diff options
author | Jens Axboe <axboe@suse.de> | 2005-06-27 04:55:12 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-27 17:33:29 -0400 |
commit | 22e2c507c301c3dbbcf91b4948b88f78842ee6c9 (patch) | |
tree | 9a97c91d1362e69703aa286021daffb8a5456f4c /fs | |
parent | 020f46a39eb7b99a575b9f4d105fce2b142acdf1 (diff) |
[PATCH] Update cfq io scheduler to time sliced design
This updates the CFQ io scheduler to the new time sliced design (cfq
v3). It provides full process fairness, while giving excellent
aggregate system throughput even for many competing processes. It
supports io priorities, either inherited from the cpu nice value or set
directly with the ioprio_get/set syscalls. The latter closely mimic
set/getpriority.
This import is based on my latest from -mm.
Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/Makefile | 1 | ||||
-rw-r--r-- | fs/ioprio.c | 172 | ||||
-rw-r--r-- | fs/reiserfs/journal.c | 12 |
3 files changed, 185 insertions, 0 deletions
diff --git a/fs/Makefile b/fs/Makefile index fc92e59e9fa..20edcf28bfd 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -10,6 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ | |||
10 | ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ | 10 | ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ |
11 | attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ | 11 | attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ |
12 | seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ | 12 | seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ |
13 | ioprio.o | ||
13 | 14 | ||
14 | obj-$(CONFIG_EPOLL) += eventpoll.o | 15 | obj-$(CONFIG_EPOLL) += eventpoll.o |
15 | obj-$(CONFIG_COMPAT) += compat.o | 16 | obj-$(CONFIG_COMPAT) += compat.o |
diff --git a/fs/ioprio.c b/fs/ioprio.c new file mode 100644 index 00000000000..663e420636d --- /dev/null +++ b/fs/ioprio.c | |||
@@ -0,0 +1,172 @@ | |||
1 | /* | ||
2 | * fs/ioprio.c | ||
3 | * | ||
4 | * Copyright (C) 2004 Jens Axboe <axboe@suse.de> | ||
5 | * | ||
6 | * Helper functions for setting/querying io priorities of processes. The | ||
7 | * system calls closely mimmick getpriority/setpriority, see the man page for | ||
8 | * those. The prio argument is a composite of prio class and prio data, where | ||
9 | * the data argument has meaning within that class. The standard scheduling | ||
10 | * classes have 8 distinct prio levels, with 0 being the highest prio and 7 | ||
11 | * being the lowest. | ||
12 | * | ||
13 | * IOW, setting BE scheduling class with prio 2 is done ala: | ||
14 | * | ||
15 | * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2; | ||
16 | * | ||
17 | * ioprio_set(PRIO_PROCESS, pid, prio); | ||
18 | * | ||
19 | * See also Documentation/block/ioprio.txt | ||
20 | * | ||
21 | */ | ||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/ioprio.h> | ||
24 | #include <linux/blkdev.h> | ||
25 | |||
26 | static int set_task_ioprio(struct task_struct *task, int ioprio) | ||
27 | { | ||
28 | struct io_context *ioc; | ||
29 | |||
30 | if (task->uid != current->euid && | ||
31 | task->uid != current->uid && !capable(CAP_SYS_NICE)) | ||
32 | return -EPERM; | ||
33 | |||
34 | task_lock(task); | ||
35 | |||
36 | task->ioprio = ioprio; | ||
37 | |||
38 | ioc = task->io_context; | ||
39 | if (ioc && ioc->set_ioprio) | ||
40 | ioc->set_ioprio(ioc, ioprio); | ||
41 | |||
42 | task_unlock(task); | ||
43 | return 0; | ||
44 | } | ||
45 | |||
46 | asmlinkage int sys_ioprio_set(int which, int who, int ioprio) | ||
47 | { | ||
48 | int class = IOPRIO_PRIO_CLASS(ioprio); | ||
49 | int data = IOPRIO_PRIO_DATA(ioprio); | ||
50 | struct task_struct *p, *g; | ||
51 | struct user_struct *user; | ||
52 | int ret; | ||
53 | |||
54 | switch (class) { | ||
55 | case IOPRIO_CLASS_RT: | ||
56 | if (!capable(CAP_SYS_ADMIN)) | ||
57 | return -EPERM; | ||
58 | /* fall through, rt has prio field too */ | ||
59 | case IOPRIO_CLASS_BE: | ||
60 | if (data >= IOPRIO_BE_NR || data < 0) | ||
61 | return -EINVAL; | ||
62 | |||
63 | break; | ||
64 | case IOPRIO_CLASS_IDLE: | ||
65 | break; | ||
66 | default: | ||
67 | return -EINVAL; | ||
68 | } | ||
69 | |||
70 | ret = -ESRCH; | ||
71 | read_lock_irq(&tasklist_lock); | ||
72 | switch (which) { | ||
73 | case IOPRIO_WHO_PROCESS: | ||
74 | if (!who) | ||
75 | p = current; | ||
76 | else | ||
77 | p = find_task_by_pid(who); | ||
78 | if (p) | ||
79 | ret = set_task_ioprio(p, ioprio); | ||
80 | break; | ||
81 | case IOPRIO_WHO_PGRP: | ||
82 | if (!who) | ||
83 | who = process_group(current); | ||
84 | do_each_task_pid(who, PIDTYPE_PGID, p) { | ||
85 | ret = set_task_ioprio(p, ioprio); | ||
86 | if (ret) | ||
87 | break; | ||
88 | } while_each_task_pid(who, PIDTYPE_PGID, p); | ||
89 | break; | ||
90 | case IOPRIO_WHO_USER: | ||
91 | if (!who) | ||
92 | user = current->user; | ||
93 | else | ||
94 | user = find_user(who); | ||
95 | |||
96 | if (!user) | ||
97 | break; | ||
98 | |||
99 | do_each_thread(g, p) { | ||
100 | if (p->uid != who) | ||
101 | continue; | ||
102 | ret = set_task_ioprio(p, ioprio); | ||
103 | if (ret) | ||
104 | break; | ||
105 | } while_each_thread(g, p); | ||
106 | |||
107 | if (who) | ||
108 | free_uid(user); | ||
109 | break; | ||
110 | default: | ||
111 | ret = -EINVAL; | ||
112 | } | ||
113 | |||
114 | read_unlock_irq(&tasklist_lock); | ||
115 | return ret; | ||
116 | } | ||
117 | |||
118 | asmlinkage int sys_ioprio_get(int which, int who) | ||
119 | { | ||
120 | struct task_struct *g, *p; | ||
121 | struct user_struct *user; | ||
122 | int ret = -ESRCH; | ||
123 | |||
124 | read_lock_irq(&tasklist_lock); | ||
125 | switch (which) { | ||
126 | case IOPRIO_WHO_PROCESS: | ||
127 | if (!who) | ||
128 | p = current; | ||
129 | else | ||
130 | p = find_task_by_pid(who); | ||
131 | if (p) | ||
132 | ret = p->ioprio; | ||
133 | break; | ||
134 | case IOPRIO_WHO_PGRP: | ||
135 | if (!who) | ||
136 | who = process_group(current); | ||
137 | do_each_task_pid(who, PIDTYPE_PGID, p) { | ||
138 | if (ret == -ESRCH) | ||
139 | ret = p->ioprio; | ||
140 | else | ||
141 | ret = ioprio_best(ret, p->ioprio); | ||
142 | } while_each_task_pid(who, PIDTYPE_PGID, p); | ||
143 | break; | ||
144 | case IOPRIO_WHO_USER: | ||
145 | if (!who) | ||
146 | user = current->user; | ||
147 | else | ||
148 | user = find_user(who); | ||
149 | |||
150 | if (!user) | ||
151 | break; | ||
152 | |||
153 | do_each_thread(g, p) { | ||
154 | if (p->uid != user->uid) | ||
155 | continue; | ||
156 | if (ret == -ESRCH) | ||
157 | ret = p->ioprio; | ||
158 | else | ||
159 | ret = ioprio_best(ret, p->ioprio); | ||
160 | } while_each_thread(g, p); | ||
161 | |||
162 | if (who) | ||
163 | free_uid(user); | ||
164 | break; | ||
165 | default: | ||
166 | ret = -EINVAL; | ||
167 | } | ||
168 | |||
169 | read_unlock_irq(&tasklist_lock); | ||
170 | return ret; | ||
171 | } | ||
172 | |||
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 7b87707acc3..d1bcf0da672 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -645,18 +645,22 @@ struct buffer_chunk { | |||
645 | 645 | ||
646 | static void write_chunk(struct buffer_chunk *chunk) { | 646 | static void write_chunk(struct buffer_chunk *chunk) { |
647 | int i; | 647 | int i; |
648 | get_fs_excl(); | ||
648 | for (i = 0; i < chunk->nr ; i++) { | 649 | for (i = 0; i < chunk->nr ; i++) { |
649 | submit_logged_buffer(chunk->bh[i]) ; | 650 | submit_logged_buffer(chunk->bh[i]) ; |
650 | } | 651 | } |
651 | chunk->nr = 0; | 652 | chunk->nr = 0; |
653 | put_fs_excl(); | ||
652 | } | 654 | } |
653 | 655 | ||
654 | static void write_ordered_chunk(struct buffer_chunk *chunk) { | 656 | static void write_ordered_chunk(struct buffer_chunk *chunk) { |
655 | int i; | 657 | int i; |
658 | get_fs_excl(); | ||
656 | for (i = 0; i < chunk->nr ; i++) { | 659 | for (i = 0; i < chunk->nr ; i++) { |
657 | submit_ordered_buffer(chunk->bh[i]) ; | 660 | submit_ordered_buffer(chunk->bh[i]) ; |
658 | } | 661 | } |
659 | chunk->nr = 0; | 662 | chunk->nr = 0; |
663 | put_fs_excl(); | ||
660 | } | 664 | } |
661 | 665 | ||
662 | static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, | 666 | static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, |
@@ -918,6 +922,8 @@ static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list | |||
918 | return 0 ; | 922 | return 0 ; |
919 | } | 923 | } |
920 | 924 | ||
925 | get_fs_excl(); | ||
926 | |||
921 | /* before we can put our commit blocks on disk, we have to make sure everyone older than | 927 | /* before we can put our commit blocks on disk, we have to make sure everyone older than |
922 | ** us is on disk too | 928 | ** us is on disk too |
923 | */ | 929 | */ |
@@ -1055,6 +1061,7 @@ put_jl: | |||
1055 | 1061 | ||
1056 | if (retval) | 1062 | if (retval) |
1057 | reiserfs_abort (s, retval, "Journal write error in %s", __FUNCTION__); | 1063 | reiserfs_abort (s, retval, "Journal write error in %s", __FUNCTION__); |
1064 | put_fs_excl(); | ||
1058 | return retval; | 1065 | return retval; |
1059 | } | 1066 | } |
1060 | 1067 | ||
@@ -1251,6 +1258,8 @@ static int flush_journal_list(struct super_block *s, | |||
1251 | return 0 ; | 1258 | return 0 ; |
1252 | } | 1259 | } |
1253 | 1260 | ||
1261 | get_fs_excl(); | ||
1262 | |||
1254 | /* if all the work is already done, get out of here */ | 1263 | /* if all the work is already done, get out of here */ |
1255 | if (atomic_read(&(jl->j_nonzerolen)) <= 0 && | 1264 | if (atomic_read(&(jl->j_nonzerolen)) <= 0 && |
1256 | atomic_read(&(jl->j_commit_left)) <= 0) { | 1265 | atomic_read(&(jl->j_commit_left)) <= 0) { |
@@ -1450,6 +1459,7 @@ flush_older_and_return: | |||
1450 | put_journal_list(s, jl); | 1459 | put_journal_list(s, jl); |
1451 | if (flushall) | 1460 | if (flushall) |
1452 | up(&journal->j_flush_sem); | 1461 | up(&journal->j_flush_sem); |
1462 | put_fs_excl(); | ||
1453 | return err ; | 1463 | return err ; |
1454 | } | 1464 | } |
1455 | 1465 | ||
@@ -2719,6 +2729,7 @@ relock: | |||
2719 | th->t_trans_id = journal->j_trans_id ; | 2729 | th->t_trans_id = journal->j_trans_id ; |
2720 | unlock_journal(p_s_sb) ; | 2730 | unlock_journal(p_s_sb) ; |
2721 | INIT_LIST_HEAD (&th->t_list); | 2731 | INIT_LIST_HEAD (&th->t_list); |
2732 | get_fs_excl(); | ||
2722 | return 0 ; | 2733 | return 0 ; |
2723 | 2734 | ||
2724 | out_fail: | 2735 | out_fail: |
@@ -3526,6 +3537,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, struct super_b | |||
3526 | BUG_ON (th->t_refcount > 1); | 3537 | BUG_ON (th->t_refcount > 1); |
3527 | BUG_ON (!th->t_trans_id); | 3538 | BUG_ON (!th->t_trans_id); |
3528 | 3539 | ||
3540 | put_fs_excl(); | ||
3529 | current->journal_info = th->t_handle_save; | 3541 | current->journal_info = th->t_handle_save; |
3530 | reiserfs_check_lock_depth(p_s_sb, "journal end"); | 3542 | reiserfs_check_lock_depth(p_s_sb, "journal end"); |
3531 | if (journal->j_len == 0) { | 3543 | if (journal->j_len == 0) { |