aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJens Axboe <axboe@suse.de>2005-06-27 04:55:12 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-27 17:33:29 -0400
commit22e2c507c301c3dbbcf91b4948b88f78842ee6c9 (patch)
tree9a97c91d1362e69703aa286021daffb8a5456f4c /fs
parent020f46a39eb7b99a575b9f4d105fce2b142acdf1 (diff)
[PATCH] Update cfq io scheduler to time sliced design
This updates the CFQ io scheduler to the new time sliced design (cfq v3). It provides full process fairness, while giving excellent aggregate system throughput even for many competing processes. It supports io priorities, either inherited from the cpu nice value or set directly with the ioprio_get/set syscalls. The latter closely mimic set/getpriority. This import is based on my latest from -mm. Signed-off-by: Jens Axboe <axboe@suse.de> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/Makefile1
-rw-r--r--fs/ioprio.c172
-rw-r--r--fs/reiserfs/journal.c12
3 files changed, 185 insertions, 0 deletions
diff --git a/fs/Makefile b/fs/Makefile
index fc92e59e9fa..20edcf28bfd 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,6 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \
10 ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ 10 ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
11 attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ 11 attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
12 seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ 12 seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
13 ioprio.o
13 14
14obj-$(CONFIG_EPOLL) += eventpoll.o 15obj-$(CONFIG_EPOLL) += eventpoll.o
15obj-$(CONFIG_COMPAT) += compat.o 16obj-$(CONFIG_COMPAT) += compat.o
diff --git a/fs/ioprio.c b/fs/ioprio.c
new file mode 100644
index 00000000000..663e420636d
--- /dev/null
+++ b/fs/ioprio.c
@@ -0,0 +1,172 @@
1/*
2 * fs/ioprio.c
3 *
4 * Copyright (C) 2004 Jens Axboe <axboe@suse.de>
5 *
6 * Helper functions for setting/querying io priorities of processes. The
7 * system calls closely mimic getpriority/setpriority, see the man page for
8 * those. The prio argument is a composite of prio class and prio data, where
9 * the data argument has meaning within that class. The standard scheduling
10 * classes have 8 distinct prio levels, with 0 being the highest prio and 7
11 * being the lowest.
12 *
13 * IOW, setting BE scheduling class with prio 2 is done ala:
14 *
15 * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2;
16 *
17 * ioprio_set(PRIO_PROCESS, pid, prio);
18 *
19 * See also Documentation/block/ioprio.txt
20 *
21 */
22#include <linux/kernel.h>
23#include <linux/ioprio.h>
24#include <linux/blkdev.h>
25
26static int set_task_ioprio(struct task_struct *task, int ioprio)
27{
28 struct io_context *ioc;
29
30 if (task->uid != current->euid &&
31 task->uid != current->uid && !capable(CAP_SYS_NICE))
32 return -EPERM;
33
34 task_lock(task);
35
36 task->ioprio = ioprio;
37
38 ioc = task->io_context;
39 if (ioc && ioc->set_ioprio)
40 ioc->set_ioprio(ioc, ioprio);
41
42 task_unlock(task);
43 return 0;
44}
45
46asmlinkage int sys_ioprio_set(int which, int who, int ioprio)
47{
48 int class = IOPRIO_PRIO_CLASS(ioprio);
49 int data = IOPRIO_PRIO_DATA(ioprio);
50 struct task_struct *p, *g;
51 struct user_struct *user;
52 int ret;
53
54 switch (class) {
55 case IOPRIO_CLASS_RT:
56 if (!capable(CAP_SYS_ADMIN))
57 return -EPERM;
58 /* fall through, rt has prio field too */
59 case IOPRIO_CLASS_BE:
60 if (data >= IOPRIO_BE_NR || data < 0)
61 return -EINVAL;
62
63 break;
64 case IOPRIO_CLASS_IDLE:
65 break;
66 default:
67 return -EINVAL;
68 }
69
70 ret = -ESRCH;
71 read_lock_irq(&tasklist_lock);
72 switch (which) {
73 case IOPRIO_WHO_PROCESS:
74 if (!who)
75 p = current;
76 else
77 p = find_task_by_pid(who);
78 if (p)
79 ret = set_task_ioprio(p, ioprio);
80 break;
81 case IOPRIO_WHO_PGRP:
82 if (!who)
83 who = process_group(current);
84 do_each_task_pid(who, PIDTYPE_PGID, p) {
85 ret = set_task_ioprio(p, ioprio);
86 if (ret)
87 break;
88 } while_each_task_pid(who, PIDTYPE_PGID, p);
89 break;
90 case IOPRIO_WHO_USER:
91 if (!who)
92 user = current->user;
93 else
94 user = find_user(who);
95
96 if (!user)
97 break;
98
99 do_each_thread(g, p) {
100 if (p->uid != who)
101 continue;
102 ret = set_task_ioprio(p, ioprio);
103 if (ret)
104 break;
105 } while_each_thread(g, p);
106
107 if (who)
108 free_uid(user);
109 break;
110 default:
111 ret = -EINVAL;
112 }
113
114 read_unlock_irq(&tasklist_lock);
115 return ret;
116}
117
118asmlinkage int sys_ioprio_get(int which, int who)
119{
120 struct task_struct *g, *p;
121 struct user_struct *user;
122 int ret = -ESRCH;
123
124 read_lock_irq(&tasklist_lock);
125 switch (which) {
126 case IOPRIO_WHO_PROCESS:
127 if (!who)
128 p = current;
129 else
130 p = find_task_by_pid(who);
131 if (p)
132 ret = p->ioprio;
133 break;
134 case IOPRIO_WHO_PGRP:
135 if (!who)
136 who = process_group(current);
137 do_each_task_pid(who, PIDTYPE_PGID, p) {
138 if (ret == -ESRCH)
139 ret = p->ioprio;
140 else
141 ret = ioprio_best(ret, p->ioprio);
142 } while_each_task_pid(who, PIDTYPE_PGID, p);
143 break;
144 case IOPRIO_WHO_USER:
145 if (!who)
146 user = current->user;
147 else
148 user = find_user(who);
149
150 if (!user)
151 break;
152
153 do_each_thread(g, p) {
154 if (p->uid != user->uid)
155 continue;
156 if (ret == -ESRCH)
157 ret = p->ioprio;
158 else
159 ret = ioprio_best(ret, p->ioprio);
160 } while_each_thread(g, p);
161
162 if (who)
163 free_uid(user);
164 break;
165 default:
166 ret = -EINVAL;
167 }
168
169 read_unlock_irq(&tasklist_lock);
170 return ret;
171}
172
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 7b87707acc3..d1bcf0da672 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -645,18 +645,22 @@ struct buffer_chunk {
645 645
646static void write_chunk(struct buffer_chunk *chunk) { 646static void write_chunk(struct buffer_chunk *chunk) {
647 int i; 647 int i;
648 get_fs_excl();
648 for (i = 0; i < chunk->nr ; i++) { 649 for (i = 0; i < chunk->nr ; i++) {
649 submit_logged_buffer(chunk->bh[i]) ; 650 submit_logged_buffer(chunk->bh[i]) ;
650 } 651 }
651 chunk->nr = 0; 652 chunk->nr = 0;
653 put_fs_excl();
652} 654}
653 655
654static void write_ordered_chunk(struct buffer_chunk *chunk) { 656static void write_ordered_chunk(struct buffer_chunk *chunk) {
655 int i; 657 int i;
658 get_fs_excl();
656 for (i = 0; i < chunk->nr ; i++) { 659 for (i = 0; i < chunk->nr ; i++) {
657 submit_ordered_buffer(chunk->bh[i]) ; 660 submit_ordered_buffer(chunk->bh[i]) ;
658 } 661 }
659 chunk->nr = 0; 662 chunk->nr = 0;
663 put_fs_excl();
660} 664}
661 665
662static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, 666static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
@@ -918,6 +922,8 @@ static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list
918 return 0 ; 922 return 0 ;
919 } 923 }
920 924
925 get_fs_excl();
926
921 /* before we can put our commit blocks on disk, we have to make sure everyone older than 927 /* before we can put our commit blocks on disk, we have to make sure everyone older than
922 ** us is on disk too 928 ** us is on disk too
923 */ 929 */
@@ -1055,6 +1061,7 @@ put_jl:
1055 1061
1056 if (retval) 1062 if (retval)
1057 reiserfs_abort (s, retval, "Journal write error in %s", __FUNCTION__); 1063 reiserfs_abort (s, retval, "Journal write error in %s", __FUNCTION__);
1064 put_fs_excl();
1058 return retval; 1065 return retval;
1059} 1066}
1060 1067
@@ -1251,6 +1258,8 @@ static int flush_journal_list(struct super_block *s,
1251 return 0 ; 1258 return 0 ;
1252 } 1259 }
1253 1260
1261 get_fs_excl();
1262
1254 /* if all the work is already done, get out of here */ 1263 /* if all the work is already done, get out of here */
1255 if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 1264 if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
1256 atomic_read(&(jl->j_commit_left)) <= 0) { 1265 atomic_read(&(jl->j_commit_left)) <= 0) {
@@ -1450,6 +1459,7 @@ flush_older_and_return:
1450 put_journal_list(s, jl); 1459 put_journal_list(s, jl);
1451 if (flushall) 1460 if (flushall)
1452 up(&journal->j_flush_sem); 1461 up(&journal->j_flush_sem);
1462 put_fs_excl();
1453 return err ; 1463 return err ;
1454} 1464}
1455 1465
@@ -2719,6 +2729,7 @@ relock:
2719 th->t_trans_id = journal->j_trans_id ; 2729 th->t_trans_id = journal->j_trans_id ;
2720 unlock_journal(p_s_sb) ; 2730 unlock_journal(p_s_sb) ;
2721 INIT_LIST_HEAD (&th->t_list); 2731 INIT_LIST_HEAD (&th->t_list);
2732 get_fs_excl();
2722 return 0 ; 2733 return 0 ;
2723 2734
2724out_fail: 2735out_fail:
@@ -3526,6 +3537,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, struct super_b
3526 BUG_ON (th->t_refcount > 1); 3537 BUG_ON (th->t_refcount > 1);
3527 BUG_ON (!th->t_trans_id); 3538 BUG_ON (!th->t_trans_id);
3528 3539
3540 put_fs_excl();
3529 current->journal_info = th->t_handle_save; 3541 current->journal_info = th->t_handle_save;
3530 reiserfs_check_lock_depth(p_s_sb, "journal end"); 3542 reiserfs_check_lock_depth(p_s_sb, "journal end");
3531 if (journal->j_len == 0) { 3543 if (journal->j_len == 0) {