author     Jens Axboe <jens.axboe@oracle.com>   2008-01-24 02:52:45 -0500
committer  Jens Axboe <jens.axboe@oracle.com>   2008-01-28 04:50:29 -0500
commit     fd0928df98b9578be8a786ac0cb78a47a5e17a20
tree       70a34cf207bea1bec28e59cf0dba7d20e7f8b0f1
parent     91525300baf162e83e923b09ca286f9205e21522
ioprio: move io priority from task_struct to io_context
This is where it belongs, and this way it doesn't take up space for a
process that doesn't do IO.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
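
In short: the io priority value moves out of task_struct and into the lazily
allocated, refcounted io_context, so a task that never issues I/O carries no
extra state. A rough sketch of the before/after access pattern (mirroring what
get_task_ioprio() below now does), using the field names from this patch:

	/* before: every task_struct carried the value directly */
	prio = tsk->ioprio;

	/* after: it lives in the io_context, which may not exist yet;
	 * without one, the effective priority is the default
	 */
	if (tsk->io_context)
		prio = tsk->io_context->ioprio;
	else
		prio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, IOPRIO_NORM);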
-rw-r--r--  block/cfq-iosched.c       | 34
-rw-r--r--  block/ll_rw_blk.c         | 30
-rw-r--r--  fs/ioprio.c               | 29
-rw-r--r--  include/linux/blkdev.h    | 81
-rw-r--r--  include/linux/init_task.h |  1
-rw-r--r--  include/linux/iocontext.h | 79
-rw-r--r--  include/linux/ioprio.h    | 13
-rw-r--r--  include/linux/sched.h     |  1
-rw-r--r--  kernel/fork.c             | 32
9 files changed, 178 insertions(+), 122 deletions(-)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 13553e015d72..533af75329e6 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -199,7 +199,7 @@ CFQ_CFQQ_FNS(sync);
 
 static void cfq_dispatch_insert(struct request_queue *, struct request *);
 static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
-				       struct task_struct *, gfp_t);
+				       struct io_context *, gfp_t);
 static struct cfq_io_context *cfq_cic_rb_lookup(struct cfq_data *,
 						struct io_context *);
 
@@ -1273,7 +1273,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 	return cic;
 }
 
-static void cfq_init_prio_data(struct cfq_queue *cfqq)
+static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
 {
 	struct task_struct *tsk = current;
 	int ioprio_class;
@@ -1281,7 +1281,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq)
 	if (!cfq_cfqq_prio_changed(cfqq))
 		return;
 
-	ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio);
+	ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio);
 	switch (ioprio_class) {
 	default:
 		printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
@@ -1293,11 +1293,11 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq)
 		cfqq->ioprio_class = IOPRIO_CLASS_BE;
 		break;
 	case IOPRIO_CLASS_RT:
-		cfqq->ioprio = task_ioprio(tsk);
+		cfqq->ioprio = task_ioprio(ioc);
 		cfqq->ioprio_class = IOPRIO_CLASS_RT;
 		break;
 	case IOPRIO_CLASS_BE:
-		cfqq->ioprio = task_ioprio(tsk);
+		cfqq->ioprio = task_ioprio(ioc);
 		cfqq->ioprio_class = IOPRIO_CLASS_BE;
 		break;
 	case IOPRIO_CLASS_IDLE:
@@ -1330,8 +1330,7 @@ static inline void changed_ioprio(struct cfq_io_context *cic)
 	cfqq = cic->cfqq[ASYNC];
 	if (cfqq) {
 		struct cfq_queue *new_cfqq;
-		new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc->task,
-					 GFP_ATOMIC);
+		new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc, GFP_ATOMIC);
 		if (new_cfqq) {
 			cic->cfqq[ASYNC] = new_cfqq;
 			cfq_put_queue(cfqq);
@@ -1363,13 +1362,13 @@ static void cfq_ioc_set_ioprio(struct io_context *ioc)
 
 static struct cfq_queue *
 cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync,
-		     struct task_struct *tsk, gfp_t gfp_mask)
+		     struct io_context *ioc, gfp_t gfp_mask)
 {
 	struct cfq_queue *cfqq, *new_cfqq = NULL;
 	struct cfq_io_context *cic;
 
 retry:
-	cic = cfq_cic_rb_lookup(cfqd, tsk->io_context);
+	cic = cfq_cic_rb_lookup(cfqd, ioc);
 	/* cic always exists here */
 	cfqq = cic_to_cfqq(cic, is_sync);
 
@@ -1412,7 +1411,7 @@ retry:
 		cfq_mark_cfqq_prio_changed(cfqq);
 		cfq_mark_cfqq_queue_new(cfqq);
 
-		cfq_init_prio_data(cfqq);
+		cfq_init_prio_data(cfqq, ioc);
 	}
 
 	if (new_cfqq)
@@ -1439,11 +1438,11 @@ cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
 }
 
 static struct cfq_queue *
-cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct task_struct *tsk,
+cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
 	      gfp_t gfp_mask)
 {
-	const int ioprio = task_ioprio(tsk);
-	const int ioprio_class = task_ioprio_class(tsk);
+	const int ioprio = task_ioprio(ioc);
+	const int ioprio_class = task_ioprio_class(ioc);
 	struct cfq_queue **async_cfqq = NULL;
 	struct cfq_queue *cfqq = NULL;
 
@@ -1453,7 +1452,7 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct task_struct *tsk,
 	}
 
 	if (!cfqq) {
-		cfqq = cfq_find_alloc_queue(cfqd, is_sync, tsk, gfp_mask);
+		cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
 		if (!cfqq)
 			return NULL;
 	}
@@ -1793,7 +1792,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_queue *cfqq = RQ_CFQQ(rq);
 
-	cfq_init_prio_data(cfqq);
+	cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc);
 
 	cfq_add_rq_rb(rq);
 
@@ -1900,7 +1899,7 @@ static int cfq_may_queue(struct request_queue *q, int rw)
 
 	cfqq = cic_to_cfqq(cic, rw & REQ_RW_SYNC);
 	if (cfqq) {
-		cfq_init_prio_data(cfqq);
+		cfq_init_prio_data(cfqq, cic->ioc);
 		cfq_prio_boost(cfqq);
 
 		return __cfq_may_queue(cfqq);
@@ -1938,7 +1937,6 @@ static int
 cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct task_struct *tsk = current;
 	struct cfq_io_context *cic;
 	const int rw = rq_data_dir(rq);
 	const int is_sync = rq_is_sync(rq);
@@ -1956,7 +1954,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 
 	cfqq = cic_to_cfqq(cic, is_sync);
 	if (!cfqq) {
-		cfqq = cfq_get_queue(cfqd, is_sync, tsk, gfp_mask);
+		cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
 
 		if (!cfqq)
 			goto queue_fail;
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 3d0422f48453..b9bb02e845cd 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3904,6 +3904,26 @@ void exit_io_context(void)
 	put_io_context(ioc);
 }
 
+struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
+{
+	struct io_context *ret;
+
+	ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
+	if (ret) {
+		atomic_set(&ret->refcount, 1);
+		ret->task = current;
+		ret->ioprio_changed = 0;
+		ret->ioprio = 0;
+		ret->last_waited = jiffies; /* doesn't matter... */
+		ret->nr_batch_requests = 0; /* because this is 0 */
+		ret->aic = NULL;
+		ret->cic_root.rb_node = NULL;
+		ret->ioc_data = NULL;
+	}
+
+	return ret;
+}
+
 /*
  * If the current task has no IO context then create one and initialise it.
  * Otherwise, return its existing IO context.
@@ -3921,16 +3941,8 @@ static struct io_context *current_io_context(gfp_t gfp_flags, int node)
 	if (likely(ret))
 		return ret;
 
-	ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
+	ret = alloc_io_context(gfp_flags, node);
 	if (ret) {
-		atomic_set(&ret->refcount, 1);
-		ret->task = current;
-		ret->ioprio_changed = 0;
-		ret->last_waited = jiffies; /* doesn't matter... */
-		ret->nr_batch_requests = 0; /* because this is 0 */
-		ret->aic = NULL;
-		ret->cic_root.rb_node = NULL;
-		ret->ioc_data = NULL;
 		/* make sure set_task_ioprio() sees the settings above */
 		smp_wmb();
 		tsk->io_context = ret;
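
alloc_io_context() is split out of current_io_context() so that callers which
need an io_context for a task other than current (set_task_ioprio() and
copy_io() below) can allocate one directly. A minimal sketch of that calling
pattern, not code from this patch:

	struct io_context *ioc;

	ioc = alloc_io_context(GFP_ATOMIC, -1);	/* -1: no NUMA node preference */
	if (!ioc)
		return -ENOMEM;

	ioc->task = task;			/* re-point it at the target task */
	task->io_context = ioc;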
diff --git a/fs/ioprio.c b/fs/ioprio.c
index e4e01bc7f338..a7600401ecf7 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -41,18 +41,29 @@ static int set_task_ioprio(struct task_struct *task, int ioprio)
 		return err;
 
 	task_lock(task);
+	do {
+		ioc = task->io_context;
+		/* see wmb() in current_io_context() */
+		smp_read_barrier_depends();
+		if (ioc)
+			break;
 
-	task->ioprio = ioprio;
-
-	ioc = task->io_context;
-	/* see wmb() in current_io_context() */
-	smp_read_barrier_depends();
+		ioc = alloc_io_context(GFP_ATOMIC, -1);
+		if (!ioc) {
+			err = -ENOMEM;
+			break;
+		}
+		task->io_context = ioc;
+		ioc->task = task;
+	} while (1);
 
-	if (ioc)
+	if (!err) {
+		ioc->ioprio = ioprio;
 		ioc->ioprio_changed = 1;
+	}
 
 	task_unlock(task);
-	return 0;
+	return err;
 }
 
 asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
@@ -148,7 +159,9 @@ static int get_task_ioprio(struct task_struct *p)
 	ret = security_task_getioprio(p);
 	if (ret)
 		goto out;
-	ret = p->ioprio;
+	ret = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, IOPRIO_NORM);
+	if (p->io_context)
+		ret = p->io_context->ioprio;
 out:
 	return ret;
 }
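
The paths above are reached from user space through the ioprio_set() and
ioprio_get() syscalls. A hedged, self-contained userspace example; the priority
macros are restated locally (they were not exported as a uapi header at the
time) and raw syscall(2) is used because glibc had no wrappers:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	#define IOPRIO_CLASS_SHIFT		13
	#define IOPRIO_PRIO_VALUE(class, data)	(((class) << IOPRIO_CLASS_SHIFT) | (data))
	#define IOPRIO_CLASS_BE			2	/* best-effort class */
	#define IOPRIO_WHO_PROCESS		1

	int main(void)
	{
		/* best-effort class, level 2 (0 = highest, 7 = lowest) */
		int prio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 2);

		if (syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0, prio) < 0)
			perror("ioprio_set");

		/* after this patch, this reads io_context->ioprio, or the
		 * NONE/NORM default when no io_context exists yet
		 */
		printf("ioprio: 0x%lx\n",
		       syscall(SYS_ioprio_get, IOPRIO_WHO_PROCESS, 0));
		return 0;
	}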
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 49b7a4c31a6d..510a18ba1ec5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -34,83 +34,10 @@ struct sg_io_hdr;
 #define BLKDEV_MIN_RQ	4
 #define BLKDEV_MAX_RQ	128	/* Default maximum */
 
-/*
- * This is the per-process anticipatory I/O scheduler state.
- */
-struct as_io_context {
-	spinlock_t lock;
-
-	void (*dtor)(struct as_io_context *aic); /* destructor */
-	void (*exit)(struct as_io_context *aic); /* called on task exit */
-
-	unsigned long state;
-	atomic_t nr_queued; /* queued reads & sync writes */
-	atomic_t nr_dispatched; /* number of requests gone to the drivers */
-
-	/* IO History tracking */
-	/* Thinktime */
-	unsigned long last_end_request;
-	unsigned long ttime_total;
-	unsigned long ttime_samples;
-	unsigned long ttime_mean;
-	/* Layout pattern */
-	unsigned int seek_samples;
-	sector_t last_request_pos;
-	u64 seek_total;
-	sector_t seek_mean;
-};
-
-struct cfq_queue;
-struct cfq_io_context {
-	struct rb_node rb_node;
-	void *key;
-
-	struct cfq_queue *cfqq[2];
-
-	struct io_context *ioc;
-
-	unsigned long last_end_request;
-	sector_t last_request_pos;
-
-	unsigned long ttime_total;
-	unsigned long ttime_samples;
-	unsigned long ttime_mean;
-
-	unsigned int seek_samples;
-	u64 seek_total;
-	sector_t seek_mean;
-
-	struct list_head queue_list;
-
-	void (*dtor)(struct io_context *); /* destructor */
-	void (*exit)(struct io_context *); /* called on task exit */
-};
-
-/*
- * This is the per-process I/O subsystem state.  It is refcounted and
- * kmalloc'ed. Currently all fields are modified in process io context
- * (apart from the atomic refcount), so require no locking.
- */
-struct io_context {
-	atomic_t refcount;
-	struct task_struct *task;
-
-	unsigned int ioprio_changed;
-
-	/*
-	 * For request batching
-	 */
-	unsigned long last_waited; /* Time last woken after wait for request */
-	int nr_batch_requests;	/* Number of requests left in the batch */
-
-	struct as_io_context *aic;
-	struct rb_root cic_root;
-	void *ioc_data;
-};
-
 void put_io_context(struct io_context *ioc);
 void exit_io_context(void);
 struct io_context *get_io_context(gfp_t gfp_flags, int node);
+struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
 void copy_io_context(struct io_context **pdst, struct io_context **psrc);
 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
 
@@ -894,6 +821,12 @@ static inline void exit_io_context(void)
 {
 }
 
+static inline int put_io_context(struct io_context *ioc)
+{
+	return 1;
+}
+
+
 #endif /* CONFIG_BLOCK */
 
 #endif
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 796019b22b6f..e6b3f7080679 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -137,7 +137,6 @@ extern struct group_info init_groups;
 		.time_slice	= HZ,					\
 		.nr_cpus_allowed = NR_CPUS,				\
 	},								\
-	.ioprio		= 0,						\
 	.tasks		= LIST_HEAD_INIT(tsk.tasks),			\
 	.ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children),		\
 	.ptrace_list	= LIST_HEAD_INIT(tsk.ptrace_list),		\
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
new file mode 100644
index 000000000000..186807ea62e2
--- /dev/null
+++ b/include/linux/iocontext.h
@@ -0,0 +1,79 @@
+#ifndef IOCONTEXT_H
+#define IOCONTEXT_H
+
+/*
+ * This is the per-process anticipatory I/O scheduler state.
+ */
+struct as_io_context {
+	spinlock_t lock;
+
+	void (*dtor)(struct as_io_context *aic); /* destructor */
+	void (*exit)(struct as_io_context *aic); /* called on task exit */
+
+	unsigned long state;
+	atomic_t nr_queued; /* queued reads & sync writes */
+	atomic_t nr_dispatched; /* number of requests gone to the drivers */
+
+	/* IO History tracking */
+	/* Thinktime */
+	unsigned long last_end_request;
+	unsigned long ttime_total;
+	unsigned long ttime_samples;
+	unsigned long ttime_mean;
+	/* Layout pattern */
+	unsigned int seek_samples;
+	sector_t last_request_pos;
+	u64 seek_total;
+	sector_t seek_mean;
+};
+
+struct cfq_queue;
+struct cfq_io_context {
+	struct rb_node rb_node;
+	void *key;
+
+	struct cfq_queue *cfqq[2];
+
+	struct io_context *ioc;
+
+	unsigned long last_end_request;
+	sector_t last_request_pos;
+
+	unsigned long ttime_total;
+	unsigned long ttime_samples;
+	unsigned long ttime_mean;
+
+	unsigned int seek_samples;
+	u64 seek_total;
+	sector_t seek_mean;
+
+	struct list_head queue_list;
+
+	void (*dtor)(struct io_context *); /* destructor */
+	void (*exit)(struct io_context *); /* called on task exit */
+};
+
+/*
+ * This is the per-process I/O subsystem state.  It is refcounted and
+ * kmalloc'ed. Currently all fields are modified in process io context
+ * (apart from the atomic refcount), so require no locking.
+ */
+struct io_context {
+	atomic_t refcount;
+	struct task_struct *task;
+
+	unsigned short ioprio;
+	unsigned short ioprio_changed;
+
+	/*
+	 * For request batching
+	 */
+	unsigned long last_waited; /* Time last woken after wait for request */
+	int nr_batch_requests;	/* Number of requests left in the batch */
+
+	struct as_io_context *aic;
+	struct rb_root cic_root;
+	void *ioc_data;
+};
+
+#endif
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index baf29387cab4..2a3bb1bb7433 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -2,6 +2,7 @@
 #define IOPRIO_H
 
 #include <linux/sched.h>
+#include <linux/iocontext.h>
 
 /*
  * Gives us 8 prio classes with 13-bits of data for each class
@@ -45,18 +46,18 @@ enum {
  * the cpu scheduler nice value to an io priority
  */
 #define IOPRIO_NORM	(4)
-static inline int task_ioprio(struct task_struct *task)
+static inline int task_ioprio(struct io_context *ioc)
 {
-	if (ioprio_valid(task->ioprio))
-		return IOPRIO_PRIO_DATA(task->ioprio);
+	if (ioprio_valid(ioc->ioprio))
+		return IOPRIO_PRIO_DATA(ioc->ioprio);
 
 	return IOPRIO_NORM;
 }
 
-static inline int task_ioprio_class(struct task_struct *task)
+static inline int task_ioprio_class(struct io_context *ioc)
 {
-	if (ioprio_valid(task->ioprio))
-		return IOPRIO_PRIO_CLASS(task->ioprio);
+	if (ioprio_valid(ioc->ioprio))
+		return IOPRIO_PRIO_CLASS(ioc->ioprio);
 
 	return IOPRIO_CLASS_BE;
 }
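
For reference, the helpers above decode a 16-bit priority value: the top three
bits hold the class and the low 13 bits the per-class level. The arithmetic,
with the relevant macros restated from ioprio.h (they are not part of this
hunk):

	#define IOPRIO_CLASS_SHIFT	13
	#define IOPRIO_PRIO_MASK	((1UL << IOPRIO_CLASS_SHIFT) - 1)
	#define IOPRIO_PRIO_CLASS(mask)	((mask) >> IOPRIO_CLASS_SHIFT)
	#define IOPRIO_PRIO_DATA(mask)	((mask) & IOPRIO_PRIO_MASK)
	#define IOPRIO_PRIO_VALUE(class, data)	(((class) << IOPRIO_CLASS_SHIFT) | (data))

	/* IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4) == (2 << 13) | 4 == 0x4004,
	 * so task_ioprio_class() sees IOPRIO_PRIO_CLASS(0x4004) == 2 (BE)
	 * and task_ioprio() sees IOPRIO_PRIO_DATA(0x4004) == 4.
	 */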
diff --git a/include/linux/sched.h b/include/linux/sched.h
index df5b24ee80b3..80837e7d527e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -975,7 +975,6 @@ struct task_struct {
 	struct hlist_head preempt_notifiers;
 #endif
 
-	unsigned short ioprio;
 	/*
 	 * fpu_counter contains the number of consecutive context switches
 	 * that the FPU is used. If this is over a threshold, the lazy fpu
diff --git a/kernel/fork.c b/kernel/fork.c
index 39d22b3357de..2a86c9dff744 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -51,6 +51,7 @@
 #include <linux/random.h>
 #include <linux/tty.h>
 #include <linux/proc_fs.h>
+#include <linux/blkdev.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -791,6 +792,26 @@ out:
 	return error;
 }
 
+static int copy_io(struct task_struct *tsk)
+{
+#ifdef CONFIG_BLOCK
+	struct io_context *ioc = current->io_context;
+
+	if (!ioc)
+		return 0;
+
+	if (ioprio_valid(ioc->ioprio)) {
+		tsk->io_context = alloc_io_context(GFP_KERNEL, -1);
+		if (unlikely(!tsk->io_context))
+			return -ENOMEM;
+
+		tsk->io_context->task = tsk;
+		tsk->io_context->ioprio = ioc->ioprio;
+	}
+#endif
+	return 0;
+}
+
 /*
  * Helper to unshare the files of the current task.
  * We don't want to expose copy_files internals to
@@ -1156,15 +1177,17 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto bad_fork_cleanup_mm;
 	if ((retval = copy_namespaces(clone_flags, p)))
 		goto bad_fork_cleanup_keys;
+	if ((retval = copy_io(p)))
+		goto bad_fork_cleanup_namespaces;
 	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
 	if (retval)
-		goto bad_fork_cleanup_namespaces;
+		goto bad_fork_cleanup_io;
 
 	if (pid != &init_struct_pid) {
 		retval = -ENOMEM;
 		pid = alloc_pid(task_active_pid_ns(p));
 		if (!pid)
-			goto bad_fork_cleanup_namespaces;
+			goto bad_fork_cleanup_io;
 
 		if (clone_flags & CLONE_NEWPID) {
 			retval = pid_ns_prepare_proc(task_active_pid_ns(p));
@@ -1234,9 +1257,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	/* Need tasklist lock for parent etc handling! */
 	write_lock_irq(&tasklist_lock);
 
-	/* for sys_ioprio_set(IOPRIO_WHO_PGRP) */
-	p->ioprio = current->ioprio;
-
 	/*
 	 * The task hasn't been attached yet, so its cpus_allowed mask will
 	 * not be changed, nor will its assigned CPU.
@@ -1328,6 +1348,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 bad_fork_free_pid:
 	if (pid != &init_struct_pid)
 		free_pid(pid);
+bad_fork_cleanup_io:
+	put_io_context(p->io_context);
 bad_fork_cleanup_namespaces:
 	exit_task_namespaces(p);
 bad_fork_cleanup_keys:
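
The visible effect of copy_io(): a child forked after the parent explicitly set
its io priority gets its own io_context carrying that priority, while a parent
that never touched it forks children with io_context == NULL, to be created
lazily on first I/O. A hedged userspace check of the inheritance, reusing the
locally defined macros from the earlier example:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <sys/wait.h>

	#define IOPRIO_CLASS_SHIFT		13
	#define IOPRIO_PRIO_VALUE(class, data)	(((class) << IOPRIO_CLASS_SHIFT) | (data))
	#define IOPRIO_CLASS_BE			2
	#define IOPRIO_WHO_PROCESS		1

	int main(void)
	{
		syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0,
			IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 6));

		if (fork() == 0) {
			/* expected to print the value the parent set above */
			printf("child ioprio: 0x%lx\n",
			       syscall(SYS_ioprio_get, IOPRIO_WHO_PROCESS, 0));
			_exit(0);
		}
		wait(NULL);
		return 0;
	}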