author		Linus Torvalds <torvalds@linux-foundation.org>	2008-05-28 11:00:51 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-05-28 11:00:51 -0400
commit		b4412323cc954bd0a2144b1c2ed573dd2eddb32c (patch)
tree		a0dd14e6d46efbb36a0898c158e8efb49e4a22ef
parent		dc1d60a014aa9614518f9856ff661716d0969ffd (diff)
parent		d6de8be711b28049a5cb93c954722c311c7d3f7f (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
  cfq-iosched: fix RCU problem in cfq_cic_lookup()
  block: make blktrace use per-cpu buffers for message notes
  Added in elevator switch message to blktrace stream
  Added in MESSAGE notes for blktraces
  block: reorder cfq_queue to save space on 64bit builds
  block: Move the second call to get_request to the end of the loop
  splice: handle try_to_release_page() failure
  splice: fix sendfile() issue with relay
-rw-r--r--	block/blk-core.c	37
-rw-r--r--	block/blktrace.c	23
-rw-r--r--	block/cfq-iosched.c	36
-rw-r--r--	block/elevator.c	2
-rw-r--r--	fs/splice.c	17
-rw-r--r--	include/linux/blktrace_api.h	26
-rw-r--r--	kernel/relay.c	2
7 files changed, 110 insertions(+), 33 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 6a9cc0d22a61..1905aaba49fb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -806,35 +806,32 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
 	rq = get_request(q, rw_flags, bio, GFP_NOIO);
 	while (!rq) {
 		DEFINE_WAIT(wait);
+		struct io_context *ioc;
 		struct request_list *rl = &q->rq;
 
 		prepare_to_wait_exclusive(&rl->wait[rw], &wait,
 				TASK_UNINTERRUPTIBLE);
 
-		rq = get_request(q, rw_flags, bio, GFP_NOIO);
-
-		if (!rq) {
-			struct io_context *ioc;
-
-			blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
-
-			__generic_unplug_device(q);
-			spin_unlock_irq(q->queue_lock);
-			io_schedule();
+		blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
 
-			/*
-			 * After sleeping, we become a "batching" process and
-			 * will be able to allocate at least one request, and
-			 * up to a big batch of them for a small period time.
-			 * See ioc_batching, ioc_set_batching
-			 */
-			ioc = current_io_context(GFP_NOIO, q->node);
-			ioc_set_batching(q, ioc);
+		__generic_unplug_device(q);
+		spin_unlock_irq(q->queue_lock);
+		io_schedule();
 
-			spin_lock_irq(q->queue_lock);
-		}
-		finish_wait(&rl->wait[rw], &wait);
-	}
+		/*
+		 * After sleeping, we become a "batching" process and
+		 * will be able to allocate at least one request, and
+		 * up to a big batch of them for a small period time.
+		 * See ioc_batching, ioc_set_batching
+		 */
+		ioc = current_io_context(GFP_NOIO, q->node);
+		ioc_set_batching(q, ioc);
+
+		spin_lock_irq(q->queue_lock);
+		finish_wait(&rl->wait[rw], &wait);
+
+		rq = get_request(q, rw_flags, bio, GFP_NOIO);
+	};
 
 	return rq;
 }
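
The hunk above ("block: Move the second call to get_request to the end of the loop") removes a level of nesting: rather than sleeping inside an if (!rq) branch, the loop always sleeps, re-enables batching, and retries the allocation at the bottom. A minimal userspace sketch of that loop shape, with hypothetical try_alloc()/wait_for_free() helpers standing in for get_request() and the unplug-and-io_schedule() sleep:

#include <stdio.h>

/* Hypothetical stand-ins for get_request() and the unplug/io_schedule() sleep. */
static int budget;

static void *try_alloc(void)
{
	return budget-- > 0 ? &budget : NULL;
}

static void wait_for_free(void)
{
	budget = 1;	/* pretend a request completed while we slept */
}

static void *alloc_wait(void)
{
	void *rq = try_alloc();

	while (!rq) {
		/* prepare_to_wait_exclusive() would go here */
		wait_for_free();	/* sleep instead of nesting an if (!rq) block */
		/* ioc_set_batching() would go here: we just slept, so batch */
		/* finish_wait() would go here */
		rq = try_alloc();	/* the retry now sits at the bottom of the loop */
	}
	return rq;
}

int main(void)
{
	printf("allocated: %p\n", alloc_wait());
	return 0;
}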
diff --git a/block/blktrace.c b/block/blktrace.c
index b2cbb4e5d767..7ae87cc4a163 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -75,6 +75,23 @@ static void trace_note_time(struct blk_trace *bt)
 	local_irq_restore(flags);
 }
 
+void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
+{
+	int n;
+	va_list args;
+	char *buf;
+
+	preempt_disable();
+	buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
+	va_start(args, fmt);
+	n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
+	va_end(args);
+
+	trace_note(bt, 0, BLK_TN_MESSAGE, buf, n);
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(__trace_note_message);
+
 static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
 		   pid_t pid)
 {
@@ -232,6 +249,7 @@ static void blk_trace_cleanup(struct blk_trace *bt)
 	debugfs_remove(bt->dropped_file);
 	blk_remove_tree(bt->dir);
 	free_percpu(bt->sequence);
+	free_percpu(bt->msg_data);
 	kfree(bt);
 }
 
@@ -346,6 +364,10 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 	if (!bt->sequence)
 		goto err;
 
+	bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG);
+	if (!bt->msg_data)
+		goto err;
+
 	ret = -ENOENT;
 	dir = blk_create_tree(buts->name);
 	if (!dir)
@@ -392,6 +414,7 @@ err:
 	if (bt->dropped_file)
 		debugfs_remove(bt->dropped_file);
 	free_percpu(bt->sequence);
+	free_percpu(bt->msg_data);
 	if (bt->rchan)
 		relay_close(bt->rchan);
 	kfree(bt);
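
__trace_note_message() above formats into a per-CPU buffer (bt->msg_data) with preemption disabled, so writers on different CPUs never share a buffer and nothing is allocated on the trace path. A rough userspace analogue, with one thread-local buffer per thread standing in for per_cpu_ptr() (trace_note_message() and emit_note() here are illustrative names, not kernel API):

#include <stdarg.h>
#include <stdio.h>

#define MSG_MAX 128	/* plays the role of BLK_TN_MAX_MSG */

/* One buffer per thread, standing in for the per-CPU msg_data buffers. */
static __thread char msg_buf[MSG_MAX];

static void emit_note(const char *buf, int len)
{
	/* the kernel side hands this to trace_note(bt, 0, BLK_TN_MESSAGE, buf, n) */
	fwrite(buf, 1, (size_t)len, stdout);
	fputc('\n', stdout);
}

static void trace_note_message(const char *fmt, ...)
{
	va_list args;
	int n;

	va_start(args, fmt);
	n = vsnprintf(msg_buf, sizeof(msg_buf), fmt, args);
	va_end(args);
	if (n > (int)sizeof(msg_buf) - 1)
		n = (int)sizeof(msg_buf) - 1;	/* vscnprintf() returns bytes actually written */

	emit_note(msg_buf, n);
}

int main(void)
{
	trace_note_message("elv switch: %s", "cfq");
	return 0;
}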
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index b399c62936e0..d01b411c72f0 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -124,6 +124,8 @@ struct cfq_data {
 struct cfq_queue {
 	/* reference count */
 	atomic_t ref;
+	/* various state flags, see below */
+	unsigned int flags;
 	/* parent cfq_data */
 	struct cfq_data *cfqd;
 	/* service_tree member */
@@ -138,14 +140,14 @@ struct cfq_queue {
 	int queued[2];
 	/* currently allocated requests */
 	int allocated[2];
-	/* pending metadata requests */
-	int meta_pending;
 	/* fifo list of requests in sort_list */
 	struct list_head fifo;
 
 	unsigned long slice_end;
 	long slice_resid;
 
+	/* pending metadata requests */
+	int meta_pending;
 	/* number of requests that are on the dispatch list or inside driver */
 	int dispatched;
 
@@ -153,8 +155,6 @@ struct cfq_queue {
 	unsigned short ioprio, org_ioprio;
 	unsigned short ioprio_class, org_ioprio_class;
 
-	/* various state flags, see below */
-	unsigned int flags;
 };
 
 enum cfqq_state_flags {
@@ -1142,6 +1142,9 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	kmem_cache_free(cfq_pool, cfqq);
 }
 
+/*
+ * Must always be called with the rcu_read_lock() held
+ */
 static void
 __call_for_each_cic(struct io_context *ioc,
 		    void (*func)(struct io_context *, struct cfq_io_context *))
@@ -1197,6 +1200,11 @@ static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
 	cfq_cic_free(cic);
 }
 
+/*
+ * Must be called with rcu_read_lock() held or preemption otherwise disabled.
+ * Only two callers of this - ->dtor() which is called with the rcu_read_lock(),
+ * and ->trim() which is called with the task lock held
+ */
 static void cfq_free_io_context(struct io_context *ioc)
 {
 	/*
@@ -1502,20 +1510,24 @@ static struct cfq_io_context *
 cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 {
 	struct cfq_io_context *cic;
+	unsigned long flags;
 	void *k;
 
 	if (unlikely(!ioc))
 		return NULL;
 
+	rcu_read_lock();
+
 	/*
 	 * we maintain a last-hit cache, to avoid browsing over the tree
 	 */
 	cic = rcu_dereference(ioc->ioc_data);
-	if (cic && cic->key == cfqd)
+	if (cic && cic->key == cfqd) {
+		rcu_read_unlock();
 		return cic;
+	}
 
 	do {
-		rcu_read_lock();
 		cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd);
 		rcu_read_unlock();
 		if (!cic)
@@ -1524,10 +1536,13 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 		k = cic->key;
 		if (unlikely(!k)) {
 			cfq_drop_dead_cic(cfqd, ioc, cic);
+			rcu_read_lock();
 			continue;
 		}
 
+		spin_lock_irqsave(&ioc->lock, flags);
 		rcu_assign_pointer(ioc->ioc_data, cic);
+		spin_unlock_irqrestore(&ioc->lock, flags);
 		break;
 	} while (1);
 
@@ -2134,6 +2149,10 @@ static void *cfq_init_queue(struct request_queue *q)
 
 static void cfq_slab_kill(void)
 {
+	/*
+	 * Caller already ensured that pending RCU callbacks are completed,
+	 * so we should have no busy allocations at this point.
+	 */
 	if (cfq_pool)
 		kmem_cache_destroy(cfq_pool);
 	if (cfq_ioc_pool)
@@ -2292,6 +2311,11 @@ static void __exit cfq_exit(void)
 	ioc_gone = &all_gone;
 	/* ioc_gone's update must be visible before reading ioc_count */
 	smp_wmb();
+
+	/*
+	 * this also protects us from entering cfq_slab_kill() with
+	 * pending RCU callbacks
+	 */
 	if (elv_ioc_count_read(ioc_count))
 		wait_for_completion(ioc_gone);
 	cfq_slab_kill();
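
The cfq_cic_lookup() change above holds rcu_read_lock() across both the ioc_data cache hit and the dereference of the looked-up cic, and publishes the new cache entry under ioc->lock. As a loose structural analogue only (a pthread rwlock is much heavier than RCU), here is a userspace sketch of the rule being enforced: keep the read side locked for the whole window in which the looked-up pointer is used, and take a separate lock to publish; all names below are illustrative, not kernel API.

#include <pthread.h>
#include <stdio.h>

struct cic { int key; };

static pthread_rwlock_t read_side = PTHREAD_RWLOCK_INITIALIZER;	/* ~ rcu_read_lock() */
static pthread_mutex_t publish_lock = PTHREAD_MUTEX_INITIALIZER;	/* ~ ioc->lock */

static struct cic *cached;		/* ~ ioc->ioc_data last-hit cache */
static struct cic table_entry = { .key = 42 };

static struct cic *slow_lookup(int key)
{
	/* ~ radix_tree_lookup(); trivially returns the only entry here */
	return key == table_entry.key ? &table_entry : NULL;
}

static struct cic *cic_lookup(int key)
{
	struct cic *cic;

	pthread_rwlock_rdlock(&read_side);	/* take the read side before the cache check... */
	cic = cached;
	if (cic && cic->key == key) {
		pthread_rwlock_unlock(&read_side);	/* ...and only drop it once we are done with cic */
		return cic;
	}

	cic = slow_lookup(key);
	pthread_rwlock_unlock(&read_side);

	if (cic) {
		pthread_mutex_lock(&publish_lock);	/* publish the cache entry under the lock */
		cached = cic;
		pthread_mutex_unlock(&publish_lock);
	}
	return cic;
}

int main(void)
{
	printf("found key %d\n", cic_lookup(42)->key);
	return 0;
}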
diff --git a/block/elevator.c b/block/elevator.c
index 980f8ae147b4..902dd1344d56 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -1110,6 +1110,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
 	spin_unlock_irq(q->queue_lock);
 
+	blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
+
 	return 1;
 
 fail_register:
diff --git a/fs/splice.c b/fs/splice.c
index 78150038b584..aa5f6f60b305 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -58,8 +58,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
 	 */
 	wait_on_page_writeback(page);
 
-	if (PagePrivate(page))
-		try_to_release_page(page, GFP_KERNEL);
+	if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
+		goto out_unlock;
 
 	/*
 	 * If we succeeded in removing the mapping, set LRU flag
@@ -75,6 +75,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
 	 * Raced with truncate or failed to remove page from current
 	 * address space, unlock and return failure.
 	 */
+out_unlock:
 	unlock_page(page);
 	return 1;
 }
@@ -983,7 +984,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
 
 	while (len) {
 		size_t read_len;
-		loff_t pos = sd->pos;
+		loff_t pos = sd->pos, prev_pos = pos;
 
 		ret = do_splice_to(in, &pos, pipe, len, flags);
 		if (unlikely(ret <= 0))
@@ -998,15 +999,19 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
 		 * could get stuck data in the internal pipe:
 		 */
 		ret = actor(pipe, sd);
-		if (unlikely(ret <= 0))
+		if (unlikely(ret <= 0)) {
+			sd->pos = prev_pos;
 			goto out_release;
+		}
 
 		bytes += ret;
 		len -= ret;
 		sd->pos = pos;
 
-		if (ret < read_len)
+		if (ret < read_len) {
+			sd->pos = prev_pos + ret;
 			goto out_release;
+		}
 	}
 
 done:
@@ -1072,7 +1077,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 
 	ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
 	if (ret > 0)
-		*ppos += ret;
+		*ppos = sd.pos;
 
 	return ret;
 }
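
splice_direct_to_actor() above snapshots the input position (prev_pos) before each iteration and rewinds sd->pos when the actor delivers nothing, or advances it only by what was actually delivered on a short write, so that do_splice_direct() can report *ppos = sd.pos accurately. A small userspace sketch of that bookkeeping, with hypothetical produce()/consume() helpers in place of do_splice_to() and the actor:

#include <stdio.h>

/* Hypothetical stand-ins: produce() "reads" up to 16 bytes and advances pos,
 * consume() delivers at most 10 of them, like a short write. */
static long produce(long *pos, long len)
{
	long n = len < 16 ? len : 16;

	*pos += n;
	return n;
}

static long consume(long len)
{
	return len > 10 ? 10 : len;
}

static long copy_loop(long *ppos, long len)
{
	long bytes = 0;

	while (len) {
		long pos = *ppos, prev_pos = pos;
		long read_len, ret;

		ret = produce(&pos, len);
		if (ret <= 0)
			break;
		read_len = ret;

		ret = consume(read_len);
		if (ret <= 0) {
			*ppos = prev_pos;		/* nothing delivered: rewind */
			break;
		}

		bytes += ret;
		len -= ret;
		*ppos = pos;
		if (ret < read_len) {
			*ppos = prev_pos + ret;		/* partial delivery: count only what went out */
			break;
		}
	}
	return bytes;
}

int main(void)
{
	long pos = 0;
	long n = copy_loop(&pos, 40);

	printf("copied %ld bytes, position left at %ld\n", n, pos);
	return 0;
}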
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index cfc3147e5cf9..e3ef903aae88 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -55,6 +55,7 @@ enum blktrace_act {
 enum blktrace_notify {
 	__BLK_TN_PROCESS = 0,		/* establish pid/name mapping */
 	__BLK_TN_TIMESTAMP,		/* include system clock */
+	__BLK_TN_MESSAGE,		/* Character string message */
 };
 
 
@@ -79,6 +80,7 @@ enum blktrace_notify {
 
 #define BLK_TN_PROCESS		(__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
 #define BLK_TN_TIMESTAMP	(__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
+#define BLK_TN_MESSAGE		(__BLK_TN_MESSAGE | BLK_TC_ACT(BLK_TC_NOTIFY))
 
 #define BLK_IO_TRACE_MAGIC	0x65617400
 #define BLK_IO_TRACE_VERSION	0x07
@@ -119,6 +121,7 @@ struct blk_trace {
 	int trace_state;
 	struct rchan *rchan;
 	unsigned long *sequence;
+	unsigned char *msg_data;
 	u16 act_mask;
 	u64 start_lba;
 	u64 end_lba;
@@ -149,7 +152,28 @@ extern void blk_trace_shutdown(struct request_queue *);
 extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
 extern int do_blk_trace_setup(struct request_queue *q,
 	char *name, dev_t dev, struct blk_user_trace_setup *buts);
+extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
 
+/**
+ * blk_add_trace_msg - Add a (simple) message to the blktrace stream
+ * @q:		queue the io is for
+ * @fmt:	format to print message in
+ * args...	Variable argument list for format
+ *
+ * Description:
+ *     Records a (simple) message onto the blktrace stream.
+ *
+ *     NOTE: BLK_TN_MAX_MSG characters are output at most.
+ *     NOTE: Can not use 'static inline' due to presence of var args...
+ *
+ **/
+#define blk_add_trace_msg(q, fmt, ...)					\
+	do {								\
+		struct blk_trace *bt = (q)->blk_trace;			\
+		if (unlikely(bt))					\
+			__trace_note_message(bt, fmt, ##__VA_ARGS__);	\
+	} while (0)
+#define BLK_TN_MAX_MSG		128
 
 /**
  * blk_add_trace_rq - Add a trace for a request oriented action
@@ -299,6 +323,8 @@ extern int blk_trace_remove(struct request_queue *q);
 #define blk_trace_setup(q, name, dev, arg)	(-ENOTTY)
 #define blk_trace_startstop(q, start)		(-ENOTTY)
 #define blk_trace_remove(q)			(-ENOTTY)
+#define blk_add_trace_msg(q, fmt, ...)		do { } while (0)
+
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 #endif /* __KERNEL__ */
 #endif
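
blk_add_trace_msg() above is a variadic macro wrapped in do { } while (0) so it acts as a single statement, calls __trace_note_message() only when a blk_trace is attached to the queue, and collapses to an empty do { } while (0) when CONFIG_BLK_DEV_IO_TRACE is off. A minimal userspace sketch of that macro pattern, using the same GNU-style ##__VA_ARGS__ as the kernel header (tracer_t, TRACING_ENABLED and note_message() are made-up names, not kernel API):

#include <stdarg.h>
#include <stdio.h>

typedef struct { int enabled; } tracer_t;

static void note_message(tracer_t *t, const char *fmt, ...)
{
	va_list args;

	(void)t;
	va_start(args, fmt);
	vprintf(fmt, args);
	va_end(args);
	putchar('\n');
}

#define TRACING_ENABLED 1

#if TRACING_ENABLED
/* Can't be a static inline because of the variable argument list. */
#define trace_msg(t, fmt, ...)					\
	do {							\
		tracer_t *__t = (t);				\
		if (__t && __t->enabled)			\
			note_message(__t, fmt, ##__VA_ARGS__);	\
	} while (0)
#else
#define trace_msg(t, fmt, ...)	do { } while (0)	/* compiled out, still a statement */
#endif

int main(void)
{
	tracer_t t = { .enabled = 1 };

	trace_msg(&t, "elv switch: %s", "deadline");
	return 0;
}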
diff --git a/kernel/relay.c b/kernel/relay.c
index bc24dcdc570f..7de644cdec43 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1191,7 +1191,7 @@ static ssize_t relay_file_splice_read(struct file *in,
 	ret = 0;
 	spliced = 0;
 
-	while (len) {
+	while (len && !spliced) {
 		ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret);
 		if (ret < 0)
 			break;