diff options
author | Jens Axboe <axboe@suse.de> | 2006-03-23 14:00:26 -0500 |
---|---|---|
committer | Jens Axboe <axboe@suse.de> | 2006-03-23 14:00:26 -0500 |
commit | 2056a782f8e7e65fd4bfd027506b4ce1c5e9ccd4 (patch) | |
tree | d4fe59a7ca0c110690937085548936a4535c39db /include | |
parent | 6dac40a7ce2483a47b54af07afebeb84131c7228 (diff) |
[PATCH] Block queue IO tracing support (blktrace) as of 2006-03-23
Signed-off-by: Jens Axboe <axboe@suse.de>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/blkdev.h | 3 | ||||
-rw-r--r-- | include/linux/blktrace_api.h | 277 | ||||
-rw-r--r-- | include/linux/compat_ioctl.h | 4 | ||||
-rw-r--r-- | include/linux/fs.h | 4 | ||||
-rw-r--r-- | include/linux/sched.h | 1 |
5 files changed, 289 insertions, 0 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 56bb6a4e15f3..c179966f1a2f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -22,6 +22,7 @@ typedef struct request_queue request_queue_t; | |||
22 | struct elevator_queue; | 22 | struct elevator_queue; |
23 | typedef struct elevator_queue elevator_t; | 23 | typedef struct elevator_queue elevator_t; |
24 | struct request_pm_state; | 24 | struct request_pm_state; |
25 | struct blk_trace; | ||
25 | 26 | ||
26 | #define BLKDEV_MIN_RQ 4 | 27 | #define BLKDEV_MIN_RQ 4 |
27 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ | 28 | #define BLKDEV_MAX_RQ 128 /* Default maximum */ |
@@ -416,6 +417,8 @@ struct request_queue | |||
416 | unsigned int sg_reserved_size; | 417 | unsigned int sg_reserved_size; |
417 | int node; | 418 | int node; |
418 | 419 | ||
420 | struct blk_trace *blk_trace; | ||
421 | |||
419 | /* | 422 | /* |
420 | * reserved for flush operations | 423 | * reserved for flush operations |
421 | */ | 424 | */ |
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h new file mode 100644 index 000000000000..b34d3e73d5ea --- /dev/null +++ b/include/linux/blktrace_api.h | |||
@@ -0,0 +1,277 @@ | |||
1 | #ifndef BLKTRACE_H | ||
2 | #define BLKTRACE_H | ||
3 | |||
4 | #include <linux/config.h> | ||
5 | #include <linux/blkdev.h> | ||
6 | #include <linux/relay.h> | ||
7 | |||
8 | /* | ||
9 | * Trace categories | ||
10 | */ | ||
11 | enum blktrace_cat { | ||
12 | BLK_TC_READ = 1 << 0, /* reads */ | ||
13 | BLK_TC_WRITE = 1 << 1, /* writes */ | ||
14 | BLK_TC_BARRIER = 1 << 2, /* barrier */ | ||
15 | BLK_TC_SYNC = 1 << 3, /* barrier */ | ||
16 | BLK_TC_QUEUE = 1 << 4, /* queueing/merging */ | ||
17 | BLK_TC_REQUEUE = 1 << 5, /* requeueing */ | ||
18 | BLK_TC_ISSUE = 1 << 6, /* issue */ | ||
19 | BLK_TC_COMPLETE = 1 << 7, /* completions */ | ||
20 | BLK_TC_FS = 1 << 8, /* fs requests */ | ||
21 | BLK_TC_PC = 1 << 9, /* pc requests */ | ||
22 | BLK_TC_NOTIFY = 1 << 10, /* special message */ | ||
23 | |||
24 | BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ | ||
25 | }; | ||
26 | |||
27 | #define BLK_TC_SHIFT (16) | ||
28 | #define BLK_TC_ACT(act) ((act) << BLK_TC_SHIFT) | ||
29 | |||
30 | /* | ||
31 | * Basic trace actions | ||
32 | */ | ||
33 | enum blktrace_act { | ||
34 | __BLK_TA_QUEUE = 1, /* queued */ | ||
35 | __BLK_TA_BACKMERGE, /* back merged to existing rq */ | ||
36 | __BLK_TA_FRONTMERGE, /* front merge to existing rq */ | ||
37 | __BLK_TA_GETRQ, /* allocated new request */ | ||
38 | __BLK_TA_SLEEPRQ, /* sleeping on rq allocation */ | ||
39 | __BLK_TA_REQUEUE, /* request requeued */ | ||
40 | __BLK_TA_ISSUE, /* sent to driver */ | ||
41 | __BLK_TA_COMPLETE, /* completed by driver */ | ||
42 | __BLK_TA_PLUG, /* queue was plugged */ | ||
43 | __BLK_TA_UNPLUG_IO, /* queue was unplugged by io */ | ||
44 | __BLK_TA_UNPLUG_TIMER, /* queue was unplugged by timer */ | ||
45 | __BLK_TA_INSERT, /* insert request */ | ||
46 | __BLK_TA_SPLIT, /* bio was split */ | ||
47 | __BLK_TA_BOUNCE, /* bio was bounced */ | ||
48 | __BLK_TA_REMAP, /* bio was remapped */ | ||
49 | }; | ||
50 | |||
51 | /* | ||
52 | * Trace actions in full. Additionally, read or write is masked | ||
53 | */ | ||
54 | #define BLK_TA_QUEUE (__BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_QUEUE)) | ||
55 | #define BLK_TA_BACKMERGE (__BLK_TA_BACKMERGE | BLK_TC_ACT(BLK_TC_QUEUE)) | ||
56 | #define BLK_TA_FRONTMERGE (__BLK_TA_FRONTMERGE | BLK_TC_ACT(BLK_TC_QUEUE)) | ||
57 | #define BLK_TA_GETRQ (__BLK_TA_GETRQ | BLK_TC_ACT(BLK_TC_QUEUE)) | ||
58 | #define BLK_TA_SLEEPRQ (__BLK_TA_SLEEPRQ | BLK_TC_ACT(BLK_TC_QUEUE)) | ||
59 | #define BLK_TA_REQUEUE (__BLK_TA_REQUEUE | BLK_TC_ACT(BLK_TC_REQUEUE)) | ||
60 | #define BLK_TA_ISSUE (__BLK_TA_ISSUE | BLK_TC_ACT(BLK_TC_ISSUE)) | ||
61 | #define BLK_TA_COMPLETE (__BLK_TA_COMPLETE| BLK_TC_ACT(BLK_TC_COMPLETE)) | ||
62 | #define BLK_TA_PLUG (__BLK_TA_PLUG | BLK_TC_ACT(BLK_TC_QUEUE)) | ||
63 | #define BLK_TA_UNPLUG_IO (__BLK_TA_UNPLUG_IO | BLK_TC_ACT(BLK_TC_QUEUE)) | ||
64 | #define BLK_TA_UNPLUG_TIMER (__BLK_TA_UNPLUG_TIMER | BLK_TC_ACT(BLK_TC_QUEUE)) | ||
65 | #define BLK_TA_INSERT (__BLK_TA_INSERT | BLK_TC_ACT(BLK_TC_QUEUE)) | ||
66 | #define BLK_TA_SPLIT (__BLK_TA_SPLIT) | ||
67 | #define BLK_TA_BOUNCE (__BLK_TA_BOUNCE) | ||
68 | #define BLK_TA_REMAP (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE)) | ||
69 | |||
70 | #define BLK_IO_TRACE_MAGIC 0x65617400 | ||
71 | #define BLK_IO_TRACE_VERSION 0x07 | ||
72 | |||
73 | /* | ||
74 | * The trace itself | ||
75 | */ | ||
76 | struct blk_io_trace { | ||
77 | u32 magic; /* MAGIC << 8 | version */ | ||
78 | u32 sequence; /* event number */ | ||
79 | u64 time; /* in microseconds */ | ||
80 | u64 sector; /* disk offset */ | ||
81 | u32 bytes; /* transfer length */ | ||
82 | u32 action; /* what happened */ | ||
83 | u32 pid; /* who did it */ | ||
84 | u32 device; /* device number */ | ||
85 | u32 cpu; /* on what cpu did it happen */ | ||
86 | u16 error; /* completion error */ | ||
87 | u16 pdu_len; /* length of data after this trace */ | ||
88 | }; | ||
89 | |||
90 | /* | ||
91 | * The remap event | ||
92 | */ | ||
93 | struct blk_io_trace_remap { | ||
94 | u32 device; | ||
95 | u32 __pad; | ||
96 | u64 sector; | ||
97 | }; | ||
98 | |||
99 | enum { | ||
100 | Blktrace_setup = 1, | ||
101 | Blktrace_running, | ||
102 | Blktrace_stopped, | ||
103 | }; | ||
104 | |||
105 | struct blk_trace { | ||
106 | int trace_state; | ||
107 | struct rchan *rchan; | ||
108 | unsigned long *sequence; | ||
109 | u16 act_mask; | ||
110 | u64 start_lba; | ||
111 | u64 end_lba; | ||
112 | u32 pid; | ||
113 | u32 dev; | ||
114 | struct dentry *dir; | ||
115 | struct dentry *dropped_file; | ||
116 | atomic_t dropped; | ||
117 | }; | ||
118 | |||
119 | /* | ||
120 | * User setup structure passed with BLKTRACESTART | ||
121 | */ | ||
122 | struct blk_user_trace_setup { | ||
123 | char name[BDEVNAME_SIZE]; /* output */ | ||
124 | u16 act_mask; /* input */ | ||
125 | u32 buf_size; /* input */ | ||
126 | u32 buf_nr; /* input */ | ||
127 | u64 start_lba; | ||
128 | u64 end_lba; | ||
129 | u32 pid; | ||
130 | }; | ||
131 | |||
132 | #if defined(CONFIG_BLK_DEV_IO_TRACE) | ||
133 | extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); | ||
134 | extern void blk_trace_shutdown(request_queue_t *); | ||
135 | extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *); | ||
136 | |||
137 | /** | ||
138 | * blk_add_trace_rq - Add a trace for a request oriented action | ||
139 | * @q: queue the io is for | ||
140 | * @rq: the source request | ||
141 | * @what: the action | ||
142 | * | ||
143 | * Description: | ||
144 | * Records an action against a request. Will log the bio offset + size. | ||
145 | * | ||
146 | **/ | ||
147 | static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq, | ||
148 | u32 what) | ||
149 | { | ||
150 | struct blk_trace *bt = q->blk_trace; | ||
151 | int rw = rq->flags & 0x07; | ||
152 | |||
153 | if (likely(!bt)) | ||
154 | return; | ||
155 | |||
156 | if (blk_pc_request(rq)) { | ||
157 | what |= BLK_TC_ACT(BLK_TC_PC); | ||
158 | __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd); | ||
159 | } else { | ||
160 | what |= BLK_TC_ACT(BLK_TC_FS); | ||
161 | __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, rw, what, rq->errors, 0, NULL); | ||
162 | } | ||
163 | } | ||
164 | |||
165 | /** | ||
166 | * blk_add_trace_bio - Add a trace for a bio oriented action | ||
167 | * @q: queue the io is for | ||
168 | * @bio: the source bio | ||
169 | * @what: the action | ||
170 | * | ||
171 | * Description: | ||
172 | * Records an action against a bio. Will log the bio offset + size. | ||
173 | * | ||
174 | **/ | ||
175 | static inline void blk_add_trace_bio(struct request_queue *q, struct bio *bio, | ||
176 | u32 what) | ||
177 | { | ||
178 | struct blk_trace *bt = q->blk_trace; | ||
179 | |||
180 | if (likely(!bt)) | ||
181 | return; | ||
182 | |||
183 | __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), 0, NULL); | ||
184 | } | ||
185 | |||
186 | /** | ||
187 | * blk_add_trace_generic - Add a trace for a generic action | ||
188 | * @q: queue the io is for | ||
189 | * @bio: the source bio | ||
190 | * @rw: the data direction | ||
191 | * @what: the action | ||
192 | * | ||
193 | * Description: | ||
194 | * Records a simple trace | ||
195 | * | ||
196 | **/ | ||
197 | static inline void blk_add_trace_generic(struct request_queue *q, | ||
198 | struct bio *bio, int rw, u32 what) | ||
199 | { | ||
200 | struct blk_trace *bt = q->blk_trace; | ||
201 | |||
202 | if (likely(!bt)) | ||
203 | return; | ||
204 | |||
205 | if (bio) | ||
206 | blk_add_trace_bio(q, bio, what); | ||
207 | else | ||
208 | __blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL); | ||
209 | } | ||
210 | |||
211 | /** | ||
212 | * blk_add_trace_pdu_int - Add a trace for a bio with an integer payload | ||
213 | * @q: queue the io is for | ||
214 | * @what: the action | ||
215 | * @bio: the source bio | ||
216 | * @pdu: the integer payload | ||
217 | * | ||
218 | * Description: | ||
219 | * Adds a trace with some integer payload. This might be an unplug | ||
220 | * option given as the action, with the depth at unplug time given | ||
221 | * as the payload | ||
222 | * | ||
223 | **/ | ||
224 | static inline void blk_add_trace_pdu_int(struct request_queue *q, u32 what, | ||
225 | struct bio *bio, unsigned int pdu) | ||
226 | { | ||
227 | struct blk_trace *bt = q->blk_trace; | ||
228 | u64 rpdu = cpu_to_be64(pdu); | ||
229 | |||
230 | if (likely(!bt)) | ||
231 | return; | ||
232 | |||
233 | if (bio) | ||
234 | __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu); | ||
235 | else | ||
236 | __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu); | ||
237 | } | ||
238 | |||
239 | /** | ||
240 | * blk_add_trace_remap - Add a trace for a remap operation | ||
241 | * @q: queue the io is for | ||
242 | * @bio: the source bio | ||
243 | * @dev: target device | ||
244 | * @from: source sector | ||
245 | * @to: target sector | ||
246 | * | ||
247 | * Description: | ||
248 | * Device mapper or raid target sometimes need to split a bio because | ||
249 | * it spans a stripe (or similar). Add a trace for that action. | ||
250 | * | ||
251 | **/ | ||
252 | static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio, | ||
253 | dev_t dev, sector_t from, sector_t to) | ||
254 | { | ||
255 | struct blk_trace *bt = q->blk_trace; | ||
256 | struct blk_io_trace_remap r; | ||
257 | |||
258 | if (likely(!bt)) | ||
259 | return; | ||
260 | |||
261 | r.device = cpu_to_be32(dev); | ||
262 | r.sector = cpu_to_be64(to); | ||
263 | |||
264 | __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); | ||
265 | } | ||
266 | |||
267 | #else /* !CONFIG_BLK_DEV_IO_TRACE */ | ||
268 | #define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) | ||
269 | #define blk_trace_shutdown(q) do { } while (0) | ||
270 | #define blk_add_trace_rq(q, rq, what) do { } while (0) | ||
271 | #define blk_add_trace_bio(q, rq, what) do { } while (0) | ||
272 | #define blk_add_trace_generic(q, rq, rw, what) do { } while (0) | ||
273 | #define blk_add_trace_pdu_int(q, what, bio, pdu) do { } while (0) | ||
274 | #define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0) | ||
275 | #endif /* CONFIG_BLK_DEV_IO_TRACE */ | ||
276 | |||
277 | #endif | ||
diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index ae7dfb790df3..efb518f16bb3 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h | |||
@@ -97,6 +97,10 @@ COMPATIBLE_IOCTL(BLKRRPART) | |||
97 | COMPATIBLE_IOCTL(BLKFLSBUF) | 97 | COMPATIBLE_IOCTL(BLKFLSBUF) |
98 | COMPATIBLE_IOCTL(BLKSECTSET) | 98 | COMPATIBLE_IOCTL(BLKSECTSET) |
99 | COMPATIBLE_IOCTL(BLKSSZGET) | 99 | COMPATIBLE_IOCTL(BLKSSZGET) |
100 | COMPATIBLE_IOCTL(BLKTRACESTART) | ||
101 | COMPATIBLE_IOCTL(BLKTRACESTOP) | ||
102 | COMPATIBLE_IOCTL(BLKTRACESETUP) | ||
103 | COMPATIBLE_IOCTL(BLKTRACETEARDOWN) | ||
100 | ULONG_IOCTL(BLKRASET) | 104 | ULONG_IOCTL(BLKRASET) |
101 | ULONG_IOCTL(BLKFRASET) | 105 | ULONG_IOCTL(BLKFRASET) |
102 | /* RAID */ | 106 | /* RAID */ |
diff --git a/include/linux/fs.h b/include/linux/fs.h index f9c9dea636d0..9b34a1b03455 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -197,6 +197,10 @@ extern int dir_notify_enable; | |||
197 | #define BLKBSZGET _IOR(0x12,112,size_t) | 197 | #define BLKBSZGET _IOR(0x12,112,size_t) |
198 | #define BLKBSZSET _IOW(0x12,113,size_t) | 198 | #define BLKBSZSET _IOW(0x12,113,size_t) |
199 | #define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */ | 199 | #define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */ |
200 | #define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup) | ||
201 | #define BLKTRACESTART _IO(0x12,116) | ||
202 | #define BLKTRACESTOP _IO(0x12,117) | ||
203 | #define BLKTRACETEARDOWN _IO(0x12,118) | ||
200 | 204 | ||
201 | #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ | 205 | #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ |
202 | #define FIBMAP _IO(0x00,1) /* bmap access */ | 206 | #define FIBMAP _IO(0x00,1) /* bmap access */ |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 62e6314382f0..e60a91d5b369 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -706,6 +706,7 @@ struct task_struct { | |||
706 | prio_array_t *array; | 706 | prio_array_t *array; |
707 | 707 | ||
708 | unsigned short ioprio; | 708 | unsigned short ioprio; |
709 | unsigned int btrace_seq; | ||
709 | 710 | ||
710 | unsigned long sleep_avg; | 711 | unsigned long sleep_avg; |
711 | unsigned long long timestamp, last_ran; | 712 | unsigned long long timestamp, last_ran; |