diff options
author | Lars Ellenberg <lars.ellenberg@linbit.com> | 2014-05-02 07:20:05 -0400 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2014-07-10 12:35:16 -0400 |
commit | db1866ffeed2e142208a801f7598326b92ebf7c5 (patch) | |
tree | c3abaf11a7f9d4547f021120948cedac88e67d79 /drivers/block/drbd | |
parent | 4d3d5aa83aa45f1c7c9644b30e3a67e42c26695f (diff) |
drbd: debugfs: add in_flight_summary data
To help diagnosing "high latency" or "hung" IO situations on DRBD,
present per drbd resource group a summary of operations currently in progress.
First item is a list of oldest drbd_request objects
waiting for various things:
* still being prepared
* waiting for activity log transaction
* waiting for local disk
* waiting to be sent
* waiting for peer acknowledgement ("receive ack", "write ack")
* waiting for peer epoch acknowledgement ("barrier ack")
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r-- | drivers/block/drbd/drbd_debugfs.c | 196 |
1 files changed, 196 insertions, 0 deletions
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c index 9b120c3d9703..d393aee37da6 100644 --- a/drivers/block/drbd/drbd_debugfs.c +++ b/drivers/block/drbd/drbd_debugfs.c | |||
@@ -2,7 +2,9 @@ | |||
2 | #include <linux/kernel.h> | 2 | #include <linux/kernel.h> |
3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
4 | #include <linux/debugfs.h> | 4 | #include <linux/debugfs.h> |
5 | #include <linux/seq_file.h> | ||
5 | #include <linux/stat.h> | 6 | #include <linux/stat.h> |
7 | #include <linux/jiffies.h> | ||
6 | #include <linux/list.h> | 8 | #include <linux/list.h> |
7 | 9 | ||
8 | #include "drbd_int.h" | 10 | #include "drbd_int.h" |
@@ -13,6 +15,194 @@ static struct dentry *drbd_debugfs_root; | |||
13 | static struct dentry *drbd_debugfs_resources; | 15 | static struct dentry *drbd_debugfs_resources; |
14 | static struct dentry *drbd_debugfs_minors; | 16 | static struct dentry *drbd_debugfs_minors; |
15 | 17 | ||
18 | static void seq_print_age_or_dash(struct seq_file *m, bool valid, unsigned long dt) | ||
19 | { | ||
20 | if (valid) | ||
21 | seq_printf(m, "\t%d", jiffies_to_msecs(dt)); | ||
22 | else | ||
23 | seq_printf(m, "\t-"); | ||
24 | } | ||
25 | |||
26 | static void seq_print_rq_state_bit(struct seq_file *m, | ||
27 | bool is_set, char *sep, const char *name) | ||
28 | { | ||
29 | if (!is_set) | ||
30 | return; | ||
31 | seq_putc(m, *sep); | ||
32 | seq_puts(m, name); | ||
33 | *sep = '|'; | ||
34 | } | ||
35 | |||
36 | /* pretty print enum drbd_req_state_bits req->rq_state */ | ||
37 | static void seq_print_request_state(struct seq_file *m, struct drbd_request *req) | ||
38 | { | ||
39 | unsigned int s = req->rq_state; | ||
40 | char sep = ' '; | ||
41 | seq_printf(m, "\t0x%08x", s); | ||
42 | seq_printf(m, "\tmaster: %s", req->master_bio ? "pending" : "completed"); | ||
43 | |||
44 | /* RQ_WRITE ignored, already reported */ | ||
45 | seq_puts(m, "\tlocal:"); | ||
46 | seq_print_rq_state_bit(m, s & RQ_IN_ACT_LOG, &sep, "in-AL"); | ||
47 | seq_print_rq_state_bit(m, s & RQ_POSTPONED, &sep, "postponed"); | ||
48 | seq_print_rq_state_bit(m, s & RQ_COMPLETION_SUSP, &sep, "suspended"); | ||
49 | sep = ' '; | ||
50 | seq_print_rq_state_bit(m, s & RQ_LOCAL_PENDING, &sep, "pending"); | ||
51 | seq_print_rq_state_bit(m, s & RQ_LOCAL_COMPLETED, &sep, "completed"); | ||
52 | seq_print_rq_state_bit(m, s & RQ_LOCAL_ABORTED, &sep, "aborted"); | ||
53 | seq_print_rq_state_bit(m, s & RQ_LOCAL_OK, &sep, "ok"); | ||
54 | if (sep == ' ') | ||
55 | seq_puts(m, " -"); | ||
56 | |||
57 | /* for_each_connection ... */ | ||
58 | seq_printf(m, "\tnet:"); | ||
59 | sep = ' '; | ||
60 | seq_print_rq_state_bit(m, s & RQ_NET_PENDING, &sep, "pending"); | ||
61 | seq_print_rq_state_bit(m, s & RQ_NET_QUEUED, &sep, "queued"); | ||
62 | seq_print_rq_state_bit(m, s & RQ_NET_SENT, &sep, "sent"); | ||
63 | seq_print_rq_state_bit(m, s & RQ_NET_DONE, &sep, "done"); | ||
64 | seq_print_rq_state_bit(m, s & RQ_NET_SIS, &sep, "sis"); | ||
65 | seq_print_rq_state_bit(m, s & RQ_NET_OK, &sep, "ok"); | ||
66 | if (sep == ' ') | ||
67 | seq_puts(m, " -"); | ||
68 | |||
69 | seq_printf(m, " :"); | ||
70 | sep = ' '; | ||
71 | seq_print_rq_state_bit(m, s & RQ_EXP_RECEIVE_ACK, &sep, "B"); | ||
72 | seq_print_rq_state_bit(m, s & RQ_EXP_WRITE_ACK, &sep, "C"); | ||
73 | seq_print_rq_state_bit(m, s & RQ_EXP_BARR_ACK, &sep, "barr"); | ||
74 | if (sep == ' ') | ||
75 | seq_puts(m, " -"); | ||
76 | seq_printf(m, "\n"); | ||
77 | } | ||
78 | |||
79 | static void seq_print_one_request(struct seq_file *m, struct drbd_request *req, unsigned long now) | ||
80 | { | ||
81 | /* change anything here, fixup header below! */ | ||
82 | unsigned int s = req->rq_state; | ||
83 | |||
84 | #define RQ_HDR_1 "epoch\tsector\tsize\trw" | ||
85 | seq_printf(m, "0x%x\t%llu\t%u\t%s", | ||
86 | req->epoch, | ||
87 | (unsigned long long)req->i.sector, req->i.size >> 9, | ||
88 | (s & RQ_WRITE) ? "W" : "R"); | ||
89 | |||
90 | #define RQ_HDR_2 "\tstart\tin AL\tsubmit" | ||
91 | seq_printf(m, "\t%d", jiffies_to_msecs(now - req->start_jif)); | ||
92 | seq_print_age_or_dash(m, s & RQ_IN_ACT_LOG, now - req->in_actlog_jif); | ||
93 | seq_print_age_or_dash(m, s & RQ_LOCAL_PENDING, now - req->pre_submit_jif); | ||
94 | |||
95 | #define RQ_HDR_3 "\tsent\tacked\tdone" | ||
96 | seq_print_age_or_dash(m, s & RQ_NET_SENT, now - req->pre_send_jif); | ||
97 | seq_print_age_or_dash(m, (s & RQ_NET_SENT) && !(s & RQ_NET_PENDING), now - req->acked_jif); | ||
98 | seq_print_age_or_dash(m, s & RQ_NET_DONE, now - req->net_done_jif); | ||
99 | |||
100 | #define RQ_HDR_4 "\tstate\n" | ||
101 | seq_print_request_state(m, req); | ||
102 | } | ||
103 | #define RQ_HDR RQ_HDR_1 RQ_HDR_2 RQ_HDR_3 RQ_HDR_4 | ||
104 | |||
105 | static void seq_print_minor_vnr_req(struct seq_file *m, struct drbd_request *req, unsigned long now) | ||
106 | { | ||
107 | seq_printf(m, "%u\t%u\t", req->device->minor, req->device->vnr); | ||
108 | seq_print_one_request(m, req, now); | ||
109 | } | ||
110 | |||
111 | static void seq_print_resource_transfer_log_summary(struct seq_file *m, | ||
112 | struct drbd_resource *resource, | ||
113 | struct drbd_connection *connection, | ||
114 | unsigned long now) | ||
115 | { | ||
116 | struct drbd_request *req; | ||
117 | unsigned int count = 0; | ||
118 | unsigned int show_state = 0; | ||
119 | |||
120 | seq_puts(m, "n\tdevice\tvnr\t" RQ_HDR); | ||
121 | spin_lock_irq(&resource->req_lock); | ||
122 | list_for_each_entry(req, &connection->transfer_log, tl_requests) { | ||
123 | unsigned int tmp = 0; | ||
124 | unsigned int s; | ||
125 | ++count; | ||
126 | |||
127 | /* don't disable irq "forever" */ | ||
128 | if (!(count & 0x1ff)) { | ||
129 | struct drbd_request *req_next; | ||
130 | kref_get(&req->kref); | ||
131 | spin_unlock_irq(&resource->req_lock); | ||
132 | cond_resched(); | ||
133 | spin_lock_irq(&resource->req_lock); | ||
134 | req_next = list_next_entry(req, tl_requests); | ||
135 | if (kref_put(&req->kref, drbd_req_destroy)) | ||
136 | req = req_next; | ||
137 | if (&req->tl_requests == &connection->transfer_log) | ||
138 | break; | ||
139 | } | ||
140 | |||
141 | s = req->rq_state; | ||
142 | |||
143 | /* This is meant to summarize timing issues, to be able to tell | ||
144 | * local disk problems from network problems. | ||
145 | * Skip requests, if we have shown an even older request with | ||
146 | * similar aspects already. */ | ||
147 | if (req->master_bio == NULL) | ||
148 | tmp |= 1; | ||
149 | if ((s & RQ_LOCAL_MASK) && (s & RQ_LOCAL_PENDING)) | ||
150 | tmp |= 2; | ||
151 | if (s & RQ_NET_MASK) { | ||
152 | if (!(s & RQ_NET_SENT)) | ||
153 | tmp |= 4; | ||
154 | if (s & RQ_NET_PENDING) | ||
155 | tmp |= 8; | ||
156 | if (!(s & RQ_NET_DONE)) | ||
157 | tmp |= 16; | ||
158 | } | ||
159 | if ((tmp & show_state) == tmp) | ||
160 | continue; | ||
161 | show_state |= tmp; | ||
162 | seq_printf(m, "%u\t", count); | ||
163 | seq_print_minor_vnr_req(m, req, now); | ||
164 | if (show_state == 0x1f) | ||
165 | break; | ||
166 | } | ||
167 | spin_unlock_irq(&resource->req_lock); | ||
168 | } | ||
169 | |||
170 | /* TODO: transfer_log and friends should be moved to resource */ | ||
171 | static int in_flight_summary_show(struct seq_file *m, void *pos) | ||
172 | { | ||
173 | struct drbd_resource *resource = m->private; | ||
174 | struct drbd_connection *connection; | ||
175 | unsigned long jif = jiffies; | ||
176 | |||
177 | connection = first_connection(resource); | ||
178 | /* This does not happen, actually. | ||
179 | * But be robust and prepare for future code changes. */ | ||
180 | if (!connection) | ||
181 | return 0; | ||
182 | |||
183 | seq_puts(m, "oldest application requests\n"); | ||
184 | seq_print_resource_transfer_log_summary(m, resource, connection, jif); | ||
185 | seq_putc(m, '\n'); | ||
186 | |||
187 | jif = jiffies - jif; | ||
188 | if (jif) | ||
189 | seq_printf(m, "generated in %d ms\n", jiffies_to_msecs(jif)); | ||
190 | return 0; | ||
191 | } | ||
192 | |||
193 | static int in_flight_summary_open(struct inode *inode, struct file *file) | ||
194 | { | ||
195 | return single_open(file, in_flight_summary_show, inode->i_private); | ||
196 | } | ||
197 | |||
198 | static const struct file_operations in_flight_summary_fops = { | ||
199 | .owner = THIS_MODULE, | ||
200 | .open = in_flight_summary_open, | ||
201 | .read = seq_read, | ||
202 | .llseek = seq_lseek, | ||
203 | .release = single_release, | ||
204 | }; | ||
205 | |||
16 | void drbd_debugfs_resource_add(struct drbd_resource *resource) | 206 | void drbd_debugfs_resource_add(struct drbd_resource *resource) |
17 | { | 207 | { |
18 | struct dentry *dentry; | 208 | struct dentry *dentry; |
@@ -34,6 +224,12 @@ void drbd_debugfs_resource_add(struct drbd_resource *resource) | |||
34 | goto fail; | 224 | goto fail; |
35 | resource->debugfs_res_connections = dentry; | 225 | resource->debugfs_res_connections = dentry; |
36 | 226 | ||
227 | dentry = debugfs_create_file("in_flight_summary", S_IRUSR|S_IRGRP, | ||
228 | resource->debugfs_res, resource, | ||
229 | &in_flight_summary_fops); | ||
230 | if (IS_ERR_OR_NULL(dentry)) | ||
231 | goto fail; | ||
232 | resource->debugfs_res_in_flight_summary = dentry; | ||
37 | return; | 233 | return; |
38 | 234 | ||
39 | fail: | 235 | fail: |