aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd
diff options
context:
space:
mode:
authorLars Ellenberg <lars.ellenberg@linbit.com>2014-05-02 07:20:05 -0400
committerPhilipp Reisner <philipp.reisner@linbit.com>2014-07-10 12:35:16 -0400
commitdb1866ffeed2e142208a801f7598326b92ebf7c5 (patch)
treec3abaf11a7f9d4547f021120948cedac88e67d79 /drivers/block/drbd
parent4d3d5aa83aa45f1c7c9644b30e3a67e42c26695f (diff)
drbd: debugfs: add in_flight_summary data
To help diagnosing "high latency" or "hung" IO situations on DRBD, present per drbd resource group a summary of operations currently in progress. First item is a list of oldest drbd_request objects waiting for various things: * still being prepared * waiting for activity log transaction * waiting for local disk * waiting to be sent * waiting for peer acknowledgement ("receive ack", "write ack") * waiting for peer epoch acknowledgement ("barrier ack") Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r--drivers/block/drbd/drbd_debugfs.c196
1 files changed, 196 insertions, 0 deletions
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
index 9b120c3d9703..d393aee37da6 100644
--- a/drivers/block/drbd/drbd_debugfs.c
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -2,7 +2,9 @@
2#include <linux/kernel.h> 2#include <linux/kernel.h>
3#include <linux/module.h> 3#include <linux/module.h>
4#include <linux/debugfs.h> 4#include <linux/debugfs.h>
5#include <linux/seq_file.h>
5#include <linux/stat.h> 6#include <linux/stat.h>
7#include <linux/jiffies.h>
6#include <linux/list.h> 8#include <linux/list.h>
7 9
8#include "drbd_int.h" 10#include "drbd_int.h"
@@ -13,6 +15,194 @@ static struct dentry *drbd_debugfs_root;
13static struct dentry *drbd_debugfs_resources; 15static struct dentry *drbd_debugfs_resources;
14static struct dentry *drbd_debugfs_minors; 16static struct dentry *drbd_debugfs_minors;
15 17
18static void seq_print_age_or_dash(struct seq_file *m, bool valid, unsigned long dt)
19{
20 if (valid)
21 seq_printf(m, "\t%d", jiffies_to_msecs(dt));
22 else
23 seq_printf(m, "\t-");
24}
25
26static void seq_print_rq_state_bit(struct seq_file *m,
27 bool is_set, char *sep, const char *name)
28{
29 if (!is_set)
30 return;
31 seq_putc(m, *sep);
32 seq_puts(m, name);
33 *sep = '|';
34}
35
36/* pretty print enum drbd_req_state_bits req->rq_state */
37static void seq_print_request_state(struct seq_file *m, struct drbd_request *req)
38{
39 unsigned int s = req->rq_state;
40 char sep = ' ';
41 seq_printf(m, "\t0x%08x", s);
42 seq_printf(m, "\tmaster: %s", req->master_bio ? "pending" : "completed");
43
44 /* RQ_WRITE ignored, already reported */
45 seq_puts(m, "\tlocal:");
46 seq_print_rq_state_bit(m, s & RQ_IN_ACT_LOG, &sep, "in-AL");
47 seq_print_rq_state_bit(m, s & RQ_POSTPONED, &sep, "postponed");
48 seq_print_rq_state_bit(m, s & RQ_COMPLETION_SUSP, &sep, "suspended");
49 sep = ' ';
50 seq_print_rq_state_bit(m, s & RQ_LOCAL_PENDING, &sep, "pending");
51 seq_print_rq_state_bit(m, s & RQ_LOCAL_COMPLETED, &sep, "completed");
52 seq_print_rq_state_bit(m, s & RQ_LOCAL_ABORTED, &sep, "aborted");
53 seq_print_rq_state_bit(m, s & RQ_LOCAL_OK, &sep, "ok");
54 if (sep == ' ')
55 seq_puts(m, " -");
56
57 /* for_each_connection ... */
58 seq_printf(m, "\tnet:");
59 sep = ' ';
60 seq_print_rq_state_bit(m, s & RQ_NET_PENDING, &sep, "pending");
61 seq_print_rq_state_bit(m, s & RQ_NET_QUEUED, &sep, "queued");
62 seq_print_rq_state_bit(m, s & RQ_NET_SENT, &sep, "sent");
63 seq_print_rq_state_bit(m, s & RQ_NET_DONE, &sep, "done");
64 seq_print_rq_state_bit(m, s & RQ_NET_SIS, &sep, "sis");
65 seq_print_rq_state_bit(m, s & RQ_NET_OK, &sep, "ok");
66 if (sep == ' ')
67 seq_puts(m, " -");
68
69 seq_printf(m, " :");
70 sep = ' ';
71 seq_print_rq_state_bit(m, s & RQ_EXP_RECEIVE_ACK, &sep, "B");
72 seq_print_rq_state_bit(m, s & RQ_EXP_WRITE_ACK, &sep, "C");
73 seq_print_rq_state_bit(m, s & RQ_EXP_BARR_ACK, &sep, "barr");
74 if (sep == ' ')
75 seq_puts(m, " -");
76 seq_printf(m, "\n");
77}
78
79static void seq_print_one_request(struct seq_file *m, struct drbd_request *req, unsigned long now)
80{
81 /* change anything here, fixup header below! */
82 unsigned int s = req->rq_state;
83
84#define RQ_HDR_1 "epoch\tsector\tsize\trw"
85 seq_printf(m, "0x%x\t%llu\t%u\t%s",
86 req->epoch,
87 (unsigned long long)req->i.sector, req->i.size >> 9,
88 (s & RQ_WRITE) ? "W" : "R");
89
90#define RQ_HDR_2 "\tstart\tin AL\tsubmit"
91 seq_printf(m, "\t%d", jiffies_to_msecs(now - req->start_jif));
92 seq_print_age_or_dash(m, s & RQ_IN_ACT_LOG, now - req->in_actlog_jif);
93 seq_print_age_or_dash(m, s & RQ_LOCAL_PENDING, now - req->pre_submit_jif);
94
95#define RQ_HDR_3 "\tsent\tacked\tdone"
96 seq_print_age_or_dash(m, s & RQ_NET_SENT, now - req->pre_send_jif);
97 seq_print_age_or_dash(m, (s & RQ_NET_SENT) && !(s & RQ_NET_PENDING), now - req->acked_jif);
98 seq_print_age_or_dash(m, s & RQ_NET_DONE, now - req->net_done_jif);
99
100#define RQ_HDR_4 "\tstate\n"
101 seq_print_request_state(m, req);
102}
103#define RQ_HDR RQ_HDR_1 RQ_HDR_2 RQ_HDR_3 RQ_HDR_4
104
105static void seq_print_minor_vnr_req(struct seq_file *m, struct drbd_request *req, unsigned long now)
106{
107 seq_printf(m, "%u\t%u\t", req->device->minor, req->device->vnr);
108 seq_print_one_request(m, req, now);
109}
110
111static void seq_print_resource_transfer_log_summary(struct seq_file *m,
112 struct drbd_resource *resource,
113 struct drbd_connection *connection,
114 unsigned long now)
115{
116 struct drbd_request *req;
117 unsigned int count = 0;
118 unsigned int show_state = 0;
119
120 seq_puts(m, "n\tdevice\tvnr\t" RQ_HDR);
121 spin_lock_irq(&resource->req_lock);
122 list_for_each_entry(req, &connection->transfer_log, tl_requests) {
123 unsigned int tmp = 0;
124 unsigned int s;
125 ++count;
126
127 /* don't disable irq "forever" */
128 if (!(count & 0x1ff)) {
129 struct drbd_request *req_next;
130 kref_get(&req->kref);
131 spin_unlock_irq(&resource->req_lock);
132 cond_resched();
133 spin_lock_irq(&resource->req_lock);
134 req_next = list_next_entry(req, tl_requests);
135 if (kref_put(&req->kref, drbd_req_destroy))
136 req = req_next;
137 if (&req->tl_requests == &connection->transfer_log)
138 break;
139 }
140
141 s = req->rq_state;
142
143 /* This is meant to summarize timing issues, to be able to tell
144 * local disk problems from network problems.
145 * Skip requests, if we have shown an even older request with
146 * similar aspects already. */
147 if (req->master_bio == NULL)
148 tmp |= 1;
149 if ((s & RQ_LOCAL_MASK) && (s & RQ_LOCAL_PENDING))
150 tmp |= 2;
151 if (s & RQ_NET_MASK) {
152 if (!(s & RQ_NET_SENT))
153 tmp |= 4;
154 if (s & RQ_NET_PENDING)
155 tmp |= 8;
156 if (!(s & RQ_NET_DONE))
157 tmp |= 16;
158 }
159 if ((tmp & show_state) == tmp)
160 continue;
161 show_state |= tmp;
162 seq_printf(m, "%u\t", count);
163 seq_print_minor_vnr_req(m, req, now);
164 if (show_state == 0x1f)
165 break;
166 }
167 spin_unlock_irq(&resource->req_lock);
168}
169
170/* TODO: transfer_log and friends should be moved to resource */
171static int in_flight_summary_show(struct seq_file *m, void *pos)
172{
173 struct drbd_resource *resource = m->private;
174 struct drbd_connection *connection;
175 unsigned long jif = jiffies;
176
177 connection = first_connection(resource);
178 /* This does not happen, actually.
179 * But be robust and prepare for future code changes. */
180 if (!connection)
181 return 0;
182
183 seq_puts(m, "oldest application requests\n");
184 seq_print_resource_transfer_log_summary(m, resource, connection, jif);
185 seq_putc(m, '\n');
186
187 jif = jiffies - jif;
188 if (jif)
189 seq_printf(m, "generated in %d ms\n", jiffies_to_msecs(jif));
190 return 0;
191}
192
193static int in_flight_summary_open(struct inode *inode, struct file *file)
194{
195 return single_open(file, in_flight_summary_show, inode->i_private);
196}
197
198static const struct file_operations in_flight_summary_fops = {
199 .owner = THIS_MODULE,
200 .open = in_flight_summary_open,
201 .read = seq_read,
202 .llseek = seq_lseek,
203 .release = single_release,
204};
205
16void drbd_debugfs_resource_add(struct drbd_resource *resource) 206void drbd_debugfs_resource_add(struct drbd_resource *resource)
17{ 207{
18 struct dentry *dentry; 208 struct dentry *dentry;
@@ -34,6 +224,12 @@ void drbd_debugfs_resource_add(struct drbd_resource *resource)
34 goto fail; 224 goto fail;
35 resource->debugfs_res_connections = dentry; 225 resource->debugfs_res_connections = dentry;
36 226
227 dentry = debugfs_create_file("in_flight_summary", S_IRUSR|S_IRGRP,
228 resource->debugfs_res, resource,
229 &in_flight_summary_fops);
230 if (IS_ERR_OR_NULL(dentry))
231 goto fail;
232 resource->debugfs_res_in_flight_summary = dentry;
37 return; 233 return;
38 234
39fail: 235fail: