about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author Steven Whitehouse <swhiteho@redhat.com> 2012-01-20 05:38:36 -0500
committer Steven Whitehouse <swhiteho@redhat.com> 2012-02-28 12:09:42 -0500
commit a245769f254bbbea868e2cf8dc42daa061cd276f (patch)
tree 1280ab339924584dba6aaf6e0c9e5a6f5ec0580b /fs
parent 891003abb0db6bfffd61b76ad0ed39bb7c3db8e1 (diff)
GFS2: glock statistics gathering
The stats are divided into two sets: those relating to the super block and those relating to an individual glock. The super block stats are done on a per cpu basis in order to try and reduce the overhead of gathering them. They are also further divided by glock type.

In the case of both the super block and glock statistics, the same information is gathered in each case. The super block statistics are used to provide default values for most of the glock statistics, so that newly created glocks should have, as far as possible, a sensible starting point.

The statistics are divided into three pairs of mean and variance, plus two counters. The mean/variance pairs are smoothed exponential estimates and the algorithm used is one which will be very familiar to those used to the calculation of round trip times in network code.

The three pairs of mean/variance measure the following things:

 1. DLM lock time (non-blocking requests)
 2. DLM lock time (blocking requests)
 3. Inter-request time (again to the DLM)

A non-blocking request is one which will complete right away, whatever the state of the DLM lock in question. That currently means any requests when (a) the current state of the lock is exclusive (b) the requested state is either null or unlocked or (c) the "try lock" flag is set. A blocking request covers all the other lock requests.

There are two counters. The first is there primarily to show how many lock requests have been made, and thus how much data has gone into the mean/variance calculations. The other counter is counting queueing of holders at the top layer of the glock code. Hopefully that number will be a lot larger than the number of dlm lock requests issued.

So why gather these statistics? There are several reasons we'd like to get a better idea of these timings:

 1. To be able to better set the glock "min hold time"
 2. To spot performance issues more easily
 3. To improve the algorithm for selecting resource groups for allocation (to base it on lock wait time, rather than blindly using a "try lock")

Due to the smoothing action of the updates, a step change in some input quantity being sampled will only fully be taken into account after 8 samples (or 4 for the variance) and this needs to be carefully considered when interpreting the results.

Knowing both the time it takes a lock request to complete and the average time between lock requests for a glock means we can compute the total percentage of the time for which the node is able to use a glock vs. time that the rest of the cluster has its share. That will be very useful when setting the lock min hold time.

The other point to remember is that all times are in nanoseconds. Great care has been taken to ensure that we measure exactly the quantities that we want, as accurately as possible. There are always inaccuracies in any measuring system, but I hope this is as accurate as we can reasonably make it.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/gfs2/glock.c 210
-rw-r--r-- fs/gfs2/incore.h 49
-rw-r--r-- fs/gfs2/lock_dlm.c 123
-rw-r--r-- fs/gfs2/ops_fstype.c 8
-rw-r--r-- fs/gfs2/trace_gfs2.h 60
5 files changed, 431 insertions, 19 deletions
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 351a3e797789..dab2526071cc 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -29,6 +29,7 @@
29#include <linux/rcupdate.h> 29#include <linux/rcupdate.h>
30#include <linux/rculist_bl.h> 30#include <linux/rculist_bl.h>
31#include <linux/bit_spinlock.h> 31#include <linux/bit_spinlock.h>
32#include <linux/percpu.h>
32 33
33#include "gfs2.h" 34#include "gfs2.h"
34#include "incore.h" 35#include "incore.h"
@@ -543,6 +544,11 @@ __acquires(&gl->gl_spin)
543 do_error(gl, 0); /* Fail queued try locks */ 544 do_error(gl, 0); /* Fail queued try locks */
544 } 545 }
545 gl->gl_req = target; 546 gl->gl_req = target;
547 set_bit(GLF_BLOCKING, &gl->gl_flags);
548 if ((gl->gl_req == LM_ST_UNLOCKED) ||
549 (gl->gl_state == LM_ST_EXCLUSIVE) ||
550 (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
551 clear_bit(GLF_BLOCKING, &gl->gl_flags);
546 spin_unlock(&gl->gl_spin); 552 spin_unlock(&gl->gl_spin);
547 if (glops->go_xmote_th) 553 if (glops->go_xmote_th)
548 glops->go_xmote_th(gl); 554 glops->go_xmote_th(gl);
@@ -744,6 +750,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
744 return -ENOMEM; 750 return -ENOMEM;
745 751
746 atomic_inc(&sdp->sd_glock_disposal); 752 atomic_inc(&sdp->sd_glock_disposal);
753 gl->gl_sbd = sdp;
747 gl->gl_flags = 0; 754 gl->gl_flags = 0;
748 gl->gl_name = name; 755 gl->gl_name = name;
749 atomic_set(&gl->gl_ref, 1); 756 atomic_set(&gl->gl_ref, 1);
@@ -752,12 +759,17 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
752 gl->gl_demote_state = LM_ST_EXCLUSIVE; 759 gl->gl_demote_state = LM_ST_EXCLUSIVE;
753 gl->gl_hash = hash; 760 gl->gl_hash = hash;
754 gl->gl_ops = glops; 761 gl->gl_ops = glops;
755 snprintf(gl->gl_strname, GDLM_STRNAME_BYTES, "%8x%16llx", name.ln_type, (unsigned long long)number); 762 gl->gl_dstamp = ktime_set(0, 0);
763 preempt_disable();
764 /* We use the global stats to estimate the initial per-glock stats */
765 gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type];
766 preempt_enable();
767 gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
768 gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
756 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); 769 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
757 gl->gl_lksb.sb_lvbptr = gl->gl_lvb; 770 gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
758 gl->gl_tchange = jiffies; 771 gl->gl_tchange = jiffies;
759 gl->gl_object = NULL; 772 gl->gl_object = NULL;
760 gl->gl_sbd = sdp;
761 gl->gl_hold_time = GL_GLOCK_DFT_HOLD; 773 gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
762 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); 774 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
763 INIT_WORK(&gl->gl_delete, delete_work_func); 775 INIT_WORK(&gl->gl_delete, delete_work_func);
@@ -999,6 +1011,8 @@ fail:
999 } 1011 }
1000 set_bit(GLF_QUEUED, &gl->gl_flags); 1012 set_bit(GLF_QUEUED, &gl->gl_flags);
1001 trace_gfs2_glock_queue(gh, 1); 1013 trace_gfs2_glock_queue(gh, 1);
1014 gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
1015 gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
1002 if (likely(insert_pt == NULL)) { 1016 if (likely(insert_pt == NULL)) {
1003 list_add_tail(&gh->gh_list, &gl->gl_holders); 1017 list_add_tail(&gh->gh_list, &gl->gl_holders);
1004 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) 1018 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
@@ -1658,6 +1672,8 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
1658 *p++ = 'L'; 1672 *p++ = 'L';
1659 if (gl->gl_object) 1673 if (gl->gl_object)
1660 *p++ = 'o'; 1674 *p++ = 'o';
1675 if (test_bit(GLF_BLOCKING, gflags))
1676 *p++ = 'b';
1661 *p = 0; 1677 *p = 0;
1662 return buf; 1678 return buf;
1663} 1679}
@@ -1714,8 +1730,78 @@ out:
1714 return error; 1730 return error;
1715} 1731}
1716 1732
1733static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
1734{
1735 struct gfs2_glock *gl = iter_ptr;
1736
1737 seq_printf(seq, "G: n:%u/%llx rtt:%lld/%lld rttb:%lld/%lld irt:%lld/%lld dcnt: %lld qcnt: %lld\n",
1738 gl->gl_name.ln_type,
1739 (unsigned long long)gl->gl_name.ln_number,
1740 (long long)gl->gl_stats.stats[GFS2_LKS_SRTT],
1741 (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR],
1742 (long long)gl->gl_stats.stats[GFS2_LKS_SRTTB],
1743 (long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB],
1744 (long long)gl->gl_stats.stats[GFS2_LKS_SIRT],
1745 (long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR],
1746 (long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT],
1747 (long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]);
1748 return 0;
1749}
1750
1751static const char *gfs2_gltype[] = {
1752 "type",
1753 "reserved",
1754 "nondisk",
1755 "inode",
1756 "rgrp",
1757 "meta",
1758 "iopen",
1759 "flock",
1760 "plock",
1761 "quota",
1762 "journal",
1763};
1764
1765static const char *gfs2_stype[] = {
1766 [GFS2_LKS_SRTT] = "srtt",
1767 [GFS2_LKS_SRTTVAR] = "srttvar",
1768 [GFS2_LKS_SRTTB] = "srttb",
1769 [GFS2_LKS_SRTTVARB] = "srttvarb",
1770 [GFS2_LKS_SIRT] = "sirt",
1771 [GFS2_LKS_SIRTVAR] = "sirtvar",
1772 [GFS2_LKS_DCOUNT] = "dlm",
1773 [GFS2_LKS_QCOUNT] = "queue",
1774};
1775
1776#define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype))
1777
1778static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
1779{
1780 struct gfs2_glock_iter *gi = seq->private;
1781 struct gfs2_sbd *sdp = gi->sdp;
1782 unsigned index = gi->hash >> 3;
1783 unsigned subindex = gi->hash & 0x07;
1784 s64 value;
1785 int i;
1786
1787 if (index == 0 && subindex != 0)
1788 return 0;
1717 1789
1790 seq_printf(seq, "%-10s %8s:", gfs2_gltype[index],
1791 (index == 0) ? "cpu": gfs2_stype[subindex]);
1718 1792
1793 for_each_possible_cpu(i) {
1794 const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i);
1795 if (index == 0) {
1796 value = i;
1797 } else {
1798 value = lkstats->lkstats[index - 1].stats[subindex];
1799 }
1800 seq_printf(seq, " %15lld", (long long)value);
1801 }
1802 seq_putc(seq, '\n');
1803 return 0;
1804}
1719 1805
1720int __init gfs2_glock_init(void) 1806int __init gfs2_glock_init(void)
1721{ 1807{
@@ -1828,6 +1914,35 @@ static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
1828 return dump_glock(seq, iter_ptr); 1914 return dump_glock(seq, iter_ptr);
1829} 1915}
1830 1916
1917static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos)
1918{
1919 struct gfs2_glock_iter *gi = seq->private;
1920
1921 gi->hash = *pos;
1922 if (*pos >= GFS2_NR_SBSTATS)
1923 return NULL;
1924 preempt_disable();
1925 return SEQ_START_TOKEN;
1926}
1927
1928static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr,
1929 loff_t *pos)
1930{
1931 struct gfs2_glock_iter *gi = seq->private;
1932 (*pos)++;
1933 gi->hash++;
1934 if (gi->hash >= GFS2_NR_SBSTATS) {
1935 preempt_enable();
1936 return NULL;
1937 }
1938 return SEQ_START_TOKEN;
1939}
1940
1941static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr)
1942{
1943 preempt_enable();
1944}
1945
1831static const struct seq_operations gfs2_glock_seq_ops = { 1946static const struct seq_operations gfs2_glock_seq_ops = {
1832 .start = gfs2_glock_seq_start, 1947 .start = gfs2_glock_seq_start,
1833 .next = gfs2_glock_seq_next, 1948 .next = gfs2_glock_seq_next,
@@ -1835,7 +1950,21 @@ static const struct seq_operations gfs2_glock_seq_ops = {
1835 .show = gfs2_glock_seq_show, 1950 .show = gfs2_glock_seq_show,
1836}; 1951};
1837 1952
1838static int gfs2_debugfs_open(struct inode *inode, struct file *file) 1953static const struct seq_operations gfs2_glstats_seq_ops = {
1954 .start = gfs2_glock_seq_start,
1955 .next = gfs2_glock_seq_next,
1956 .stop = gfs2_glock_seq_stop,
1957 .show = gfs2_glstats_seq_show,
1958};
1959
1960static const struct seq_operations gfs2_sbstats_seq_ops = {
1961 .start = gfs2_sbstats_seq_start,
1962 .next = gfs2_sbstats_seq_next,
1963 .stop = gfs2_sbstats_seq_stop,
1964 .show = gfs2_sbstats_seq_show,
1965};
1966
1967static int gfs2_glocks_open(struct inode *inode, struct file *file)
1839{ 1968{
1840 int ret = seq_open_private(file, &gfs2_glock_seq_ops, 1969 int ret = seq_open_private(file, &gfs2_glock_seq_ops,
1841 sizeof(struct gfs2_glock_iter)); 1970 sizeof(struct gfs2_glock_iter));
@@ -1847,9 +1976,49 @@ static int gfs2_debugfs_open(struct inode *inode, struct file *file)
1847 return ret; 1976 return ret;
1848} 1977}
1849 1978
1850static const struct file_operations gfs2_debug_fops = { 1979static int gfs2_glstats_open(struct inode *inode, struct file *file)
1980{
1981 int ret = seq_open_private(file, &gfs2_glstats_seq_ops,
1982 sizeof(struct gfs2_glock_iter));
1983 if (ret == 0) {
1984 struct seq_file *seq = file->private_data;
1985 struct gfs2_glock_iter *gi = seq->private;
1986 gi->sdp = inode->i_private;
1987 }
1988 return ret;
1989}
1990
1991static int gfs2_sbstats_open(struct inode *inode, struct file *file)
1992{
1993 int ret = seq_open_private(file, &gfs2_sbstats_seq_ops,
1994 sizeof(struct gfs2_glock_iter));
1995 if (ret == 0) {
1996 struct seq_file *seq = file->private_data;
1997 struct gfs2_glock_iter *gi = seq->private;
1998 gi->sdp = inode->i_private;
1999 }
2000 return ret;
2001}
2002
2003static const struct file_operations gfs2_glocks_fops = {
2004 .owner = THIS_MODULE,
2005 .open = gfs2_glocks_open,
2006 .read = seq_read,
2007 .llseek = seq_lseek,
2008 .release = seq_release_private,
2009};
2010
2011static const struct file_operations gfs2_glstats_fops = {
1851 .owner = THIS_MODULE, 2012 .owner = THIS_MODULE,
1852 .open = gfs2_debugfs_open, 2013 .open = gfs2_glstats_open,
2014 .read = seq_read,
2015 .llseek = seq_lseek,
2016 .release = seq_release_private,
2017};
2018
2019static const struct file_operations gfs2_sbstats_fops = {
2020 .owner = THIS_MODULE,
2021 .open = gfs2_sbstats_open,
1853 .read = seq_read, 2022 .read = seq_read,
1854 .llseek = seq_lseek, 2023 .llseek = seq_lseek,
1855 .release = seq_release_private, 2024 .release = seq_release_private,
@@ -1863,20 +2032,45 @@ int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
1863 sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", 2032 sdp->debugfs_dentry_glocks = debugfs_create_file("glocks",
1864 S_IFREG | S_IRUGO, 2033 S_IFREG | S_IRUGO,
1865 sdp->debugfs_dir, sdp, 2034 sdp->debugfs_dir, sdp,
1866 &gfs2_debug_fops); 2035 &gfs2_glocks_fops);
1867 if (!sdp->debugfs_dentry_glocks) 2036 if (!sdp->debugfs_dentry_glocks)
1868 return -ENOMEM; 2037 goto fail;
2038
2039 sdp->debugfs_dentry_glstats = debugfs_create_file("glstats",
2040 S_IFREG | S_IRUGO,
2041 sdp->debugfs_dir, sdp,
2042 &gfs2_glstats_fops);
2043 if (!sdp->debugfs_dentry_glstats)
2044 goto fail;
2045
2046 sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats",
2047 S_IFREG | S_IRUGO,
2048 sdp->debugfs_dir, sdp,
2049 &gfs2_sbstats_fops);
2050 if (!sdp->debugfs_dentry_sbstats)
2051 goto fail;
1869 2052
1870 return 0; 2053 return 0;
2054fail:
2055 gfs2_delete_debugfs_file(sdp);
2056 return -ENOMEM;
1871} 2057}
1872 2058
1873void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) 2059void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
1874{ 2060{
1875 if (sdp && sdp->debugfs_dir) { 2061 if (sdp->debugfs_dir) {
1876 if (sdp->debugfs_dentry_glocks) { 2062 if (sdp->debugfs_dentry_glocks) {
1877 debugfs_remove(sdp->debugfs_dentry_glocks); 2063 debugfs_remove(sdp->debugfs_dentry_glocks);
1878 sdp->debugfs_dentry_glocks = NULL; 2064 sdp->debugfs_dentry_glocks = NULL;
1879 } 2065 }
2066 if (sdp->debugfs_dentry_glstats) {
2067 debugfs_remove(sdp->debugfs_dentry_glstats);
2068 sdp->debugfs_dentry_glstats = NULL;
2069 }
2070 if (sdp->debugfs_dentry_sbstats) {
2071 debugfs_remove(sdp->debugfs_dentry_sbstats);
2072 sdp->debugfs_dentry_sbstats = NULL;
2073 }
1880 debugfs_remove(sdp->debugfs_dir); 2074 debugfs_remove(sdp->debugfs_dir);
1881 sdp->debugfs_dir = NULL; 2075 sdp->debugfs_dir = NULL;
1882 } 2076 }
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 97742a7ea9cc..4d546df58ac9 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -19,6 +19,8 @@
19#include <linux/rculist_bl.h> 19#include <linux/rculist_bl.h>
20#include <linux/completion.h> 20#include <linux/completion.h>
21#include <linux/rbtree.h> 21#include <linux/rbtree.h>
22#include <linux/ktime.h>
23#include <linux/percpu.h>
22 24
23#define DIO_WAIT 0x00000010 25#define DIO_WAIT 0x00000010
24#define DIO_METADATA 0x00000020 26#define DIO_METADATA 0x00000020
@@ -205,6 +207,22 @@ struct gfs2_glock_operations {
205}; 207};
206 208
207enum { 209enum {
210 GFS2_LKS_SRTT = 0, /* Non blocking smoothed round trip time */
211 GFS2_LKS_SRTTVAR = 1, /* Non blocking smoothed variance */
212 GFS2_LKS_SRTTB = 2, /* Blocking smoothed round trip time */
213 GFS2_LKS_SRTTVARB = 3, /* Blocking smoothed variance */
214 GFS2_LKS_SIRT = 4, /* Smoothed Inter-request time */
215 GFS2_LKS_SIRTVAR = 5, /* Smoothed Inter-request variance */
216 GFS2_LKS_DCOUNT = 6, /* Count of dlm requests */
217 GFS2_LKS_QCOUNT = 7, /* Count of gfs2_holder queues */
218 GFS2_NR_LKSTATS
219};
220
221struct gfs2_lkstats {
222 s64 stats[GFS2_NR_LKSTATS];
223};
224
225enum {
208 /* States */ 226 /* States */
209 HIF_HOLDER = 6, /* Set for gh that "holds" the glock */ 227 HIF_HOLDER = 6, /* Set for gh that "holds" the glock */
210 HIF_FIRST = 7, 228 HIF_FIRST = 7,
@@ -238,10 +256,12 @@ enum {
238 GLF_QUEUED = 12, 256 GLF_QUEUED = 12,
239 GLF_LRU = 13, 257 GLF_LRU = 13,
240 GLF_OBJECT = 14, /* Used only for tracing */ 258 GLF_OBJECT = 14, /* Used only for tracing */
259 GLF_BLOCKING = 15,
241}; 260};
242 261
243struct gfs2_glock { 262struct gfs2_glock {
244 struct hlist_bl_node gl_list; 263 struct hlist_bl_node gl_list;
264 struct gfs2_sbd *gl_sbd;
245 unsigned long gl_flags; /* GLF_... */ 265 unsigned long gl_flags; /* GLF_... */
246 struct lm_lockname gl_name; 266 struct lm_lockname gl_name;
247 atomic_t gl_ref; 267 atomic_t gl_ref;
@@ -261,16 +281,14 @@ struct gfs2_glock {
261 struct list_head gl_holders; 281 struct list_head gl_holders;
262 282
263 const struct gfs2_glock_operations *gl_ops; 283 const struct gfs2_glock_operations *gl_ops;
264 char gl_strname[GDLM_STRNAME_BYTES]; 284 ktime_t gl_dstamp;
285 struct gfs2_lkstats gl_stats;
265 struct dlm_lksb gl_lksb; 286 struct dlm_lksb gl_lksb;
266 char gl_lvb[32]; 287 char gl_lvb[32];
267 unsigned long gl_tchange; 288 unsigned long gl_tchange;
268 void *gl_object; 289 void *gl_object;
269 290
270 struct list_head gl_lru; 291 struct list_head gl_lru;
271
272 struct gfs2_sbd *gl_sbd;
273
274 struct list_head gl_ail_list; 292 struct list_head gl_ail_list;
275 atomic_t gl_ail_count; 293 atomic_t gl_ail_count;
276 atomic_t gl_revokes; 294 atomic_t gl_revokes;
@@ -560,8 +578,14 @@ struct lm_lockstruct {
560 uint32_t *ls_recover_result; /* result of last jid recovery */ 578 uint32_t *ls_recover_result; /* result of last jid recovery */
561}; 579};
562 580
581struct gfs2_pcpu_lkstats {
582 /* One struct for each glock type */
583 struct gfs2_lkstats lkstats[10];
584};
585
563struct gfs2_sbd { 586struct gfs2_sbd {
564 struct super_block *sd_vfs; 587 struct super_block *sd_vfs;
588 struct gfs2_pcpu_lkstats __percpu *sd_lkstats;
565 struct kobject sd_kobj; 589 struct kobject sd_kobj;
566 unsigned long sd_flags; /* SDF_... */ 590 unsigned long sd_flags; /* SDF_... */
567 struct gfs2_sb_host sd_sb; 591 struct gfs2_sb_host sd_sb;
@@ -725,8 +749,23 @@ struct gfs2_sbd {
725 749
726 unsigned long sd_last_warning; 750 unsigned long sd_last_warning;
727 struct dentry *debugfs_dir; /* debugfs directory */ 751 struct dentry *debugfs_dir; /* debugfs directory */
728 struct dentry *debugfs_dentry_glocks; /* for debugfs */ 752 struct dentry *debugfs_dentry_glocks;
753 struct dentry *debugfs_dentry_glstats;
754 struct dentry *debugfs_dentry_sbstats;
729}; 755};
730 756
757static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which)
758{
759 gl->gl_stats.stats[which]++;
760}
761
762static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which)
763{
764 const struct gfs2_sbd *sdp = gl->gl_sbd;
765 preempt_disable();
766 this_cpu_ptr(sdp->sd_lkstats)->lkstats[gl->gl_name.ln_type].stats[which]++;
767 preempt_enable();
768}
769
731#endif /* __INCORE_DOT_H__ */ 770#endif /* __INCORE_DOT_H__ */
732 771
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 8944d1e32ab5..f8411bd1b805 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -18,14 +18,106 @@
18#include "glock.h" 18#include "glock.h"
19#include "util.h" 19#include "util.h"
20#include "sys.h" 20#include "sys.h"
21#include "trace_gfs2.h"
21 22
22extern struct workqueue_struct *gfs2_control_wq; 23extern struct workqueue_struct *gfs2_control_wq;
23 24
25/**
26 * gfs2_update_stats - Update time based stats
27 * @s: Pointer to mean/variance structure to update
28 * @sample: New data to include
29 *
30 * @delta is the difference between the current rtt sample and the
31 * running average srtt. We add 1/8 of that to the srtt in order to
32 * update the current srtt estimate. The variance estimate is a bit
33 * more complicated. We subtract the current variance estimate from
34 * the abs value of the @delta and add 1/4 of that to the running
35 * total.
36 *
37 * Note that the index points at the array entry containing the smoothed
38 * mean value, and the variance is always in the following entry
39 *
40 * Reference: TCP/IP Illustrated, vol 2, p. 831,832
41 * All times are in units of integer nanoseconds. Unlike the TCP/IP case,
42 * they are not scaled fixed point.
43 */
44
45static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
46 s64 sample)
47{
48 s64 delta = sample - s->stats[index];
49 s->stats[index] += (delta >> 3);
50 index++;
51 s->stats[index] += ((abs64(delta) - s->stats[index]) >> 2);
52}
53
54/**
55 * gfs2_update_reply_times - Update locking statistics
56 * @gl: The glock to update
57 *
58 * This assumes that gl->gl_dstamp has been set earlier.
59 *
60 * The rtt (lock round trip time) is an estimate of the time
61 * taken to perform a dlm lock request. We update it on each
62 * reply from the dlm.
63 *
64 * The blocking flag is set on the glock for all dlm requests
65 * which may potentially block due to lock requests from other nodes.
66 * DLM requests where the current lock state is exclusive, the
67 * requested state is null (or unlocked) or where the TRY or
68 * TRY_1CB flags are set are classified as non-blocking. All
69 * other DLM requests are counted as (potentially) blocking.
70 */
71static inline void gfs2_update_reply_times(struct gfs2_glock *gl)
72{
73 struct gfs2_pcpu_lkstats *lks;
74 const unsigned gltype = gl->gl_name.ln_type;
75 unsigned index = test_bit(GLF_BLOCKING, &gl->gl_flags) ?
76 GFS2_LKS_SRTTB : GFS2_LKS_SRTT;
77 s64 rtt;
78
79 preempt_disable();
80 rtt = ktime_to_ns(ktime_sub(ktime_get_real(), gl->gl_dstamp));
81 lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats);
82 gfs2_update_stats(&gl->gl_stats, index, rtt); /* Local */
83 gfs2_update_stats(&lks->lkstats[gltype], index, rtt); /* Global */
84 preempt_enable();
85
86 trace_gfs2_glock_lock_time(gl, rtt);
87}
88
89/**
90 * gfs2_update_request_times - Update locking statistics
91 * @gl: The glock to update
92 *
93 * The irt (lock inter-request times) measures the average time
94 * between requests to the dlm. It is updated immediately before
95 * each dlm call.
96 */
97
98static inline void gfs2_update_request_times(struct gfs2_glock *gl)
99{
100 struct gfs2_pcpu_lkstats *lks;
101 const unsigned gltype = gl->gl_name.ln_type;
102 ktime_t dstamp;
103 s64 irt;
104
105 preempt_disable();
106 dstamp = gl->gl_dstamp;
107 gl->gl_dstamp = ktime_get_real();
108 irt = ktime_to_ns(ktime_sub(gl->gl_dstamp, dstamp));
109 lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats);
110 gfs2_update_stats(&gl->gl_stats, GFS2_LKS_SIRT, irt); /* Local */
111 gfs2_update_stats(&lks->lkstats[gltype], GFS2_LKS_SIRT, irt); /* Global */
112 preempt_enable();
113}
114
24static void gdlm_ast(void *arg) 115static void gdlm_ast(void *arg)
25{ 116{
26 struct gfs2_glock *gl = arg; 117 struct gfs2_glock *gl = arg;
27 unsigned ret = gl->gl_state; 118 unsigned ret = gl->gl_state;
28 119
120 gfs2_update_reply_times(gl);
29 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); 121 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
30 122
31 if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) 123 if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID)
@@ -111,7 +203,7 @@ static int make_mode(const unsigned int lmstate)
111static u32 make_flags(const u32 lkid, const unsigned int gfs_flags, 203static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
112 const int req) 204 const int req)
113{ 205{
114 u32 lkf = 0; 206 u32 lkf = DLM_LKF_VALBLK;
115 207
116 if (gfs_flags & LM_FLAG_TRY) 208 if (gfs_flags & LM_FLAG_TRY)
117 lkf |= DLM_LKF_NOQUEUE; 209 lkf |= DLM_LKF_NOQUEUE;
@@ -138,26 +230,43 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
138 if (lkid != 0) 230 if (lkid != 0)
139 lkf |= DLM_LKF_CONVERT; 231 lkf |= DLM_LKF_CONVERT;
140 232
141 lkf |= DLM_LKF_VALBLK;
142
143 return lkf; 233 return lkf;
144} 234}
145 235
236static void gfs2_reverse_hex(char *c, u64 value)
237{
238 while (value) {
239 *c-- = hex_asc[value & 0x0f];
240 value >>= 4;
241 }
242}
243
146static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, 244static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
147 unsigned int flags) 245 unsigned int flags)
148{ 246{
149 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; 247 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
150 int req; 248 int req;
151 u32 lkf; 249 u32 lkf;
250 char strname[GDLM_STRNAME_BYTES] = "";
152 251
153 req = make_mode(req_state); 252 req = make_mode(req_state);
154 lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req); 253 lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req);
155 254 gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
255 gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
256 if (gl->gl_lksb.sb_lkid) {
257 gfs2_update_request_times(gl);
258 } else {
259 memset(strname, ' ', GDLM_STRNAME_BYTES - 1);
260 strname[GDLM_STRNAME_BYTES - 1] = '\0';
261 gfs2_reverse_hex(strname + 7, gl->gl_name.ln_type);
262 gfs2_reverse_hex(strname + 23, gl->gl_name.ln_number);
263 gl->gl_dstamp = ktime_get_real();
264 }
156 /* 265 /*
157 * Submit the actual lock request. 266 * Submit the actual lock request.
158 */ 267 */
159 268
160 return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname, 269 return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
161 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); 270 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
162} 271}
163 272
@@ -172,6 +281,10 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
172 return; 281 return;
173 } 282 }
174 283
284 clear_bit(GLF_BLOCKING, &gl->gl_flags);
285 gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
286 gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
287 gfs2_update_request_times(gl);
175 error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, 288 error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
176 NULL, gl); 289 NULL, gl);
177 if (error) { 290 if (error) {
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 24f609c9ef91..a55baa7f3239 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -68,6 +68,12 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
68 68
69 sb->s_fs_info = sdp; 69 sb->s_fs_info = sdp;
70 sdp->sd_vfs = sb; 70 sdp->sd_vfs = sb;
71 sdp->sd_lkstats = alloc_percpu(struct gfs2_pcpu_lkstats);
72 if (!sdp->sd_lkstats) {
73 kfree(sdp);
74 return NULL;
75 }
76
71 set_bit(SDF_NOJOURNALID, &sdp->sd_flags); 77 set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
72 gfs2_tune_init(&sdp->sd_tune); 78 gfs2_tune_init(&sdp->sd_tune);
73 79
@@ -1221,6 +1227,7 @@ fail_sys:
1221 gfs2_sys_fs_del(sdp); 1227 gfs2_sys_fs_del(sdp);
1222fail: 1228fail:
1223 gfs2_delete_debugfs_file(sdp); 1229 gfs2_delete_debugfs_file(sdp);
1230 free_percpu(sdp->sd_lkstats);
1224 kfree(sdp); 1231 kfree(sdp);
1225 sb->s_fs_info = NULL; 1232 sb->s_fs_info = NULL;
1226 return error; 1233 return error;
@@ -1393,6 +1400,7 @@ static void gfs2_kill_sb(struct super_block *sb)
1393 shrink_dcache_sb(sb); 1400 shrink_dcache_sb(sb);
1394 kill_block_super(sb); 1401 kill_block_super(sb);
1395 gfs2_delete_debugfs_file(sdp); 1402 gfs2_delete_debugfs_file(sdp);
1403 free_percpu(sdp->sd_lkstats);
1396 kfree(sdp); 1404 kfree(sdp);
1397} 1405}
1398 1406
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 5d07609ec57d..dfa89cd75534 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -11,6 +11,7 @@
11#include <linux/dlmconstants.h> 11#include <linux/dlmconstants.h>
12#include <linux/gfs2_ondisk.h> 12#include <linux/gfs2_ondisk.h>
13#include <linux/writeback.h> 13#include <linux/writeback.h>
14#include <linux/ktime.h>
14#include "incore.h" 15#include "incore.h"
15#include "glock.h" 16#include "glock.h"
16 17
@@ -43,7 +44,8 @@
43 {(1UL << GLF_FROZEN), "F" }, \ 44 {(1UL << GLF_FROZEN), "F" }, \
44 {(1UL << GLF_QUEUED), "q" }, \ 45 {(1UL << GLF_QUEUED), "q" }, \
45 {(1UL << GLF_LRU), "L" }, \ 46 {(1UL << GLF_LRU), "L" }, \
46 {(1UL << GLF_OBJECT), "o" }) 47 {(1UL << GLF_OBJECT), "o" }, \
48 {(1UL << GLF_BLOCKING), "b" })
47 49
48#ifndef NUMPTY 50#ifndef NUMPTY
49#define NUMPTY 51#define NUMPTY
@@ -236,6 +238,62 @@ TRACE_EVENT(gfs2_glock_queue,
236 glock_trace_name(__entry->state)) 238 glock_trace_name(__entry->state))
237); 239);
238 240
241/* DLM sends a reply to GFS2 */
242TRACE_EVENT(gfs2_glock_lock_time,
243
244 TP_PROTO(const struct gfs2_glock *gl, s64 tdiff),
245
246 TP_ARGS(gl, tdiff),
247
248 TP_STRUCT__entry(
249 __field( dev_t, dev )
250 __field( u64, glnum )
251 __field( u32, gltype )
252 __field( int, status )
253 __field( char, flags )
254 __field( s64, tdiff )
255 __field( s64, srtt )
256 __field( s64, srttvar )
257 __field( s64, srttb )
258 __field( s64, srttvarb )
259 __field( s64, sirt )
260 __field( s64, sirtvar )
261 __field( s64, dcount )
262 __field( s64, qcount )
263 ),
264
265 TP_fast_assign(
266 __entry->dev = gl->gl_sbd->sd_vfs->s_dev;
267 __entry->glnum = gl->gl_name.ln_number;
268 __entry->gltype = gl->gl_name.ln_type;
269 __entry->status = gl->gl_lksb.sb_status;
270 __entry->flags = gl->gl_lksb.sb_flags;
271 __entry->tdiff = tdiff;
272 __entry->srtt = gl->gl_stats.stats[GFS2_LKS_SRTT];
273 __entry->srttvar = gl->gl_stats.stats[GFS2_LKS_SRTTVAR];
274 __entry->srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
275 __entry->srttvarb = gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
276 __entry->sirt = gl->gl_stats.stats[GFS2_LKS_SIRT];
277 __entry->sirtvar = gl->gl_stats.stats[GFS2_LKS_SIRTVAR];
278 __entry->dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
279 __entry->qcount = gl->gl_stats.stats[GFS2_LKS_QCOUNT];
280 ),
281
282 TP_printk("%u,%u glock %d:%lld status:%d flags:%02x tdiff:%lld srtt:%lld/%lld srttb:%lld/%lld sirt:%lld/%lld dcnt:%lld qcnt:%lld",
283 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
284 (unsigned long long)__entry->glnum,
285 __entry->status, __entry->flags,
286 (long long)__entry->tdiff,
287 (long long)__entry->srtt,
288 (long long)__entry->srttvar,
289 (long long)__entry->srttb,
290 (long long)__entry->srttvarb,
291 (long long)__entry->sirt,
292 (long long)__entry->sirtvar,
293 (long long)__entry->dcount,
294 (long long)__entry->qcount)
295);
296
239/* Section 2 - Log/journal 297/* Section 2 - Log/journal
240 * 298 *
241 * Objectives: 299 * Objectives: