aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Greg Banks <gnb@melbourne.sgi.com> 2006-10-04 05:15:49 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-10-04 10:55:16 -0400
commit fce1456a19f5c08b688c29f00ef90fdfa074c79b (patch)
tree 789afb4efe2764cabbd65f7d7069acb538711e8b
parent 596bbe53eb3abfe7326b2f5e8afd614265c319c8 (diff)
[PATCH] knfsd: make nfsd readahead params cache SMP-friendly
Make the nfsd read-ahead params cache more SMP-friendly by changing the single global list and lock into a fixed 16-bucket hashtable with per-bucket locks. This reduces spinlock contention in nfsd_read() on read-heavy workloads on multiprocessor servers.

Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients each doing 1K streaming reads at full line rate. The server had 128 nfsd threads, which sizes the RA cache at 256 entries, of which only a handful were used. Flat profiling shows nfsd_read(), including the inlined nfsd_get_raparms(), taking 10.4% of each CPU. This patch drops the contribution from nfsd() to 1.71% for each CPU.

Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- fs/nfsd/vfs.c 60
1 files changed, 44 insertions, 16 deletions
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index bfd36e587ec5..137565a136d7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -54,6 +54,7 @@
54#include <linux/nfsd_idmap.h> 54#include <linux/nfsd_idmap.h>
55#include <linux/security.h> 55#include <linux/security.h>
56#endif /* CONFIG_NFSD_V4 */ 56#endif /* CONFIG_NFSD_V4 */
57#include <linux/jhash.h>
57 58
58#include <asm/uaccess.h> 59#include <asm/uaccess.h>
59 60
@@ -81,10 +82,19 @@ struct raparms {
81 dev_t p_dev; 82 dev_t p_dev;
82 int p_set; 83 int p_set;
83 struct file_ra_state p_ra; 84 struct file_ra_state p_ra;
85 unsigned int p_hindex;
84}; 86};
85 87
88struct raparm_hbucket {
89 struct raparms *pb_head;
90 spinlock_t pb_lock;
91} ____cacheline_aligned_in_smp;
92
86static struct raparms * raparml; 93static struct raparms * raparml;
87static struct raparms * raparm_cache; 94#define RAPARM_HASH_BITS 4
95#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
96#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
97static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
88 98
89/* 99/*
90 * Called from nfsd_lookup and encode_dirent. Check if we have crossed 100 * Called from nfsd_lookup and encode_dirent. Check if we have crossed
@@ -743,16 +753,20 @@ nfsd_sync_dir(struct dentry *dp)
743 * Obtain the readahead parameters for the file 753 * Obtain the readahead parameters for the file
744 * specified by (dev, ino). 754 * specified by (dev, ino).
745 */ 755 */
746static DEFINE_SPINLOCK(ra_lock);
747 756
748static inline struct raparms * 757static inline struct raparms *
749nfsd_get_raparms(dev_t dev, ino_t ino) 758nfsd_get_raparms(dev_t dev, ino_t ino)
750{ 759{
751 struct raparms *ra, **rap, **frap = NULL; 760 struct raparms *ra, **rap, **frap = NULL;
752 int depth = 0; 761 int depth = 0;
762 unsigned int hash;
763 struct raparm_hbucket *rab;
764
765 hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
766 rab = &raparm_hash[hash];
753 767
754 spin_lock(&ra_lock); 768 spin_lock(&rab->pb_lock);
755 for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) { 769 for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
756 if (ra->p_ino == ino && ra->p_dev == dev) 770 if (ra->p_ino == ino && ra->p_dev == dev)
757 goto found; 771 goto found;
758 depth++; 772 depth++;
@@ -761,7 +775,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
761 } 775 }
762 depth = nfsdstats.ra_size*11/10; 776 depth = nfsdstats.ra_size*11/10;
763 if (!frap) { 777 if (!frap) {
764 spin_unlock(&ra_lock); 778 spin_unlock(&rab->pb_lock);
765 return NULL; 779 return NULL;
766 } 780 }
767 rap = frap; 781 rap = frap;
@@ -769,15 +783,16 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
769 ra->p_dev = dev; 783 ra->p_dev = dev;
770 ra->p_ino = ino; 784 ra->p_ino = ino;
771 ra->p_set = 0; 785 ra->p_set = 0;
786 ra->p_hindex = hash;
772found: 787found:
773 if (rap != &raparm_cache) { 788 if (rap != &rab->pb_head) {
774 *rap = ra->p_next; 789 *rap = ra->p_next;
775 ra->p_next = raparm_cache; 790 ra->p_next = rab->pb_head;
776 raparm_cache = ra; 791 rab->pb_head = ra;
777 } 792 }
778 ra->p_count++; 793 ra->p_count++;
779 nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; 794 nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
780 spin_unlock(&ra_lock); 795 spin_unlock(&rab->pb_lock);
781 return ra; 796 return ra;
782} 797}
783 798
@@ -853,11 +868,12 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
853 868
854 /* Write back readahead params */ 869 /* Write back readahead params */
855 if (ra) { 870 if (ra) {
856 spin_lock(&ra_lock); 871 struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
872 spin_lock(&rab->pb_lock);
857 ra->p_ra = file->f_ra; 873 ra->p_ra = file->f_ra;
858 ra->p_set = 1; 874 ra->p_set = 1;
859 ra->p_count--; 875 ra->p_count--;
860 spin_unlock(&ra_lock); 876 spin_unlock(&rab->pb_lock);
861 } 877 }
862 878
863 if (err >= 0) { 879 if (err >= 0) {
@@ -1833,11 +1849,11 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc)
1833void 1849void
1834nfsd_racache_shutdown(void) 1850nfsd_racache_shutdown(void)
1835{ 1851{
1836 if (!raparm_cache) 1852 if (!raparml)
1837 return; 1853 return;
1838 dprintk("nfsd: freeing readahead buffers.\n"); 1854 dprintk("nfsd: freeing readahead buffers.\n");
1839 kfree(raparml); 1855 kfree(raparml);
1840 raparm_cache = raparml = NULL; 1856 raparml = NULL;
1841} 1857}
1842/* 1858/*
1843 * Initialize readahead param cache 1859 * Initialize readahead param cache
@@ -1846,19 +1862,31 @@ int
1846nfsd_racache_init(int cache_size) 1862nfsd_racache_init(int cache_size)
1847{ 1863{
1848 int i; 1864 int i;
1865 int j = 0;
1866 int nperbucket;
1849 1867
1850 if (raparm_cache) 1868
1869 if (raparml)
1851 return 0; 1870 return 0;
1871 if (cache_size < 2*RAPARM_HASH_SIZE)
1872 cache_size = 2*RAPARM_HASH_SIZE;
1852 raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL); 1873 raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL);
1853 1874
1854 if (raparml != NULL) { 1875 if (raparml != NULL) {
1855 dprintk("nfsd: allocating %d readahead buffers.\n", 1876 dprintk("nfsd: allocating %d readahead buffers.\n",
1856 cache_size); 1877 cache_size);
1878 for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) {
1879 raparm_hash[i].pb_head = NULL;
1880 spin_lock_init(&raparm_hash[i].pb_lock);
1881 }
1882 nperbucket = cache_size >> RAPARM_HASH_BITS;
1857 memset(raparml, 0, sizeof(struct raparms) * cache_size); 1883 memset(raparml, 0, sizeof(struct raparms) * cache_size);
1858 for (i = 0; i < cache_size - 1; i++) { 1884 for (i = 0; i < cache_size - 1; i++) {
1859 raparml[i].p_next = raparml + i + 1; 1885 if (i % nperbucket == 0)
1886 raparm_hash[j++].pb_head = raparml + i;
1887 if (i % nperbucket < nperbucket-1)
1888 raparml[i].p_next = raparml + i + 1;
1860 } 1889 }
1861 raparm_cache = raparml;
1862 } else { 1890 } else {
1863 printk(KERN_WARNING 1891 printk(KERN_WARNING
1864 "nfsd: Could not allocate memory read-ahead cache.\n"); 1892 "nfsd: Could not allocate memory read-ahead cache.\n");