diff options
author | Greg Banks <gnb@melbourne.sgi.com> | 2006-10-04 05:15:49 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-10-04 10:55:16 -0400 |
commit | fce1456a19f5c08b688c29f00ef90fdfa074c79b (patch) | |
tree | 789afb4efe2764cabbd65f7d7069acb538711e8b | |
parent | 596bbe53eb3abfe7326b2f5e8afd614265c319c8 (diff) |
[PATCH] knfsd: make nfsd readahead params cache SMP-friendly
Make the nfsd read-ahead params cache more SMP-friendly by changing the single
global list and lock into a fixed 16-bucket hashtable with per-bucket locks.
This reduces spinlock contention in nfsd_read() on read-heavy workloads on
multiprocessor servers.
Testing was on a 4 CPU 4 NIC Altix using 4 IRIX clients each doing 1K
streaming reads at full line rate. The server had 128 nfsd threads, which
sizes the RA cache at 256 entries, of which only a handful were used. Flat
profiling shows nfsd_read(), including the inlined nfsd_get_raparms(), taking
10.4% of each CPU. This patch drops the contribution from nfsd_read() to 1.71% for
each CPU.
Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/nfsd/vfs.c | 60 |
1 files changed, 44 insertions, 16 deletions
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index bfd36e587ec5..137565a136d7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -54,6 +54,7 @@ | |||
54 | #include <linux/nfsd_idmap.h> | 54 | #include <linux/nfsd_idmap.h> |
55 | #include <linux/security.h> | 55 | #include <linux/security.h> |
56 | #endif /* CONFIG_NFSD_V4 */ | 56 | #endif /* CONFIG_NFSD_V4 */ |
57 | #include <linux/jhash.h> | ||
57 | 58 | ||
58 | #include <asm/uaccess.h> | 59 | #include <asm/uaccess.h> |
59 | 60 | ||
@@ -81,10 +82,19 @@ struct raparms { | |||
81 | dev_t p_dev; | 82 | dev_t p_dev; |
82 | int p_set; | 83 | int p_set; |
83 | struct file_ra_state p_ra; | 84 | struct file_ra_state p_ra; |
85 | unsigned int p_hindex; | ||
84 | }; | 86 | }; |
85 | 87 | ||
88 | struct raparm_hbucket { | ||
89 | struct raparms *pb_head; | ||
90 | spinlock_t pb_lock; | ||
91 | } ____cacheline_aligned_in_smp; | ||
92 | |||
86 | static struct raparms * raparml; | 93 | static struct raparms * raparml; |
87 | static struct raparms * raparm_cache; | 94 | #define RAPARM_HASH_BITS 4 |
95 | #define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS) | ||
96 | #define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) | ||
97 | static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE]; | ||
88 | 98 | ||
89 | /* | 99 | /* |
90 | * Called from nfsd_lookup and encode_dirent. Check if we have crossed | 100 | * Called from nfsd_lookup and encode_dirent. Check if we have crossed |
@@ -743,16 +753,20 @@ nfsd_sync_dir(struct dentry *dp) | |||
743 | * Obtain the readahead parameters for the file | 753 | * Obtain the readahead parameters for the file |
744 | * specified by (dev, ino). | 754 | * specified by (dev, ino). |
745 | */ | 755 | */ |
746 | static DEFINE_SPINLOCK(ra_lock); | ||
747 | 756 | ||
748 | static inline struct raparms * | 757 | static inline struct raparms * |
749 | nfsd_get_raparms(dev_t dev, ino_t ino) | 758 | nfsd_get_raparms(dev_t dev, ino_t ino) |
750 | { | 759 | { |
751 | struct raparms *ra, **rap, **frap = NULL; | 760 | struct raparms *ra, **rap, **frap = NULL; |
752 | int depth = 0; | 761 | int depth = 0; |
762 | unsigned int hash; | ||
763 | struct raparm_hbucket *rab; | ||
764 | |||
765 | hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK; | ||
766 | rab = &raparm_hash[hash]; | ||
753 | 767 | ||
754 | spin_lock(&ra_lock); | 768 | spin_lock(&rab->pb_lock); |
755 | for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) { | 769 | for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) { |
756 | if (ra->p_ino == ino && ra->p_dev == dev) | 770 | if (ra->p_ino == ino && ra->p_dev == dev) |
757 | goto found; | 771 | goto found; |
758 | depth++; | 772 | depth++; |
@@ -761,7 +775,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino) | |||
761 | } | 775 | } |
762 | depth = nfsdstats.ra_size*11/10; | 776 | depth = nfsdstats.ra_size*11/10; |
763 | if (!frap) { | 777 | if (!frap) { |
764 | spin_unlock(&ra_lock); | 778 | spin_unlock(&rab->pb_lock); |
765 | return NULL; | 779 | return NULL; |
766 | } | 780 | } |
767 | rap = frap; | 781 | rap = frap; |
@@ -769,15 +783,16 @@ nfsd_get_raparms(dev_t dev, ino_t ino) | |||
769 | ra->p_dev = dev; | 783 | ra->p_dev = dev; |
770 | ra->p_ino = ino; | 784 | ra->p_ino = ino; |
771 | ra->p_set = 0; | 785 | ra->p_set = 0; |
786 | ra->p_hindex = hash; | ||
772 | found: | 787 | found: |
773 | if (rap != &raparm_cache) { | 788 | if (rap != &rab->pb_head) { |
774 | *rap = ra->p_next; | 789 | *rap = ra->p_next; |
775 | ra->p_next = raparm_cache; | 790 | ra->p_next = rab->pb_head; |
776 | raparm_cache = ra; | 791 | rab->pb_head = ra; |
777 | } | 792 | } |
778 | ra->p_count++; | 793 | ra->p_count++; |
779 | nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; | 794 | nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; |
780 | spin_unlock(&ra_lock); | 795 | spin_unlock(&rab->pb_lock); |
781 | return ra; | 796 | return ra; |
782 | } | 797 | } |
783 | 798 | ||
@@ -853,11 +868,12 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
853 | 868 | ||
854 | /* Write back readahead params */ | 869 | /* Write back readahead params */ |
855 | if (ra) { | 870 | if (ra) { |
856 | spin_lock(&ra_lock); | 871 | struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; |
872 | spin_lock(&rab->pb_lock); | ||
857 | ra->p_ra = file->f_ra; | 873 | ra->p_ra = file->f_ra; |
858 | ra->p_set = 1; | 874 | ra->p_set = 1; |
859 | ra->p_count--; | 875 | ra->p_count--; |
860 | spin_unlock(&ra_lock); | 876 | spin_unlock(&rab->pb_lock); |
861 | } | 877 | } |
862 | 878 | ||
863 | if (err >= 0) { | 879 | if (err >= 0) { |
@@ -1833,11 +1849,11 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc) | |||
1833 | void | 1849 | void |
1834 | nfsd_racache_shutdown(void) | 1850 | nfsd_racache_shutdown(void) |
1835 | { | 1851 | { |
1836 | if (!raparm_cache) | 1852 | if (!raparml) |
1837 | return; | 1853 | return; |
1838 | dprintk("nfsd: freeing readahead buffers.\n"); | 1854 | dprintk("nfsd: freeing readahead buffers.\n"); |
1839 | kfree(raparml); | 1855 | kfree(raparml); |
1840 | raparm_cache = raparml = NULL; | 1856 | raparml = NULL; |
1841 | } | 1857 | } |
1842 | /* | 1858 | /* |
1843 | * Initialize readahead param cache | 1859 | * Initialize readahead param cache |
@@ -1846,19 +1862,31 @@ int | |||
1846 | nfsd_racache_init(int cache_size) | 1862 | nfsd_racache_init(int cache_size) |
1847 | { | 1863 | { |
1848 | int i; | 1864 | int i; |
1865 | int j = 0; | ||
1866 | int nperbucket; | ||
1849 | 1867 | ||
1850 | if (raparm_cache) | 1868 | |
1869 | if (raparml) | ||
1851 | return 0; | 1870 | return 0; |
1871 | if (cache_size < 2*RAPARM_HASH_SIZE) | ||
1872 | cache_size = 2*RAPARM_HASH_SIZE; | ||
1852 | raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL); | 1873 | raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL); |
1853 | 1874 | ||
1854 | if (raparml != NULL) { | 1875 | if (raparml != NULL) { |
1855 | dprintk("nfsd: allocating %d readahead buffers.\n", | 1876 | dprintk("nfsd: allocating %d readahead buffers.\n", |
1856 | cache_size); | 1877 | cache_size); |
1878 | for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) { | ||
1879 | raparm_hash[i].pb_head = NULL; | ||
1880 | spin_lock_init(&raparm_hash[i].pb_lock); | ||
1881 | } | ||
1882 | nperbucket = cache_size >> RAPARM_HASH_BITS; | ||
1857 | memset(raparml, 0, sizeof(struct raparms) * cache_size); | 1883 | memset(raparml, 0, sizeof(struct raparms) * cache_size); |
1858 | for (i = 0; i < cache_size - 1; i++) { | 1884 | for (i = 0; i < cache_size - 1; i++) { |
1859 | raparml[i].p_next = raparml + i + 1; | 1885 | if (i % nperbucket == 0) |
1886 | raparm_hash[j++].pb_head = raparml + i; | ||
1887 | if (i % nperbucket < nperbucket-1) | ||
1888 | raparml[i].p_next = raparml + i + 1; | ||
1860 | } | 1889 | } |
1861 | raparm_cache = raparml; | ||
1862 | } else { | 1890 | } else { |
1863 | printk(KERN_WARNING | 1891 | printk(KERN_WARNING |
1864 | "nfsd: Could not allocate memory read-ahead cache.\n"); | 1892 | "nfsd: Could not allocate memory read-ahead cache.\n"); |