aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTim Chen <tim.c.chen@linux.intel.com>2010-08-09 20:19:05 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-08-09 23:44:58 -0400
commit7e496299d4d2ad8083effed6c5a18313a919edc6 (patch)
tree8324a3fa378d28f6a63c8b5f5a86311e4eea3562
parent27f5e0f694fd0600274a76854636c0749e3bb1f6 (diff)
tmpfs: make tmpfs scalable with percpu_counter for used blocks
The current implementation of tmpfs is not scalable. We found that stat_lock is contended by multiple threads when we need to get a new page, leading to useless spinning inside this spin lock. This patch makes use of the percpu_counter library to maintain local count of used blocks to speed up getting and returning of pages. So the acquisition of stat_lock is unnecessary for getting and returning blocks, improving the performance of tmpfs on system with large number of cpus. On a 4 socket 32 core NHM-EX system, we saw improvement of 270%. The implementation below has a slight chance of race between threads causing a slight overshoot of the maximum configured blocks. However, any overshoot is small, and is bounded by the number of cpus. This happens when the number of used blocks is slightly below the maximum configured blocks when a thread checks the used block count, and another thread allocates the last block before the current thread does. This should not be a problem for tmpfs, as the overshoot is most likely to be a few blocks and bounded. If a strict limit is really desired, then configured the max blocks to be the limit less the number of cpus in system. Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> Cc: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/shmem_fs.h3
-rw-r--r--mm/shmem.c40
2 files changed, 19 insertions, 24 deletions
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index e164291fb3e7..399be5ad2f99 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -3,6 +3,7 @@
3 3
4#include <linux/swap.h> 4#include <linux/swap.h>
5#include <linux/mempolicy.h> 5#include <linux/mempolicy.h>
6#include <linux/percpu_counter.h>
6 7
7/* inode in-kernel data */ 8/* inode in-kernel data */
8 9
@@ -23,7 +24,7 @@ struct shmem_inode_info {
23 24
24struct shmem_sb_info { 25struct shmem_sb_info {
25 unsigned long max_blocks; /* How many blocks are allowed */ 26 unsigned long max_blocks; /* How many blocks are allowed */
26 unsigned long free_blocks; /* How many are left for allocation */ 27 struct percpu_counter used_blocks; /* How many are allocated */
27 unsigned long max_inodes; /* How many inodes are allowed */ 28 unsigned long max_inodes; /* How many inodes are allowed */
28 unsigned long free_inodes; /* How many are left for allocation */ 29 unsigned long free_inodes; /* How many are left for allocation */
29 spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ 30 spinlock_t stat_lock; /* Serialize shmem_sb_info changes */
diff --git a/mm/shmem.c b/mm/shmem.c
index f65f84062db5..0618fdad406c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -28,6 +28,7 @@
28#include <linux/file.h> 28#include <linux/file.h>
29#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/module.h> 30#include <linux/module.h>
31#include <linux/percpu_counter.h>
31#include <linux/swap.h> 32#include <linux/swap.h>
32 33
33static struct vfsmount *shm_mnt; 34static struct vfsmount *shm_mnt;
@@ -233,10 +234,10 @@ static void shmem_free_blocks(struct inode *inode, long pages)
233{ 234{
234 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 235 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
235 if (sbinfo->max_blocks) { 236 if (sbinfo->max_blocks) {
236 spin_lock(&sbinfo->stat_lock); 237 percpu_counter_add(&sbinfo->used_blocks, -pages);
237 sbinfo->free_blocks += pages; 238 spin_lock(&inode->i_lock);
238 inode->i_blocks -= pages*BLOCKS_PER_PAGE; 239 inode->i_blocks -= pages*BLOCKS_PER_PAGE;
239 spin_unlock(&sbinfo->stat_lock); 240 spin_unlock(&inode->i_lock);
240 } 241 }
241} 242}
242 243
@@ -416,19 +417,17 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
416 if (sgp == SGP_READ) 417 if (sgp == SGP_READ)
417 return shmem_swp_map(ZERO_PAGE(0)); 418 return shmem_swp_map(ZERO_PAGE(0));
418 /* 419 /*
419 * Test free_blocks against 1 not 0, since we have 1 data 420 * Test used_blocks against 1 less max_blocks, since we have 1 data
420 * page (and perhaps indirect index pages) yet to allocate: 421 * page (and perhaps indirect index pages) yet to allocate:
421 * a waste to allocate index if we cannot allocate data. 422 * a waste to allocate index if we cannot allocate data.
422 */ 423 */
423 if (sbinfo->max_blocks) { 424 if (sbinfo->max_blocks) {
424 spin_lock(&sbinfo->stat_lock); 425 if (percpu_counter_compare(&sbinfo->used_blocks, (sbinfo->max_blocks - 1)) > 0)
425 if (sbinfo->free_blocks <= 1) {
426 spin_unlock(&sbinfo->stat_lock);
427 return ERR_PTR(-ENOSPC); 426 return ERR_PTR(-ENOSPC);
428 } 427 percpu_counter_inc(&sbinfo->used_blocks);
429 sbinfo->free_blocks--; 428 spin_lock(&inode->i_lock);
430 inode->i_blocks += BLOCKS_PER_PAGE; 429 inode->i_blocks += BLOCKS_PER_PAGE;
431 spin_unlock(&sbinfo->stat_lock); 430 spin_unlock(&inode->i_lock);
432 } 431 }
433 432
434 spin_unlock(&info->lock); 433 spin_unlock(&info->lock);
@@ -1387,17 +1386,16 @@ repeat:
1387 shmem_swp_unmap(entry); 1386 shmem_swp_unmap(entry);
1388 sbinfo = SHMEM_SB(inode->i_sb); 1387 sbinfo = SHMEM_SB(inode->i_sb);
1389 if (sbinfo->max_blocks) { 1388 if (sbinfo->max_blocks) {
1390 spin_lock(&sbinfo->stat_lock); 1389 if ((percpu_counter_compare(&sbinfo->used_blocks, sbinfo->max_blocks) > 0) ||
1391 if (sbinfo->free_blocks == 0 ||
1392 shmem_acct_block(info->flags)) { 1390 shmem_acct_block(info->flags)) {
1393 spin_unlock(&sbinfo->stat_lock);
1394 spin_unlock(&info->lock); 1391 spin_unlock(&info->lock);
1395 error = -ENOSPC; 1392 error = -ENOSPC;
1396 goto failed; 1393 goto failed;
1397 } 1394 }
1398 sbinfo->free_blocks--; 1395 percpu_counter_inc(&sbinfo->used_blocks);
1396 spin_lock(&inode->i_lock);
1399 inode->i_blocks += BLOCKS_PER_PAGE; 1397 inode->i_blocks += BLOCKS_PER_PAGE;
1400 spin_unlock(&sbinfo->stat_lock); 1398 spin_unlock(&inode->i_lock);
1401 } else if (shmem_acct_block(info->flags)) { 1399 } else if (shmem_acct_block(info->flags)) {
1402 spin_unlock(&info->lock); 1400 spin_unlock(&info->lock);
1403 error = -ENOSPC; 1401 error = -ENOSPC;
@@ -1791,17 +1789,16 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
1791 buf->f_type = TMPFS_MAGIC; 1789 buf->f_type = TMPFS_MAGIC;
1792 buf->f_bsize = PAGE_CACHE_SIZE; 1790 buf->f_bsize = PAGE_CACHE_SIZE;
1793 buf->f_namelen = NAME_MAX; 1791 buf->f_namelen = NAME_MAX;
1794 spin_lock(&sbinfo->stat_lock);
1795 if (sbinfo->max_blocks) { 1792 if (sbinfo->max_blocks) {
1796 buf->f_blocks = sbinfo->max_blocks; 1793 buf->f_blocks = sbinfo->max_blocks;
1797 buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; 1794 buf->f_bavail = buf->f_bfree =
1795 sbinfo->max_blocks - percpu_counter_sum(&sbinfo->used_blocks);
1798 } 1796 }
1799 if (sbinfo->max_inodes) { 1797 if (sbinfo->max_inodes) {
1800 buf->f_files = sbinfo->max_inodes; 1798 buf->f_files = sbinfo->max_inodes;
1801 buf->f_ffree = sbinfo->free_inodes; 1799 buf->f_ffree = sbinfo->free_inodes;
1802 } 1800 }
1803 /* else leave those fields 0 like simple_statfs */ 1801 /* else leave those fields 0 like simple_statfs */
1804 spin_unlock(&sbinfo->stat_lock);
1805 return 0; 1802 return 0;
1806} 1803}
1807 1804
@@ -2242,7 +2239,6 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
2242{ 2239{
2243 struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 2240 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
2244 struct shmem_sb_info config = *sbinfo; 2241 struct shmem_sb_info config = *sbinfo;
2245 unsigned long blocks;
2246 unsigned long inodes; 2242 unsigned long inodes;
2247 int error = -EINVAL; 2243 int error = -EINVAL;
2248 2244
@@ -2250,9 +2246,8 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
2250 return error; 2246 return error;
2251 2247
2252 spin_lock(&sbinfo->stat_lock); 2248 spin_lock(&sbinfo->stat_lock);
2253 blocks = sbinfo->max_blocks - sbinfo->free_blocks;
2254 inodes = sbinfo->max_inodes - sbinfo->free_inodes; 2249 inodes = sbinfo->max_inodes - sbinfo->free_inodes;
2255 if (config.max_blocks < blocks) 2250 if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0)
2256 goto out; 2251 goto out;
2257 if (config.max_inodes < inodes) 2252 if (config.max_inodes < inodes)
2258 goto out; 2253 goto out;
@@ -2269,7 +2264,6 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
2269 2264
2270 error = 0; 2265 error = 0;
2271 sbinfo->max_blocks = config.max_blocks; 2266 sbinfo->max_blocks = config.max_blocks;
2272 sbinfo->free_blocks = config.max_blocks - blocks;
2273 sbinfo->max_inodes = config.max_inodes; 2267 sbinfo->max_inodes = config.max_inodes;
2274 sbinfo->free_inodes = config.max_inodes - inodes; 2268 sbinfo->free_inodes = config.max_inodes - inodes;
2275 2269
@@ -2344,7 +2338,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
2344#endif 2338#endif
2345 2339
2346 spin_lock_init(&sbinfo->stat_lock); 2340 spin_lock_init(&sbinfo->stat_lock);
2347 sbinfo->free_blocks = sbinfo->max_blocks; 2341 percpu_counter_init(&sbinfo->used_blocks, 0);
2348 sbinfo->free_inodes = sbinfo->max_inodes; 2342 sbinfo->free_inodes = sbinfo->max_inodes;
2349 2343
2350 sb->s_maxbytes = SHMEM_MAX_BYTES; 2344 sb->s_maxbytes = SHMEM_MAX_BYTES;