aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2011-08-07 01:45:50 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-08-07 01:53:23 -0400
commit: 3ddcd0569cd68f00f3beae9a7959b72918bb91f4 (patch)
tree: 3f7c591316560b1c22e2cc0700fbcd29aa3fbd7f /fs
parent: 830c0f0edca67403d361fe976a25b17356c11f19 (diff)
vfs: optimize inode cache access patterns
The inode structure layout is largely random, and some of the vfs paths really do care. The path lookup in particular is already quite D$ intensive, and profiles show that accessing the 'inode->i_op->xyz' fields is quite costly.

We already optimized the dcache to not unnecessarily load the d_op structure for members that are often NULL using the DCACHE_OP_xyz bits in dentry->d_flags, and this does something very similar for the inode ops that are used during pathname lookup.

It also re-orders the fields so that the fields accessed by 'stat' are together at the beginning of the inode structure, and roughly in the order accessed.

The effect of this seems to be in the 1-2% range for an empty kernel "make -j" run (which is fairly kernel-intensive, mostly in filename lookup), so it's visible. The numbers are fairly noisy, though, and likely depend a lot on exact microarchitecture. So there's more tuning to be done.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- fs/inode.c | 1
-rw-r--r-- fs/namei.c | 76
-rw-r--r-- fs/stat.c | 4
3 files changed, 69 insertions, 12 deletions
diff --git a/fs/inode.c b/fs/inode.c
index 5aab80dc008c..73920d555c88 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -143,6 +143,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
143 inode->i_op = &empty_iops; 143 inode->i_op = &empty_iops;
144 inode->i_fop = &empty_fops; 144 inode->i_fop = &empty_fops;
145 inode->i_nlink = 1; 145 inode->i_nlink = 1;
146 inode->i_opflags = 0;
146 inode->i_uid = 0; 147 inode->i_uid = 0;
147 inode->i_gid = 0; 148 inode->i_gid = 0;
148 atomic_set(&inode->i_writecount, 0); 149 atomic_set(&inode->i_writecount, 0);
diff --git a/fs/namei.c b/fs/namei.c
index 3d607bd80e09..4a98bf154d88 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -308,6 +308,26 @@ int generic_permission(struct inode *inode, int mask)
308 return -EACCES; 308 return -EACCES;
309} 309}
310 310
311/*
312 * We _really_ want to just do "generic_permission()" without
313 * even looking at the inode->i_op values. So we keep a cache
314 * flag in inode->i_opflags, that says "this has no special
315 * permission function, use the fast case".
316 */
317static inline int do_inode_permission(struct inode *inode, int mask)
318{
319 if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
320 if (likely(inode->i_op->permission))
321 return inode->i_op->permission(inode, mask);
322
323 /* This gets set once for the inode lifetime */
324 spin_lock(&inode->i_lock);
325 inode->i_opflags |= IOP_FASTPERM;
326 spin_unlock(&inode->i_lock);
327 }
328 return generic_permission(inode, mask);
329}
330
311/** 331/**
312 * inode_permission - check for access rights to a given inode 332 * inode_permission - check for access rights to a given inode
313 * @inode: inode to check permission on 333 * @inode: inode to check permission on
@@ -322,7 +342,7 @@ int inode_permission(struct inode *inode, int mask)
322{ 342{
323 int retval; 343 int retval;
324 344
325 if (mask & MAY_WRITE) { 345 if (unlikely(mask & MAY_WRITE)) {
326 umode_t mode = inode->i_mode; 346 umode_t mode = inode->i_mode;
327 347
328 /* 348 /*
@@ -339,11 +359,7 @@ int inode_permission(struct inode *inode, int mask)
339 return -EACCES; 359 return -EACCES;
340 } 360 }
341 361
342 if (inode->i_op->permission) 362 retval = do_inode_permission(inode, mask);
343 retval = inode->i_op->permission(inode, mask);
344 else
345 retval = generic_permission(inode, mask);
346
347 if (retval) 363 if (retval)
348 return retval; 364 return retval;
349 365
@@ -1245,6 +1261,26 @@ static void terminate_walk(struct nameidata *nd)
1245 } 1261 }
1246} 1262}
1247 1263
1264/*
1265 * Do we need to follow links? We _really_ want to be able
1266 * to do this check without having to look at inode->i_op,
1267 * so we keep a cache of "no, this doesn't need follow_link"
1268 * for the common case.
1269 */
1270static inline int do_follow_link(struct inode *inode, int follow)
1271{
1272 if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
1273 if (likely(inode->i_op->follow_link))
1274 return follow;
1275
1276 /* This gets set once for the inode lifetime */
1277 spin_lock(&inode->i_lock);
1278 inode->i_opflags |= IOP_NOFOLLOW;
1279 spin_unlock(&inode->i_lock);
1280 }
1281 return 0;
1282}
1283
1248static inline int walk_component(struct nameidata *nd, struct path *path, 1284static inline int walk_component(struct nameidata *nd, struct path *path,
1249 struct qstr *name, int type, int follow) 1285 struct qstr *name, int type, int follow)
1250{ 1286{
@@ -1267,7 +1303,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
1267 terminate_walk(nd); 1303 terminate_walk(nd);
1268 return -ENOENT; 1304 return -ENOENT;
1269 } 1305 }
1270 if (unlikely(inode->i_op->follow_link) && follow) { 1306 if (do_follow_link(inode, follow)) {
1271 if (nd->flags & LOOKUP_RCU) { 1307 if (nd->flags & LOOKUP_RCU) {
1272 if (unlikely(unlazy_walk(nd, path->dentry))) { 1308 if (unlikely(unlazy_walk(nd, path->dentry))) {
1273 terminate_walk(nd); 1309 terminate_walk(nd);
@@ -1320,6 +1356,26 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
1320} 1356}
1321 1357
1322/* 1358/*
1359 * We really don't want to look at inode->i_op->lookup
1360 * when we don't have to. So we keep a cache bit in
1361 * the inode ->i_opflags field that says "yes, we can
1362 * do lookup on this inode".
1363 */
1364static inline int can_lookup(struct inode *inode)
1365{
1366 if (likely(inode->i_opflags & IOP_LOOKUP))
1367 return 1;
1368 if (likely(!inode->i_op->lookup))
1369 return 0;
1370
1371 /* We do this once for the lifetime of the inode */
1372 spin_lock(&inode->i_lock);
1373 inode->i_opflags |= IOP_LOOKUP;
1374 spin_unlock(&inode->i_lock);
1375 return 1;
1376}
1377
1378/*
1323 * Name resolution. 1379 * Name resolution.
1324 * This is the basic name resolution function, turning a pathname into 1380 * This is the basic name resolution function, turning a pathname into
1325 * the final dentry. We expect 'base' to be positive and a directory. 1381 * the final dentry. We expect 'base' to be positive and a directory.
@@ -1398,10 +1454,10 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1398 if (err) 1454 if (err)
1399 return err; 1455 return err;
1400 } 1456 }
1457 if (can_lookup(nd->inode))
1458 continue;
1401 err = -ENOTDIR; 1459 err = -ENOTDIR;
1402 if (!nd->inode->i_op->lookup) 1460 break;
1403 break;
1404 continue;
1405 /* here ends the main loop */ 1461 /* here ends the main loop */
1406 1462
1407last_component: 1463last_component:
diff --git a/fs/stat.c b/fs/stat.c
index 961039121cb8..ba5316ffac61 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -27,12 +27,12 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
27 stat->uid = inode->i_uid; 27 stat->uid = inode->i_uid;
28 stat->gid = inode->i_gid; 28 stat->gid = inode->i_gid;
29 stat->rdev = inode->i_rdev; 29 stat->rdev = inode->i_rdev;
30 stat->size = i_size_read(inode);
30 stat->atime = inode->i_atime; 31 stat->atime = inode->i_atime;
31 stat->mtime = inode->i_mtime; 32 stat->mtime = inode->i_mtime;
32 stat->ctime = inode->i_ctime; 33 stat->ctime = inode->i_ctime;
33 stat->size = i_size_read(inode);
34 stat->blocks = inode->i_blocks;
35 stat->blksize = (1 << inode->i_blkbits); 34 stat->blksize = (1 << inode->i_blkbits);
35 stat->blocks = inode->i_blocks;
36} 36}
37 37
38EXPORT_SYMBOL(generic_fillattr); 38EXPORT_SYMBOL(generic_fillattr);