diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-08-07 01:45:50 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-08-07 01:53:23 -0400 |
commit | 3ddcd0569cd68f00f3beae9a7959b72918bb91f4 (patch) | |
tree | 3f7c591316560b1c22e2cc0700fbcd29aa3fbd7f /fs | |
parent | 830c0f0edca67403d361fe976a25b17356c11f19 (diff) |
vfs: optimize inode cache access patterns
The inode structure layout is largely random, and some of the vfs paths
really do care. The path lookup in particular is already quite D$
intensive, and profiles show that accessing the 'inode->i_op->xyz'
fields is quite costly.
We already optimized the dcache to not unnecessarily load the d_op
structure for members that are often NULL using the DCACHE_OP_xyz bits
in dentry->d_flags, and this does something very similar for the inode
ops that are used during pathname lookup.
It also re-orders the fields so that the fields accessed by 'stat' are
together at the beginning of the inode structure, and roughly in the
order accessed.
The effect of this seems to be in the 1-2% range for an empty kernel
"make -j" run (which is fairly kernel-intensive, mostly in filename
lookup), so it's visible. The numbers are fairly noisy, though, and
likely depend a lot on exact microarchitecture. So there's more tuning
to be done.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/inode.c | 1 | ||||
-rw-r--r-- | fs/namei.c | 76 | ||||
-rw-r--r-- | fs/stat.c | 4 |
3 files changed, 69 insertions, 12 deletions
diff --git a/fs/inode.c b/fs/inode.c index 5aab80dc008c..73920d555c88 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -143,6 +143,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) | |||
143 | inode->i_op = &empty_iops; | 143 | inode->i_op = &empty_iops; |
144 | inode->i_fop = &empty_fops; | 144 | inode->i_fop = &empty_fops; |
145 | inode->i_nlink = 1; | 145 | inode->i_nlink = 1; |
146 | inode->i_opflags = 0; | ||
146 | inode->i_uid = 0; | 147 | inode->i_uid = 0; |
147 | inode->i_gid = 0; | 148 | inode->i_gid = 0; |
148 | atomic_set(&inode->i_writecount, 0); | 149 | atomic_set(&inode->i_writecount, 0); |
diff --git a/fs/namei.c b/fs/namei.c index 3d607bd80e09..4a98bf154d88 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -308,6 +308,26 @@ int generic_permission(struct inode *inode, int mask) | |||
308 | return -EACCES; | 308 | return -EACCES; |
309 | } | 309 | } |
310 | 310 | ||
311 | /* | ||
312 | * We _really_ want to just do "generic_permission()" without | ||
313 | * even looking at the inode->i_op values. So we keep a cache | ||
314 | * flag in inode->i_opflags, that says "this has no special | |
315 | * permission function, use the fast case". | ||
316 | */ | ||
317 | static inline int do_inode_permission(struct inode *inode, int mask) | ||
318 | { | ||
319 | if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) { | ||
320 | if (likely(inode->i_op->permission)) | ||
321 | return inode->i_op->permission(inode, mask); | ||
322 | |||
323 | /* This gets set once for the inode lifetime */ | ||
324 | spin_lock(&inode->i_lock); | ||
325 | inode->i_opflags |= IOP_FASTPERM; | ||
326 | spin_unlock(&inode->i_lock); | ||
327 | } | ||
328 | return generic_permission(inode, mask); | ||
329 | } | ||
330 | |||
311 | /** | 331 | /** |
312 | * inode_permission - check for access rights to a given inode | 332 | * inode_permission - check for access rights to a given inode |
313 | * @inode: inode to check permission on | 333 | * @inode: inode to check permission on |
@@ -322,7 +342,7 @@ int inode_permission(struct inode *inode, int mask) | |||
322 | { | 342 | { |
323 | int retval; | 343 | int retval; |
324 | 344 | ||
325 | if (mask & MAY_WRITE) { | 345 | if (unlikely(mask & MAY_WRITE)) { |
326 | umode_t mode = inode->i_mode; | 346 | umode_t mode = inode->i_mode; |
327 | 347 | ||
328 | /* | 348 | /* |
@@ -339,11 +359,7 @@ int inode_permission(struct inode *inode, int mask) | |||
339 | return -EACCES; | 359 | return -EACCES; |
340 | } | 360 | } |
341 | 361 | ||
342 | if (inode->i_op->permission) | 362 | retval = do_inode_permission(inode, mask); |
343 | retval = inode->i_op->permission(inode, mask); | ||
344 | else | ||
345 | retval = generic_permission(inode, mask); | ||
346 | |||
347 | if (retval) | 363 | if (retval) |
348 | return retval; | 364 | return retval; |
349 | 365 | ||
@@ -1245,6 +1261,26 @@ static void terminate_walk(struct nameidata *nd) | |||
1245 | } | 1261 | } |
1246 | } | 1262 | } |
1247 | 1263 | ||
1264 | /* | ||
1265 | * Do we need to follow links? We _really_ want to be able | ||
1266 | * to do this check without having to look at inode->i_op, | ||
1267 | * so we keep a cache of "no, this doesn't need follow_link" | ||
1268 | * for the common case. | ||
1269 | */ | ||
1270 | static inline int do_follow_link(struct inode *inode, int follow) | ||
1271 | { | ||
1272 | if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { | ||
1273 | if (likely(inode->i_op->follow_link)) | ||
1274 | return follow; | ||
1275 | |||
1276 | /* This gets set once for the inode lifetime */ | ||
1277 | spin_lock(&inode->i_lock); | ||
1278 | inode->i_opflags |= IOP_NOFOLLOW; | ||
1279 | spin_unlock(&inode->i_lock); | ||
1280 | } | ||
1281 | return 0; | ||
1282 | } | ||
1283 | |||
1248 | static inline int walk_component(struct nameidata *nd, struct path *path, | 1284 | static inline int walk_component(struct nameidata *nd, struct path *path, |
1249 | struct qstr *name, int type, int follow) | 1285 | struct qstr *name, int type, int follow) |
1250 | { | 1286 | { |
@@ -1267,7 +1303,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path, | |||
1267 | terminate_walk(nd); | 1303 | terminate_walk(nd); |
1268 | return -ENOENT; | 1304 | return -ENOENT; |
1269 | } | 1305 | } |
1270 | if (unlikely(inode->i_op->follow_link) && follow) { | 1306 | if (do_follow_link(inode, follow)) { |
1271 | if (nd->flags & LOOKUP_RCU) { | 1307 | if (nd->flags & LOOKUP_RCU) { |
1272 | if (unlikely(unlazy_walk(nd, path->dentry))) { | 1308 | if (unlikely(unlazy_walk(nd, path->dentry))) { |
1273 | terminate_walk(nd); | 1309 | terminate_walk(nd); |
@@ -1320,6 +1356,26 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd) | |||
1320 | } | 1356 | } |
1321 | 1357 | ||
1322 | /* | 1358 | /* |
1359 | * We really don't want to look at inode->i_op->lookup | ||
1360 | * when we don't have to. So we keep a cache bit in | ||
1361 | * the inode ->i_opflags field that says "yes, we can | ||
1362 | * do lookup on this inode". | ||
1363 | */ | ||
1364 | static inline int can_lookup(struct inode *inode) | ||
1365 | { | ||
1366 | if (likely(inode->i_opflags & IOP_LOOKUP)) | ||
1367 | return 1; | ||
1368 | if (likely(!inode->i_op->lookup)) | ||
1369 | return 0; | ||
1370 | |||
1371 | /* We do this once for the lifetime of the inode */ | ||
1372 | spin_lock(&inode->i_lock); | ||
1373 | inode->i_opflags |= IOP_LOOKUP; | ||
1374 | spin_unlock(&inode->i_lock); | ||
1375 | return 1; | ||
1376 | } | ||
1377 | |||
1378 | /* | ||
1323 | * Name resolution. | 1379 | * Name resolution. |
1324 | * This is the basic name resolution function, turning a pathname into | 1380 | * This is the basic name resolution function, turning a pathname into |
1325 | * the final dentry. We expect 'base' to be positive and a directory. | 1381 | * the final dentry. We expect 'base' to be positive and a directory. |
@@ -1398,10 +1454,10 @@ static int link_path_walk(const char *name, struct nameidata *nd) | |||
1398 | if (err) | 1454 | if (err) |
1399 | return err; | 1455 | return err; |
1400 | } | 1456 | } |
1457 | if (can_lookup(nd->inode)) | ||
1458 | continue; | ||
1401 | err = -ENOTDIR; | 1459 | err = -ENOTDIR; |
1402 | if (!nd->inode->i_op->lookup) | 1460 | break; |
1403 | break; | ||
1404 | continue; | ||
1405 | /* here ends the main loop */ | 1461 | /* here ends the main loop */ |
1406 | 1462 | ||
1407 | last_component: | 1463 | last_component: |
diff --git a/fs/stat.c b/fs/stat.c --- a/fs/stat.c +++ b/fs/stat.c | |||
@@ -27,12 +27,12 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) | |||
27 | stat->uid = inode->i_uid; | 27 | stat->uid = inode->i_uid; |
28 | stat->gid = inode->i_gid; | 28 | stat->gid = inode->i_gid; |
29 | stat->rdev = inode->i_rdev; | 29 | stat->rdev = inode->i_rdev; |
30 | stat->size = i_size_read(inode); | ||
30 | stat->atime = inode->i_atime; | 31 | stat->atime = inode->i_atime; |
31 | stat->mtime = inode->i_mtime; | 32 | stat->mtime = inode->i_mtime; |
32 | stat->ctime = inode->i_ctime; | 33 | stat->ctime = inode->i_ctime; |
33 | stat->size = i_size_read(inode); | ||
34 | stat->blocks = inode->i_blocks; | ||
35 | stat->blksize = (1 << inode->i_blkbits); | 34 | stat->blksize = (1 << inode->i_blkbits); |
35 | stat->blocks = inode->i_blocks; | ||
36 | } | 36 | } |
37 | 37 | ||
38 | EXPORT_SYMBOL(generic_fillattr); | 38 | EXPORT_SYMBOL(generic_fillattr); |