Diffstat (limited to 'fs/inode.c')
 fs/inode.c | 613 ++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 416 insertions(+), 197 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 03dfeb2e3928..972169668aed 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -74,9 +74,13 @@ static unsigned int i_hash_shift __read_mostly;
  * allowing for low-overhead inode sync() operations.
  */
 
-LIST_HEAD(inode_in_use);
 LIST_HEAD(inode_unused);
-static struct hlist_head *inode_hashtable __read_mostly;
+
+struct inode_hash_bucket {
+	spinlock_t lock;
+	struct hlist_head head;
+};
+static struct inode_hash_bucket *inode_hashtable __read_mostly;
 
 /*
  * A simple spinlock to protect the list manipulations.
@@ -84,7 +88,8 @@ static struct hlist_head *inode_hashtable __read_mostly;
  * NOTE! You also have to own the lock if you change
  * the i_state of an inode while it is in use..
  */
-DEFINE_SPINLOCK(inode_lock);
+static DEFINE_PER_CPU(spinlock_t, inode_cpulock);
+DEFINE_SPINLOCK(wb_inode_list_lock);
 
 /*
  * iprune_sem provides exclusion between the kswapd or try_to_free_pages
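Note: the two hunks above split the single global inode_lock into finer-grained locks: a per-bucket lock embedded in each inode_hash_bucket, a per-cpu inode_cpulock protecting the per-sb inode lists, a global wb_inode_list_lock for the writeback/unused lists, plus the per-inode i_lock. Operations on different hash buckets then no longer contend with each other. A minimal, runnable userspace model of the per-bucket idea, with pthread mutexes standing in for kernel spinlocks (all names here are illustrative, not kernel API):

    /* build: cc -pthread bucket.c */
    #include <pthread.h>
    #include <stdio.h>

    #define NBUCKETS 16

    struct node {
        unsigned long key;
        struct node *next;
    };

    /* one lock per bucket, echoing struct inode_hash_bucket above */
    struct bucket {
        pthread_mutex_t lock;
        struct node *head;
    };

    static struct bucket table[NBUCKETS];

    static struct bucket *hash_bucket(unsigned long key)
    {
        return &table[key % NBUCKETS];
    }

    static void table_insert(struct node *n)
    {
        struct bucket *b = hash_bucket(n->key);

        pthread_mutex_lock(&b->lock);   /* contends only within one bucket */
        n->next = b->head;
        b->head = n;
        pthread_mutex_unlock(&b->lock);
    }

    int main(void)
    {
        static struct node n1 = { .key = 7 }, n2 = { .key = 23 };

        for (int i = 0; i < NBUCKETS; i++)
            pthread_mutex_init(&table[i].lock, NULL);
        table_insert(&n1);
        table_insert(&n2);      /* different bucket: no contention with n1 */
        printf("inserted keys %lu and %lu\n", n1.key, n2.key);
        return 0;
    }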
@@ -103,10 +108,37 @@ static DECLARE_RWSEM(iprune_sem);
 /*
  * Statistics gathering..
  */
-struct inodes_stat_t inodes_stat;
+struct inodes_stat_t inodes_stat = {
+	.nr_inodes = 0,
+	.nr_unused = 0,
+};
+struct percpu_counter nr_inodes;
 
 static struct kmem_cache *inode_cachep __read_mostly;
 
+int get_nr_inodes(void)
+{
+	return percpu_counter_sum_positive(&nr_inodes);
+}
+
+/*
+ * Handle the nr_inodes sysctl
+ */
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+int proc_nr_inodes(ctl_table *table, int write,
+		   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	inodes_stat.nr_inodes = get_nr_inodes();
+	return proc_dointvec(table, write, buffer, lenp, ppos);
+}
+#else
+int proc_nr_inodes(ctl_table *table, int write,
+		   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+#endif
+
 static void wake_up_inode(struct inode *inode)
 {
 	/*
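Note: nr_inodes becomes a percpu_counter: each CPU bumps a local counter, and readers sum all of them via percpu_counter_sum_positive(), so the inode allocation hot path never writes a shared cache line. A rough userspace analogue with C11 atomics and one slot per thread (the slot scheme and names are illustrative only, not the kernel implementation):

    #include <stdatomic.h>
    #include <stdio.h>

    #define NSLOTS 64               /* upper bound on threads, for the sketch */

    static atomic_long counters[NSLOTS];
    static atomic_int next_slot;
    static _Thread_local int my_slot = -1;

    static void counter_inc(void)
    {
        if (my_slot < 0)            /* lazily claim a private slot */
            my_slot = atomic_fetch_add(&next_slot, 1) % NSLOTS;
        atomic_fetch_add(&counters[my_slot], 1);
    }

    static long counter_sum_positive(void)
    {
        long sum = 0;

        for (int i = 0; i < NSLOTS; i++)
            sum += atomic_load(&counters[i]);
        return sum > 0 ? sum : 0;   /* mirrors percpu_counter_sum_positive() */
    }

    int main(void)
    {
        for (int i = 0; i < 1000; i++)
            counter_inc();
        printf("nr_inodes ~ %ld\n", counter_sum_positive());
        return 0;
    }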
@@ -134,7 +166,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	inode->i_sb = sb;
 	inode->i_blkbits = sb->s_blocksize_bits;
 	inode->i_flags = 0;
-	atomic_set(&inode->i_count, 1);
+	inode->i_count = 1;
 	inode->i_op = &empty_iops;
 	inode->i_fop = &empty_fops;
 	inode->i_nlink = 1;
@@ -162,7 +194,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	mutex_init(&inode->i_mutex);
 	lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
 
-	init_rwsem(&inode->i_alloc_sem);
+	init_anon_rwsem(&inode->i_alloc_sem);
 	lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
 
 	mapping->a_ops = &empty_aops;
@@ -237,13 +269,20 @@ void __destroy_inode(struct inode *inode)
 }
 EXPORT_SYMBOL(__destroy_inode);
 
+static void i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(inode_cachep, inode);
+}
+
 void destroy_inode(struct inode *inode)
 {
 	__destroy_inode(inode);
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
 	else
-		kmem_cache_free(inode_cachep, (inode));
+		call_rcu(&inode->i_rcu, i_callback);
 }
 
 /*
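Note: destroy_inode() now defers the final free through call_rcu(), so lock-free hash walkers that still hold a pointer to the inode never see it freed under them; i_callback() recovers the inode from the embedded rcu_head via container_of(). The pointer arithmetic behind that recovery, as a freestanding sketch (structure names are made up, and no real RCU grace period is modeled):

    #include <stddef.h>
    #include <stdio.h>

    struct rcu_head_sketch {
        void (*func)(struct rcu_head_sketch *);
    };

    struct fake_inode {
        unsigned long i_ino;
        struct rcu_head_sketch i_rcu;   /* embedded, as in struct inode */
    };

    /* step back from a member's address to the containing structure */
    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    static void callback(struct rcu_head_sketch *head)
    {
        struct fake_inode *inode =
            container_of(head, struct fake_inode, i_rcu);

        printf("freeing inode %lu\n", inode->i_ino);
    }

    int main(void)
    {
        struct fake_inode ino = { .i_ino = 42 };

        /* a real call_rcu() would invoke this after a grace period */
        callback(&ino.i_rcu);
        return 0;
    }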
@@ -257,6 +296,7 @@ void inode_init_once(struct inode *inode)
 	INIT_HLIST_NODE(&inode->i_hash);
 	INIT_LIST_HEAD(&inode->i_dentry);
 	INIT_LIST_HEAD(&inode->i_devices);
+	INIT_LIST_HEAD(&inode->i_list);
 	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
 	spin_lock_init(&inode->i_data.tree_lock);
 	spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -282,21 +322,6 @@ static void init_once(void *foo)
 	inode_init_once(inode);
 }
 
-/*
- * inode_lock must be held
- */
-void __iget(struct inode *inode)
-{
-	if (atomic_read(&inode->i_count)) {
-		atomic_inc(&inode->i_count);
-		return;
-	}
-	atomic_inc(&inode->i_count);
-	if (!(inode->i_state & (I_DIRTY|I_SYNC)))
-		list_move(&inode->i_list, &inode_in_use);
-	inodes_stat.nr_unused--;
-}
-
 /**
  * clear_inode - clear an inode
  * @inode: inode to clear
@@ -340,65 +365,70 @@ static void dispose_list(struct list_head *head)
 		struct inode *inode;
 
 		inode = list_first_entry(head, struct inode, i_list);
-		list_del(&inode->i_list);
+		list_del_init(&inode->i_list);
 
 		if (inode->i_data.nrpages)
 			truncate_inode_pages(&inode->i_data, 0);
 		clear_inode(inode);
 
-		spin_lock(&inode_lock);
-		hlist_del_init(&inode->i_hash);
-		list_del_init(&inode->i_sb_list);
-		spin_unlock(&inode_lock);
+		spin_lock(&inode->i_lock);
+		__remove_inode_hash(inode);
+		inode_sb_list_del(inode);
+		spin_unlock(&inode->i_lock);
 
 		wake_up_inode(inode);
 		destroy_inode(inode);
 		nr_disposed++;
 	}
-	spin_lock(&inode_lock);
-	inodes_stat.nr_inodes -= nr_disposed;
-	spin_unlock(&inode_lock);
 }
 
 /*
  * Invalidate all inodes for a device.
  */
-static int invalidate_list(struct list_head *head, struct list_head *dispose)
+static int invalidate_sb_inodes(struct super_block *sb, struct list_head *dispose)
 {
-	struct list_head *next;
-	int busy = 0, count = 0;
+	int busy = 0;
+	int i;
 
-	next = head->next;
-	for (;;) {
-		struct list_head *tmp = next;
-		struct inode *inode;
-
-		/*
-		 * We can reschedule here without worrying about the list's
-		 * consistency because the per-sb list of inodes must not
-		 * change during umount anymore, and because iprune_sem keeps
-		 * shrink_icache_memory() away.
-		 */
-		cond_resched_lock(&inode_lock);
+	for_each_possible_cpu(i) {
+		struct list_head *next;
+		struct list_head *head;
+#ifdef CONFIG_SMP
+		head = per_cpu_ptr(sb->s_inodes, i);
+#else
+		head = &sb->s_inodes;
+#endif
 
-		next = next->next;
-		if (tmp == head)
-			break;
-		inode = list_entry(tmp, struct inode, i_sb_list);
-		if (inode->i_state & I_NEW)
-			continue;
-		invalidate_inode_buffers(inode);
-		if (!atomic_read(&inode->i_count)) {
-			list_move(&inode->i_list, dispose);
-			WARN_ON(inode->i_state & I_NEW);
-			inode->i_state |= I_FREEING;
-			count++;
+		next = head->next;
+		for (;;) {
+			struct list_head *tmp = next;
+			struct inode *inode;
+
+			next = next->next;
+			if (tmp == head)
+				break;
+			inode = list_entry(tmp, struct inode, i_sb_list);
+			spin_lock(&inode->i_lock);
+			if (inode->i_state & I_NEW) {
+				spin_unlock(&inode->i_lock);
 				continue;
+			}
+			invalidate_inode_buffers(inode);
+			if (!inode->i_count) {
+				spin_lock(&wb_inode_list_lock);
+				list_del(&inode->i_list);
+				inodes_stat.nr_unused--;
+				spin_unlock(&wb_inode_list_lock);
+				WARN_ON(inode->i_state & I_NEW);
+				inode->i_state |= I_FREEING;
+				spin_unlock(&inode->i_lock);
+				list_add(&inode->i_list, dispose);
+				continue;
+			}
+			spin_unlock(&inode->i_lock);
+			busy = 1;
 		}
-		busy = 1;
 	}
-	/* only unused inodes may be cached with i_count zero */
-	inodes_stat.nr_unused -= count;
 	return busy;
 }
 
@@ -415,12 +445,17 @@ int invalidate_inodes(struct super_block *sb)
 	int busy;
 	LIST_HEAD(throw_away);
 
+	/*
+	 * Don't need to worry about the list's consistency because the per-sb
+	 * list of inodes must not change during umount anymore, and because
+	 * iprune_sem keeps shrink_icache_memory() away.
+	 */
 	down_write(&iprune_sem);
-	spin_lock(&inode_lock);
-	inotify_unmount_inodes(&sb->s_inodes);
-	fsnotify_unmount_inodes(&sb->s_inodes);
-	busy = invalidate_list(&sb->s_inodes, &throw_away);
-	spin_unlock(&inode_lock);
+//	spin_lock(&sb_inode_list_lock); XXX: is this safe?
+	inotify_unmount_inodes(sb);
+	fsnotify_unmount_inodes(sb);
+	busy = invalidate_sb_inodes(sb, &throw_away);
+//	spin_unlock(&sb_inode_list_lock);
 
 	dispose_list(&throw_away);
 	up_write(&iprune_sem);
@@ -435,7 +470,7 @@ static int can_unuse(struct inode *inode)
 		return 0;
 	if (inode_has_buffers(inode))
 		return 0;
-	if (atomic_read(&inode->i_count))
+	if (inode->i_count)
 		return 0;
 	if (inode->i_data.nrpages)
 		return 0;
@@ -458,12 +493,12 @@ static int can_unuse(struct inode *inode)
 static void prune_icache(int nr_to_scan)
 {
 	LIST_HEAD(freeable);
-	int nr_pruned = 0;
 	int nr_scanned;
 	unsigned long reap = 0;
 
 	down_read(&iprune_sem);
-	spin_lock(&inode_lock);
+again:
+	spin_lock(&wb_inode_list_lock);
 	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
 		struct inode *inode;
 
@@ -472,36 +507,56 @@ static void prune_icache(int nr_to_scan)
 
 		inode = list_entry(inode_unused.prev, struct inode, i_list);
 
-		if (inode->i_state || atomic_read(&inode->i_count)) {
+		if (!spin_trylock(&inode->i_lock)) {
+			spin_unlock(&wb_inode_list_lock);
+			goto again;
+		}
+		if (inode->i_count) {
+			list_del_init(&inode->i_list);
+			spin_unlock(&inode->i_lock);
+			inodes_stat.nr_unused--;
+			continue;
+		}
+		if (inode->i_state) {
 			list_move(&inode->i_list, &inode_unused);
+			spin_unlock(&inode->i_lock);
 			continue;
 		}
 		if (inode_has_buffers(inode) || inode->i_data.nrpages) {
+			spin_unlock(&wb_inode_list_lock);
 			__iget(inode);
-			spin_unlock(&inode_lock);
+			spin_unlock(&inode->i_lock);
 			if (remove_inode_buffers(inode))
 				reap += invalidate_mapping_pages(&inode->i_data,
 								0, -1);
 			iput(inode);
-			spin_lock(&inode_lock);
+again2:
+			spin_lock(&wb_inode_list_lock);
 
+			/* XXX: may no longer work well */
 			if (inode != list_entry(inode_unused.next,
 						struct inode, i_list))
 				continue;	/* wrong inode or list_empty */
-			if (!can_unuse(inode))
+			if (!spin_trylock(&inode->i_lock)) {
+				spin_unlock(&wb_inode_list_lock);
+				goto again2;
+			}
+			if (!can_unuse(inode)) {
+				spin_unlock(&inode->i_lock);
 				continue;
+			}
 		}
 		list_move(&inode->i_list, &freeable);
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state |= I_FREEING;
-		nr_pruned++;
+		spin_unlock(&inode->i_lock);
+		inodes_stat.nr_unused--;
 	}
-	inodes_stat.nr_unused -= nr_pruned;
 	if (current_is_kswapd())
 		__count_vm_events(KSWAPD_INODESTEAL, reap);
 	else
 		__count_vm_events(PGINODESTEAL, reap);
-	spin_unlock(&inode_lock);
+	spin_unlock(&wb_inode_list_lock);
 
 	dispose_list(&freeable);
 	up_read(&iprune_sem);
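Note: prune_icache() above holds wb_inode_list_lock when it needs an inode's i_lock, while other paths acquire i_lock first; blocking on i_lock here could therefore deadlock. spin_trylock() plus a full restart ("goto again") sidesteps the lock-order inversion at the cost of an occasional rescan. The shape of that pattern as a runnable userspace sketch (illustrative names, pthread mutexes in place of spinlocks):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    struct object {
        pthread_mutex_t lock;
        int busy;
    };

    static void prune_one(struct object *obj)
    {
    again:
        pthread_mutex_lock(&list_lock);
        if (pthread_mutex_trylock(&obj->lock) != 0) {
            /* someone holds obj->lock; back off to keep lock order safe */
            pthread_mutex_unlock(&list_lock);
            goto again;
        }
        obj->busy = 0;              /* do the work under both locks */
        pthread_mutex_unlock(&obj->lock);
        pthread_mutex_unlock(&list_lock);
    }

    int main(void)
    {
        struct object obj = { .lock = PTHREAD_MUTEX_INITIALIZER, .busy = 1 };

        prune_one(&obj);
        printf("busy=%d\n", obj.busy);
        return 0;
    }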
@@ -528,7 +583,7 @@ static int shrink_icache_memory(int nr, gfp_t gfp_mask)
 			return -1;
 		prune_icache(nr);
 	}
-	return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+	return inodes_stat.nr_unused / 100 * sysctl_vfs_cache_pressure;
 }
 
 static struct shrinker icache_shrinker = {
@@ -544,7 +599,7 @@ static void __wait_on_freeing_inode(struct inode *inode);
  * add any additional branch in the common code.
  */
 static struct inode *find_inode(struct super_block *sb,
-				struct hlist_head *head,
+				struct inode_hash_bucket *b,
 				int (*test)(struct inode *, void *),
 				void *data)
 {
@@ -552,17 +607,27 @@ static struct inode *find_inode(struct super_block *sb,
 	struct inode *inode = NULL;
 
 repeat:
-	hlist_for_each_entry(inode, node, head, i_hash) {
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(inode, node, &b->head, i_hash) {
 		if (inode->i_sb != sb)
 			continue;
-		if (!test(inode, data))
+		spin_lock(&inode->i_lock);
+		if (hlist_unhashed(&inode->i_hash)) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
+		if (!test(inode, data)) {
+			spin_unlock(&inode->i_lock);
+			continue;
+		}
 		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
+			rcu_read_unlock();
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
 		break;
 	}
+	rcu_read_unlock();
 	return node ? inode : NULL;
 }
 
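Note: find_inode() now walks the hash chain under rcu_read_lock() without the bucket lock, so a candidate inode can be unhashed while we look at it; it is only trusted after taking i_lock and re-checking hlist_unhashed(). A compact model of that "lock, then revalidate" step (no real RCU here; the unhashed flag stands in for hlist_unhashed()):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct object {
        pthread_mutex_t lock;
        unsigned long key;
        bool unhashed;          /* set when removed from the table */
    };

    static struct object *validate(struct object *candidate, unsigned long key)
    {
        pthread_mutex_lock(&candidate->lock);
        if (candidate->unhashed || candidate->key != key) {
            /* raced with removal: pretend we never saw it */
            pthread_mutex_unlock(&candidate->lock);
            return NULL;
        }
        return candidate;       /* returned locked, like find_inode() */
    }

    int main(void)
    {
        struct object obj = {
            .lock = PTHREAD_MUTEX_INITIALIZER, .key = 7, .unhashed = false,
        };
        struct object *found = validate(&obj, 7);

        printf("found=%s\n", found ? "yes" : "no");
        if (found)
            pthread_mutex_unlock(&found->lock);
        return 0;
    }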
@@ -571,23 +636,32 @@ repeat:
  * iget_locked for details.
  */
 static struct inode *find_inode_fast(struct super_block *sb,
-				struct hlist_head *head, unsigned long ino)
+				struct inode_hash_bucket *b,
+				unsigned long ino)
 {
 	struct hlist_node *node;
 	struct inode *inode = NULL;
 
 repeat:
-	hlist_for_each_entry(inode, node, head, i_hash) {
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(inode, node, &b->head, i_hash) {
 		if (inode->i_ino != ino)
 			continue;
 		if (inode->i_sb != sb)
 			continue;
+		spin_lock(&inode->i_lock);
+		if (hlist_unhashed(&inode->i_hash)) {
+			spin_unlock(&inode->i_lock);
+			continue;
+		}
 		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
+			rcu_read_unlock();
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
 		break;
 	}
+	rcu_read_unlock();
 	return node ? inode : NULL;
 }
 
@@ -601,16 +675,88 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval)
 	return tmp & I_HASHMASK;
 }
 
+static void inode_sb_list_add(struct inode *inode, struct super_block *sb)
+{
+	spinlock_t *lock;
+	struct list_head *list;
+#ifdef CONFIG_SMP
+	int cpu;
+#endif
+
+	lock = &get_cpu_var(inode_cpulock);
+#ifdef CONFIG_SMP
+	cpu = smp_processor_id();
+	list = per_cpu_ptr(sb->s_inodes, cpu);
+	inode->i_sb_list_cpu = cpu;
+#else
+	list = &sb->s_inodes;
+#endif
+	put_cpu_var(inode_cpulock);
+	spin_lock(lock);
+	list_add_rcu(&inode->i_sb_list, list);
+	spin_unlock(lock);
+}
+
+void inode_sb_list_del(struct inode *inode)
+{
+	spinlock_t *lock;
+
+#ifdef CONFIG_SMP
+	lock = &per_cpu(inode_cpulock, inode->i_sb_list_cpu);
+#else
+	lock = &__get_cpu_var(inode_cpulock);
+#endif
+	spin_lock(lock);
+	list_del_rcu(&inode->i_sb_list);
+	spin_unlock(lock);
+}
+
 static inline void
-__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
+__inode_add_to_lists(struct super_block *sb, struct inode_hash_bucket *b,
 			struct inode *inode)
 {
-	inodes_stat.nr_inodes++;
-	list_add(&inode->i_list, &inode_in_use);
-	list_add(&inode->i_sb_list, &sb->s_inodes);
-	if (head)
-		hlist_add_head(&inode->i_hash, head);
+	inode_sb_list_add(inode, sb);
+	percpu_counter_inc(&nr_inodes);
+	if (b) {
+		spin_lock(&b->lock);
+		hlist_add_head(&inode->i_hash, &b->head);
+		spin_unlock(&b->lock);
+	}
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Each cpu owns a range of 1024 numbers.
+ * 'shared_last_ino' is dirtied only once out of 1024 allocations,
+ * to renew the exhausted range.
+ *
+ * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
+ * error if st_ino won't fit in target struct field. Use 32bit counter
+ * here to attempt to avoid that.
+ */
+static DEFINE_PER_CPU(int, last_ino);
+static atomic_t shared_last_ino;
+
+static int last_ino_get(void)
+{
+	int *p = &get_cpu_var(last_ino);
+	int res = *p;
+
+	if (unlikely((res & 1023) == 0))
+		res = atomic_add_return(1024, &shared_last_ino) - 1024;
+
+	*p = ++res;
+	put_cpu_var(last_ino);
+	return res;
+}
+#else
+static int last_ino_get(void)
+{
+	static int last_ino;
+
+	return ++last_ino;
 }
+#endif
 
 /**
  * inode_add_to_lists - add a new inode to relevant lists
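Note: per the comment in the hunk above, last_ino_get() lets each CPU claim a window of 1024 inode numbers from shared_last_ino and allocate from it privately, so the shared counter is dirtied only once per 1024 allocations. A single-threaded userspace model with a per-thread cursor (illustrative only, not the kernel code):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int shared_last_ino;
    static _Thread_local int last_ino;

    static int last_ino_get(void)
    {
        int res = last_ino;

        if ((res & 1023) == 0)  /* window exhausted: grab a fresh one */
            res = atomic_fetch_add(&shared_last_ino, 1024);

        last_ino = ++res;
        return res;
    }

    int main(void)
    {
        for (int i = 0; i < 5; i++)
            printf("ino %d\n", last_ino_get());
        return 0;
    }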
@@ -626,11 +772,11 @@ __inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
  */
 void inode_add_to_lists(struct super_block *sb, struct inode *inode)
 {
-	struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino);
+	struct inode_hash_bucket *b = inode_hashtable + hash(sb, inode->i_ino);
 
-	spin_lock(&inode_lock);
-	__inode_add_to_lists(sb, head, inode);
-	spin_unlock(&inode_lock);
+	spin_lock(&inode->i_lock);
+	__inode_add_to_lists(sb, b, inode);
+	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL_GPL(inode_add_to_lists);
 
@@ -648,23 +794,15 @@ EXPORT_SYMBOL_GPL(inode_add_to_lists);
  */
 struct inode *new_inode(struct super_block *sb)
 {
-	/*
-	 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
-	 * error if st_ino won't fit in target struct field. Use 32bit counter
-	 * here to attempt to avoid that.
-	 */
-	static unsigned int last_ino;
 	struct inode *inode;
 
-	spin_lock_prefetch(&inode_lock);
-
 	inode = alloc_inode(sb);
 	if (inode) {
-		spin_lock(&inode_lock);
-		__inode_add_to_lists(sb, NULL, inode);
-		inode->i_ino = ++last_ino;
+		spin_lock(&inode->i_lock);
+		inode->i_ino = last_ino_get();
 		inode->i_state = 0;
-		spin_unlock(&inode_lock);
+		__inode_add_to_lists(sb, NULL, inode);
+		spin_unlock(&inode->i_lock);
 	}
 	return inode;
 }
@@ -712,7 +850,7 @@ EXPORT_SYMBOL(unlock_new_inode);
  * -- rmk@arm.uk.linux.org
  */
 static struct inode *get_new_inode(struct super_block *sb,
-				struct hlist_head *head,
+				struct inode_hash_bucket *b,
 				int (*test)(struct inode *, void *),
 				int (*set)(struct inode *, void *),
 				void *data)
@@ -723,16 +861,16 @@ static struct inode *get_new_inode(struct super_block *sb,
 	if (inode) {
 		struct inode *old;
 
-		spin_lock(&inode_lock);
 		/* We released the lock, so.. */
-		old = find_inode(sb, head, test, data);
+		old = find_inode(sb, b, test, data);
 		if (!old) {
+			spin_lock(&inode->i_lock);
 			if (set(inode, data))
 				goto set_failed;
 
-			__inode_add_to_lists(sb, head, inode);
 			inode->i_state = I_NEW;
-			spin_unlock(&inode_lock);
+			__inode_add_to_lists(sb, b, inode);
+			spin_unlock(&inode->i_lock);
 
 			/* Return the locked inode with I_NEW set, the
 			 * caller is responsible for filling in the contents
@@ -746,7 +884,7 @@ static struct inode *get_new_inode(struct super_block *sb,
 		 * allocated.
 		 */
 		__iget(old);
-		spin_unlock(&inode_lock);
+		spin_unlock(&old->i_lock);
 		destroy_inode(inode);
 		inode = old;
 		wait_on_inode(inode);
@@ -754,7 +892,7 @@ static struct inode *get_new_inode(struct super_block *sb,
 	return inode;
 
 set_failed:
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
 	destroy_inode(inode);
 	return NULL;
 }
@@ -764,7 +902,7 @@ set_failed:
  * comment at iget_locked for details.
  */
 static struct inode *get_new_inode_fast(struct super_block *sb,
-				struct hlist_head *head, unsigned long ino)
+				struct inode_hash_bucket *b, unsigned long ino)
 {
 	struct inode *inode;
 
@@ -772,14 +910,14 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
 	if (inode) {
 		struct inode *old;
 
-		spin_lock(&inode_lock);
 		/* We released the lock, so.. */
-		old = find_inode_fast(sb, head, ino);
+		old = find_inode_fast(sb, b, ino);
 		if (!old) {
+			spin_lock(&inode->i_lock);
 			inode->i_ino = ino;
-			__inode_add_to_lists(sb, head, inode);
 			inode->i_state = I_NEW;
-			spin_unlock(&inode_lock);
+			__inode_add_to_lists(sb, b, inode);
+			spin_unlock(&inode->i_lock);
 
 			/* Return the locked inode with I_NEW set, the
 			 * caller is responsible for filling in the contents
@@ -793,7 +931,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
 		 * allocated.
 		 */
 		__iget(old);
-		spin_unlock(&inode_lock);
+		spin_unlock(&old->i_lock);
 		destroy_inode(inode);
 		inode = old;
 		wait_on_inode(inode);
@@ -801,6 +939,23 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
 	return inode;
 }
 
+static int test_inode_iunique(struct super_block *sb,
+		struct inode_hash_bucket *b, unsigned long ino)
+{
+	struct hlist_node *node;
+	struct inode *inode = NULL;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(inode, node, &b->head, i_hash) {
+		if (inode->i_ino == ino && inode->i_sb == sb) {
+			rcu_read_unlock();
+			return 0;
+		}
+	}
+	rcu_read_unlock();
+	return 1;
+}
+
 /**
  * iunique - get a unique inode number
  * @sb: superblock
@@ -822,20 +977,19 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
 	 * error if st_ino won't fit in target struct field. Use 32bit counter
 	 * here to attempt to avoid that.
 	 */
+	static DEFINE_SPINLOCK(unique_lock);
 	static unsigned int counter;
-	struct inode *inode;
-	struct hlist_head *head;
+	struct inode_hash_bucket *b;
 	ino_t res;
 
-	spin_lock(&inode_lock);
+	spin_lock(&unique_lock);
 	do {
 		if (counter <= max_reserved)
 			counter = max_reserved + 1;
 		res = counter++;
-		head = inode_hashtable + hash(sb, res);
-		inode = find_inode_fast(sb, head, res);
-	} while (inode != NULL);
-	spin_unlock(&inode_lock);
+		b = inode_hashtable + hash(sb, res);
+	} while (!test_inode_iunique(sb, b, res));
+	spin_unlock(&unique_lock);
 
 	return res;
 }
@@ -843,7 +997,9 @@ EXPORT_SYMBOL(iunique);
 
 struct inode *igrab(struct inode *inode)
 {
-	spin_lock(&inode_lock);
+	struct inode *ret = inode;
+
+	spin_lock(&inode->i_lock);
 	if (!(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)))
 		__iget(inode);
 	else
@@ -852,9 +1008,10 @@ struct inode *igrab(struct inode *inode)
 		 * called yet, and somebody is calling igrab
 		 * while the inode is getting freed.
 		 */
-		inode = NULL;
-	spin_unlock(&inode_lock);
-	return inode;
+		ret = NULL;
+	spin_unlock(&inode->i_lock);
+
+	return ret;
 }
 EXPORT_SYMBOL(igrab);
 
@@ -878,21 +1035,20 @@ EXPORT_SYMBOL(igrab);
  * Note, @test is called with the inode_lock held, so can't sleep.
  */
 static struct inode *ifind(struct super_block *sb,
-		struct hlist_head *head, int (*test)(struct inode *, void *),
+		struct inode_hash_bucket *b,
+		int (*test)(struct inode *, void *),
 		void *data, const int wait)
 {
 	struct inode *inode;
 
-	spin_lock(&inode_lock);
-	inode = find_inode(sb, head, test, data);
+	inode = find_inode(sb, b, test, data);
 	if (inode) {
 		__iget(inode);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
 		if (likely(wait))
 			wait_on_inode(inode);
 		return inode;
 	}
-	spin_unlock(&inode_lock);
 	return NULL;
 }
 
@@ -912,19 +1068,18 @@ static struct inode *ifind(struct super_block *sb,
  * Otherwise NULL is returned.
 
 */
 static struct inode *ifind_fast(struct super_block *sb,
-		struct hlist_head *head, unsigned long ino)
+		struct inode_hash_bucket *b,
+		unsigned long ino)
 {
 	struct inode *inode;
 
-	spin_lock(&inode_lock);
-	inode = find_inode_fast(sb, head, ino);
+	inode = find_inode_fast(sb, b, ino);
 	if (inode) {
 		__iget(inode);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
 		wait_on_inode(inode);
 		return inode;
 	}
-	spin_unlock(&inode_lock);
 	return NULL;
 }
 
@@ -952,9 +1107,9 @@ static struct inode *ifind_fast(struct super_block *sb,
 struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
 		int (*test)(struct inode *, void *), void *data)
 {
-	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+	struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval);
 
-	return ifind(sb, head, test, data, 0);
+	return ifind(sb, b, test, data, 0);
 }
 EXPORT_SYMBOL(ilookup5_nowait);
 
@@ -980,9 +1135,9 @@ EXPORT_SYMBOL(ilookup5_nowait);
 struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
 		int (*test)(struct inode *, void *), void *data)
 {
-	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+	struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval);
 
-	return ifind(sb, head, test, data, 1);
+	return ifind(sb, b, test, data, 1);
 }
 EXPORT_SYMBOL(ilookup5);
 
@@ -1002,9 +1157,9 @@ EXPORT_SYMBOL(ilookup5);
  */
 struct inode *ilookup(struct super_block *sb, unsigned long ino)
 {
-	struct hlist_head *head = inode_hashtable + hash(sb, ino);
+	struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino);
 
-	return ifind_fast(sb, head, ino);
+	return ifind_fast(sb, b, ino);
 }
 EXPORT_SYMBOL(ilookup);
 
@@ -1032,17 +1187,17 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
 		int (*test)(struct inode *, void *),
 		int (*set)(struct inode *, void *), void *data)
 {
-	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+	struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval);
 	struct inode *inode;
 
-	inode = ifind(sb, head, test, data, 1);
+	inode = ifind(sb, b, test, data, 1);
 	if (inode)
 		return inode;
 	/*
 	 * get_new_inode() will do the right thing, re-trying the search
 	 * in case it had to block at any point.
 	 */
-	return get_new_inode(sb, head, test, set, data);
+	return get_new_inode(sb, b, test, set, data);
 }
 EXPORT_SYMBOL(iget5_locked);
 
@@ -1063,17 +1218,17 @@ EXPORT_SYMBOL(iget5_locked);
  */
 struct inode *iget_locked(struct super_block *sb, unsigned long ino)
 {
-	struct hlist_head *head = inode_hashtable + hash(sb, ino);
+	struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino);
 	struct inode *inode;
 
-	inode = ifind_fast(sb, head, ino);
+	inode = ifind_fast(sb, b, ino);
 	if (inode)
 		return inode;
 	/*
 	 * get_new_inode_fast() will do the right thing, re-trying the search
 	 * in case it had to block at any point.
 	 */
-	return get_new_inode_fast(sb, head, ino);
+	return get_new_inode_fast(sb, b, ino);
 }
 EXPORT_SYMBOL(iget_locked);
 
@@ -1081,29 +1236,37 @@ int insert_inode_locked(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
 	ino_t ino = inode->i_ino;
-	struct hlist_head *head = inode_hashtable + hash(sb, ino);
+	struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino);
 
 	inode->i_state |= I_NEW;
 	while (1) {
 		struct hlist_node *node;
 		struct inode *old = NULL;
-		spin_lock(&inode_lock);
-		hlist_for_each_entry(old, node, head, i_hash) {
+
+repeat:
+		spin_lock(&b->lock);
+		hlist_for_each_entry(old, node, &b->head, i_hash) {
 			if (old->i_ino != ino)
 				continue;
 			if (old->i_sb != sb)
 				continue;
 			if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
 				continue;
+			if (!spin_trylock(&old->i_lock)) {
+				spin_unlock(&b->lock);
+				goto repeat;
+			}
 			break;
 		}
 		if (likely(!node)) {
-			hlist_add_head(&inode->i_hash, head);
-			spin_unlock(&inode_lock);
+			/* XXX: initialize inode->i_lock to locked? */
+			hlist_add_head(&inode->i_hash, &b->head);
+			spin_unlock(&b->lock);
 			return 0;
 		}
+		spin_unlock(&b->lock);
 		__iget(old);
-		spin_unlock(&inode_lock);
+		spin_unlock(&old->i_lock);
 		wait_on_inode(old);
 		if (unlikely(!hlist_unhashed(&old->i_hash))) {
 			iput(old);
@@ -1118,7 +1281,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 		int (*test)(struct inode *, void *), void *data)
 {
 	struct super_block *sb = inode->i_sb;
-	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+	struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval);
 
 	inode->i_state |= I_NEW;
 
@@ -1126,23 +1289,30 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 		struct hlist_node *node;
 		struct inode *old = NULL;
 
-		spin_lock(&inode_lock);
-		hlist_for_each_entry(old, node, head, i_hash) {
+repeat:
+		spin_lock(&b->lock);
+		hlist_for_each_entry(old, node, &b->head, i_hash) {
 			if (old->i_sb != sb)
 				continue;
 			if (!test(old, data))
 				continue;
 			if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
 				continue;
+			if (!spin_trylock(&old->i_lock)) {
+				spin_unlock(&b->lock);
+				goto repeat;
+			}
 			break;
 		}
 		if (likely(!node)) {
-			hlist_add_head(&inode->i_hash, head);
-			spin_unlock(&inode_lock);
+			/* XXX: initialize inode->i_lock to locked? */
+			hlist_add_head(&inode->i_hash, &b->head);
+			spin_unlock(&b->lock);
 			return 0;
 		}
+		spin_unlock(&b->lock);
 		__iget(old);
-		spin_unlock(&inode_lock);
+		spin_unlock(&old->i_lock);
 		wait_on_inode(old);
 		if (unlikely(!hlist_unhashed(&old->i_hash))) {
 			iput(old);
@@ -1163,14 +1333,32 @@ EXPORT_SYMBOL(insert_inode_locked4);
  */
 void __insert_inode_hash(struct inode *inode, unsigned long hashval)
 {
-	struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
-	spin_lock(&inode_lock);
-	hlist_add_head(&inode->i_hash, head);
-	spin_unlock(&inode_lock);
+	struct inode_hash_bucket *b = inode_hashtable + hash(inode->i_sb, hashval);
+
+	spin_lock(&inode->i_lock);
+	spin_lock(&b->lock);
+	hlist_add_head(&inode->i_hash, &b->head);
+	spin_unlock(&b->lock);
+	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(__insert_inode_hash);
 
 /**
+ * __remove_inode_hash - remove an inode from the hash
+ * @inode: inode to unhash
+ *
+ * Remove an inode from the superblock. inode->i_lock must be
+ * held.
+ */
+void __remove_inode_hash(struct inode *inode)
+{
+	struct inode_hash_bucket *b = inode_hashtable + hash(inode->i_sb, inode->i_ino);
+	spin_lock(&b->lock);
+	hlist_del_init(&inode->i_hash);
+	spin_unlock(&b->lock);
+}
+
+/**
  * remove_inode_hash - remove an inode from the hash
  * @inode: inode to unhash
  *
@@ -1178,9 +1366,9 @@ EXPORT_SYMBOL(__insert_inode_hash);
  */
 void remove_inode_hash(struct inode *inode)
 {
-	spin_lock(&inode_lock);
-	hlist_del_init(&inode->i_hash);
-	spin_unlock(&inode_lock);
+	spin_lock(&inode->i_lock);
+	__remove_inode_hash(inode);
+	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(remove_inode_hash);
 
@@ -1200,12 +1388,16 @@ void generic_delete_inode(struct inode *inode)
 {
 	const struct super_operations *op = inode->i_sb->s_op;
 
-	list_del_init(&inode->i_list);
-	list_del_init(&inode->i_sb_list);
+	if (!list_empty(&inode->i_list)) {
+		spin_lock(&wb_inode_list_lock);
+		list_del_init(&inode->i_list);
+		spin_unlock(&wb_inode_list_lock);
+	}
+	inode_sb_list_del(inode);
+	percpu_counter_dec(&nr_inodes);
 	WARN_ON(inode->i_state & I_NEW);
 	inode->i_state |= I_FREEING;
-	inodes_stat.nr_inodes--;
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
 
 	security_inode_delete(inode);
 
@@ -1222,9 +1414,15 @@ void generic_delete_inode(struct inode *inode)
 		truncate_inode_pages(&inode->i_data, 0);
 		clear_inode(inode);
 	}
-	spin_lock(&inode_lock);
-	hlist_del_init(&inode->i_hash);
-	spin_unlock(&inode_lock);
+	/*
+	 * i_lock not required to delete from hash. If there was a
+	 * concurrency window, then it would be possible for the other
+	 * thread to touch the inode after it has been freed, with
+	 * destroy_inode.
+	 * XXX: yes it is because find_inode_fast checks it. Maybe we
+	 * can avoid it though...
+	 */
+	remove_inode_hash(inode);
 	wake_up_inode(inode);
 	BUG_ON(inode->i_state != I_CLEAR);
 	destroy_inode(inode);
@@ -1245,29 +1443,36 @@ int generic_detach_inode(struct inode *inode)
 	struct super_block *sb = inode->i_sb;
 
 	if (!hlist_unhashed(&inode->i_hash)) {
-		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
-			list_move(&inode->i_list, &inode_unused);
-		inodes_stat.nr_unused++;
+		if (list_empty(&inode->i_list)) {
+			spin_lock(&wb_inode_list_lock);
+			list_add(&inode->i_list, &inode_unused);
+			inodes_stat.nr_unused++;
+			spin_unlock(&wb_inode_list_lock);
+		}
 		if (sb->s_flags & MS_ACTIVE) {
-			spin_unlock(&inode_lock);
+			spin_unlock(&inode->i_lock);
 			return 0;
 		}
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state |= I_WILL_FREE;
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
 		write_inode_now(inode, 1);
-		spin_lock(&inode_lock);
+		spin_lock(&inode->i_lock);
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state &= ~I_WILL_FREE;
+		__remove_inode_hash(inode);
+	}
+	if (!list_empty(&inode->i_list)) {
+		spin_lock(&wb_inode_list_lock);
+		list_del_init(&inode->i_list);
 		inodes_stat.nr_unused--;
-		hlist_del_init(&inode->i_hash);
+		spin_unlock(&wb_inode_list_lock);
 	}
-	list_del_init(&inode->i_list);
-	list_del_init(&inode->i_sb_list);
+	inode_sb_list_del(inode);
+	percpu_counter_dec(&nr_inodes);
 	WARN_ON(inode->i_state & I_NEW);
 	inode->i_state |= I_FREEING;
-	inodes_stat.nr_inodes--;
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
 	return 1;
 }
 EXPORT_SYMBOL_GPL(generic_detach_inode);
@@ -1332,8 +1537,12 @@ void iput(struct inode *inode)
 	if (inode) {
 		BUG_ON(inode->i_state == I_CLEAR);
 
-		if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
+		spin_lock(&inode->i_lock);
+		inode->i_count--;
+		if (inode->i_count == 0)
 			iput_final(inode);
+		else
+			spin_unlock(&inode->i_lock);
 	}
 }
 EXPORT_SYMBOL(iput);
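Note: with i_count demoted from an atomic_t to a plain int guarded by i_lock, iput() becomes lock, decrement, then either free with the lock still held or simply unlock: a per-object replacement for the old atomic_dec_and_lock() against the global inode_lock. The shape of it in a userspace sketch (illustrative names only):

    #include <pthread.h>
    #include <stdio.h>

    struct object {
        pthread_mutex_t lock;
        int count;
    };

    static void put_object(struct object *obj)
    {
        pthread_mutex_lock(&obj->lock);
        obj->count--;
        if (obj->count == 0) {
            /* final reference: release resources while still locked */
            printf("freeing object\n");
            pthread_mutex_unlock(&obj->lock);
            return;
        }
        pthread_mutex_unlock(&obj->lock);
    }

    int main(void)
    {
        struct object obj = { .lock = PTHREAD_MUTEX_INITIALIZER, .count = 2 };

        put_object(&obj);   /* count drops to 1 */
        put_object(&obj);   /* count drops to 0, object "freed" */
        return 0;
    }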
@@ -1514,6 +1723,8 @@ EXPORT_SYMBOL(inode_wait);
  * wake_up_inode() after removing from the hash list will DTRT.
  *
  * This is called with inode_lock held.
+ *
+ * Called with i_lock held and returns with it dropped.
  */
 static void __wait_on_freeing_inode(struct inode *inode)
 {
@@ -1521,10 +1732,9 @@ static void __wait_on_freeing_inode(struct inode *inode)
 	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
 	wq = bit_waitqueue(&inode->i_state, __I_NEW);
 	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
 	schedule();
 	finish_wait(wq, &wait.wait);
-	spin_lock(&inode_lock);
 }
 
 static __initdata unsigned long ihash_entries;
@@ -1552,7 +1762,7 @@ void __init inode_init_early(void)
 
 	inode_hashtable =
 		alloc_large_system_hash("Inode-cache",
-					sizeof(struct hlist_head),
+					sizeof(struct inode_hash_bucket),
 					ihash_entries,
 					14,
 					HASH_EARLY,
@@ -1560,14 +1770,17 @@ void __init inode_init_early(void)
 					&i_hash_mask,
 					0);
 
-	for (loop = 0; loop < (1 << i_hash_shift); loop++)
-		INIT_HLIST_HEAD(&inode_hashtable[loop]);
+	for (loop = 0; loop < (1 << i_hash_shift); loop++) {
+		spin_lock_init(&inode_hashtable[loop].lock);
+		INIT_HLIST_HEAD(&inode_hashtable[loop].head);
+	}
 }
 
 void __init inode_init(void)
 {
 	int loop;
 
+	percpu_counter_init(&nr_inodes, 0);
 	/* inode slab cache */
 	inode_cachep = kmem_cache_create("inode_cache",
 					sizeof(struct inode),
@@ -1577,13 +1790,17 @@ void __init inode_init(void)
 					init_once);
 	register_shrinker(&icache_shrinker);
 
+	for_each_possible_cpu(loop) {
+		spin_lock_init(&per_cpu(inode_cpulock, loop));
+	}
+
 	/* Hash may have been set up in inode_init_early */
 	if (!hashdist)
 		return;
 
 	inode_hashtable =
 		alloc_large_system_hash("Inode-cache",
-					sizeof(struct hlist_head),
+					sizeof(struct inode_hash_bucket),
 					ihash_entries,
 					14,
 					0,
@@ -1591,8 +1808,10 @@ void __init inode_init(void)
 					&i_hash_mask,
 					0);
 
-	for (loop = 0; loop < (1 << i_hash_shift); loop++)
-		INIT_HLIST_HEAD(&inode_hashtable[loop]);
+	for (loop = 0; loop < (1 << i_hash_shift); loop++) {
+		spin_lock_init(&inode_hashtable[loop].lock);
+		INIT_HLIST_HEAD(&inode_hashtable[loop].head);
+	}
 }
 
 void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)