aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2013-09-12 17:54:48 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2013-09-12 17:54:48 -0400
commitbf2ba3bc185269eca274b458aac46ba1ad7c1121 (patch)
tree2cdaf7568df3ab762f42e7849fc9211dcb60d1bc
parentbcceeeba9b3ca99c29523bb7af16727f8a837db4 (diff)
parentf5e1dd34561e0fb06400b378d595198918833021 (diff)
Merge branch 'for-next' into for-linus
-rw-r--r--arch/x86/kvm/mmu.c25
-rw-r--r--drivers/gpu/drm/i915/i915_dma.c4
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c82
-rw-r--r--drivers/gpu/drm/ttm/ttm_page_alloc.c44
-rw-r--r--drivers/gpu/drm/ttm/ttm_page_alloc_dma.c51
-rw-r--r--drivers/md/bcache/btree.c43
-rw-r--r--drivers/md/bcache/sysfs.c2
-rw-r--r--drivers/md/dm-bufio.c64
-rw-r--r--drivers/staging/android/ashmem.c44
-rw-r--r--drivers/staging/android/lowmemorykiller.c43
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h38
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_pool.c148
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lu_object.c98
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c76
-rw-r--r--fs/dcache.c276
-rw-r--r--fs/drop_caches.c1
-rw-r--r--fs/ext4/extents_status.c33
-rw-r--r--fs/gfs2/glock.c30
-rw-r--r--fs/gfs2/main.c3
-rw-r--r--fs/gfs2/quota.c18
-rw-r--r--fs/gfs2/quota.h6
-rw-r--r--fs/inode.c193
-rw-r--r--fs/internal.h6
-rw-r--r--fs/mbcache.c49
-rw-r--r--fs/nfs/dir.c16
-rw-r--r--fs/nfs/internal.h6
-rw-r--r--fs/nfs/super.c3
-rw-r--r--fs/nfsd/nfscache.c32
-rw-r--r--fs/quota/dquot.c34
-rw-r--r--fs/super.c111
-rw-r--r--fs/ubifs/shrinker.c29
-rw-r--r--fs/ubifs/super.c3
-rw-r--r--fs/ubifs/ubifs.h5
-rw-r--r--fs/xfs/xfs_buf.c253
-rw-r--r--fs/xfs/xfs_buf.h17
-rw-r--r--fs/xfs/xfs_dquot.c7
-rw-r--r--fs/xfs/xfs_icache.c4
-rw-r--r--fs/xfs/xfs_icache.h2
-rw-r--r--fs/xfs/xfs_qm.c287
-rw-r--r--fs/xfs/xfs_qm.h4
-rw-r--r--fs/xfs/xfs_super.c12
-rw-r--r--include/linux/dcache.h14
-rw-r--r--include/linux/fs.h25
-rw-r--r--include/linux/list_lru.h131
-rw-r--r--include/linux/shrinker.h54
-rw-r--r--include/trace/events/vmscan.h4
-rw-r--r--include/uapi/linux/fs.h6
-rw-r--r--kernel/sysctl.c6
-rw-r--r--mm/Makefile2
-rw-r--r--mm/huge_memory.c17
-rw-r--r--mm/list_lru.c139
-rw-r--r--mm/memory-failure.c2
-rw-r--r--mm/vmscan.c241
-rw-r--r--net/sunrpc/auth.c41
54 files changed, 1755 insertions, 1129 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6e2d2c8f230b..dce0df8150df 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4421,13 +4421,12 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
4421 } 4421 }
4422} 4422}
4423 4423
4424static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) 4424static unsigned long
4425mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
4425{ 4426{
4426 struct kvm *kvm; 4427 struct kvm *kvm;
4427 int nr_to_scan = sc->nr_to_scan; 4428 int nr_to_scan = sc->nr_to_scan;
4428 4429 unsigned long freed = 0;
4429 if (nr_to_scan == 0)
4430 goto out;
4431 4430
4432 raw_spin_lock(&kvm_lock); 4431 raw_spin_lock(&kvm_lock);
4433 4432
@@ -4462,25 +4461,37 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
4462 goto unlock; 4461 goto unlock;
4463 } 4462 }
4464 4463
4465 prepare_zap_oldest_mmu_page(kvm, &invalid_list); 4464 if (prepare_zap_oldest_mmu_page(kvm, &invalid_list))
4465 freed++;
4466 kvm_mmu_commit_zap_page(kvm, &invalid_list); 4466 kvm_mmu_commit_zap_page(kvm, &invalid_list);
4467 4467
4468unlock: 4468unlock:
4469 spin_unlock(&kvm->mmu_lock); 4469 spin_unlock(&kvm->mmu_lock);
4470 srcu_read_unlock(&kvm->srcu, idx); 4470 srcu_read_unlock(&kvm->srcu, idx);
4471 4471
4472 /*
4473 * unfair on small ones
4474 * per-vm shrinkers cry out
4475 * sadness comes quickly
4476 */
4472 list_move_tail(&kvm->vm_list, &vm_list); 4477 list_move_tail(&kvm->vm_list, &vm_list);
4473 break; 4478 break;
4474 } 4479 }
4475 4480
4476 raw_spin_unlock(&kvm_lock); 4481 raw_spin_unlock(&kvm_lock);
4482 return freed;
4477 4483
4478out: 4484}
4485
4486static unsigned long
4487mmu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
4488{
4479 return percpu_counter_read_positive(&kvm_total_used_mmu_pages); 4489 return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
4480} 4490}
4481 4491
4482static struct shrinker mmu_shrinker = { 4492static struct shrinker mmu_shrinker = {
4483 .shrink = mmu_shrink, 4493 .count_objects = mmu_shrink_count,
4494 .scan_objects = mmu_shrink_scan,
4484 .seeks = DEFAULT_SEEKS * 10, 4495 .seeks = DEFAULT_SEEKS * 10,
4485}; 4496};
4486 4497
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index fdaa0915ce56..d5c784d48671 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1667,7 +1667,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
1667 return 0; 1667 return 0;
1668 1668
1669out_gem_unload: 1669out_gem_unload:
1670 if (dev_priv->mm.inactive_shrinker.shrink) 1670 if (dev_priv->mm.inactive_shrinker.scan_objects)
1671 unregister_shrinker(&dev_priv->mm.inactive_shrinker); 1671 unregister_shrinker(&dev_priv->mm.inactive_shrinker);
1672 1672
1673 if (dev->pdev->msi_enabled) 1673 if (dev->pdev->msi_enabled)
@@ -1706,7 +1706,7 @@ int i915_driver_unload(struct drm_device *dev)
1706 1706
1707 i915_teardown_sysfs(dev); 1707 i915_teardown_sysfs(dev);
1708 1708
1709 if (dev_priv->mm.inactive_shrinker.shrink) 1709 if (dev_priv->mm.inactive_shrinker.scan_objects)
1710 unregister_shrinker(&dev_priv->mm.inactive_shrinker); 1710 unregister_shrinker(&dev_priv->mm.inactive_shrinker);
1711 1711
1712 mutex_lock(&dev->struct_mutex); 1712 mutex_lock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2d1cb10d846f..a7ff3db4f607 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -57,10 +57,12 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
57 struct drm_i915_fence_reg *fence, 57 struct drm_i915_fence_reg *fence,
58 bool enable); 58 bool enable);
59 59
60static int i915_gem_inactive_shrink(struct shrinker *shrinker, 60static unsigned long i915_gem_inactive_count(struct shrinker *shrinker,
61 struct shrink_control *sc); 61 struct shrink_control *sc);
62static unsigned long i915_gem_inactive_scan(struct shrinker *shrinker,
63 struct shrink_control *sc);
62static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); 64static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
63static void i915_gem_shrink_all(struct drm_i915_private *dev_priv); 65static long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
64static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); 66static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
65 67
66static bool cpu_cache_is_coherent(struct drm_device *dev, 68static bool cpu_cache_is_coherent(struct drm_device *dev,
@@ -1736,16 +1738,21 @@ i915_gem_purge(struct drm_i915_private *dev_priv, long target)
1736 return __i915_gem_shrink(dev_priv, target, true); 1738 return __i915_gem_shrink(dev_priv, target, true);
1737} 1739}
1738 1740
1739static void 1741static long
1740i915_gem_shrink_all(struct drm_i915_private *dev_priv) 1742i915_gem_shrink_all(struct drm_i915_private *dev_priv)
1741{ 1743{
1742 struct drm_i915_gem_object *obj, *next; 1744 struct drm_i915_gem_object *obj, *next;
1745 long freed = 0;
1743 1746
1744 i915_gem_evict_everything(dev_priv->dev); 1747 i915_gem_evict_everything(dev_priv->dev);
1745 1748
1746 list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, 1749 list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list,
1747 global_list) 1750 global_list) {
1751 if (obj->pages_pin_count == 0)
1752 freed += obj->base.size >> PAGE_SHIFT;
1748 i915_gem_object_put_pages(obj); 1753 i915_gem_object_put_pages(obj);
1754 }
1755 return freed;
1749} 1756}
1750 1757
1751static int 1758static int
@@ -4526,7 +4533,8 @@ i915_gem_load(struct drm_device *dev)
4526 4533
4527 dev_priv->mm.interruptible = true; 4534 dev_priv->mm.interruptible = true;
4528 4535
4529 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; 4536 dev_priv->mm.inactive_shrinker.scan_objects = i915_gem_inactive_scan;
4537 dev_priv->mm.inactive_shrinker.count_objects = i915_gem_inactive_count;
4530 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; 4538 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
4531 register_shrinker(&dev_priv->mm.inactive_shrinker); 4539 register_shrinker(&dev_priv->mm.inactive_shrinker);
4532} 4540}
@@ -4749,8 +4757,8 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
4749#endif 4757#endif
4750} 4758}
4751 4759
4752static int 4760static unsigned long
4753i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) 4761i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc)
4754{ 4762{
4755 struct drm_i915_private *dev_priv = 4763 struct drm_i915_private *dev_priv =
4756 container_of(shrinker, 4764 container_of(shrinker,
@@ -4758,45 +4766,35 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
4758 mm.inactive_shrinker); 4766 mm.inactive_shrinker);
4759 struct drm_device *dev = dev_priv->dev; 4767 struct drm_device *dev = dev_priv->dev;
4760 struct drm_i915_gem_object *obj; 4768 struct drm_i915_gem_object *obj;
4761 int nr_to_scan = sc->nr_to_scan;
4762 bool unlock = true; 4769 bool unlock = true;
4763 int cnt; 4770 unsigned long count;
4764 4771
4765 if (!mutex_trylock(&dev->struct_mutex)) { 4772 if (!mutex_trylock(&dev->struct_mutex)) {
4766 if (!mutex_is_locked_by(&dev->struct_mutex, current)) 4773 if (!mutex_is_locked_by(&dev->struct_mutex, current))
4767 return 0; 4774 return SHRINK_STOP;
4768 4775
4769 if (dev_priv->mm.shrinker_no_lock_stealing) 4776 if (dev_priv->mm.shrinker_no_lock_stealing)
4770 return 0; 4777 return SHRINK_STOP;
4771 4778
4772 unlock = false; 4779 unlock = false;
4773 } 4780 }
4774 4781
4775 if (nr_to_scan) { 4782 count = 0;
4776 nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
4777 if (nr_to_scan > 0)
4778 nr_to_scan -= __i915_gem_shrink(dev_priv, nr_to_scan,
4779 false);
4780 if (nr_to_scan > 0)
4781 i915_gem_shrink_all(dev_priv);
4782 }
4783
4784 cnt = 0;
4785 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) 4783 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
4786 if (obj->pages_pin_count == 0) 4784 if (obj->pages_pin_count == 0)
4787 cnt += obj->base.size >> PAGE_SHIFT; 4785 count += obj->base.size >> PAGE_SHIFT;
4788 4786
4789 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 4787 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
4790 if (obj->active) 4788 if (obj->active)
4791 continue; 4789 continue;
4792 4790
4793 if (obj->pin_count == 0 && obj->pages_pin_count == 0) 4791 if (obj->pin_count == 0 && obj->pages_pin_count == 0)
4794 cnt += obj->base.size >> PAGE_SHIFT; 4792 count += obj->base.size >> PAGE_SHIFT;
4795 } 4793 }
4796 4794
4797 if (unlock) 4795 if (unlock)
4798 mutex_unlock(&dev->struct_mutex); 4796 mutex_unlock(&dev->struct_mutex);
4799 return cnt; 4797 return count;
4800} 4798}
4801 4799
4802/* All the new VM stuff */ 4800/* All the new VM stuff */
@@ -4860,6 +4858,40 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
4860 return 0; 4858 return 0;
4861} 4859}
4862 4860
4861static unsigned long
4862i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc)
4863{
4864 struct drm_i915_private *dev_priv =
4865 container_of(shrinker,
4866 struct drm_i915_private,
4867 mm.inactive_shrinker);
4868 struct drm_device *dev = dev_priv->dev;
4869 int nr_to_scan = sc->nr_to_scan;
4870 unsigned long freed;
4871 bool unlock = true;
4872
4873 if (!mutex_trylock(&dev->struct_mutex)) {
4874 if (!mutex_is_locked_by(&dev->struct_mutex, current))
4875 return 0;
4876
4877 if (dev_priv->mm.shrinker_no_lock_stealing)
4878 return 0;
4879
4880 unlock = false;
4881 }
4882
4883 freed = i915_gem_purge(dev_priv, nr_to_scan);
4884 if (freed < nr_to_scan)
4885 freed += __i915_gem_shrink(dev_priv, nr_to_scan,
4886 false);
4887 if (freed < nr_to_scan)
4888 freed += i915_gem_shrink_all(dev_priv);
4889
4890 if (unlock)
4891 mutex_unlock(&dev->struct_mutex);
4892 return freed;
4893}
4894
4863struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4895struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
4864 struct i915_address_space *vm) 4896 struct i915_address_space *vm)
4865{ 4897{
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index bd2a3b40cd12..863bef9f9234 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -377,28 +377,26 @@ out:
377 return nr_free; 377 return nr_free;
378} 378}
379 379
380/* Get good estimation how many pages are free in pools */
381static int ttm_pool_get_num_unused_pages(void)
382{
383 unsigned i;
384 int total = 0;
385 for (i = 0; i < NUM_POOLS; ++i)
386 total += _manager->pools[i].npages;
387
388 return total;
389}
390
391/** 380/**
392 * Callback for mm to request pool to reduce number of page held. 381 * Callback for mm to request pool to reduce number of page held.
382 *
383 * XXX: (dchinner) Deadlock warning!
384 *
385 * ttm_page_pool_free() does memory allocation using GFP_KERNEL. That means
386 * this can deadlock when called with a sc->gfp_mask that is not equal to
387 * GFP_KERNEL.
388 *
389 * This code is crying out for a shrinker per pool....
393 */ 390 */
394static int ttm_pool_mm_shrink(struct shrinker *shrink, 391static unsigned long
395 struct shrink_control *sc) 392ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
396{ 393{
397 static atomic_t start_pool = ATOMIC_INIT(0); 394 static atomic_t start_pool = ATOMIC_INIT(0);
398 unsigned i; 395 unsigned i;
399 unsigned pool_offset = atomic_add_return(1, &start_pool); 396 unsigned pool_offset = atomic_add_return(1, &start_pool);
400 struct ttm_page_pool *pool; 397 struct ttm_page_pool *pool;
401 int shrink_pages = sc->nr_to_scan; 398 int shrink_pages = sc->nr_to_scan;
399 unsigned long freed = 0;
402 400
403 pool_offset = pool_offset % NUM_POOLS; 401 pool_offset = pool_offset % NUM_POOLS;
404 /* select start pool in round robin fashion */ 402 /* select start pool in round robin fashion */
@@ -408,14 +406,28 @@ static int ttm_pool_mm_shrink(struct shrinker *shrink,
408 break; 406 break;
409 pool = &_manager->pools[(i + pool_offset)%NUM_POOLS]; 407 pool = &_manager->pools[(i + pool_offset)%NUM_POOLS];
410 shrink_pages = ttm_page_pool_free(pool, nr_free); 408 shrink_pages = ttm_page_pool_free(pool, nr_free);
409 freed += nr_free - shrink_pages;
411 } 410 }
412 /* return estimated number of unused pages in pool */ 411 return freed;
413 return ttm_pool_get_num_unused_pages(); 412}
413
414
415static unsigned long
416ttm_pool_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
417{
418 unsigned i;
419 unsigned long count = 0;
420
421 for (i = 0; i < NUM_POOLS; ++i)
422 count += _manager->pools[i].npages;
423
424 return count;
414} 425}
415 426
416static void ttm_pool_mm_shrink_init(struct ttm_pool_manager *manager) 427static void ttm_pool_mm_shrink_init(struct ttm_pool_manager *manager)
417{ 428{
418 manager->mm_shrink.shrink = &ttm_pool_mm_shrink; 429 manager->mm_shrink.count_objects = ttm_pool_shrink_count;
430 manager->mm_shrink.scan_objects = ttm_pool_shrink_scan;
419 manager->mm_shrink.seeks = 1; 431 manager->mm_shrink.seeks = 1;
420 register_shrinker(&manager->mm_shrink); 432 register_shrinker(&manager->mm_shrink);
421} 433}
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index b8b394319b45..7957beeeaf73 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -918,19 +918,6 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev)
918} 918}
919EXPORT_SYMBOL_GPL(ttm_dma_populate); 919EXPORT_SYMBOL_GPL(ttm_dma_populate);
920 920
921/* Get good estimation how many pages are free in pools */
922static int ttm_dma_pool_get_num_unused_pages(void)
923{
924 struct device_pools *p;
925 unsigned total = 0;
926
927 mutex_lock(&_manager->lock);
928 list_for_each_entry(p, &_manager->pools, pools)
929 total += p->pool->npages_free;
930 mutex_unlock(&_manager->lock);
931 return total;
932}
933
934/* Put all pages in pages list to correct pool to wait for reuse */ 921/* Put all pages in pages list to correct pool to wait for reuse */
935void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) 922void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
936{ 923{
@@ -1002,18 +989,29 @@ EXPORT_SYMBOL_GPL(ttm_dma_unpopulate);
1002 989
1003/** 990/**
1004 * Callback for mm to request pool to reduce number of page held. 991 * Callback for mm to request pool to reduce number of page held.
992 *
993 * XXX: (dchinner) Deadlock warning!
994 *
995 * ttm_dma_page_pool_free() does GFP_KERNEL memory allocation, and so attention
996 * needs to be paid to sc->gfp_mask to determine if this can be done or not.
997 * GFP_KERNEL memory allocation in a GFP_ATOMIC reclaim context would be really
998 * bad.
999 *
1000 * I'm getting sadder as I hear more pathetic whimpers about needing per-pool
1001 * shrinkers
1005 */ 1002 */
1006static int ttm_dma_pool_mm_shrink(struct shrinker *shrink, 1003static unsigned long
1007 struct shrink_control *sc) 1004ttm_dma_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
1008{ 1005{
1009 static atomic_t start_pool = ATOMIC_INIT(0); 1006 static atomic_t start_pool = ATOMIC_INIT(0);
1010 unsigned idx = 0; 1007 unsigned idx = 0;
1011 unsigned pool_offset = atomic_add_return(1, &start_pool); 1008 unsigned pool_offset = atomic_add_return(1, &start_pool);
1012 unsigned shrink_pages = sc->nr_to_scan; 1009 unsigned shrink_pages = sc->nr_to_scan;
1013 struct device_pools *p; 1010 struct device_pools *p;
1011 unsigned long freed = 0;
1014 1012
1015 if (list_empty(&_manager->pools)) 1013 if (list_empty(&_manager->pools))
1016 return 0; 1014 return SHRINK_STOP;
1017 1015
1018 mutex_lock(&_manager->lock); 1016 mutex_lock(&_manager->lock);
1019 pool_offset = pool_offset % _manager->npools; 1017 pool_offset = pool_offset % _manager->npools;
@@ -1029,18 +1027,33 @@ static int ttm_dma_pool_mm_shrink(struct shrinker *shrink,
1029 continue; 1027 continue;
1030 nr_free = shrink_pages; 1028 nr_free = shrink_pages;
1031 shrink_pages = ttm_dma_page_pool_free(p->pool, nr_free); 1029 shrink_pages = ttm_dma_page_pool_free(p->pool, nr_free);
1030 freed += nr_free - shrink_pages;
1031
1032 pr_debug("%s: (%s:%d) Asked to shrink %d, have %d more to go\n", 1032 pr_debug("%s: (%s:%d) Asked to shrink %d, have %d more to go\n",
1033 p->pool->dev_name, p->pool->name, current->pid, 1033 p->pool->dev_name, p->pool->name, current->pid,
1034 nr_free, shrink_pages); 1034 nr_free, shrink_pages);
1035 } 1035 }
1036 mutex_unlock(&_manager->lock); 1036 mutex_unlock(&_manager->lock);
1037 /* return estimated number of unused pages in pool */ 1037 return freed;
1038 return ttm_dma_pool_get_num_unused_pages(); 1038}
1039
1040static unsigned long
1041ttm_dma_pool_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
1042{
1043 struct device_pools *p;
1044 unsigned long count = 0;
1045
1046 mutex_lock(&_manager->lock);
1047 list_for_each_entry(p, &_manager->pools, pools)
1048 count += p->pool->npages_free;
1049 mutex_unlock(&_manager->lock);
1050 return count;
1039} 1051}
1040 1052
1041static void ttm_dma_pool_mm_shrink_init(struct ttm_pool_manager *manager) 1053static void ttm_dma_pool_mm_shrink_init(struct ttm_pool_manager *manager)
1042{ 1054{
1043 manager->mm_shrink.shrink = &ttm_dma_pool_mm_shrink; 1055 manager->mm_shrink.count_objects = ttm_dma_pool_shrink_count;
1056 manager->mm_shrink.scan_objects = &ttm_dma_pool_shrink_scan;
1044 manager->mm_shrink.seeks = 1; 1057 manager->mm_shrink.seeks = 1;
1045 register_shrinker(&manager->mm_shrink); 1058 register_shrinker(&manager->mm_shrink);
1046} 1059}
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index ee372884c405..f9764e61978b 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -597,24 +597,19 @@ static int mca_reap(struct btree *b, struct closure *cl, unsigned min_order)
597 return 0; 597 return 0;
598} 598}
599 599
600static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) 600static unsigned long bch_mca_scan(struct shrinker *shrink,
601 struct shrink_control *sc)
601{ 602{
602 struct cache_set *c = container_of(shrink, struct cache_set, shrink); 603 struct cache_set *c = container_of(shrink, struct cache_set, shrink);
603 struct btree *b, *t; 604 struct btree *b, *t;
604 unsigned long i, nr = sc->nr_to_scan; 605 unsigned long i, nr = sc->nr_to_scan;
606 unsigned long freed = 0;
605 607
606 if (c->shrinker_disabled) 608 if (c->shrinker_disabled)
607 return 0; 609 return SHRINK_STOP;
608 610
609 if (c->try_harder) 611 if (c->try_harder)
610 return 0; 612 return SHRINK_STOP;
611
612 /*
613 * If nr == 0, we're supposed to return the number of items we have
614 * cached. Not allowed to return -1.
615 */
616 if (!nr)
617 return mca_can_free(c) * c->btree_pages;
618 613
619 /* Return -1 if we can't do anything right now */ 614 /* Return -1 if we can't do anything right now */
620 if (sc->gfp_mask & __GFP_WAIT) 615 if (sc->gfp_mask & __GFP_WAIT)
@@ -634,14 +629,14 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc)
634 629
635 i = 0; 630 i = 0;
636 list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) { 631 list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
637 if (!nr) 632 if (freed >= nr)
638 break; 633 break;
639 634
640 if (++i > 3 && 635 if (++i > 3 &&
641 !mca_reap(b, NULL, 0)) { 636 !mca_reap(b, NULL, 0)) {
642 mca_data_free(b); 637 mca_data_free(b);
643 rw_unlock(true, b); 638 rw_unlock(true, b);
644 --nr; 639 freed++;
645 } 640 }
646 } 641 }
647 642
@@ -652,7 +647,7 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc)
652 if (list_empty(&c->btree_cache)) 647 if (list_empty(&c->btree_cache))
653 goto out; 648 goto out;
654 649
655 for (i = 0; nr && i < c->bucket_cache_used; i++) { 650 for (i = 0; (nr--) && i < c->bucket_cache_used; i++) {
656 b = list_first_entry(&c->btree_cache, struct btree, list); 651 b = list_first_entry(&c->btree_cache, struct btree, list);
657 list_rotate_left(&c->btree_cache); 652 list_rotate_left(&c->btree_cache);
658 653
@@ -661,14 +656,27 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc)
661 mca_bucket_free(b); 656 mca_bucket_free(b);
662 mca_data_free(b); 657 mca_data_free(b);
663 rw_unlock(true, b); 658 rw_unlock(true, b);
664 --nr; 659 freed++;
665 } else 660 } else
666 b->accessed = 0; 661 b->accessed = 0;
667 } 662 }
668out: 663out:
669 nr = mca_can_free(c) * c->btree_pages;
670 mutex_unlock(&c->bucket_lock); 664 mutex_unlock(&c->bucket_lock);
671 return nr; 665 return freed;
666}
667
668static unsigned long bch_mca_count(struct shrinker *shrink,
669 struct shrink_control *sc)
670{
671 struct cache_set *c = container_of(shrink, struct cache_set, shrink);
672
673 if (c->shrinker_disabled)
674 return 0;
675
676 if (c->try_harder)
677 return 0;
678
679 return mca_can_free(c) * c->btree_pages;
672} 680}
673 681
674void bch_btree_cache_free(struct cache_set *c) 682void bch_btree_cache_free(struct cache_set *c)
@@ -737,7 +745,8 @@ int bch_btree_cache_alloc(struct cache_set *c)
737 c->verify_data = NULL; 745 c->verify_data = NULL;
738#endif 746#endif
739 747
740 c->shrink.shrink = bch_mca_shrink; 748 c->shrink.count_objects = bch_mca_count;
749 c->shrink.scan_objects = bch_mca_scan;
741 c->shrink.seeks = 4; 750 c->shrink.seeks = 4;
742 c->shrink.batch = c->btree_pages * 2; 751 c->shrink.batch = c->btree_pages * 2;
743 register_shrinker(&c->shrink); 752 register_shrinker(&c->shrink);
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 12a2c2846f99..4fe6ab2fbe2e 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -556,7 +556,7 @@ STORE(__bch_cache_set)
556 struct shrink_control sc; 556 struct shrink_control sc;
557 sc.gfp_mask = GFP_KERNEL; 557 sc.gfp_mask = GFP_KERNEL;
558 sc.nr_to_scan = strtoul_or_return(buf); 558 sc.nr_to_scan = strtoul_or_return(buf);
559 c->shrink.shrink(&c->shrink, &sc); 559 c->shrink.scan_objects(&c->shrink, &sc);
560 } 560 }
561 561
562 sysfs_strtoul(congested_read_threshold_us, 562 sysfs_strtoul(congested_read_threshold_us,
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 5227e079a6e3..173cbb20d104 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1425,62 +1425,75 @@ static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp,
1425 unsigned long max_jiffies) 1425 unsigned long max_jiffies)
1426{ 1426{
1427 if (jiffies - b->last_accessed < max_jiffies) 1427 if (jiffies - b->last_accessed < max_jiffies)
1428 return 1; 1428 return 0;
1429 1429
1430 if (!(gfp & __GFP_IO)) { 1430 if (!(gfp & __GFP_IO)) {
1431 if (test_bit(B_READING, &b->state) || 1431 if (test_bit(B_READING, &b->state) ||
1432 test_bit(B_WRITING, &b->state) || 1432 test_bit(B_WRITING, &b->state) ||
1433 test_bit(B_DIRTY, &b->state)) 1433 test_bit(B_DIRTY, &b->state))
1434 return 1; 1434 return 0;
1435 } 1435 }
1436 1436
1437 if (b->hold_count) 1437 if (b->hold_count)
1438 return 1; 1438 return 0;
1439 1439
1440 __make_buffer_clean(b); 1440 __make_buffer_clean(b);
1441 __unlink_buffer(b); 1441 __unlink_buffer(b);
1442 __free_buffer_wake(b); 1442 __free_buffer_wake(b);
1443 1443
1444 return 0; 1444 return 1;
1445} 1445}
1446 1446
1447static void __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, 1447static long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
1448 struct shrink_control *sc) 1448 gfp_t gfp_mask)
1449{ 1449{
1450 int l; 1450 int l;
1451 struct dm_buffer *b, *tmp; 1451 struct dm_buffer *b, *tmp;
1452 long freed = 0;
1452 1453
1453 for (l = 0; l < LIST_SIZE; l++) { 1454 for (l = 0; l < LIST_SIZE; l++) {
1454 list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) 1455 list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
1455 if (!__cleanup_old_buffer(b, sc->gfp_mask, 0) && 1456 freed += __cleanup_old_buffer(b, gfp_mask, 0);
1456 !--nr_to_scan) 1457 if (!--nr_to_scan)
1457 return; 1458 break;
1459 }
1458 dm_bufio_cond_resched(); 1460 dm_bufio_cond_resched();
1459 } 1461 }
1462 return freed;
1460} 1463}
1461 1464
1462static int shrink(struct shrinker *shrinker, struct shrink_control *sc) 1465static unsigned long
1466dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
1463{ 1467{
1464 struct dm_bufio_client *c = 1468 struct dm_bufio_client *c;
1465 container_of(shrinker, struct dm_bufio_client, shrinker); 1469 unsigned long freed;
1466 unsigned long r;
1467 unsigned long nr_to_scan = sc->nr_to_scan;
1468 1470
1471 c = container_of(shrink, struct dm_bufio_client, shrinker);
1469 if (sc->gfp_mask & __GFP_IO) 1472 if (sc->gfp_mask & __GFP_IO)
1470 dm_bufio_lock(c); 1473 dm_bufio_lock(c);
1471 else if (!dm_bufio_trylock(c)) 1474 else if (!dm_bufio_trylock(c))
1472 return !nr_to_scan ? 0 : -1; 1475 return SHRINK_STOP;
1473 1476
1474 if (nr_to_scan) 1477 freed = __scan(c, sc->nr_to_scan, sc->gfp_mask);
1475 __scan(c, nr_to_scan, sc); 1478 dm_bufio_unlock(c);
1479 return freed;
1480}
1476 1481
1477 r = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; 1482static unsigned long
1478 if (r > INT_MAX) 1483dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
1479 r = INT_MAX; 1484{
1485 struct dm_bufio_client *c;
1486 unsigned long count;
1480 1487
1481 dm_bufio_unlock(c); 1488 c = container_of(shrink, struct dm_bufio_client, shrinker);
1489 if (sc->gfp_mask & __GFP_IO)
1490 dm_bufio_lock(c);
1491 else if (!dm_bufio_trylock(c))
1492 return 0;
1482 1493
1483 return r; 1494 count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
1495 dm_bufio_unlock(c);
1496 return count;
1484} 1497}
1485 1498
1486/* 1499/*
@@ -1582,7 +1595,8 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
1582 __cache_size_refresh(); 1595 __cache_size_refresh();
1583 mutex_unlock(&dm_bufio_clients_lock); 1596 mutex_unlock(&dm_bufio_clients_lock);
1584 1597
1585 c->shrinker.shrink = shrink; 1598 c->shrinker.count_objects = dm_bufio_shrink_count;
1599 c->shrinker.scan_objects = dm_bufio_shrink_scan;
1586 c->shrinker.seeks = 1; 1600 c->shrinker.seeks = 1;
1587 c->shrinker.batch = 0; 1601 c->shrinker.batch = 0;
1588 register_shrinker(&c->shrinker); 1602 register_shrinker(&c->shrinker);
@@ -1669,7 +1683,7 @@ static void cleanup_old_buffers(void)
1669 struct dm_buffer *b; 1683 struct dm_buffer *b;
1670 b = list_entry(c->lru[LIST_CLEAN].prev, 1684 b = list_entry(c->lru[LIST_CLEAN].prev,
1671 struct dm_buffer, lru_list); 1685 struct dm_buffer, lru_list);
1672 if (__cleanup_old_buffer(b, 0, max_age * HZ)) 1686 if (!__cleanup_old_buffer(b, 0, max_age * HZ))
1673 break; 1687 break;
1674 dm_bufio_cond_resched(); 1688 dm_bufio_cond_resched();
1675 } 1689 }
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index 21a3f7250531..8e76ddca0999 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -341,27 +341,26 @@ out:
341/* 341/*
342 * ashmem_shrink - our cache shrinker, called from mm/vmscan.c :: shrink_slab 342 * ashmem_shrink - our cache shrinker, called from mm/vmscan.c :: shrink_slab
343 * 343 *
344 * 'nr_to_scan' is the number of objects (pages) to prune, or 0 to query how 344 * 'nr_to_scan' is the number of objects to scan for freeing.
345 * many objects (pages) we have in total.
346 * 345 *
347 * 'gfp_mask' is the mask of the allocation that got us into this mess. 346 * 'gfp_mask' is the mask of the allocation that got us into this mess.
348 * 347 *
349 * Return value is the number of objects (pages) remaining, or -1 if we cannot 348 * Return value is the number of objects freed or -1 if we cannot
350 * proceed without risk of deadlock (due to gfp_mask). 349 * proceed without risk of deadlock (due to gfp_mask).
351 * 350 *
352 * We approximate LRU via least-recently-unpinned, jettisoning unpinned partial 351 * We approximate LRU via least-recently-unpinned, jettisoning unpinned partial
353 * chunks of ashmem regions LRU-wise one-at-a-time until we hit 'nr_to_scan' 352 * chunks of ashmem regions LRU-wise one-at-a-time until we hit 'nr_to_scan'
354 * pages freed. 353 * pages freed.
355 */ 354 */
356static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc) 355static unsigned long
356ashmem_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
357{ 357{
358 struct ashmem_range *range, *next; 358 struct ashmem_range *range, *next;
359 unsigned long freed = 0;
359 360
360 /* We might recurse into filesystem code, so bail out if necessary */ 361 /* We might recurse into filesystem code, so bail out if necessary */
361 if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS)) 362 if (!(sc->gfp_mask & __GFP_FS))
362 return -1; 363 return SHRINK_STOP;
363 if (!sc->nr_to_scan)
364 return lru_count;
365 364
366 mutex_lock(&ashmem_mutex); 365 mutex_lock(&ashmem_mutex);
367 list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) { 366 list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) {
@@ -374,17 +373,32 @@ static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc)
374 range->purged = ASHMEM_WAS_PURGED; 373 range->purged = ASHMEM_WAS_PURGED;
375 lru_del(range); 374 lru_del(range);
376 375
377 sc->nr_to_scan -= range_size(range); 376 freed += range_size(range);
378 if (sc->nr_to_scan <= 0) 377 if (--sc->nr_to_scan <= 0)
379 break; 378 break;
380 } 379 }
381 mutex_unlock(&ashmem_mutex); 380 mutex_unlock(&ashmem_mutex);
381 return freed;
382}
382 383
384static unsigned long
385ashmem_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
386{
387 /*
388 * note that lru_count is count of pages on the lru, not a count of
389 * objects on the list. This means the scan function needs to return the
390 * number of pages freed, not the number of objects scanned.
391 */
383 return lru_count; 392 return lru_count;
384} 393}
385 394
386static struct shrinker ashmem_shrinker = { 395static struct shrinker ashmem_shrinker = {
387 .shrink = ashmem_shrink, 396 .count_objects = ashmem_shrink_count,
397 .scan_objects = ashmem_shrink_scan,
398 /*
399 * XXX (dchinner): I wish people would comment on why they need
400 * significant changes to the default value here
401 */
388 .seeks = DEFAULT_SEEKS * 4, 402 .seeks = DEFAULT_SEEKS * 4,
389}; 403};
390 404
@@ -690,11 +704,11 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
690 if (capable(CAP_SYS_ADMIN)) { 704 if (capable(CAP_SYS_ADMIN)) {
691 struct shrink_control sc = { 705 struct shrink_control sc = {
692 .gfp_mask = GFP_KERNEL, 706 .gfp_mask = GFP_KERNEL,
693 .nr_to_scan = 0, 707 .nr_to_scan = LONG_MAX,
694 }; 708 };
695 ret = ashmem_shrink(&ashmem_shrinker, &sc); 709
696 sc.nr_to_scan = ret; 710 nodes_setall(sc.nodes_to_scan);
697 ashmem_shrink(&ashmem_shrinker, &sc); 711 ashmem_shrink_scan(&ashmem_shrinker, &sc);
698 } 712 }
699 break; 713 break;
700 } 714 }
diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c
index fe74494868ef..6f094b37f1f1 100644
--- a/drivers/staging/android/lowmemorykiller.c
+++ b/drivers/staging/android/lowmemorykiller.c
@@ -66,11 +66,20 @@ static unsigned long lowmem_deathpending_timeout;
66 pr_info(x); \ 66 pr_info(x); \
67 } while (0) 67 } while (0)
68 68
69static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) 69static unsigned long lowmem_count(struct shrinker *s,
70 struct shrink_control *sc)
71{
72 return global_page_state(NR_ACTIVE_ANON) +
73 global_page_state(NR_ACTIVE_FILE) +
74 global_page_state(NR_INACTIVE_ANON) +
75 global_page_state(NR_INACTIVE_FILE);
76}
77
78static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
70{ 79{
71 struct task_struct *tsk; 80 struct task_struct *tsk;
72 struct task_struct *selected = NULL; 81 struct task_struct *selected = NULL;
73 int rem = 0; 82 unsigned long rem = 0;
74 int tasksize; 83 int tasksize;
75 int i; 84 int i;
76 short min_score_adj = OOM_SCORE_ADJ_MAX + 1; 85 short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
@@ -92,19 +101,17 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
92 break; 101 break;
93 } 102 }
94 } 103 }
95 if (sc->nr_to_scan > 0) 104
96 lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %hd\n", 105 lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n",
97 sc->nr_to_scan, sc->gfp_mask, other_free, 106 sc->nr_to_scan, sc->gfp_mask, other_free,
98 other_file, min_score_adj); 107 other_file, min_score_adj);
99 rem = global_page_state(NR_ACTIVE_ANON) + 108
100 global_page_state(NR_ACTIVE_FILE) + 109 if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
101 global_page_state(NR_INACTIVE_ANON) + 110 lowmem_print(5, "lowmem_scan %lu, %x, return 0\n",
102 global_page_state(NR_INACTIVE_FILE); 111 sc->nr_to_scan, sc->gfp_mask);
103 if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { 112 return 0;
104 lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n",
105 sc->nr_to_scan, sc->gfp_mask, rem);
106 return rem;
107 } 113 }
114
108 selected_oom_score_adj = min_score_adj; 115 selected_oom_score_adj = min_score_adj;
109 116
110 rcu_read_lock(); 117 rcu_read_lock();
@@ -154,16 +161,18 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
154 lowmem_deathpending_timeout = jiffies + HZ; 161 lowmem_deathpending_timeout = jiffies + HZ;
155 send_sig(SIGKILL, selected, 0); 162 send_sig(SIGKILL, selected, 0);
156 set_tsk_thread_flag(selected, TIF_MEMDIE); 163 set_tsk_thread_flag(selected, TIF_MEMDIE);
157 rem -= selected_tasksize; 164 rem += selected_tasksize;
158 } 165 }
159 lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", 166
167 lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n",
160 sc->nr_to_scan, sc->gfp_mask, rem); 168 sc->nr_to_scan, sc->gfp_mask, rem);
161 rcu_read_unlock(); 169 rcu_read_unlock();
162 return rem; 170 return rem;
163} 171}
164 172
165static struct shrinker lowmem_shrinker = { 173static struct shrinker lowmem_shrinker = {
166 .shrink = lowmem_shrink, 174 .scan_objects = lowmem_scan,
175 .count_objects = lowmem_count,
167 .seeks = DEFAULT_SEEKS * 16 176 .seeks = DEFAULT_SEEKS * 16
168}; 177};
169 178
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
index 63efb7b456c6..2af15d41e77a 100644
--- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
@@ -79,42 +79,4 @@
79 do { __oldfs = get_fs(); set_fs(get_ds());} while(0) 79 do { __oldfs = get_fs(); set_fs(get_ds());} while(0)
80#define MMSPACE_CLOSE set_fs(__oldfs) 80#define MMSPACE_CLOSE set_fs(__oldfs)
81 81
82/*
83 * Shrinker
84 */
85
86# define SHRINKER_ARGS(sc, nr_to_scan, gfp_mask) \
87 struct shrinker *shrinker, \
88 struct shrink_control *sc
89# define shrink_param(sc, var) ((sc)->var)
90
91typedef int (*shrinker_t)(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask));
92
93static inline
94struct shrinker *set_shrinker(int seek, shrinker_t func)
95{
96 struct shrinker *s;
97
98 s = kmalloc(sizeof(*s), GFP_KERNEL);
99 if (s == NULL)
100 return (NULL);
101
102 s->shrink = func;
103 s->seeks = seek;
104
105 register_shrinker(s);
106
107 return s;
108}
109
110static inline
111void remove_shrinker(struct shrinker *shrinker)
112{
113 if (shrinker == NULL)
114 return;
115
116 unregister_shrinker(shrinker);
117 kfree(shrinker);
118}
119
120#endif /* __LINUX_CFS_MEM_H__ */ 82#endif /* __LINUX_CFS_MEM_H__ */
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
index 454027d68d54..0025ee6356da 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
@@ -521,7 +521,7 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
521 int nr, unsigned int gfp_mask) 521 int nr, unsigned int gfp_mask)
522{ 522{
523 struct ldlm_namespace *ns; 523 struct ldlm_namespace *ns;
524 int canceled = 0, unused; 524 int unused;
525 525
526 ns = ldlm_pl2ns(pl); 526 ns = ldlm_pl2ns(pl);
527 527
@@ -540,14 +540,10 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
540 unused = ns->ns_nr_unused; 540 unused = ns->ns_nr_unused;
541 spin_unlock(&ns->ns_lock); 541 spin_unlock(&ns->ns_lock);
542 542
543 if (nr) { 543 if (nr == 0)
544 canceled = ldlm_cancel_lru(ns, nr, LCF_ASYNC, 544 return (unused / 100) * sysctl_vfs_cache_pressure;
545 LDLM_CANCEL_SHRINK); 545 else
546 } 546 return ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_CANCEL_SHRINK);
547 /*
548 * Return the number of potentially reclaimable locks.
549 */
550 return ((unused - canceled) / 100) * sysctl_vfs_cache_pressure;
551} 547}
552 548
553struct ldlm_pool_ops ldlm_srv_pool_ops = { 549struct ldlm_pool_ops ldlm_srv_pool_ops = {
@@ -601,9 +597,10 @@ int ldlm_pool_recalc(struct ldlm_pool *pl)
601 return recalc_interval_sec; 597 return recalc_interval_sec;
602} 598}
603 599
604/** 600/*
605 * Pool shrink wrapper. Will call either client or server pool recalc callback 601 * Pool shrink wrapper. Will call either client or server pool recalc callback
606 * depending what pool \a pl is used. 602 * depending what pool pl is used. When nr == 0, just return the number of
603 * freeable locks. Otherwise, return the number of canceled locks.
607 */ 604 */
608int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, 605int ldlm_pool_shrink(struct ldlm_pool *pl, int nr,
609 unsigned int gfp_mask) 606 unsigned int gfp_mask)
@@ -1017,29 +1014,24 @@ static int ldlm_pool_granted(struct ldlm_pool *pl)
1017} 1014}
1018 1015
1019static struct ptlrpc_thread *ldlm_pools_thread; 1016static struct ptlrpc_thread *ldlm_pools_thread;
1020static struct shrinker *ldlm_pools_srv_shrinker;
1021static struct shrinker *ldlm_pools_cli_shrinker;
1022static struct completion ldlm_pools_comp; 1017static struct completion ldlm_pools_comp;
1023 1018
1024/* 1019/*
1025 * Cancel \a nr locks from all namespaces (if possible). Returns number of 1020 * count locks from all namespaces (if possible). Returns number of
1026 * cached locks after shrink is finished. All namespaces are asked to 1021 * cached locks.
1027 * cancel approximately equal amount of locks to keep balancing.
1028 */ 1022 */
1029static int ldlm_pools_shrink(ldlm_side_t client, int nr, 1023static unsigned long ldlm_pools_count(ldlm_side_t client, unsigned int gfp_mask)
1030 unsigned int gfp_mask)
1031{ 1024{
1032 int total = 0, cached = 0, nr_ns; 1025 int total = 0, nr_ns;
1033 struct ldlm_namespace *ns; 1026 struct ldlm_namespace *ns;
1034 struct ldlm_namespace *ns_old = NULL; /* loop detection */ 1027 struct ldlm_namespace *ns_old = NULL; /* loop detection */
1035 void *cookie; 1028 void *cookie;
1036 1029
1037 if (client == LDLM_NAMESPACE_CLIENT && nr != 0 && 1030 if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
1038 !(gfp_mask & __GFP_FS)) 1031 return 0;
1039 return -1;
1040 1032
1041 CDEBUG(D_DLMTRACE, "Request to shrink %d %s locks from all pools\n", 1033 CDEBUG(D_DLMTRACE, "Request to count %s locks from all pools\n",
1042 nr, client == LDLM_NAMESPACE_CLIENT ? "client" : "server"); 1034 client == LDLM_NAMESPACE_CLIENT ? "client" : "server");
1043 1035
1044 cookie = cl_env_reenter(); 1036 cookie = cl_env_reenter();
1045 1037
@@ -1047,8 +1039,7 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
1047 * Find out how many resources we may release. 1039 * Find out how many resources we may release.
1048 */ 1040 */
1049 for (nr_ns = ldlm_namespace_nr_read(client); 1041 for (nr_ns = ldlm_namespace_nr_read(client);
1050 nr_ns > 0; nr_ns--) 1042 nr_ns > 0; nr_ns--) {
1051 {
1052 mutex_lock(ldlm_namespace_lock(client)); 1043 mutex_lock(ldlm_namespace_lock(client));
1053 if (list_empty(ldlm_namespace_list(client))) { 1044 if (list_empty(ldlm_namespace_list(client))) {
1054 mutex_unlock(ldlm_namespace_lock(client)); 1045 mutex_unlock(ldlm_namespace_lock(client));
@@ -1078,17 +1069,27 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
1078 ldlm_namespace_put(ns); 1069 ldlm_namespace_put(ns);
1079 } 1070 }
1080 1071
1081 if (nr == 0 || total == 0) { 1072 cl_env_reexit(cookie);
1082 cl_env_reexit(cookie); 1073 return total;
1083 return total; 1074}
1084 } 1075
1076static unsigned long ldlm_pools_scan(ldlm_side_t client, int nr, unsigned int gfp_mask)
1077{
1078 unsigned long freed = 0;
1079 int tmp, nr_ns;
1080 struct ldlm_namespace *ns;
1081 void *cookie;
1082
1083 if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
1084 return -1;
1085
1086 cookie = cl_env_reenter();
1085 1087
1086 /* 1088 /*
1087 * Shrink at least ldlm_namespace_nr(client) namespaces. 1089 * Shrink at least ldlm_namespace_nr_read(client) namespaces.
1088 */ 1090 */
1089 for (nr_ns = ldlm_namespace_nr_read(client) - nr_ns; 1091 for (tmp = nr_ns = ldlm_namespace_nr_read(client);
1090 nr_ns > 0; nr_ns--) 1092 tmp > 0; tmp--) {
1091 {
1092 int cancel, nr_locks; 1093 int cancel, nr_locks;
1093 1094
1094 /* 1095 /*
@@ -1097,12 +1098,6 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
1097 mutex_lock(ldlm_namespace_lock(client)); 1098 mutex_lock(ldlm_namespace_lock(client));
1098 if (list_empty(ldlm_namespace_list(client))) { 1099 if (list_empty(ldlm_namespace_list(client))) {
1099 mutex_unlock(ldlm_namespace_lock(client)); 1100 mutex_unlock(ldlm_namespace_lock(client));
1100 /*
1101 * If list is empty, we can't return any @cached > 0,
1102 * that probably would cause needless shrinker
1103 * call.
1104 */
1105 cached = 0;
1106 break; 1101 break;
1107 } 1102 }
1108 ns = ldlm_namespace_first_locked(client); 1103 ns = ldlm_namespace_first_locked(client);
@@ -1111,29 +1106,42 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
1111 mutex_unlock(ldlm_namespace_lock(client)); 1106 mutex_unlock(ldlm_namespace_lock(client));
1112 1107
1113 nr_locks = ldlm_pool_granted(&ns->ns_pool); 1108 nr_locks = ldlm_pool_granted(&ns->ns_pool);
1114 cancel = 1 + nr_locks * nr / total; 1109 /*
1115 ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask); 1110 * We used to shrink proportionally but with new shrinker API,
1116 cached += ldlm_pool_granted(&ns->ns_pool); 1111 * we lost the total number of freeable locks.
1112 */
1113 cancel = 1 + min_t(int, nr_locks, nr / nr_ns);
1114 freed += ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask);
1117 ldlm_namespace_put(ns); 1115 ldlm_namespace_put(ns);
1118 } 1116 }
1119 cl_env_reexit(cookie); 1117 cl_env_reexit(cookie);
1120 /* we only decrease the SLV in server pools shrinker, return -1 to 1118 /*
1121 * kernel to avoid needless loop. LU-1128 */ 1119 * we only decrease the SLV in server pools shrinker, return
1122 return (client == LDLM_NAMESPACE_SERVER) ? -1 : cached; 1120 * SHRINK_STOP to kernel to avoid needless loop. LU-1128
1121 */
1122 return (client == LDLM_NAMESPACE_SERVER) ? SHRINK_STOP : freed;
1123}
1124
1125static unsigned long ldlm_pools_srv_count(struct shrinker *s, struct shrink_control *sc)
1126{
1127 return ldlm_pools_count(LDLM_NAMESPACE_SERVER, sc->gfp_mask);
1123} 1128}
1124 1129
1125static int ldlm_pools_srv_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) 1130static unsigned long ldlm_pools_srv_scan(struct shrinker *s, struct shrink_control *sc)
1126{ 1131{
1127 return ldlm_pools_shrink(LDLM_NAMESPACE_SERVER, 1132 return ldlm_pools_scan(LDLM_NAMESPACE_SERVER, sc->nr_to_scan,
1128 shrink_param(sc, nr_to_scan), 1133 sc->gfp_mask);
1129 shrink_param(sc, gfp_mask));
1130} 1134}
1131 1135
1132static int ldlm_pools_cli_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) 1136static unsigned long ldlm_pools_cli_count(struct shrinker *s, struct shrink_control *sc)
1133{ 1137{
1134 return ldlm_pools_shrink(LDLM_NAMESPACE_CLIENT, 1138 return ldlm_pools_count(LDLM_NAMESPACE_CLIENT, sc->gfp_mask);
1135 shrink_param(sc, nr_to_scan), 1139}
1136 shrink_param(sc, gfp_mask)); 1140
1141static unsigned long ldlm_pools_cli_scan(struct shrinker *s, struct shrink_control *sc)
1142{
1143 return ldlm_pools_scan(LDLM_NAMESPACE_CLIENT, sc->nr_to_scan,
1144 sc->gfp_mask);
1137} 1145}
1138 1146
1139int ldlm_pools_recalc(ldlm_side_t client) 1147int ldlm_pools_recalc(ldlm_side_t client)
@@ -1216,7 +1224,7 @@ int ldlm_pools_recalc(ldlm_side_t client)
1216 } 1224 }
1217 1225
1218 /* 1226 /*
1219 * Recalc at least ldlm_namespace_nr(client) namespaces. 1227 * Recalc at least ldlm_namespace_nr_read(client) namespaces.
1220 */ 1228 */
1221 for (nr = ldlm_namespace_nr_read(client); nr > 0; nr--) { 1229 for (nr = ldlm_namespace_nr_read(client); nr > 0; nr--) {
1222 int skip; 1230 int skip;
@@ -1383,18 +1391,26 @@ static void ldlm_pools_thread_stop(void)
1383 ldlm_pools_thread = NULL; 1391 ldlm_pools_thread = NULL;
1384} 1392}
1385 1393
1394static struct shrinker ldlm_pools_srv_shrinker = {
1395 .count_objects = ldlm_pools_srv_count,
1396 .scan_objects = ldlm_pools_srv_scan,
1397 .seeks = DEFAULT_SEEKS,
1398};
1399
1400static struct shrinker ldlm_pools_cli_shrinker = {
1401 .count_objects = ldlm_pools_cli_count,
1402 .scan_objects = ldlm_pools_cli_scan,
1403 .seeks = DEFAULT_SEEKS,
1404};
1405
1386int ldlm_pools_init(void) 1406int ldlm_pools_init(void)
1387{ 1407{
1388 int rc; 1408 int rc;
1389 1409
1390 rc = ldlm_pools_thread_start(); 1410 rc = ldlm_pools_thread_start();
1391 if (rc == 0) { 1411 if (rc == 0) {
1392 ldlm_pools_srv_shrinker = 1412 register_shrinker(&ldlm_pools_srv_shrinker);
1393 set_shrinker(DEFAULT_SEEKS, 1413 register_shrinker(&ldlm_pools_cli_shrinker);
1394 ldlm_pools_srv_shrink);
1395 ldlm_pools_cli_shrinker =
1396 set_shrinker(DEFAULT_SEEKS,
1397 ldlm_pools_cli_shrink);
1398 } 1414 }
1399 return rc; 1415 return rc;
1400} 1416}
@@ -1402,14 +1418,8 @@ EXPORT_SYMBOL(ldlm_pools_init);
1402 1418
1403void ldlm_pools_fini(void) 1419void ldlm_pools_fini(void)
1404{ 1420{
1405 if (ldlm_pools_srv_shrinker != NULL) { 1421 unregister_shrinker(&ldlm_pools_srv_shrinker);
1406 remove_shrinker(ldlm_pools_srv_shrinker); 1422 unregister_shrinker(&ldlm_pools_cli_shrinker);
1407 ldlm_pools_srv_shrinker = NULL;
1408 }
1409 if (ldlm_pools_cli_shrinker != NULL) {
1410 remove_shrinker(ldlm_pools_cli_shrinker);
1411 ldlm_pools_cli_shrinker = NULL;
1412 }
1413 ldlm_pools_thread_stop(); 1423 ldlm_pools_thread_stop();
1414} 1424}
1415EXPORT_SYMBOL(ldlm_pools_fini); 1425EXPORT_SYMBOL(ldlm_pools_fini);
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index c29ac1c2defd..3a3d5bc5a628 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -1779,7 +1779,6 @@ int lu_env_refill_by_tags(struct lu_env *env, __u32 ctags,
1779} 1779}
1780EXPORT_SYMBOL(lu_env_refill_by_tags); 1780EXPORT_SYMBOL(lu_env_refill_by_tags);
1781 1781
1782static struct shrinker *lu_site_shrinker = NULL;
1783 1782
1784typedef struct lu_site_stats{ 1783typedef struct lu_site_stats{
1785 unsigned lss_populated; 1784 unsigned lss_populated;
@@ -1835,61 +1834,68 @@ static void lu_site_stats_get(cfs_hash_t *hs,
1835 * objects without taking the lu_sites_guard lock, but this is not 1834 * objects without taking the lu_sites_guard lock, but this is not
1836 * possible in the current implementation. 1835 * possible in the current implementation.
1837 */ 1836 */
1838static int lu_cache_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) 1837static unsigned long lu_cache_shrink_count(struct shrinker *sk,
1838 struct shrink_control *sc)
1839{ 1839{
1840 lu_site_stats_t stats; 1840 lu_site_stats_t stats;
1841 struct lu_site *s; 1841 struct lu_site *s;
1842 struct lu_site *tmp; 1842 struct lu_site *tmp;
1843 int cached = 0; 1843 unsigned long cached = 0;
1844 int remain = shrink_param(sc, nr_to_scan);
1845 LIST_HEAD(splice);
1846
1847 if (!(shrink_param(sc, gfp_mask) & __GFP_FS)) {
1848 if (remain != 0)
1849 return -1;
1850 else
1851 /* We must not take the lu_sites_guard lock when
1852 * __GFP_FS is *not* set because of the deadlock
1853 * possibility detailed above. Additionally,
1854 * since we cannot determine the number of
1855 * objects in the cache without taking this
1856 * lock, we're in a particularly tough spot. As
1857 * a result, we'll just lie and say our cache is
1858 * empty. This _should_ be ok, as we can't
1859 * reclaim objects when __GFP_FS is *not* set
1860 * anyways.
1861 */
1862 return 0;
1863 }
1864 1844
1865 CDEBUG(D_INODE, "Shrink %d objects\n", remain); 1845 if (!(sc->gfp_mask & __GFP_FS))
1846 return 0;
1866 1847
1867 mutex_lock(&lu_sites_guard); 1848 mutex_lock(&lu_sites_guard);
1868 list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) { 1849 list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
1869 if (shrink_param(sc, nr_to_scan) != 0) {
1870 remain = lu_site_purge(&lu_shrink_env, s, remain);
1871 /*
1872 * Move just shrunk site to the tail of site list to
1873 * assure shrinking fairness.
1874 */
1875 list_move_tail(&s->ls_linkage, &splice);
1876 }
1877
1878 memset(&stats, 0, sizeof(stats)); 1850 memset(&stats, 0, sizeof(stats));
1879 lu_site_stats_get(s->ls_obj_hash, &stats, 0); 1851 lu_site_stats_get(s->ls_obj_hash, &stats, 0);
1880 cached += stats.lss_total - stats.lss_busy; 1852 cached += stats.lss_total - stats.lss_busy;
1881 if (shrink_param(sc, nr_to_scan) && remain <= 0)
1882 break;
1883 } 1853 }
1884 list_splice(&splice, lu_sites.prev);
1885 mutex_unlock(&lu_sites_guard); 1854 mutex_unlock(&lu_sites_guard);
1886 1855
1887 cached = (cached / 100) * sysctl_vfs_cache_pressure; 1856 cached = (cached / 100) * sysctl_vfs_cache_pressure;
1888 if (shrink_param(sc, nr_to_scan) == 0) 1857 CDEBUG(D_INODE, "%ld objects cached\n", cached);
1889 CDEBUG(D_INODE, "%d objects cached\n", cached);
1890 return cached; 1858 return cached;
1891} 1859}
1892 1860
1861static unsigned long lu_cache_shrink_scan(struct shrinker *sk,
1862 struct shrink_control *sc)
1863{
1864 struct lu_site *s;
1865 struct lu_site *tmp;
1866 unsigned long remain = sc->nr_to_scan, freed = 0;
1867 LIST_HEAD(splice);
1868
1869 if (!(sc->gfp_mask & __GFP_FS))
1870 /* We must not take the lu_sites_guard lock when
1871 * __GFP_FS is *not* set because of the deadlock
1872 * possibility detailed above. Additionally,
1873 * since we cannot determine the number of
1874 * objects in the cache without taking this
1875 * lock, we're in a particularly tough spot. As
1876 * a result, we'll just lie and say our cache is
1877 * empty. This _should_ be ok, as we can't
1878 * reclaim objects when __GFP_FS is *not* set
1879 * anyways.
1880 */
1881 return SHRINK_STOP;
1882
1883 mutex_lock(&lu_sites_guard);
1884 list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
1885 freed = lu_site_purge(&lu_shrink_env, s, remain);
1886 remain -= freed;
1887 /*
1888 * Move just shrunk site to the tail of site list to
1889 * assure shrinking fairness.
1890 */
1891 list_move_tail(&s->ls_linkage, &splice);
1892 }
1893 list_splice(&splice, lu_sites.prev);
1894 mutex_unlock(&lu_sites_guard);
1895
1896 return sc->nr_to_scan - remain;
1897}
1898
1893/* 1899/*
1894 * Debugging stuff. 1900 * Debugging stuff.
1895 */ 1901 */
@@ -1913,6 +1919,12 @@ int lu_printk_printer(const struct lu_env *env,
1913 return 0; 1919 return 0;
1914} 1920}
1915 1921
1922static struct shrinker lu_site_shrinker = {
1923 .count_objects = lu_cache_shrink_count,
1924 .scan_objects = lu_cache_shrink_scan,
1925 .seeks = DEFAULT_SEEKS,
1926};
1927
1916/** 1928/**
1917 * Initialization of global lu_* data. 1929 * Initialization of global lu_* data.
1918 */ 1930 */
@@ -1947,9 +1959,7 @@ int lu_global_init(void)
1947 * inode, one for ea. Unfortunately setting this high value results in 1959 * inode, one for ea. Unfortunately setting this high value results in
1948 * lu_object/inode cache consuming all the memory. 1960 * lu_object/inode cache consuming all the memory.
1949 */ 1961 */
1950 lu_site_shrinker = set_shrinker(DEFAULT_SEEKS, lu_cache_shrink); 1962 register_shrinker(&lu_site_shrinker);
1951 if (lu_site_shrinker == NULL)
1952 return -ENOMEM;
1953 1963
1954 return result; 1964 return result;
1955} 1965}
@@ -1959,11 +1969,7 @@ int lu_global_init(void)
1959 */ 1969 */
1960void lu_global_fini(void) 1970void lu_global_fini(void)
1961{ 1971{
1962 if (lu_site_shrinker != NULL) { 1972 unregister_shrinker(&lu_site_shrinker);
1963 remove_shrinker(lu_site_shrinker);
1964 lu_site_shrinker = NULL;
1965 }
1966
1967 lu_context_key_degister(&lu_global_key); 1973 lu_context_key_degister(&lu_global_key);
1968 1974
1969 /* 1975 /*
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
index 9013745ab105..e90c8fb7da6a 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
@@ -121,13 +121,6 @@ static struct ptlrpc_enc_page_pool {
121} page_pools; 121} page_pools;
122 122
123/* 123/*
124 * memory shrinker
125 */
126const int pools_shrinker_seeks = DEFAULT_SEEKS;
127static struct shrinker *pools_shrinker = NULL;
128
129
130/*
131 * /proc/fs/lustre/sptlrpc/encrypt_page_pools 124 * /proc/fs/lustre/sptlrpc/encrypt_page_pools
132 */ 125 */
133int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v) 126int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
@@ -226,30 +219,46 @@ static void enc_pools_release_free_pages(long npages)
226} 219}
227 220
228/* 221/*
229 * could be called frequently for query (@nr_to_scan == 0).
230 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. 222 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
231 */ 223 */
232static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) 224static unsigned long enc_pools_shrink_count(struct shrinker *s,
225 struct shrink_control *sc)
233{ 226{
234 if (unlikely(shrink_param(sc, nr_to_scan) != 0)) { 227 /*
228 * if no pool access for a long time, we consider it's fully idle.
229 * a little race here is fine.
230 */
231 if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access >
232 CACHE_QUIESCENT_PERIOD)) {
235 spin_lock(&page_pools.epp_lock); 233 spin_lock(&page_pools.epp_lock);
236 shrink_param(sc, nr_to_scan) = min_t(unsigned long, 234 page_pools.epp_idle_idx = IDLE_IDX_MAX;
237 shrink_param(sc, nr_to_scan),
238 page_pools.epp_free_pages -
239 PTLRPC_MAX_BRW_PAGES);
240 if (shrink_param(sc, nr_to_scan) > 0) {
241 enc_pools_release_free_pages(shrink_param(sc,
242 nr_to_scan));
243 CDEBUG(D_SEC, "released %ld pages, %ld left\n",
244 (long)shrink_param(sc, nr_to_scan),
245 page_pools.epp_free_pages);
246
247 page_pools.epp_st_shrinks++;
248 page_pools.epp_last_shrink = cfs_time_current_sec();
249 }
250 spin_unlock(&page_pools.epp_lock); 235 spin_unlock(&page_pools.epp_lock);
251 } 236 }
252 237
238 LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
239 return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
240 (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
241}
242
243/*
244 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
245 */
246static unsigned long enc_pools_shrink_scan(struct shrinker *s,
247 struct shrink_control *sc)
248{
249 spin_lock(&page_pools.epp_lock);
250 sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan,
251 page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES);
252 if (sc->nr_to_scan > 0) {
253 enc_pools_release_free_pages(sc->nr_to_scan);
254 CDEBUG(D_SEC, "released %ld pages, %ld left\n",
255 (long)sc->nr_to_scan, page_pools.epp_free_pages);
256
257 page_pools.epp_st_shrinks++;
258 page_pools.epp_last_shrink = cfs_time_current_sec();
259 }
260 spin_unlock(&page_pools.epp_lock);
261
253 /* 262 /*
254 * if no pool access for a long time, we consider it's fully idle. 263 * if no pool access for a long time, we consider it's fully idle.
255 * a little race here is fine. 264 * a little race here is fine.
@@ -262,8 +271,7 @@ static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
262 } 271 }
263 272
264 LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); 273 LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
265 return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) * 274 return sc->nr_to_scan;
266 (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
267} 275}
268 276
269static inline 277static inline
@@ -699,6 +707,12 @@ static inline void enc_pools_free(void)
699 sizeof(*page_pools.epp_pools)); 707 sizeof(*page_pools.epp_pools));
700} 708}
701 709
710static struct shrinker pools_shrinker = {
711 .count_objects = enc_pools_shrink_count,
712 .scan_objects = enc_pools_shrink_scan,
713 .seeks = DEFAULT_SEEKS,
714};
715
702int sptlrpc_enc_pool_init(void) 716int sptlrpc_enc_pool_init(void)
703{ 717{
704 /* 718 /*
@@ -736,12 +750,7 @@ int sptlrpc_enc_pool_init(void)
736 if (page_pools.epp_pools == NULL) 750 if (page_pools.epp_pools == NULL)
737 return -ENOMEM; 751 return -ENOMEM;
738 752
739 pools_shrinker = set_shrinker(pools_shrinker_seeks, 753 register_shrinker(&pools_shrinker);
740 enc_pools_shrink);
741 if (pools_shrinker == NULL) {
742 enc_pools_free();
743 return -ENOMEM;
744 }
745 754
746 return 0; 755 return 0;
747} 756}
@@ -750,11 +759,10 @@ void sptlrpc_enc_pool_fini(void)
750{ 759{
751 unsigned long cleaned, npools; 760 unsigned long cleaned, npools;
752 761
753 LASSERT(pools_shrinker);
754 LASSERT(page_pools.epp_pools); 762 LASSERT(page_pools.epp_pools);
755 LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages); 763 LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages);
756 764
757 remove_shrinker(pools_shrinker); 765 unregister_shrinker(&pools_shrinker);
758 766
759 npools = npages_to_npools(page_pools.epp_total_pages); 767 npools = npages_to_npools(page_pools.epp_total_pages);
760 cleaned = enc_pools_cleanup(page_pools.epp_pools, npools); 768 cleaned = enc_pools_cleanup(page_pools.epp_pools, npools);
diff --git a/fs/dcache.c b/fs/dcache.c
index 4d9df3c940e6..c932ed32c77b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -37,6 +37,7 @@
37#include <linux/rculist_bl.h> 37#include <linux/rculist_bl.h>
38#include <linux/prefetch.h> 38#include <linux/prefetch.h>
39#include <linux/ratelimit.h> 39#include <linux/ratelimit.h>
40#include <linux/list_lru.h>
40#include "internal.h" 41#include "internal.h"
41#include "mount.h" 42#include "mount.h"
42 43
@@ -48,7 +49,7 @@
48 * - the dcache hash table 49 * - the dcache hash table
49 * s_anon bl list spinlock protects: 50 * s_anon bl list spinlock protects:
50 * - the s_anon list (see __d_drop) 51 * - the s_anon list (see __d_drop)
51 * dcache_lru_lock protects: 52 * dentry->d_sb->s_dentry_lru_lock protects:
52 * - the dcache lru lists and counters 53 * - the dcache lru lists and counters
53 * d_lock protects: 54 * d_lock protects:
54 * - d_flags 55 * - d_flags
@@ -63,7 +64,7 @@
63 * Ordering: 64 * Ordering:
64 * dentry->d_inode->i_lock 65 * dentry->d_inode->i_lock
65 * dentry->d_lock 66 * dentry->d_lock
66 * dcache_lru_lock 67 * dentry->d_sb->s_dentry_lru_lock
67 * dcache_hash_bucket lock 68 * dcache_hash_bucket lock
68 * s_anon lock 69 * s_anon lock
69 * 70 *
@@ -81,7 +82,6 @@
81int sysctl_vfs_cache_pressure __read_mostly = 100; 82int sysctl_vfs_cache_pressure __read_mostly = 100;
82EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); 83EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
83 84
84static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
85__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); 85__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
86 86
87EXPORT_SYMBOL(rename_lock); 87EXPORT_SYMBOL(rename_lock);
@@ -146,23 +146,47 @@ struct dentry_stat_t dentry_stat = {
146 .age_limit = 45, 146 .age_limit = 45,
147}; 147};
148 148
149static DEFINE_PER_CPU(unsigned int, nr_dentry); 149static DEFINE_PER_CPU(long, nr_dentry);
150static DEFINE_PER_CPU(long, nr_dentry_unused);
150 151
151#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) 152#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
152static int get_nr_dentry(void) 153
154/*
155 * Here we resort to our own counters instead of using generic per-cpu counters
156 * for consistency with what the vfs inode code does. We are expected to harvest
157 * better code and performance by having our own specialized counters.
158 *
159 * Please note that the loop is done over all possible CPUs, not over all online
160 * CPUs. The reason for this is that we don't want to play games with CPUs going
161 * on and off. If one of them goes off, we will just keep their counters.
162 *
163 * glommer: See cffbc8a for details, and if you ever intend to change this,
164 * please update all vfs counters to match.
165 */
166static long get_nr_dentry(void)
153{ 167{
154 int i; 168 int i;
155 int sum = 0; 169 long sum = 0;
156 for_each_possible_cpu(i) 170 for_each_possible_cpu(i)
157 sum += per_cpu(nr_dentry, i); 171 sum += per_cpu(nr_dentry, i);
158 return sum < 0 ? 0 : sum; 172 return sum < 0 ? 0 : sum;
159} 173}
160 174
175static long get_nr_dentry_unused(void)
176{
177 int i;
178 long sum = 0;
179 for_each_possible_cpu(i)
180 sum += per_cpu(nr_dentry_unused, i);
181 return sum < 0 ? 0 : sum;
182}
183
161int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, 184int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
162 size_t *lenp, loff_t *ppos) 185 size_t *lenp, loff_t *ppos)
163{ 186{
164 dentry_stat.nr_dentry = get_nr_dentry(); 187 dentry_stat.nr_dentry = get_nr_dentry();
165 return proc_dointvec(table, write, buffer, lenp, ppos); 188 dentry_stat.nr_unused = get_nr_dentry_unused();
189 return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
166} 190}
167#endif 191#endif
168 192
@@ -333,52 +357,35 @@ static void dentry_unlink_inode(struct dentry * dentry)
333} 357}
334 358
335/* 359/*
336 * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held. 360 * dentry_lru_(add|del) must be called with d_lock held.
337 */ 361 */
338static void dentry_lru_add(struct dentry *dentry) 362static void dentry_lru_add(struct dentry *dentry)
339{ 363{
340 if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) { 364 if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) {
341 spin_lock(&dcache_lru_lock); 365 if (list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru))
366 this_cpu_inc(nr_dentry_unused);
342 dentry->d_flags |= DCACHE_LRU_LIST; 367 dentry->d_flags |= DCACHE_LRU_LIST;
343 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
344 dentry->d_sb->s_nr_dentry_unused++;
345 dentry_stat.nr_unused++;
346 spin_unlock(&dcache_lru_lock);
347 } 368 }
348} 369}
349 370
350static void __dentry_lru_del(struct dentry *dentry)
351{
352 list_del_init(&dentry->d_lru);
353 dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
354 dentry->d_sb->s_nr_dentry_unused--;
355 dentry_stat.nr_unused--;
356}
357
358/* 371/*
359 * Remove a dentry with references from the LRU. 372 * Remove a dentry with references from the LRU.
373 *
374 * If we are on the shrink list, then we can get to try_prune_one_dentry() and
375 * lose our last reference through the parent walk. In this case, we need to
376 * remove ourselves from the shrink list, not the LRU.
360 */ 377 */
361static void dentry_lru_del(struct dentry *dentry) 378static void dentry_lru_del(struct dentry *dentry)
362{ 379{
363 if (!list_empty(&dentry->d_lru)) { 380 if (dentry->d_flags & DCACHE_SHRINK_LIST) {
364 spin_lock(&dcache_lru_lock); 381 list_del_init(&dentry->d_lru);
365 __dentry_lru_del(dentry); 382 dentry->d_flags &= ~DCACHE_SHRINK_LIST;
366 spin_unlock(&dcache_lru_lock); 383 return;
367 } 384 }
368}
369 385
370static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list) 386 if (list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru))
371{ 387 this_cpu_dec(nr_dentry_unused);
372 spin_lock(&dcache_lru_lock); 388 dentry->d_flags &= ~DCACHE_LRU_LIST;
373 if (list_empty(&dentry->d_lru)) {
374 dentry->d_flags |= DCACHE_LRU_LIST;
375 list_add_tail(&dentry->d_lru, list);
376 dentry->d_sb->s_nr_dentry_unused++;
377 dentry_stat.nr_unused++;
378 } else {
379 list_move_tail(&dentry->d_lru, list);
380 }
381 spin_unlock(&dcache_lru_lock);
382} 389}
383 390
384/** 391/**
@@ -474,7 +481,8 @@ EXPORT_SYMBOL(d_drop);
474 * If ref is non-zero, then decrement the refcount too. 481 * If ref is non-zero, then decrement the refcount too.
475 * Returns dentry requiring refcount drop, or NULL if we're done. 482 * Returns dentry requiring refcount drop, or NULL if we're done.
476 */ 483 */
477static inline struct dentry *dentry_kill(struct dentry *dentry) 484static inline struct dentry *
485dentry_kill(struct dentry *dentry, int unlock_on_failure)
478 __releases(dentry->d_lock) 486 __releases(dentry->d_lock)
479{ 487{
480 struct inode *inode; 488 struct inode *inode;
@@ -483,8 +491,10 @@ static inline struct dentry *dentry_kill(struct dentry *dentry)
483 inode = dentry->d_inode; 491 inode = dentry->d_inode;
484 if (inode && !spin_trylock(&inode->i_lock)) { 492 if (inode && !spin_trylock(&inode->i_lock)) {
485relock: 493relock:
486 spin_unlock(&dentry->d_lock); 494 if (unlock_on_failure) {
487 cpu_relax(); 495 spin_unlock(&dentry->d_lock);
496 cpu_relax();
497 }
488 return dentry; /* try again with same dentry */ 498 return dentry; /* try again with same dentry */
489 } 499 }
490 if (IS_ROOT(dentry)) 500 if (IS_ROOT(dentry))
@@ -567,7 +577,7 @@ repeat:
567 return; 577 return;
568 578
569kill_it: 579kill_it:
570 dentry = dentry_kill(dentry); 580 dentry = dentry_kill(dentry, 1);
571 if (dentry) 581 if (dentry)
572 goto repeat; 582 goto repeat;
573} 583}
@@ -787,12 +797,12 @@ EXPORT_SYMBOL(d_prune_aliases);
787 * 797 *
788 * This may fail if locks cannot be acquired no problem, just try again. 798 * This may fail if locks cannot be acquired no problem, just try again.
789 */ 799 */
790static void try_prune_one_dentry(struct dentry *dentry) 800static struct dentry * try_prune_one_dentry(struct dentry *dentry)
791 __releases(dentry->d_lock) 801 __releases(dentry->d_lock)
792{ 802{
793 struct dentry *parent; 803 struct dentry *parent;
794 804
795 parent = dentry_kill(dentry); 805 parent = dentry_kill(dentry, 0);
796 /* 806 /*
797 * If dentry_kill returns NULL, we have nothing more to do. 807 * If dentry_kill returns NULL, we have nothing more to do.
798 * if it returns the same dentry, trylocks failed. In either 808 * if it returns the same dentry, trylocks failed. In either
@@ -804,17 +814,18 @@ static void try_prune_one_dentry(struct dentry *dentry)
804 * fragmentation. 814 * fragmentation.
805 */ 815 */
806 if (!parent) 816 if (!parent)
807 return; 817 return NULL;
808 if (parent == dentry) 818 if (parent == dentry)
809 return; 819 return dentry;
810 820
811 /* Prune ancestors. */ 821 /* Prune ancestors. */
812 dentry = parent; 822 dentry = parent;
813 while (dentry) { 823 while (dentry) {
814 if (lockref_put_or_lock(&dentry->d_lockref)) 824 if (lockref_put_or_lock(&dentry->d_lockref))
815 return; 825 return NULL;
816 dentry = dentry_kill(dentry); 826 dentry = dentry_kill(dentry, 1);
817 } 827 }
828 return NULL;
818} 829}
819 830
820static void shrink_dentry_list(struct list_head *list) 831static void shrink_dentry_list(struct list_head *list)
@@ -833,76 +844,143 @@ static void shrink_dentry_list(struct list_head *list)
833 } 844 }
834 845
835 /* 846 /*
847 * The dispose list is isolated and dentries are not accounted
848 * to the LRU here, so we can simply remove it from the list
849 * here regardless of whether it is referenced or not.
850 */
851 list_del_init(&dentry->d_lru);
852 dentry->d_flags &= ~DCACHE_SHRINK_LIST;
853
854 /*
836 * We found an inuse dentry which was not removed from 855 * We found an inuse dentry which was not removed from
837 * the LRU because of laziness during lookup. Do not free 856 * the LRU because of laziness during lookup. Do not free it.
838 * it - just keep it off the LRU list.
839 */ 857 */
840 if (dentry->d_lockref.count) { 858 if (dentry->d_lockref.count) {
841 dentry_lru_del(dentry);
842 spin_unlock(&dentry->d_lock); 859 spin_unlock(&dentry->d_lock);
843 continue; 860 continue;
844 } 861 }
845
846 rcu_read_unlock(); 862 rcu_read_unlock();
847 863
848 try_prune_one_dentry(dentry); 864 dentry = try_prune_one_dentry(dentry);
849 865
850 rcu_read_lock(); 866 rcu_read_lock();
867 if (dentry) {
868 dentry->d_flags |= DCACHE_SHRINK_LIST;
869 list_add(&dentry->d_lru, list);
870 spin_unlock(&dentry->d_lock);
871 }
851 } 872 }
852 rcu_read_unlock(); 873 rcu_read_unlock();
853} 874}
854 875
876static enum lru_status
877dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg)
878{
879 struct list_head *freeable = arg;
880 struct dentry *dentry = container_of(item, struct dentry, d_lru);
881
882
883 /*
884 * we are inverting the lru lock/dentry->d_lock here,
885 * so use a trylock. If we fail to get the lock, just skip
886 * it
887 */
888 if (!spin_trylock(&dentry->d_lock))
889 return LRU_SKIP;
890
891 /*
892 * Referenced dentries are still in use. If they have active
893 * counts, just remove them from the LRU. Otherwise give them
894 * another pass through the LRU.
895 */
896 if (dentry->d_lockref.count) {
897 list_del_init(&dentry->d_lru);
898 spin_unlock(&dentry->d_lock);
899 return LRU_REMOVED;
900 }
901
902 if (dentry->d_flags & DCACHE_REFERENCED) {
903 dentry->d_flags &= ~DCACHE_REFERENCED;
904 spin_unlock(&dentry->d_lock);
905
906 /*
907 * The list move itself will be made by the common LRU code. At
908 * this point, we've dropped the dentry->d_lock but keep the
909 * lru lock. This is safe to do, since every list movement is
910 * protected by the lru lock even if both locks are held.
911 *
912 * This is guaranteed by the fact that all LRU management
913 * functions are intermediated by the LRU API calls like
914 * list_lru_add and list_lru_del. List movement in this file
915 * only ever occur through this functions or through callbacks
916 * like this one, that are called from the LRU API.
917 *
918 * The only exceptions to this are functions like
919 * shrink_dentry_list, and code that first checks for the
920 * DCACHE_SHRINK_LIST flag. Those are guaranteed to be
921 * operating only with stack provided lists after they are
922 * properly isolated from the main list. It is thus, always a
923 * local access.
924 */
925 return LRU_ROTATE;
926 }
927
928 dentry->d_flags |= DCACHE_SHRINK_LIST;
929 list_move_tail(&dentry->d_lru, freeable);
930 this_cpu_dec(nr_dentry_unused);
931 spin_unlock(&dentry->d_lock);
932
933 return LRU_REMOVED;
934}
935
855/** 936/**
856 * prune_dcache_sb - shrink the dcache 937 * prune_dcache_sb - shrink the dcache
857 * @sb: superblock 938 * @sb: superblock
858 * @count: number of entries to try to free 939 * @nr_to_scan : number of entries to try to free
940 * @nid: which node to scan for freeable entities
859 * 941 *
860 * Attempt to shrink the superblock dcache LRU by @count entries. This is 942 * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
861 * done when we need more memory an called from the superblock shrinker 943 * done when we need more memory an called from the superblock shrinker
862 * function. 944 * function.
863 * 945 *
864 * This function may fail to free any resources if all the dentries are in 946 * This function may fail to free any resources if all the dentries are in
865 * use. 947 * use.
866 */ 948 */
867void prune_dcache_sb(struct super_block *sb, int count) 949long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan,
950 int nid)
868{ 951{
869 struct dentry *dentry; 952 LIST_HEAD(dispose);
870 LIST_HEAD(referenced); 953 long freed;
871 LIST_HEAD(tmp);
872 954
873relock: 955 freed = list_lru_walk_node(&sb->s_dentry_lru, nid, dentry_lru_isolate,
874 spin_lock(&dcache_lru_lock); 956 &dispose, &nr_to_scan);
875 while (!list_empty(&sb->s_dentry_lru)) { 957 shrink_dentry_list(&dispose);
876 dentry = list_entry(sb->s_dentry_lru.prev, 958 return freed;
877 struct dentry, d_lru); 959}
878 BUG_ON(dentry->d_sb != sb);
879
880 if (!spin_trylock(&dentry->d_lock)) {
881 spin_unlock(&dcache_lru_lock);
882 cpu_relax();
883 goto relock;
884 }
885 960
886 if (dentry->d_flags & DCACHE_REFERENCED) { 961static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
887 dentry->d_flags &= ~DCACHE_REFERENCED; 962 spinlock_t *lru_lock, void *arg)
888 list_move(&dentry->d_lru, &referenced); 963{
889 spin_unlock(&dentry->d_lock); 964 struct list_head *freeable = arg;
890 } else { 965 struct dentry *dentry = container_of(item, struct dentry, d_lru);
891 list_move_tail(&dentry->d_lru, &tmp);
892 dentry->d_flags |= DCACHE_SHRINK_LIST;
893 spin_unlock(&dentry->d_lock);
894 if (!--count)
895 break;
896 }
897 cond_resched_lock(&dcache_lru_lock);
898 }
899 if (!list_empty(&referenced))
900 list_splice(&referenced, &sb->s_dentry_lru);
901 spin_unlock(&dcache_lru_lock);
902 966
903 shrink_dentry_list(&tmp); 967 /*
968 * we are inverting the lru lock/dentry->d_lock here,
969 * so use a trylock. If we fail to get the lock, just skip
970 * it
971 */
972 if (!spin_trylock(&dentry->d_lock))
973 return LRU_SKIP;
974
975 dentry->d_flags |= DCACHE_SHRINK_LIST;
976 list_move_tail(&dentry->d_lru, freeable);
977 this_cpu_dec(nr_dentry_unused);
978 spin_unlock(&dentry->d_lock);
979
980 return LRU_REMOVED;
904} 981}
905 982
983
906/** 984/**
907 * shrink_dcache_sb - shrink dcache for a superblock 985 * shrink_dcache_sb - shrink dcache for a superblock
908 * @sb: superblock 986 * @sb: superblock
@@ -912,16 +990,17 @@ relock:
912 */ 990 */
913void shrink_dcache_sb(struct super_block *sb) 991void shrink_dcache_sb(struct super_block *sb)
914{ 992{
915 LIST_HEAD(tmp); 993 long freed;
916 994
917 spin_lock(&dcache_lru_lock); 995 do {
918 while (!list_empty(&sb->s_dentry_lru)) { 996 LIST_HEAD(dispose);
919 list_splice_init(&sb->s_dentry_lru, &tmp); 997
920 spin_unlock(&dcache_lru_lock); 998 freed = list_lru_walk(&sb->s_dentry_lru,
921 shrink_dentry_list(&tmp); 999 dentry_lru_isolate_shrink, &dispose, UINT_MAX);
922 spin_lock(&dcache_lru_lock); 1000
923 } 1001 this_cpu_sub(nr_dentry_unused, freed);
924 spin_unlock(&dcache_lru_lock); 1002 shrink_dentry_list(&dispose);
1003 } while (freed > 0);
925} 1004}
926EXPORT_SYMBOL(shrink_dcache_sb); 1005EXPORT_SYMBOL(shrink_dcache_sb);
927 1006
@@ -1283,7 +1362,8 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
1283 if (dentry->d_lockref.count) { 1362 if (dentry->d_lockref.count) {
1284 dentry_lru_del(dentry); 1363 dentry_lru_del(dentry);
1285 } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { 1364 } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1286 dentry_lru_move_list(dentry, &data->dispose); 1365 dentry_lru_del(dentry);
1366 list_add_tail(&dentry->d_lru, &data->dispose);
1287 dentry->d_flags |= DCACHE_SHRINK_LIST; 1367 dentry->d_flags |= DCACHE_SHRINK_LIST;
1288 data->found++; 1368 data->found++;
1289 ret = D_WALK_NORETRY; 1369 ret = D_WALK_NORETRY;
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index c00e055b6282..9fd702f5bfb2 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -44,6 +44,7 @@ static void drop_slab(void)
44 .gfp_mask = GFP_KERNEL, 44 .gfp_mask = GFP_KERNEL,
45 }; 45 };
46 46
47 nodes_setall(shrink.nodes_to_scan);
47 do { 48 do {
48 nr_objects = shrink_slab(&shrink, 1000, 1000); 49 nr_objects = shrink_slab(&shrink, 1000, 1000);
49 } while (nr_objects > 10); 50 } while (nr_objects > 10);
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 2d1bdbe78c04..3981ff783950 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -931,13 +931,15 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
931 struct ext4_inode_info *ei; 931 struct ext4_inode_info *ei;
932 struct list_head *cur, *tmp; 932 struct list_head *cur, *tmp;
933 LIST_HEAD(skipped); 933 LIST_HEAD(skipped);
934 int ret, nr_shrunk = 0; 934 int nr_shrunk = 0;
935 int retried = 0, skip_precached = 1, nr_skipped = 0; 935 int retried = 0, skip_precached = 1, nr_skipped = 0;
936 936
937 spin_lock(&sbi->s_es_lru_lock); 937 spin_lock(&sbi->s_es_lru_lock);
938 938
939retry: 939retry:
940 list_for_each_safe(cur, tmp, &sbi->s_es_lru) { 940 list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
941 int shrunk;
942
941 /* 943 /*
942 * If we have already reclaimed all extents from extent 944 * If we have already reclaimed all extents from extent
943 * status tree, just stop the loop immediately. 945 * status tree, just stop the loop immediately.
@@ -964,13 +966,13 @@ retry:
964 continue; 966 continue;
965 967
966 write_lock(&ei->i_es_lock); 968 write_lock(&ei->i_es_lock);
967 ret = __es_try_to_reclaim_extents(ei, nr_to_scan); 969 shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
968 if (ei->i_es_lru_nr == 0) 970 if (ei->i_es_lru_nr == 0)
969 list_del_init(&ei->i_es_lru); 971 list_del_init(&ei->i_es_lru);
970 write_unlock(&ei->i_es_lock); 972 write_unlock(&ei->i_es_lock);
971 973
972 nr_shrunk += ret; 974 nr_shrunk += shrunk;
973 nr_to_scan -= ret; 975 nr_to_scan -= shrunk;
974 if (nr_to_scan == 0) 976 if (nr_to_scan == 0)
975 break; 977 break;
976 } 978 }
@@ -1007,7 +1009,20 @@ retry:
1007 return nr_shrunk; 1009 return nr_shrunk;
1008} 1010}
1009 1011
1010static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) 1012static unsigned long ext4_es_count(struct shrinker *shrink,
1013 struct shrink_control *sc)
1014{
1015 unsigned long nr;
1016 struct ext4_sb_info *sbi;
1017
1018 sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
1019 nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
1020 trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr);
1021 return nr;
1022}
1023
1024static unsigned long ext4_es_scan(struct shrinker *shrink,
1025 struct shrink_control *sc)
1011{ 1026{
1012 struct ext4_sb_info *sbi = container_of(shrink, 1027 struct ext4_sb_info *sbi = container_of(shrink,
1013 struct ext4_sb_info, s_es_shrinker); 1028 struct ext4_sb_info, s_es_shrinker);
@@ -1022,9 +1037,8 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
1022 1037
1023 nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); 1038 nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
1024 1039
1025 ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
1026 trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); 1040 trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
1027 return ret; 1041 return nr_shrunk;
1028} 1042}
1029 1043
1030void ext4_es_register_shrinker(struct ext4_sb_info *sbi) 1044void ext4_es_register_shrinker(struct ext4_sb_info *sbi)
@@ -1032,7 +1046,8 @@ void ext4_es_register_shrinker(struct ext4_sb_info *sbi)
1032 INIT_LIST_HEAD(&sbi->s_es_lru); 1046 INIT_LIST_HEAD(&sbi->s_es_lru);
1033 spin_lock_init(&sbi->s_es_lru_lock); 1047 spin_lock_init(&sbi->s_es_lru_lock);
1034 sbi->s_es_last_sorted = 0; 1048 sbi->s_es_last_sorted = 0;
1035 sbi->s_es_shrinker.shrink = ext4_es_shrink; 1049 sbi->s_es_shrinker.scan_objects = ext4_es_scan;
1050 sbi->s_es_shrinker.count_objects = ext4_es_count;
1036 sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; 1051 sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
1037 register_shrinker(&sbi->s_es_shrinker); 1052 register_shrinker(&sbi->s_es_shrinker);
1038} 1053}
@@ -1076,7 +1091,7 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
1076 struct ext4_es_tree *tree = &ei->i_es_tree; 1091 struct ext4_es_tree *tree = &ei->i_es_tree;
1077 struct rb_node *node; 1092 struct rb_node *node;
1078 struct extent_status *es; 1093 struct extent_status *es;
1079 int nr_shrunk = 0; 1094 unsigned long nr_shrunk = 0;
1080 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, 1095 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
1081 DEFAULT_RATELIMIT_BURST); 1096 DEFAULT_RATELIMIT_BURST);
1082 1097
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 722329cac98f..c2f41b4d00b9 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1427,21 +1427,22 @@ __acquires(&lru_lock)
1427 * gfs2_dispose_glock_lru() above. 1427 * gfs2_dispose_glock_lru() above.
1428 */ 1428 */
1429 1429
1430static void gfs2_scan_glock_lru(int nr) 1430static long gfs2_scan_glock_lru(int nr)
1431{ 1431{
1432 struct gfs2_glock *gl; 1432 struct gfs2_glock *gl;
1433 LIST_HEAD(skipped); 1433 LIST_HEAD(skipped);
1434 LIST_HEAD(dispose); 1434 LIST_HEAD(dispose);
1435 long freed = 0;
1435 1436
1436 spin_lock(&lru_lock); 1437 spin_lock(&lru_lock);
1437 while(nr && !list_empty(&lru_list)) { 1438 while ((nr-- >= 0) && !list_empty(&lru_list)) {
1438 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); 1439 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
1439 1440
1440 /* Test for being demotable */ 1441 /* Test for being demotable */
1441 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 1442 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
1442 list_move(&gl->gl_lru, &dispose); 1443 list_move(&gl->gl_lru, &dispose);
1443 atomic_dec(&lru_count); 1444 atomic_dec(&lru_count);
1444 nr--; 1445 freed++;
1445 continue; 1446 continue;
1446 } 1447 }
1447 1448
@@ -1451,23 +1452,28 @@ static void gfs2_scan_glock_lru(int nr)
1451 if (!list_empty(&dispose)) 1452 if (!list_empty(&dispose))
1452 gfs2_dispose_glock_lru(&dispose); 1453 gfs2_dispose_glock_lru(&dispose);
1453 spin_unlock(&lru_lock); 1454 spin_unlock(&lru_lock);
1455
1456 return freed;
1454} 1457}
1455 1458
1456static int gfs2_shrink_glock_memory(struct shrinker *shrink, 1459static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink,
1457 struct shrink_control *sc) 1460 struct shrink_control *sc)
1458{ 1461{
1459 if (sc->nr_to_scan) { 1462 if (!(sc->gfp_mask & __GFP_FS))
1460 if (!(sc->gfp_mask & __GFP_FS)) 1463 return SHRINK_STOP;
1461 return -1; 1464 return gfs2_scan_glock_lru(sc->nr_to_scan);
1462 gfs2_scan_glock_lru(sc->nr_to_scan); 1465}
1463 }
1464 1466
1465 return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure; 1467static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink,
1468 struct shrink_control *sc)
1469{
1470 return vfs_pressure_ratio(atomic_read(&lru_count));
1466} 1471}
1467 1472
1468static struct shrinker glock_shrinker = { 1473static struct shrinker glock_shrinker = {
1469 .shrink = gfs2_shrink_glock_memory,
1470 .seeks = DEFAULT_SEEKS, 1474 .seeks = DEFAULT_SEEKS,
1475 .count_objects = gfs2_glock_shrink_count,
1476 .scan_objects = gfs2_glock_shrink_scan,
1471}; 1477};
1472 1478
1473/** 1479/**
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 7b0f5043cf24..351586e24e30 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -32,7 +32,8 @@
32struct workqueue_struct *gfs2_control_wq; 32struct workqueue_struct *gfs2_control_wq;
33 33
34static struct shrinker qd_shrinker = { 34static struct shrinker qd_shrinker = {
35 .shrink = gfs2_shrink_qd_memory, 35 .count_objects = gfs2_qd_shrink_count,
36 .scan_objects = gfs2_qd_shrink_scan,
36 .seeks = DEFAULT_SEEKS, 37 .seeks = DEFAULT_SEEKS,
37}; 38};
38 39
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 3768c2f40e43..db441359ee8c 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -75,17 +75,16 @@ static LIST_HEAD(qd_lru_list);
75static atomic_t qd_lru_count = ATOMIC_INIT(0); 75static atomic_t qd_lru_count = ATOMIC_INIT(0);
76static DEFINE_SPINLOCK(qd_lru_lock); 76static DEFINE_SPINLOCK(qd_lru_lock);
77 77
78int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) 78unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
79 struct shrink_control *sc)
79{ 80{
80 struct gfs2_quota_data *qd; 81 struct gfs2_quota_data *qd;
81 struct gfs2_sbd *sdp; 82 struct gfs2_sbd *sdp;
82 int nr_to_scan = sc->nr_to_scan; 83 int nr_to_scan = sc->nr_to_scan;
83 84 long freed = 0;
84 if (nr_to_scan == 0)
85 goto out;
86 85
87 if (!(sc->gfp_mask & __GFP_FS)) 86 if (!(sc->gfp_mask & __GFP_FS))
88 return -1; 87 return SHRINK_STOP;
89 88
90 spin_lock(&qd_lru_lock); 89 spin_lock(&qd_lru_lock);
91 while (nr_to_scan && !list_empty(&qd_lru_list)) { 90 while (nr_to_scan && !list_empty(&qd_lru_list)) {
@@ -110,11 +109,16 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc)
110 kmem_cache_free(gfs2_quotad_cachep, qd); 109 kmem_cache_free(gfs2_quotad_cachep, qd);
111 spin_lock(&qd_lru_lock); 110 spin_lock(&qd_lru_lock);
112 nr_to_scan--; 111 nr_to_scan--;
112 freed++;
113 } 113 }
114 spin_unlock(&qd_lru_lock); 114 spin_unlock(&qd_lru_lock);
115 return freed;
116}
115 117
116out: 118unsigned long gfs2_qd_shrink_count(struct shrinker *shrink,
117 return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100; 119 struct shrink_control *sc)
120{
121 return vfs_pressure_ratio(atomic_read(&qd_lru_count));
118} 122}
119 123
120static u64 qd2index(struct gfs2_quota_data *qd) 124static u64 qd2index(struct gfs2_quota_data *qd)
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 4f5e6e44ed83..0f64d9deb1b0 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -53,8 +53,10 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
53 return ret; 53 return ret;
54} 54}
55 55
56extern int gfs2_shrink_qd_memory(struct shrinker *shrink, 56extern unsigned long gfs2_qd_shrink_count(struct shrinker *shrink,
57 struct shrink_control *sc); 57 struct shrink_control *sc);
58extern unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
59 struct shrink_control *sc);
58extern const struct quotactl_ops gfs2_quotactl_ops; 60extern const struct quotactl_ops gfs2_quotactl_ops;
59 61
60#endif /* __QUOTA_DOT_H__ */ 62#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/inode.c b/fs/inode.c
index 93a0625b46e4..b33ba8e021cc 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -17,6 +17,7 @@
17#include <linux/prefetch.h> 17#include <linux/prefetch.h>
18#include <linux/buffer_head.h> /* for inode_has_buffers */ 18#include <linux/buffer_head.h> /* for inode_has_buffers */
19#include <linux/ratelimit.h> 19#include <linux/ratelimit.h>
20#include <linux/list_lru.h>
20#include "internal.h" 21#include "internal.h"
21 22
22/* 23/*
@@ -24,7 +25,7 @@
24 * 25 *
25 * inode->i_lock protects: 26 * inode->i_lock protects:
26 * inode->i_state, inode->i_hash, __iget() 27 * inode->i_state, inode->i_hash, __iget()
27 * inode->i_sb->s_inode_lru_lock protects: 28 * Inode LRU list locks protect:
28 * inode->i_sb->s_inode_lru, inode->i_lru 29 * inode->i_sb->s_inode_lru, inode->i_lru
29 * inode_sb_list_lock protects: 30 * inode_sb_list_lock protects:
30 * sb->s_inodes, inode->i_sb_list 31 * sb->s_inodes, inode->i_sb_list
@@ -37,7 +38,7 @@
37 * 38 *
38 * inode_sb_list_lock 39 * inode_sb_list_lock
39 * inode->i_lock 40 * inode->i_lock
40 * inode->i_sb->s_inode_lru_lock 41 * Inode LRU list locks
41 * 42 *
42 * bdi->wb.list_lock 43 * bdi->wb.list_lock
43 * inode->i_lock 44 * inode->i_lock
@@ -70,33 +71,33 @@ EXPORT_SYMBOL(empty_aops);
70 */ 71 */
71struct inodes_stat_t inodes_stat; 72struct inodes_stat_t inodes_stat;
72 73
73static DEFINE_PER_CPU(unsigned int, nr_inodes); 74static DEFINE_PER_CPU(unsigned long, nr_inodes);
74static DEFINE_PER_CPU(unsigned int, nr_unused); 75static DEFINE_PER_CPU(unsigned long, nr_unused);
75 76
76static struct kmem_cache *inode_cachep __read_mostly; 77static struct kmem_cache *inode_cachep __read_mostly;
77 78
78static int get_nr_inodes(void) 79static long get_nr_inodes(void)
79{ 80{
80 int i; 81 int i;
81 int sum = 0; 82 long sum = 0;
82 for_each_possible_cpu(i) 83 for_each_possible_cpu(i)
83 sum += per_cpu(nr_inodes, i); 84 sum += per_cpu(nr_inodes, i);
84 return sum < 0 ? 0 : sum; 85 return sum < 0 ? 0 : sum;
85} 86}
86 87
87static inline int get_nr_inodes_unused(void) 88static inline long get_nr_inodes_unused(void)
88{ 89{
89 int i; 90 int i;
90 int sum = 0; 91 long sum = 0;
91 for_each_possible_cpu(i) 92 for_each_possible_cpu(i)
92 sum += per_cpu(nr_unused, i); 93 sum += per_cpu(nr_unused, i);
93 return sum < 0 ? 0 : sum; 94 return sum < 0 ? 0 : sum;
94} 95}
95 96
96int get_nr_dirty_inodes(void) 97long get_nr_dirty_inodes(void)
97{ 98{
98 /* not actually dirty inodes, but a wild approximation */ 99 /* not actually dirty inodes, but a wild approximation */
99 int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); 100 long nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
100 return nr_dirty > 0 ? nr_dirty : 0; 101 return nr_dirty > 0 ? nr_dirty : 0;
101} 102}
102 103
@@ -109,7 +110,7 @@ int proc_nr_inodes(ctl_table *table, int write,
109{ 110{
110 inodes_stat.nr_inodes = get_nr_inodes(); 111 inodes_stat.nr_inodes = get_nr_inodes();
111 inodes_stat.nr_unused = get_nr_inodes_unused(); 112 inodes_stat.nr_unused = get_nr_inodes_unused();
112 return proc_dointvec(table, write, buffer, lenp, ppos); 113 return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
113} 114}
114#endif 115#endif
115 116
@@ -401,13 +402,8 @@ EXPORT_SYMBOL(ihold);
401 402
402static void inode_lru_list_add(struct inode *inode) 403static void inode_lru_list_add(struct inode *inode)
403{ 404{
404 spin_lock(&inode->i_sb->s_inode_lru_lock); 405 if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru))
405 if (list_empty(&inode->i_lru)) {
406 list_add(&inode->i_lru, &inode->i_sb->s_inode_lru);
407 inode->i_sb->s_nr_inodes_unused++;
408 this_cpu_inc(nr_unused); 406 this_cpu_inc(nr_unused);
409 }
410 spin_unlock(&inode->i_sb->s_inode_lru_lock);
411} 407}
412 408
413/* 409/*
@@ -425,13 +421,9 @@ void inode_add_lru(struct inode *inode)
425 421
426static void inode_lru_list_del(struct inode *inode) 422static void inode_lru_list_del(struct inode *inode)
427{ 423{
428 spin_lock(&inode->i_sb->s_inode_lru_lock); 424
429 if (!list_empty(&inode->i_lru)) { 425 if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru))
430 list_del_init(&inode->i_lru);
431 inode->i_sb->s_nr_inodes_unused--;
432 this_cpu_dec(nr_unused); 426 this_cpu_dec(nr_unused);
433 }
434 spin_unlock(&inode->i_sb->s_inode_lru_lock);
435} 427}
436 428
437/** 429/**
@@ -675,24 +667,8 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
675 return busy; 667 return busy;
676} 668}
677 669
678static int can_unuse(struct inode *inode)
679{
680 if (inode->i_state & ~I_REFERENCED)
681 return 0;
682 if (inode_has_buffers(inode))
683 return 0;
684 if (atomic_read(&inode->i_count))
685 return 0;
686 if (inode->i_data.nrpages)
687 return 0;
688 return 1;
689}
690
691/* 670/*
692 * Walk the superblock inode LRU for freeable inodes and attempt to free them. 671 * Isolate the inode from the LRU in preparation for freeing it.
693 * This is called from the superblock shrinker function with a number of inodes
694 * to trim from the LRU. Inodes to be freed are moved to a temporary list and
695 * then are freed outside inode_lock by dispose_list().
696 * 672 *
697 * Any inodes which are pinned purely because of attached pagecache have their 673 * Any inodes which are pinned purely because of attached pagecache have their
698 * pagecache removed. If the inode has metadata buffers attached to 674 * pagecache removed. If the inode has metadata buffers attached to
@@ -706,89 +682,82 @@ static int can_unuse(struct inode *inode)
706 * LRU does not have strict ordering. Hence we don't want to reclaim inodes 682 * LRU does not have strict ordering. Hence we don't want to reclaim inodes
707 * with this flag set because they are the inodes that are out of order. 683 * with this flag set because they are the inodes that are out of order.
708 */ 684 */
709void prune_icache_sb(struct super_block *sb, int nr_to_scan) 685static enum lru_status
686inode_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg)
710{ 687{
711 LIST_HEAD(freeable); 688 struct list_head *freeable = arg;
712 int nr_scanned; 689 struct inode *inode = container_of(item, struct inode, i_lru);
713 unsigned long reap = 0;
714 690
715 spin_lock(&sb->s_inode_lru_lock); 691 /*
716 for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) { 692 * we are inverting the lru lock/inode->i_lock here, so use a trylock.
717 struct inode *inode; 693 * If we fail to get the lock, just skip it.
694 */
695 if (!spin_trylock(&inode->i_lock))
696 return LRU_SKIP;
718 697
719 if (list_empty(&sb->s_inode_lru)) 698 /*
720 break; 699 * Referenced or dirty inodes are still in use. Give them another pass
700 * through the LRU as we canot reclaim them now.
701 */
702 if (atomic_read(&inode->i_count) ||
703 (inode->i_state & ~I_REFERENCED)) {
704 list_del_init(&inode->i_lru);
705 spin_unlock(&inode->i_lock);
706 this_cpu_dec(nr_unused);
707 return LRU_REMOVED;
708 }
721 709
722 inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru); 710 /* recently referenced inodes get one more pass */
711 if (inode->i_state & I_REFERENCED) {
712 inode->i_state &= ~I_REFERENCED;
713 spin_unlock(&inode->i_lock);
714 return LRU_ROTATE;
715 }
723 716
724 /* 717 if (inode_has_buffers(inode) || inode->i_data.nrpages) {
725 * we are inverting the sb->s_inode_lru_lock/inode->i_lock here, 718 __iget(inode);
726 * so use a trylock. If we fail to get the lock, just move the 719 spin_unlock(&inode->i_lock);
727 * inode to the back of the list so we don't spin on it. 720 spin_unlock(lru_lock);
728 */ 721 if (remove_inode_buffers(inode)) {
729 if (!spin_trylock(&inode->i_lock)) { 722 unsigned long reap;
730 list_move(&inode->i_lru, &sb->s_inode_lru); 723 reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
731 continue; 724 if (current_is_kswapd())
725 __count_vm_events(KSWAPD_INODESTEAL, reap);
726 else
727 __count_vm_events(PGINODESTEAL, reap);
728 if (current->reclaim_state)
729 current->reclaim_state->reclaimed_slab += reap;
732 } 730 }
731 iput(inode);
732 spin_lock(lru_lock);
733 return LRU_RETRY;
734 }
733 735
734 /* 736 WARN_ON(inode->i_state & I_NEW);
735 * Referenced or dirty inodes are still in use. Give them 737 inode->i_state |= I_FREEING;
736 * another pass through the LRU as we canot reclaim them now. 738 list_move(&inode->i_lru, freeable);
737 */ 739 spin_unlock(&inode->i_lock);
738 if (atomic_read(&inode->i_count) ||
739 (inode->i_state & ~I_REFERENCED)) {
740 list_del_init(&inode->i_lru);
741 spin_unlock(&inode->i_lock);
742 sb->s_nr_inodes_unused--;
743 this_cpu_dec(nr_unused);
744 continue;
745 }
746 740
747 /* recently referenced inodes get one more pass */ 741 this_cpu_dec(nr_unused);
748 if (inode->i_state & I_REFERENCED) { 742 return LRU_REMOVED;
749 inode->i_state &= ~I_REFERENCED; 743}
750 list_move(&inode->i_lru, &sb->s_inode_lru);
751 spin_unlock(&inode->i_lock);
752 continue;
753 }
754 if (inode_has_buffers(inode) || inode->i_data.nrpages) {
755 __iget(inode);
756 spin_unlock(&inode->i_lock);
757 spin_unlock(&sb->s_inode_lru_lock);
758 if (remove_inode_buffers(inode))
759 reap += invalidate_mapping_pages(&inode->i_data,
760 0, -1);
761 iput(inode);
762 spin_lock(&sb->s_inode_lru_lock);
763
764 if (inode != list_entry(sb->s_inode_lru.next,
765 struct inode, i_lru))
766 continue; /* wrong inode or list_empty */
767 /* avoid lock inversions with trylock */
768 if (!spin_trylock(&inode->i_lock))
769 continue;
770 if (!can_unuse(inode)) {
771 spin_unlock(&inode->i_lock);
772 continue;
773 }
774 }
775 WARN_ON(inode->i_state & I_NEW);
776 inode->i_state |= I_FREEING;
777 spin_unlock(&inode->i_lock);
778 744
779 list_move(&inode->i_lru, &freeable); 745/*
780 sb->s_nr_inodes_unused--; 746 * Walk the superblock inode LRU for freeable inodes and attempt to free them.
781 this_cpu_dec(nr_unused); 747 * This is called from the superblock shrinker function with a number of inodes
782 } 748 * to trim from the LRU. Inodes to be freed are moved to a temporary list and
783 if (current_is_kswapd()) 749 * then are freed outside inode_lock by dispose_list().
784 __count_vm_events(KSWAPD_INODESTEAL, reap); 750 */
785 else 751long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan,
786 __count_vm_events(PGINODESTEAL, reap); 752 int nid)
787 spin_unlock(&sb->s_inode_lru_lock); 753{
788 if (current->reclaim_state) 754 LIST_HEAD(freeable);
789 current->reclaim_state->reclaimed_slab += reap; 755 long freed;
790 756
757 freed = list_lru_walk_node(&sb->s_inode_lru, nid, inode_lru_isolate,
758 &freeable, &nr_to_scan);
791 dispose_list(&freeable); 759 dispose_list(&freeable);
760 return freed;
792} 761}
793 762
794static void __wait_on_freeing_inode(struct inode *inode); 763static void __wait_on_freeing_inode(struct inode *inode);
diff --git a/fs/internal.h b/fs/internal.h
index 2be46ea5dd0b..513e0d859a6c 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -114,6 +114,8 @@ extern int open_check_o_direct(struct file *f);
114 * inode.c 114 * inode.c
115 */ 115 */
116extern spinlock_t inode_sb_list_lock; 116extern spinlock_t inode_sb_list_lock;
117extern long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan,
118 int nid);
117extern void inode_add_lru(struct inode *inode); 119extern void inode_add_lru(struct inode *inode);
118 120
119/* 121/*
@@ -121,7 +123,7 @@ extern void inode_add_lru(struct inode *inode);
121 */ 123 */
122extern void inode_wb_list_del(struct inode *inode); 124extern void inode_wb_list_del(struct inode *inode);
123 125
124extern int get_nr_dirty_inodes(void); 126extern long get_nr_dirty_inodes(void);
125extern void evict_inodes(struct super_block *); 127extern void evict_inodes(struct super_block *);
126extern int invalidate_inodes(struct super_block *, bool); 128extern int invalidate_inodes(struct super_block *, bool);
127 129
@@ -130,6 +132,8 @@ extern int invalidate_inodes(struct super_block *, bool);
130 */ 132 */
131extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); 133extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
132extern int d_set_mounted(struct dentry *dentry); 134extern int d_set_mounted(struct dentry *dentry);
135extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan,
136 int nid);
133 137
134/* 138/*
135 * read_write.c 139 * read_write.c
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 8c32ef3ba88e..e519e45bf673 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -86,18 +86,6 @@ static LIST_HEAD(mb_cache_list);
86static LIST_HEAD(mb_cache_lru_list); 86static LIST_HEAD(mb_cache_lru_list);
87static DEFINE_SPINLOCK(mb_cache_spinlock); 87static DEFINE_SPINLOCK(mb_cache_spinlock);
88 88
89/*
90 * What the mbcache registers as to get shrunk dynamically.
91 */
92
93static int mb_cache_shrink_fn(struct shrinker *shrink,
94 struct shrink_control *sc);
95
96static struct shrinker mb_cache_shrinker = {
97 .shrink = mb_cache_shrink_fn,
98 .seeks = DEFAULT_SEEKS,
99};
100
101static inline int 89static inline int
102__mb_cache_entry_is_hashed(struct mb_cache_entry *ce) 90__mb_cache_entry_is_hashed(struct mb_cache_entry *ce)
103{ 91{
@@ -151,7 +139,7 @@ forget:
151 139
152 140
153/* 141/*
154 * mb_cache_shrink_fn() memory pressure callback 142 * mb_cache_shrink_scan() memory pressure callback
155 * 143 *
156 * This function is called by the kernel memory management when memory 144 * This function is called by the kernel memory management when memory
157 * gets low. 145 * gets low.
@@ -159,17 +147,16 @@ forget:
159 * @shrink: (ignored) 147 * @shrink: (ignored)
160 * @sc: shrink_control passed from reclaim 148 * @sc: shrink_control passed from reclaim
161 * 149 *
162 * Returns the number of objects which are present in the cache. 150 * Returns the number of objects freed.
163 */ 151 */
164static int 152static unsigned long
165mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc) 153mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
166{ 154{
167 LIST_HEAD(free_list); 155 LIST_HEAD(free_list);
168 struct mb_cache *cache;
169 struct mb_cache_entry *entry, *tmp; 156 struct mb_cache_entry *entry, *tmp;
170 int count = 0;
171 int nr_to_scan = sc->nr_to_scan; 157 int nr_to_scan = sc->nr_to_scan;
172 gfp_t gfp_mask = sc->gfp_mask; 158 gfp_t gfp_mask = sc->gfp_mask;
159 unsigned long freed = 0;
173 160
174 mb_debug("trying to free %d entries", nr_to_scan); 161 mb_debug("trying to free %d entries", nr_to_scan);
175 spin_lock(&mb_cache_spinlock); 162 spin_lock(&mb_cache_spinlock);
@@ -179,19 +166,37 @@ mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc)
179 struct mb_cache_entry, e_lru_list); 166 struct mb_cache_entry, e_lru_list);
180 list_move_tail(&ce->e_lru_list, &free_list); 167 list_move_tail(&ce->e_lru_list, &free_list);
181 __mb_cache_entry_unhash(ce); 168 __mb_cache_entry_unhash(ce);
169 freed++;
170 }
171 spin_unlock(&mb_cache_spinlock);
172 list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
173 __mb_cache_entry_forget(entry, gfp_mask);
182 } 174 }
175 return freed;
176}
177
178static unsigned long
179mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
180{
181 struct mb_cache *cache;
182 unsigned long count = 0;
183
184 spin_lock(&mb_cache_spinlock);
183 list_for_each_entry(cache, &mb_cache_list, c_cache_list) { 185 list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
184 mb_debug("cache %s (%d)", cache->c_name, 186 mb_debug("cache %s (%d)", cache->c_name,
185 atomic_read(&cache->c_entry_count)); 187 atomic_read(&cache->c_entry_count));
186 count += atomic_read(&cache->c_entry_count); 188 count += atomic_read(&cache->c_entry_count);
187 } 189 }
188 spin_unlock(&mb_cache_spinlock); 190 spin_unlock(&mb_cache_spinlock);
189 list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { 191
190 __mb_cache_entry_forget(entry, gfp_mask); 192 return vfs_pressure_ratio(count);
191 }
192 return (count / 100) * sysctl_vfs_cache_pressure;
193} 193}
194 194
195static struct shrinker mb_cache_shrinker = {
196 .count_objects = mb_cache_shrink_count,
197 .scan_objects = mb_cache_shrink_scan,
198 .seeks = DEFAULT_SEEKS,
199};
195 200
196/* 201/*
197 * mb_cache_create() create a new cache 202 * mb_cache_create() create a new cache
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e79bc6ce828e..de434f309af0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2006,17 +2006,18 @@ static void nfs_access_free_list(struct list_head *head)
2006 } 2006 }
2007} 2007}
2008 2008
2009int nfs_access_cache_shrinker(struct shrinker *shrink, 2009unsigned long
2010 struct shrink_control *sc) 2010nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
2011{ 2011{
2012 LIST_HEAD(head); 2012 LIST_HEAD(head);
2013 struct nfs_inode *nfsi, *next; 2013 struct nfs_inode *nfsi, *next;
2014 struct nfs_access_entry *cache; 2014 struct nfs_access_entry *cache;
2015 int nr_to_scan = sc->nr_to_scan; 2015 int nr_to_scan = sc->nr_to_scan;
2016 gfp_t gfp_mask = sc->gfp_mask; 2016 gfp_t gfp_mask = sc->gfp_mask;
2017 long freed = 0;
2017 2018
2018 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) 2019 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
2019 return (nr_to_scan == 0) ? 0 : -1; 2020 return SHRINK_STOP;
2020 2021
2021 spin_lock(&nfs_access_lru_lock); 2022 spin_lock(&nfs_access_lru_lock);
2022 list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { 2023 list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
@@ -2032,6 +2033,7 @@ int nfs_access_cache_shrinker(struct shrinker *shrink,
2032 struct nfs_access_entry, lru); 2033 struct nfs_access_entry, lru);
2033 list_move(&cache->lru, &head); 2034 list_move(&cache->lru, &head);
2034 rb_erase(&cache->rb_node, &nfsi->access_cache); 2035 rb_erase(&cache->rb_node, &nfsi->access_cache);
2036 freed++;
2035 if (!list_empty(&nfsi->access_cache_entry_lru)) 2037 if (!list_empty(&nfsi->access_cache_entry_lru))
2036 list_move_tail(&nfsi->access_cache_inode_lru, 2038 list_move_tail(&nfsi->access_cache_inode_lru,
2037 &nfs_access_lru_list); 2039 &nfs_access_lru_list);
@@ -2046,7 +2048,13 @@ remove_lru_entry:
2046 } 2048 }
2047 spin_unlock(&nfs_access_lru_lock); 2049 spin_unlock(&nfs_access_lru_lock);
2048 nfs_access_free_list(&head); 2050 nfs_access_free_list(&head);
2049 return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure; 2051 return freed;
2052}
2053
2054unsigned long
2055nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
2056{
2057 return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
2050} 2058}
2051 2059
2052static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head) 2060static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d388302c005f..38da8c2b81ac 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -273,8 +273,10 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
273 const char *ip_addr); 273 const char *ip_addr);
274 274
275/* dir.c */ 275/* dir.c */
276extern int nfs_access_cache_shrinker(struct shrinker *shrink, 276extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
277 struct shrink_control *sc); 277 struct shrink_control *sc);
278extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
279 struct shrink_control *sc);
278struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); 280struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
279int nfs_create(struct inode *, struct dentry *, umode_t, bool); 281int nfs_create(struct inode *, struct dentry *, umode_t, bool);
280int nfs_mkdir(struct inode *, struct dentry *, umode_t); 282int nfs_mkdir(struct inode *, struct dentry *, umode_t);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 5793f24613c8..a03b9c6f9489 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -360,7 +360,8 @@ static void unregister_nfs4_fs(void)
360#endif 360#endif
361 361
362static struct shrinker acl_shrinker = { 362static struct shrinker acl_shrinker = {
363 .shrink = nfs_access_cache_shrinker, 363 .count_objects = nfs_access_cache_count,
364 .scan_objects = nfs_access_cache_scan,
364 .seeks = DEFAULT_SEEKS, 365 .seeks = DEFAULT_SEEKS,
365}; 366};
366 367
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index e76244edd748..9186c7ce0b14 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -59,11 +59,14 @@ static unsigned int longest_chain_cachesize;
59 59
60static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); 60static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
61static void cache_cleaner_func(struct work_struct *unused); 61static void cache_cleaner_func(struct work_struct *unused);
62static int nfsd_reply_cache_shrink(struct shrinker *shrink, 62static unsigned long nfsd_reply_cache_count(struct shrinker *shrink,
63 struct shrink_control *sc); 63 struct shrink_control *sc);
64static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink,
65 struct shrink_control *sc);
64 66
65static struct shrinker nfsd_reply_cache_shrinker = { 67static struct shrinker nfsd_reply_cache_shrinker = {
66 .shrink = nfsd_reply_cache_shrink, 68 .scan_objects = nfsd_reply_cache_scan,
69 .count_objects = nfsd_reply_cache_count,
67 .seeks = 1, 70 .seeks = 1,
68}; 71};
69 72
@@ -232,16 +235,18 @@ nfsd_cache_entry_expired(struct svc_cacherep *rp)
232 * Walk the LRU list and prune off entries that are older than RC_EXPIRE. 235 * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
233 * Also prune the oldest ones when the total exceeds the max number of entries. 236 * Also prune the oldest ones when the total exceeds the max number of entries.
234 */ 237 */
235static void 238static long
236prune_cache_entries(void) 239prune_cache_entries(void)
237{ 240{
238 struct svc_cacherep *rp, *tmp; 241 struct svc_cacherep *rp, *tmp;
242 long freed = 0;
239 243
240 list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { 244 list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) {
241 if (!nfsd_cache_entry_expired(rp) && 245 if (!nfsd_cache_entry_expired(rp) &&
242 num_drc_entries <= max_drc_entries) 246 num_drc_entries <= max_drc_entries)
243 break; 247 break;
244 nfsd_reply_cache_free_locked(rp); 248 nfsd_reply_cache_free_locked(rp);
249 freed++;
245 } 250 }
246 251
247 /* 252 /*
@@ -254,6 +259,7 @@ prune_cache_entries(void)
254 cancel_delayed_work(&cache_cleaner); 259 cancel_delayed_work(&cache_cleaner);
255 else 260 else
256 mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); 261 mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE);
262 return freed;
257} 263}
258 264
259static void 265static void
@@ -264,20 +270,28 @@ cache_cleaner_func(struct work_struct *unused)
264 spin_unlock(&cache_lock); 270 spin_unlock(&cache_lock);
265} 271}
266 272
267static int 273static unsigned long
268nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc) 274nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
269{ 275{
270 unsigned int num; 276 unsigned long num;
271 277
272 spin_lock(&cache_lock); 278 spin_lock(&cache_lock);
273 if (sc->nr_to_scan)
274 prune_cache_entries();
275 num = num_drc_entries; 279 num = num_drc_entries;
276 spin_unlock(&cache_lock); 280 spin_unlock(&cache_lock);
277 281
278 return num; 282 return num;
279} 283}
280 284
285static unsigned long
286nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
287{
288 unsigned long freed;
289
290 spin_lock(&cache_lock);
291 freed = prune_cache_entries();
292 spin_unlock(&cache_lock);
293 return freed;
294}
281/* 295/*
282 * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes 296 * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
283 */ 297 */
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 9a702e193538..831d49a4111f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -687,45 +687,37 @@ int dquot_quota_sync(struct super_block *sb, int type)
687} 687}
688EXPORT_SYMBOL(dquot_quota_sync); 688EXPORT_SYMBOL(dquot_quota_sync);
689 689
690/* Free unused dquots from cache */ 690static unsigned long
691static void prune_dqcache(int count) 691dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
692{ 692{
693 struct list_head *head; 693 struct list_head *head;
694 struct dquot *dquot; 694 struct dquot *dquot;
695 unsigned long freed = 0;
695 696
696 head = free_dquots.prev; 697 head = free_dquots.prev;
697 while (head != &free_dquots && count) { 698 while (head != &free_dquots && sc->nr_to_scan) {
698 dquot = list_entry(head, struct dquot, dq_free); 699 dquot = list_entry(head, struct dquot, dq_free);
699 remove_dquot_hash(dquot); 700 remove_dquot_hash(dquot);
700 remove_free_dquot(dquot); 701 remove_free_dquot(dquot);
701 remove_inuse(dquot); 702 remove_inuse(dquot);
702 do_destroy_dquot(dquot); 703 do_destroy_dquot(dquot);
703 count--; 704 sc->nr_to_scan--;
705 freed++;
704 head = free_dquots.prev; 706 head = free_dquots.prev;
705 } 707 }
708 return freed;
706} 709}
707 710
708/* 711static unsigned long
709 * This is called from kswapd when we think we need some 712dqcache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
710 * more memory
711 */
712static int shrink_dqcache_memory(struct shrinker *shrink,
713 struct shrink_control *sc)
714{ 713{
715 int nr = sc->nr_to_scan; 714 return vfs_pressure_ratio(
716 715 percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS]));
717 if (nr) {
718 spin_lock(&dq_list_lock);
719 prune_dqcache(nr);
720 spin_unlock(&dq_list_lock);
721 }
722 return ((unsigned)
723 percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS])
724 /100) * sysctl_vfs_cache_pressure;
725} 716}
726 717
727static struct shrinker dqcache_shrinker = { 718static struct shrinker dqcache_shrinker = {
728 .shrink = shrink_dqcache_memory, 719 .count_objects = dqcache_shrink_count,
720 .scan_objects = dqcache_shrink_scan,
729 .seeks = DEFAULT_SEEKS, 721 .seeks = DEFAULT_SEEKS,
730}; 722};
731 723
diff --git a/fs/super.c b/fs/super.c
index f6961ea84c56..3a96c9783a8b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -53,11 +53,15 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
53 * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we 53 * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
54 * take a passive reference to the superblock to avoid this from occurring. 54 * take a passive reference to the superblock to avoid this from occurring.
55 */ 55 */
56static int prune_super(struct shrinker *shrink, struct shrink_control *sc) 56static unsigned long super_cache_scan(struct shrinker *shrink,
57 struct shrink_control *sc)
57{ 58{
58 struct super_block *sb; 59 struct super_block *sb;
59 int fs_objects = 0; 60 long fs_objects = 0;
60 int total_objects; 61 long total_objects;
62 long freed = 0;
63 long dentries;
64 long inodes;
61 65
62 sb = container_of(shrink, struct super_block, s_shrink); 66 sb = container_of(shrink, struct super_block, s_shrink);
63 67
@@ -65,46 +69,62 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
65 * Deadlock avoidance. We may hold various FS locks, and we don't want 69 * Deadlock avoidance. We may hold various FS locks, and we don't want
66 * to recurse into the FS that called us in clear_inode() and friends.. 70 * to recurse into the FS that called us in clear_inode() and friends..
67 */ 71 */
68 if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS)) 72 if (!(sc->gfp_mask & __GFP_FS))
69 return -1; 73 return SHRINK_STOP;
70 74
71 if (!grab_super_passive(sb)) 75 if (!grab_super_passive(sb))
72 return -1; 76 return SHRINK_STOP;
73 77
74 if (sb->s_op->nr_cached_objects) 78 if (sb->s_op->nr_cached_objects)
75 fs_objects = sb->s_op->nr_cached_objects(sb); 79 fs_objects = sb->s_op->nr_cached_objects(sb, sc->nid);
76
77 total_objects = sb->s_nr_dentry_unused +
78 sb->s_nr_inodes_unused + fs_objects + 1;
79
80 if (sc->nr_to_scan) {
81 int dentries;
82 int inodes;
83
84 /* proportion the scan between the caches */
85 dentries = (sc->nr_to_scan * sb->s_nr_dentry_unused) /
86 total_objects;
87 inodes = (sc->nr_to_scan * sb->s_nr_inodes_unused) /
88 total_objects;
89 if (fs_objects)
90 fs_objects = (sc->nr_to_scan * fs_objects) /
91 total_objects;
92 /*
93 * prune the dcache first as the icache is pinned by it, then
94 * prune the icache, followed by the filesystem specific caches
95 */
96 prune_dcache_sb(sb, dentries);
97 prune_icache_sb(sb, inodes);
98 80
99 if (fs_objects && sb->s_op->free_cached_objects) { 81 inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid);
100 sb->s_op->free_cached_objects(sb, fs_objects); 82 dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid);
101 fs_objects = sb->s_op->nr_cached_objects(sb); 83 total_objects = dentries + inodes + fs_objects + 1;
102 } 84
103 total_objects = sb->s_nr_dentry_unused + 85 /* proportion the scan between the caches */
104 sb->s_nr_inodes_unused + fs_objects; 86 dentries = mult_frac(sc->nr_to_scan, dentries, total_objects);
87 inodes = mult_frac(sc->nr_to_scan, inodes, total_objects);
88
89 /*
90 * prune the dcache first as the icache is pinned by it, then
91 * prune the icache, followed by the filesystem specific caches
92 */
93 freed = prune_dcache_sb(sb, dentries, sc->nid);
94 freed += prune_icache_sb(sb, inodes, sc->nid);
95
96 if (fs_objects) {
97 fs_objects = mult_frac(sc->nr_to_scan, fs_objects,
98 total_objects);
99 freed += sb->s_op->free_cached_objects(sb, fs_objects,
100 sc->nid);
105 } 101 }
106 102
107 total_objects = (total_objects / 100) * sysctl_vfs_cache_pressure; 103 drop_super(sb);
104 return freed;
105}
106
107static unsigned long super_cache_count(struct shrinker *shrink,
108 struct shrink_control *sc)
109{
110 struct super_block *sb;
111 long total_objects = 0;
112
113 sb = container_of(shrink, struct super_block, s_shrink);
114
115 if (!grab_super_passive(sb))
116 return 0;
117
118 if (sb->s_op && sb->s_op->nr_cached_objects)
119 total_objects = sb->s_op->nr_cached_objects(sb,
120 sc->nid);
121
122 total_objects += list_lru_count_node(&sb->s_dentry_lru,
123 sc->nid);
124 total_objects += list_lru_count_node(&sb->s_inode_lru,
125 sc->nid);
126
127 total_objects = vfs_pressure_ratio(total_objects);
108 drop_super(sb); 128 drop_super(sb);
109 return total_objects; 129 return total_objects;
110} 130}
@@ -175,9 +195,12 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
175 INIT_HLIST_NODE(&s->s_instances); 195 INIT_HLIST_NODE(&s->s_instances);
176 INIT_HLIST_BL_HEAD(&s->s_anon); 196 INIT_HLIST_BL_HEAD(&s->s_anon);
177 INIT_LIST_HEAD(&s->s_inodes); 197 INIT_LIST_HEAD(&s->s_inodes);
178 INIT_LIST_HEAD(&s->s_dentry_lru); 198
179 INIT_LIST_HEAD(&s->s_inode_lru); 199 if (list_lru_init(&s->s_dentry_lru))
180 spin_lock_init(&s->s_inode_lru_lock); 200 goto err_out;
201 if (list_lru_init(&s->s_inode_lru))
202 goto err_out_dentry_lru;
203
181 INIT_LIST_HEAD(&s->s_mounts); 204 INIT_LIST_HEAD(&s->s_mounts);
182 init_rwsem(&s->s_umount); 205 init_rwsem(&s->s_umount);
183 lockdep_set_class(&s->s_umount, &type->s_umount_key); 206 lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -210,11 +233,16 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
210 s->cleancache_poolid = -1; 233 s->cleancache_poolid = -1;
211 234
212 s->s_shrink.seeks = DEFAULT_SEEKS; 235 s->s_shrink.seeks = DEFAULT_SEEKS;
213 s->s_shrink.shrink = prune_super; 236 s->s_shrink.scan_objects = super_cache_scan;
237 s->s_shrink.count_objects = super_cache_count;
214 s->s_shrink.batch = 1024; 238 s->s_shrink.batch = 1024;
239 s->s_shrink.flags = SHRINKER_NUMA_AWARE;
215 } 240 }
216out: 241out:
217 return s; 242 return s;
243
244err_out_dentry_lru:
245 list_lru_destroy(&s->s_dentry_lru);
218err_out: 246err_out:
219 security_sb_free(s); 247 security_sb_free(s);
220#ifdef CONFIG_SMP 248#ifdef CONFIG_SMP
@@ -295,6 +323,9 @@ void deactivate_locked_super(struct super_block *s)
295 323
296 /* caches are now gone, we can safely kill the shrinker now */ 324 /* caches are now gone, we can safely kill the shrinker now */
297 unregister_shrinker(&s->s_shrink); 325 unregister_shrinker(&s->s_shrink);
326 list_lru_destroy(&s->s_dentry_lru);
327 list_lru_destroy(&s->s_inode_lru);
328
298 put_filesystem(fs); 329 put_filesystem(fs);
299 put_super(s); 330 put_super(s);
300 } else { 331 } else {
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 9e1d05666fed..f35135e28e96 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -277,18 +277,25 @@ static int kick_a_thread(void)
277 return 0; 277 return 0;
278} 278}
279 279
280int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) 280unsigned long ubifs_shrink_count(struct shrinker *shrink,
281 struct shrink_control *sc)
281{ 282{
282 int nr = sc->nr_to_scan;
283 int freed, contention = 0;
284 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); 283 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
285 284
286 if (nr == 0) 285 /*
287 /* 286 * Due to the way UBIFS updates the clean znode counter it may
288 * Due to the way UBIFS updates the clean znode counter it may 287 * temporarily be negative.
289 * temporarily be negative. 288 */
290 */ 289 return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
291 return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; 290}
291
292unsigned long ubifs_shrink_scan(struct shrinker *shrink,
293 struct shrink_control *sc)
294{
295 unsigned long nr = sc->nr_to_scan;
296 int contention = 0;
297 unsigned long freed;
298 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
292 299
293 if (!clean_zn_cnt) { 300 if (!clean_zn_cnt) {
294 /* 301 /*
@@ -316,10 +323,10 @@ int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc)
316 323
317 if (!freed && contention) { 324 if (!freed && contention) {
318 dbg_tnc("freed nothing, but contention"); 325 dbg_tnc("freed nothing, but contention");
319 return -1; 326 return SHRINK_STOP;
320 } 327 }
321 328
322out: 329out:
323 dbg_tnc("%d znodes were freed, requested %d", freed, nr); 330 dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
324 return freed; 331 return freed;
325} 332}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 879b9976c12b..3e4aa7281e04 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -49,7 +49,8 @@ struct kmem_cache *ubifs_inode_slab;
49 49
50/* UBIFS TNC shrinker description */ 50/* UBIFS TNC shrinker description */
51static struct shrinker ubifs_shrinker_info = { 51static struct shrinker ubifs_shrinker_info = {
52 .shrink = ubifs_shrinker, 52 .scan_objects = ubifs_shrink_scan,
53 .count_objects = ubifs_shrink_count,
53 .seeks = DEFAULT_SEEKS, 54 .seeks = DEFAULT_SEEKS,
54}; 55};
55 56
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index b2babce4d70f..e8c8cfe1435c 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1624,7 +1624,10 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
1624int ubifs_tnc_end_commit(struct ubifs_info *c); 1624int ubifs_tnc_end_commit(struct ubifs_info *c);
1625 1625
1626/* shrinker.c */ 1626/* shrinker.c */
1627int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc); 1627unsigned long ubifs_shrink_scan(struct shrinker *shrink,
1628 struct shrink_control *sc);
1629unsigned long ubifs_shrink_count(struct shrinker *shrink,
1630 struct shrink_control *sc);
1628 1631
1629/* commit.c */ 1632/* commit.c */
1630int ubifs_bg_thread(void *info); 1633int ubifs_bg_thread(void *info);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index c06823fe10d3..263470075ea2 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -81,54 +81,6 @@ xfs_buf_vmap_len(
81} 81}
82 82
83/* 83/*
84 * xfs_buf_lru_add - add a buffer to the LRU.
85 *
86 * The LRU takes a new reference to the buffer so that it will only be freed
87 * once the shrinker takes the buffer off the LRU.
88 */
89STATIC void
90xfs_buf_lru_add(
91 struct xfs_buf *bp)
92{
93 struct xfs_buftarg *btp = bp->b_target;
94
95 spin_lock(&btp->bt_lru_lock);
96 if (list_empty(&bp->b_lru)) {
97 atomic_inc(&bp->b_hold);
98 list_add_tail(&bp->b_lru, &btp->bt_lru);
99 btp->bt_lru_nr++;
100 bp->b_lru_flags &= ~_XBF_LRU_DISPOSE;
101 }
102 spin_unlock(&btp->bt_lru_lock);
103}
104
105/*
106 * xfs_buf_lru_del - remove a buffer from the LRU
107 *
108 * The unlocked check is safe here because it only occurs when there are not
109 * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
110 * to optimise the shrinker removing the buffer from the LRU and calling
111 * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
112 * bt_lru_lock.
113 */
114STATIC void
115xfs_buf_lru_del(
116 struct xfs_buf *bp)
117{
118 struct xfs_buftarg *btp = bp->b_target;
119
120 if (list_empty(&bp->b_lru))
121 return;
122
123 spin_lock(&btp->bt_lru_lock);
124 if (!list_empty(&bp->b_lru)) {
125 list_del_init(&bp->b_lru);
126 btp->bt_lru_nr--;
127 }
128 spin_unlock(&btp->bt_lru_lock);
129}
130
131/*
132 * When we mark a buffer stale, we remove the buffer from the LRU and clear the 84 * When we mark a buffer stale, we remove the buffer from the LRU and clear the
133 * b_lru_ref count so that the buffer is freed immediately when the buffer 85 * b_lru_ref count so that the buffer is freed immediately when the buffer
134 * reference count falls to zero. If the buffer is already on the LRU, we need 86 * reference count falls to zero. If the buffer is already on the LRU, we need
@@ -151,20 +103,14 @@ xfs_buf_stale(
151 */ 103 */
152 bp->b_flags &= ~_XBF_DELWRI_Q; 104 bp->b_flags &= ~_XBF_DELWRI_Q;
153 105
154 atomic_set(&(bp)->b_lru_ref, 0); 106 spin_lock(&bp->b_lock);
155 if (!list_empty(&bp->b_lru)) { 107 atomic_set(&bp->b_lru_ref, 0);
156 struct xfs_buftarg *btp = bp->b_target; 108 if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
109 (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
110 atomic_dec(&bp->b_hold);
157 111
158 spin_lock(&btp->bt_lru_lock);
159 if (!list_empty(&bp->b_lru) &&
160 !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) {
161 list_del_init(&bp->b_lru);
162 btp->bt_lru_nr--;
163 atomic_dec(&bp->b_hold);
164 }
165 spin_unlock(&btp->bt_lru_lock);
166 }
167 ASSERT(atomic_read(&bp->b_hold) >= 1); 112 ASSERT(atomic_read(&bp->b_hold) >= 1);
113 spin_unlock(&bp->b_lock);
168} 114}
169 115
170static int 116static int
@@ -228,6 +174,7 @@ _xfs_buf_alloc(
228 INIT_LIST_HEAD(&bp->b_list); 174 INIT_LIST_HEAD(&bp->b_list);
229 RB_CLEAR_NODE(&bp->b_rbnode); 175 RB_CLEAR_NODE(&bp->b_rbnode);
230 sema_init(&bp->b_sema, 0); /* held, no waiters */ 176 sema_init(&bp->b_sema, 0); /* held, no waiters */
177 spin_lock_init(&bp->b_lock);
231 XB_SET_OWNER(bp); 178 XB_SET_OWNER(bp);
232 bp->b_target = target; 179 bp->b_target = target;
233 bp->b_flags = flags; 180 bp->b_flags = flags;
@@ -917,12 +864,33 @@ xfs_buf_rele(
917 864
918 ASSERT(atomic_read(&bp->b_hold) > 0); 865 ASSERT(atomic_read(&bp->b_hold) > 0);
919 if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { 866 if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
920 if (!(bp->b_flags & XBF_STALE) && 867 spin_lock(&bp->b_lock);
921 atomic_read(&bp->b_lru_ref)) { 868 if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
922 xfs_buf_lru_add(bp); 869 /*
870 * If the buffer is added to the LRU take a new
871 * reference to the buffer for the LRU and clear the
872 * (now stale) dispose list state flag
873 */
874 if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) {
875 bp->b_state &= ~XFS_BSTATE_DISPOSE;
876 atomic_inc(&bp->b_hold);
877 }
878 spin_unlock(&bp->b_lock);
923 spin_unlock(&pag->pag_buf_lock); 879 spin_unlock(&pag->pag_buf_lock);
924 } else { 880 } else {
925 xfs_buf_lru_del(bp); 881 /*
882 * most of the time buffers will already be removed from
883 * the LRU, so optimise that case by checking for the
884 * XFS_BSTATE_DISPOSE flag indicating the last list the
885 * buffer was on was the disposal list
886 */
887 if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
888 list_lru_del(&bp->b_target->bt_lru, &bp->b_lru);
889 } else {
890 ASSERT(list_empty(&bp->b_lru));
891 }
892 spin_unlock(&bp->b_lock);
893
926 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); 894 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
927 rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); 895 rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
928 spin_unlock(&pag->pag_buf_lock); 896 spin_unlock(&pag->pag_buf_lock);
@@ -1502,83 +1470,121 @@ xfs_buf_iomove(
1502 * returned. These buffers will have an elevated hold count, so wait on those 1470 * returned. These buffers will have an elevated hold count, so wait on those
1503 * while freeing all the buffers only held by the LRU. 1471 * while freeing all the buffers only held by the LRU.
1504 */ 1472 */
1473static enum lru_status
1474xfs_buftarg_wait_rele(
1475 struct list_head *item,
1476 spinlock_t *lru_lock,
1477 void *arg)
1478
1479{
1480 struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
1481 struct list_head *dispose = arg;
1482
1483 if (atomic_read(&bp->b_hold) > 1) {
1484 /* need to wait, so skip it this pass */
1485 trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
1486 return LRU_SKIP;
1487 }
1488 if (!spin_trylock(&bp->b_lock))
1489 return LRU_SKIP;
1490
1491 /*
1492 * clear the LRU reference count so the buffer doesn't get
1493 * ignored in xfs_buf_rele().
1494 */
1495 atomic_set(&bp->b_lru_ref, 0);
1496 bp->b_state |= XFS_BSTATE_DISPOSE;
1497 list_move(item, dispose);
1498 spin_unlock(&bp->b_lock);
1499 return LRU_REMOVED;
1500}
1501
1505void 1502void
1506xfs_wait_buftarg( 1503xfs_wait_buftarg(
1507 struct xfs_buftarg *btp) 1504 struct xfs_buftarg *btp)
1508{ 1505{
1509 struct xfs_buf *bp; 1506 LIST_HEAD(dispose);
1507 int loop = 0;
1510 1508
1511restart: 1509 /* loop until there is nothing left on the lru list. */
1512 spin_lock(&btp->bt_lru_lock); 1510 while (list_lru_count(&btp->bt_lru)) {
1513 while (!list_empty(&btp->bt_lru)) { 1511 list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele,
1514 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); 1512 &dispose, LONG_MAX);
1515 if (atomic_read(&bp->b_hold) > 1) { 1513
1516 trace_xfs_buf_wait_buftarg(bp, _RET_IP_); 1514 while (!list_empty(&dispose)) {
1517 list_move_tail(&bp->b_lru, &btp->bt_lru); 1515 struct xfs_buf *bp;
1518 spin_unlock(&btp->bt_lru_lock); 1516 bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
1519 delay(100); 1517 list_del_init(&bp->b_lru);
1520 goto restart; 1518 xfs_buf_rele(bp);
1521 } 1519 }
1522 /* 1520 if (loop++ != 0)
1523 * clear the LRU reference count so the buffer doesn't get 1521 delay(100);
1524 * ignored in xfs_buf_rele().
1525 */
1526 atomic_set(&bp->b_lru_ref, 0);
1527 spin_unlock(&btp->bt_lru_lock);
1528 xfs_buf_rele(bp);
1529 spin_lock(&btp->bt_lru_lock);
1530 } 1522 }
1531 spin_unlock(&btp->bt_lru_lock);
1532} 1523}
1533 1524
1534int 1525static enum lru_status
1535xfs_buftarg_shrink( 1526xfs_buftarg_isolate(
1527 struct list_head *item,
1528 spinlock_t *lru_lock,
1529 void *arg)
1530{
1531 struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
1532 struct list_head *dispose = arg;
1533
1534 /*
1535 * we are inverting the lru lock/bp->b_lock here, so use a trylock.
1536 * If we fail to get the lock, just skip it.
1537 */
1538 if (!spin_trylock(&bp->b_lock))
1539 return LRU_SKIP;
1540 /*
1541 * Decrement the b_lru_ref count unless the value is already
1542 * zero. If the value is already zero, we need to reclaim the
1543 * buffer, otherwise it gets another trip through the LRU.
1544 */
1545 if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
1546 spin_unlock(&bp->b_lock);
1547 return LRU_ROTATE;
1548 }
1549
1550 bp->b_state |= XFS_BSTATE_DISPOSE;
1551 list_move(item, dispose);
1552 spin_unlock(&bp->b_lock);
1553 return LRU_REMOVED;
1554}
1555
1556static unsigned long
1557xfs_buftarg_shrink_scan(
1536 struct shrinker *shrink, 1558 struct shrinker *shrink,
1537 struct shrink_control *sc) 1559 struct shrink_control *sc)
1538{ 1560{
1539 struct xfs_buftarg *btp = container_of(shrink, 1561 struct xfs_buftarg *btp = container_of(shrink,
1540 struct xfs_buftarg, bt_shrinker); 1562 struct xfs_buftarg, bt_shrinker);
1541 struct xfs_buf *bp;
1542 int nr_to_scan = sc->nr_to_scan;
1543 LIST_HEAD(dispose); 1563 LIST_HEAD(dispose);
1564 unsigned long freed;
1565 unsigned long nr_to_scan = sc->nr_to_scan;
1544 1566
1545 if (!nr_to_scan) 1567 freed = list_lru_walk_node(&btp->bt_lru, sc->nid, xfs_buftarg_isolate,
1546 return btp->bt_lru_nr; 1568 &dispose, &nr_to_scan);
1547
1548 spin_lock(&btp->bt_lru_lock);
1549 while (!list_empty(&btp->bt_lru)) {
1550 if (nr_to_scan-- <= 0)
1551 break;
1552
1553 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
1554
1555 /*
1556 * Decrement the b_lru_ref count unless the value is already
1557 * zero. If the value is already zero, we need to reclaim the
1558 * buffer, otherwise it gets another trip through the LRU.
1559 */
1560 if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
1561 list_move_tail(&bp->b_lru, &btp->bt_lru);
1562 continue;
1563 }
1564
1565 /*
1566 * remove the buffer from the LRU now to avoid needing another
1567 * lock round trip inside xfs_buf_rele().
1568 */
1569 list_move(&bp->b_lru, &dispose);
1570 btp->bt_lru_nr--;
1571 bp->b_lru_flags |= _XBF_LRU_DISPOSE;
1572 }
1573 spin_unlock(&btp->bt_lru_lock);
1574 1569
1575 while (!list_empty(&dispose)) { 1570 while (!list_empty(&dispose)) {
1571 struct xfs_buf *bp;
1576 bp = list_first_entry(&dispose, struct xfs_buf, b_lru); 1572 bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
1577 list_del_init(&bp->b_lru); 1573 list_del_init(&bp->b_lru);
1578 xfs_buf_rele(bp); 1574 xfs_buf_rele(bp);
1579 } 1575 }
1580 1576
1581 return btp->bt_lru_nr; 1577 return freed;
1578}
1579
1580static unsigned long
1581xfs_buftarg_shrink_count(
1582 struct shrinker *shrink,
1583 struct shrink_control *sc)
1584{
1585 struct xfs_buftarg *btp = container_of(shrink,
1586 struct xfs_buftarg, bt_shrinker);
1587 return list_lru_count_node(&btp->bt_lru, sc->nid);
1582} 1588}
1583 1589
1584void 1590void
@@ -1587,6 +1593,7 @@ xfs_free_buftarg(
1587 struct xfs_buftarg *btp) 1593 struct xfs_buftarg *btp)
1588{ 1594{
1589 unregister_shrinker(&btp->bt_shrinker); 1595 unregister_shrinker(&btp->bt_shrinker);
1596 list_lru_destroy(&btp->bt_lru);
1590 1597
1591 if (mp->m_flags & XFS_MOUNT_BARRIER) 1598 if (mp->m_flags & XFS_MOUNT_BARRIER)
1592 xfs_blkdev_issue_flush(btp); 1599 xfs_blkdev_issue_flush(btp);
@@ -1660,12 +1667,16 @@ xfs_alloc_buftarg(
1660 if (!btp->bt_bdi) 1667 if (!btp->bt_bdi)
1661 goto error; 1668 goto error;
1662 1669
1663 INIT_LIST_HEAD(&btp->bt_lru);
1664 spin_lock_init(&btp->bt_lru_lock);
1665 if (xfs_setsize_buftarg_early(btp, bdev)) 1670 if (xfs_setsize_buftarg_early(btp, bdev))
1666 goto error; 1671 goto error;
1667 btp->bt_shrinker.shrink = xfs_buftarg_shrink; 1672
1673 if (list_lru_init(&btp->bt_lru))
1674 goto error;
1675
1676 btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
1677 btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
1668 btp->bt_shrinker.seeks = DEFAULT_SEEKS; 1678 btp->bt_shrinker.seeks = DEFAULT_SEEKS;
1679 btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
1669 register_shrinker(&btp->bt_shrinker); 1680 register_shrinker(&btp->bt_shrinker);
1670 return btp; 1681 return btp;
1671 1682
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 433a12ed7b17..e65683361017 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -25,6 +25,7 @@
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/buffer_head.h> 26#include <linux/buffer_head.h>
27#include <linux/uio.h> 27#include <linux/uio.h>
28#include <linux/list_lru.h>
28 29
29/* 30/*
30 * Base types 31 * Base types
@@ -59,7 +60,6 @@ typedef enum {
59#define _XBF_KMEM (1 << 21)/* backed by heap memory */ 60#define _XBF_KMEM (1 << 21)/* backed by heap memory */
60#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ 61#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
61#define _XBF_COMPOUND (1 << 23)/* compound buffer */ 62#define _XBF_COMPOUND (1 << 23)/* compound buffer */
62#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */
63 63
64typedef unsigned int xfs_buf_flags_t; 64typedef unsigned int xfs_buf_flags_t;
65 65
@@ -78,8 +78,12 @@ typedef unsigned int xfs_buf_flags_t;
78 { _XBF_PAGES, "PAGES" }, \ 78 { _XBF_PAGES, "PAGES" }, \
79 { _XBF_KMEM, "KMEM" }, \ 79 { _XBF_KMEM, "KMEM" }, \
80 { _XBF_DELWRI_Q, "DELWRI_Q" }, \ 80 { _XBF_DELWRI_Q, "DELWRI_Q" }, \
81 { _XBF_COMPOUND, "COMPOUND" }, \ 81 { _XBF_COMPOUND, "COMPOUND" }
82 { _XBF_LRU_DISPOSE, "LRU_DISPOSE" } 82
83/*
84 * Internal state flags.
85 */
86#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */
83 87
84typedef struct xfs_buftarg { 88typedef struct xfs_buftarg {
85 dev_t bt_dev; 89 dev_t bt_dev;
@@ -92,9 +96,7 @@ typedef struct xfs_buftarg {
92 96
93 /* LRU control structures */ 97 /* LRU control structures */
94 struct shrinker bt_shrinker; 98 struct shrinker bt_shrinker;
95 struct list_head bt_lru; 99 struct list_lru bt_lru;
96 spinlock_t bt_lru_lock;
97 unsigned int bt_lru_nr;
98} xfs_buftarg_t; 100} xfs_buftarg_t;
99 101
100struct xfs_buf; 102struct xfs_buf;
@@ -137,7 +139,8 @@ typedef struct xfs_buf {
137 * bt_lru_lock and not by b_sema 139 * bt_lru_lock and not by b_sema
138 */ 140 */
139 struct list_head b_lru; /* lru list */ 141 struct list_head b_lru; /* lru list */
140 xfs_buf_flags_t b_lru_flags; /* internal lru status flags */ 142 spinlock_t b_lock; /* internal state lock */
143 unsigned int b_state; /* internal state flags */
141 wait_queue_head_t b_waiters; /* unpin waiters */ 144 wait_queue_head_t b_waiters; /* unpin waiters */
142 struct list_head b_list; 145 struct list_head b_list;
143 struct xfs_perag *b_pag; /* contains rbtree root */ 146 struct xfs_perag *b_pag; /* contains rbtree root */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 251c66632e5e..71520e6e5d65 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -940,13 +940,8 @@ xfs_qm_dqput_final(
940 940
941 trace_xfs_dqput_free(dqp); 941 trace_xfs_dqput_free(dqp);
942 942
943 mutex_lock(&qi->qi_lru_lock); 943 if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
944 if (list_empty(&dqp->q_lru)) {
945 list_add_tail(&dqp->q_lru, &qi->qi_lru_list);
946 qi->qi_lru_count++;
947 XFS_STATS_INC(xs_qm_dquot_unused); 944 XFS_STATS_INC(xs_qm_dquot_unused);
948 }
949 mutex_unlock(&qi->qi_lru_lock);
950 945
951 /* 946 /*
952 * If we just added a udquot to the freelist, then we want to release 947 * If we just added a udquot to the freelist, then we want to release
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 16219b9c6790..73b62a24ceac 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1167,7 +1167,7 @@ xfs_reclaim_inodes(
1167 * them to be cleaned, which we hope will not be very long due to the 1167 * them to be cleaned, which we hope will not be very long due to the
1168 * background walker having already kicked the IO off on those dirty inodes. 1168 * background walker having already kicked the IO off on those dirty inodes.
1169 */ 1169 */
1170void 1170long
1171xfs_reclaim_inodes_nr( 1171xfs_reclaim_inodes_nr(
1172 struct xfs_mount *mp, 1172 struct xfs_mount *mp,
1173 int nr_to_scan) 1173 int nr_to_scan)
@@ -1176,7 +1176,7 @@ xfs_reclaim_inodes_nr(
1176 xfs_reclaim_work_queue(mp); 1176 xfs_reclaim_work_queue(mp);
1177 xfs_ail_push_all(mp->m_ail); 1177 xfs_ail_push_all(mp->m_ail);
1178 1178
1179 xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); 1179 return xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
1180} 1180}
1181 1181
1182/* 1182/*
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 8a89f7d791bd..456f0144e1b6 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -46,7 +46,7 @@ void xfs_reclaim_worker(struct work_struct *work);
46 46
47int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 47int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
48int xfs_reclaim_inodes_count(struct xfs_mount *mp); 48int xfs_reclaim_inodes_count(struct xfs_mount *mp);
49void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); 49long xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
50 50
51void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); 51void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
52 52
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 6218a0aeeeea..3e6c2e6c9cd2 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -51,8 +51,9 @@
51 */ 51 */
52STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 52STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
53STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 53STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
54STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *);
55 54
55
56STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp);
56/* 57/*
57 * We use the batch lookup interface to iterate over the dquots as it 58 * We use the batch lookup interface to iterate over the dquots as it
58 * currently is the only interface into the radix tree code that allows 59 * currently is the only interface into the radix tree code that allows
@@ -203,12 +204,9 @@ xfs_qm_dqpurge(
203 * We move dquots to the freelist as soon as their reference count 204 * We move dquots to the freelist as soon as their reference count
204 * hits zero, so it really should be on the freelist here. 205 * hits zero, so it really should be on the freelist here.
205 */ 206 */
206 mutex_lock(&qi->qi_lru_lock);
207 ASSERT(!list_empty(&dqp->q_lru)); 207 ASSERT(!list_empty(&dqp->q_lru));
208 list_del_init(&dqp->q_lru); 208 list_lru_del(&qi->qi_lru, &dqp->q_lru);
209 qi->qi_lru_count--;
210 XFS_STATS_DEC(xs_qm_dquot_unused); 209 XFS_STATS_DEC(xs_qm_dquot_unused);
211 mutex_unlock(&qi->qi_lru_lock);
212 210
213 xfs_qm_dqdestroy(dqp); 211 xfs_qm_dqdestroy(dqp);
214 212
@@ -680,6 +678,143 @@ xfs_qm_calc_dquots_per_chunk(
680 return ndquots; 678 return ndquots;
681} 679}
682 680
681struct xfs_qm_isolate {
682 struct list_head buffers;
683 struct list_head dispose;
684};
685
686static enum lru_status
687xfs_qm_dquot_isolate(
688 struct list_head *item,
689 spinlock_t *lru_lock,
690 void *arg)
691{
692 struct xfs_dquot *dqp = container_of(item,
693 struct xfs_dquot, q_lru);
694 struct xfs_qm_isolate *isol = arg;
695
696 if (!xfs_dqlock_nowait(dqp))
697 goto out_miss_busy;
698
699 /*
700 * This dquot has acquired a reference in the meantime remove it from
701 * the freelist and try again.
702 */
703 if (dqp->q_nrefs) {
704 xfs_dqunlock(dqp);
705 XFS_STATS_INC(xs_qm_dqwants);
706
707 trace_xfs_dqreclaim_want(dqp);
708 list_del_init(&dqp->q_lru);
709 XFS_STATS_DEC(xs_qm_dquot_unused);
710 return LRU_REMOVED;
711 }
712
713 /*
714 * If the dquot is dirty, flush it. If it's already being flushed, just
715 * skip it so there is time for the IO to complete before we try to
716 * reclaim it again on the next LRU pass.
717 */
718 if (!xfs_dqflock_nowait(dqp)) {
719 xfs_dqunlock(dqp);
720 goto out_miss_busy;
721 }
722
723 if (XFS_DQ_IS_DIRTY(dqp)) {
724 struct xfs_buf *bp = NULL;
725 int error;
726
727 trace_xfs_dqreclaim_dirty(dqp);
728
729 /* we have to drop the LRU lock to flush the dquot */
730 spin_unlock(lru_lock);
731
732 error = xfs_qm_dqflush(dqp, &bp);
733 if (error) {
734 xfs_warn(dqp->q_mount, "%s: dquot %p flush failed",
735 __func__, dqp);
736 goto out_unlock_dirty;
737 }
738
739 xfs_buf_delwri_queue(bp, &isol->buffers);
740 xfs_buf_relse(bp);
741 goto out_unlock_dirty;
742 }
743 xfs_dqfunlock(dqp);
744
745 /*
746 * Prevent lookups now that we are past the point of no return.
747 */
748 dqp->dq_flags |= XFS_DQ_FREEING;
749 xfs_dqunlock(dqp);
750
751 ASSERT(dqp->q_nrefs == 0);
752 list_move_tail(&dqp->q_lru, &isol->dispose);
753 XFS_STATS_DEC(xs_qm_dquot_unused);
754 trace_xfs_dqreclaim_done(dqp);
755 XFS_STATS_INC(xs_qm_dqreclaims);
756 return LRU_REMOVED;
757
758out_miss_busy:
759 trace_xfs_dqreclaim_busy(dqp);
760 XFS_STATS_INC(xs_qm_dqreclaim_misses);
761 return LRU_SKIP;
762
763out_unlock_dirty:
764 trace_xfs_dqreclaim_busy(dqp);
765 XFS_STATS_INC(xs_qm_dqreclaim_misses);
766 xfs_dqunlock(dqp);
767 spin_lock(lru_lock);
768 return LRU_RETRY;
769}
770
771static unsigned long
772xfs_qm_shrink_scan(
773 struct shrinker *shrink,
774 struct shrink_control *sc)
775{
776 struct xfs_quotainfo *qi = container_of(shrink,
777 struct xfs_quotainfo, qi_shrinker);
778 struct xfs_qm_isolate isol;
779 unsigned long freed;
780 int error;
781 unsigned long nr_to_scan = sc->nr_to_scan;
782
783 if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
784 return 0;
785
786 INIT_LIST_HEAD(&isol.buffers);
787 INIT_LIST_HEAD(&isol.dispose);
788
789 freed = list_lru_walk_node(&qi->qi_lru, sc->nid, xfs_qm_dquot_isolate, &isol,
790 &nr_to_scan);
791
792 error = xfs_buf_delwri_submit(&isol.buffers);
793 if (error)
794 xfs_warn(NULL, "%s: dquot reclaim failed", __func__);
795
796 while (!list_empty(&isol.dispose)) {
797 struct xfs_dquot *dqp;
798
799 dqp = list_first_entry(&isol.dispose, struct xfs_dquot, q_lru);
800 list_del_init(&dqp->q_lru);
801 xfs_qm_dqfree_one(dqp);
802 }
803
804 return freed;
805}
806
807static unsigned long
808xfs_qm_shrink_count(
809 struct shrinker *shrink,
810 struct shrink_control *sc)
811{
812 struct xfs_quotainfo *qi = container_of(shrink,
813 struct xfs_quotainfo, qi_shrinker);
814
815 return list_lru_count_node(&qi->qi_lru, sc->nid);
816}
817
683/* 818/*
684 * This initializes all the quota information that's kept in the 819 * This initializes all the quota information that's kept in the
685 * mount structure 820 * mount structure
@@ -696,11 +831,18 @@ xfs_qm_init_quotainfo(
696 831
697 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); 832 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
698 833
834 if ((error = list_lru_init(&qinf->qi_lru))) {
835 kmem_free(qinf);
836 mp->m_quotainfo = NULL;
837 return error;
838 }
839
699 /* 840 /*
700 * See if quotainodes are setup, and if not, allocate them, 841 * See if quotainodes are setup, and if not, allocate them,
701 * and change the superblock accordingly. 842 * and change the superblock accordingly.
702 */ 843 */
703 if ((error = xfs_qm_init_quotainos(mp))) { 844 if ((error = xfs_qm_init_quotainos(mp))) {
845 list_lru_destroy(&qinf->qi_lru);
704 kmem_free(qinf); 846 kmem_free(qinf);
705 mp->m_quotainfo = NULL; 847 mp->m_quotainfo = NULL;
706 return error; 848 return error;
@@ -711,10 +853,6 @@ xfs_qm_init_quotainfo(
711 INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS); 853 INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS);
712 mutex_init(&qinf->qi_tree_lock); 854 mutex_init(&qinf->qi_tree_lock);
713 855
714 INIT_LIST_HEAD(&qinf->qi_lru_list);
715 qinf->qi_lru_count = 0;
716 mutex_init(&qinf->qi_lru_lock);
717
718 /* mutex used to serialize quotaoffs */ 856 /* mutex used to serialize quotaoffs */
719 mutex_init(&qinf->qi_quotaofflock); 857 mutex_init(&qinf->qi_quotaofflock);
720 858
@@ -779,8 +917,10 @@ xfs_qm_init_quotainfo(
779 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; 917 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
780 } 918 }
781 919
782 qinf->qi_shrinker.shrink = xfs_qm_shake; 920 qinf->qi_shrinker.count_objects = xfs_qm_shrink_count;
921 qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
783 qinf->qi_shrinker.seeks = DEFAULT_SEEKS; 922 qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
923 qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
784 register_shrinker(&qinf->qi_shrinker); 924 register_shrinker(&qinf->qi_shrinker);
785 return 0; 925 return 0;
786} 926}
@@ -801,6 +941,7 @@ xfs_qm_destroy_quotainfo(
801 ASSERT(qi != NULL); 941 ASSERT(qi != NULL);
802 942
803 unregister_shrinker(&qi->qi_shrinker); 943 unregister_shrinker(&qi->qi_shrinker);
944 list_lru_destroy(&qi->qi_lru);
804 945
805 if (qi->qi_uquotaip) { 946 if (qi->qi_uquotaip) {
806 IRELE(qi->qi_uquotaip); 947 IRELE(qi->qi_uquotaip);
@@ -1599,132 +1740,6 @@ xfs_qm_dqfree_one(
1599 xfs_qm_dqdestroy(dqp); 1740 xfs_qm_dqdestroy(dqp);
1600} 1741}
1601 1742
1602STATIC void
1603xfs_qm_dqreclaim_one(
1604 struct xfs_dquot *dqp,
1605 struct list_head *buffer_list,
1606 struct list_head *dispose_list)
1607{
1608 struct xfs_mount *mp = dqp->q_mount;
1609 struct xfs_quotainfo *qi = mp->m_quotainfo;
1610 int error;
1611
1612 if (!xfs_dqlock_nowait(dqp))
1613 goto out_move_tail;
1614
1615 /*
1616 * This dquot has acquired a reference in the meantime remove it from
1617 * the freelist and try again.
1618 */
1619 if (dqp->q_nrefs) {
1620 xfs_dqunlock(dqp);
1621
1622 trace_xfs_dqreclaim_want(dqp);
1623 XFS_STATS_INC(xs_qm_dqwants);
1624
1625 list_del_init(&dqp->q_lru);
1626 qi->qi_lru_count--;
1627 XFS_STATS_DEC(xs_qm_dquot_unused);
1628 return;
1629 }
1630
1631 /*
1632 * Try to grab the flush lock. If this dquot is in the process of
1633 * getting flushed to disk, we don't want to reclaim it.
1634 */
1635 if (!xfs_dqflock_nowait(dqp))
1636 goto out_unlock_move_tail;
1637
1638 if (XFS_DQ_IS_DIRTY(dqp)) {
1639 struct xfs_buf *bp = NULL;
1640
1641 trace_xfs_dqreclaim_dirty(dqp);
1642
1643 error = xfs_qm_dqflush(dqp, &bp);
1644 if (error) {
1645 xfs_warn(mp, "%s: dquot %p flush failed",
1646 __func__, dqp);
1647 goto out_unlock_move_tail;
1648 }
1649
1650 xfs_buf_delwri_queue(bp, buffer_list);
1651 xfs_buf_relse(bp);
1652 /*
1653 * Give the dquot another try on the freelist, as the
1654 * flushing will take some time.
1655 */
1656 goto out_unlock_move_tail;
1657 }
1658 xfs_dqfunlock(dqp);
1659
1660 /*
1661 * Prevent lookups now that we are past the point of no return.
1662 */
1663 dqp->dq_flags |= XFS_DQ_FREEING;
1664 xfs_dqunlock(dqp);
1665
1666 ASSERT(dqp->q_nrefs == 0);
1667 list_move_tail(&dqp->q_lru, dispose_list);
1668 qi->qi_lru_count--;
1669 XFS_STATS_DEC(xs_qm_dquot_unused);
1670
1671 trace_xfs_dqreclaim_done(dqp);
1672 XFS_STATS_INC(xs_qm_dqreclaims);
1673 return;
1674
1675 /*
1676 * Move the dquot to the tail of the list so that we don't spin on it.
1677 */
1678out_unlock_move_tail:
1679 xfs_dqunlock(dqp);
1680out_move_tail:
1681 list_move_tail(&dqp->q_lru, &qi->qi_lru_list);
1682 trace_xfs_dqreclaim_busy(dqp);
1683 XFS_STATS_INC(xs_qm_dqreclaim_misses);
1684}
1685
1686STATIC int
1687xfs_qm_shake(
1688 struct shrinker *shrink,
1689 struct shrink_control *sc)
1690{
1691 struct xfs_quotainfo *qi =
1692 container_of(shrink, struct xfs_quotainfo, qi_shrinker);
1693 int nr_to_scan = sc->nr_to_scan;
1694 LIST_HEAD (buffer_list);
1695 LIST_HEAD (dispose_list);
1696 struct xfs_dquot *dqp;
1697 int error;
1698
1699 if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
1700 return 0;
1701 if (!nr_to_scan)
1702 goto out;
1703
1704 mutex_lock(&qi->qi_lru_lock);
1705 while (!list_empty(&qi->qi_lru_list)) {
1706 if (nr_to_scan-- <= 0)
1707 break;
1708 dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot,
1709 q_lru);
1710 xfs_qm_dqreclaim_one(dqp, &buffer_list, &dispose_list);
1711 }
1712 mutex_unlock(&qi->qi_lru_lock);
1713
1714 error = xfs_buf_delwri_submit(&buffer_list);
1715 if (error)
1716 xfs_warn(NULL, "%s: dquot reclaim failed", __func__);
1717
1718 while (!list_empty(&dispose_list)) {
1719 dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru);
1720 list_del_init(&dqp->q_lru);
1721 xfs_qm_dqfree_one(dqp);
1722 }
1723
1724out:
1725 return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure;
1726}
1727
1728/* 1743/*
1729 * Start a transaction and write the incore superblock changes to 1744 * Start a transaction and write the incore superblock changes to
1730 * disk. flags parameter indicates which fields have changed. 1745 * disk. flags parameter indicates which fields have changed.
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 670cd4464070..2b602df9c242 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -49,9 +49,7 @@ typedef struct xfs_quotainfo {
49 struct xfs_inode *qi_uquotaip; /* user quota inode */ 49 struct xfs_inode *qi_uquotaip; /* user quota inode */
50 struct xfs_inode *qi_gquotaip; /* group quota inode */ 50 struct xfs_inode *qi_gquotaip; /* group quota inode */
51 struct xfs_inode *qi_pquotaip; /* project quota inode */ 51 struct xfs_inode *qi_pquotaip; /* project quota inode */
52 struct list_head qi_lru_list; 52 struct list_lru qi_lru;
53 struct mutex qi_lru_lock;
54 int qi_lru_count;
55 int qi_dquots; 53 int qi_dquots;
56 time_t qi_btimelimit; /* limit for blks timer */ 54 time_t qi_btimelimit; /* limit for blks timer */
57 time_t qi_itimelimit; /* limit for inodes timer */ 55 time_t qi_itimelimit; /* limit for inodes timer */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 979a77d4b87d..15188cc99449 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1535,19 +1535,21 @@ xfs_fs_mount(
1535 return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); 1535 return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
1536} 1536}
1537 1537
1538static int 1538static long
1539xfs_fs_nr_cached_objects( 1539xfs_fs_nr_cached_objects(
1540 struct super_block *sb) 1540 struct super_block *sb,
1541 int nid)
1541{ 1542{
1542 return xfs_reclaim_inodes_count(XFS_M(sb)); 1543 return xfs_reclaim_inodes_count(XFS_M(sb));
1543} 1544}
1544 1545
1545static void 1546static long
1546xfs_fs_free_cached_objects( 1547xfs_fs_free_cached_objects(
1547 struct super_block *sb, 1548 struct super_block *sb,
1548 int nr_to_scan) 1549 long nr_to_scan,
1550 int nid)
1549{ 1551{
1550 xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); 1552 return xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
1551} 1553}
1552 1554
1553static const struct super_operations xfs_super_operations = { 1555static const struct super_operations xfs_super_operations = {
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index feaa8d88eef7..59066e0b4ff1 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -55,11 +55,11 @@ struct qstr {
55#define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) 55#define hashlen_len(hashlen) ((u32)((hashlen) >> 32))
56 56
57struct dentry_stat_t { 57struct dentry_stat_t {
58 int nr_dentry; 58 long nr_dentry;
59 int nr_unused; 59 long nr_unused;
60 int age_limit; /* age in seconds */ 60 long age_limit; /* age in seconds */
61 int want_pages; /* pages requested by system */ 61 long want_pages; /* pages requested by system */
62 int dummy[2]; 62 long dummy[2];
63}; 63};
64extern struct dentry_stat_t dentry_stat; 64extern struct dentry_stat_t dentry_stat;
65 65
@@ -395,4 +395,8 @@ static inline bool d_mountpoint(const struct dentry *dentry)
395 395
396extern int sysctl_vfs_cache_pressure; 396extern int sysctl_vfs_cache_pressure;
397 397
398static inline unsigned long vfs_pressure_ratio(unsigned long val)
399{
400 return mult_frac(val, sysctl_vfs_cache_pressure, 100);
401}
398#endif /* __LINUX_DCACHE_H */ 402#endif /* __LINUX_DCACHE_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 49e71b0f0e9f..a4acd3c61190 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -10,6 +10,7 @@
10#include <linux/stat.h> 10#include <linux/stat.h>
11#include <linux/cache.h> 11#include <linux/cache.h>
12#include <linux/list.h> 12#include <linux/list.h>
13#include <linux/list_lru.h>
13#include <linux/llist.h> 14#include <linux/llist.h>
14#include <linux/radix-tree.h> 15#include <linux/radix-tree.h>
15#include <linux/rbtree.h> 16#include <linux/rbtree.h>
@@ -1269,15 +1270,6 @@ struct super_block {
1269 struct list_head s_files; 1270 struct list_head s_files;
1270#endif 1271#endif
1271 struct list_head s_mounts; /* list of mounts; _not_ for fs use */ 1272 struct list_head s_mounts; /* list of mounts; _not_ for fs use */
1272 /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
1273 struct list_head s_dentry_lru; /* unused dentry lru */
1274 int s_nr_dentry_unused; /* # of dentry on lru */
1275
1276 /* s_inode_lru_lock protects s_inode_lru and s_nr_inodes_unused */
1277 spinlock_t s_inode_lru_lock ____cacheline_aligned_in_smp;
1278 struct list_head s_inode_lru; /* unused inode lru */
1279 int s_nr_inodes_unused; /* # of inodes on lru */
1280
1281 struct block_device *s_bdev; 1273 struct block_device *s_bdev;
1282 struct backing_dev_info *s_bdi; 1274 struct backing_dev_info *s_bdi;
1283 struct mtd_info *s_mtd; 1275 struct mtd_info *s_mtd;
@@ -1331,11 +1323,14 @@ struct super_block {
1331 1323
1332 /* AIO completions deferred from interrupt context */ 1324 /* AIO completions deferred from interrupt context */
1333 struct workqueue_struct *s_dio_done_wq; 1325 struct workqueue_struct *s_dio_done_wq;
1334};
1335 1326
1336/* superblock cache pruning functions */ 1327 /*
1337extern void prune_icache_sb(struct super_block *sb, int nr_to_scan); 1328 * Keep the lru lists last in the structure so they always sit on their
1338extern void prune_dcache_sb(struct super_block *sb, int nr_to_scan); 1329 * own individual cachelines.
1330 */
1331 struct list_lru s_dentry_lru ____cacheline_aligned_in_smp;
1332 struct list_lru s_inode_lru ____cacheline_aligned_in_smp;
1333};
1339 1334
1340extern struct timespec current_fs_time(struct super_block *sb); 1335extern struct timespec current_fs_time(struct super_block *sb);
1341 1336
@@ -1629,8 +1624,8 @@ struct super_operations {
1629 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); 1624 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
1630#endif 1625#endif
1631 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); 1626 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
1632 int (*nr_cached_objects)(struct super_block *); 1627 long (*nr_cached_objects)(struct super_block *, int);
1633 void (*free_cached_objects)(struct super_block *, int); 1628 long (*free_cached_objects)(struct super_block *, long, int);
1634}; 1629};
1635 1630
1636/* 1631/*
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
new file mode 100644
index 000000000000..3ce541753c88
--- /dev/null
+++ b/include/linux/list_lru.h
@@ -0,0 +1,131 @@
1/*
2 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
3 * Authors: David Chinner and Glauber Costa
4 *
5 * Generic LRU infrastructure
6 */
7#ifndef _LRU_LIST_H
8#define _LRU_LIST_H
9
10#include <linux/list.h>
11#include <linux/nodemask.h>
12
/*
 * Verdict a list_lru_walk_cb hands back for each item it is shown.  The
 * callback must always return exactly one of these values.
 */
enum lru_status {
	LRU_REMOVED = 0,	/* item removed from list */
	LRU_ROTATE,		/* item referenced, give another pass */
	LRU_SKIP,		/* item cannot be locked, skip */
	LRU_RETRY,		/* item not freeable; the callback may drop
				   the lock internally, but has to return
				   with it held */
};
21
22struct list_lru_node {
23 spinlock_t lock;
24 struct list_head list;
25 /* kept as signed so we can catch imbalance bugs */
26 long nr_items;
27} ____cacheline_aligned_in_smp;
28
29struct list_lru {
30 struct list_lru_node *node;
31 nodemask_t active_nodes;
32};
33
/*
 * Lifetime management.  list_lru_init() allocates and initialises the
 * per-node array and returns 0, or -ENOMEM if the allocation fails;
 * list_lru_destroy() frees that array again.
 */
void list_lru_destroy(struct list_lru *lru);
int list_lru_init(struct list_lru *lru);
36
37/**
38 * list_lru_add: add an element to the lru list's tail
39 * @lru: the lru pointer
40 * @item: the item to be added.
41 *
42 * If the element is already part of a list, this function returns doing
43 * nothing. Therefore the caller does not need to keep state about whether or
44 * not the element already belongs in the list and is allowed to lazy update
45 * it. Note however that this is valid for *a* list, not *this* list. If
46 * the caller organizes itself in a way that elements can be in more than
47 * one type of list, it is up to the caller to fully remove the item from
48 * the previous list (with list_lru_del() for instance) before moving it
49 * to @lru.
50 *
51 * Return value: true if the list was updated, false otherwise
52 */
53bool list_lru_add(struct list_lru *lru, struct list_head *item);
54
55/**
56 * list_lru_del: delete an element from the lru list
57 * @lru: the lru pointer
58 * @item: the item to be deleted.
59 *
60 * This function works analogously to list_lru_add in terms of list
61 * manipulation. The comments about an element already pertaining to
62 * a list are also valid for list_lru_del.
63 *
64 * Return value: true if the list was updated, false otherwise
65 */
66bool list_lru_del(struct list_lru *lru, struct list_head *item);
67
68/**
69 * list_lru_count_node: return the number of objects currently held by @lru
70 * @lru: the lru pointer.
71 * @nid: the node id to count from.
72 *
73 * Always return a non-negative number, 0 for empty lists. There is no
74 * guarantee that the list is not updated while the count is being computed.
75 * Callers that want such a guarantee need to provide an outer lock.
76 */
77unsigned long list_lru_count_node(struct list_lru *lru, int nid);
78static inline unsigned long list_lru_count(struct list_lru *lru)
79{
80 long count = 0;
81 int nid;
82
83 for_each_node_mask(nid, lru->active_nodes)
84 count += list_lru_count_node(lru, nid);
85
86 return count;
87}
88
89typedef enum lru_status
90(*list_lru_walk_cb)(struct list_head *item, spinlock_t *lock, void *cb_arg);
91/**
92 * list_lru_walk_node: walk a list_lru, isolating and disposing freeable items.
93 * @lru: the lru pointer.
94 * @nid: the node id to scan from.
95 * @isolate: callback function that is responsible for deciding what to do with
96 * the item currently being scanned
97 * @cb_arg: opaque type that will be passed to @isolate
98 * @nr_to_walk: pointer to the number of items left to scan; decremented as the walk proceeds.
99 *
100 * This function will scan all elements in a particular list_lru, calling the
101 * @isolate callback for each of those items, along with the current list
102 * spinlock and a caller-provided opaque. The @isolate callback can choose to
103 * drop the lock internally, but *must* return with the lock held. The callback
104 * will return an enum lru_status telling the list_lru infrastructure what to
105 * do with the object being scanned.
106 *
107 * Please note that nr_to_walk does not mean how many objects will be freed,
108 * just how many objects will be scanned.
109 *
110 * Return value: the number of objects effectively removed from the LRU.
111 */
112unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
113 list_lru_walk_cb isolate, void *cb_arg,
114 unsigned long *nr_to_walk);
115
116static inline unsigned long
117list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate,
118 void *cb_arg, unsigned long nr_to_walk)
119{
120 long isolated = 0;
121 int nid;
122
123 for_each_node_mask(nid, lru->active_nodes) {
124 isolated += list_lru_walk_node(lru, nid, isolate,
125 cb_arg, &nr_to_walk);
126 if (nr_to_walk <= 0)
127 break;
128 }
129 return isolated;
130}
131#endif /* _LRU_LIST_H */
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index ac6b8ee07825..68c097077ef0 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -4,39 +4,67 @@
4/* 4/*
5 * This struct is used to pass information from page reclaim to the shrinkers. 5 * This struct is used to pass information from page reclaim to the shrinkers.
6 * We consolidate the values for easier extension later. 6 * We consolidate the values for easier extension later.
7 *
8 * The 'gfpmask' refers to the allocation we are currently trying to
9 * fulfil.
7 */ 10 */
8struct shrink_control { 11struct shrink_control {
9 gfp_t gfp_mask; 12 gfp_t gfp_mask;
10 13
11 /* How many slab objects shrinker() should scan and try to reclaim */ 14 /*
15 * How many objects scan_objects should scan and try to reclaim.
16 * This is reset before every call, so it is safe for callees
17 * to modify.
18 */
12 unsigned long nr_to_scan; 19 unsigned long nr_to_scan;
20
21 /* shrink from these nodes */
22 nodemask_t nodes_to_scan;
23 /* current node being shrunk (for NUMA aware shrinkers) */
24 int nid;
13}; 25};
14 26
27#define SHRINK_STOP (~0UL)
15/* 28/*
16 * A callback you can register to apply pressure to ageable caches. 29 * A callback you can register to apply pressure to ageable caches.
17 * 30 *
18 * 'sc' is passed shrink_control which includes a count 'nr_to_scan' 31 * @count_objects should return the number of freeable items in the cache. If
19 * and a 'gfpmask'. It should look through the least-recently-used 32 * there are no objects to free or the number of freeable items cannot be
20 * 'nr_to_scan' entries and attempt to free them up. It should return 33 * determined, it should return 0. No deadlock checks should be done during the
21 * the number of objects which remain in the cache. If it returns -1, it means 34 * count callback - the shrinker relies on aggregating scan counts that couldn't
22 * it cannot do any scanning at this time (eg. there is a risk of deadlock). 35 * be executed due to potential deadlocks to be run at a later call when the
36 * deadlock condition is no longer pending.
23 * 37 *
24 * The 'gfpmask' refers to the allocation we are currently trying to 38 * @scan_objects will only be called if @count_objects returned a non-zero
25 * fulfil. 39 * value for the number of freeable objects. The callout should scan the cache
40 * and attempt to free items from the cache. It should then return the number
41 * of objects freed during the scan, or SHRINK_STOP if progress cannot be made
42 * due to potential deadlocks. If SHRINK_STOP is returned, then no further
43 * attempts to call the @scan_objects will be made from the current reclaim
44 * context.
26 * 45 *
27 * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is 46 * @flags determine the shrinker abilities, like numa awareness
28 * querying the cache size, so a fastpath for that case is appropriate.
29 */ 47 */
30struct shrinker { 48struct shrinker {
31 int (*shrink)(struct shrinker *, struct shrink_control *sc); 49 unsigned long (*count_objects)(struct shrinker *,
50 struct shrink_control *sc);
51 unsigned long (*scan_objects)(struct shrinker *,
52 struct shrink_control *sc);
53
32 int seeks; /* seeks to recreate an obj */ 54 int seeks; /* seeks to recreate an obj */
33 long batch; /* reclaim batch size, 0 = default */ 55 long batch; /* reclaim batch size, 0 = default */
56 unsigned long flags;
34 57
35 /* These are for internal use */ 58 /* These are for internal use */
36 struct list_head list; 59 struct list_head list;
37 atomic_long_t nr_in_batch; /* objs pending delete */ 60 /* objs pending delete, per node */
61 atomic_long_t *nr_deferred;
38}; 62};
39#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ 63#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
40extern void register_shrinker(struct shrinker *); 64
65/* Flags */
66#define SHRINKER_NUMA_AWARE (1 << 0)
67
68extern int register_shrinker(struct shrinker *);
41extern void unregister_shrinker(struct shrinker *); 69extern void unregister_shrinker(struct shrinker *);
42#endif 70#endif
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 63cfcccaebb3..132a985aba8b 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -202,7 +202,7 @@ TRACE_EVENT(mm_shrink_slab_start,
202 202
203 TP_fast_assign( 203 TP_fast_assign(
204 __entry->shr = shr; 204 __entry->shr = shr;
205 __entry->shrink = shr->shrink; 205 __entry->shrink = shr->scan_objects;
206 __entry->nr_objects_to_shrink = nr_objects_to_shrink; 206 __entry->nr_objects_to_shrink = nr_objects_to_shrink;
207 __entry->gfp_flags = sc->gfp_mask; 207 __entry->gfp_flags = sc->gfp_mask;
208 __entry->pgs_scanned = pgs_scanned; 208 __entry->pgs_scanned = pgs_scanned;
@@ -241,7 +241,7 @@ TRACE_EVENT(mm_shrink_slab_end,
241 241
242 TP_fast_assign( 242 TP_fast_assign(
243 __entry->shr = shr; 243 __entry->shr = shr;
244 __entry->shrink = shr->shrink; 244 __entry->shrink = shr->scan_objects;
245 __entry->unused_scan = unused_scan_cnt; 245 __entry->unused_scan = unused_scan_cnt;
246 __entry->new_scan = new_scan_cnt; 246 __entry->new_scan = new_scan_cnt;
247 __entry->retval = shrinker_retval; 247 __entry->retval = shrinker_retval;
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index a4ed56cf0eac..6c28b61bb690 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -49,9 +49,9 @@ struct files_stat_struct {
49}; 49};
50 50
51struct inodes_stat_t { 51struct inodes_stat_t {
52 int nr_inodes; 52 long nr_inodes;
53 int nr_unused; 53 long nr_unused;
54 int dummy[5]; /* padding for sysctl ABI compatibility */ 54 long dummy[5]; /* padding for sysctl ABI compatibility */
55}; 55};
56 56
57 57
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 07f6fc468e17..7822cd88a95c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1471,14 +1471,14 @@ static struct ctl_table fs_table[] = {
1471 { 1471 {
1472 .procname = "inode-nr", 1472 .procname = "inode-nr",
1473 .data = &inodes_stat, 1473 .data = &inodes_stat,
1474 .maxlen = 2*sizeof(int), 1474 .maxlen = 2*sizeof(long),
1475 .mode = 0444, 1475 .mode = 0444,
1476 .proc_handler = proc_nr_inodes, 1476 .proc_handler = proc_nr_inodes,
1477 }, 1477 },
1478 { 1478 {
1479 .procname = "inode-state", 1479 .procname = "inode-state",
1480 .data = &inodes_stat, 1480 .data = &inodes_stat,
1481 .maxlen = 7*sizeof(int), 1481 .maxlen = 7*sizeof(long),
1482 .mode = 0444, 1482 .mode = 0444,
1483 .proc_handler = proc_nr_inodes, 1483 .proc_handler = proc_nr_inodes,
1484 }, 1484 },
@@ -1508,7 +1508,7 @@ static struct ctl_table fs_table[] = {
1508 { 1508 {
1509 .procname = "dentry-state", 1509 .procname = "dentry-state",
1510 .data = &dentry_stat, 1510 .data = &dentry_stat,
1511 .maxlen = 6*sizeof(int), 1511 .maxlen = 6*sizeof(long),
1512 .mode = 0444, 1512 .mode = 0444,
1513 .proc_handler = proc_nr_dentry, 1513 .proc_handler = proc_nr_dentry,
1514 }, 1514 },
diff --git a/mm/Makefile b/mm/Makefile
index f00803386a67..305d10acd081 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -17,7 +17,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
17 util.o mmzone.o vmstat.o backing-dev.o \ 17 util.o mmzone.o vmstat.o backing-dev.o \
18 mm_init.o mmu_context.o percpu.o slab_common.o \ 18 mm_init.o mmu_context.o percpu.o slab_common.o \
19 compaction.o balloon_compaction.o \ 19 compaction.o balloon_compaction.o \
20 interval_tree.o $(mmu-y) 20 interval_tree.o list_lru.o $(mmu-y)
21 21
22obj-y += init-mm.o 22obj-y += init-mm.o
23 23
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a92012a71702..d94f7dee3997 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -211,24 +211,29 @@ static void put_huge_zero_page(void)
211 BUG_ON(atomic_dec_and_test(&huge_zero_refcount)); 211 BUG_ON(atomic_dec_and_test(&huge_zero_refcount));
212} 212}
213 213
214static int shrink_huge_zero_page(struct shrinker *shrink, 214static unsigned long shrink_huge_zero_page_count(struct shrinker *shrink,
215 struct shrink_control *sc) 215 struct shrink_control *sc)
216{ 216{
217 if (!sc->nr_to_scan) 217 /* we can free zero page only if last reference remains */
218 /* we can free zero page only if last reference remains */ 218 return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0;
219 return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0; 219}
220 220
221static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
222 struct shrink_control *sc)
223{
221 if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { 224 if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
222 struct page *zero_page = xchg(&huge_zero_page, NULL); 225 struct page *zero_page = xchg(&huge_zero_page, NULL);
223 BUG_ON(zero_page == NULL); 226 BUG_ON(zero_page == NULL);
224 __free_page(zero_page); 227 __free_page(zero_page);
228 return HPAGE_PMD_NR;
225 } 229 }
226 230
227 return 0; 231 return 0;
228} 232}
229 233
230static struct shrinker huge_zero_page_shrinker = { 234static struct shrinker huge_zero_page_shrinker = {
231 .shrink = shrink_huge_zero_page, 235 .count_objects = shrink_huge_zero_page_count,
236 .scan_objects = shrink_huge_zero_page_scan,
232 .seeks = DEFAULT_SEEKS, 237 .seeks = DEFAULT_SEEKS,
233}; 238};
234 239
diff --git a/mm/list_lru.c b/mm/list_lru.c
new file mode 100644
index 000000000000..72467914b856
--- /dev/null
+++ b/mm/list_lru.c
@@ -0,0 +1,139 @@
1/*
2 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
3 * Authors: David Chinner and Glauber Costa
4 *
5 * Generic LRU infrastructure
6 */
7#include <linux/kernel.h>
8#include <linux/module.h>
9#include <linux/mm.h>
10#include <linux/list_lru.h>
11#include <linux/slab.h>
12
13bool list_lru_add(struct list_lru *lru, struct list_head *item)
14{
15 int nid = page_to_nid(virt_to_page(item));
16 struct list_lru_node *nlru = &lru->node[nid];
17
18 spin_lock(&nlru->lock);
19 WARN_ON_ONCE(nlru->nr_items < 0);
20 if (list_empty(item)) {
21 list_add_tail(item, &nlru->list);
22 if (nlru->nr_items++ == 0)
23 node_set(nid, lru->active_nodes);
24 spin_unlock(&nlru->lock);
25 return true;
26 }
27 spin_unlock(&nlru->lock);
28 return false;
29}
30EXPORT_SYMBOL_GPL(list_lru_add);
31
32bool list_lru_del(struct list_lru *lru, struct list_head *item)
33{
34 int nid = page_to_nid(virt_to_page(item));
35 struct list_lru_node *nlru = &lru->node[nid];
36
37 spin_lock(&nlru->lock);
38 if (!list_empty(item)) {
39 list_del_init(item);
40 if (--nlru->nr_items == 0)
41 node_clear(nid, lru->active_nodes);
42 WARN_ON_ONCE(nlru->nr_items < 0);
43 spin_unlock(&nlru->lock);
44 return true;
45 }
46 spin_unlock(&nlru->lock);
47 return false;
48}
49EXPORT_SYMBOL_GPL(list_lru_del);
50
51unsigned long
52list_lru_count_node(struct list_lru *lru, int nid)
53{
54 unsigned long count = 0;
55 struct list_lru_node *nlru = &lru->node[nid];
56
57 spin_lock(&nlru->lock);
58 WARN_ON_ONCE(nlru->nr_items < 0);
59 count += nlru->nr_items;
60 spin_unlock(&nlru->lock);
61
62 return count;
63}
64EXPORT_SYMBOL_GPL(list_lru_count_node);
65
66unsigned long
67list_lru_walk_node(struct list_lru *lru, int nid, list_lru_walk_cb isolate,
68 void *cb_arg, unsigned long *nr_to_walk)
69{
70
71 struct list_lru_node *nlru = &lru->node[nid];
72 struct list_head *item, *n;
73 unsigned long isolated = 0;
74
75 spin_lock(&nlru->lock);
76restart:
77 list_for_each_safe(item, n, &nlru->list) {
78 enum lru_status ret;
79
80 /*
81 * decrement nr_to_walk first so that we don't livelock if we
82 * get stuck on large numbesr of LRU_RETRY items
83 */
84 if (--(*nr_to_walk) == 0)
85 break;
86
87 ret = isolate(item, &nlru->lock, cb_arg);
88 switch (ret) {
89 case LRU_REMOVED:
90 if (--nlru->nr_items == 0)
91 node_clear(nid, lru->active_nodes);
92 WARN_ON_ONCE(nlru->nr_items < 0);
93 isolated++;
94 break;
95 case LRU_ROTATE:
96 list_move_tail(item, &nlru->list);
97 break;
98 case LRU_SKIP:
99 break;
100 case LRU_RETRY:
101 /*
102 * The lru lock has been dropped, our list traversal is
103 * now invalid and so we have to restart from scratch.
104 */
105 goto restart;
106 default:
107 BUG();
108 }
109 }
110
111 spin_unlock(&nlru->lock);
112 return isolated;
113}
114EXPORT_SYMBOL_GPL(list_lru_walk_node);
115
116int list_lru_init(struct list_lru *lru)
117{
118 int i;
119 size_t size = sizeof(*lru->node) * nr_node_ids;
120
121 lru->node = kzalloc(size, GFP_KERNEL);
122 if (!lru->node)
123 return -ENOMEM;
124
125 nodes_clear(lru->active_nodes);
126 for (i = 0; i < nr_node_ids; i++) {
127 spin_lock_init(&lru->node[i].lock);
128 INIT_LIST_HEAD(&lru->node[i].list);
129 lru->node[i].nr_items = 0;
130 }
131 return 0;
132}
133EXPORT_SYMBOL_GPL(list_lru_init);
134
135void list_lru_destroy(struct list_lru *lru)
136{
137 kfree(lru->node);
138}
139EXPORT_SYMBOL_GPL(list_lru_destroy);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index d84c5e5331bb..baa4e0a45dec 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -248,10 +248,12 @@ void shake_page(struct page *p, int access)
248 */ 248 */
249 if (access) { 249 if (access) {
250 int nr; 250 int nr;
251 int nid = page_to_nid(p);
251 do { 252 do {
252 struct shrink_control shrink = { 253 struct shrink_control shrink = {
253 .gfp_mask = GFP_KERNEL, 254 .gfp_mask = GFP_KERNEL,
254 }; 255 };
256 node_set(nid, shrink.nodes_to_scan);
255 257
256 nr = shrink_slab(&shrink, 1000, 1000); 258 nr = shrink_slab(&shrink, 1000, 1000);
257 if (page_count(p) == 1) 259 if (page_count(p) == 1)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2cff0d491c6d..e36454220614 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -155,14 +155,31 @@ static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
155} 155}
156 156
157/* 157/*
158 * Add a shrinker callback to be called from the vm 158 * Add a shrinker callback to be called from the vm.
159 */ 159 */
160void register_shrinker(struct shrinker *shrinker) 160int register_shrinker(struct shrinker *shrinker)
161{ 161{
162 atomic_long_set(&shrinker->nr_in_batch, 0); 162 size_t size = sizeof(*shrinker->nr_deferred);
163
164 /*
165 * If we only have one possible node in the system anyway, save
166 * ourselves the trouble and disable NUMA aware behavior. This way we
167 * will save memory and some small loop time later.
168 */
169 if (nr_node_ids == 1)
170 shrinker->flags &= ~SHRINKER_NUMA_AWARE;
171
172 if (shrinker->flags & SHRINKER_NUMA_AWARE)
173 size *= nr_node_ids;
174
175 shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
176 if (!shrinker->nr_deferred)
177 return -ENOMEM;
178
163 down_write(&shrinker_rwsem); 179 down_write(&shrinker_rwsem);
164 list_add_tail(&shrinker->list, &shrinker_list); 180 list_add_tail(&shrinker->list, &shrinker_list);
165 up_write(&shrinker_rwsem); 181 up_write(&shrinker_rwsem);
182 return 0;
166} 183}
167EXPORT_SYMBOL(register_shrinker); 184EXPORT_SYMBOL(register_shrinker);
168 185
@@ -177,15 +194,102 @@ void unregister_shrinker(struct shrinker *shrinker)
177} 194}
178EXPORT_SYMBOL(unregister_shrinker); 195EXPORT_SYMBOL(unregister_shrinker);
179 196
180static inline int do_shrinker_shrink(struct shrinker *shrinker, 197#define SHRINK_BATCH 128
181 struct shrink_control *sc, 198
182 unsigned long nr_to_scan) 199static unsigned long
183{ 200shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
184 sc->nr_to_scan = nr_to_scan; 201 unsigned long nr_pages_scanned, unsigned long lru_pages)
185 return (*shrinker->shrink)(shrinker, sc); 202{
203 unsigned long freed = 0;
204 unsigned long long delta;
205 long total_scan;
206 long max_pass;
207 long nr;
208 long new_nr;
209 int nid = shrinkctl->nid;
210 long batch_size = shrinker->batch ? shrinker->batch
211 : SHRINK_BATCH;
212
213 max_pass = shrinker->count_objects(shrinker, shrinkctl);
214 if (max_pass == 0)
215 return 0;
216
217 /*
218 * copy the current shrinker scan count into a local variable
219 * and zero it so that other concurrent shrinker invocations
220 * don't also do this scanning work.
221 */
222 nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
223
224 total_scan = nr;
225 delta = (4 * nr_pages_scanned) / shrinker->seeks;
226 delta *= max_pass;
227 do_div(delta, lru_pages + 1);
228 total_scan += delta;
229 if (total_scan < 0) {
230 printk(KERN_ERR
231 "shrink_slab: %pF negative objects to delete nr=%ld\n",
232 shrinker->scan_objects, total_scan);
233 total_scan = max_pass;
234 }
235
236 /*
237 * We need to avoid excessive windup on filesystem shrinkers
238 * due to large numbers of GFP_NOFS allocations causing the
239 * shrinkers to return -1 all the time. This results in a large
240 * nr being built up so when a shrink that can do some work
241 * comes along it empties the entire cache due to nr >>>
242 * max_pass. This is bad for sustaining a working set in
243 * memory.
244 *
245 * Hence only allow the shrinker to scan the entire cache when
246 * a large delta change is calculated directly.
247 */
248 if (delta < max_pass / 4)
249 total_scan = min(total_scan, max_pass / 2);
250
251 /*
252 * Avoid risking looping forever due to too large nr value:
253 * never try to free more than twice the estimate number of
254 * freeable entries.
255 */
256 if (total_scan > max_pass * 2)
257 total_scan = max_pass * 2;
258
259 trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
260 nr_pages_scanned, lru_pages,
261 max_pass, delta, total_scan);
262
263 while (total_scan >= batch_size) {
264 unsigned long ret;
265
266 shrinkctl->nr_to_scan = batch_size;
267 ret = shrinker->scan_objects(shrinker, shrinkctl);
268 if (ret == SHRINK_STOP)
269 break;
270 freed += ret;
271
272 count_vm_events(SLABS_SCANNED, batch_size);
273 total_scan -= batch_size;
274
275 cond_resched();
276 }
277
278 /*
279 * move the unused scan count back into the shrinker in a
280 * manner that handles concurrent updates. If we exhausted the
281 * scan, there is no need to do an update.
282 */
283 if (total_scan > 0)
284 new_nr = atomic_long_add_return(total_scan,
285 &shrinker->nr_deferred[nid]);
286 else
287 new_nr = atomic_long_read(&shrinker->nr_deferred[nid]);
288
289 trace_mm_shrink_slab_end(shrinker, freed, nr, new_nr);
290 return freed;
186} 291}
187 292
188#define SHRINK_BATCH 128
189/* 293/*
190 * Call the shrink functions to age shrinkable caches 294 * Call the shrink functions to age shrinkable caches
191 * 295 *
@@ -205,115 +309,45 @@ static inline int do_shrinker_shrink(struct shrinker *shrinker,
205 * 309 *
206 * Returns the number of slab objects which we shrunk. 310 * Returns the number of slab objects which we shrunk.
207 */ 311 */
208unsigned long shrink_slab(struct shrink_control *shrink, 312unsigned long shrink_slab(struct shrink_control *shrinkctl,
209 unsigned long nr_pages_scanned, 313 unsigned long nr_pages_scanned,
210 unsigned long lru_pages) 314 unsigned long lru_pages)
211{ 315{
212 struct shrinker *shrinker; 316 struct shrinker *shrinker;
213 unsigned long ret = 0; 317 unsigned long freed = 0;
214 318
215 if (nr_pages_scanned == 0) 319 if (nr_pages_scanned == 0)
216 nr_pages_scanned = SWAP_CLUSTER_MAX; 320 nr_pages_scanned = SWAP_CLUSTER_MAX;
217 321
218 if (!down_read_trylock(&shrinker_rwsem)) { 322 if (!down_read_trylock(&shrinker_rwsem)) {
219 /* Assume we'll be able to shrink next time */ 323 /*
220 ret = 1; 324 * If we would return 0, our callers would understand that we
325 * have nothing else to shrink and give up trying. By returning
326 * 1 we keep it going and assume we'll be able to shrink next
327 * time.
328 */
329 freed = 1;
221 goto out; 330 goto out;
222 } 331 }
223 332
224 list_for_each_entry(shrinker, &shrinker_list, list) { 333 list_for_each_entry(shrinker, &shrinker_list, list) {
225 unsigned long long delta; 334 for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
226 long total_scan; 335 if (!node_online(shrinkctl->nid))
227 long max_pass; 336 continue;
228 int shrink_ret = 0;
229 long nr;
230 long new_nr;
231 long batch_size = shrinker->batch ? shrinker->batch
232 : SHRINK_BATCH;
233
234 max_pass = do_shrinker_shrink(shrinker, shrink, 0);
235 if (max_pass <= 0)
236 continue;
237
238 /*
239 * copy the current shrinker scan count into a local variable
240 * and zero it so that other concurrent shrinker invocations
241 * don't also do this scanning work.
242 */
243 nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
244
245 total_scan = nr;
246 delta = (4 * nr_pages_scanned) / shrinker->seeks;
247 delta *= max_pass;
248 do_div(delta, lru_pages + 1);
249 total_scan += delta;
250 if (total_scan < 0) {
251 printk(KERN_ERR "shrink_slab: %pF negative objects to "
252 "delete nr=%ld\n",
253 shrinker->shrink, total_scan);
254 total_scan = max_pass;
255 }
256
257 /*
258 * We need to avoid excessive windup on filesystem shrinkers
259 * due to large numbers of GFP_NOFS allocations causing the
260 * shrinkers to return -1 all the time. This results in a large
261 * nr being built up so when a shrink that can do some work
262 * comes along it empties the entire cache due to nr >>>
263 * max_pass. This is bad for sustaining a working set in
264 * memory.
265 *
266 * Hence only allow the shrinker to scan the entire cache when
267 * a large delta change is calculated directly.
268 */
269 if (delta < max_pass / 4)
270 total_scan = min(total_scan, max_pass / 2);
271
272 /*
273 * Avoid risking looping forever due to too large nr value:
274 * never try to free more than twice the estimate number of
275 * freeable entries.
276 */
277 if (total_scan > max_pass * 2)
278 total_scan = max_pass * 2;
279
280 trace_mm_shrink_slab_start(shrinker, shrink, nr,
281 nr_pages_scanned, lru_pages,
282 max_pass, delta, total_scan);
283
284 while (total_scan >= batch_size) {
285 int nr_before;
286 337
287 nr_before = do_shrinker_shrink(shrinker, shrink, 0); 338 if (!(shrinker->flags & SHRINKER_NUMA_AWARE) &&
288 shrink_ret = do_shrinker_shrink(shrinker, shrink, 339 (shrinkctl->nid != 0))
289 batch_size);
290 if (shrink_ret == -1)
291 break; 340 break;
292 if (shrink_ret < nr_before)
293 ret += nr_before - shrink_ret;
294 count_vm_events(SLABS_SCANNED, batch_size);
295 total_scan -= batch_size;
296 341
297 cond_resched(); 342 freed += shrink_slab_node(shrinkctl, shrinker,
298 } 343 nr_pages_scanned, lru_pages);
299 344
300 /* 345 }
301 * move the unused scan count back into the shrinker in a
302 * manner that handles concurrent updates. If we exhausted the
303 * scan, there is no need to do an update.
304 */
305 if (total_scan > 0)
306 new_nr = atomic_long_add_return(total_scan,
307 &shrinker->nr_in_batch);
308 else
309 new_nr = atomic_long_read(&shrinker->nr_in_batch);
310
311 trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
312 } 346 }
313 up_read(&shrinker_rwsem); 347 up_read(&shrinker_rwsem);
314out: 348out:
315 cond_resched(); 349 cond_resched();
316 return ret; 350 return freed;
317} 351}
318 352
319static inline int is_page_cache_freeable(struct page *page) 353static inline int is_page_cache_freeable(struct page *page)
@@ -2354,12 +2388,16 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
2354 */ 2388 */
2355 if (global_reclaim(sc)) { 2389 if (global_reclaim(sc)) {
2356 unsigned long lru_pages = 0; 2390 unsigned long lru_pages = 0;
2391
2392 nodes_clear(shrink->nodes_to_scan);
2357 for_each_zone_zonelist(zone, z, zonelist, 2393 for_each_zone_zonelist(zone, z, zonelist,
2358 gfp_zone(sc->gfp_mask)) { 2394 gfp_zone(sc->gfp_mask)) {
2359 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) 2395 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
2360 continue; 2396 continue;
2361 2397
2362 lru_pages += zone_reclaimable_pages(zone); 2398 lru_pages += zone_reclaimable_pages(zone);
2399 node_set(zone_to_nid(zone),
2400 shrink->nodes_to_scan);
2363 } 2401 }
2364 2402
2365 shrink_slab(shrink, sc->nr_scanned, lru_pages); 2403 shrink_slab(shrink, sc->nr_scanned, lru_pages);
@@ -2816,6 +2854,8 @@ static bool kswapd_shrink_zone(struct zone *zone,
2816 return true; 2854 return true;
2817 2855
2818 shrink_zone(zone, sc); 2856 shrink_zone(zone, sc);
2857 nodes_clear(shrink.nodes_to_scan);
2858 node_set(zone_to_nid(zone), shrink.nodes_to_scan);
2819 2859
2820 reclaim_state->reclaimed_slab = 0; 2860 reclaim_state->reclaimed_slab = 0;
2821 nr_slab = shrink_slab(&shrink, sc->nr_scanned, lru_pages); 2861 nr_slab = shrink_slab(&shrink, sc->nr_scanned, lru_pages);
@@ -3524,10 +3564,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
3524 * number of slab pages and shake the slab until it is reduced 3564 * number of slab pages and shake the slab until it is reduced
3525 * by the same nr_pages that we used for reclaiming unmapped 3565 * by the same nr_pages that we used for reclaiming unmapped
3526 * pages. 3566 * pages.
3527 *
3528 * Note that shrink_slab will free memory on all zones and may
3529 * take a long time.
3530 */ 3567 */
3568 nodes_clear(shrink.nodes_to_scan);
3569 node_set(zone_to_nid(zone), shrink.nodes_to_scan);
3531 for (;;) { 3570 for (;;) {
3532 unsigned long lru_pages = zone_reclaimable_pages(zone); 3571 unsigned long lru_pages = zone_reclaimable_pages(zone);
3533 3572
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 415159061cd0..5285ead196c0 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -434,12 +434,13 @@ EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache);
434/* 434/*
435 * Remove stale credentials. Avoid sleeping inside the loop. 435 * Remove stale credentials. Avoid sleeping inside the loop.
436 */ 436 */
437static int 437static long
438rpcauth_prune_expired(struct list_head *free, int nr_to_scan) 438rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
439{ 439{
440 spinlock_t *cache_lock; 440 spinlock_t *cache_lock;
441 struct rpc_cred *cred, *next; 441 struct rpc_cred *cred, *next;
442 unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM; 442 unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM;
443 long freed = 0;
443 444
444 list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { 445 list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
445 446
@@ -451,10 +452,11 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
451 */ 452 */
452 if (time_in_range(cred->cr_expire, expired, jiffies) && 453 if (time_in_range(cred->cr_expire, expired, jiffies) &&
453 test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) 454 test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
454 return 0; 455 break;
455 456
456 list_del_init(&cred->cr_lru); 457 list_del_init(&cred->cr_lru);
457 number_cred_unused--; 458 number_cred_unused--;
459 freed++;
458 if (atomic_read(&cred->cr_count) != 0) 460 if (atomic_read(&cred->cr_count) != 0)
459 continue; 461 continue;
460 462
@@ -467,29 +469,39 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
467 } 469 }
468 spin_unlock(cache_lock); 470 spin_unlock(cache_lock);
469 } 471 }
470 return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; 472 return freed;
471} 473}
472 474
473/* 475/*
474 * Run memory cache shrinker. 476 * Run memory cache shrinker.
475 */ 477 */
476static int 478static unsigned long
477rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc) 479rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
480
478{ 481{
479 LIST_HEAD(free); 482 LIST_HEAD(free);
480 int res; 483 unsigned long freed;
481 int nr_to_scan = sc->nr_to_scan; 484
482 gfp_t gfp_mask = sc->gfp_mask; 485 if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL)
486 return SHRINK_STOP;
483 487
484 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) 488 /* nothing left, don't come back */
485 return (nr_to_scan == 0) ? 0 : -1;
486 if (list_empty(&cred_unused)) 489 if (list_empty(&cred_unused))
487 return 0; 490 return SHRINK_STOP;
491
488 spin_lock(&rpc_credcache_lock); 492 spin_lock(&rpc_credcache_lock);
489 res = rpcauth_prune_expired(&free, nr_to_scan); 493 freed = rpcauth_prune_expired(&free, sc->nr_to_scan);
490 spin_unlock(&rpc_credcache_lock); 494 spin_unlock(&rpc_credcache_lock);
491 rpcauth_destroy_credlist(&free); 495 rpcauth_destroy_credlist(&free);
492 return res; 496
497 return freed;
498}
499
500static unsigned long
501rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
502
503{
504 return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
493} 505}
494 506
495/* 507/*
@@ -805,7 +817,8 @@ rpcauth_uptodatecred(struct rpc_task *task)
805} 817}
806 818
807static struct shrinker rpc_cred_shrinker = { 819static struct shrinker rpc_cred_shrinker = {
808 .shrink = rpcauth_cache_shrinker, 820 .count_objects = rpcauth_cache_shrink_count,
821 .scan_objects = rpcauth_cache_shrink_scan,
809 .seeks = DEFAULT_SEEKS, 822 .seeks = DEFAULT_SEEKS,
810}; 823};
811 824