56 files changed, 1777 insertions, 1161 deletions
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 206a1bdc7321..f0890581f7f6 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -451,3 +451,7 @@ in your dentry operations instead.
451 | -- | 451 | -- |
452 | [mandatory] | 452 | [mandatory] |
453 | ->readdir() is gone now; switch to ->iterate() | 453 | ->readdir() is gone now; switch to ->iterate() |
454 | [mandatory] | ||
455 | vfs_follow_link has been removed. Filesystems must use nd_set_link | ||
456 | from ->follow_link for normal symlinks, or nd_jump_link for magic | ||
457 | /proc/<pid> style links. | ||
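As a rough illustration of the porting note above (the examplefs names and the i_link_target field are hypothetical, not taken from any real filesystem), a normal symlink's ->follow_link now hands the target string to the VFS with nd_set_link() instead of calling the removed vfs_follow_link(); magic /proc/<pid>-style links call nd_jump_link() instead:

	static void *examplefs_follow_link(struct dentry *dentry, struct nameidata *nd)
	{
		struct examplefs_inode_info *ei = EXAMPLEFS_I(dentry->d_inode);

		/* replaces the old vfs_follow_link() call */
		nd_set_link(nd, ei->i_link_target);
		return NULL;	/* cookie handed back to ->put_link() later */
	}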
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6e2d2c8f230b..dce0df8150df 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4421,13 +4421,12 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
4421 | } | 4421 | } |
4422 | } | 4422 | } |
4423 | 4423 | ||
4424 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | 4424 | static unsigned long |
4425 | mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) | ||
4425 | { | 4426 | { |
4426 | struct kvm *kvm; | 4427 | struct kvm *kvm; |
4427 | int nr_to_scan = sc->nr_to_scan; | 4428 | int nr_to_scan = sc->nr_to_scan; |
4428 | 4429 | unsigned long freed = 0; | |
4429 | if (nr_to_scan == 0) | ||
4430 | goto out; | ||
4431 | 4430 | ||
4432 | raw_spin_lock(&kvm_lock); | 4431 | raw_spin_lock(&kvm_lock); |
4433 | 4432 | ||
@@ -4462,25 +4461,37 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
4462 | goto unlock; | 4461 | goto unlock; |
4463 | } | 4462 | } |
4464 | 4463 | ||
4465 | prepare_zap_oldest_mmu_page(kvm, &invalid_list); | 4464 | if (prepare_zap_oldest_mmu_page(kvm, &invalid_list)) |
4465 | freed++; | ||
4466 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 4466 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
4467 | 4467 | ||
4468 | unlock: | 4468 | unlock: |
4469 | spin_unlock(&kvm->mmu_lock); | 4469 | spin_unlock(&kvm->mmu_lock); |
4470 | srcu_read_unlock(&kvm->srcu, idx); | 4470 | srcu_read_unlock(&kvm->srcu, idx); |
4471 | 4471 | ||
4472 | /* | ||
4473 | * unfair on small ones | ||
4474 | * per-vm shrinkers cry out | ||
4475 | * sadness comes quickly | ||
4476 | */ | ||
4472 | list_move_tail(&kvm->vm_list, &vm_list); | 4477 | list_move_tail(&kvm->vm_list, &vm_list); |
4473 | break; | 4478 | break; |
4474 | } | 4479 | } |
4475 | 4480 | ||
4476 | raw_spin_unlock(&kvm_lock); | 4481 | raw_spin_unlock(&kvm_lock); |
4482 | return freed; | ||
4477 | 4483 | ||
4478 | out: | 4484 | } |
4485 | |||
4486 | static unsigned long | ||
4487 | mmu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) | ||
4488 | { | ||
4479 | return percpu_counter_read_positive(&kvm_total_used_mmu_pages); | 4489 | return percpu_counter_read_positive(&kvm_total_used_mmu_pages); |
4480 | } | 4490 | } |
4481 | 4491 | ||
4482 | static struct shrinker mmu_shrinker = { | 4492 | static struct shrinker mmu_shrinker = { |
4483 | .shrink = mmu_shrink, | 4493 | .count_objects = mmu_shrink_count, |
4494 | .scan_objects = mmu_shrink_scan, | ||
4484 | .seeks = DEFAULT_SEEKS * 10, | 4495 | .seeks = DEFAULT_SEEKS * 10, |
4485 | }; | 4496 | }; |
4486 | 4497 | ||
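The kvm/mmu.c hunks above show the shape of the whole series: the old single ->shrink() callback, which overloaded nr_to_scan == 0 to mean "just report how much you could free", is split into a count_objects/scan_objects pair, and a scan that cannot make progress returns SHRINK_STOP instead of -1. A minimal sketch of the new contract (the example_* names are hypothetical helpers, not part of this patch):

	#include <linux/shrinker.h>
	#include <linux/atomic.h>

	static atomic_long_t example_nr_cached;			/* objects currently cached */
	static unsigned long example_reclaim(unsigned long nr);	/* hypothetical: frees up to nr, returns number freed */

	static unsigned long example_count(struct shrinker *s, struct shrink_control *sc)
	{
		/* cheap, lock-free estimate of how many objects are freeable */
		return atomic_long_read(&example_nr_cached);
	}

	static unsigned long example_scan(struct shrinker *s, struct shrink_control *sc)
	{
		if (!(sc->gfp_mask & __GFP_FS))
			return SHRINK_STOP;	/* cannot reclaim safely from this context */

		/* free up to sc->nr_to_scan objects and report how many went away */
		return example_reclaim(sc->nr_to_scan);
	}

	static struct shrinker example_shrinker = {
		.count_objects	= example_count,
		.scan_objects	= example_scan,
		.seeks		= DEFAULT_SEEKS,
	};

Registration is unchanged: register_shrinker(&example_shrinker) at init, unregister_shrinker() at teardown; only the callbacks differ.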
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 9b265a4c6a3d..c27a21034a5e 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1676,7 +1676,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
1676 | return 0; | 1676 | return 0; |
1677 | 1677 | ||
1678 | out_gem_unload: | 1678 | out_gem_unload: |
1679 | if (dev_priv->mm.inactive_shrinker.shrink) | 1679 | if (dev_priv->mm.inactive_shrinker.scan_objects) |
1680 | unregister_shrinker(&dev_priv->mm.inactive_shrinker); | 1680 | unregister_shrinker(&dev_priv->mm.inactive_shrinker); |
1681 | 1681 | ||
1682 | if (dev->pdev->msi_enabled) | 1682 | if (dev->pdev->msi_enabled) |
@@ -1715,7 +1715,7 @@ int i915_driver_unload(struct drm_device *dev)
1715 | 1715 | ||
1716 | i915_teardown_sysfs(dev); | 1716 | i915_teardown_sysfs(dev); |
1717 | 1717 | ||
1718 | if (dev_priv->mm.inactive_shrinker.shrink) | 1718 | if (dev_priv->mm.inactive_shrinker.scan_objects) |
1719 | unregister_shrinker(&dev_priv->mm.inactive_shrinker); | 1719 | unregister_shrinker(&dev_priv->mm.inactive_shrinker); |
1720 | 1720 | ||
1721 | mutex_lock(&dev->struct_mutex); | 1721 | mutex_lock(&dev->struct_mutex); |
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d9e337feef14..8507c6d1e642 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -57,10 +57,12 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
57 | struct drm_i915_fence_reg *fence, | 57 | struct drm_i915_fence_reg *fence, |
58 | bool enable); | 58 | bool enable); |
59 | 59 | ||
60 | static int i915_gem_inactive_shrink(struct shrinker *shrinker, | 60 | static unsigned long i915_gem_inactive_count(struct shrinker *shrinker, |
61 | struct shrink_control *sc); | 61 | struct shrink_control *sc); |
62 | static unsigned long i915_gem_inactive_scan(struct shrinker *shrinker, | ||
63 | struct shrink_control *sc); | ||
62 | static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); | 64 | static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); |
63 | static void i915_gem_shrink_all(struct drm_i915_private *dev_priv); | 65 | static long i915_gem_shrink_all(struct drm_i915_private *dev_priv); |
64 | static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); | 66 | static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); |
65 | 67 | ||
66 | static bool cpu_cache_is_coherent(struct drm_device *dev, | 68 | static bool cpu_cache_is_coherent(struct drm_device *dev, |
@@ -1769,16 +1771,21 @@ i915_gem_purge(struct drm_i915_private *dev_priv, long target)
1769 | return __i915_gem_shrink(dev_priv, target, true); | 1771 | return __i915_gem_shrink(dev_priv, target, true); |
1770 | } | 1772 | } |
1771 | 1773 | ||
1772 | static void | 1774 | static long |
1773 | i915_gem_shrink_all(struct drm_i915_private *dev_priv) | 1775 | i915_gem_shrink_all(struct drm_i915_private *dev_priv) |
1774 | { | 1776 | { |
1775 | struct drm_i915_gem_object *obj, *next; | 1777 | struct drm_i915_gem_object *obj, *next; |
1778 | long freed = 0; | ||
1776 | 1779 | ||
1777 | i915_gem_evict_everything(dev_priv->dev); | 1780 | i915_gem_evict_everything(dev_priv->dev); |
1778 | 1781 | ||
1779 | list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, | 1782 | list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, |
1780 | global_list) | 1783 | global_list) { |
1784 | if (obj->pages_pin_count == 0) | ||
1785 | freed += obj->base.size >> PAGE_SHIFT; | ||
1781 | i915_gem_object_put_pages(obj); | 1786 | i915_gem_object_put_pages(obj); |
1787 | } | ||
1788 | return freed; | ||
1782 | } | 1789 | } |
1783 | 1790 | ||
1784 | static int | 1791 | static int |
@@ -4558,7 +4565,8 @@ i915_gem_load(struct drm_device *dev)
4558 | 4565 | ||
4559 | dev_priv->mm.interruptible = true; | 4566 | dev_priv->mm.interruptible = true; |
4560 | 4567 | ||
4561 | dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; | 4568 | dev_priv->mm.inactive_shrinker.scan_objects = i915_gem_inactive_scan; |
4569 | dev_priv->mm.inactive_shrinker.count_objects = i915_gem_inactive_count; | ||
4562 | dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; | 4570 | dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; |
4563 | register_shrinker(&dev_priv->mm.inactive_shrinker); | 4571 | register_shrinker(&dev_priv->mm.inactive_shrinker); |
4564 | } | 4572 | } |
@@ -4781,8 +4789,8 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
4781 | #endif | 4789 | #endif |
4782 | } | 4790 | } |
4783 | 4791 | ||
4784 | static int | 4792 | static unsigned long |
4785 | i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) | 4793 | i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc) |
4786 | { | 4794 | { |
4787 | struct drm_i915_private *dev_priv = | 4795 | struct drm_i915_private *dev_priv = |
4788 | container_of(shrinker, | 4796 | container_of(shrinker, |
@@ -4790,45 +4798,35 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
4790 | mm.inactive_shrinker); | 4798 | mm.inactive_shrinker); |
4791 | struct drm_device *dev = dev_priv->dev; | 4799 | struct drm_device *dev = dev_priv->dev; |
4792 | struct drm_i915_gem_object *obj; | 4800 | struct drm_i915_gem_object *obj; |
4793 | int nr_to_scan = sc->nr_to_scan; | ||
4794 | bool unlock = true; | 4801 | bool unlock = true; |
4795 | int cnt; | 4802 | unsigned long count; |
4796 | 4803 | ||
4797 | if (!mutex_trylock(&dev->struct_mutex)) { | 4804 | if (!mutex_trylock(&dev->struct_mutex)) { |
4798 | if (!mutex_is_locked_by(&dev->struct_mutex, current)) | 4805 | if (!mutex_is_locked_by(&dev->struct_mutex, current)) |
4799 | return 0; | 4806 | return SHRINK_STOP; |
4800 | 4807 | ||
4801 | if (dev_priv->mm.shrinker_no_lock_stealing) | 4808 | if (dev_priv->mm.shrinker_no_lock_stealing) |
4802 | return 0; | 4809 | return SHRINK_STOP; |
4803 | 4810 | ||
4804 | unlock = false; | 4811 | unlock = false; |
4805 | } | 4812 | } |
4806 | 4813 | ||
4807 | if (nr_to_scan) { | 4814 | count = 0; |
4808 | nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan); | ||
4809 | if (nr_to_scan > 0) | ||
4810 | nr_to_scan -= __i915_gem_shrink(dev_priv, nr_to_scan, | ||
4811 | false); | ||
4812 | if (nr_to_scan > 0) | ||
4813 | i915_gem_shrink_all(dev_priv); | ||
4814 | } | ||
4815 | |||
4816 | cnt = 0; | ||
4817 | list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) | 4815 | list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) |
4818 | if (obj->pages_pin_count == 0) | 4816 | if (obj->pages_pin_count == 0) |
4819 | cnt += obj->base.size >> PAGE_SHIFT; | 4817 | count += obj->base.size >> PAGE_SHIFT; |
4820 | 4818 | ||
4821 | list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { | 4819 | list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { |
4822 | if (obj->active) | 4820 | if (obj->active) |
4823 | continue; | 4821 | continue; |
4824 | 4822 | ||
4825 | if (obj->pin_count == 0 && obj->pages_pin_count == 0) | 4823 | if (obj->pin_count == 0 && obj->pages_pin_count == 0) |
4826 | cnt += obj->base.size >> PAGE_SHIFT; | 4824 | count += obj->base.size >> PAGE_SHIFT; |
4827 | } | 4825 | } |
4828 | 4826 | ||
4829 | if (unlock) | 4827 | if (unlock) |
4830 | mutex_unlock(&dev->struct_mutex); | 4828 | mutex_unlock(&dev->struct_mutex); |
4831 | return cnt; | 4829 | return count; |
4832 | } | 4830 | } |
4833 | 4831 | ||
4834 | /* All the new VM stuff */ | 4832 | /* All the new VM stuff */ |
@@ -4892,6 +4890,40 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
4892 | return 0; | 4890 | return 0; |
4893 | } | 4891 | } |
4894 | 4892 | ||
4893 | static unsigned long | ||
4894 | i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc) | ||
4895 | { | ||
4896 | struct drm_i915_private *dev_priv = | ||
4897 | container_of(shrinker, | ||
4898 | struct drm_i915_private, | ||
4899 | mm.inactive_shrinker); | ||
4900 | struct drm_device *dev = dev_priv->dev; | ||
4901 | int nr_to_scan = sc->nr_to_scan; | ||
4902 | unsigned long freed; | ||
4903 | bool unlock = true; | ||
4904 | |||
4905 | if (!mutex_trylock(&dev->struct_mutex)) { | ||
4906 | if (!mutex_is_locked_by(&dev->struct_mutex, current)) | ||
4907 | return 0; | ||
4908 | |||
4909 | if (dev_priv->mm.shrinker_no_lock_stealing) | ||
4910 | return 0; | ||
4911 | |||
4912 | unlock = false; | ||
4913 | } | ||
4914 | |||
4915 | freed = i915_gem_purge(dev_priv, nr_to_scan); | ||
4916 | if (freed < nr_to_scan) | ||
4917 | freed += __i915_gem_shrink(dev_priv, nr_to_scan, | ||
4918 | false); | ||
4919 | if (freed < nr_to_scan) | ||
4920 | freed += i915_gem_shrink_all(dev_priv); | ||
4921 | |||
4922 | if (unlock) | ||
4923 | mutex_unlock(&dev->struct_mutex); | ||
4924 | return freed; | ||
4925 | } | ||
4926 | |||
4895 | struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, | 4927 | struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, |
4896 | struct i915_address_space *vm) | 4928 | struct i915_address_space *vm) |
4897 | { | 4929 | { |
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index bd2a3b40cd12..863bef9f9234 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -377,28 +377,26 @@ out:
377 | return nr_free; | 377 | return nr_free; |
378 | } | 378 | } |
379 | 379 | ||
380 | /* Get good estimation how many pages are free in pools */ | ||
381 | static int ttm_pool_get_num_unused_pages(void) | ||
382 | { | ||
383 | unsigned i; | ||
384 | int total = 0; | ||
385 | for (i = 0; i < NUM_POOLS; ++i) | ||
386 | total += _manager->pools[i].npages; | ||
387 | |||
388 | return total; | ||
389 | } | ||
390 | |||
391 | /** | 380 | /** |
392 | * Callback for mm to request pool to reduce number of page held. | 381 | * Callback for mm to request pool to reduce number of page held. |
382 | * | ||
383 | * XXX: (dchinner) Deadlock warning! | ||
384 | * | ||
385 | * ttm_page_pool_free() does memory allocation using GFP_KERNEL. that means | ||
386 | * this can deadlock when called a sc->gfp_mask that is not equal to | ||
387 | * GFP_KERNEL. | ||
388 | * | ||
389 | * This code is crying out for a shrinker per pool.... | ||
393 | */ | 390 | */ |
394 | static int ttm_pool_mm_shrink(struct shrinker *shrink, | 391 | static unsigned long |
395 | struct shrink_control *sc) | 392 | ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
396 | { | 393 | { |
397 | static atomic_t start_pool = ATOMIC_INIT(0); | 394 | static atomic_t start_pool = ATOMIC_INIT(0); |
398 | unsigned i; | 395 | unsigned i; |
399 | unsigned pool_offset = atomic_add_return(1, &start_pool); | 396 | unsigned pool_offset = atomic_add_return(1, &start_pool); |
400 | struct ttm_page_pool *pool; | 397 | struct ttm_page_pool *pool; |
401 | int shrink_pages = sc->nr_to_scan; | 398 | int shrink_pages = sc->nr_to_scan; |
399 | unsigned long freed = 0; | ||
402 | 400 | ||
403 | pool_offset = pool_offset % NUM_POOLS; | 401 | pool_offset = pool_offset % NUM_POOLS; |
404 | /* select start pool in round robin fashion */ | 402 | /* select start pool in round robin fashion */ |
@@ -408,14 +406,28 @@ static int ttm_pool_mm_shrink(struct shrinker *shrink,
408 | break; | 406 | break; |
409 | pool = &_manager->pools[(i + pool_offset)%NUM_POOLS]; | 407 | pool = &_manager->pools[(i + pool_offset)%NUM_POOLS]; |
410 | shrink_pages = ttm_page_pool_free(pool, nr_free); | 408 | shrink_pages = ttm_page_pool_free(pool, nr_free); |
409 | freed += nr_free - shrink_pages; | ||
411 | } | 410 | } |
412 | /* return estimated number of unused pages in pool */ | 411 | return freed; |
413 | return ttm_pool_get_num_unused_pages(); | 412 | } |
413 | |||
414 | |||
415 | static unsigned long | ||
416 | ttm_pool_shrink_count(struct shrinker *shrink, struct shrink_control *sc) | ||
417 | { | ||
418 | unsigned i; | ||
419 | unsigned long count = 0; | ||
420 | |||
421 | for (i = 0; i < NUM_POOLS; ++i) | ||
422 | count += _manager->pools[i].npages; | ||
423 | |||
424 | return count; | ||
414 | } | 425 | } |
415 | 426 | ||
416 | static void ttm_pool_mm_shrink_init(struct ttm_pool_manager *manager) | 427 | static void ttm_pool_mm_shrink_init(struct ttm_pool_manager *manager) |
417 | { | 428 | { |
418 | manager->mm_shrink.shrink = &ttm_pool_mm_shrink; | 429 | manager->mm_shrink.count_objects = ttm_pool_shrink_count; |
430 | manager->mm_shrink.scan_objects = ttm_pool_shrink_scan; | ||
419 | manager->mm_shrink.seeks = 1; | 431 | manager->mm_shrink.seeks = 1; |
420 | register_shrinker(&manager->mm_shrink); | 432 | register_shrinker(&manager->mm_shrink); |
421 | } | 433 | } |
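The XXX note added above flags a real hazard rather than fixing it: ttm_page_pool_free() allocates with GFP_KERNEL, so running the scan under a more restrictive sc->gfp_mask can deadlock. Other shrinkers converted in this series (ashmem, dm-bufio, the lustre code below) guard against the analogous problem by bailing out early; roughly like this (guarded_scan and example_free_pool_pages are hypothetical):

	static unsigned long
	guarded_scan(struct shrinker *shrink, struct shrink_control *sc)
	{
		/*
		 * Refuse to do work that may allocate or block when the calling
		 * reclaim context cannot tolerate it; the converted drivers test
		 * __GFP_FS, __GFP_IO or __GFP_WAIT depending on what they need.
		 */
		if (!(sc->gfp_mask & __GFP_FS))
			return SHRINK_STOP;

		return example_free_pool_pages(sc->nr_to_scan);
	}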
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index b8b394319b45..7957beeeaf73 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -918,19 +918,6 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev)
918 | } | 918 | } |
919 | EXPORT_SYMBOL_GPL(ttm_dma_populate); | 919 | EXPORT_SYMBOL_GPL(ttm_dma_populate); |
920 | 920 | ||
921 | /* Get good estimation how many pages are free in pools */ | ||
922 | static int ttm_dma_pool_get_num_unused_pages(void) | ||
923 | { | ||
924 | struct device_pools *p; | ||
925 | unsigned total = 0; | ||
926 | |||
927 | mutex_lock(&_manager->lock); | ||
928 | list_for_each_entry(p, &_manager->pools, pools) | ||
929 | total += p->pool->npages_free; | ||
930 | mutex_unlock(&_manager->lock); | ||
931 | return total; | ||
932 | } | ||
933 | |||
934 | /* Put all pages in pages list to correct pool to wait for reuse */ | 921 | /* Put all pages in pages list to correct pool to wait for reuse */ |
935 | void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) | 922 | void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) |
936 | { | 923 | { |
@@ -1002,18 +989,29 @@ EXPORT_SYMBOL_GPL(ttm_dma_unpopulate);
1002 | 989 | ||
1003 | /** | 990 | /** |
1004 | * Callback for mm to request pool to reduce number of page held. | 991 | * Callback for mm to request pool to reduce number of page held. |
992 | * | ||
993 | * XXX: (dchinner) Deadlock warning! | ||
994 | * | ||
995 | * ttm_dma_page_pool_free() does GFP_KERNEL memory allocation, and so attention | ||
996 | * needs to be paid to sc->gfp_mask to determine if this can be done or not. | ||
997 | * GFP_KERNEL memory allocation in a GFP_ATOMIC reclaim context woul dbe really | ||
998 | * bad. | ||
999 | * | ||
1000 | * I'm getting sadder as I hear more pathetical whimpers about needing per-pool | ||
1001 | * shrinkers | ||
1005 | */ | 1002 | */ |
1006 | static int ttm_dma_pool_mm_shrink(struct shrinker *shrink, | 1003 | static unsigned long |
1007 | struct shrink_control *sc) | 1004 | ttm_dma_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
1008 | { | 1005 | { |
1009 | static atomic_t start_pool = ATOMIC_INIT(0); | 1006 | static atomic_t start_pool = ATOMIC_INIT(0); |
1010 | unsigned idx = 0; | 1007 | unsigned idx = 0; |
1011 | unsigned pool_offset = atomic_add_return(1, &start_pool); | 1008 | unsigned pool_offset = atomic_add_return(1, &start_pool); |
1012 | unsigned shrink_pages = sc->nr_to_scan; | 1009 | unsigned shrink_pages = sc->nr_to_scan; |
1013 | struct device_pools *p; | 1010 | struct device_pools *p; |
1011 | unsigned long freed = 0; | ||
1014 | 1012 | ||
1015 | if (list_empty(&_manager->pools)) | 1013 | if (list_empty(&_manager->pools)) |
1016 | return 0; | 1014 | return SHRINK_STOP; |
1017 | 1015 | ||
1018 | mutex_lock(&_manager->lock); | 1016 | mutex_lock(&_manager->lock); |
1019 | pool_offset = pool_offset % _manager->npools; | 1017 | pool_offset = pool_offset % _manager->npools; |
@@ -1029,18 +1027,33 @@ static int ttm_dma_pool_mm_shrink(struct shrinker *shrink,
1029 | continue; | 1027 | continue; |
1030 | nr_free = shrink_pages; | 1028 | nr_free = shrink_pages; |
1031 | shrink_pages = ttm_dma_page_pool_free(p->pool, nr_free); | 1029 | shrink_pages = ttm_dma_page_pool_free(p->pool, nr_free); |
1030 | freed += nr_free - shrink_pages; | ||
1031 | |||
1032 | pr_debug("%s: (%s:%d) Asked to shrink %d, have %d more to go\n", | 1032 | pr_debug("%s: (%s:%d) Asked to shrink %d, have %d more to go\n", |
1033 | p->pool->dev_name, p->pool->name, current->pid, | 1033 | p->pool->dev_name, p->pool->name, current->pid, |
1034 | nr_free, shrink_pages); | 1034 | nr_free, shrink_pages); |
1035 | } | 1035 | } |
1036 | mutex_unlock(&_manager->lock); | 1036 | mutex_unlock(&_manager->lock); |
1037 | /* return estimated number of unused pages in pool */ | 1037 | return freed; |
1038 | return ttm_dma_pool_get_num_unused_pages(); | 1038 | } |
1039 | |||
1040 | static unsigned long | ||
1041 | ttm_dma_pool_shrink_count(struct shrinker *shrink, struct shrink_control *sc) | ||
1042 | { | ||
1043 | struct device_pools *p; | ||
1044 | unsigned long count = 0; | ||
1045 | |||
1046 | mutex_lock(&_manager->lock); | ||
1047 | list_for_each_entry(p, &_manager->pools, pools) | ||
1048 | count += p->pool->npages_free; | ||
1049 | mutex_unlock(&_manager->lock); | ||
1050 | return count; | ||
1039 | } | 1051 | } |
1040 | 1052 | ||
1041 | static void ttm_dma_pool_mm_shrink_init(struct ttm_pool_manager *manager) | 1053 | static void ttm_dma_pool_mm_shrink_init(struct ttm_pool_manager *manager) |
1042 | { | 1054 | { |
1043 | manager->mm_shrink.shrink = &ttm_dma_pool_mm_shrink; | 1055 | manager->mm_shrink.count_objects = ttm_dma_pool_shrink_count; |
1056 | manager->mm_shrink.scan_objects = &ttm_dma_pool_shrink_scan; | ||
1044 | manager->mm_shrink.seeks = 1; | 1057 | manager->mm_shrink.seeks = 1; |
1045 | register_shrinker(&manager->mm_shrink); | 1058 | register_shrinker(&manager->mm_shrink); |
1046 | } | 1059 | } |
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index ee372884c405..f9764e61978b 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -597,24 +597,19 @@ static int mca_reap(struct btree *b, struct closure *cl, unsigned min_order)
597 | return 0; | 597 | return 0; |
598 | } | 598 | } |
599 | 599 | ||
600 | static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) | 600 | static unsigned long bch_mca_scan(struct shrinker *shrink, |
601 | struct shrink_control *sc) | ||
601 | { | 602 | { |
602 | struct cache_set *c = container_of(shrink, struct cache_set, shrink); | 603 | struct cache_set *c = container_of(shrink, struct cache_set, shrink); |
603 | struct btree *b, *t; | 604 | struct btree *b, *t; |
604 | unsigned long i, nr = sc->nr_to_scan; | 605 | unsigned long i, nr = sc->nr_to_scan; |
606 | unsigned long freed = 0; | ||
605 | 607 | ||
606 | if (c->shrinker_disabled) | 608 | if (c->shrinker_disabled) |
607 | return 0; | 609 | return SHRINK_STOP; |
608 | 610 | ||
609 | if (c->try_harder) | 611 | if (c->try_harder) |
610 | return 0; | 612 | return SHRINK_STOP; |
611 | |||
612 | /* | ||
613 | * If nr == 0, we're supposed to return the number of items we have | ||
614 | * cached. Not allowed to return -1. | ||
615 | */ | ||
616 | if (!nr) | ||
617 | return mca_can_free(c) * c->btree_pages; | ||
618 | 613 | ||
619 | /* Return -1 if we can't do anything right now */ | 614 | /* Return -1 if we can't do anything right now */ |
620 | if (sc->gfp_mask & __GFP_WAIT) | 615 | if (sc->gfp_mask & __GFP_WAIT) |
@@ -634,14 +629,14 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc)
634 | 629 | ||
635 | i = 0; | 630 | i = 0; |
636 | list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) { | 631 | list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) { |
637 | if (!nr) | 632 | if (freed >= nr) |
638 | break; | 633 | break; |
639 | 634 | ||
640 | if (++i > 3 && | 635 | if (++i > 3 && |
641 | !mca_reap(b, NULL, 0)) { | 636 | !mca_reap(b, NULL, 0)) { |
642 | mca_data_free(b); | 637 | mca_data_free(b); |
643 | rw_unlock(true, b); | 638 | rw_unlock(true, b); |
644 | --nr; | 639 | freed++; |
645 | } | 640 | } |
646 | } | 641 | } |
647 | 642 | ||
@@ -652,7 +647,7 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc)
652 | if (list_empty(&c->btree_cache)) | 647 | if (list_empty(&c->btree_cache)) |
653 | goto out; | 648 | goto out; |
654 | 649 | ||
655 | for (i = 0; nr && i < c->bucket_cache_used; i++) { | 650 | for (i = 0; (nr--) && i < c->bucket_cache_used; i++) { |
656 | b = list_first_entry(&c->btree_cache, struct btree, list); | 651 | b = list_first_entry(&c->btree_cache, struct btree, list); |
657 | list_rotate_left(&c->btree_cache); | 652 | list_rotate_left(&c->btree_cache); |
658 | 653 | ||
@@ -661,14 +656,27 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc)
661 | mca_bucket_free(b); | 656 | mca_bucket_free(b); |
662 | mca_data_free(b); | 657 | mca_data_free(b); |
663 | rw_unlock(true, b); | 658 | rw_unlock(true, b); |
664 | --nr; | 659 | freed++; |
665 | } else | 660 | } else |
666 | b->accessed = 0; | 661 | b->accessed = 0; |
667 | } | 662 | } |
668 | out: | 663 | out: |
669 | nr = mca_can_free(c) * c->btree_pages; | ||
670 | mutex_unlock(&c->bucket_lock); | 664 | mutex_unlock(&c->bucket_lock); |
671 | return nr; | 665 | return freed; |
666 | } | ||
667 | |||
668 | static unsigned long bch_mca_count(struct shrinker *shrink, | ||
669 | struct shrink_control *sc) | ||
670 | { | ||
671 | struct cache_set *c = container_of(shrink, struct cache_set, shrink); | ||
672 | |||
673 | if (c->shrinker_disabled) | ||
674 | return 0; | ||
675 | |||
676 | if (c->try_harder) | ||
677 | return 0; | ||
678 | |||
679 | return mca_can_free(c) * c->btree_pages; | ||
672 | } | 680 | } |
673 | 681 | ||
674 | void bch_btree_cache_free(struct cache_set *c) | 682 | void bch_btree_cache_free(struct cache_set *c) |
@@ -737,7 +745,8 @@ int bch_btree_cache_alloc(struct cache_set *c)
737 | c->verify_data = NULL; | 745 | c->verify_data = NULL; |
738 | #endif | 746 | #endif |
739 | 747 | ||
740 | c->shrink.shrink = bch_mca_shrink; | 748 | c->shrink.count_objects = bch_mca_count; |
749 | c->shrink.scan_objects = bch_mca_scan; | ||
741 | c->shrink.seeks = 4; | 750 | c->shrink.seeks = 4; |
742 | c->shrink.batch = c->btree_pages * 2; | 751 | c->shrink.batch = c->btree_pages * 2; |
743 | register_shrinker(&c->shrink); | 752 | register_shrinker(&c->shrink); |
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 12a2c2846f99..4fe6ab2fbe2e 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -556,7 +556,7 @@ STORE(__bch_cache_set)
556 | struct shrink_control sc; | 556 | struct shrink_control sc; |
557 | sc.gfp_mask = GFP_KERNEL; | 557 | sc.gfp_mask = GFP_KERNEL; |
558 | sc.nr_to_scan = strtoul_or_return(buf); | 558 | sc.nr_to_scan = strtoul_or_return(buf); |
559 | c->shrink.shrink(&c->shrink, &sc); | 559 | c->shrink.scan_objects(&c->shrink, &sc); |
560 | } | 560 | } |
561 | 561 | ||
562 | sysfs_strtoul(congested_read_threshold_us, | 562 | sysfs_strtoul(congested_read_threshold_us, |
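The sysfs hook above also doubles as the recipe for forcing a shrink by hand now that there is no single ->shrink() entry point: fill in a struct shrink_control and invoke the scan callback directly. A sketch (example_force_shrink is hypothetical):

	static void example_force_shrink(struct shrinker *shrinker, unsigned long nr)
	{
		struct shrink_control sc = {
			.gfp_mask	= GFP_KERNEL,
			.nr_to_scan	= nr,
		};

		shrinker->scan_objects(shrinker, &sc);
	}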
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 5227e079a6e3..173cbb20d104 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1425,62 +1425,75 @@ static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp,
1425 | unsigned long max_jiffies) | 1425 | unsigned long max_jiffies) |
1426 | { | 1426 | { |
1427 | if (jiffies - b->last_accessed < max_jiffies) | 1427 | if (jiffies - b->last_accessed < max_jiffies) |
1428 | return 1; | 1428 | return 0; |
1429 | 1429 | ||
1430 | if (!(gfp & __GFP_IO)) { | 1430 | if (!(gfp & __GFP_IO)) { |
1431 | if (test_bit(B_READING, &b->state) || | 1431 | if (test_bit(B_READING, &b->state) || |
1432 | test_bit(B_WRITING, &b->state) || | 1432 | test_bit(B_WRITING, &b->state) || |
1433 | test_bit(B_DIRTY, &b->state)) | 1433 | test_bit(B_DIRTY, &b->state)) |
1434 | return 1; | 1434 | return 0; |
1435 | } | 1435 | } |
1436 | 1436 | ||
1437 | if (b->hold_count) | 1437 | if (b->hold_count) |
1438 | return 1; | 1438 | return 0; |
1439 | 1439 | ||
1440 | __make_buffer_clean(b); | 1440 | __make_buffer_clean(b); |
1441 | __unlink_buffer(b); | 1441 | __unlink_buffer(b); |
1442 | __free_buffer_wake(b); | 1442 | __free_buffer_wake(b); |
1443 | 1443 | ||
1444 | return 0; | 1444 | return 1; |
1445 | } | 1445 | } |
1446 | 1446 | ||
1447 | static void __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, | 1447 | static long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, |
1448 | struct shrink_control *sc) | 1448 | gfp_t gfp_mask) |
1449 | { | 1449 | { |
1450 | int l; | 1450 | int l; |
1451 | struct dm_buffer *b, *tmp; | 1451 | struct dm_buffer *b, *tmp; |
1452 | long freed = 0; | ||
1452 | 1453 | ||
1453 | for (l = 0; l < LIST_SIZE; l++) { | 1454 | for (l = 0; l < LIST_SIZE; l++) { |
1454 | list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) | 1455 | list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) { |
1455 | if (!__cleanup_old_buffer(b, sc->gfp_mask, 0) && | 1456 | freed += __cleanup_old_buffer(b, gfp_mask, 0); |
1456 | !--nr_to_scan) | 1457 | if (!--nr_to_scan) |
1457 | return; | 1458 | break; |
1459 | } | ||
1458 | dm_bufio_cond_resched(); | 1460 | dm_bufio_cond_resched(); |
1459 | } | 1461 | } |
1462 | return freed; | ||
1460 | } | 1463 | } |
1461 | 1464 | ||
1462 | static int shrink(struct shrinker *shrinker, struct shrink_control *sc) | 1465 | static unsigned long |
1466 | dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) | ||
1463 | { | 1467 | { |
1464 | struct dm_bufio_client *c = | 1468 | struct dm_bufio_client *c; |
1465 | container_of(shrinker, struct dm_bufio_client, shrinker); | 1469 | unsigned long freed; |
1466 | unsigned long r; | ||
1467 | unsigned long nr_to_scan = sc->nr_to_scan; | ||
1468 | 1470 | ||
1471 | c = container_of(shrink, struct dm_bufio_client, shrinker); | ||
1469 | if (sc->gfp_mask & __GFP_IO) | 1472 | if (sc->gfp_mask & __GFP_IO) |
1470 | dm_bufio_lock(c); | 1473 | dm_bufio_lock(c); |
1471 | else if (!dm_bufio_trylock(c)) | 1474 | else if (!dm_bufio_trylock(c)) |
1472 | return !nr_to_scan ? 0 : -1; | 1475 | return SHRINK_STOP; |
1473 | 1476 | ||
1474 | if (nr_to_scan) | 1477 | freed = __scan(c, sc->nr_to_scan, sc->gfp_mask); |
1475 | __scan(c, nr_to_scan, sc); | 1478 | dm_bufio_unlock(c); |
1479 | return freed; | ||
1480 | } | ||
1476 | 1481 | ||
1477 | r = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; | 1482 | static unsigned long |
1478 | if (r > INT_MAX) | 1483 | dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) |
1479 | r = INT_MAX; | 1484 | { |
1485 | struct dm_bufio_client *c; | ||
1486 | unsigned long count; | ||
1480 | 1487 | ||
1481 | dm_bufio_unlock(c); | 1488 | c = container_of(shrink, struct dm_bufio_client, shrinker); |
1489 | if (sc->gfp_mask & __GFP_IO) | ||
1490 | dm_bufio_lock(c); | ||
1491 | else if (!dm_bufio_trylock(c)) | ||
1492 | return 0; | ||
1482 | 1493 | ||
1483 | return r; | 1494 | count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; |
1495 | dm_bufio_unlock(c); | ||
1496 | return count; | ||
1484 | } | 1497 | } |
1485 | 1498 | ||
1486 | /* | 1499 | /* |
@@ -1582,7 +1595,8 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
1582 | __cache_size_refresh(); | 1595 | __cache_size_refresh(); |
1583 | mutex_unlock(&dm_bufio_clients_lock); | 1596 | mutex_unlock(&dm_bufio_clients_lock); |
1584 | 1597 | ||
1585 | c->shrinker.shrink = shrink; | 1598 | c->shrinker.count_objects = dm_bufio_shrink_count; |
1599 | c->shrinker.scan_objects = dm_bufio_shrink_scan; | ||
1586 | c->shrinker.seeks = 1; | 1600 | c->shrinker.seeks = 1; |
1587 | c->shrinker.batch = 0; | 1601 | c->shrinker.batch = 0; |
1588 | register_shrinker(&c->shrinker); | 1602 | register_shrinker(&c->shrinker); |
@@ -1669,7 +1683,7 @@ static void cleanup_old_buffers(void)
1669 | struct dm_buffer *b; | 1683 | struct dm_buffer *b; |
1670 | b = list_entry(c->lru[LIST_CLEAN].prev, | 1684 | b = list_entry(c->lru[LIST_CLEAN].prev, |
1671 | struct dm_buffer, lru_list); | 1685 | struct dm_buffer, lru_list); |
1672 | if (__cleanup_old_buffer(b, 0, max_age * HZ)) | 1686 | if (!__cleanup_old_buffer(b, 0, max_age * HZ)) |
1673 | break; | 1687 | break; |
1674 | dm_bufio_cond_resched(); | 1688 | dm_bufio_cond_resched(); |
1675 | } | 1689 | } |
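The subtle part of the dm-bufio conversion is that __cleanup_old_buffer() now returns 1 when it actually freed a buffer and 0 when it left it alone, the inverse of its old convention. That lets __scan() accumulate its return values directly into 'freed', and it is why the caller in cleanup_old_buffers() at the end of the hunk gains a '!' on its test.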
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index 21a3f7250531..8e76ddca0999 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -341,27 +341,26 @@ out:
341 | /* | 341 | /* |
342 | * ashmem_shrink - our cache shrinker, called from mm/vmscan.c :: shrink_slab | 342 | * ashmem_shrink - our cache shrinker, called from mm/vmscan.c :: shrink_slab |
343 | * | 343 | * |
344 | * 'nr_to_scan' is the number of objects (pages) to prune, or 0 to query how | 344 | * 'nr_to_scan' is the number of objects to scan for freeing. |
345 | * many objects (pages) we have in total. | ||
346 | * | 345 | * |
347 | * 'gfp_mask' is the mask of the allocation that got us into this mess. | 346 | * 'gfp_mask' is the mask of the allocation that got us into this mess. |
348 | * | 347 | * |
349 | * Return value is the number of objects (pages) remaining, or -1 if we cannot | 348 | * Return value is the number of objects freed or -1 if we cannot |
350 | * proceed without risk of deadlock (due to gfp_mask). | 349 | * proceed without risk of deadlock (due to gfp_mask). |
351 | * | 350 | * |
352 | * We approximate LRU via least-recently-unpinned, jettisoning unpinned partial | 351 | * We approximate LRU via least-recently-unpinned, jettisoning unpinned partial |
353 | * chunks of ashmem regions LRU-wise one-at-a-time until we hit 'nr_to_scan' | 352 | * chunks of ashmem regions LRU-wise one-at-a-time until we hit 'nr_to_scan' |
354 | * pages freed. | 353 | * pages freed. |
355 | */ | 354 | */ |
356 | static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc) | 355 | static unsigned long |
356 | ashmem_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) | ||
357 | { | 357 | { |
358 | struct ashmem_range *range, *next; | 358 | struct ashmem_range *range, *next; |
359 | unsigned long freed = 0; | ||
359 | 360 | ||
360 | /* We might recurse into filesystem code, so bail out if necessary */ | 361 | /* We might recurse into filesystem code, so bail out if necessary */ |
361 | if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS)) | 362 | if (!(sc->gfp_mask & __GFP_FS)) |
362 | return -1; | 363 | return SHRINK_STOP; |
363 | if (!sc->nr_to_scan) | ||
364 | return lru_count; | ||
365 | 364 | ||
366 | mutex_lock(&ashmem_mutex); | 365 | mutex_lock(&ashmem_mutex); |
367 | list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) { | 366 | list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) { |
@@ -374,17 +373,32 @@ static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc)
374 | range->purged = ASHMEM_WAS_PURGED; | 373 | range->purged = ASHMEM_WAS_PURGED; |
375 | lru_del(range); | 374 | lru_del(range); |
376 | 375 | ||
377 | sc->nr_to_scan -= range_size(range); | 376 | freed += range_size(range); |
378 | if (sc->nr_to_scan <= 0) | 377 | if (--sc->nr_to_scan <= 0) |
379 | break; | 378 | break; |
380 | } | 379 | } |
381 | mutex_unlock(&ashmem_mutex); | 380 | mutex_unlock(&ashmem_mutex); |
381 | return freed; | ||
382 | } | ||
382 | 383 | ||
384 | static unsigned long | ||
385 | ashmem_shrink_count(struct shrinker *shrink, struct shrink_control *sc) | ||
386 | { | ||
387 | /* | ||
388 | * note that lru_count is count of pages on the lru, not a count of | ||
389 | * objects on the list. This means the scan function needs to return the | ||
390 | * number of pages freed, not the number of objects scanned. | ||
391 | */ | ||
383 | return lru_count; | 392 | return lru_count; |
384 | } | 393 | } |
385 | 394 | ||
386 | static struct shrinker ashmem_shrinker = { | 395 | static struct shrinker ashmem_shrinker = { |
387 | .shrink = ashmem_shrink, | 396 | .count_objects = ashmem_shrink_count, |
397 | .scan_objects = ashmem_shrink_scan, | ||
398 | /* | ||
399 | * XXX (dchinner): I wish people would comment on why they need on | ||
400 | * significant changes to the default value here | ||
401 | */ | ||
388 | .seeks = DEFAULT_SEEKS * 4, | 402 | .seeks = DEFAULT_SEEKS * 4, |
389 | }; | 403 | }; |
390 | 404 | ||
@@ -690,11 +704,11 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
690 | if (capable(CAP_SYS_ADMIN)) { | 704 | if (capable(CAP_SYS_ADMIN)) { |
691 | struct shrink_control sc = { | 705 | struct shrink_control sc = { |
692 | .gfp_mask = GFP_KERNEL, | 706 | .gfp_mask = GFP_KERNEL, |
693 | .nr_to_scan = 0, | 707 | .nr_to_scan = LONG_MAX, |
694 | }; | 708 | }; |
695 | ret = ashmem_shrink(&ashmem_shrinker, &sc); | 709 | |
696 | sc.nr_to_scan = ret; | 710 | nodes_setall(sc.nodes_to_scan); |
697 | ashmem_shrink(&ashmem_shrinker, &sc); | 711 | ashmem_shrink_scan(&ashmem_shrinker, &sc); |
698 | } | 712 | } |
699 | break; | 713 | break; |
700 | } | 714 | } |
diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c
index fe74494868ef..6f094b37f1f1 100644
--- a/drivers/staging/android/lowmemorykiller.c
+++ b/drivers/staging/android/lowmemorykiller.c
@@ -66,11 +66,20 @@ static unsigned long lowmem_deathpending_timeout;
66 | pr_info(x); \ | 66 | pr_info(x); \ |
67 | } while (0) | 67 | } while (0) |
68 | 68 | ||
69 | static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) | 69 | static unsigned long lowmem_count(struct shrinker *s, |
70 | struct shrink_control *sc) | ||
71 | { | ||
72 | return global_page_state(NR_ACTIVE_ANON) + | ||
73 | global_page_state(NR_ACTIVE_FILE) + | ||
74 | global_page_state(NR_INACTIVE_ANON) + | ||
75 | global_page_state(NR_INACTIVE_FILE); | ||
76 | } | ||
77 | |||
78 | static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) | ||
70 | { | 79 | { |
71 | struct task_struct *tsk; | 80 | struct task_struct *tsk; |
72 | struct task_struct *selected = NULL; | 81 | struct task_struct *selected = NULL; |
73 | int rem = 0; | 82 | unsigned long rem = 0; |
74 | int tasksize; | 83 | int tasksize; |
75 | int i; | 84 | int i; |
76 | short min_score_adj = OOM_SCORE_ADJ_MAX + 1; | 85 | short min_score_adj = OOM_SCORE_ADJ_MAX + 1; |
@@ -92,19 +101,17 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
92 | break; | 101 | break; |
93 | } | 102 | } |
94 | } | 103 | } |
95 | if (sc->nr_to_scan > 0) | 104 | |
96 | lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %hd\n", | 105 | lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n", |
97 | sc->nr_to_scan, sc->gfp_mask, other_free, | 106 | sc->nr_to_scan, sc->gfp_mask, other_free, |
98 | other_file, min_score_adj); | 107 | other_file, min_score_adj); |
99 | rem = global_page_state(NR_ACTIVE_ANON) + | 108 | |
100 | global_page_state(NR_ACTIVE_FILE) + | 109 | if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) { |
101 | global_page_state(NR_INACTIVE_ANON) + | 110 | lowmem_print(5, "lowmem_scan %lu, %x, return 0\n", |
102 | global_page_state(NR_INACTIVE_FILE); | 111 | sc->nr_to_scan, sc->gfp_mask); |
103 | if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { | 112 | return 0; |
104 | lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", | ||
105 | sc->nr_to_scan, sc->gfp_mask, rem); | ||
106 | return rem; | ||
107 | } | 113 | } |
114 | |||
108 | selected_oom_score_adj = min_score_adj; | 115 | selected_oom_score_adj = min_score_adj; |
109 | 116 | ||
110 | rcu_read_lock(); | 117 | rcu_read_lock(); |
@@ -154,16 +161,18 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
154 | lowmem_deathpending_timeout = jiffies + HZ; | 161 | lowmem_deathpending_timeout = jiffies + HZ; |
155 | send_sig(SIGKILL, selected, 0); | 162 | send_sig(SIGKILL, selected, 0); |
156 | set_tsk_thread_flag(selected, TIF_MEMDIE); | 163 | set_tsk_thread_flag(selected, TIF_MEMDIE); |
157 | rem -= selected_tasksize; | 164 | rem += selected_tasksize; |
158 | } | 165 | } |
159 | lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", | 166 | |
167 | lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n", | ||
160 | sc->nr_to_scan, sc->gfp_mask, rem); | 168 | sc->nr_to_scan, sc->gfp_mask, rem); |
161 | rcu_read_unlock(); | 169 | rcu_read_unlock(); |
162 | return rem; | 170 | return rem; |
163 | } | 171 | } |
164 | 172 | ||
165 | static struct shrinker lowmem_shrinker = { | 173 | static struct shrinker lowmem_shrinker = { |
166 | .shrink = lowmem_shrink, | 174 | .scan_objects = lowmem_scan, |
175 | .count_objects = lowmem_count, | ||
167 | .seeks = DEFAULT_SEEKS * 16 | 176 | .seeks = DEFAULT_SEEKS * 16 |
168 | }; | 177 | }; |
169 | 178 | ||
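After the split, lowmem_count() reports the sum of active and inactive anonymous and file pages as the reclaimable estimate, while lowmem_scan() returns the size of the task it killed (rem += selected_tasksize) instead of the old "pages remaining" figure, so the kill is credited to vmscan as reclaim progress.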
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
index 63efb7b456c6..2af15d41e77a 100644
--- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
@@ -79,42 +79,4 @@
79 | do { __oldfs = get_fs(); set_fs(get_ds());} while(0) | 79 | do { __oldfs = get_fs(); set_fs(get_ds());} while(0) |
80 | #define MMSPACE_CLOSE set_fs(__oldfs) | 80 | #define MMSPACE_CLOSE set_fs(__oldfs) |
81 | 81 | ||
82 | /* | ||
83 | * Shrinker | ||
84 | */ | ||
85 | |||
86 | # define SHRINKER_ARGS(sc, nr_to_scan, gfp_mask) \ | ||
87 | struct shrinker *shrinker, \ | ||
88 | struct shrink_control *sc | ||
89 | # define shrink_param(sc, var) ((sc)->var) | ||
90 | |||
91 | typedef int (*shrinker_t)(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)); | ||
92 | |||
93 | static inline | ||
94 | struct shrinker *set_shrinker(int seek, shrinker_t func) | ||
95 | { | ||
96 | struct shrinker *s; | ||
97 | |||
98 | s = kmalloc(sizeof(*s), GFP_KERNEL); | ||
99 | if (s == NULL) | ||
100 | return (NULL); | ||
101 | |||
102 | s->shrink = func; | ||
103 | s->seeks = seek; | ||
104 | |||
105 | register_shrinker(s); | ||
106 | |||
107 | return s; | ||
108 | } | ||
109 | |||
110 | static inline | ||
111 | void remove_shrinker(struct shrinker *shrinker) | ||
112 | { | ||
113 | if (shrinker == NULL) | ||
114 | return; | ||
115 | |||
116 | unregister_shrinker(shrinker); | ||
117 | kfree(shrinker); | ||
118 | } | ||
119 | |||
120 | #endif /* __LINUX_CFS_MEM_H__ */ | 82 | #endif /* __LINUX_CFS_MEM_H__ */ |
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
index 454027d68d54..0025ee6356da 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
@@ -521,7 +521,7 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
521 | int nr, unsigned int gfp_mask) | 521 | int nr, unsigned int gfp_mask) |
522 | { | 522 | { |
523 | struct ldlm_namespace *ns; | 523 | struct ldlm_namespace *ns; |
524 | int canceled = 0, unused; | 524 | int unused; |
525 | 525 | ||
526 | ns = ldlm_pl2ns(pl); | 526 | ns = ldlm_pl2ns(pl); |
527 | 527 | ||
@@ -540,14 +540,10 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
540 | unused = ns->ns_nr_unused; | 540 | unused = ns->ns_nr_unused; |
541 | spin_unlock(&ns->ns_lock); | 541 | spin_unlock(&ns->ns_lock); |
542 | 542 | ||
543 | if (nr) { | 543 | if (nr == 0) |
544 | canceled = ldlm_cancel_lru(ns, nr, LCF_ASYNC, | 544 | return (unused / 100) * sysctl_vfs_cache_pressure; |
545 | LDLM_CANCEL_SHRINK); | 545 | else |
546 | } | 546 | return ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_CANCEL_SHRINK); |
547 | /* | ||
548 | * Return the number of potentially reclaimable locks. | ||
549 | */ | ||
550 | return ((unused - canceled) / 100) * sysctl_vfs_cache_pressure; | ||
551 | } | 547 | } |
552 | 548 | ||
553 | struct ldlm_pool_ops ldlm_srv_pool_ops = { | 549 | struct ldlm_pool_ops ldlm_srv_pool_ops = { |
@@ -601,9 +597,10 @@ int ldlm_pool_recalc(struct ldlm_pool *pl)
601 | return recalc_interval_sec; | 597 | return recalc_interval_sec; |
602 | } | 598 | } |
603 | 599 | ||
604 | /** | 600 | /* |
605 | * Pool shrink wrapper. Will call either client or server pool recalc callback | 601 | * Pool shrink wrapper. Will call either client or server pool recalc callback |
606 | * depending what pool \a pl is used. | 602 | * depending what pool pl is used. When nr == 0, just return the number of |
603 | * freeable locks. Otherwise, return the number of canceled locks. | ||
607 | */ | 604 | */ |
608 | int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, | 605 | int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, |
609 | unsigned int gfp_mask) | 606 | unsigned int gfp_mask) |
@@ -1017,29 +1014,24 @@ static int ldlm_pool_granted(struct ldlm_pool *pl)
1017 | } | 1014 | } |
1018 | 1015 | ||
1019 | static struct ptlrpc_thread *ldlm_pools_thread; | 1016 | static struct ptlrpc_thread *ldlm_pools_thread; |
1020 | static struct shrinker *ldlm_pools_srv_shrinker; | ||
1021 | static struct shrinker *ldlm_pools_cli_shrinker; | ||
1022 | static struct completion ldlm_pools_comp; | 1017 | static struct completion ldlm_pools_comp; |
1023 | 1018 | ||
1024 | /* | 1019 | /* |
1025 | * Cancel \a nr locks from all namespaces (if possible). Returns number of | 1020 | * count locks from all namespaces (if possible). Returns number of |
1026 | * cached locks after shrink is finished. All namespaces are asked to | 1021 | * cached locks. |
1027 | * cancel approximately equal amount of locks to keep balancing. | ||
1028 | */ | 1022 | */ |
1029 | static int ldlm_pools_shrink(ldlm_side_t client, int nr, | 1023 | static unsigned long ldlm_pools_count(ldlm_side_t client, unsigned int gfp_mask) |
1030 | unsigned int gfp_mask) | ||
1031 | { | 1024 | { |
1032 | int total = 0, cached = 0, nr_ns; | 1025 | int total = 0, nr_ns; |
1033 | struct ldlm_namespace *ns; | 1026 | struct ldlm_namespace *ns; |
1034 | struct ldlm_namespace *ns_old = NULL; /* loop detection */ | 1027 | struct ldlm_namespace *ns_old = NULL; /* loop detection */ |
1035 | void *cookie; | 1028 | void *cookie; |
1036 | 1029 | ||
1037 | if (client == LDLM_NAMESPACE_CLIENT && nr != 0 && | 1030 | if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS)) |
1038 | !(gfp_mask & __GFP_FS)) | 1031 | return 0; |
1039 | return -1; | ||
1040 | 1032 | ||
1041 | CDEBUG(D_DLMTRACE, "Request to shrink %d %s locks from all pools\n", | 1033 | CDEBUG(D_DLMTRACE, "Request to count %s locks from all pools\n", |
1042 | nr, client == LDLM_NAMESPACE_CLIENT ? "client" : "server"); | 1034 | client == LDLM_NAMESPACE_CLIENT ? "client" : "server"); |
1043 | 1035 | ||
1044 | cookie = cl_env_reenter(); | 1036 | cookie = cl_env_reenter(); |
1045 | 1037 | ||
@@ -1047,8 +1039,7 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
1047 | * Find out how many resources we may release. | 1039 | * Find out how many resources we may release. |
1048 | */ | 1040 | */ |
1049 | for (nr_ns = ldlm_namespace_nr_read(client); | 1041 | for (nr_ns = ldlm_namespace_nr_read(client); |
1050 | nr_ns > 0; nr_ns--) | 1042 | nr_ns > 0; nr_ns--) { |
1051 | { | ||
1052 | mutex_lock(ldlm_namespace_lock(client)); | 1043 | mutex_lock(ldlm_namespace_lock(client)); |
1053 | if (list_empty(ldlm_namespace_list(client))) { | 1044 | if (list_empty(ldlm_namespace_list(client))) { |
1054 | mutex_unlock(ldlm_namespace_lock(client)); | 1045 | mutex_unlock(ldlm_namespace_lock(client)); |
@@ -1078,17 +1069,27 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
1078 | ldlm_namespace_put(ns); | 1069 | ldlm_namespace_put(ns); |
1079 | } | 1070 | } |
1080 | 1071 | ||
1081 | if (nr == 0 || total == 0) { | 1072 | cl_env_reexit(cookie); |
1082 | cl_env_reexit(cookie); | 1073 | return total; |
1083 | return total; | 1074 | } |
1084 | } | 1075 | |
1076 | static unsigned long ldlm_pools_scan(ldlm_side_t client, int nr, unsigned int gfp_mask) | ||
1077 | { | ||
1078 | unsigned long freed = 0; | ||
1079 | int tmp, nr_ns; | ||
1080 | struct ldlm_namespace *ns; | ||
1081 | void *cookie; | ||
1082 | |||
1083 | if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS)) | ||
1084 | return -1; | ||
1085 | |||
1086 | cookie = cl_env_reenter(); | ||
1085 | 1087 | ||
1086 | /* | 1088 | /* |
1087 | * Shrink at least ldlm_namespace_nr(client) namespaces. | 1089 | * Shrink at least ldlm_namespace_nr_read(client) namespaces. |
1088 | */ | 1090 | */ |
1089 | for (nr_ns = ldlm_namespace_nr_read(client) - nr_ns; | 1091 | for (tmp = nr_ns = ldlm_namespace_nr_read(client); |
1090 | nr_ns > 0; nr_ns--) | 1092 | tmp > 0; tmp--) { |
1091 | { | ||
1092 | int cancel, nr_locks; | 1093 | int cancel, nr_locks; |
1093 | 1094 | ||
1094 | /* | 1095 | /* |
@@ -1097,12 +1098,6 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
1097 | mutex_lock(ldlm_namespace_lock(client)); | 1098 | mutex_lock(ldlm_namespace_lock(client)); |
1098 | if (list_empty(ldlm_namespace_list(client))) { | 1099 | if (list_empty(ldlm_namespace_list(client))) { |
1099 | mutex_unlock(ldlm_namespace_lock(client)); | 1100 | mutex_unlock(ldlm_namespace_lock(client)); |
1100 | /* | ||
1101 | * If list is empty, we can't return any @cached > 0, | ||
1102 | * that probably would cause needless shrinker | ||
1103 | * call. | ||
1104 | */ | ||
1105 | cached = 0; | ||
1106 | break; | 1101 | break; |
1107 | } | 1102 | } |
1108 | ns = ldlm_namespace_first_locked(client); | 1103 | ns = ldlm_namespace_first_locked(client); |
@@ -1111,29 +1106,42 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
1111 | mutex_unlock(ldlm_namespace_lock(client)); | 1106 | mutex_unlock(ldlm_namespace_lock(client)); |
1112 | 1107 | ||
1113 | nr_locks = ldlm_pool_granted(&ns->ns_pool); | 1108 | nr_locks = ldlm_pool_granted(&ns->ns_pool); |
1114 | cancel = 1 + nr_locks * nr / total; | 1109 | /* |
1115 | ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask); | 1110 | * We use to shrink propotionally but with new shrinker API, |
1116 | cached += ldlm_pool_granted(&ns->ns_pool); | 1111 | * we lost the total number of freeable locks. |
1112 | */ | ||
1113 | cancel = 1 + min_t(int, nr_locks, nr / nr_ns); | ||
1114 | freed += ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask); | ||
1117 | ldlm_namespace_put(ns); | 1115 | ldlm_namespace_put(ns); |
1118 | } | 1116 | } |
1119 | cl_env_reexit(cookie); | 1117 | cl_env_reexit(cookie); |
1120 | /* we only decrease the SLV in server pools shrinker, return -1 to | 1118 | /* |
1121 | * kernel to avoid needless loop. LU-1128 */ | 1119 | * we only decrease the SLV in server pools shrinker, return |
1122 | return (client == LDLM_NAMESPACE_SERVER) ? -1 : cached; | 1120 | * SHRINK_STOP to kernel to avoid needless loop. LU-1128 |
1121 | */ | ||
1122 | return (client == LDLM_NAMESPACE_SERVER) ? SHRINK_STOP : freed; | ||
1123 | } | ||
1124 | |||
1125 | static unsigned long ldlm_pools_srv_count(struct shrinker *s, struct shrink_control *sc) | ||
1126 | { | ||
1127 | return ldlm_pools_count(LDLM_NAMESPACE_SERVER, sc->gfp_mask); | ||
1123 | } | 1128 | } |
1124 | 1129 | ||
1125 | static int ldlm_pools_srv_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) | 1130 | static unsigned long ldlm_pools_srv_scan(struct shrinker *s, struct shrink_control *sc) |
1126 | { | 1131 | { |
1127 | return ldlm_pools_shrink(LDLM_NAMESPACE_SERVER, | 1132 | return ldlm_pools_scan(LDLM_NAMESPACE_SERVER, sc->nr_to_scan, |
1128 | shrink_param(sc, nr_to_scan), | 1133 | sc->gfp_mask); |
1129 | shrink_param(sc, gfp_mask)); | ||
1130 | } | 1134 | } |
1131 | 1135 | ||
1132 | static int ldlm_pools_cli_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) | 1136 | static unsigned long ldlm_pools_cli_count(struct shrinker *s, struct shrink_control *sc) |
1133 | { | 1137 | { |
1134 | return ldlm_pools_shrink(LDLM_NAMESPACE_CLIENT, | 1138 | return ldlm_pools_count(LDLM_NAMESPACE_CLIENT, sc->gfp_mask); |
1135 | shrink_param(sc, nr_to_scan), | 1139 | } |
1136 | shrink_param(sc, gfp_mask)); | 1140 | |
1141 | static unsigned long ldlm_pools_cli_scan(struct shrinker *s, struct shrink_control *sc) | ||
1142 | { | ||
1143 | return ldlm_pools_scan(LDLM_NAMESPACE_CLIENT, sc->nr_to_scan, | ||
1144 | sc->gfp_mask); | ||
1137 | } | 1145 | } |
1138 | 1146 | ||
1139 | int ldlm_pools_recalc(ldlm_side_t client) | 1147 | int ldlm_pools_recalc(ldlm_side_t client) |
@@ -1216,7 +1224,7 @@ int ldlm_pools_recalc(ldlm_side_t client)
1216 | } | 1224 | } |
1217 | 1225 | ||
1218 | /* | 1226 | /* |
1219 | * Recalc at least ldlm_namespace_nr(client) namespaces. | 1227 | * Recalc at least ldlm_namespace_nr_read(client) namespaces. |
1220 | */ | 1228 | */ |
1221 | for (nr = ldlm_namespace_nr_read(client); nr > 0; nr--) { | 1229 | for (nr = ldlm_namespace_nr_read(client); nr > 0; nr--) { |
1222 | int skip; | 1230 | int skip; |
@@ -1383,18 +1391,26 @@ static void ldlm_pools_thread_stop(void)
1383 | ldlm_pools_thread = NULL; | 1391 | ldlm_pools_thread = NULL; |
1384 | } | 1392 | } |
1385 | 1393 | ||
1394 | static struct shrinker ldlm_pools_srv_shrinker = { | ||
1395 | .count_objects = ldlm_pools_srv_count, | ||
1396 | .scan_objects = ldlm_pools_srv_scan, | ||
1397 | .seeks = DEFAULT_SEEKS, | ||
1398 | }; | ||
1399 | |||
1400 | static struct shrinker ldlm_pools_cli_shrinker = { | ||
1401 | .count_objects = ldlm_pools_cli_count, | ||
1402 | .scan_objects = ldlm_pools_cli_scan, | ||
1403 | .seeks = DEFAULT_SEEKS, | ||
1404 | }; | ||
1405 | |||
1386 | int ldlm_pools_init(void) | 1406 | int ldlm_pools_init(void) |
1387 | { | 1407 | { |
1388 | int rc; | 1408 | int rc; |
1389 | 1409 | ||
1390 | rc = ldlm_pools_thread_start(); | 1410 | rc = ldlm_pools_thread_start(); |
1391 | if (rc == 0) { | 1411 | if (rc == 0) { |
1392 | ldlm_pools_srv_shrinker = | 1412 | register_shrinker(&ldlm_pools_srv_shrinker); |
1393 | set_shrinker(DEFAULT_SEEKS, | 1413 | register_shrinker(&ldlm_pools_cli_shrinker); |
1394 | ldlm_pools_srv_shrink); | ||
1395 | ldlm_pools_cli_shrinker = | ||
1396 | set_shrinker(DEFAULT_SEEKS, | ||
1397 | ldlm_pools_cli_shrink); | ||
1398 | } | 1414 | } |
1399 | return rc; | 1415 | return rc; |
1400 | } | 1416 | } |
@@ -1402,14 +1418,8 @@ EXPORT_SYMBOL(ldlm_pools_init);
1402 | 1418 | ||
1403 | void ldlm_pools_fini(void) | 1419 | void ldlm_pools_fini(void) |
1404 | { | 1420 | { |
1405 | if (ldlm_pools_srv_shrinker != NULL) { | 1421 | unregister_shrinker(&ldlm_pools_srv_shrinker); |
1406 | remove_shrinker(ldlm_pools_srv_shrinker); | 1422 | unregister_shrinker(&ldlm_pools_cli_shrinker); |
1407 | ldlm_pools_srv_shrinker = NULL; | ||
1408 | } | ||
1409 | if (ldlm_pools_cli_shrinker != NULL) { | ||
1410 | remove_shrinker(ldlm_pools_cli_shrinker); | ||
1411 | ldlm_pools_cli_shrinker = NULL; | ||
1412 | } | ||
1413 | ldlm_pools_thread_stop(); | 1423 | ldlm_pools_thread_stop(); |
1414 | } | 1424 | } |
1415 | EXPORT_SYMBOL(ldlm_pools_fini); | 1425 | EXPORT_SYMBOL(ldlm_pools_fini); |
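Because the total number of freeable locks is no longer known in the scan path, ldlm_pools_scan() above divides the request evenly rather than proportionally: each namespace is asked to cancel 1 + min(nr_locks, nr / nr_ns) locks. For example, with sc->nr_to_scan = 128 spread over four client namespaces, each pass asks a namespace to cancel at most 1 + 128/4 = 33 locks, or fewer if it has fewer granted locks.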
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index c29ac1c2defd..3a3d5bc5a628 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -1779,7 +1779,6 @@ int lu_env_refill_by_tags(struct lu_env *env, __u32 ctags,
1779 | } | 1779 | } |
1780 | EXPORT_SYMBOL(lu_env_refill_by_tags); | 1780 | EXPORT_SYMBOL(lu_env_refill_by_tags); |
1781 | 1781 | ||
1782 | static struct shrinker *lu_site_shrinker = NULL; | ||
1783 | 1782 | ||
1784 | typedef struct lu_site_stats{ | 1783 | typedef struct lu_site_stats{ |
1785 | unsigned lss_populated; | 1784 | unsigned lss_populated; |
@@ -1835,61 +1834,68 @@ static void lu_site_stats_get(cfs_hash_t *hs,
1835 | * objects without taking the lu_sites_guard lock, but this is not | 1834 | * objects without taking the lu_sites_guard lock, but this is not |
1836 | * possible in the current implementation. | 1835 | * possible in the current implementation. |
1837 | */ | 1836 | */ |
1838 | static int lu_cache_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) | 1837 | static unsigned long lu_cache_shrink_count(struct shrinker *sk, |
1838 | struct shrink_control *sc) | ||
1839 | { | 1839 | { |
1840 | lu_site_stats_t stats; | 1840 | lu_site_stats_t stats; |
1841 | struct lu_site *s; | 1841 | struct lu_site *s; |
1842 | struct lu_site *tmp; | 1842 | struct lu_site *tmp; |
1843 | int cached = 0; | 1843 | unsigned long cached = 0; |
1844 | int remain = shrink_param(sc, nr_to_scan); | ||
1845 | LIST_HEAD(splice); | ||
1846 | |||
1847 | if (!(shrink_param(sc, gfp_mask) & __GFP_FS)) { | ||
1848 | if (remain != 0) | ||
1849 | return -1; | ||
1850 | else | ||
1851 | /* We must not take the lu_sites_guard lock when | ||
1852 | * __GFP_FS is *not* set because of the deadlock | ||
1853 | * possibility detailed above. Additionally, | ||
1854 | * since we cannot determine the number of | ||
1855 | * objects in the cache without taking this | ||
1856 | * lock, we're in a particularly tough spot. As | ||
1857 | * a result, we'll just lie and say our cache is | ||
1858 | * empty. This _should_ be ok, as we can't | ||
1859 | * reclaim objects when __GFP_FS is *not* set | ||
1860 | * anyways. | ||
1861 | */ | ||
1862 | return 0; | ||
1863 | } | ||
1864 | 1844 | ||
1865 | CDEBUG(D_INODE, "Shrink %d objects\n", remain); | 1845 | if (!(sc->gfp_mask & __GFP_FS)) |
1846 | return 0; | ||
1866 | 1847 | ||
1867 | mutex_lock(&lu_sites_guard); | 1848 | mutex_lock(&lu_sites_guard); |
1868 | list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) { | 1849 | list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) { |
1869 | if (shrink_param(sc, nr_to_scan) != 0) { | ||
1870 | remain = lu_site_purge(&lu_shrink_env, s, remain); | ||
1871 | /* | ||
1872 | * Move just shrunk site to the tail of site list to | ||
1873 | * assure shrinking fairness. | ||
1874 | */ | ||
1875 | list_move_tail(&s->ls_linkage, &splice); | ||
1876 | } | ||
1877 | |||
1878 | memset(&stats, 0, sizeof(stats)); | 1850 | memset(&stats, 0, sizeof(stats)); |
1879 | lu_site_stats_get(s->ls_obj_hash, &stats, 0); | 1851 | lu_site_stats_get(s->ls_obj_hash, &stats, 0); |
1880 | cached += stats.lss_total - stats.lss_busy; | 1852 | cached += stats.lss_total - stats.lss_busy; |
1881 | if (shrink_param(sc, nr_to_scan) && remain <= 0) | ||
1882 | break; | ||
1883 | } | 1853 | } |
1884 | list_splice(&splice, lu_sites.prev); | ||
1885 | mutex_unlock(&lu_sites_guard); | 1854 | mutex_unlock(&lu_sites_guard); |
1886 | 1855 | ||
1887 | cached = (cached / 100) * sysctl_vfs_cache_pressure; | 1856 | cached = (cached / 100) * sysctl_vfs_cache_pressure; |
1888 | if (shrink_param(sc, nr_to_scan) == 0) | 1857 | CDEBUG(D_INODE, "%ld objects cached\n", cached); |
1889 | CDEBUG(D_INODE, "%d objects cached\n", cached); | ||
1890 | return cached; | 1858 | return cached; |
1891 | } | 1859 | } |
1892 | 1860 | ||
1861 | static unsigned long lu_cache_shrink_scan(struct shrinker *sk, | ||
1862 | struct shrink_control *sc) | ||
1863 | { | ||
1864 | struct lu_site *s; | ||
1865 | struct lu_site *tmp; | ||
1866 | unsigned long remain = sc->nr_to_scan, freed = 0; | ||
1867 | LIST_HEAD(splice); | ||
1868 | |||
1869 | if (!(sc->gfp_mask & __GFP_FS)) | ||
1870 | /* We must not take the lu_sites_guard lock when | ||
1871 | * __GFP_FS is *not* set because of the deadlock | ||
1872 | * possibility detailed above. Additionally, | ||
1873 | * since we cannot determine the number of | ||
1874 | * objects in the cache without taking this | ||
1875 | * lock, we're in a particularly tough spot. As | ||
1876 | * a result, we'll just lie and say our cache is | ||
1877 | * empty. This _should_ be ok, as we can't | ||
1878 | * reclaim objects when __GFP_FS is *not* set | ||
1879 | * anyways. | ||
1880 | */ | ||
1881 | return SHRINK_STOP; | ||
1882 | |||
1883 | mutex_lock(&lu_sites_guard); | ||
1884 | list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) { | ||
1885 | freed = lu_site_purge(&lu_shrink_env, s, remain); | ||
1886 | remain -= freed; | ||
1887 | /* | ||
1888 | * Move just shrunk site to the tail of site list to | ||
1889 | * assure shrinking fairness. | ||
1890 | */ | ||
1891 | list_move_tail(&s->ls_linkage, &splice); | ||
1892 | } | ||
1893 | list_splice(&splice, lu_sites.prev); | ||
1894 | mutex_unlock(&lu_sites_guard); | ||
1895 | |||
1896 | return sc->nr_to_scan - remain; | ||
1897 | } | ||
1898 | |||
1893 | /* | 1899 | /* |
1894 | * Debugging stuff. | 1900 | * Debugging stuff. |
1895 | */ | 1901 | */ |
@@ -1913,6 +1919,12 @@ int lu_printk_printer(const struct lu_env *env, | |||
1913 | return 0; | 1919 | return 0; |
1914 | } | 1920 | } |
1915 | 1921 | ||
1922 | static struct shrinker lu_site_shrinker = { | ||
1923 | .count_objects = lu_cache_shrink_count, | ||
1924 | .scan_objects = lu_cache_shrink_scan, | ||
1925 | .seeks = DEFAULT_SEEKS, | ||
1926 | }; | ||
1927 | |||
1916 | /** | 1928 | /** |
1917 | * Initialization of global lu_* data. | 1929 | * Initialization of global lu_* data. |
1918 | */ | 1930 | */ |
@@ -1947,9 +1959,7 @@ int lu_global_init(void) | |||
1947 | * inode, one for ea. Unfortunately setting this high value results in | 1959 | * inode, one for ea. Unfortunately setting this high value results in |
1948 | * lu_object/inode cache consuming all the memory. | 1960 | * lu_object/inode cache consuming all the memory. |
1949 | */ | 1961 | */ |
1950 | lu_site_shrinker = set_shrinker(DEFAULT_SEEKS, lu_cache_shrink); | 1962 | register_shrinker(&lu_site_shrinker); |
1951 | if (lu_site_shrinker == NULL) | ||
1952 | return -ENOMEM; | ||
1953 | 1963 | ||
1954 | return result; | 1964 | return result; |
1955 | } | 1965 | } |
@@ -1959,11 +1969,7 @@ int lu_global_init(void) | |||
1959 | */ | 1969 | */ |
1960 | void lu_global_fini(void) | 1970 | void lu_global_fini(void) |
1961 | { | 1971 | { |
1962 | if (lu_site_shrinker != NULL) { | 1972 | unregister_shrinker(&lu_site_shrinker); |
1963 | remove_shrinker(lu_site_shrinker); | ||
1964 | lu_site_shrinker = NULL; | ||
1965 | } | ||
1966 | |||
1967 | lu_context_key_degister(&lu_global_key); | 1973 | lu_context_key_degister(&lu_global_key); |
1968 | 1974 | ||
1969 | /* | 1975 | /* |
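The lu_object split also shows how the old "return -1 to refuse reclaim" convention maps onto the new API: the count callback simply reports 0 when it cannot safely take its locks, and the scan callback returns SHRINK_STOP so vmscan stops asking for this reclaim pass. Reduced to its shape (cache_reclaim() is a hypothetical stand-in for lu_site_purge() and friends):

static unsigned long cache_shrink_scan(struct shrinker *s,
				       struct shrink_control *sc)
{
	/*
	 * This cache takes filesystem locks on its reclaim path, so it must
	 * bail out of !__GFP_FS reclaim; SHRINK_STOP (rather than -1 or 0)
	 * tells vmscan that further scan calls will not help right now.
	 */
	if (!(sc->gfp_mask & __GFP_FS))
		return SHRINK_STOP;

	return cache_reclaim(sc->nr_to_scan);	/* hypothetical helper */
}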
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c index 9013745ab105..e90c8fb7da6a 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c | |||
@@ -121,13 +121,6 @@ static struct ptlrpc_enc_page_pool { | |||
121 | } page_pools; | 121 | } page_pools; |
122 | 122 | ||
123 | /* | 123 | /* |
124 | * memory shrinker | ||
125 | */ | ||
126 | const int pools_shrinker_seeks = DEFAULT_SEEKS; | ||
127 | static struct shrinker *pools_shrinker = NULL; | ||
128 | |||
129 | |||
130 | /* | ||
131 | * /proc/fs/lustre/sptlrpc/encrypt_page_pools | 124 | * /proc/fs/lustre/sptlrpc/encrypt_page_pools |
132 | */ | 125 | */ |
133 | int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v) | 126 | int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v) |
@@ -226,30 +219,46 @@ static void enc_pools_release_free_pages(long npages) | |||
226 | } | 219 | } |
227 | 220 | ||
228 | /* | 221 | /* |
229 | * could be called frequently for query (@nr_to_scan == 0). | ||
230 | * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. | 222 | * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. |
231 | */ | 223 | */ |
232 | static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) | 224 | static unsigned long enc_pools_shrink_count(struct shrinker *s, |
225 | struct shrink_control *sc) | ||
233 | { | 226 | { |
234 | if (unlikely(shrink_param(sc, nr_to_scan) != 0)) { | 227 | /* |
228 | * if no pool access for a long time, we consider it's fully idle. | ||
229 | * a little race here is fine. | ||
230 | */ | ||
231 | if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access > | ||
232 | CACHE_QUIESCENT_PERIOD)) { | ||
235 | spin_lock(&page_pools.epp_lock); | 233 | spin_lock(&page_pools.epp_lock); |
236 | shrink_param(sc, nr_to_scan) = min_t(unsigned long, | 234 | page_pools.epp_idle_idx = IDLE_IDX_MAX; |
237 | shrink_param(sc, nr_to_scan), | ||
238 | page_pools.epp_free_pages - | ||
239 | PTLRPC_MAX_BRW_PAGES); | ||
240 | if (shrink_param(sc, nr_to_scan) > 0) { | ||
241 | enc_pools_release_free_pages(shrink_param(sc, | ||
242 | nr_to_scan)); | ||
243 | CDEBUG(D_SEC, "released %ld pages, %ld left\n", | ||
244 | (long)shrink_param(sc, nr_to_scan), | ||
245 | page_pools.epp_free_pages); | ||
246 | |||
247 | page_pools.epp_st_shrinks++; | ||
248 | page_pools.epp_last_shrink = cfs_time_current_sec(); | ||
249 | } | ||
250 | spin_unlock(&page_pools.epp_lock); | 235 | spin_unlock(&page_pools.epp_lock); |
251 | } | 236 | } |
252 | 237 | ||
238 | LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); | ||
239 | return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) * | ||
240 | (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX; | ||
241 | } | ||
242 | |||
243 | /* | ||
244 | * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. | ||
245 | */ | ||
246 | static unsigned long enc_pools_shrink_scan(struct shrinker *s, | ||
247 | struct shrink_control *sc) | ||
248 | { | ||
249 | spin_lock(&page_pools.epp_lock); | ||
250 | sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan, | ||
251 | page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES); | ||
252 | if (sc->nr_to_scan > 0) { | ||
253 | enc_pools_release_free_pages(sc->nr_to_scan); | ||
254 | CDEBUG(D_SEC, "released %ld pages, %ld left\n", | ||
255 | (long)sc->nr_to_scan, page_pools.epp_free_pages); | ||
256 | |||
257 | page_pools.epp_st_shrinks++; | ||
258 | page_pools.epp_last_shrink = cfs_time_current_sec(); | ||
259 | } | ||
260 | spin_unlock(&page_pools.epp_lock); | ||
261 | |||
253 | /* | 262 | /* |
254 | * if no pool access for a long time, we consider it's fully idle. | 263 | * if no pool access for a long time, we consider it's fully idle. |
255 | * a little race here is fine. | 264 | * a little race here is fine. |
@@ -262,8 +271,7 @@ static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) | |||
262 | } | 271 | } |
263 | 272 | ||
264 | LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); | 273 | LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); |
265 | return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) * | 274 | return sc->nr_to_scan; |
266 | (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX; | ||
267 | } | 275 | } |
268 | 276 | ||
269 | static inline | 277 | static inline |
@@ -699,6 +707,12 @@ static inline void enc_pools_free(void) | |||
699 | sizeof(*page_pools.epp_pools)); | 707 | sizeof(*page_pools.epp_pools)); |
700 | } | 708 | } |
701 | 709 | ||
710 | static struct shrinker pools_shrinker = { | ||
711 | .count_objects = enc_pools_shrink_count, | ||
712 | .scan_objects = enc_pools_shrink_scan, | ||
713 | .seeks = DEFAULT_SEEKS, | ||
714 | }; | ||
715 | |||
702 | int sptlrpc_enc_pool_init(void) | 716 | int sptlrpc_enc_pool_init(void) |
703 | { | 717 | { |
704 | /* | 718 | /* |
@@ -736,12 +750,7 @@ int sptlrpc_enc_pool_init(void) | |||
736 | if (page_pools.epp_pools == NULL) | 750 | if (page_pools.epp_pools == NULL) |
737 | return -ENOMEM; | 751 | return -ENOMEM; |
738 | 752 | ||
739 | pools_shrinker = set_shrinker(pools_shrinker_seeks, | 753 | register_shrinker(&pools_shrinker); |
740 | enc_pools_shrink); | ||
741 | if (pools_shrinker == NULL) { | ||
742 | enc_pools_free(); | ||
743 | return -ENOMEM; | ||
744 | } | ||
745 | 754 | ||
746 | return 0; | 755 | return 0; |
747 | } | 756 | } |
@@ -750,11 +759,10 @@ void sptlrpc_enc_pool_fini(void) | |||
750 | { | 759 | { |
751 | unsigned long cleaned, npools; | 760 | unsigned long cleaned, npools; |
752 | 761 | ||
753 | LASSERT(pools_shrinker); | ||
754 | LASSERT(page_pools.epp_pools); | 762 | LASSERT(page_pools.epp_pools); |
755 | LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages); | 763 | LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages); |
756 | 764 | ||
757 | remove_shrinker(pools_shrinker); | 765 | unregister_shrinker(&pools_shrinker); |
758 | 766 | ||
759 | npools = npages_to_npools(page_pools.epp_total_pages); | 767 | npools = npages_to_npools(page_pools.epp_total_pages); |
760 | cleaned = enc_pools_cleanup(page_pools.epp_pools, npools); | 768 | cleaned = enc_pools_cleanup(page_pools.epp_pools, npools); |
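For concreteness, the count side of the converted pools shrinker keeps the old pressure formula. With, purely for illustration, 4096 free pages, PTLRPC_MAX_BRW_PAGES reserving 256 of them and epp_idle_idx at 25 (IDLE_IDX_MAX being 100), enc_pools_shrink_count() reports max(4096 - 256, 0) * (100 - 25) / 100 = 2880 freeable pages; enc_pools_shrink_scan() then releases at most sc->nr_to_scan pages per call, clamped so that PTLRPC_MAX_BRW_PAGES always remain in the pool. The constants are illustrative only (PTLRPC_MAX_BRW_PAGES in particular depends on the page size).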
diff --git a/fs/dcache.c b/fs/dcache.c index dddc67fed732..1bd4614ce93b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/rculist_bl.h> | 37 | #include <linux/rculist_bl.h> |
38 | #include <linux/prefetch.h> | 38 | #include <linux/prefetch.h> |
39 | #include <linux/ratelimit.h> | 39 | #include <linux/ratelimit.h> |
40 | #include <linux/list_lru.h> | ||
40 | #include "internal.h" | 41 | #include "internal.h" |
41 | #include "mount.h" | 42 | #include "mount.h" |
42 | 43 | ||
@@ -48,7 +49,7 @@ | |||
48 | * - the dcache hash table | 49 | * - the dcache hash table |
49 | * s_anon bl list spinlock protects: | 50 | * s_anon bl list spinlock protects: |
50 | * - the s_anon list (see __d_drop) | 51 | * - the s_anon list (see __d_drop) |
51 | * dcache_lru_lock protects: | 52 | * dentry->d_sb->s_dentry_lru_lock protects: |
52 | * - the dcache lru lists and counters | 53 | * - the dcache lru lists and counters |
53 | * d_lock protects: | 54 | * d_lock protects: |
54 | * - d_flags | 55 | * - d_flags |
@@ -63,7 +64,7 @@ | |||
63 | * Ordering: | 64 | * Ordering: |
64 | * dentry->d_inode->i_lock | 65 | * dentry->d_inode->i_lock |
65 | * dentry->d_lock | 66 | * dentry->d_lock |
66 | * dcache_lru_lock | 67 | * dentry->d_sb->s_dentry_lru_lock |
67 | * dcache_hash_bucket lock | 68 | * dcache_hash_bucket lock |
68 | * s_anon lock | 69 | * s_anon lock |
69 | * | 70 | * |
@@ -81,7 +82,6 @@ | |||
81 | int sysctl_vfs_cache_pressure __read_mostly = 100; | 82 | int sysctl_vfs_cache_pressure __read_mostly = 100; |
82 | EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); | 83 | EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); |
83 | 84 | ||
84 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); | ||
85 | __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); | 85 | __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); |
86 | 86 | ||
87 | EXPORT_SYMBOL(rename_lock); | 87 | EXPORT_SYMBOL(rename_lock); |
@@ -146,23 +146,47 @@ struct dentry_stat_t dentry_stat = { | |||
146 | .age_limit = 45, | 146 | .age_limit = 45, |
147 | }; | 147 | }; |
148 | 148 | ||
149 | static DEFINE_PER_CPU(unsigned int, nr_dentry); | 149 | static DEFINE_PER_CPU(long, nr_dentry); |
150 | static DEFINE_PER_CPU(long, nr_dentry_unused); | ||
150 | 151 | ||
151 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) | 152 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) |
152 | static int get_nr_dentry(void) | 153 | |
154 | /* | ||
155 | * Here we resort to our own counters instead of using generic per-cpu counters | ||
156 | * for consistency with what the vfs inode code does. We are expected to harvest | ||
157 | * better code and performance by having our own specialized counters. | ||
158 | * | ||
159 | * Please note that the loop is done over all possible CPUs, not over all online | ||
160 | * CPUs. The reason for this is that we don't want to play games with CPUs going | ||
161 | * on and off. If one of them goes off, we will just keep their counters. | ||
162 | * | ||
163 | * glommer: See cffbc8a for details, and if you ever intend to change this, | ||
164 | * please update all vfs counters to match. | ||
165 | */ | ||
166 | static long get_nr_dentry(void) | ||
153 | { | 167 | { |
154 | int i; | 168 | int i; |
155 | int sum = 0; | 169 | long sum = 0; |
156 | for_each_possible_cpu(i) | 170 | for_each_possible_cpu(i) |
157 | sum += per_cpu(nr_dentry, i); | 171 | sum += per_cpu(nr_dentry, i); |
158 | return sum < 0 ? 0 : sum; | 172 | return sum < 0 ? 0 : sum; |
159 | } | 173 | } |
160 | 174 | ||
175 | static long get_nr_dentry_unused(void) | ||
176 | { | ||
177 | int i; | ||
178 | long sum = 0; | ||
179 | for_each_possible_cpu(i) | ||
180 | sum += per_cpu(nr_dentry_unused, i); | ||
181 | return sum < 0 ? 0 : sum; | ||
182 | } | ||
183 | |||
161 | int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, | 184 | int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, |
162 | size_t *lenp, loff_t *ppos) | 185 | size_t *lenp, loff_t *ppos) |
163 | { | 186 | { |
164 | dentry_stat.nr_dentry = get_nr_dentry(); | 187 | dentry_stat.nr_dentry = get_nr_dentry(); |
165 | return proc_dointvec(table, write, buffer, lenp, ppos); | 188 | dentry_stat.nr_unused = get_nr_dentry_unused(); |
189 | return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); | ||
166 | } | 190 | } |
167 | #endif | 191 | #endif |
168 | 192 | ||
@@ -333,52 +357,35 @@ static void dentry_unlink_inode(struct dentry * dentry) | |||
333 | } | 357 | } |
334 | 358 | ||
335 | /* | 359 | /* |
336 | * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held. | 360 | * dentry_lru_(add|del)_list) must be called with d_lock held. |
337 | */ | 361 | */ |
338 | static void dentry_lru_add(struct dentry *dentry) | 362 | static void dentry_lru_add(struct dentry *dentry) |
339 | { | 363 | { |
340 | if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) { | 364 | if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) { |
341 | spin_lock(&dcache_lru_lock); | 365 | if (list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)) |
366 | this_cpu_inc(nr_dentry_unused); | ||
342 | dentry->d_flags |= DCACHE_LRU_LIST; | 367 | dentry->d_flags |= DCACHE_LRU_LIST; |
343 | list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); | ||
344 | dentry->d_sb->s_nr_dentry_unused++; | ||
345 | dentry_stat.nr_unused++; | ||
346 | spin_unlock(&dcache_lru_lock); | ||
347 | } | 368 | } |
348 | } | 369 | } |
349 | 370 | ||
350 | static void __dentry_lru_del(struct dentry *dentry) | ||
351 | { | ||
352 | list_del_init(&dentry->d_lru); | ||
353 | dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); | ||
354 | dentry->d_sb->s_nr_dentry_unused--; | ||
355 | dentry_stat.nr_unused--; | ||
356 | } | ||
357 | |||
358 | /* | 371 | /* |
359 | * Remove a dentry with references from the LRU. | 372 | * Remove a dentry with references from the LRU. |
373 | * | ||
374 | * If we are on the shrink list, then we can get to try_prune_one_dentry() and | ||
375 | * lose our last reference through the parent walk. In this case, we need to | ||
376 | * remove ourselves from the shrink list, not the LRU. | ||
360 | */ | 377 | */ |
361 | static void dentry_lru_del(struct dentry *dentry) | 378 | static void dentry_lru_del(struct dentry *dentry) |
362 | { | 379 | { |
363 | if (!list_empty(&dentry->d_lru)) { | 380 | if (dentry->d_flags & DCACHE_SHRINK_LIST) { |
364 | spin_lock(&dcache_lru_lock); | 381 | list_del_init(&dentry->d_lru); |
365 | __dentry_lru_del(dentry); | 382 | dentry->d_flags &= ~DCACHE_SHRINK_LIST; |
366 | spin_unlock(&dcache_lru_lock); | 383 | return; |
367 | } | 384 | } |
368 | } | ||
369 | 385 | ||
370 | static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list) | 386 | if (list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)) |
371 | { | 387 | this_cpu_dec(nr_dentry_unused); |
372 | spin_lock(&dcache_lru_lock); | 388 | dentry->d_flags &= ~DCACHE_LRU_LIST; |
373 | if (list_empty(&dentry->d_lru)) { | ||
374 | dentry->d_flags |= DCACHE_LRU_LIST; | ||
375 | list_add_tail(&dentry->d_lru, list); | ||
376 | dentry->d_sb->s_nr_dentry_unused++; | ||
377 | dentry_stat.nr_unused++; | ||
378 | } else { | ||
379 | list_move_tail(&dentry->d_lru, list); | ||
380 | } | ||
381 | spin_unlock(&dcache_lru_lock); | ||
382 | } | 389 | } |
383 | 390 | ||
384 | /** | 391 | /** |
@@ -474,7 +481,8 @@ EXPORT_SYMBOL(d_drop); | |||
474 | * If ref is non-zero, then decrement the refcount too. | 481 | * If ref is non-zero, then decrement the refcount too. |
475 | * Returns dentry requiring refcount drop, or NULL if we're done. | 482 | * Returns dentry requiring refcount drop, or NULL if we're done. |
476 | */ | 483 | */ |
477 | static inline struct dentry *dentry_kill(struct dentry *dentry) | 484 | static inline struct dentry * |
485 | dentry_kill(struct dentry *dentry, int unlock_on_failure) | ||
478 | __releases(dentry->d_lock) | 486 | __releases(dentry->d_lock) |
479 | { | 487 | { |
480 | struct inode *inode; | 488 | struct inode *inode; |
@@ -483,8 +491,10 @@ static inline struct dentry *dentry_kill(struct dentry *dentry) | |||
483 | inode = dentry->d_inode; | 491 | inode = dentry->d_inode; |
484 | if (inode && !spin_trylock(&inode->i_lock)) { | 492 | if (inode && !spin_trylock(&inode->i_lock)) { |
485 | relock: | 493 | relock: |
486 | spin_unlock(&dentry->d_lock); | 494 | if (unlock_on_failure) { |
487 | cpu_relax(); | 495 | spin_unlock(&dentry->d_lock); |
496 | cpu_relax(); | ||
497 | } | ||
488 | return dentry; /* try again with same dentry */ | 498 | return dentry; /* try again with same dentry */ |
489 | } | 499 | } |
490 | if (IS_ROOT(dentry)) | 500 | if (IS_ROOT(dentry)) |
@@ -567,7 +577,7 @@ repeat: | |||
567 | return; | 577 | return; |
568 | 578 | ||
569 | kill_it: | 579 | kill_it: |
570 | dentry = dentry_kill(dentry); | 580 | dentry = dentry_kill(dentry, 1); |
571 | if (dentry) | 581 | if (dentry) |
572 | goto repeat; | 582 | goto repeat; |
573 | } | 583 | } |
@@ -787,12 +797,12 @@ EXPORT_SYMBOL(d_prune_aliases); | |||
787 | * | 797 | * |
788 | * This may fail if locks cannot be acquired no problem, just try again. | 798 | * This may fail if locks cannot be acquired no problem, just try again. |
789 | */ | 799 | */ |
790 | static void try_prune_one_dentry(struct dentry *dentry) | 800 | static struct dentry * try_prune_one_dentry(struct dentry *dentry) |
791 | __releases(dentry->d_lock) | 801 | __releases(dentry->d_lock) |
792 | { | 802 | { |
793 | struct dentry *parent; | 803 | struct dentry *parent; |
794 | 804 | ||
795 | parent = dentry_kill(dentry); | 805 | parent = dentry_kill(dentry, 0); |
796 | /* | 806 | /* |
797 | * If dentry_kill returns NULL, we have nothing more to do. | 807 | * If dentry_kill returns NULL, we have nothing more to do. |
798 | * if it returns the same dentry, trylocks failed. In either | 808 | * if it returns the same dentry, trylocks failed. In either |
@@ -804,17 +814,18 @@ static void try_prune_one_dentry(struct dentry *dentry) | |||
804 | * fragmentation. | 814 | * fragmentation. |
805 | */ | 815 | */ |
806 | if (!parent) | 816 | if (!parent) |
807 | return; | 817 | return NULL; |
808 | if (parent == dentry) | 818 | if (parent == dentry) |
809 | return; | 819 | return dentry; |
810 | 820 | ||
811 | /* Prune ancestors. */ | 821 | /* Prune ancestors. */ |
812 | dentry = parent; | 822 | dentry = parent; |
813 | while (dentry) { | 823 | while (dentry) { |
814 | if (lockref_put_or_lock(&dentry->d_lockref)) | 824 | if (lockref_put_or_lock(&dentry->d_lockref)) |
815 | return; | 825 | return NULL; |
816 | dentry = dentry_kill(dentry); | 826 | dentry = dentry_kill(dentry, 1); |
817 | } | 827 | } |
828 | return NULL; | ||
818 | } | 829 | } |
819 | 830 | ||
820 | static void shrink_dentry_list(struct list_head *list) | 831 | static void shrink_dentry_list(struct list_head *list) |
@@ -833,76 +844,143 @@ static void shrink_dentry_list(struct list_head *list) | |||
833 | } | 844 | } |
834 | 845 | ||
835 | /* | 846 | /* |
847 | * The dispose list is isolated and dentries are not accounted | ||
848 | * to the LRU here, so we can simply remove it from the list | ||
849 | * here regardless of whether it is referenced or not. | ||
850 | */ | ||
851 | list_del_init(&dentry->d_lru); | ||
852 | dentry->d_flags &= ~DCACHE_SHRINK_LIST; | ||
853 | |||
854 | /* | ||
836 | * We found an inuse dentry which was not removed from | 855 | * We found an inuse dentry which was not removed from |
837 | * the LRU because of laziness during lookup. Do not free | 856 | * the LRU because of laziness during lookup. Do not free it. |
838 | * it - just keep it off the LRU list. | ||
839 | */ | 857 | */ |
840 | if (dentry->d_lockref.count) { | 858 | if (dentry->d_lockref.count) { |
841 | dentry_lru_del(dentry); | ||
842 | spin_unlock(&dentry->d_lock); | 859 | spin_unlock(&dentry->d_lock); |
843 | continue; | 860 | continue; |
844 | } | 861 | } |
845 | |||
846 | rcu_read_unlock(); | 862 | rcu_read_unlock(); |
847 | 863 | ||
848 | try_prune_one_dentry(dentry); | 864 | dentry = try_prune_one_dentry(dentry); |
849 | 865 | ||
850 | rcu_read_lock(); | 866 | rcu_read_lock(); |
867 | if (dentry) { | ||
868 | dentry->d_flags |= DCACHE_SHRINK_LIST; | ||
869 | list_add(&dentry->d_lru, list); | ||
870 | spin_unlock(&dentry->d_lock); | ||
871 | } | ||
851 | } | 872 | } |
852 | rcu_read_unlock(); | 873 | rcu_read_unlock(); |
853 | } | 874 | } |
854 | 875 | ||
876 | static enum lru_status | ||
877 | dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) | ||
878 | { | ||
879 | struct list_head *freeable = arg; | ||
880 | struct dentry *dentry = container_of(item, struct dentry, d_lru); | ||
881 | |||
882 | |||
883 | /* | ||
884 | * we are inverting the lru lock/dentry->d_lock here, | ||
885 | * so use a trylock. If we fail to get the lock, just skip | ||
886 | * it | ||
887 | */ | ||
888 | if (!spin_trylock(&dentry->d_lock)) | ||
889 | return LRU_SKIP; | ||
890 | |||
891 | /* | ||
892 | * Referenced dentries are still in use. If they have active | ||
893 | * counts, just remove them from the LRU. Otherwise give them | ||
894 | * another pass through the LRU. | ||
895 | */ | ||
896 | if (dentry->d_lockref.count) { | ||
897 | list_del_init(&dentry->d_lru); | ||
898 | spin_unlock(&dentry->d_lock); | ||
899 | return LRU_REMOVED; | ||
900 | } | ||
901 | |||
902 | if (dentry->d_flags & DCACHE_REFERENCED) { | ||
903 | dentry->d_flags &= ~DCACHE_REFERENCED; | ||
904 | spin_unlock(&dentry->d_lock); | ||
905 | |||
906 | /* | ||
907 | * The list move itself will be made by the common LRU code. At | ||
908 | * this point, we've dropped the dentry->d_lock but keep the | ||
909 | * lru lock. This is safe to do, since every list movement is | ||
910 | * protected by the lru lock even if both locks are held. | ||
911 | * | ||
912 | * This is guaranteed by the fact that all LRU management | ||
913 | * functions are intermediated by the LRU API calls like | ||
914 | * list_lru_add and list_lru_del. List movement in this file | ||
915 | * only ever occurs through these functions or through callbacks | ||
916 | * like this one, that are called from the LRU API. | ||
917 | * | ||
918 | * The only exceptions to this are functions like | ||
919 | * shrink_dentry_list, and code that first checks for the | ||
920 | * DCACHE_SHRINK_LIST flag. Those are guaranteed to be | ||
921 | * operating only with stack provided lists after they are | ||
922 | * properly isolated from the main list. It is thus, always a | ||
923 | * local access. | ||
924 | */ | ||
925 | return LRU_ROTATE; | ||
926 | } | ||
927 | |||
928 | dentry->d_flags |= DCACHE_SHRINK_LIST; | ||
929 | list_move_tail(&dentry->d_lru, freeable); | ||
930 | this_cpu_dec(nr_dentry_unused); | ||
931 | spin_unlock(&dentry->d_lock); | ||
932 | |||
933 | return LRU_REMOVED; | ||
934 | } | ||
935 | |||
855 | /** | 936 | /** |
856 | * prune_dcache_sb - shrink the dcache | 937 | * prune_dcache_sb - shrink the dcache |
857 | * @sb: superblock | 938 | * @sb: superblock |
858 | * @count: number of entries to try to free | 939 | * @nr_to_scan : number of entries to try to free |
940 | * @nid: which node to scan for freeable entities | ||
859 | * | 941 | * |
860 | * Attempt to shrink the superblock dcache LRU by @count entries. This is | 942 | * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is |
861 | * done when we need more memory an called from the superblock shrinker | 943 | * done when we need more memory an called from the superblock shrinker |
862 | * function. | 944 | * function. |
863 | * | 945 | * |
864 | * This function may fail to free any resources if all the dentries are in | 946 | * This function may fail to free any resources if all the dentries are in |
865 | * use. | 947 | * use. |
866 | */ | 948 | */ |
867 | void prune_dcache_sb(struct super_block *sb, int count) | 949 | long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, |
950 | int nid) | ||
868 | { | 951 | { |
869 | struct dentry *dentry; | 952 | LIST_HEAD(dispose); |
870 | LIST_HEAD(referenced); | 953 | long freed; |
871 | LIST_HEAD(tmp); | ||
872 | 954 | ||
873 | relock: | 955 | freed = list_lru_walk_node(&sb->s_dentry_lru, nid, dentry_lru_isolate, |
874 | spin_lock(&dcache_lru_lock); | 956 | &dispose, &nr_to_scan); |
875 | while (!list_empty(&sb->s_dentry_lru)) { | 957 | shrink_dentry_list(&dispose); |
876 | dentry = list_entry(sb->s_dentry_lru.prev, | 958 | return freed; |
877 | struct dentry, d_lru); | 959 | } |
878 | BUG_ON(dentry->d_sb != sb); | ||
879 | |||
880 | if (!spin_trylock(&dentry->d_lock)) { | ||
881 | spin_unlock(&dcache_lru_lock); | ||
882 | cpu_relax(); | ||
883 | goto relock; | ||
884 | } | ||
885 | 960 | ||
886 | if (dentry->d_flags & DCACHE_REFERENCED) { | 961 | static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, |
887 | dentry->d_flags &= ~DCACHE_REFERENCED; | 962 | spinlock_t *lru_lock, void *arg) |
888 | list_move(&dentry->d_lru, &referenced); | 963 | { |
889 | spin_unlock(&dentry->d_lock); | 964 | struct list_head *freeable = arg; |
890 | } else { | 965 | struct dentry *dentry = container_of(item, struct dentry, d_lru); |
891 | list_move_tail(&dentry->d_lru, &tmp); | ||
892 | dentry->d_flags |= DCACHE_SHRINK_LIST; | ||
893 | spin_unlock(&dentry->d_lock); | ||
894 | if (!--count) | ||
895 | break; | ||
896 | } | ||
897 | cond_resched_lock(&dcache_lru_lock); | ||
898 | } | ||
899 | if (!list_empty(&referenced)) | ||
900 | list_splice(&referenced, &sb->s_dentry_lru); | ||
901 | spin_unlock(&dcache_lru_lock); | ||
902 | 966 | ||
903 | shrink_dentry_list(&tmp); | 967 | /* |
968 | * we are inverting the lru lock/dentry->d_lock here, | ||
969 | * so use a trylock. If we fail to get the lock, just skip | ||
970 | * it | ||
971 | */ | ||
972 | if (!spin_trylock(&dentry->d_lock)) | ||
973 | return LRU_SKIP; | ||
974 | |||
975 | dentry->d_flags |= DCACHE_SHRINK_LIST; | ||
976 | list_move_tail(&dentry->d_lru, freeable); | ||
977 | this_cpu_dec(nr_dentry_unused); | ||
978 | spin_unlock(&dentry->d_lock); | ||
979 | |||
980 | return LRU_REMOVED; | ||
904 | } | 981 | } |
905 | 982 | ||
983 | |||
906 | /** | 984 | /** |
907 | * shrink_dcache_sb - shrink dcache for a superblock | 985 | * shrink_dcache_sb - shrink dcache for a superblock |
908 | * @sb: superblock | 986 | * @sb: superblock |
@@ -912,16 +990,17 @@ relock: | |||
912 | */ | 990 | */ |
913 | void shrink_dcache_sb(struct super_block *sb) | 991 | void shrink_dcache_sb(struct super_block *sb) |
914 | { | 992 | { |
915 | LIST_HEAD(tmp); | 993 | long freed; |
916 | 994 | ||
917 | spin_lock(&dcache_lru_lock); | 995 | do { |
918 | while (!list_empty(&sb->s_dentry_lru)) { | 996 | LIST_HEAD(dispose); |
919 | list_splice_init(&sb->s_dentry_lru, &tmp); | 997 | |
920 | spin_unlock(&dcache_lru_lock); | 998 | freed = list_lru_walk(&sb->s_dentry_lru, |
921 | shrink_dentry_list(&tmp); | 999 | dentry_lru_isolate_shrink, &dispose, UINT_MAX); |
922 | spin_lock(&dcache_lru_lock); | 1000 | |
923 | } | 1001 | this_cpu_sub(nr_dentry_unused, freed); |
924 | spin_unlock(&dcache_lru_lock); | 1002 | shrink_dentry_list(&dispose); |
1003 | } while (freed > 0); | ||
925 | } | 1004 | } |
926 | EXPORT_SYMBOL(shrink_dcache_sb); | 1005 | EXPORT_SYMBOL(shrink_dcache_sb); |
927 | 1006 | ||
@@ -1283,7 +1362,8 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) | |||
1283 | if (dentry->d_lockref.count) { | 1362 | if (dentry->d_lockref.count) { |
1284 | dentry_lru_del(dentry); | 1363 | dentry_lru_del(dentry); |
1285 | } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { | 1364 | } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { |
1286 | dentry_lru_move_list(dentry, &data->dispose); | 1365 | dentry_lru_del(dentry); |
1366 | list_add_tail(&dentry->d_lru, &data->dispose); | ||
1287 | dentry->d_flags |= DCACHE_SHRINK_LIST; | 1367 | dentry->d_flags |= DCACHE_SHRINK_LIST; |
1288 | data->found++; | 1368 | data->found++; |
1289 | ret = D_WALK_NORETRY; | 1369 | ret = D_WALK_NORETRY; |
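The dcache rework above is the template for the list_lru usage in the rest of this series: the unused-dentry list moves into a per-superblock, per-node struct list_lru, additions and removals go through list_lru_add()/list_lru_del(), and reclaim walks the list through an isolate callback that tells the core what to do with each entry (LRU_REMOVED, LRU_ROTATE, LRU_SKIP or LRU_RETRY). A generic sketch of the callback/walk pairing, using a hypothetical object type instead of dentries:

#include <linux/list.h>
#include <linux/list_lru.h>
#include <linux/spinlock.h>

struct obj {
	spinlock_t lock;
	int refcount;
	struct list_head lru;	/* linked into a struct list_lru */
};

/* isolate callback: called under the lru lock for each candidate entry */
static enum lru_status obj_lru_isolate(struct list_head *item,
				       spinlock_t *lru_lock, void *arg)
{
	struct list_head *freeable = arg;
	struct obj *o = container_of(item, struct obj, lru);

	/* lock order is object lock -> lru lock, so only trylock here */
	if (!spin_trylock(&o->lock))
		return LRU_SKIP;

	if (o->refcount) {
		/* still in use: take it off the LRU but keep the object */
		list_del_init(item);
		spin_unlock(&o->lock);
		return LRU_REMOVED;
	}

	/* freeable: move it to the caller's private dispose list */
	list_move_tail(item, freeable);
	spin_unlock(&o->lock);
	return LRU_REMOVED;
}

/* scan at most nr entries on NUMA node nid, collecting victims on a list */
static unsigned long obj_prune(struct list_lru *lru, int nid, unsigned long nr)
{
	LIST_HEAD(dispose);
	unsigned long freed;

	freed = list_lru_walk_node(lru, nid, obj_lru_isolate, &dispose, &nr);
	/* the caller frees everything on &dispose outside the lru lock */
	return freed;
}

The important property is that victims are moved onto a private dispose list under the lru lock and freed only afterwards, so the expensive teardown never runs with either lock held.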
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index c00e055b6282..9fd702f5bfb2 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c | |||
@@ -44,6 +44,7 @@ static void drop_slab(void) | |||
44 | .gfp_mask = GFP_KERNEL, | 44 | .gfp_mask = GFP_KERNEL, |
45 | }; | 45 | }; |
46 | 46 | ||
47 | nodes_setall(shrink.nodes_to_scan); | ||
47 | do { | 48 | do { |
48 | nr_objects = shrink_slab(&shrink, 1000, 1000); | 49 | nr_objects = shrink_slab(&shrink, 1000, 1000); |
49 | } while (nr_objects > 10); | 50 | } while (nr_objects > 10); |
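drop_caches is the one caller that wants everything scanned, so it sets every bit in the nodemask this series adds to struct shrink_control; shrink_slab() then calls each NUMA-aware shrinker once per set node, handing the node id down in sc->nid. For a cache backed by a list_lru that makes per-node counting a one-liner (a sketch; my_lru is a hypothetical per-cache list, and the owning shrinker would set SHRINKER_NUMA_AWARE in .flags to opt in to the per-node calls):

#include <linux/list_lru.h>
#include <linux/shrinker.h>

static struct list_lru my_lru;		/* hypothetical cache LRU */

static unsigned long cache_shrink_count(struct shrinker *s,
					struct shrink_control *sc)
{
	return list_lru_count_node(&my_lru, sc->nid);
}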
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 2d1bdbe78c04..3981ff783950 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
@@ -931,13 +931,15 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | |||
931 | struct ext4_inode_info *ei; | 931 | struct ext4_inode_info *ei; |
932 | struct list_head *cur, *tmp; | 932 | struct list_head *cur, *tmp; |
933 | LIST_HEAD(skipped); | 933 | LIST_HEAD(skipped); |
934 | int ret, nr_shrunk = 0; | 934 | int nr_shrunk = 0; |
935 | int retried = 0, skip_precached = 1, nr_skipped = 0; | 935 | int retried = 0, skip_precached = 1, nr_skipped = 0; |
936 | 936 | ||
937 | spin_lock(&sbi->s_es_lru_lock); | 937 | spin_lock(&sbi->s_es_lru_lock); |
938 | 938 | ||
939 | retry: | 939 | retry: |
940 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { | 940 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { |
941 | int shrunk; | ||
942 | |||
941 | /* | 943 | /* |
942 | * If we have already reclaimed all extents from extent | 944 | * If we have already reclaimed all extents from extent |
943 | * status tree, just stop the loop immediately. | 945 | * status tree, just stop the loop immediately. |
@@ -964,13 +966,13 @@ retry: | |||
964 | continue; | 966 | continue; |
965 | 967 | ||
966 | write_lock(&ei->i_es_lock); | 968 | write_lock(&ei->i_es_lock); |
967 | ret = __es_try_to_reclaim_extents(ei, nr_to_scan); | 969 | shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); |
968 | if (ei->i_es_lru_nr == 0) | 970 | if (ei->i_es_lru_nr == 0) |
969 | list_del_init(&ei->i_es_lru); | 971 | list_del_init(&ei->i_es_lru); |
970 | write_unlock(&ei->i_es_lock); | 972 | write_unlock(&ei->i_es_lock); |
971 | 973 | ||
972 | nr_shrunk += ret; | 974 | nr_shrunk += shrunk; |
973 | nr_to_scan -= ret; | 975 | nr_to_scan -= shrunk; |
974 | if (nr_to_scan == 0) | 976 | if (nr_to_scan == 0) |
975 | break; | 977 | break; |
976 | } | 978 | } |
@@ -1007,7 +1009,20 @@ retry: | |||
1007 | return nr_shrunk; | 1009 | return nr_shrunk; |
1008 | } | 1010 | } |
1009 | 1011 | ||
1010 | static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | 1012 | static unsigned long ext4_es_count(struct shrinker *shrink, |
1013 | struct shrink_control *sc) | ||
1014 | { | ||
1015 | unsigned long nr; | ||
1016 | struct ext4_sb_info *sbi; | ||
1017 | |||
1018 | sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); | ||
1019 | nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | ||
1020 | trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr); | ||
1021 | return nr; | ||
1022 | } | ||
1023 | |||
1024 | static unsigned long ext4_es_scan(struct shrinker *shrink, | ||
1025 | struct shrink_control *sc) | ||
1011 | { | 1026 | { |
1012 | struct ext4_sb_info *sbi = container_of(shrink, | 1027 | struct ext4_sb_info *sbi = container_of(shrink, |
1013 | struct ext4_sb_info, s_es_shrinker); | 1028 | struct ext4_sb_info, s_es_shrinker); |
@@ -1022,9 +1037,8 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
1022 | 1037 | ||
1023 | nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); | 1038 | nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); |
1024 | 1039 | ||
1025 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | ||
1026 | trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); | 1040 | trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); |
1027 | return ret; | 1041 | return nr_shrunk; |
1028 | } | 1042 | } |
1029 | 1043 | ||
1030 | void ext4_es_register_shrinker(struct ext4_sb_info *sbi) | 1044 | void ext4_es_register_shrinker(struct ext4_sb_info *sbi) |
@@ -1032,7 +1046,8 @@ void ext4_es_register_shrinker(struct ext4_sb_info *sbi) | |||
1032 | INIT_LIST_HEAD(&sbi->s_es_lru); | 1046 | INIT_LIST_HEAD(&sbi->s_es_lru); |
1033 | spin_lock_init(&sbi->s_es_lru_lock); | 1047 | spin_lock_init(&sbi->s_es_lru_lock); |
1034 | sbi->s_es_last_sorted = 0; | 1048 | sbi->s_es_last_sorted = 0; |
1035 | sbi->s_es_shrinker.shrink = ext4_es_shrink; | 1049 | sbi->s_es_shrinker.scan_objects = ext4_es_scan; |
1050 | sbi->s_es_shrinker.count_objects = ext4_es_count; | ||
1036 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; | 1051 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; |
1037 | register_shrinker(&sbi->s_es_shrinker); | 1052 | register_shrinker(&sbi->s_es_shrinker); |
1038 | } | 1053 | } |
@@ -1076,7 +1091,7 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | |||
1076 | struct ext4_es_tree *tree = &ei->i_es_tree; | 1091 | struct ext4_es_tree *tree = &ei->i_es_tree; |
1077 | struct rb_node *node; | 1092 | struct rb_node *node; |
1078 | struct extent_status *es; | 1093 | struct extent_status *es; |
1079 | int nr_shrunk = 0; | 1094 | unsigned long nr_shrunk = 0; |
1080 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, | 1095 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, |
1081 | DEFAULT_RATELIMIT_BURST); | 1096 | DEFAULT_RATELIMIT_BURST); |
1082 | 1097 | ||
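ext4 keeps its own extent-status LRU rather than converting to list_lru, so only the callback split applies; the count side leans on a per-cpu counter because ->count_objects must stay cheap and must never return a negative number. A stripped-down version of that idiom (the counter here is hypothetical):

#include <linux/percpu_counter.h>

static struct percpu_counter nr_cached;	/* hypothetical cached-object counter */

static int cache_counter_init(void)
{
	return percpu_counter_init(&nr_cached, 0);
}

/* callers bump it with percpu_counter_inc()/dec() on insert and reclaim */

static unsigned long cache_shrink_count(struct shrinker *s,
					struct shrink_control *sc)
{
	/*
	 * percpu_counter_read() is only approximate and can be transiently
	 * negative; the _positive variant clamps at zero, which is exactly
	 * what a ->count_objects callback wants.
	 */
	return percpu_counter_read_positive(&nr_cached);
}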
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 722329cac98f..c2f41b4d00b9 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -1427,21 +1427,22 @@ __acquires(&lru_lock) | |||
1427 | * gfs2_dispose_glock_lru() above. | 1427 | * gfs2_dispose_glock_lru() above. |
1428 | */ | 1428 | */ |
1429 | 1429 | ||
1430 | static void gfs2_scan_glock_lru(int nr) | 1430 | static long gfs2_scan_glock_lru(int nr) |
1431 | { | 1431 | { |
1432 | struct gfs2_glock *gl; | 1432 | struct gfs2_glock *gl; |
1433 | LIST_HEAD(skipped); | 1433 | LIST_HEAD(skipped); |
1434 | LIST_HEAD(dispose); | 1434 | LIST_HEAD(dispose); |
1435 | long freed = 0; | ||
1435 | 1436 | ||
1436 | spin_lock(&lru_lock); | 1437 | spin_lock(&lru_lock); |
1437 | while(nr && !list_empty(&lru_list)) { | 1438 | while ((nr-- >= 0) && !list_empty(&lru_list)) { |
1438 | gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); | 1439 | gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); |
1439 | 1440 | ||
1440 | /* Test for being demotable */ | 1441 | /* Test for being demotable */ |
1441 | if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { | 1442 | if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { |
1442 | list_move(&gl->gl_lru, &dispose); | 1443 | list_move(&gl->gl_lru, &dispose); |
1443 | atomic_dec(&lru_count); | 1444 | atomic_dec(&lru_count); |
1444 | nr--; | 1445 | freed++; |
1445 | continue; | 1446 | continue; |
1446 | } | 1447 | } |
1447 | 1448 | ||
@@ -1451,23 +1452,28 @@ static void gfs2_scan_glock_lru(int nr) | |||
1451 | if (!list_empty(&dispose)) | 1452 | if (!list_empty(&dispose)) |
1452 | gfs2_dispose_glock_lru(&dispose); | 1453 | gfs2_dispose_glock_lru(&dispose); |
1453 | spin_unlock(&lru_lock); | 1454 | spin_unlock(&lru_lock); |
1455 | |||
1456 | return freed; | ||
1454 | } | 1457 | } |
1455 | 1458 | ||
1456 | static int gfs2_shrink_glock_memory(struct shrinker *shrink, | 1459 | static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink, |
1457 | struct shrink_control *sc) | 1460 | struct shrink_control *sc) |
1458 | { | 1461 | { |
1459 | if (sc->nr_to_scan) { | 1462 | if (!(sc->gfp_mask & __GFP_FS)) |
1460 | if (!(sc->gfp_mask & __GFP_FS)) | 1463 | return SHRINK_STOP; |
1461 | return -1; | 1464 | return gfs2_scan_glock_lru(sc->nr_to_scan); |
1462 | gfs2_scan_glock_lru(sc->nr_to_scan); | 1465 | } |
1463 | } | ||
1464 | 1466 | ||
1465 | return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure; | 1467 | static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink, |
1468 | struct shrink_control *sc) | ||
1469 | { | ||
1470 | return vfs_pressure_ratio(atomic_read(&lru_count)); | ||
1466 | } | 1471 | } |
1467 | 1472 | ||
1468 | static struct shrinker glock_shrinker = { | 1473 | static struct shrinker glock_shrinker = { |
1469 | .shrink = gfs2_shrink_glock_memory, | ||
1470 | .seeks = DEFAULT_SEEKS, | 1474 | .seeks = DEFAULT_SEEKS, |
1475 | .count_objects = gfs2_glock_shrink_count, | ||
1476 | .scan_objects = gfs2_glock_shrink_scan, | ||
1471 | }; | 1477 | }; |
1472 | 1478 | ||
1473 | /** | 1479 | /** |
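Both gfs2 shrinkers (and the Lustre ones earlier) now report pressure through vfs_pressure_ratio() instead of open-coding the sysctl_vfs_cache_pressure percentage. Assuming the helper this series introduces next to the dcache changes, it boils down to roughly:

/* approximate shape of the helper added alongside these conversions */
static inline unsigned long vfs_pressure_ratio(unsigned long val)
{
	return mult_frac(val, sysctl_vfs_cache_pressure, 100);
}

With the default vfs_cache_pressure of 100 this is simply val; more importantly, scaling with mult_frac() avoids the old (count / 100) * pressure form, which truncated any cache of fewer than one hundred objects to zero.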
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 7b0f5043cf24..351586e24e30 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
@@ -32,7 +32,8 @@ | |||
32 | struct workqueue_struct *gfs2_control_wq; | 32 | struct workqueue_struct *gfs2_control_wq; |
33 | 33 | ||
34 | static struct shrinker qd_shrinker = { | 34 | static struct shrinker qd_shrinker = { |
35 | .shrink = gfs2_shrink_qd_memory, | 35 | .count_objects = gfs2_qd_shrink_count, |
36 | .scan_objects = gfs2_qd_shrink_scan, | ||
36 | .seeks = DEFAULT_SEEKS, | 37 | .seeks = DEFAULT_SEEKS, |
37 | }; | 38 | }; |
38 | 39 | ||
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 3768c2f40e43..db441359ee8c 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -75,17 +75,16 @@ static LIST_HEAD(qd_lru_list); | |||
75 | static atomic_t qd_lru_count = ATOMIC_INIT(0); | 75 | static atomic_t qd_lru_count = ATOMIC_INIT(0); |
76 | static DEFINE_SPINLOCK(qd_lru_lock); | 76 | static DEFINE_SPINLOCK(qd_lru_lock); |
77 | 77 | ||
78 | int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) | 78 | unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, |
79 | struct shrink_control *sc) | ||
79 | { | 80 | { |
80 | struct gfs2_quota_data *qd; | 81 | struct gfs2_quota_data *qd; |
81 | struct gfs2_sbd *sdp; | 82 | struct gfs2_sbd *sdp; |
82 | int nr_to_scan = sc->nr_to_scan; | 83 | int nr_to_scan = sc->nr_to_scan; |
83 | 84 | long freed = 0; | |
84 | if (nr_to_scan == 0) | ||
85 | goto out; | ||
86 | 85 | ||
87 | if (!(sc->gfp_mask & __GFP_FS)) | 86 | if (!(sc->gfp_mask & __GFP_FS)) |
88 | return -1; | 87 | return SHRINK_STOP; |
89 | 88 | ||
90 | spin_lock(&qd_lru_lock); | 89 | spin_lock(&qd_lru_lock); |
91 | while (nr_to_scan && !list_empty(&qd_lru_list)) { | 90 | while (nr_to_scan && !list_empty(&qd_lru_list)) { |
@@ -110,11 +109,16 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) | |||
110 | kmem_cache_free(gfs2_quotad_cachep, qd); | 109 | kmem_cache_free(gfs2_quotad_cachep, qd); |
111 | spin_lock(&qd_lru_lock); | 110 | spin_lock(&qd_lru_lock); |
112 | nr_to_scan--; | 111 | nr_to_scan--; |
112 | freed++; | ||
113 | } | 113 | } |
114 | spin_unlock(&qd_lru_lock); | 114 | spin_unlock(&qd_lru_lock); |
115 | return freed; | ||
116 | } | ||
115 | 117 | ||
116 | out: | 118 | unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, |
117 | return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100; | 119 | struct shrink_control *sc) |
120 | { | ||
121 | return vfs_pressure_ratio(atomic_read(&qd_lru_count)); | ||
118 | } | 122 | } |
119 | 123 | ||
120 | static u64 qd2index(struct gfs2_quota_data *qd) | 124 | static u64 qd2index(struct gfs2_quota_data *qd) |
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 4f5e6e44ed83..0f64d9deb1b0 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h | |||
@@ -53,8 +53,10 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) | |||
53 | return ret; | 53 | return ret; |
54 | } | 54 | } |
55 | 55 | ||
56 | extern int gfs2_shrink_qd_memory(struct shrinker *shrink, | 56 | extern unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, |
57 | struct shrink_control *sc); | 57 | struct shrink_control *sc); |
58 | extern unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, | ||
59 | struct shrink_control *sc); | ||
58 | extern const struct quotactl_ops gfs2_quotactl_ops; | 60 | extern const struct quotactl_ops gfs2_quotactl_ops; |
59 | 61 | ||
60 | #endif /* __QUOTA_DOT_H__ */ | 62 | #endif /* __QUOTA_DOT_H__ */ |
diff --git a/fs/inode.c b/fs/inode.c index 93a0625b46e4..b33ba8e021cc 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/prefetch.h> | 17 | #include <linux/prefetch.h> |
18 | #include <linux/buffer_head.h> /* for inode_has_buffers */ | 18 | #include <linux/buffer_head.h> /* for inode_has_buffers */ |
19 | #include <linux/ratelimit.h> | 19 | #include <linux/ratelimit.h> |
20 | #include <linux/list_lru.h> | ||
20 | #include "internal.h" | 21 | #include "internal.h" |
21 | 22 | ||
22 | /* | 23 | /* |
@@ -24,7 +25,7 @@ | |||
24 | * | 25 | * |
25 | * inode->i_lock protects: | 26 | * inode->i_lock protects: |
26 | * inode->i_state, inode->i_hash, __iget() | 27 | * inode->i_state, inode->i_hash, __iget() |
27 | * inode->i_sb->s_inode_lru_lock protects: | 28 | * Inode LRU list locks protect: |
28 | * inode->i_sb->s_inode_lru, inode->i_lru | 29 | * inode->i_sb->s_inode_lru, inode->i_lru |
29 | * inode_sb_list_lock protects: | 30 | * inode_sb_list_lock protects: |
30 | * sb->s_inodes, inode->i_sb_list | 31 | * sb->s_inodes, inode->i_sb_list |
@@ -37,7 +38,7 @@ | |||
37 | * | 38 | * |
38 | * inode_sb_list_lock | 39 | * inode_sb_list_lock |
39 | * inode->i_lock | 40 | * inode->i_lock |
40 | * inode->i_sb->s_inode_lru_lock | 41 | * Inode LRU list locks |
41 | * | 42 | * |
42 | * bdi->wb.list_lock | 43 | * bdi->wb.list_lock |
43 | * inode->i_lock | 44 | * inode->i_lock |
@@ -70,33 +71,33 @@ EXPORT_SYMBOL(empty_aops); | |||
70 | */ | 71 | */ |
71 | struct inodes_stat_t inodes_stat; | 72 | struct inodes_stat_t inodes_stat; |
72 | 73 | ||
73 | static DEFINE_PER_CPU(unsigned int, nr_inodes); | 74 | static DEFINE_PER_CPU(unsigned long, nr_inodes); |
74 | static DEFINE_PER_CPU(unsigned int, nr_unused); | 75 | static DEFINE_PER_CPU(unsigned long, nr_unused); |
75 | 76 | ||
76 | static struct kmem_cache *inode_cachep __read_mostly; | 77 | static struct kmem_cache *inode_cachep __read_mostly; |
77 | 78 | ||
78 | static int get_nr_inodes(void) | 79 | static long get_nr_inodes(void) |
79 | { | 80 | { |
80 | int i; | 81 | int i; |
81 | int sum = 0; | 82 | long sum = 0; |
82 | for_each_possible_cpu(i) | 83 | for_each_possible_cpu(i) |
83 | sum += per_cpu(nr_inodes, i); | 84 | sum += per_cpu(nr_inodes, i); |
84 | return sum < 0 ? 0 : sum; | 85 | return sum < 0 ? 0 : sum; |
85 | } | 86 | } |
86 | 87 | ||
87 | static inline int get_nr_inodes_unused(void) | 88 | static inline long get_nr_inodes_unused(void) |
88 | { | 89 | { |
89 | int i; | 90 | int i; |
90 | int sum = 0; | 91 | long sum = 0; |
91 | for_each_possible_cpu(i) | 92 | for_each_possible_cpu(i) |
92 | sum += per_cpu(nr_unused, i); | 93 | sum += per_cpu(nr_unused, i); |
93 | return sum < 0 ? 0 : sum; | 94 | return sum < 0 ? 0 : sum; |
94 | } | 95 | } |
95 | 96 | ||
96 | int get_nr_dirty_inodes(void) | 97 | long get_nr_dirty_inodes(void) |
97 | { | 98 | { |
98 | /* not actually dirty inodes, but a wild approximation */ | 99 | /* not actually dirty inodes, but a wild approximation */ |
99 | int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); | 100 | long nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); |
100 | return nr_dirty > 0 ? nr_dirty : 0; | 101 | return nr_dirty > 0 ? nr_dirty : 0; |
101 | } | 102 | } |
102 | 103 | ||
@@ -109,7 +110,7 @@ int proc_nr_inodes(ctl_table *table, int write, | |||
109 | { | 110 | { |
110 | inodes_stat.nr_inodes = get_nr_inodes(); | 111 | inodes_stat.nr_inodes = get_nr_inodes(); |
111 | inodes_stat.nr_unused = get_nr_inodes_unused(); | 112 | inodes_stat.nr_unused = get_nr_inodes_unused(); |
112 | return proc_dointvec(table, write, buffer, lenp, ppos); | 113 | return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); |
113 | } | 114 | } |
114 | #endif | 115 | #endif |
115 | 116 | ||
@@ -401,13 +402,8 @@ EXPORT_SYMBOL(ihold); | |||
401 | 402 | ||
402 | static void inode_lru_list_add(struct inode *inode) | 403 | static void inode_lru_list_add(struct inode *inode) |
403 | { | 404 | { |
404 | spin_lock(&inode->i_sb->s_inode_lru_lock); | 405 | if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru)) |
405 | if (list_empty(&inode->i_lru)) { | ||
406 | list_add(&inode->i_lru, &inode->i_sb->s_inode_lru); | ||
407 | inode->i_sb->s_nr_inodes_unused++; | ||
408 | this_cpu_inc(nr_unused); | 406 | this_cpu_inc(nr_unused); |
409 | } | ||
410 | spin_unlock(&inode->i_sb->s_inode_lru_lock); | ||
411 | } | 407 | } |
412 | 408 | ||
413 | /* | 409 | /* |
@@ -425,13 +421,9 @@ void inode_add_lru(struct inode *inode) | |||
425 | 421 | ||
426 | static void inode_lru_list_del(struct inode *inode) | 422 | static void inode_lru_list_del(struct inode *inode) |
427 | { | 423 | { |
428 | spin_lock(&inode->i_sb->s_inode_lru_lock); | 424 | |
429 | if (!list_empty(&inode->i_lru)) { | 425 | if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru)) |
430 | list_del_init(&inode->i_lru); | ||
431 | inode->i_sb->s_nr_inodes_unused--; | ||
432 | this_cpu_dec(nr_unused); | 426 | this_cpu_dec(nr_unused); |
433 | } | ||
434 | spin_unlock(&inode->i_sb->s_inode_lru_lock); | ||
435 | } | 427 | } |
436 | 428 | ||
437 | /** | 429 | /** |
@@ -675,24 +667,8 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) | |||
675 | return busy; | 667 | return busy; |
676 | } | 668 | } |
677 | 669 | ||
678 | static int can_unuse(struct inode *inode) | ||
679 | { | ||
680 | if (inode->i_state & ~I_REFERENCED) | ||
681 | return 0; | ||
682 | if (inode_has_buffers(inode)) | ||
683 | return 0; | ||
684 | if (atomic_read(&inode->i_count)) | ||
685 | return 0; | ||
686 | if (inode->i_data.nrpages) | ||
687 | return 0; | ||
688 | return 1; | ||
689 | } | ||
690 | |||
691 | /* | 670 | /* |
692 | * Walk the superblock inode LRU for freeable inodes and attempt to free them. | 671 | * Isolate the inode from the LRU in preparation for freeing it. |
693 | * This is called from the superblock shrinker function with a number of inodes | ||
694 | * to trim from the LRU. Inodes to be freed are moved to a temporary list and | ||
695 | * then are freed outside inode_lock by dispose_list(). | ||
696 | * | 672 | * |
697 | * Any inodes which are pinned purely because of attached pagecache have their | 673 | * Any inodes which are pinned purely because of attached pagecache have their |
698 | * pagecache removed. If the inode has metadata buffers attached to | 674 | * pagecache removed. If the inode has metadata buffers attached to |
@@ -706,89 +682,82 @@ static int can_unuse(struct inode *inode) | |||
706 | * LRU does not have strict ordering. Hence we don't want to reclaim inodes | 682 | * LRU does not have strict ordering. Hence we don't want to reclaim inodes |
707 | * with this flag set because they are the inodes that are out of order. | 683 | * with this flag set because they are the inodes that are out of order. |
708 | */ | 684 | */ |
709 | void prune_icache_sb(struct super_block *sb, int nr_to_scan) | 685 | static enum lru_status |
686 | inode_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) | ||
710 | { | 687 | { |
711 | LIST_HEAD(freeable); | 688 | struct list_head *freeable = arg; |
712 | int nr_scanned; | 689 | struct inode *inode = container_of(item, struct inode, i_lru); |
713 | unsigned long reap = 0; | ||
714 | 690 | ||
715 | spin_lock(&sb->s_inode_lru_lock); | 691 | /* |
716 | for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) { | 692 | * we are inverting the lru lock/inode->i_lock here, so use a trylock. |
717 | struct inode *inode; | 693 | * If we fail to get the lock, just skip it. |
694 | */ | ||
695 | if (!spin_trylock(&inode->i_lock)) | ||
696 | return LRU_SKIP; | ||
718 | 697 | ||
719 | if (list_empty(&sb->s_inode_lru)) | 698 | /* |
720 | break; | 699 | * Referenced or dirty inodes are still in use. Give them another pass |
700 | * through the LRU as we cannot reclaim them now. | ||
701 | */ | ||
702 | if (atomic_read(&inode->i_count) || | ||
703 | (inode->i_state & ~I_REFERENCED)) { | ||
704 | list_del_init(&inode->i_lru); | ||
705 | spin_unlock(&inode->i_lock); | ||
706 | this_cpu_dec(nr_unused); | ||
707 | return LRU_REMOVED; | ||
708 | } | ||
721 | 709 | ||
722 | inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru); | 710 | /* recently referenced inodes get one more pass */ |
711 | if (inode->i_state & I_REFERENCED) { | ||
712 | inode->i_state &= ~I_REFERENCED; | ||
713 | spin_unlock(&inode->i_lock); | ||
714 | return LRU_ROTATE; | ||
715 | } | ||
723 | 716 | ||
724 | /* | 717 | if (inode_has_buffers(inode) || inode->i_data.nrpages) { |
725 | * we are inverting the sb->s_inode_lru_lock/inode->i_lock here, | 718 | __iget(inode); |
726 | * so use a trylock. If we fail to get the lock, just move the | 719 | spin_unlock(&inode->i_lock); |
727 | * inode to the back of the list so we don't spin on it. | 720 | spin_unlock(lru_lock); |
728 | */ | 721 | if (remove_inode_buffers(inode)) { |
729 | if (!spin_trylock(&inode->i_lock)) { | 722 | unsigned long reap; |
730 | list_move(&inode->i_lru, &sb->s_inode_lru); | 723 | reap = invalidate_mapping_pages(&inode->i_data, 0, -1); |
731 | continue; | 724 | if (current_is_kswapd()) |
725 | __count_vm_events(KSWAPD_INODESTEAL, reap); | ||
726 | else | ||
727 | __count_vm_events(PGINODESTEAL, reap); | ||
728 | if (current->reclaim_state) | ||
729 | current->reclaim_state->reclaimed_slab += reap; | ||
732 | } | 730 | } |
731 | iput(inode); | ||
732 | spin_lock(lru_lock); | ||
733 | return LRU_RETRY; | ||
734 | } | ||
733 | 735 | ||
734 | /* | 736 | WARN_ON(inode->i_state & I_NEW); |
735 | * Referenced or dirty inodes are still in use. Give them | 737 | inode->i_state |= I_FREEING; |
736 | * another pass through the LRU as we canot reclaim them now. | 738 | list_move(&inode->i_lru, freeable); |
737 | */ | 739 | spin_unlock(&inode->i_lock); |
738 | if (atomic_read(&inode->i_count) || | ||
739 | (inode->i_state & ~I_REFERENCED)) { | ||
740 | list_del_init(&inode->i_lru); | ||
741 | spin_unlock(&inode->i_lock); | ||
742 | sb->s_nr_inodes_unused--; | ||
743 | this_cpu_dec(nr_unused); | ||
744 | continue; | ||
745 | } | ||
746 | 740 | ||
747 | /* recently referenced inodes get one more pass */ | 741 | this_cpu_dec(nr_unused); |
748 | if (inode->i_state & I_REFERENCED) { | 742 | return LRU_REMOVED; |
749 | inode->i_state &= ~I_REFERENCED; | 743 | } |
750 | list_move(&inode->i_lru, &sb->s_inode_lru); | ||
751 | spin_unlock(&inode->i_lock); | ||
752 | continue; | ||
753 | } | ||
754 | if (inode_has_buffers(inode) || inode->i_data.nrpages) { | ||
755 | __iget(inode); | ||
756 | spin_unlock(&inode->i_lock); | ||
757 | spin_unlock(&sb->s_inode_lru_lock); | ||
758 | if (remove_inode_buffers(inode)) | ||
759 | reap += invalidate_mapping_pages(&inode->i_data, | ||
760 | 0, -1); | ||
761 | iput(inode); | ||
762 | spin_lock(&sb->s_inode_lru_lock); | ||
763 | |||
764 | if (inode != list_entry(sb->s_inode_lru.next, | ||
765 | struct inode, i_lru)) | ||
766 | continue; /* wrong inode or list_empty */ | ||
767 | /* avoid lock inversions with trylock */ | ||
768 | if (!spin_trylock(&inode->i_lock)) | ||
769 | continue; | ||
770 | if (!can_unuse(inode)) { | ||
771 | spin_unlock(&inode->i_lock); | ||
772 | continue; | ||
773 | } | ||
774 | } | ||
775 | WARN_ON(inode->i_state & I_NEW); | ||
776 | inode->i_state |= I_FREEING; | ||
777 | spin_unlock(&inode->i_lock); | ||
778 | 744 | ||
779 | list_move(&inode->i_lru, &freeable); | 745 | /* |
780 | sb->s_nr_inodes_unused--; | 746 | * Walk the superblock inode LRU for freeable inodes and attempt to free them. |
781 | this_cpu_dec(nr_unused); | 747 | * This is called from the superblock shrinker function with a number of inodes |
782 | } | 748 | * to trim from the LRU. Inodes to be freed are moved to a temporary list and |
783 | if (current_is_kswapd()) | 749 | * then are freed outside inode_lock by dispose_list(). |
784 | __count_vm_events(KSWAPD_INODESTEAL, reap); | 750 | */ |
785 | else | 751 | long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan, |
786 | __count_vm_events(PGINODESTEAL, reap); | 752 | int nid) |
787 | spin_unlock(&sb->s_inode_lru_lock); | 753 | { |
788 | if (current->reclaim_state) | 754 | LIST_HEAD(freeable); |
789 | current->reclaim_state->reclaimed_slab += reap; | 755 | long freed; |
790 | 756 | ||
757 | freed = list_lru_walk_node(&sb->s_inode_lru, nid, inode_lru_isolate, | ||
758 | &freeable, &nr_to_scan); | ||
791 | dispose_list(&freeable); | 759 | dispose_list(&freeable); |
760 | return freed; | ||
792 | } | 761 | } |
793 | 762 | ||
794 | static void __wait_on_freeing_inode(struct inode *inode); | 763 | static void __wait_on_freeing_inode(struct inode *inode); |
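The inode isolate callback uses the one lru_status value the dcache one does not: LRU_RETRY. That is the answer for work that cannot be done under the lru lock (here, dropping buffers and page cache before iput()): pin the object, drop the lock, do the work, retake the lock, and tell list_lru_walk_node() to restart because the list may have changed underneath it. Reduced to its shape, reusing the hypothetical struct obj from the dcache sketch above, with the helpers equally hypothetical:

static enum lru_status obj_isolate(struct list_head *item,
				   spinlock_t *lru_lock, void *arg)
{
	struct obj *o = container_of(item, struct obj, lru);

	if (obj_needs_unlocked_teardown(o)) {	/* hypothetical predicate */
		obj_get(o);			/* keep it alive across the unlock */
		spin_unlock(lru_lock);
		obj_do_unlocked_teardown(o);	/* e.g. drop attached page cache */
		obj_put(o);
		spin_lock(lru_lock);
		return LRU_RETRY;	/* list may have changed: restart the walk */
	}

	list_move(item, arg);		/* onto the caller's dispose list */
	return LRU_REMOVED;
}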
diff --git a/fs/internal.h b/fs/internal.h index 2be46ea5dd0b..513e0d859a6c 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -114,6 +114,8 @@ extern int open_check_o_direct(struct file *f); | |||
114 | * inode.c | 114 | * inode.c |
115 | */ | 115 | */ |
116 | extern spinlock_t inode_sb_list_lock; | 116 | extern spinlock_t inode_sb_list_lock; |
117 | extern long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan, | ||
118 | int nid); | ||
117 | extern void inode_add_lru(struct inode *inode); | 119 | extern void inode_add_lru(struct inode *inode); |
118 | 120 | ||
119 | /* | 121 | /* |
@@ -121,7 +123,7 @@ extern void inode_add_lru(struct inode *inode); | |||
121 | */ | 123 | */ |
122 | extern void inode_wb_list_del(struct inode *inode); | 124 | extern void inode_wb_list_del(struct inode *inode); |
123 | 125 | ||
124 | extern int get_nr_dirty_inodes(void); | 126 | extern long get_nr_dirty_inodes(void); |
125 | extern void evict_inodes(struct super_block *); | 127 | extern void evict_inodes(struct super_block *); |
126 | extern int invalidate_inodes(struct super_block *, bool); | 128 | extern int invalidate_inodes(struct super_block *, bool); |
127 | 129 | ||
@@ -130,6 +132,8 @@ extern int invalidate_inodes(struct super_block *, bool); | |||
130 | */ | 132 | */ |
131 | extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); | 133 | extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); |
132 | extern int d_set_mounted(struct dentry *dentry); | 134 | extern int d_set_mounted(struct dentry *dentry); |
135 | extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, | ||
136 | int nid); | ||
133 | 137 | ||
134 | /* | 138 | /* |
135 | * read_write.c | 139 | * read_write.c |
diff --git a/fs/mbcache.c b/fs/mbcache.c index 8c32ef3ba88e..e519e45bf673 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c | |||
@@ -86,18 +86,6 @@ static LIST_HEAD(mb_cache_list); | |||
86 | static LIST_HEAD(mb_cache_lru_list); | 86 | static LIST_HEAD(mb_cache_lru_list); |
87 | static DEFINE_SPINLOCK(mb_cache_spinlock); | 87 | static DEFINE_SPINLOCK(mb_cache_spinlock); |
88 | 88 | ||
89 | /* | ||
90 | * What the mbcache registers as to get shrunk dynamically. | ||
91 | */ | ||
92 | |||
93 | static int mb_cache_shrink_fn(struct shrinker *shrink, | ||
94 | struct shrink_control *sc); | ||
95 | |||
96 | static struct shrinker mb_cache_shrinker = { | ||
97 | .shrink = mb_cache_shrink_fn, | ||
98 | .seeks = DEFAULT_SEEKS, | ||
99 | }; | ||
100 | |||
101 | static inline int | 89 | static inline int |
102 | __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) | 90 | __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) |
103 | { | 91 | { |
@@ -151,7 +139,7 @@ forget: | |||
151 | 139 | ||
152 | 140 | ||
153 | /* | 141 | /* |
154 | * mb_cache_shrink_fn() memory pressure callback | 142 | * mb_cache_shrink_scan() memory pressure callback |
155 | * | 143 | * |
156 | * This function is called by the kernel memory management when memory | 144 | * This function is called by the kernel memory management when memory |
157 | * gets low. | 145 | * gets low. |
@@ -159,17 +147,16 @@ forget: | |||
159 | * @shrink: (ignored) | 147 | * @shrink: (ignored) |
160 | * @sc: shrink_control passed from reclaim | 148 | * @sc: shrink_control passed from reclaim |
161 | * | 149 | * |
162 | * Returns the number of objects which are present in the cache. | 150 | * Returns the number of objects freed. |
163 | */ | 151 | */ |
164 | static int | 152 | static unsigned long |
165 | mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc) | 153 | mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
166 | { | 154 | { |
167 | LIST_HEAD(free_list); | 155 | LIST_HEAD(free_list); |
168 | struct mb_cache *cache; | ||
169 | struct mb_cache_entry *entry, *tmp; | 156 | struct mb_cache_entry *entry, *tmp; |
170 | int count = 0; | ||
171 | int nr_to_scan = sc->nr_to_scan; | 157 | int nr_to_scan = sc->nr_to_scan; |
172 | gfp_t gfp_mask = sc->gfp_mask; | 158 | gfp_t gfp_mask = sc->gfp_mask; |
159 | unsigned long freed = 0; | ||
173 | 160 | ||
174 | mb_debug("trying to free %d entries", nr_to_scan); | 161 | mb_debug("trying to free %d entries", nr_to_scan); |
175 | spin_lock(&mb_cache_spinlock); | 162 | spin_lock(&mb_cache_spinlock); |
@@ -179,19 +166,37 @@ mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc) | |||
179 | struct mb_cache_entry, e_lru_list); | 166 | struct mb_cache_entry, e_lru_list); |
180 | list_move_tail(&ce->e_lru_list, &free_list); | 167 | list_move_tail(&ce->e_lru_list, &free_list); |
181 | __mb_cache_entry_unhash(ce); | 168 | __mb_cache_entry_unhash(ce); |
169 | freed++; | ||
170 | } | ||
171 | spin_unlock(&mb_cache_spinlock); | ||
172 | list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { | ||
173 | __mb_cache_entry_forget(entry, gfp_mask); | ||
182 | } | 174 | } |
175 | return freed; | ||
176 | } | ||
177 | |||
178 | static unsigned long | ||
179 | mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) | ||
180 | { | ||
181 | struct mb_cache *cache; | ||
182 | unsigned long count = 0; | ||
183 | |||
184 | spin_lock(&mb_cache_spinlock); | ||
183 | list_for_each_entry(cache, &mb_cache_list, c_cache_list) { | 185 | list_for_each_entry(cache, &mb_cache_list, c_cache_list) { |
184 | mb_debug("cache %s (%d)", cache->c_name, | 186 | mb_debug("cache %s (%d)", cache->c_name, |
185 | atomic_read(&cache->c_entry_count)); | 187 | atomic_read(&cache->c_entry_count)); |
186 | count += atomic_read(&cache->c_entry_count); | 188 | count += atomic_read(&cache->c_entry_count); |
187 | } | 189 | } |
188 | spin_unlock(&mb_cache_spinlock); | 190 | spin_unlock(&mb_cache_spinlock); |
189 | list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { | 191 | |
190 | __mb_cache_entry_forget(entry, gfp_mask); | 192 | return vfs_pressure_ratio(count); |
191 | } | ||
192 | return (count / 100) * sysctl_vfs_cache_pressure; | ||
193 | } | 193 | } |
194 | 194 | ||
195 | static struct shrinker mb_cache_shrinker = { | ||
196 | .count_objects = mb_cache_shrink_count, | ||
197 | .scan_objects = mb_cache_shrink_scan, | ||
198 | .seeks = DEFAULT_SEEKS, | ||
199 | }; | ||
195 | 200 | ||
196 | /* | 201 | /* |
197 | * mb_cache_create() create a new cache | 202 | * mb_cache_create() create a new cache |
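The mbcache hunk above is the conversion in its simplest form: the single ->shrink callback is split into a cheap ->count_objects and a ->scan_objects that reports how much it actually freed. A minimal sketch of the resulting registration, using a hypothetical cache (example_nr_cached and example_prune() are stand-ins, not names from the patch):

#include <linux/shrinker.h>

static atomic_long_t example_nr_cached;			/* hypothetical counter */
static unsigned long example_prune(unsigned long nr);	/* hypothetical pruner  */

static unsigned long
example_count(struct shrinker *shrink, struct shrink_control *sc)
{
	/* must be cheap: no freeing, no blocking work */
	return vfs_pressure_ratio(atomic_long_read(&example_nr_cached));
}

static unsigned long
example_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	/* free up to sc->nr_to_scan objects, return how many really went */
	return example_prune(sc->nr_to_scan);
}

static struct shrinker example_shrinker = {
	.count_objects	= example_count,
	.scan_objects	= example_scan,
	.seeks		= DEFAULT_SEEKS,
};

Splitting the two paths lets the shrinker core ask a cache for its size without triggering any freeing work, which is what the old nr_to_scan == 0 special cases were emulating.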
diff --git a/fs/namei.c b/fs/namei.c index 409a441ba2ae..0dc4cbf21f37 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -660,29 +660,6 @@ static __always_inline void set_root_rcu(struct nameidata *nd) | |||
660 | } | 660 | } |
661 | } | 661 | } |
662 | 662 | ||
663 | static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) | ||
664 | { | ||
665 | int ret; | ||
666 | |||
667 | if (IS_ERR(link)) | ||
668 | goto fail; | ||
669 | |||
670 | if (*link == '/') { | ||
671 | set_root(nd); | ||
672 | path_put(&nd->path); | ||
673 | nd->path = nd->root; | ||
674 | path_get(&nd->root); | ||
675 | nd->flags |= LOOKUP_JUMPED; | ||
676 | } | ||
677 | nd->inode = nd->path.dentry->d_inode; | ||
678 | |||
679 | ret = link_path_walk(link, nd); | ||
680 | return ret; | ||
681 | fail: | ||
682 | path_put(&nd->path); | ||
683 | return PTR_ERR(link); | ||
684 | } | ||
685 | |||
686 | static void path_put_conditional(struct path *path, struct nameidata *nd) | 663 | static void path_put_conditional(struct path *path, struct nameidata *nd) |
687 | { | 664 | { |
688 | dput(path->dentry); | 665 | dput(path->dentry); |
@@ -874,7 +851,20 @@ follow_link(struct path *link, struct nameidata *nd, void **p) | |||
874 | error = 0; | 851 | error = 0; |
875 | s = nd_get_link(nd); | 852 | s = nd_get_link(nd); |
876 | if (s) { | 853 | if (s) { |
877 | error = __vfs_follow_link(nd, s); | 854 | if (unlikely(IS_ERR(s))) { |
855 | path_put(&nd->path); | ||
856 | put_link(nd, link, *p); | ||
857 | return PTR_ERR(s); | ||
858 | } | ||
859 | if (*s == '/') { | ||
860 | set_root(nd); | ||
861 | path_put(&nd->path); | ||
862 | nd->path = nd->root; | ||
863 | path_get(&nd->root); | ||
864 | nd->flags |= LOOKUP_JUMPED; | ||
865 | } | ||
866 | nd->inode = nd->path.dentry->d_inode; | ||
867 | error = link_path_walk(s, nd); | ||
878 | if (unlikely(error)) | 868 | if (unlikely(error)) |
879 | put_link(nd, link, *p); | 869 | put_link(nd, link, *p); |
880 | } | 870 | } |
@@ -2271,12 +2261,15 @@ mountpoint_last(struct nameidata *nd, struct path *path) | |||
2271 | dentry = d_alloc(dir, &nd->last); | 2261 | dentry = d_alloc(dir, &nd->last); |
2272 | if (!dentry) { | 2262 | if (!dentry) { |
2273 | error = -ENOMEM; | 2263 | error = -ENOMEM; |
2264 | mutex_unlock(&dir->d_inode->i_mutex); | ||
2274 | goto out; | 2265 | goto out; |
2275 | } | 2266 | } |
2276 | dentry = lookup_real(dir->d_inode, dentry, nd->flags); | 2267 | dentry = lookup_real(dir->d_inode, dentry, nd->flags); |
2277 | error = PTR_ERR(dentry); | 2268 | error = PTR_ERR(dentry); |
2278 | if (IS_ERR(dentry)) | 2269 | if (IS_ERR(dentry)) { |
2270 | mutex_unlock(&dir->d_inode->i_mutex); | ||
2279 | goto out; | 2271 | goto out; |
2272 | } | ||
2280 | } | 2273 | } |
2281 | mutex_unlock(&dir->d_inode->i_mutex); | 2274 | mutex_unlock(&dir->d_inode->i_mutex); |
2282 | 2275 | ||
@@ -4236,11 +4229,6 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) | |||
4236 | return res; | 4229 | return res; |
4237 | } | 4230 | } |
4238 | 4231 | ||
4239 | int vfs_follow_link(struct nameidata *nd, const char *link) | ||
4240 | { | ||
4241 | return __vfs_follow_link(nd, link); | ||
4242 | } | ||
4243 | |||
4244 | /* get the link contents into pagecache */ | 4232 | /* get the link contents into pagecache */ |
4245 | static char *page_getlink(struct dentry * dentry, struct page **ppage) | 4233 | static char *page_getlink(struct dentry * dentry, struct page **ppage) |
4246 | { | 4234 | { |
@@ -4352,7 +4340,6 @@ EXPORT_SYMBOL(vfs_path_lookup); | |||
4352 | EXPORT_SYMBOL(inode_permission); | 4340 | EXPORT_SYMBOL(inode_permission); |
4353 | EXPORT_SYMBOL(unlock_rename); | 4341 | EXPORT_SYMBOL(unlock_rename); |
4354 | EXPORT_SYMBOL(vfs_create); | 4342 | EXPORT_SYMBOL(vfs_create); |
4355 | EXPORT_SYMBOL(vfs_follow_link); | ||
4356 | EXPORT_SYMBOL(vfs_link); | 4343 | EXPORT_SYMBOL(vfs_link); |
4357 | EXPORT_SYMBOL(vfs_mkdir); | 4344 | EXPORT_SYMBOL(vfs_mkdir); |
4358 | EXPORT_SYMBOL(vfs_mknod); | 4345 | EXPORT_SYMBOL(vfs_mknod); |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e79bc6ce828e..de434f309af0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -2006,17 +2006,18 @@ static void nfs_access_free_list(struct list_head *head) | |||
2006 | } | 2006 | } |
2007 | } | 2007 | } |
2008 | 2008 | ||
2009 | int nfs_access_cache_shrinker(struct shrinker *shrink, | 2009 | unsigned long |
2010 | struct shrink_control *sc) | 2010 | nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc) |
2011 | { | 2011 | { |
2012 | LIST_HEAD(head); | 2012 | LIST_HEAD(head); |
2013 | struct nfs_inode *nfsi, *next; | 2013 | struct nfs_inode *nfsi, *next; |
2014 | struct nfs_access_entry *cache; | 2014 | struct nfs_access_entry *cache; |
2015 | int nr_to_scan = sc->nr_to_scan; | 2015 | int nr_to_scan = sc->nr_to_scan; |
2016 | gfp_t gfp_mask = sc->gfp_mask; | 2016 | gfp_t gfp_mask = sc->gfp_mask; |
2017 | long freed = 0; | ||
2017 | 2018 | ||
2018 | if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) | 2019 | if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) |
2019 | return (nr_to_scan == 0) ? 0 : -1; | 2020 | return SHRINK_STOP; |
2020 | 2021 | ||
2021 | spin_lock(&nfs_access_lru_lock); | 2022 | spin_lock(&nfs_access_lru_lock); |
2022 | list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { | 2023 | list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { |
@@ -2032,6 +2033,7 @@ int nfs_access_cache_shrinker(struct shrinker *shrink, | |||
2032 | struct nfs_access_entry, lru); | 2033 | struct nfs_access_entry, lru); |
2033 | list_move(&cache->lru, &head); | 2034 | list_move(&cache->lru, &head); |
2034 | rb_erase(&cache->rb_node, &nfsi->access_cache); | 2035 | rb_erase(&cache->rb_node, &nfsi->access_cache); |
2036 | freed++; | ||
2035 | if (!list_empty(&nfsi->access_cache_entry_lru)) | 2037 | if (!list_empty(&nfsi->access_cache_entry_lru)) |
2036 | list_move_tail(&nfsi->access_cache_inode_lru, | 2038 | list_move_tail(&nfsi->access_cache_inode_lru, |
2037 | &nfs_access_lru_list); | 2039 | &nfs_access_lru_list); |
@@ -2046,7 +2048,13 @@ remove_lru_entry: | |||
2046 | } | 2048 | } |
2047 | spin_unlock(&nfs_access_lru_lock); | 2049 | spin_unlock(&nfs_access_lru_lock); |
2048 | nfs_access_free_list(&head); | 2050 | nfs_access_free_list(&head); |
2049 | return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure; | 2051 | return freed; |
2052 | } | ||
2053 | |||
2054 | unsigned long | ||
2055 | nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc) | ||
2056 | { | ||
2057 | return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries)); | ||
2050 | } | 2058 | } |
2051 | 2059 | ||
2052 | static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head) | 2060 | static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head) |
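Note the change in the "cannot reclaim right now" convention above: the old callback returned 0 or -1 depending on nr_to_scan, while the new scan callback simply returns SHRINK_STOP when the allocation context rules out doing the work. A sketch of that gate (example_prune() is again a stand-in):

static unsigned long
example_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	/*
	 * If this reclaim context cannot take the locks or do the I/O the
	 * cache needs, tell the core to stop instead of faking a result.
	 */
	if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL)
		return SHRINK_STOP;

	return example_prune(sc->nr_to_scan);
}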
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d388302c005f..38da8c2b81ac 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -273,8 +273,10 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp, | |||
273 | const char *ip_addr); | 273 | const char *ip_addr); |
274 | 274 | ||
275 | /* dir.c */ | 275 | /* dir.c */ |
276 | extern int nfs_access_cache_shrinker(struct shrinker *shrink, | 276 | extern unsigned long nfs_access_cache_count(struct shrinker *shrink, |
277 | struct shrink_control *sc); | 277 | struct shrink_control *sc); |
278 | extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, | ||
279 | struct shrink_control *sc); | ||
278 | struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); | 280 | struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); |
279 | int nfs_create(struct inode *, struct dentry *, umode_t, bool); | 281 | int nfs_create(struct inode *, struct dentry *, umode_t, bool); |
280 | int nfs_mkdir(struct inode *, struct dentry *, umode_t); | 282 | int nfs_mkdir(struct inode *, struct dentry *, umode_t); |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5793f24613c8..a03b9c6f9489 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -360,7 +360,8 @@ static void unregister_nfs4_fs(void) | |||
360 | #endif | 360 | #endif |
361 | 361 | ||
362 | static struct shrinker acl_shrinker = { | 362 | static struct shrinker acl_shrinker = { |
363 | .shrink = nfs_access_cache_shrinker, | 363 | .count_objects = nfs_access_cache_count, |
364 | .scan_objects = nfs_access_cache_scan, | ||
364 | .seeks = DEFAULT_SEEKS, | 365 | .seeks = DEFAULT_SEEKS, |
365 | }; | 366 | }; |
366 | 367 | ||
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index e76244edd748..9186c7ce0b14 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c | |||
@@ -59,11 +59,14 @@ static unsigned int longest_chain_cachesize; | |||
59 | 59 | ||
60 | static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); | 60 | static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); |
61 | static void cache_cleaner_func(struct work_struct *unused); | 61 | static void cache_cleaner_func(struct work_struct *unused); |
62 | static int nfsd_reply_cache_shrink(struct shrinker *shrink, | 62 | static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, |
63 | struct shrink_control *sc); | 63 | struct shrink_control *sc); |
64 | static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, | ||
65 | struct shrink_control *sc); | ||
64 | 66 | ||
65 | static struct shrinker nfsd_reply_cache_shrinker = { | 67 | static struct shrinker nfsd_reply_cache_shrinker = { |
66 | .shrink = nfsd_reply_cache_shrink, | 68 | .scan_objects = nfsd_reply_cache_scan, |
69 | .count_objects = nfsd_reply_cache_count, | ||
67 | .seeks = 1, | 70 | .seeks = 1, |
68 | }; | 71 | }; |
69 | 72 | ||
@@ -232,16 +235,18 @@ nfsd_cache_entry_expired(struct svc_cacherep *rp) | |||
232 | * Walk the LRU list and prune off entries that are older than RC_EXPIRE. | 235 | * Walk the LRU list and prune off entries that are older than RC_EXPIRE. |
233 | * Also prune the oldest ones when the total exceeds the max number of entries. | 236 | * Also prune the oldest ones when the total exceeds the max number of entries. |
234 | */ | 237 | */ |
235 | static void | 238 | static long |
236 | prune_cache_entries(void) | 239 | prune_cache_entries(void) |
237 | { | 240 | { |
238 | struct svc_cacherep *rp, *tmp; | 241 | struct svc_cacherep *rp, *tmp; |
242 | long freed = 0; | ||
239 | 243 | ||
240 | list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { | 244 | list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { |
241 | if (!nfsd_cache_entry_expired(rp) && | 245 | if (!nfsd_cache_entry_expired(rp) && |
242 | num_drc_entries <= max_drc_entries) | 246 | num_drc_entries <= max_drc_entries) |
243 | break; | 247 | break; |
244 | nfsd_reply_cache_free_locked(rp); | 248 | nfsd_reply_cache_free_locked(rp); |
249 | freed++; | ||
245 | } | 250 | } |
246 | 251 | ||
247 | /* | 252 | /* |
@@ -254,6 +259,7 @@ prune_cache_entries(void) | |||
254 | cancel_delayed_work(&cache_cleaner); | 259 | cancel_delayed_work(&cache_cleaner); |
255 | else | 260 | else |
256 | mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); | 261 | mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); |
262 | return freed; | ||
257 | } | 263 | } |
258 | 264 | ||
259 | static void | 265 | static void |
@@ -264,20 +270,28 @@ cache_cleaner_func(struct work_struct *unused) | |||
264 | spin_unlock(&cache_lock); | 270 | spin_unlock(&cache_lock); |
265 | } | 271 | } |
266 | 272 | ||
267 | static int | 273 | static unsigned long |
268 | nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc) | 274 | nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) |
269 | { | 275 | { |
270 | unsigned int num; | 276 | unsigned long num; |
271 | 277 | ||
272 | spin_lock(&cache_lock); | 278 | spin_lock(&cache_lock); |
273 | if (sc->nr_to_scan) | ||
274 | prune_cache_entries(); | ||
275 | num = num_drc_entries; | 279 | num = num_drc_entries; |
276 | spin_unlock(&cache_lock); | 280 | spin_unlock(&cache_lock); |
277 | 281 | ||
278 | return num; | 282 | return num; |
279 | } | 283 | } |
280 | 284 | ||
285 | static unsigned long | ||
286 | nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) | ||
287 | { | ||
288 | unsigned long freed; | ||
289 | |||
290 | spin_lock(&cache_lock); | ||
291 | freed = prune_cache_entries(); | ||
292 | spin_unlock(&cache_lock); | ||
293 | return freed; | ||
294 | } | ||
281 | /* | 295 | /* |
282 | * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes | 296 | * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes |
283 | */ | 297 | */ |
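For orientation, this is roughly how the shrinker core drives the two callbacks after this series. It is a simplified model, not the actual mm/vmscan.c code; deferred-work accounting and the seeks/priority scaling are elided.

static unsigned long
drive_shrinker(struct shrinker *s, struct shrink_control *sc)
{
	unsigned long freed = 0;
	unsigned long nr = s->count_objects(s, sc);	/* scaled by seeks/priority in reality */
	unsigned long batch = s->batch ? s->batch : SHRINK_BATCH;

	while (nr >= batch) {
		unsigned long ret;

		sc->nr_to_scan = batch;
		ret = s->scan_objects(s, sc);
		if (ret == SHRINK_STOP)
			break;
		freed += ret;
		nr -= batch;
	}
	return freed;
}

The properties the conversions above rely on: count_objects() is called unconditionally and must be cheap, scan_objects() is called in ->batch sized chunks, and SHRINK_STOP ends the pass early.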
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 9a702e193538..831d49a4111f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -687,45 +687,37 @@ int dquot_quota_sync(struct super_block *sb, int type) | |||
687 | } | 687 | } |
688 | EXPORT_SYMBOL(dquot_quota_sync); | 688 | EXPORT_SYMBOL(dquot_quota_sync); |
689 | 689 | ||
690 | /* Free unused dquots from cache */ | 690 | static unsigned long |
691 | static void prune_dqcache(int count) | 691 | dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
692 | { | 692 | { |
693 | struct list_head *head; | 693 | struct list_head *head; |
694 | struct dquot *dquot; | 694 | struct dquot *dquot; |
695 | unsigned long freed = 0; | ||
695 | 696 | ||
696 | head = free_dquots.prev; | 697 | head = free_dquots.prev; |
697 | while (head != &free_dquots && count) { | 698 | while (head != &free_dquots && sc->nr_to_scan) { |
698 | dquot = list_entry(head, struct dquot, dq_free); | 699 | dquot = list_entry(head, struct dquot, dq_free); |
699 | remove_dquot_hash(dquot); | 700 | remove_dquot_hash(dquot); |
700 | remove_free_dquot(dquot); | 701 | remove_free_dquot(dquot); |
701 | remove_inuse(dquot); | 702 | remove_inuse(dquot); |
702 | do_destroy_dquot(dquot); | 703 | do_destroy_dquot(dquot); |
703 | count--; | 704 | sc->nr_to_scan--; |
705 | freed++; | ||
704 | head = free_dquots.prev; | 706 | head = free_dquots.prev; |
705 | } | 707 | } |
708 | return freed; | ||
706 | } | 709 | } |
707 | 710 | ||
708 | /* | 711 | static unsigned long |
709 | * This is called from kswapd when we think we need some | 712 | dqcache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) |
710 | * more memory | ||
711 | */ | ||
712 | static int shrink_dqcache_memory(struct shrinker *shrink, | ||
713 | struct shrink_control *sc) | ||
714 | { | 713 | { |
715 | int nr = sc->nr_to_scan; | 714 | return vfs_pressure_ratio( |
716 | 715 | percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS])); | |
717 | if (nr) { | ||
718 | spin_lock(&dq_list_lock); | ||
719 | prune_dqcache(nr); | ||
720 | spin_unlock(&dq_list_lock); | ||
721 | } | ||
722 | return ((unsigned) | ||
723 | percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS]) | ||
724 | /100) * sysctl_vfs_cache_pressure; | ||
725 | } | 716 | } |
726 | 717 | ||
727 | static struct shrinker dqcache_shrinker = { | 718 | static struct shrinker dqcache_shrinker = { |
728 | .shrink = shrink_dqcache_memory, | 719 | .count_objects = dqcache_shrink_count, |
720 | .scan_objects = dqcache_shrink_scan, | ||
729 | .seeks = DEFAULT_SEEKS, | 721 | .seeks = DEFAULT_SEEKS, |
730 | }; | 722 | }; |
731 | 723 | ||
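The open-coded (count / 100) * sysctl_vfs_cache_pressure expressions are replaced throughout by vfs_pressure_ratio(). The helper itself is added by another patch in the series and is not shown in this section; it is presumably along these lines:

static inline unsigned long vfs_pressure_ratio(unsigned long val)
{
	return mult_frac(val, sysctl_vfs_cache_pressure, 100);
}

Doing the multiplication before the division matters for small caches: with the default vfs_cache_pressure of 100, 50 free dquots now report 50, whereas the old (50 / 100) * 100 reported 0 and left the cache invisible to reclaim.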
diff --git a/fs/super.c b/fs/super.c index f6961ea84c56..3a96c9783a8b 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -53,11 +53,15 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = { | |||
53 | * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we | 53 | * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we |
54 | * take a passive reference to the superblock to avoid this from occurring. | 54 | * take a passive reference to the superblock to avoid this from occurring. |
55 | */ | 55 | */ |
56 | static int prune_super(struct shrinker *shrink, struct shrink_control *sc) | 56 | static unsigned long super_cache_scan(struct shrinker *shrink, |
57 | struct shrink_control *sc) | ||
57 | { | 58 | { |
58 | struct super_block *sb; | 59 | struct super_block *sb; |
59 | int fs_objects = 0; | 60 | long fs_objects = 0; |
60 | int total_objects; | 61 | long total_objects; |
62 | long freed = 0; | ||
63 | long dentries; | ||
64 | long inodes; | ||
61 | 65 | ||
62 | sb = container_of(shrink, struct super_block, s_shrink); | 66 | sb = container_of(shrink, struct super_block, s_shrink); |
63 | 67 | ||
@@ -65,46 +69,62 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc) | |||
65 | * Deadlock avoidance. We may hold various FS locks, and we don't want | 69 | * Deadlock avoidance. We may hold various FS locks, and we don't want |
66 | * to recurse into the FS that called us in clear_inode() and friends.. | 70 | * to recurse into the FS that called us in clear_inode() and friends.. |
67 | */ | 71 | */ |
68 | if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS)) | 72 | if (!(sc->gfp_mask & __GFP_FS)) |
69 | return -1; | 73 | return SHRINK_STOP; |
70 | 74 | ||
71 | if (!grab_super_passive(sb)) | 75 | if (!grab_super_passive(sb)) |
72 | return -1; | 76 | return SHRINK_STOP; |
73 | 77 | ||
74 | if (sb->s_op->nr_cached_objects) | 78 | if (sb->s_op->nr_cached_objects) |
75 | fs_objects = sb->s_op->nr_cached_objects(sb); | 79 | fs_objects = sb->s_op->nr_cached_objects(sb, sc->nid); |
76 | |||
77 | total_objects = sb->s_nr_dentry_unused + | ||
78 | sb->s_nr_inodes_unused + fs_objects + 1; | ||
79 | |||
80 | if (sc->nr_to_scan) { | ||
81 | int dentries; | ||
82 | int inodes; | ||
83 | |||
84 | /* proportion the scan between the caches */ | ||
85 | dentries = (sc->nr_to_scan * sb->s_nr_dentry_unused) / | ||
86 | total_objects; | ||
87 | inodes = (sc->nr_to_scan * sb->s_nr_inodes_unused) / | ||
88 | total_objects; | ||
89 | if (fs_objects) | ||
90 | fs_objects = (sc->nr_to_scan * fs_objects) / | ||
91 | total_objects; | ||
92 | /* | ||
93 | * prune the dcache first as the icache is pinned by it, then | ||
94 | * prune the icache, followed by the filesystem specific caches | ||
95 | */ | ||
96 | prune_dcache_sb(sb, dentries); | ||
97 | prune_icache_sb(sb, inodes); | ||
98 | 80 | ||
99 | if (fs_objects && sb->s_op->free_cached_objects) { | 81 | inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid); |
100 | sb->s_op->free_cached_objects(sb, fs_objects); | 82 | dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid); |
101 | fs_objects = sb->s_op->nr_cached_objects(sb); | 83 | total_objects = dentries + inodes + fs_objects + 1; |
102 | } | 84 | |
103 | total_objects = sb->s_nr_dentry_unused + | 85 | /* proportion the scan between the caches */ |
104 | sb->s_nr_inodes_unused + fs_objects; | 86 | dentries = mult_frac(sc->nr_to_scan, dentries, total_objects); |
87 | inodes = mult_frac(sc->nr_to_scan, inodes, total_objects); | ||
88 | |||
89 | /* | ||
90 | * prune the dcache first as the icache is pinned by it, then | ||
91 | * prune the icache, followed by the filesystem specific caches | ||
92 | */ | ||
93 | freed = prune_dcache_sb(sb, dentries, sc->nid); | ||
94 | freed += prune_icache_sb(sb, inodes, sc->nid); | ||
95 | |||
96 | if (fs_objects) { | ||
97 | fs_objects = mult_frac(sc->nr_to_scan, fs_objects, | ||
98 | total_objects); | ||
99 | freed += sb->s_op->free_cached_objects(sb, fs_objects, | ||
100 | sc->nid); | ||
105 | } | 101 | } |
106 | 102 | ||
107 | total_objects = (total_objects / 100) * sysctl_vfs_cache_pressure; | 103 | drop_super(sb); |
104 | return freed; | ||
105 | } | ||
106 | |||
107 | static unsigned long super_cache_count(struct shrinker *shrink, | ||
108 | struct shrink_control *sc) | ||
109 | { | ||
110 | struct super_block *sb; | ||
111 | long total_objects = 0; | ||
112 | |||
113 | sb = container_of(shrink, struct super_block, s_shrink); | ||
114 | |||
115 | if (!grab_super_passive(sb)) | ||
116 | return 0; | ||
117 | |||
118 | if (sb->s_op && sb->s_op->nr_cached_objects) | ||
119 | total_objects = sb->s_op->nr_cached_objects(sb, | ||
120 | sc->nid); | ||
121 | |||
122 | total_objects += list_lru_count_node(&sb->s_dentry_lru, | ||
123 | sc->nid); | ||
124 | total_objects += list_lru_count_node(&sb->s_inode_lru, | ||
125 | sc->nid); | ||
126 | |||
127 | total_objects = vfs_pressure_ratio(total_objects); | ||
108 | drop_super(sb); | 128 | drop_super(sb); |
109 | return total_objects; | 129 | return total_objects; |
110 | } | 130 | } |
@@ -175,9 +195,12 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
175 | INIT_HLIST_NODE(&s->s_instances); | 195 | INIT_HLIST_NODE(&s->s_instances); |
176 | INIT_HLIST_BL_HEAD(&s->s_anon); | 196 | INIT_HLIST_BL_HEAD(&s->s_anon); |
177 | INIT_LIST_HEAD(&s->s_inodes); | 197 | INIT_LIST_HEAD(&s->s_inodes); |
178 | INIT_LIST_HEAD(&s->s_dentry_lru); | 198 | |
179 | INIT_LIST_HEAD(&s->s_inode_lru); | 199 | if (list_lru_init(&s->s_dentry_lru)) |
180 | spin_lock_init(&s->s_inode_lru_lock); | 200 | goto err_out; |
201 | if (list_lru_init(&s->s_inode_lru)) | ||
202 | goto err_out_dentry_lru; | ||
203 | |||
181 | INIT_LIST_HEAD(&s->s_mounts); | 204 | INIT_LIST_HEAD(&s->s_mounts); |
182 | init_rwsem(&s->s_umount); | 205 | init_rwsem(&s->s_umount); |
183 | lockdep_set_class(&s->s_umount, &type->s_umount_key); | 206 | lockdep_set_class(&s->s_umount, &type->s_umount_key); |
@@ -210,11 +233,16 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
210 | s->cleancache_poolid = -1; | 233 | s->cleancache_poolid = -1; |
211 | 234 | ||
212 | s->s_shrink.seeks = DEFAULT_SEEKS; | 235 | s->s_shrink.seeks = DEFAULT_SEEKS; |
213 | s->s_shrink.shrink = prune_super; | 236 | s->s_shrink.scan_objects = super_cache_scan; |
237 | s->s_shrink.count_objects = super_cache_count; | ||
214 | s->s_shrink.batch = 1024; | 238 | s->s_shrink.batch = 1024; |
239 | s->s_shrink.flags = SHRINKER_NUMA_AWARE; | ||
215 | } | 240 | } |
216 | out: | 241 | out: |
217 | return s; | 242 | return s; |
243 | |||
244 | err_out_dentry_lru: | ||
245 | list_lru_destroy(&s->s_dentry_lru); | ||
218 | err_out: | 246 | err_out: |
219 | security_sb_free(s); | 247 | security_sb_free(s); |
220 | #ifdef CONFIG_SMP | 248 | #ifdef CONFIG_SMP |
@@ -295,6 +323,9 @@ void deactivate_locked_super(struct super_block *s) | |||
295 | 323 | ||
296 | /* caches are now gone, we can safely kill the shrinker now */ | 324 | /* caches are now gone, we can safely kill the shrinker now */ |
297 | unregister_shrinker(&s->s_shrink); | 325 | unregister_shrinker(&s->s_shrink); |
326 | list_lru_destroy(&s->s_dentry_lru); | ||
327 | list_lru_destroy(&s->s_inode_lru); | ||
328 | |||
298 | put_filesystem(fs); | 329 | put_filesystem(fs); |
299 | put_super(s); | 330 | put_super(s); |
300 | } else { | 331 | } else { |
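A worked example of the proportioning in super_cache_scan() above, with invented numbers: if sc->nr_to_scan is 128 and this node holds 600 unused dentries, 200 unused inodes and no filesystem-private objects, then total_objects is 600 + 200 + 0 + 1 = 801, so

	dentries = mult_frac(128, 600, 801) = 95
	inodes   = mult_frac(128, 200, 801) = 31

and prune_dcache_sb() still runs before prune_icache_sb(), since unused dentries pin the inodes they reference. The +1 in total_objects only guards against dividing by zero when every list is empty.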
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index 9e1d05666fed..f35135e28e96 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c | |||
@@ -277,18 +277,25 @@ static int kick_a_thread(void) | |||
277 | return 0; | 277 | return 0; |
278 | } | 278 | } |
279 | 279 | ||
280 | int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) | 280 | unsigned long ubifs_shrink_count(struct shrinker *shrink, |
281 | struct shrink_control *sc) | ||
281 | { | 282 | { |
282 | int nr = sc->nr_to_scan; | ||
283 | int freed, contention = 0; | ||
284 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); | 283 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); |
285 | 284 | ||
286 | if (nr == 0) | 285 | /* |
287 | /* | 286 | * Due to the way UBIFS updates the clean znode counter it may |
288 | * Due to the way UBIFS updates the clean znode counter it may | 287 | * temporarily be negative. |
289 | * temporarily be negative. | 288 | */ |
290 | */ | 289 | return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; |
291 | return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; | 290 | } |
291 | |||
292 | unsigned long ubifs_shrink_scan(struct shrinker *shrink, | ||
293 | struct shrink_control *sc) | ||
294 | { | ||
295 | unsigned long nr = sc->nr_to_scan; | ||
296 | int contention = 0; | ||
297 | unsigned long freed; | ||
298 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); | ||
292 | 299 | ||
293 | if (!clean_zn_cnt) { | 300 | if (!clean_zn_cnt) { |
294 | /* | 301 | /* |
@@ -316,10 +323,10 @@ int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) | |||
316 | 323 | ||
317 | if (!freed && contention) { | 324 | if (!freed && contention) { |
318 | dbg_tnc("freed nothing, but contention"); | 325 | dbg_tnc("freed nothing, but contention"); |
319 | return -1; | 326 | return SHRINK_STOP; |
320 | } | 327 | } |
321 | 328 | ||
322 | out: | 329 | out: |
323 | dbg_tnc("%d znodes were freed, requested %d", freed, nr); | 330 | dbg_tnc("%lu znodes were freed, requested %lu", freed, nr); |
324 | return freed; | 331 | return freed; |
325 | } | 332 | } |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 879b9976c12b..3e4aa7281e04 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -49,7 +49,8 @@ struct kmem_cache *ubifs_inode_slab; | |||
49 | 49 | ||
50 | /* UBIFS TNC shrinker description */ | 50 | /* UBIFS TNC shrinker description */ |
51 | static struct shrinker ubifs_shrinker_info = { | 51 | static struct shrinker ubifs_shrinker_info = { |
52 | .shrink = ubifs_shrinker, | 52 | .scan_objects = ubifs_shrink_scan, |
53 | .count_objects = ubifs_shrink_count, | ||
53 | .seeks = DEFAULT_SEEKS, | 54 | .seeks = DEFAULT_SEEKS, |
54 | }; | 55 | }; |
55 | 56 | ||
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index b2babce4d70f..e8c8cfe1435c 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -1624,7 +1624,10 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); | |||
1624 | int ubifs_tnc_end_commit(struct ubifs_info *c); | 1624 | int ubifs_tnc_end_commit(struct ubifs_info *c); |
1625 | 1625 | ||
1626 | /* shrinker.c */ | 1626 | /* shrinker.c */ |
1627 | int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc); | 1627 | unsigned long ubifs_shrink_scan(struct shrinker *shrink, |
1628 | struct shrink_control *sc); | ||
1629 | unsigned long ubifs_shrink_count(struct shrinker *shrink, | ||
1630 | struct shrink_control *sc); | ||
1628 | 1631 | ||
1629 | /* commit.c */ | 1632 | /* commit.c */ |
1630 | int ubifs_bg_thread(void *info); | 1633 | int ubifs_bg_thread(void *info); |
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index c06823fe10d3..263470075ea2 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -81,54 +81,6 @@ xfs_buf_vmap_len( | |||
81 | } | 81 | } |
82 | 82 | ||
83 | /* | 83 | /* |
84 | * xfs_buf_lru_add - add a buffer to the LRU. | ||
85 | * | ||
86 | * The LRU takes a new reference to the buffer so that it will only be freed | ||
87 | * once the shrinker takes the buffer off the LRU. | ||
88 | */ | ||
89 | STATIC void | ||
90 | xfs_buf_lru_add( | ||
91 | struct xfs_buf *bp) | ||
92 | { | ||
93 | struct xfs_buftarg *btp = bp->b_target; | ||
94 | |||
95 | spin_lock(&btp->bt_lru_lock); | ||
96 | if (list_empty(&bp->b_lru)) { | ||
97 | atomic_inc(&bp->b_hold); | ||
98 | list_add_tail(&bp->b_lru, &btp->bt_lru); | ||
99 | btp->bt_lru_nr++; | ||
100 | bp->b_lru_flags &= ~_XBF_LRU_DISPOSE; | ||
101 | } | ||
102 | spin_unlock(&btp->bt_lru_lock); | ||
103 | } | ||
104 | |||
105 | /* | ||
106 | * xfs_buf_lru_del - remove a buffer from the LRU | ||
107 | * | ||
108 | * The unlocked check is safe here because it only occurs when there are not | ||
109 | * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there | ||
110 | * to optimise the shrinker removing the buffer from the LRU and calling | ||
111 | * xfs_buf_free(). i.e. it removes an unnecessary round trip on the | ||
112 | * bt_lru_lock. | ||
113 | */ | ||
114 | STATIC void | ||
115 | xfs_buf_lru_del( | ||
116 | struct xfs_buf *bp) | ||
117 | { | ||
118 | struct xfs_buftarg *btp = bp->b_target; | ||
119 | |||
120 | if (list_empty(&bp->b_lru)) | ||
121 | return; | ||
122 | |||
123 | spin_lock(&btp->bt_lru_lock); | ||
124 | if (!list_empty(&bp->b_lru)) { | ||
125 | list_del_init(&bp->b_lru); | ||
126 | btp->bt_lru_nr--; | ||
127 | } | ||
128 | spin_unlock(&btp->bt_lru_lock); | ||
129 | } | ||
130 | |||
131 | /* | ||
132 | * When we mark a buffer stale, we remove the buffer from the LRU and clear the | 84 | * When we mark a buffer stale, we remove the buffer from the LRU and clear the |
133 | * b_lru_ref count so that the buffer is freed immediately when the buffer | 85 | * b_lru_ref count so that the buffer is freed immediately when the buffer |
134 | * reference count falls to zero. If the buffer is already on the LRU, we need | 86 | * reference count falls to zero. If the buffer is already on the LRU, we need |
@@ -151,20 +103,14 @@ xfs_buf_stale( | |||
151 | */ | 103 | */ |
152 | bp->b_flags &= ~_XBF_DELWRI_Q; | 104 | bp->b_flags &= ~_XBF_DELWRI_Q; |
153 | 105 | ||
154 | atomic_set(&(bp)->b_lru_ref, 0); | 106 | spin_lock(&bp->b_lock); |
155 | if (!list_empty(&bp->b_lru)) { | 107 | atomic_set(&bp->b_lru_ref, 0); |
156 | struct xfs_buftarg *btp = bp->b_target; | 108 | if (!(bp->b_state & XFS_BSTATE_DISPOSE) && |
109 | (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru))) | ||
110 | atomic_dec(&bp->b_hold); | ||
157 | 111 | ||
158 | spin_lock(&btp->bt_lru_lock); | ||
159 | if (!list_empty(&bp->b_lru) && | ||
160 | !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) { | ||
161 | list_del_init(&bp->b_lru); | ||
162 | btp->bt_lru_nr--; | ||
163 | atomic_dec(&bp->b_hold); | ||
164 | } | ||
165 | spin_unlock(&btp->bt_lru_lock); | ||
166 | } | ||
167 | ASSERT(atomic_read(&bp->b_hold) >= 1); | 112 | ASSERT(atomic_read(&bp->b_hold) >= 1); |
113 | spin_unlock(&bp->b_lock); | ||
168 | } | 114 | } |
169 | 115 | ||
170 | static int | 116 | static int |
@@ -228,6 +174,7 @@ _xfs_buf_alloc( | |||
228 | INIT_LIST_HEAD(&bp->b_list); | 174 | INIT_LIST_HEAD(&bp->b_list); |
229 | RB_CLEAR_NODE(&bp->b_rbnode); | 175 | RB_CLEAR_NODE(&bp->b_rbnode); |
230 | sema_init(&bp->b_sema, 0); /* held, no waiters */ | 176 | sema_init(&bp->b_sema, 0); /* held, no waiters */ |
177 | spin_lock_init(&bp->b_lock); | ||
231 | XB_SET_OWNER(bp); | 178 | XB_SET_OWNER(bp); |
232 | bp->b_target = target; | 179 | bp->b_target = target; |
233 | bp->b_flags = flags; | 180 | bp->b_flags = flags; |
@@ -917,12 +864,33 @@ xfs_buf_rele( | |||
917 | 864 | ||
918 | ASSERT(atomic_read(&bp->b_hold) > 0); | 865 | ASSERT(atomic_read(&bp->b_hold) > 0); |
919 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { | 866 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { |
920 | if (!(bp->b_flags & XBF_STALE) && | 867 | spin_lock(&bp->b_lock); |
921 | atomic_read(&bp->b_lru_ref)) { | 868 | if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { |
922 | xfs_buf_lru_add(bp); | 869 | /* |
870 | * If the buffer is added to the LRU take a new | ||
871 | * reference to the buffer for the LRU and clear the | ||
872 | * (now stale) dispose list state flag | ||
873 | */ | ||
874 | if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) { | ||
875 | bp->b_state &= ~XFS_BSTATE_DISPOSE; | ||
876 | atomic_inc(&bp->b_hold); | ||
877 | } | ||
878 | spin_unlock(&bp->b_lock); | ||
923 | spin_unlock(&pag->pag_buf_lock); | 879 | spin_unlock(&pag->pag_buf_lock); |
924 | } else { | 880 | } else { |
925 | xfs_buf_lru_del(bp); | 881 | /* |
882 | * most of the time buffers will already be removed from | ||
883 | * the LRU, so optimise that case by checking for the | ||
884 | * XFS_BSTATE_DISPOSE flag indicating the last list the | ||
885 | * buffer was on was the disposal list | ||
886 | */ | ||
887 | if (!(bp->b_state & XFS_BSTATE_DISPOSE)) { | ||
888 | list_lru_del(&bp->b_target->bt_lru, &bp->b_lru); | ||
889 | } else { | ||
890 | ASSERT(list_empty(&bp->b_lru)); | ||
891 | } | ||
892 | spin_unlock(&bp->b_lock); | ||
893 | |||
926 | ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); | 894 | ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); |
927 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); | 895 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); |
928 | spin_unlock(&pag->pag_buf_lock); | 896 | spin_unlock(&pag->pag_buf_lock); |
@@ -1502,83 +1470,121 @@ xfs_buf_iomove( | |||
1502 | * returned. These buffers will have an elevated hold count, so wait on those | 1470 | * returned. These buffers will have an elevated hold count, so wait on those |
1503 | * while freeing all the buffers only held by the LRU. | 1471 | * while freeing all the buffers only held by the LRU. |
1504 | */ | 1472 | */ |
1473 | static enum lru_status | ||
1474 | xfs_buftarg_wait_rele( | ||
1475 | struct list_head *item, | ||
1476 | spinlock_t *lru_lock, | ||
1477 | void *arg) | ||
1478 | |||
1479 | { | ||
1480 | struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru); | ||
1481 | struct list_head *dispose = arg; | ||
1482 | |||
1483 | if (atomic_read(&bp->b_hold) > 1) { | ||
1484 | /* need to wait, so skip it this pass */ | ||
1485 | trace_xfs_buf_wait_buftarg(bp, _RET_IP_); | ||
1486 | return LRU_SKIP; | ||
1487 | } | ||
1488 | if (!spin_trylock(&bp->b_lock)) | ||
1489 | return LRU_SKIP; | ||
1490 | |||
1491 | /* | ||
1492 | * clear the LRU reference count so the buffer doesn't get | ||
1493 | * ignored in xfs_buf_rele(). | ||
1494 | */ | ||
1495 | atomic_set(&bp->b_lru_ref, 0); | ||
1496 | bp->b_state |= XFS_BSTATE_DISPOSE; | ||
1497 | list_move(item, dispose); | ||
1498 | spin_unlock(&bp->b_lock); | ||
1499 | return LRU_REMOVED; | ||
1500 | } | ||
1501 | |||
1505 | void | 1502 | void |
1506 | xfs_wait_buftarg( | 1503 | xfs_wait_buftarg( |
1507 | struct xfs_buftarg *btp) | 1504 | struct xfs_buftarg *btp) |
1508 | { | 1505 | { |
1509 | struct xfs_buf *bp; | 1506 | LIST_HEAD(dispose); |
1507 | int loop = 0; | ||
1510 | 1508 | ||
1511 | restart: | 1509 | /* loop until there is nothing left on the lru list. */ |
1512 | spin_lock(&btp->bt_lru_lock); | 1510 | while (list_lru_count(&btp->bt_lru)) { |
1513 | while (!list_empty(&btp->bt_lru)) { | 1511 | list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele, |
1514 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); | 1512 | &dispose, LONG_MAX); |
1515 | if (atomic_read(&bp->b_hold) > 1) { | 1513 | |
1516 | trace_xfs_buf_wait_buftarg(bp, _RET_IP_); | 1514 | while (!list_empty(&dispose)) { |
1517 | list_move_tail(&bp->b_lru, &btp->bt_lru); | 1515 | struct xfs_buf *bp; |
1518 | spin_unlock(&btp->bt_lru_lock); | 1516 | bp = list_first_entry(&dispose, struct xfs_buf, b_lru); |
1519 | delay(100); | 1517 | list_del_init(&bp->b_lru); |
1520 | goto restart; | 1518 | xfs_buf_rele(bp); |
1521 | } | 1519 | } |
1522 | /* | 1520 | if (loop++ != 0) |
1523 | * clear the LRU reference count so the buffer doesn't get | 1521 | delay(100); |
1524 | * ignored in xfs_buf_rele(). | ||
1525 | */ | ||
1526 | atomic_set(&bp->b_lru_ref, 0); | ||
1527 | spin_unlock(&btp->bt_lru_lock); | ||
1528 | xfs_buf_rele(bp); | ||
1529 | spin_lock(&btp->bt_lru_lock); | ||
1530 | } | 1522 | } |
1531 | spin_unlock(&btp->bt_lru_lock); | ||
1532 | } | 1523 | } |
1533 | 1524 | ||
1534 | int | 1525 | static enum lru_status |
1535 | xfs_buftarg_shrink( | 1526 | xfs_buftarg_isolate( |
1527 | struct list_head *item, | ||
1528 | spinlock_t *lru_lock, | ||
1529 | void *arg) | ||
1530 | { | ||
1531 | struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru); | ||
1532 | struct list_head *dispose = arg; | ||
1533 | |||
1534 | /* | ||
1535 | * we are inverting the lru lock/bp->b_lock here, so use a trylock. | ||
1536 | * If we fail to get the lock, just skip it. | ||
1537 | */ | ||
1538 | if (!spin_trylock(&bp->b_lock)) | ||
1539 | return LRU_SKIP; | ||
1540 | /* | ||
1541 | * Decrement the b_lru_ref count unless the value is already | ||
1542 | * zero. If the value is already zero, we need to reclaim the | ||
1543 | * buffer, otherwise it gets another trip through the LRU. | ||
1544 | */ | ||
1545 | if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { | ||
1546 | spin_unlock(&bp->b_lock); | ||
1547 | return LRU_ROTATE; | ||
1548 | } | ||
1549 | |||
1550 | bp->b_state |= XFS_BSTATE_DISPOSE; | ||
1551 | list_move(item, dispose); | ||
1552 | spin_unlock(&bp->b_lock); | ||
1553 | return LRU_REMOVED; | ||
1554 | } | ||
1555 | |||
1556 | static unsigned long | ||
1557 | xfs_buftarg_shrink_scan( | ||
1536 | struct shrinker *shrink, | 1558 | struct shrinker *shrink, |
1537 | struct shrink_control *sc) | 1559 | struct shrink_control *sc) |
1538 | { | 1560 | { |
1539 | struct xfs_buftarg *btp = container_of(shrink, | 1561 | struct xfs_buftarg *btp = container_of(shrink, |
1540 | struct xfs_buftarg, bt_shrinker); | 1562 | struct xfs_buftarg, bt_shrinker); |
1541 | struct xfs_buf *bp; | ||
1542 | int nr_to_scan = sc->nr_to_scan; | ||
1543 | LIST_HEAD(dispose); | 1563 | LIST_HEAD(dispose); |
1564 | unsigned long freed; | ||
1565 | unsigned long nr_to_scan = sc->nr_to_scan; | ||
1544 | 1566 | ||
1545 | if (!nr_to_scan) | 1567 | freed = list_lru_walk_node(&btp->bt_lru, sc->nid, xfs_buftarg_isolate, |
1546 | return btp->bt_lru_nr; | 1568 | &dispose, &nr_to_scan); |
1547 | |||
1548 | spin_lock(&btp->bt_lru_lock); | ||
1549 | while (!list_empty(&btp->bt_lru)) { | ||
1550 | if (nr_to_scan-- <= 0) | ||
1551 | break; | ||
1552 | |||
1553 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); | ||
1554 | |||
1555 | /* | ||
1556 | * Decrement the b_lru_ref count unless the value is already | ||
1557 | * zero. If the value is already zero, we need to reclaim the | ||
1558 | * buffer, otherwise it gets another trip through the LRU. | ||
1559 | */ | ||
1560 | if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { | ||
1561 | list_move_tail(&bp->b_lru, &btp->bt_lru); | ||
1562 | continue; | ||
1563 | } | ||
1564 | |||
1565 | /* | ||
1566 | * remove the buffer from the LRU now to avoid needing another | ||
1567 | * lock round trip inside xfs_buf_rele(). | ||
1568 | */ | ||
1569 | list_move(&bp->b_lru, &dispose); | ||
1570 | btp->bt_lru_nr--; | ||
1571 | bp->b_lru_flags |= _XBF_LRU_DISPOSE; | ||
1572 | } | ||
1573 | spin_unlock(&btp->bt_lru_lock); | ||
1574 | 1569 | ||
1575 | while (!list_empty(&dispose)) { | 1570 | while (!list_empty(&dispose)) { |
1571 | struct xfs_buf *bp; | ||
1576 | bp = list_first_entry(&dispose, struct xfs_buf, b_lru); | 1572 | bp = list_first_entry(&dispose, struct xfs_buf, b_lru); |
1577 | list_del_init(&bp->b_lru); | 1573 | list_del_init(&bp->b_lru); |
1578 | xfs_buf_rele(bp); | 1574 | xfs_buf_rele(bp); |
1579 | } | 1575 | } |
1580 | 1576 | ||
1581 | return btp->bt_lru_nr; | 1577 | return freed; |
1578 | } | ||
1579 | |||
1580 | static unsigned long | ||
1581 | xfs_buftarg_shrink_count( | ||
1582 | struct shrinker *shrink, | ||
1583 | struct shrink_control *sc) | ||
1584 | { | ||
1585 | struct xfs_buftarg *btp = container_of(shrink, | ||
1586 | struct xfs_buftarg, bt_shrinker); | ||
1587 | return list_lru_count_node(&btp->bt_lru, sc->nid); | ||
1582 | } | 1588 | } |
1583 | 1589 | ||
1584 | void | 1590 | void |
@@ -1587,6 +1593,7 @@ xfs_free_buftarg( | |||
1587 | struct xfs_buftarg *btp) | 1593 | struct xfs_buftarg *btp) |
1588 | { | 1594 | { |
1589 | unregister_shrinker(&btp->bt_shrinker); | 1595 | unregister_shrinker(&btp->bt_shrinker); |
1596 | list_lru_destroy(&btp->bt_lru); | ||
1590 | 1597 | ||
1591 | if (mp->m_flags & XFS_MOUNT_BARRIER) | 1598 | if (mp->m_flags & XFS_MOUNT_BARRIER) |
1592 | xfs_blkdev_issue_flush(btp); | 1599 | xfs_blkdev_issue_flush(btp); |
@@ -1660,12 +1667,16 @@ xfs_alloc_buftarg( | |||
1660 | if (!btp->bt_bdi) | 1667 | if (!btp->bt_bdi) |
1661 | goto error; | 1668 | goto error; |
1662 | 1669 | ||
1663 | INIT_LIST_HEAD(&btp->bt_lru); | ||
1664 | spin_lock_init(&btp->bt_lru_lock); | ||
1665 | if (xfs_setsize_buftarg_early(btp, bdev)) | 1670 | if (xfs_setsize_buftarg_early(btp, bdev)) |
1666 | goto error; | 1671 | goto error; |
1667 | btp->bt_shrinker.shrink = xfs_buftarg_shrink; | 1672 | |
1673 | if (list_lru_init(&btp->bt_lru)) | ||
1674 | goto error; | ||
1675 | |||
1676 | btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count; | ||
1677 | btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; | ||
1668 | btp->bt_shrinker.seeks = DEFAULT_SEEKS; | 1678 | btp->bt_shrinker.seeks = DEFAULT_SEEKS; |
1679 | btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; | ||
1669 | register_shrinker(&btp->bt_shrinker); | 1680 | register_shrinker(&btp->bt_shrinker); |
1670 | return btp; | 1681 | return btp; |
1671 | 1682 | ||
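xfs_buftarg_isolate() above uses three of the four lru_status return codes. For reference, their meaning under the list_lru walker added by this series is roughly the following; this is a paraphrase of the API, not a verbatim copy of the header.

enum lru_status {
	LRU_REMOVED,	/* callback took the item off the list; counted as freed  */
	LRU_ROTATE,	/* item is still useful; walker moves it to the list tail */
	LRU_SKIP,	/* item could not be locked cheaply; leave it and move on */
	LRU_RETRY,	/* callback dropped the lru lock; walker restarts the scan */
};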
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 433a12ed7b17..e65683361017 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/fs.h> | 25 | #include <linux/fs.h> |
26 | #include <linux/buffer_head.h> | 26 | #include <linux/buffer_head.h> |
27 | #include <linux/uio.h> | 27 | #include <linux/uio.h> |
28 | #include <linux/list_lru.h> | ||
28 | 29 | ||
29 | /* | 30 | /* |
30 | * Base types | 31 | * Base types |
@@ -59,7 +60,6 @@ typedef enum { | |||
59 | #define _XBF_KMEM (1 << 21)/* backed by heap memory */ | 60 | #define _XBF_KMEM (1 << 21)/* backed by heap memory */ |
60 | #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ | 61 | #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ |
61 | #define _XBF_COMPOUND (1 << 23)/* compound buffer */ | 62 | #define _XBF_COMPOUND (1 << 23)/* compound buffer */ |
62 | #define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */ | ||
63 | 63 | ||
64 | typedef unsigned int xfs_buf_flags_t; | 64 | typedef unsigned int xfs_buf_flags_t; |
65 | 65 | ||
@@ -78,8 +78,12 @@ typedef unsigned int xfs_buf_flags_t; | |||
78 | { _XBF_PAGES, "PAGES" }, \ | 78 | { _XBF_PAGES, "PAGES" }, \ |
79 | { _XBF_KMEM, "KMEM" }, \ | 79 | { _XBF_KMEM, "KMEM" }, \ |
80 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ | 80 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ |
81 | { _XBF_COMPOUND, "COMPOUND" }, \ | 81 | { _XBF_COMPOUND, "COMPOUND" } |
82 | { _XBF_LRU_DISPOSE, "LRU_DISPOSE" } | 82 | |
83 | /* | ||
84 | * Internal state flags. | ||
85 | */ | ||
86 | #define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ | ||
83 | 87 | ||
84 | typedef struct xfs_buftarg { | 88 | typedef struct xfs_buftarg { |
85 | dev_t bt_dev; | 89 | dev_t bt_dev; |
@@ -92,9 +96,7 @@ typedef struct xfs_buftarg { | |||
92 | 96 | ||
93 | /* LRU control structures */ | 97 | /* LRU control structures */ |
94 | struct shrinker bt_shrinker; | 98 | struct shrinker bt_shrinker; |
95 | struct list_head bt_lru; | 99 | struct list_lru bt_lru; |
96 | spinlock_t bt_lru_lock; | ||
97 | unsigned int bt_lru_nr; | ||
98 | } xfs_buftarg_t; | 100 | } xfs_buftarg_t; |
99 | 101 | ||
100 | struct xfs_buf; | 102 | struct xfs_buf; |
@@ -137,7 +139,8 @@ typedef struct xfs_buf { | |||
137 | * bt_lru_lock and not by b_sema | 139 | * bt_lru_lock and not by b_sema |
138 | */ | 140 | */ |
139 | struct list_head b_lru; /* lru list */ | 141 | struct list_head b_lru; /* lru list */ |
140 | xfs_buf_flags_t b_lru_flags; /* internal lru status flags */ | 142 | spinlock_t b_lock; /* internal state lock */ |
143 | unsigned int b_state; /* internal state flags */ | ||
141 | wait_queue_head_t b_waiters; /* unpin waiters */ | 144 | wait_queue_head_t b_waiters; /* unpin waiters */ |
142 | struct list_head b_list; | 145 | struct list_head b_list; |
143 | struct xfs_perag *b_pag; /* contains rbtree root */ | 146 | struct xfs_perag *b_pag; /* contains rbtree root */ |
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 251c66632e5e..71520e6e5d65 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c | |||
@@ -940,13 +940,8 @@ xfs_qm_dqput_final( | |||
940 | 940 | ||
941 | trace_xfs_dqput_free(dqp); | 941 | trace_xfs_dqput_free(dqp); |
942 | 942 | ||
943 | mutex_lock(&qi->qi_lru_lock); | 943 | if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) |
944 | if (list_empty(&dqp->q_lru)) { | ||
945 | list_add_tail(&dqp->q_lru, &qi->qi_lru_list); | ||
946 | qi->qi_lru_count++; | ||
947 | XFS_STATS_INC(xs_qm_dquot_unused); | 944 | XFS_STATS_INC(xs_qm_dquot_unused); |
948 | } | ||
949 | mutex_unlock(&qi->qi_lru_lock); | ||
950 | 945 | ||
951 | /* | 946 | /* |
952 | * If we just added a udquot to the freelist, then we want to release | 947 | * If we just added a udquot to the freelist, then we want to release |
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 16219b9c6790..73b62a24ceac 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c | |||
@@ -1167,7 +1167,7 @@ xfs_reclaim_inodes( | |||
1167 | * them to be cleaned, which we hope will not be very long due to the | 1167 | * them to be cleaned, which we hope will not be very long due to the |
1168 | * background walker having already kicked the IO off on those dirty inodes. | 1168 | * background walker having already kicked the IO off on those dirty inodes. |
1169 | */ | 1169 | */ |
1170 | void | 1170 | long |
1171 | xfs_reclaim_inodes_nr( | 1171 | xfs_reclaim_inodes_nr( |
1172 | struct xfs_mount *mp, | 1172 | struct xfs_mount *mp, |
1173 | int nr_to_scan) | 1173 | int nr_to_scan) |
@@ -1176,7 +1176,7 @@ xfs_reclaim_inodes_nr( | |||
1176 | xfs_reclaim_work_queue(mp); | 1176 | xfs_reclaim_work_queue(mp); |
1177 | xfs_ail_push_all(mp->m_ail); | 1177 | xfs_ail_push_all(mp->m_ail); |
1178 | 1178 | ||
1179 | xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); | 1179 | return xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); |
1180 | } | 1180 | } |
1181 | 1181 | ||
1182 | /* | 1182 | /* |
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 8a89f7d791bd..456f0144e1b6 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h | |||
@@ -46,7 +46,7 @@ void xfs_reclaim_worker(struct work_struct *work); | |||
46 | 46 | ||
47 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); | 47 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); |
48 | int xfs_reclaim_inodes_count(struct xfs_mount *mp); | 48 | int xfs_reclaim_inodes_count(struct xfs_mount *mp); |
49 | void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); | 49 | long xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); |
50 | 50 | ||
51 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); | 51 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); |
52 | 52 | ||
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 6218a0aeeeea..3e6c2e6c9cd2 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
@@ -51,8 +51,9 @@ | |||
51 | */ | 51 | */ |
52 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); | 52 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); |
53 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); | 53 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); |
54 | STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); | ||
55 | 54 | ||
55 | |||
56 | STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp); | ||
56 | /* | 57 | /* |
57 | * We use the batch lookup interface to iterate over the dquots as it | 58 | * We use the batch lookup interface to iterate over the dquots as it |
58 | * currently is the only interface into the radix tree code that allows | 59 | * currently is the only interface into the radix tree code that allows |
@@ -203,12 +204,9 @@ xfs_qm_dqpurge( | |||
203 | * We move dquots to the freelist as soon as their reference count | 204 | * We move dquots to the freelist as soon as their reference count |
204 | * hits zero, so it really should be on the freelist here. | 205 | * hits zero, so it really should be on the freelist here. |
205 | */ | 206 | */ |
206 | mutex_lock(&qi->qi_lru_lock); | ||
207 | ASSERT(!list_empty(&dqp->q_lru)); | 207 | ASSERT(!list_empty(&dqp->q_lru)); |
208 | list_del_init(&dqp->q_lru); | 208 | list_lru_del(&qi->qi_lru, &dqp->q_lru); |
209 | qi->qi_lru_count--; | ||
210 | XFS_STATS_DEC(xs_qm_dquot_unused); | 209 | XFS_STATS_DEC(xs_qm_dquot_unused); |
211 | mutex_unlock(&qi->qi_lru_lock); | ||
212 | 210 | ||
213 | xfs_qm_dqdestroy(dqp); | 211 | xfs_qm_dqdestroy(dqp); |
214 | 212 | ||
@@ -680,6 +678,143 @@ xfs_qm_calc_dquots_per_chunk( | |||
680 | return ndquots; | 678 | return ndquots; |
681 | } | 679 | } |
682 | 680 | ||
681 | struct xfs_qm_isolate { | ||
682 | struct list_head buffers; | ||
683 | struct list_head dispose; | ||
684 | }; | ||
685 | |||
686 | static enum lru_status | ||
687 | xfs_qm_dquot_isolate( | ||
688 | struct list_head *item, | ||
689 | spinlock_t *lru_lock, | ||
690 | void *arg) | ||
691 | { | ||
692 | struct xfs_dquot *dqp = container_of(item, | ||
693 | struct xfs_dquot, q_lru); | ||
694 | struct xfs_qm_isolate *isol = arg; | ||
695 | |||
696 | if (!xfs_dqlock_nowait(dqp)) | ||
697 | goto out_miss_busy; | ||
698 | |||
699 | /* | ||
700 | * This dquot has acquired a reference in the meantime remove it from | ||
700 | * This dquot has acquired a reference in the meantime; remove it from | ||
701 | * the freelist and try again. | ||
702 | */ | ||
703 | if (dqp->q_nrefs) { | ||
704 | xfs_dqunlock(dqp); | ||
705 | XFS_STATS_INC(xs_qm_dqwants); | ||
706 | |||
707 | trace_xfs_dqreclaim_want(dqp); | ||
708 | list_del_init(&dqp->q_lru); | ||
709 | XFS_STATS_DEC(xs_qm_dquot_unused); | ||
710 | return LRU_REMOVED; | ||
711 | } | ||
712 | |||
713 | /* | ||
714 | * If the dquot is dirty, flush it. If it's already being flushed, just | ||
715 | * skip it so there is time for the IO to complete before we try to | ||
716 | * reclaim it again on the next LRU pass. | ||
717 | */ | ||
718 | if (!xfs_dqflock_nowait(dqp)) { | ||
719 | xfs_dqunlock(dqp); | ||
720 | goto out_miss_busy; | ||
721 | } | ||
722 | |||
723 | if (XFS_DQ_IS_DIRTY(dqp)) { | ||
724 | struct xfs_buf *bp = NULL; | ||
725 | int error; | ||
726 | |||
727 | trace_xfs_dqreclaim_dirty(dqp); | ||
728 | |||
729 | /* we have to drop the LRU lock to flush the dquot */ | ||
730 | spin_unlock(lru_lock); | ||
731 | |||
732 | error = xfs_qm_dqflush(dqp, &bp); | ||
733 | if (error) { | ||
734 | xfs_warn(dqp->q_mount, "%s: dquot %p flush failed", | ||
735 | __func__, dqp); | ||
736 | goto out_unlock_dirty; | ||
737 | } | ||
738 | |||
739 | xfs_buf_delwri_queue(bp, &isol->buffers); | ||
740 | xfs_buf_relse(bp); | ||
741 | goto out_unlock_dirty; | ||
742 | } | ||
743 | xfs_dqfunlock(dqp); | ||
744 | |||
745 | /* | ||
746 | * Prevent lookups now that we are past the point of no return. | ||
747 | */ | ||
748 | dqp->dq_flags |= XFS_DQ_FREEING; | ||
749 | xfs_dqunlock(dqp); | ||
750 | |||
751 | ASSERT(dqp->q_nrefs == 0); | ||
752 | list_move_tail(&dqp->q_lru, &isol->dispose); | ||
753 | XFS_STATS_DEC(xs_qm_dquot_unused); | ||
754 | trace_xfs_dqreclaim_done(dqp); | ||
755 | XFS_STATS_INC(xs_qm_dqreclaims); | ||
756 | return LRU_REMOVED; | ||
757 | |||
758 | out_miss_busy: | ||
759 | trace_xfs_dqreclaim_busy(dqp); | ||
760 | XFS_STATS_INC(xs_qm_dqreclaim_misses); | ||
761 | return LRU_SKIP; | ||
762 | |||
763 | out_unlock_dirty: | ||
764 | trace_xfs_dqreclaim_busy(dqp); | ||
765 | XFS_STATS_INC(xs_qm_dqreclaim_misses); | ||
766 | xfs_dqunlock(dqp); | ||
767 | spin_lock(lru_lock); | ||
768 | return LRU_RETRY; | ||
769 | } | ||
770 | |||
771 | static unsigned long | ||
772 | xfs_qm_shrink_scan( | ||
773 | struct shrinker *shrink, | ||
774 | struct shrink_control *sc) | ||
775 | { | ||
776 | struct xfs_quotainfo *qi = container_of(shrink, | ||
777 | struct xfs_quotainfo, qi_shrinker); | ||
778 | struct xfs_qm_isolate isol; | ||
779 | unsigned long freed; | ||
780 | int error; | ||
781 | unsigned long nr_to_scan = sc->nr_to_scan; | ||
782 | |||
783 | if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) | ||
784 | return 0; | ||
785 | |||
786 | INIT_LIST_HEAD(&isol.buffers); | ||
787 | INIT_LIST_HEAD(&isol.dispose); | ||
788 | |||
789 | freed = list_lru_walk_node(&qi->qi_lru, sc->nid, xfs_qm_dquot_isolate, &isol, | ||
790 | &nr_to_scan); | ||
791 | |||
792 | error = xfs_buf_delwri_submit(&isol.buffers); | ||
793 | if (error) | ||
794 | xfs_warn(NULL, "%s: dquot reclaim failed", __func__); | ||
795 | |||
796 | while (!list_empty(&isol.dispose)) { | ||
797 | struct xfs_dquot *dqp; | ||
798 | |||
799 | dqp = list_first_entry(&isol.dispose, struct xfs_dquot, q_lru); | ||
800 | list_del_init(&dqp->q_lru); | ||
801 | xfs_qm_dqfree_one(dqp); | ||
802 | } | ||
803 | |||
804 | return freed; | ||
805 | } | ||
806 | |||
807 | static unsigned long | ||
808 | xfs_qm_shrink_count( | ||
809 | struct shrinker *shrink, | ||
810 | struct shrink_control *sc) | ||
811 | { | ||
812 | struct xfs_quotainfo *qi = container_of(shrink, | ||
813 | struct xfs_quotainfo, qi_shrinker); | ||
814 | |||
815 | return list_lru_count_node(&qi->qi_lru, sc->nid); | ||
816 | } | ||
817 | |||
683 | /* | 818 | /* |
684 | * This initializes all the quota information that's kept in the | 819 | * This initializes all the quota information that's kept in the |
685 | * mount structure | 820 | * mount structure |
@@ -696,11 +831,18 @@ xfs_qm_init_quotainfo( | |||
696 | 831 | ||
697 | qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); | 832 | qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); |
698 | 833 | ||
834 | if ((error = list_lru_init(&qinf->qi_lru))) { | ||
835 | kmem_free(qinf); | ||
836 | mp->m_quotainfo = NULL; | ||
837 | return error; | ||
838 | } | ||
839 | |||
699 | /* | 840 | /* |
700 | * See if quotainodes are setup, and if not, allocate them, | 841 | * See if quotainodes are setup, and if not, allocate them, |
701 | * and change the superblock accordingly. | 842 | * and change the superblock accordingly. |
702 | */ | 843 | */ |
703 | if ((error = xfs_qm_init_quotainos(mp))) { | 844 | if ((error = xfs_qm_init_quotainos(mp))) { |
845 | list_lru_destroy(&qinf->qi_lru); | ||
704 | kmem_free(qinf); | 846 | kmem_free(qinf); |
705 | mp->m_quotainfo = NULL; | 847 | mp->m_quotainfo = NULL; |
706 | return error; | 848 | return error; |
@@ -711,10 +853,6 @@ xfs_qm_init_quotainfo( | |||
711 | INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS); | 853 | INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS); |
712 | mutex_init(&qinf->qi_tree_lock); | 854 | mutex_init(&qinf->qi_tree_lock); |
713 | 855 | ||
714 | INIT_LIST_HEAD(&qinf->qi_lru_list); | ||
715 | qinf->qi_lru_count = 0; | ||
716 | mutex_init(&qinf->qi_lru_lock); | ||
717 | |||
718 | /* mutex used to serialize quotaoffs */ | 856 | /* mutex used to serialize quotaoffs */ |
719 | mutex_init(&qinf->qi_quotaofflock); | 857 | mutex_init(&qinf->qi_quotaofflock); |
720 | 858 | ||
@@ -779,8 +917,10 @@ xfs_qm_init_quotainfo( | |||
779 | qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; | 917 | qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; |
780 | } | 918 | } |
781 | 919 | ||
782 | qinf->qi_shrinker.shrink = xfs_qm_shake; | 920 | qinf->qi_shrinker.count_objects = xfs_qm_shrink_count; |
921 | qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan; | ||
783 | qinf->qi_shrinker.seeks = DEFAULT_SEEKS; | 922 | qinf->qi_shrinker.seeks = DEFAULT_SEEKS; |
923 | qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; | ||
784 | register_shrinker(&qinf->qi_shrinker); | 924 | register_shrinker(&qinf->qi_shrinker); |
785 | return 0; | 925 | return 0; |
786 | } | 926 | } |
@@ -801,6 +941,7 @@ xfs_qm_destroy_quotainfo( | |||
801 | ASSERT(qi != NULL); | 941 | ASSERT(qi != NULL); |
802 | 942 | ||
803 | unregister_shrinker(&qi->qi_shrinker); | 943 | unregister_shrinker(&qi->qi_shrinker); |
944 | list_lru_destroy(&qi->qi_lru); | ||
804 | 945 | ||
805 | if (qi->qi_uquotaip) { | 946 | if (qi->qi_uquotaip) { |
806 | IRELE(qi->qi_uquotaip); | 947 | IRELE(qi->qi_uquotaip); |
@@ -1599,132 +1740,6 @@ xfs_qm_dqfree_one( | |||
1599 | xfs_qm_dqdestroy(dqp); | 1740 | xfs_qm_dqdestroy(dqp); |
1600 | } | 1741 | } |
1601 | 1742 | ||
1602 | STATIC void | ||
1603 | xfs_qm_dqreclaim_one( | ||
1604 | struct xfs_dquot *dqp, | ||
1605 | struct list_head *buffer_list, | ||
1606 | struct list_head *dispose_list) | ||
1607 | { | ||
1608 | struct xfs_mount *mp = dqp->q_mount; | ||
1609 | struct xfs_quotainfo *qi = mp->m_quotainfo; | ||
1610 | int error; | ||
1611 | |||
1612 | if (!xfs_dqlock_nowait(dqp)) | ||
1613 | goto out_move_tail; | ||
1614 | |||
1615 | /* | ||
1616 | * This dquot has acquired a reference in the meantime remove it from | ||
1617 | * the freelist and try again. | ||
1618 | */ | ||
1619 | if (dqp->q_nrefs) { | ||
1620 | xfs_dqunlock(dqp); | ||
1621 | |||
1622 | trace_xfs_dqreclaim_want(dqp); | ||
1623 | XFS_STATS_INC(xs_qm_dqwants); | ||
1624 | |||
1625 | list_del_init(&dqp->q_lru); | ||
1626 | qi->qi_lru_count--; | ||
1627 | XFS_STATS_DEC(xs_qm_dquot_unused); | ||
1628 | return; | ||
1629 | } | ||
1630 | |||
1631 | /* | ||
1632 | * Try to grab the flush lock. If this dquot is in the process of | ||
1633 | * getting flushed to disk, we don't want to reclaim it. | ||
1634 | */ | ||
1635 | if (!xfs_dqflock_nowait(dqp)) | ||
1636 | goto out_unlock_move_tail; | ||
1637 | |||
1638 | if (XFS_DQ_IS_DIRTY(dqp)) { | ||
1639 | struct xfs_buf *bp = NULL; | ||
1640 | |||
1641 | trace_xfs_dqreclaim_dirty(dqp); | ||
1642 | |||
1643 | error = xfs_qm_dqflush(dqp, &bp); | ||
1644 | if (error) { | ||
1645 | xfs_warn(mp, "%s: dquot %p flush failed", | ||
1646 | __func__, dqp); | ||
1647 | goto out_unlock_move_tail; | ||
1648 | } | ||
1649 | |||
1650 | xfs_buf_delwri_queue(bp, buffer_list); | ||
1651 | xfs_buf_relse(bp); | ||
1652 | /* | ||
1653 | * Give the dquot another try on the freelist, as the | ||
1654 | * flushing will take some time. | ||
1655 | */ | ||
1656 | goto out_unlock_move_tail; | ||
1657 | } | ||
1658 | xfs_dqfunlock(dqp); | ||
1659 | |||
1660 | /* | ||
1661 | * Prevent lookups now that we are past the point of no return. | ||
1662 | */ | ||
1663 | dqp->dq_flags |= XFS_DQ_FREEING; | ||
1664 | xfs_dqunlock(dqp); | ||
1665 | |||
1666 | ASSERT(dqp->q_nrefs == 0); | ||
1667 | list_move_tail(&dqp->q_lru, dispose_list); | ||
1668 | qi->qi_lru_count--; | ||
1669 | XFS_STATS_DEC(xs_qm_dquot_unused); | ||
1670 | |||
1671 | trace_xfs_dqreclaim_done(dqp); | ||
1672 | XFS_STATS_INC(xs_qm_dqreclaims); | ||
1673 | return; | ||
1674 | |||
1675 | /* | ||
1676 | * Move the dquot to the tail of the list so that we don't spin on it. | ||
1677 | */ | ||
1678 | out_unlock_move_tail: | ||
1679 | xfs_dqunlock(dqp); | ||
1680 | out_move_tail: | ||
1681 | list_move_tail(&dqp->q_lru, &qi->qi_lru_list); | ||
1682 | trace_xfs_dqreclaim_busy(dqp); | ||
1683 | XFS_STATS_INC(xs_qm_dqreclaim_misses); | ||
1684 | } | ||
1685 | |||
1686 | STATIC int | ||
1687 | xfs_qm_shake( | ||
1688 | struct shrinker *shrink, | ||
1689 | struct shrink_control *sc) | ||
1690 | { | ||
1691 | struct xfs_quotainfo *qi = | ||
1692 | container_of(shrink, struct xfs_quotainfo, qi_shrinker); | ||
1693 | int nr_to_scan = sc->nr_to_scan; | ||
1694 | LIST_HEAD (buffer_list); | ||
1695 | LIST_HEAD (dispose_list); | ||
1696 | struct xfs_dquot *dqp; | ||
1697 | int error; | ||
1698 | |||
1699 | if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) | ||
1700 | return 0; | ||
1701 | if (!nr_to_scan) | ||
1702 | goto out; | ||
1703 | |||
1704 | mutex_lock(&qi->qi_lru_lock); | ||
1705 | while (!list_empty(&qi->qi_lru_list)) { | ||
1706 | if (nr_to_scan-- <= 0) | ||
1707 | break; | ||
1708 | dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot, | ||
1709 | q_lru); | ||
1710 | xfs_qm_dqreclaim_one(dqp, &buffer_list, &dispose_list); | ||
1711 | } | ||
1712 | mutex_unlock(&qi->qi_lru_lock); | ||
1713 | |||
1714 | error = xfs_buf_delwri_submit(&buffer_list); | ||
1715 | if (error) | ||
1716 | xfs_warn(NULL, "%s: dquot reclaim failed", __func__); | ||
1717 | |||
1718 | while (!list_empty(&dispose_list)) { | ||
1719 | dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru); | ||
1720 | list_del_init(&dqp->q_lru); | ||
1721 | xfs_qm_dqfree_one(dqp); | ||
1722 | } | ||
1723 | |||
1724 | out: | ||
1725 | return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure; | ||
1726 | } | ||
1727 | |||
1728 | /* | 1743 | /* |
1729 | * Start a transaction and write the incore superblock changes to | 1744 | * Start a transaction and write the incore superblock changes to |
1730 | * disk. flags parameter indicates which fields have changed. | 1745 | * disk. flags parameter indicates which fields have changed. |
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 670cd4464070..2b602df9c242 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h | |||
@@ -49,9 +49,7 @@ typedef struct xfs_quotainfo { | |||
49 | struct xfs_inode *qi_uquotaip; /* user quota inode */ | 49 | struct xfs_inode *qi_uquotaip; /* user quota inode */ |
50 | struct xfs_inode *qi_gquotaip; /* group quota inode */ | 50 | struct xfs_inode *qi_gquotaip; /* group quota inode */ |
51 | struct xfs_inode *qi_pquotaip; /* project quota inode */ | 51 | struct xfs_inode *qi_pquotaip; /* project quota inode */ |
52 | struct list_head qi_lru_list; | 52 | struct list_lru qi_lru; |
53 | struct mutex qi_lru_lock; | ||
54 | int qi_lru_count; | ||
55 | int qi_dquots; | 53 | int qi_dquots; |
56 | time_t qi_btimelimit; /* limit for blks timer */ | 54 | time_t qi_btimelimit; /* limit for blks timer */ |
57 | time_t qi_itimelimit; /* limit for inodes timer */ | 55 | time_t qi_itimelimit; /* limit for inodes timer */ |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 979a77d4b87d..15188cc99449 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -1535,19 +1535,21 @@ xfs_fs_mount( | |||
1535 | return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); | 1535 | return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); |
1536 | } | 1536 | } |
1537 | 1537 | ||
1538 | static int | 1538 | static long |
1539 | xfs_fs_nr_cached_objects( | 1539 | xfs_fs_nr_cached_objects( |
1540 | struct super_block *sb) | 1540 | struct super_block *sb, |
1541 | int nid) | ||
1541 | { | 1542 | { |
1542 | return xfs_reclaim_inodes_count(XFS_M(sb)); | 1543 | return xfs_reclaim_inodes_count(XFS_M(sb)); |
1543 | } | 1544 | } |
1544 | 1545 | ||
1545 | static void | 1546 | static long |
1546 | xfs_fs_free_cached_objects( | 1547 | xfs_fs_free_cached_objects( |
1547 | struct super_block *sb, | 1548 | struct super_block *sb, |
1548 | int nr_to_scan) | 1549 | long nr_to_scan, |
1550 | int nid) | ||
1549 | { | 1551 | { |
1550 | xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); | 1552 | return xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); |
1551 | } | 1553 | } |
1552 | 1554 | ||
1553 | static const struct super_operations xfs_super_operations = { | 1555 | static const struct super_operations xfs_super_operations = { |
diff --git a/include/linux/dcache.h b/include/linux/dcache.h index feaa8d88eef7..59066e0b4ff1 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h | |||
@@ -55,11 +55,11 @@ struct qstr { | |||
55 | #define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) | 55 | #define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) |
56 | 56 | ||
57 | struct dentry_stat_t { | 57 | struct dentry_stat_t { |
58 | int nr_dentry; | 58 | long nr_dentry; |
59 | int nr_unused; | 59 | long nr_unused; |
60 | int age_limit; /* age in seconds */ | 60 | long age_limit; /* age in seconds */ |
61 | int want_pages; /* pages requested by system */ | 61 | long want_pages; /* pages requested by system */ |
62 | int dummy[2]; | 62 | long dummy[2]; |
63 | }; | 63 | }; |
64 | extern struct dentry_stat_t dentry_stat; | 64 | extern struct dentry_stat_t dentry_stat; |
65 | 65 | ||
@@ -395,4 +395,8 @@ static inline bool d_mountpoint(const struct dentry *dentry) | |||
395 | 395 | ||
396 | extern int sysctl_vfs_cache_pressure; | 396 | extern int sysctl_vfs_cache_pressure; |
397 | 397 | ||
398 | static inline unsigned long vfs_pressure_ratio(unsigned long val) | ||
399 | { | ||
400 | return mult_frac(val, sysctl_vfs_cache_pressure, 100); | ||
401 | } | ||
398 | #endif /* __LINUX_DCACHE_H */ | 402 | #endif /* __LINUX_DCACHE_H */ |
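The new vfs_pressure_ratio() helper simply scales a freeable-object count by the vm.vfs_cache_pressure sysctl. A quick illustration with assumed values (not taken from the patch):

	/* with the default vm.vfs_cache_pressure == 100 the helper is an identity */
	vfs_pressure_ratio(600);	/* mult_frac(600, 100, 100) == 600 */

	/* with vm.vfs_cache_pressure raised to 200, twice the pressure is reported */
	vfs_pressure_ratio(600);	/* mult_frac(600, 200, 100) == 1200 */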
diff --git a/include/linux/fs.h b/include/linux/fs.h index 529d8711baba..a4acd3c61190 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/stat.h> | 10 | #include <linux/stat.h> |
11 | #include <linux/cache.h> | 11 | #include <linux/cache.h> |
12 | #include <linux/list.h> | 12 | #include <linux/list.h> |
13 | #include <linux/list_lru.h> | ||
13 | #include <linux/llist.h> | 14 | #include <linux/llist.h> |
14 | #include <linux/radix-tree.h> | 15 | #include <linux/radix-tree.h> |
15 | #include <linux/rbtree.h> | 16 | #include <linux/rbtree.h> |
@@ -1269,15 +1270,6 @@ struct super_block { | |||
1269 | struct list_head s_files; | 1270 | struct list_head s_files; |
1270 | #endif | 1271 | #endif |
1271 | struct list_head s_mounts; /* list of mounts; _not_ for fs use */ | 1272 | struct list_head s_mounts; /* list of mounts; _not_ for fs use */ |
1272 | /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */ | ||
1273 | struct list_head s_dentry_lru; /* unused dentry lru */ | ||
1274 | int s_nr_dentry_unused; /* # of dentry on lru */ | ||
1275 | |||
1276 | /* s_inode_lru_lock protects s_inode_lru and s_nr_inodes_unused */ | ||
1277 | spinlock_t s_inode_lru_lock ____cacheline_aligned_in_smp; | ||
1278 | struct list_head s_inode_lru; /* unused inode lru */ | ||
1279 | int s_nr_inodes_unused; /* # of inodes on lru */ | ||
1280 | |||
1281 | struct block_device *s_bdev; | 1273 | struct block_device *s_bdev; |
1282 | struct backing_dev_info *s_bdi; | 1274 | struct backing_dev_info *s_bdi; |
1283 | struct mtd_info *s_mtd; | 1275 | struct mtd_info *s_mtd; |
@@ -1331,11 +1323,14 @@ struct super_block { | |||
1331 | 1323 | ||
1332 | /* AIO completions deferred from interrupt context */ | 1324 | /* AIO completions deferred from interrupt context */ |
1333 | struct workqueue_struct *s_dio_done_wq; | 1325 | struct workqueue_struct *s_dio_done_wq; |
1334 | }; | ||
1335 | 1326 | ||
1336 | /* superblock cache pruning functions */ | 1327 | /* |
1337 | extern void prune_icache_sb(struct super_block *sb, int nr_to_scan); | 1328 | * Keep the lru lists last in the structure so they always sit on their |
1338 | extern void prune_dcache_sb(struct super_block *sb, int nr_to_scan); | 1329 | * own individual cachelines. |
1330 | */ | ||
1331 | struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; | ||
1332 | struct list_lru s_inode_lru ____cacheline_aligned_in_smp; | ||
1333 | }; | ||
1339 | 1334 | ||
1340 | extern struct timespec current_fs_time(struct super_block *sb); | 1335 | extern struct timespec current_fs_time(struct super_block *sb); |
1341 | 1336 | ||
@@ -1629,8 +1624,8 @@ struct super_operations { | |||
1629 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); | 1624 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); |
1630 | #endif | 1625 | #endif |
1631 | int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); | 1626 | int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); |
1632 | int (*nr_cached_objects)(struct super_block *); | 1627 | long (*nr_cached_objects)(struct super_block *, int); |
1633 | void (*free_cached_objects)(struct super_block *, int); | 1628 | long (*free_cached_objects)(struct super_block *, long, int); |
1634 | }; | 1629 | }; |
1635 | 1630 | ||
1636 | /* | 1631 | /* |
@@ -2494,7 +2489,6 @@ extern const struct file_operations generic_ro_fops; | |||
2494 | #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) | 2489 | #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) |
2495 | 2490 | ||
2496 | extern int vfs_readlink(struct dentry *, char __user *, int, const char *); | 2491 | extern int vfs_readlink(struct dentry *, char __user *, int, const char *); |
2497 | extern int vfs_follow_link(struct nameidata *, const char *); | ||
2498 | extern int page_readlink(struct dentry *, char __user *, int); | 2492 | extern int page_readlink(struct dentry *, char __user *, int); |
2499 | extern void *page_follow_link_light(struct dentry *, struct nameidata *); | 2493 | extern void *page_follow_link_light(struct dentry *, struct nameidata *); |
2500 | extern void page_put_link(struct dentry *, struct nameidata *, void *); | 2494 | extern void page_put_link(struct dentry *, struct nameidata *, void *); |
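For illustration, a filesystem other than XFS might wire up the new per-node ->nr_cached_objects()/->free_cached_objects() prototypes along the following lines, assuming it keeps its reclaimable objects on a struct list_lru in its private superblock info. MYFS_SB(), objects_lru and myfs_object_isolate() are hypothetical names, not part of this patch:

static long myfs_nr_cached_objects(struct super_block *sb, int nid)
{
	/* count only the objects on the node currently being shrunk */
	return list_lru_count_node(&MYFS_SB(sb)->objects_lru, nid);
}

static long myfs_free_cached_objects(struct super_block *sb, long nr_to_scan,
				     int nid)
{
	unsigned long nr = nr_to_scan;

	/* myfs_object_isolate() would follow the xfs_qm_dquot_isolate() pattern */
	return list_lru_walk_node(&MYFS_SB(sb)->objects_lru, nid,
				  myfs_object_isolate, NULL, &nr);
}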
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h new file mode 100644 index 000000000000..3ce541753c88 --- /dev/null +++ b/include/linux/list_lru.h | |||
@@ -0,0 +1,131 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved. | ||
3 | * Authors: David Chinner and Glauber Costa | ||
4 | * | ||
5 | * Generic LRU infrastructure | ||
6 | */ | ||
7 | #ifndef _LRU_LIST_H | ||
8 | #define _LRU_LIST_H | ||
9 | |||
10 | #include <linux/list.h> | ||
11 | #include <linux/nodemask.h> | ||
12 | |||
13 | /* list_lru_walk_cb must always return one of these */ | ||
14 | enum lru_status { | ||
15 | LRU_REMOVED, /* item removed from list */ | ||
16 | LRU_ROTATE, /* item referenced, give another pass */ | ||
17 | LRU_SKIP, /* item cannot be locked, skip */ | ||
18 | LRU_RETRY, /* item not freeable. May drop the lock | ||
19 | internally, but has to return locked. */ | ||
20 | }; | ||
21 | |||
22 | struct list_lru_node { | ||
23 | spinlock_t lock; | ||
24 | struct list_head list; | ||
25 | /* kept as signed so we can catch imbalance bugs */ | ||
26 | long nr_items; | ||
27 | } ____cacheline_aligned_in_smp; | ||
28 | |||
29 | struct list_lru { | ||
30 | struct list_lru_node *node; | ||
31 | nodemask_t active_nodes; | ||
32 | }; | ||
33 | |||
34 | void list_lru_destroy(struct list_lru *lru); | ||
35 | int list_lru_init(struct list_lru *lru); | ||
36 | |||
37 | /** | ||
38 | * list_lru_add: add an element to the lru list's tail | ||
39 | * @list_lru: the lru pointer | ||
40 | * @item: the item to be added. | ||
41 | * | ||
42 | * If the element is already part of a list, this function does nothing and | ||
43 | * returns. The caller therefore does not need to keep state about whether or | ||
44 | * not the element already belongs to the list, and may lazily update it. | ||
45 | * Note however that this is valid for *a* list, not *this* list. If the | ||
46 | * caller organizes itself in a way that elements can be on more than one | ||
47 | * type of list, it is up to the caller to fully remove the item from the | ||
48 | * previous list (with list_lru_del() for instance) before moving it to | ||
49 | * @list_lru. | ||
50 | * | ||
51 | * Return value: true if the list was updated, false otherwise | ||
52 | */ | ||
53 | bool list_lru_add(struct list_lru *lru, struct list_head *item); | ||
54 | |||
55 | /** | ||
56 | * list_lru_del: delete an element from the lru list | ||
57 | * @list_lru: the lru pointer | ||
58 | * @item: the item to be deleted. | ||
59 | * | ||
60 | * This function works analogously to list_lru_add in terms of list | ||
61 | * manipulation. The comments about an element already belonging to | ||
62 | * a list are also valid for list_lru_del. | ||
63 | * | ||
64 | * Return value: true if the list was updated, false otherwise | ||
65 | */ | ||
66 | bool list_lru_del(struct list_lru *lru, struct list_head *item); | ||
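As a usage note, a cache would typically call these from its put/get paths: list_lru_add() when an object's last reference goes away, list_lru_del() when an LRU-resident object is looked up again. A minimal sketch, where cache_lru is a struct list_lru and obj a hypothetical cached object embedding a list_head named lru:

	/* last reference went away: object becomes reclaimable */
	list_lru_add(&cache_lru, &obj->lru);

	/* object found again while still on the LRU: take it back */
	list_lru_del(&cache_lru, &obj->lru);

Both return bool, and ignoring the return value is fine thanks to the lazy-update semantics described above.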
67 | |||
68 | /** | ||
69 | * list_lru_count_node: return the number of objects currently held by @lru | ||
70 | * @lru: the lru pointer. | ||
71 | * @nid: the node id to count from. | ||
72 | * | ||
73 | * Always returns a non-negative number, 0 for empty lists. There is no | ||
74 | * guarantee that the list is not updated while the count is being computed. | ||
75 | * Callers that want such a guarantee need to provide an outer lock. | ||
76 | */ | ||
77 | unsigned long list_lru_count_node(struct list_lru *lru, int nid); | ||
78 | static inline unsigned long list_lru_count(struct list_lru *lru) | ||
79 | { | ||
80 | long count = 0; | ||
81 | int nid; | ||
82 | |||
83 | for_each_node_mask(nid, lru->active_nodes) | ||
84 | count += list_lru_count_node(lru, nid); | ||
85 | |||
86 | return count; | ||
87 | } | ||
88 | |||
89 | typedef enum lru_status | ||
90 | (*list_lru_walk_cb)(struct list_head *item, spinlock_t *lock, void *cb_arg); | ||
91 | /** | ||
92 | * list_lru_walk_node: walk a list_lru, isolating and disposing freeable items. | ||
93 | * @lru: the lru pointer. | ||
94 | * @nid: the node id to scan from. | ||
95 | * @isolate: callback function that is responsible for deciding what to do with | ||
96 | * the item currently being scanned | ||
97 | * @cb_arg: opaque type that will be passed to @isolate | ||
98 | * @nr_to_walk: how many items to scan. | ||
99 | * | ||
100 | * This function will scan all elements in a particular list_lru, calling the | ||
101 | * @isolate callback for each of those items, along with the current list | ||
102 | * spinlock and a caller-provided opaque argument. The @isolate callback may | ||
103 | * drop the lock internally, but *must* return with the lock held. The callback | ||
104 | * will return an enum lru_status telling the list_lru infrastructure what to | ||
105 | * do with the object being scanned. | ||
106 | * | ||
107 | * Please note that nr_to_walk does not mean how many objects will be freed, | ||
108 | * just how many objects will be scanned. | ||
109 | * | ||
110 | * Return value: the number of objects effectively removed from the LRU. | ||
111 | */ | ||
112 | unsigned long list_lru_walk_node(struct list_lru *lru, int nid, | ||
113 | list_lru_walk_cb isolate, void *cb_arg, | ||
114 | unsigned long *nr_to_walk); | ||
115 | |||
116 | static inline unsigned long | ||
117 | list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate, | ||
118 | void *cb_arg, unsigned long nr_to_walk) | ||
119 | { | ||
120 | long isolated = 0; | ||
121 | int nid; | ||
122 | |||
123 | for_each_node_mask(nid, lru->active_nodes) { | ||
124 | isolated += list_lru_walk_node(lru, nid, isolate, | ||
125 | cb_arg, &nr_to_walk); | ||
126 | if (nr_to_walk <= 0) | ||
127 | break; | ||
128 | } | ||
129 | return isolated; | ||
130 | } | ||
131 | #endif /* _LRU_LIST_H */ | ||
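To make the walk interface concrete, here is a hedged sketch of an isolate callback and a whole-LRU walk; my_obj, obj->lock, obj->refcount and cache_lru are hypothetical, and the in-tree user introduced by this patch is xfs_qm_dquot_isolate() above:

static enum lru_status my_isolate(struct list_head *item, spinlock_t *lru_lock,
				  void *arg)
{
	struct my_obj *obj = container_of(item, struct my_obj, lru);
	struct list_head *dispose = arg;

	if (!spin_trylock(&obj->lock))
		return LRU_SKIP;	/* busy, look at it on the next pass */

	if (obj->refcount) {
		spin_unlock(&obj->lock);
		return LRU_ROTATE;	/* recently referenced, keep it */
	}

	/* move to a private dispose list; the actual freeing happens later */
	list_move(item, dispose);
	spin_unlock(&obj->lock);
	return LRU_REMOVED;
}

	/* caller side: scan up to 128 objects, free the isolated ones afterwards */
	LIST_HEAD(dispose);
	unsigned long freed = list_lru_walk(&cache_lru, my_isolate, &dispose, 128);

Note that for LRU_REMOVED the callback itself must take the item off the list (as list_move() does here); the walker only adjusts nr_items.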
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index ac6b8ee07825..68c097077ef0 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h | |||
@@ -4,39 +4,67 @@ | |||
4 | /* | 4 | /* |
5 | * This struct is used to pass information from page reclaim to the shrinkers. | 5 | * This struct is used to pass information from page reclaim to the shrinkers. |
6 | * We consolidate the values for easier extension later. | 6 | * We consolidate the values for easier extension later. |
7 | * | ||
8 | * The 'gfpmask' refers to the allocation we are currently trying to | ||
9 | * fulfil. | ||
7 | */ | 10 | */ |
8 | struct shrink_control { | 11 | struct shrink_control { |
9 | gfp_t gfp_mask; | 12 | gfp_t gfp_mask; |
10 | 13 | ||
11 | /* How many slab objects shrinker() should scan and try to reclaim */ | 14 | /* |
15 | * How many objects scan_objects should scan and try to reclaim. | ||
16 | * This is reset before every call, so it is safe for callees | ||
17 | * to modify. | ||
18 | */ | ||
12 | unsigned long nr_to_scan; | 19 | unsigned long nr_to_scan; |
20 | |||
21 | /* shrink from these nodes */ | ||
22 | nodemask_t nodes_to_scan; | ||
23 | /* current node being shrunk (for NUMA aware shrinkers) */ | ||
24 | int nid; | ||
13 | }; | 25 | }; |
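Callers of shrink_slab() fill in nodes_to_scan before invoking the shrinkers; nid is then set by shrink_slab() itself as it iterates over that mask. A minimal caller-side sketch, modelled on the shake_page() hunk later in this patch (p being a struct page):

	struct shrink_control sc = {
		.gfp_mask = GFP_KERNEL,
	};

	node_set(page_to_nid(p), sc.nodes_to_scan);	/* only shrink this node */
	shrink_slab(&sc, 1000, 1000);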
14 | 26 | ||
27 | #define SHRINK_STOP (~0UL) | ||
15 | /* | 28 | /* |
16 | * A callback you can register to apply pressure to ageable caches. | 29 | * A callback you can register to apply pressure to ageable caches. |
17 | * | 30 | * |
18 | * 'sc' is passed shrink_control which includes a count 'nr_to_scan' | 31 | * @count_objects should return the number of freeable items in the cache. If |
19 | * and a 'gfpmask'. It should look through the least-recently-used | 32 | * there are no objects to free or the number of freeable items cannot be |
20 | * 'nr_to_scan' entries and attempt to free them up. It should return | 33 | * determined, it should return 0. No deadlock checks should be done during the |
21 | * the number of objects which remain in the cache. If it returns -1, it means | 34 | * count callback - the shrinker relies on aggregating scan counts that couldn't |
22 | * it cannot do any scanning at this time (eg. there is a risk of deadlock). | 35 | * be executed due to potential deadlocks to be run at a later call when the |
36 | * deadlock condition is no longer pending. | ||
23 | * | 37 | * |
24 | * The 'gfpmask' refers to the allocation we are currently trying to | 38 | * @scan_objects will only be called if @count_objects returned a non-zero |
25 | * fulfil. | 39 | * value for the number of freeable objects. The callout should scan the cache |
40 | * and attempt to free items from the cache. It should then return the number | ||
41 | * of objects freed during the scan, or SHRINK_STOP if progress cannot be made | ||
42 | * due to potential deadlocks. If SHRINK_STOP is returned, then no further | ||
43 | * attempts to call the @scan_objects will be made from the current reclaim | ||
44 | * context. | ||
26 | * | 45 | * |
27 | * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is | 46 | * @flags determine the shrinker abilities, like numa awareness |
28 | * querying the cache size, so a fastpath for that case is appropriate. | ||
29 | */ | 47 | */ |
30 | struct shrinker { | 48 | struct shrinker { |
31 | int (*shrink)(struct shrinker *, struct shrink_control *sc); | 49 | unsigned long (*count_objects)(struct shrinker *, |
50 | struct shrink_control *sc); | ||
51 | unsigned long (*scan_objects)(struct shrinker *, | ||
52 | struct shrink_control *sc); | ||
53 | |||
32 | int seeks; /* seeks to recreate an obj */ | 54 | int seeks; /* seeks to recreate an obj */ |
33 | long batch; /* reclaim batch size, 0 = default */ | 55 | long batch; /* reclaim batch size, 0 = default */ |
56 | unsigned long flags; | ||
34 | 57 | ||
35 | /* These are for internal use */ | 58 | /* These are for internal use */ |
36 | struct list_head list; | 59 | struct list_head list; |
37 | atomic_long_t nr_in_batch; /* objs pending delete */ | 60 | /* objs pending delete, per node */ |
61 | atomic_long_t *nr_deferred; | ||
38 | }; | 62 | }; |
39 | #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ | 63 | #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ |
40 | extern void register_shrinker(struct shrinker *); | 64 | |
65 | /* Flags */ | ||
66 | #define SHRINKER_NUMA_AWARE (1 << 0) | ||
67 | |||
68 | extern int register_shrinker(struct shrinker *); | ||
41 | extern void unregister_shrinker(struct shrinker *); | 69 | extern void unregister_shrinker(struct shrinker *); |
42 | #endif | 70 | #endif |
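Under the new API a minimal, non-NUMA-aware shrinker might look like the following sketch; my_cache_count(), my_cache_scan(), my_cache_nr_unused() and my_cache_prune() are hypothetical helpers, not part of this patch:

static unsigned long my_cache_count(struct shrinker *s,
				    struct shrink_control *sc)
{
	/* cheap estimate; returning 0 means "nothing to do, skip the scan" */
	return my_cache_nr_unused();
}

static unsigned long my_cache_scan(struct shrinker *s,
				   struct shrink_control *sc)
{
	if (!(sc->gfp_mask & __GFP_FS))
		return SHRINK_STOP;	/* can't recurse into the fs here */

	/* free up to sc->nr_to_scan objects, report how many actually went */
	return my_cache_prune(sc->nr_to_scan);
}

static struct shrinker my_shrinker = {
	.count_objects	= my_cache_count,
	.scan_objects	= my_cache_scan,
	.seeks		= DEFAULT_SEEKS,
};

	/* register_shrinker() can now fail: it allocates nr_deferred */
	if (register_shrinker(&my_shrinker))
		goto out_error;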
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 63cfcccaebb3..132a985aba8b 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h | |||
@@ -202,7 +202,7 @@ TRACE_EVENT(mm_shrink_slab_start, | |||
202 | 202 | ||
203 | TP_fast_assign( | 203 | TP_fast_assign( |
204 | __entry->shr = shr; | 204 | __entry->shr = shr; |
205 | __entry->shrink = shr->shrink; | 205 | __entry->shrink = shr->scan_objects; |
206 | __entry->nr_objects_to_shrink = nr_objects_to_shrink; | 206 | __entry->nr_objects_to_shrink = nr_objects_to_shrink; |
207 | __entry->gfp_flags = sc->gfp_mask; | 207 | __entry->gfp_flags = sc->gfp_mask; |
208 | __entry->pgs_scanned = pgs_scanned; | 208 | __entry->pgs_scanned = pgs_scanned; |
@@ -241,7 +241,7 @@ TRACE_EVENT(mm_shrink_slab_end, | |||
241 | 241 | ||
242 | TP_fast_assign( | 242 | TP_fast_assign( |
243 | __entry->shr = shr; | 243 | __entry->shr = shr; |
244 | __entry->shrink = shr->shrink; | 244 | __entry->shrink = shr->scan_objects; |
245 | __entry->unused_scan = unused_scan_cnt; | 245 | __entry->unused_scan = unused_scan_cnt; |
246 | __entry->new_scan = new_scan_cnt; | 246 | __entry->new_scan = new_scan_cnt; |
247 | __entry->retval = shrinker_retval; | 247 | __entry->retval = shrinker_retval; |
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index a4ed56cf0eac..6c28b61bb690 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h | |||
@@ -49,9 +49,9 @@ struct files_stat_struct { | |||
49 | }; | 49 | }; |
50 | 50 | ||
51 | struct inodes_stat_t { | 51 | struct inodes_stat_t { |
52 | int nr_inodes; | 52 | long nr_inodes; |
53 | int nr_unused; | 53 | long nr_unused; |
54 | int dummy[5]; /* padding for sysctl ABI compatibility */ | 54 | long dummy[5]; /* padding for sysctl ABI compatibility */ |
55 | }; | 55 | }; |
56 | 56 | ||
57 | 57 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index dc69093a8ec4..b2f06f3c6a3f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -1471,14 +1471,14 @@ static struct ctl_table fs_table[] = { | |||
1471 | { | 1471 | { |
1472 | .procname = "inode-nr", | 1472 | .procname = "inode-nr", |
1473 | .data = &inodes_stat, | 1473 | .data = &inodes_stat, |
1474 | .maxlen = 2*sizeof(int), | 1474 | .maxlen = 2*sizeof(long), |
1475 | .mode = 0444, | 1475 | .mode = 0444, |
1476 | .proc_handler = proc_nr_inodes, | 1476 | .proc_handler = proc_nr_inodes, |
1477 | }, | 1477 | }, |
1478 | { | 1478 | { |
1479 | .procname = "inode-state", | 1479 | .procname = "inode-state", |
1480 | .data = &inodes_stat, | 1480 | .data = &inodes_stat, |
1481 | .maxlen = 7*sizeof(int), | 1481 | .maxlen = 7*sizeof(long), |
1482 | .mode = 0444, | 1482 | .mode = 0444, |
1483 | .proc_handler = proc_nr_inodes, | 1483 | .proc_handler = proc_nr_inodes, |
1484 | }, | 1484 | }, |
@@ -1508,7 +1508,7 @@ static struct ctl_table fs_table[] = { | |||
1508 | { | 1508 | { |
1509 | .procname = "dentry-state", | 1509 | .procname = "dentry-state", |
1510 | .data = &dentry_stat, | 1510 | .data = &dentry_stat, |
1511 | .maxlen = 6*sizeof(int), | 1511 | .maxlen = 6*sizeof(long), |
1512 | .mode = 0444, | 1512 | .mode = 0444, |
1513 | .proc_handler = proc_nr_dentry, | 1513 | .proc_handler = proc_nr_dentry, |
1514 | }, | 1514 | }, |
diff --git a/mm/Makefile b/mm/Makefile index f00803386a67..305d10acd081 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -17,7 +17,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ | |||
17 | util.o mmzone.o vmstat.o backing-dev.o \ | 17 | util.o mmzone.o vmstat.o backing-dev.o \ |
18 | mm_init.o mmu_context.o percpu.o slab_common.o \ | 18 | mm_init.o mmu_context.o percpu.o slab_common.o \ |
19 | compaction.o balloon_compaction.o \ | 19 | compaction.o balloon_compaction.o \ |
20 | interval_tree.o $(mmu-y) | 20 | interval_tree.o list_lru.o $(mmu-y) |
21 | 21 | ||
22 | obj-y += init-mm.o | 22 | obj-y += init-mm.o |
23 | 23 | ||
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 963e14c0486f..d66010e0049d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -211,24 +211,29 @@ static void put_huge_zero_page(void) | |||
211 | BUG_ON(atomic_dec_and_test(&huge_zero_refcount)); | 211 | BUG_ON(atomic_dec_and_test(&huge_zero_refcount)); |
212 | } | 212 | } |
213 | 213 | ||
214 | static int shrink_huge_zero_page(struct shrinker *shrink, | 214 | static unsigned long shrink_huge_zero_page_count(struct shrinker *shrink, |
215 | struct shrink_control *sc) | 215 | struct shrink_control *sc) |
216 | { | 216 | { |
217 | if (!sc->nr_to_scan) | 217 | /* we can free zero page only if last reference remains */ |
218 | /* we can free zero page only if last reference remains */ | 218 | return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0; |
219 | return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0; | 219 | } |
220 | 220 | ||
221 | static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink, | ||
222 | struct shrink_control *sc) | ||
223 | { | ||
221 | if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { | 224 | if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { |
222 | struct page *zero_page = xchg(&huge_zero_page, NULL); | 225 | struct page *zero_page = xchg(&huge_zero_page, NULL); |
223 | BUG_ON(zero_page == NULL); | 226 | BUG_ON(zero_page == NULL); |
224 | __free_page(zero_page); | 227 | __free_page(zero_page); |
228 | return HPAGE_PMD_NR; | ||
225 | } | 229 | } |
226 | 230 | ||
227 | return 0; | 231 | return 0; |
228 | } | 232 | } |
229 | 233 | ||
230 | static struct shrinker huge_zero_page_shrinker = { | 234 | static struct shrinker huge_zero_page_shrinker = { |
231 | .shrink = shrink_huge_zero_page, | 235 | .count_objects = shrink_huge_zero_page_count, |
236 | .scan_objects = shrink_huge_zero_page_scan, | ||
232 | .seeks = DEFAULT_SEEKS, | 237 | .seeks = DEFAULT_SEEKS, |
233 | }; | 238 | }; |
234 | 239 | ||
diff --git a/mm/list_lru.c b/mm/list_lru.c new file mode 100644 index 000000000000..72467914b856 --- /dev/null +++ b/mm/list_lru.c | |||
@@ -0,0 +1,139 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved. | ||
3 | * Authors: David Chinner and Glauber Costa | ||
4 | * | ||
5 | * Generic LRU infrastructure | ||
6 | */ | ||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/mm.h> | ||
10 | #include <linux/list_lru.h> | ||
11 | #include <linux/slab.h> | ||
12 | |||
13 | bool list_lru_add(struct list_lru *lru, struct list_head *item) | ||
14 | { | ||
15 | int nid = page_to_nid(virt_to_page(item)); | ||
16 | struct list_lru_node *nlru = &lru->node[nid]; | ||
17 | |||
18 | spin_lock(&nlru->lock); | ||
19 | WARN_ON_ONCE(nlru->nr_items < 0); | ||
20 | if (list_empty(item)) { | ||
21 | list_add_tail(item, &nlru->list); | ||
22 | if (nlru->nr_items++ == 0) | ||
23 | node_set(nid, lru->active_nodes); | ||
24 | spin_unlock(&nlru->lock); | ||
25 | return true; | ||
26 | } | ||
27 | spin_unlock(&nlru->lock); | ||
28 | return false; | ||
29 | } | ||
30 | EXPORT_SYMBOL_GPL(list_lru_add); | ||
31 | |||
32 | bool list_lru_del(struct list_lru *lru, struct list_head *item) | ||
33 | { | ||
34 | int nid = page_to_nid(virt_to_page(item)); | ||
35 | struct list_lru_node *nlru = &lru->node[nid]; | ||
36 | |||
37 | spin_lock(&nlru->lock); | ||
38 | if (!list_empty(item)) { | ||
39 | list_del_init(item); | ||
40 | if (--nlru->nr_items == 0) | ||
41 | node_clear(nid, lru->active_nodes); | ||
42 | WARN_ON_ONCE(nlru->nr_items < 0); | ||
43 | spin_unlock(&nlru->lock); | ||
44 | return true; | ||
45 | } | ||
46 | spin_unlock(&nlru->lock); | ||
47 | return false; | ||
48 | } | ||
49 | EXPORT_SYMBOL_GPL(list_lru_del); | ||
50 | |||
51 | unsigned long | ||
52 | list_lru_count_node(struct list_lru *lru, int nid) | ||
53 | { | ||
54 | unsigned long count = 0; | ||
55 | struct list_lru_node *nlru = &lru->node[nid]; | ||
56 | |||
57 | spin_lock(&nlru->lock); | ||
58 | WARN_ON_ONCE(nlru->nr_items < 0); | ||
59 | count += nlru->nr_items; | ||
60 | spin_unlock(&nlru->lock); | ||
61 | |||
62 | return count; | ||
63 | } | ||
64 | EXPORT_SYMBOL_GPL(list_lru_count_node); | ||
65 | |||
66 | unsigned long | ||
67 | list_lru_walk_node(struct list_lru *lru, int nid, list_lru_walk_cb isolate, | ||
68 | void *cb_arg, unsigned long *nr_to_walk) | ||
69 | { | ||
70 | |||
71 | struct list_lru_node *nlru = &lru->node[nid]; | ||
72 | struct list_head *item, *n; | ||
73 | unsigned long isolated = 0; | ||
74 | |||
75 | spin_lock(&nlru->lock); | ||
76 | restart: | ||
77 | list_for_each_safe(item, n, &nlru->list) { | ||
78 | enum lru_status ret; | ||
79 | |||
80 | /* | ||
81 | * decrement nr_to_walk first so that we don't livelock if we | ||
82 | * get stuck on large numbers of LRU_RETRY items | ||
83 | */ | ||
84 | if (--(*nr_to_walk) == 0) | ||
85 | break; | ||
86 | |||
87 | ret = isolate(item, &nlru->lock, cb_arg); | ||
88 | switch (ret) { | ||
89 | case LRU_REMOVED: | ||
90 | if (--nlru->nr_items == 0) | ||
91 | node_clear(nid, lru->active_nodes); | ||
92 | WARN_ON_ONCE(nlru->nr_items < 0); | ||
93 | isolated++; | ||
94 | break; | ||
95 | case LRU_ROTATE: | ||
96 | list_move_tail(item, &nlru->list); | ||
97 | break; | ||
98 | case LRU_SKIP: | ||
99 | break; | ||
100 | case LRU_RETRY: | ||
101 | /* | ||
102 | * The lru lock has been dropped, our list traversal is | ||
103 | * now invalid and so we have to restart from scratch. | ||
104 | */ | ||
105 | goto restart; | ||
106 | default: | ||
107 | BUG(); | ||
108 | } | ||
109 | } | ||
110 | |||
111 | spin_unlock(&nlru->lock); | ||
112 | return isolated; | ||
113 | } | ||
114 | EXPORT_SYMBOL_GPL(list_lru_walk_node); | ||
115 | |||
116 | int list_lru_init(struct list_lru *lru) | ||
117 | { | ||
118 | int i; | ||
119 | size_t size = sizeof(*lru->node) * nr_node_ids; | ||
120 | |||
121 | lru->node = kzalloc(size, GFP_KERNEL); | ||
122 | if (!lru->node) | ||
123 | return -ENOMEM; | ||
124 | |||
125 | nodes_clear(lru->active_nodes); | ||
126 | for (i = 0; i < nr_node_ids; i++) { | ||
127 | spin_lock_init(&lru->node[i].lock); | ||
128 | INIT_LIST_HEAD(&lru->node[i].list); | ||
129 | lru->node[i].nr_items = 0; | ||
130 | } | ||
131 | return 0; | ||
132 | } | ||
133 | EXPORT_SYMBOL_GPL(list_lru_init); | ||
134 | |||
135 | void list_lru_destroy(struct list_lru *lru) | ||
136 | { | ||
137 | kfree(lru->node); | ||
138 | } | ||
139 | EXPORT_SYMBOL_GPL(list_lru_destroy); | ||
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index d472e14c6808..947ed5413279 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -248,10 +248,12 @@ void shake_page(struct page *p, int access) | |||
248 | */ | 248 | */ |
249 | if (access) { | 249 | if (access) { |
250 | int nr; | 250 | int nr; |
251 | int nid = page_to_nid(p); | ||
251 | do { | 252 | do { |
252 | struct shrink_control shrink = { | 253 | struct shrink_control shrink = { |
253 | .gfp_mask = GFP_KERNEL, | 254 | .gfp_mask = GFP_KERNEL, |
254 | }; | 255 | }; |
256 | node_set(nid, shrink.nodes_to_scan); | ||
255 | 257 | ||
256 | nr = shrink_slab(&shrink, 1000, 1000); | 258 | nr = shrink_slab(&shrink, 1000, 1000); |
257 | if (page_count(p) == 1) | 259 | if (page_count(p) == 1) |
diff --git a/mm/vmscan.c b/mm/vmscan.c index fe715daeb8bc..beb35778c69f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -174,14 +174,31 @@ static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) | |||
174 | } | 174 | } |
175 | 175 | ||
176 | /* | 176 | /* |
177 | * Add a shrinker callback to be called from the vm | 177 | * Add a shrinker callback to be called from the vm. |
178 | */ | 178 | */ |
179 | void register_shrinker(struct shrinker *shrinker) | 179 | int register_shrinker(struct shrinker *shrinker) |
180 | { | 180 | { |
181 | atomic_long_set(&shrinker->nr_in_batch, 0); | 181 | size_t size = sizeof(*shrinker->nr_deferred); |
182 | |||
183 | /* | ||
184 | * If we only have one possible node in the system anyway, save | ||
185 | * ourselves the trouble and disable NUMA aware behavior. This way we | ||
186 | * will save memory and some small loop time later. | ||
187 | */ | ||
188 | if (nr_node_ids == 1) | ||
189 | shrinker->flags &= ~SHRINKER_NUMA_AWARE; | ||
190 | |||
191 | if (shrinker->flags & SHRINKER_NUMA_AWARE) | ||
192 | size *= nr_node_ids; | ||
193 | |||
194 | shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); | ||
195 | if (!shrinker->nr_deferred) | ||
196 | return -ENOMEM; | ||
197 | |||
182 | down_write(&shrinker_rwsem); | 198 | down_write(&shrinker_rwsem); |
183 | list_add_tail(&shrinker->list, &shrinker_list); | 199 | list_add_tail(&shrinker->list, &shrinker_list); |
184 | up_write(&shrinker_rwsem); | 200 | up_write(&shrinker_rwsem); |
201 | return 0; | ||
185 | } | 202 | } |
186 | EXPORT_SYMBOL(register_shrinker); | 203 | EXPORT_SYMBOL(register_shrinker); |
187 | 204 | ||
@@ -196,15 +213,102 @@ void unregister_shrinker(struct shrinker *shrinker) | |||
196 | } | 213 | } |
197 | EXPORT_SYMBOL(unregister_shrinker); | 214 | EXPORT_SYMBOL(unregister_shrinker); |
198 | 215 | ||
199 | static inline int do_shrinker_shrink(struct shrinker *shrinker, | 216 | #define SHRINK_BATCH 128 |
200 | struct shrink_control *sc, | 217 | |
201 | unsigned long nr_to_scan) | 218 | static unsigned long |
202 | { | 219 | shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker, |
203 | sc->nr_to_scan = nr_to_scan; | 220 | unsigned long nr_pages_scanned, unsigned long lru_pages) |
204 | return (*shrinker->shrink)(shrinker, sc); | 221 | { |
222 | unsigned long freed = 0; | ||
223 | unsigned long long delta; | ||
224 | long total_scan; | ||
225 | long max_pass; | ||
226 | long nr; | ||
227 | long new_nr; | ||
228 | int nid = shrinkctl->nid; | ||
229 | long batch_size = shrinker->batch ? shrinker->batch | ||
230 | : SHRINK_BATCH; | ||
231 | |||
232 | max_pass = shrinker->count_objects(shrinker, shrinkctl); | ||
233 | if (max_pass == 0) | ||
234 | return 0; | ||
235 | |||
236 | /* | ||
237 | * copy the current shrinker scan count into a local variable | ||
238 | * and zero it so that other concurrent shrinker invocations | ||
239 | * don't also do this scanning work. | ||
240 | */ | ||
241 | nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0); | ||
242 | |||
243 | total_scan = nr; | ||
244 | delta = (4 * nr_pages_scanned) / shrinker->seeks; | ||
245 | delta *= max_pass; | ||
246 | do_div(delta, lru_pages + 1); | ||
247 | total_scan += delta; | ||
248 | if (total_scan < 0) { | ||
249 | printk(KERN_ERR | ||
250 | "shrink_slab: %pF negative objects to delete nr=%ld\n", | ||
251 | shrinker->scan_objects, total_scan); | ||
252 | total_scan = max_pass; | ||
253 | } | ||
254 | |||
255 | /* | ||
256 | * We need to avoid excessive windup on filesystem shrinkers | ||
257 | * due to large numbers of GFP_NOFS allocations causing the | ||
258 | * shrinkers to return -1 all the time. This results in a large | ||
259 | * nr being built up so when a shrink that can do some work | ||
260 | * comes along it empties the entire cache due to nr >>> | ||
261 | * max_pass. This is bad for sustaining a working set in | ||
262 | * memory. | ||
263 | * | ||
264 | * Hence only allow the shrinker to scan the entire cache when | ||
265 | * a large delta change is calculated directly. | ||
266 | */ | ||
267 | if (delta < max_pass / 4) | ||
268 | total_scan = min(total_scan, max_pass / 2); | ||
269 | |||
270 | /* | ||
271 | * Avoid risking looping forever due to too large a nr value: | ||
272 | * never try to free more than twice the estimated number of | ||
273 | * freeable entries. | ||
274 | */ | ||
275 | if (total_scan > max_pass * 2) | ||
276 | total_scan = max_pass * 2; | ||
277 | |||
278 | trace_mm_shrink_slab_start(shrinker, shrinkctl, nr, | ||
279 | nr_pages_scanned, lru_pages, | ||
280 | max_pass, delta, total_scan); | ||
281 | |||
282 | while (total_scan >= batch_size) { | ||
283 | unsigned long ret; | ||
284 | |||
285 | shrinkctl->nr_to_scan = batch_size; | ||
286 | ret = shrinker->scan_objects(shrinker, shrinkctl); | ||
287 | if (ret == SHRINK_STOP) | ||
288 | break; | ||
289 | freed += ret; | ||
290 | |||
291 | count_vm_events(SLABS_SCANNED, batch_size); | ||
292 | total_scan -= batch_size; | ||
293 | |||
294 | cond_resched(); | ||
295 | } | ||
296 | |||
297 | /* | ||
298 | * move the unused scan count back into the shrinker in a | ||
299 | * manner that handles concurrent updates. If we exhausted the | ||
300 | * scan, there is no need to do an update. | ||
301 | */ | ||
302 | if (total_scan > 0) | ||
303 | new_nr = atomic_long_add_return(total_scan, | ||
304 | &shrinker->nr_deferred[nid]); | ||
305 | else | ||
306 | new_nr = atomic_long_read(&shrinker->nr_deferred[nid]); | ||
307 | |||
308 | trace_mm_shrink_slab_end(shrinker, freed, nr, new_nr); | ||
309 | return freed; | ||
205 | } | 310 | } |
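As a worked example of the delta calculation in shrink_slab_node() above (all numbers assumed, not measured): with shrinker->seeks == DEFAULT_SEEKS (2), nr_pages_scanned == 1000, lru_pages == 100000 and count_objects() reporting max_pass == 10000 freeable objects:

	delta  = (4 * 1000) / 2      /* = 2000        */
	delta *= 10000               /* = 20000000    */
	do_div(delta, 100000 + 1)    /* -> 199        */

so roughly 200 objects are added to total_scan for this pass, on top of any nr_deferred work carried over from earlier GFP_NOFS-limited calls, with the whole scan capped at 2 * max_pass.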
206 | 311 | ||
207 | #define SHRINK_BATCH 128 | ||
208 | /* | 312 | /* |
209 | * Call the shrink functions to age shrinkable caches | 313 | * Call the shrink functions to age shrinkable caches |
210 | * | 314 | * |
@@ -224,115 +328,45 @@ static inline int do_shrinker_shrink(struct shrinker *shrinker, | |||
224 | * | 328 | * |
225 | * Returns the number of slab objects which we shrunk. | 329 | * Returns the number of slab objects which we shrunk. |
226 | */ | 330 | */ |
227 | unsigned long shrink_slab(struct shrink_control *shrink, | 331 | unsigned long shrink_slab(struct shrink_control *shrinkctl, |
228 | unsigned long nr_pages_scanned, | 332 | unsigned long nr_pages_scanned, |
229 | unsigned long lru_pages) | 333 | unsigned long lru_pages) |
230 | { | 334 | { |
231 | struct shrinker *shrinker; | 335 | struct shrinker *shrinker; |
232 | unsigned long ret = 0; | 336 | unsigned long freed = 0; |
233 | 337 | ||
234 | if (nr_pages_scanned == 0) | 338 | if (nr_pages_scanned == 0) |
235 | nr_pages_scanned = SWAP_CLUSTER_MAX; | 339 | nr_pages_scanned = SWAP_CLUSTER_MAX; |
236 | 340 | ||
237 | if (!down_read_trylock(&shrinker_rwsem)) { | 341 | if (!down_read_trylock(&shrinker_rwsem)) { |
238 | /* Assume we'll be able to shrink next time */ | 342 | /* |
239 | ret = 1; | 343 | * If we would return 0, our callers would understand that we |
344 | * have nothing else to shrink and give up trying. By returning | ||
345 | * 1 we keep it going and assume we'll be able to shrink next | ||
346 | * time. | ||
347 | */ | ||
348 | freed = 1; | ||
240 | goto out; | 349 | goto out; |
241 | } | 350 | } |
242 | 351 | ||
243 | list_for_each_entry(shrinker, &shrinker_list, list) { | 352 | list_for_each_entry(shrinker, &shrinker_list, list) { |
244 | unsigned long long delta; | 353 | for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) { |
245 | long total_scan; | 354 | if (!node_online(shrinkctl->nid)) |
246 | long max_pass; | 355 | continue; |
247 | int shrink_ret = 0; | ||
248 | long nr; | ||
249 | long new_nr; | ||
250 | long batch_size = shrinker->batch ? shrinker->batch | ||
251 | : SHRINK_BATCH; | ||
252 | |||
253 | max_pass = do_shrinker_shrink(shrinker, shrink, 0); | ||
254 | if (max_pass <= 0) | ||
255 | continue; | ||
256 | |||
257 | /* | ||
258 | * copy the current shrinker scan count into a local variable | ||
259 | * and zero it so that other concurrent shrinker invocations | ||
260 | * don't also do this scanning work. | ||
261 | */ | ||
262 | nr = atomic_long_xchg(&shrinker->nr_in_batch, 0); | ||
263 | |||
264 | total_scan = nr; | ||
265 | delta = (4 * nr_pages_scanned) / shrinker->seeks; | ||
266 | delta *= max_pass; | ||
267 | do_div(delta, lru_pages + 1); | ||
268 | total_scan += delta; | ||
269 | if (total_scan < 0) { | ||
270 | printk(KERN_ERR "shrink_slab: %pF negative objects to " | ||
271 | "delete nr=%ld\n", | ||
272 | shrinker->shrink, total_scan); | ||
273 | total_scan = max_pass; | ||
274 | } | ||
275 | |||
276 | /* | ||
277 | * We need to avoid excessive windup on filesystem shrinkers | ||
278 | * due to large numbers of GFP_NOFS allocations causing the | ||
279 | * shrinkers to return -1 all the time. This results in a large | ||
280 | * nr being built up so when a shrink that can do some work | ||
281 | * comes along it empties the entire cache due to nr >>> | ||
282 | * max_pass. This is bad for sustaining a working set in | ||
283 | * memory. | ||
284 | * | ||
285 | * Hence only allow the shrinker to scan the entire cache when | ||
286 | * a large delta change is calculated directly. | ||
287 | */ | ||
288 | if (delta < max_pass / 4) | ||
289 | total_scan = min(total_scan, max_pass / 2); | ||
290 | |||
291 | /* | ||
292 | * Avoid risking looping forever due to too large nr value: | ||
293 | * never try to free more than twice the estimate number of | ||
294 | * freeable entries. | ||
295 | */ | ||
296 | if (total_scan > max_pass * 2) | ||
297 | total_scan = max_pass * 2; | ||
298 | |||
299 | trace_mm_shrink_slab_start(shrinker, shrink, nr, | ||
300 | nr_pages_scanned, lru_pages, | ||
301 | max_pass, delta, total_scan); | ||
302 | |||
303 | while (total_scan >= batch_size) { | ||
304 | int nr_before; | ||
305 | 356 | ||
306 | nr_before = do_shrinker_shrink(shrinker, shrink, 0); | 357 | if (!(shrinker->flags & SHRINKER_NUMA_AWARE) && |
307 | shrink_ret = do_shrinker_shrink(shrinker, shrink, | 358 | (shrinkctl->nid != 0)) |
308 | batch_size); | ||
309 | if (shrink_ret == -1) | ||
310 | break; | 359 | break; |
311 | if (shrink_ret < nr_before) | ||
312 | ret += nr_before - shrink_ret; | ||
313 | count_vm_events(SLABS_SCANNED, batch_size); | ||
314 | total_scan -= batch_size; | ||
315 | 360 | ||
316 | cond_resched(); | 361 | freed += shrink_slab_node(shrinkctl, shrinker, |
317 | } | 362 | nr_pages_scanned, lru_pages); |
318 | 363 | ||
319 | /* | 364 | } |
320 | * move the unused scan count back into the shrinker in a | ||
321 | * manner that handles concurrent updates. If we exhausted the | ||
322 | * scan, there is no need to do an update. | ||
323 | */ | ||
324 | if (total_scan > 0) | ||
325 | new_nr = atomic_long_add_return(total_scan, | ||
326 | &shrinker->nr_in_batch); | ||
327 | else | ||
328 | new_nr = atomic_long_read(&shrinker->nr_in_batch); | ||
329 | |||
330 | trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr); | ||
331 | } | 365 | } |
332 | up_read(&shrinker_rwsem); | 366 | up_read(&shrinker_rwsem); |
333 | out: | 367 | out: |
334 | cond_resched(); | 368 | cond_resched(); |
335 | return ret; | 369 | return freed; |
336 | } | 370 | } |
337 | 371 | ||
338 | static inline int is_page_cache_freeable(struct page *page) | 372 | static inline int is_page_cache_freeable(struct page *page) |
@@ -2368,12 +2402,16 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
2368 | */ | 2402 | */ |
2369 | if (global_reclaim(sc)) { | 2403 | if (global_reclaim(sc)) { |
2370 | unsigned long lru_pages = 0; | 2404 | unsigned long lru_pages = 0; |
2405 | |||
2406 | nodes_clear(shrink->nodes_to_scan); | ||
2371 | for_each_zone_zonelist(zone, z, zonelist, | 2407 | for_each_zone_zonelist(zone, z, zonelist, |
2372 | gfp_zone(sc->gfp_mask)) { | 2408 | gfp_zone(sc->gfp_mask)) { |
2373 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) | 2409 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) |
2374 | continue; | 2410 | continue; |
2375 | 2411 | ||
2376 | lru_pages += zone_reclaimable_pages(zone); | 2412 | lru_pages += zone_reclaimable_pages(zone); |
2413 | node_set(zone_to_nid(zone), | ||
2414 | shrink->nodes_to_scan); | ||
2377 | } | 2415 | } |
2378 | 2416 | ||
2379 | shrink_slab(shrink, sc->nr_scanned, lru_pages); | 2417 | shrink_slab(shrink, sc->nr_scanned, lru_pages); |
@@ -2829,6 +2867,8 @@ static bool kswapd_shrink_zone(struct zone *zone, | |||
2829 | return true; | 2867 | return true; |
2830 | 2868 | ||
2831 | shrink_zone(zone, sc); | 2869 | shrink_zone(zone, sc); |
2870 | nodes_clear(shrink.nodes_to_scan); | ||
2871 | node_set(zone_to_nid(zone), shrink.nodes_to_scan); | ||
2832 | 2872 | ||
2833 | reclaim_state->reclaimed_slab = 0; | 2873 | reclaim_state->reclaimed_slab = 0; |
2834 | shrink_slab(&shrink, sc->nr_scanned, lru_pages); | 2874 | shrink_slab(&shrink, sc->nr_scanned, lru_pages); |
@@ -3520,10 +3560,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
3520 | * number of slab pages and shake the slab until it is reduced | 3560 | * number of slab pages and shake the slab until it is reduced |
3521 | * by the same nr_pages that we used for reclaiming unmapped | 3561 | * by the same nr_pages that we used for reclaiming unmapped |
3522 | * pages. | 3562 | * pages. |
3523 | * | ||
3524 | * Note that shrink_slab will free memory on all zones and may | ||
3525 | * take a long time. | ||
3526 | */ | 3563 | */ |
3564 | nodes_clear(shrink.nodes_to_scan); | ||
3565 | node_set(zone_to_nid(zone), shrink.nodes_to_scan); | ||
3527 | for (;;) { | 3566 | for (;;) { |
3528 | unsigned long lru_pages = zone_reclaimable_pages(zone); | 3567 | unsigned long lru_pages = zone_reclaimable_pages(zone); |
3529 | 3568 | ||
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 415159061cd0..5285ead196c0 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c | |||
@@ -434,12 +434,13 @@ EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache); | |||
434 | /* | 434 | /* |
435 | * Remove stale credentials. Avoid sleeping inside the loop. | 435 | * Remove stale credentials. Avoid sleeping inside the loop. |
436 | */ | 436 | */ |
437 | static int | 437 | static long |
438 | rpcauth_prune_expired(struct list_head *free, int nr_to_scan) | 438 | rpcauth_prune_expired(struct list_head *free, int nr_to_scan) |
439 | { | 439 | { |
440 | spinlock_t *cache_lock; | 440 | spinlock_t *cache_lock; |
441 | struct rpc_cred *cred, *next; | 441 | struct rpc_cred *cred, *next; |
442 | unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM; | 442 | unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM; |
443 | long freed = 0; | ||
443 | 444 | ||
444 | list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { | 445 | list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { |
445 | 446 | ||
@@ -451,10 +452,11 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) | |||
451 | */ | 452 | */ |
452 | if (time_in_range(cred->cr_expire, expired, jiffies) && | 453 | if (time_in_range(cred->cr_expire, expired, jiffies) && |
453 | test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) | 454 | test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) |
454 | return 0; | 455 | break; |
455 | 456 | ||
456 | list_del_init(&cred->cr_lru); | 457 | list_del_init(&cred->cr_lru); |
457 | number_cred_unused--; | 458 | number_cred_unused--; |
459 | freed++; | ||
458 | if (atomic_read(&cred->cr_count) != 0) | 460 | if (atomic_read(&cred->cr_count) != 0) |
459 | continue; | 461 | continue; |
460 | 462 | ||
@@ -467,29 +469,39 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) | |||
467 | } | 469 | } |
468 | spin_unlock(cache_lock); | 470 | spin_unlock(cache_lock); |
469 | } | 471 | } |
470 | return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; | 472 | return freed; |
471 | } | 473 | } |
472 | 474 | ||
473 | /* | 475 | /* |
474 | * Run memory cache shrinker. | 476 | * Run memory cache shrinker. |
475 | */ | 477 | */ |
476 | static int | 478 | static unsigned long |
477 | rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc) | 479 | rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
480 | |||
478 | { | 481 | { |
479 | LIST_HEAD(free); | 482 | LIST_HEAD(free); |
480 | int res; | 483 | unsigned long freed; |
481 | int nr_to_scan = sc->nr_to_scan; | 484 | |
482 | gfp_t gfp_mask = sc->gfp_mask; | 485 | if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL) |
486 | return SHRINK_STOP; | ||
483 | 487 | ||
484 | if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) | 488 | /* nothing left, don't come back */ |
485 | return (nr_to_scan == 0) ? 0 : -1; | ||
486 | if (list_empty(&cred_unused)) | 489 | if (list_empty(&cred_unused)) |
487 | return 0; | 490 | return SHRINK_STOP; |
491 | |||
488 | spin_lock(&rpc_credcache_lock); | 492 | spin_lock(&rpc_credcache_lock); |
489 | res = rpcauth_prune_expired(&free, nr_to_scan); | 493 | freed = rpcauth_prune_expired(&free, sc->nr_to_scan); |
490 | spin_unlock(&rpc_credcache_lock); | 494 | spin_unlock(&rpc_credcache_lock); |
491 | rpcauth_destroy_credlist(&free); | 495 | rpcauth_destroy_credlist(&free); |
492 | return res; | 496 | |
497 | return freed; | ||
498 | } | ||
499 | |||
500 | static unsigned long | ||
501 | rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) | ||
502 | |||
503 | { | ||
504 | return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; | ||
493 | } | 505 | } |
494 | 506 | ||
495 | /* | 507 | /* |
@@ -805,7 +817,8 @@ rpcauth_uptodatecred(struct rpc_task *task) | |||
805 | } | 817 | } |
806 | 818 | ||
807 | static struct shrinker rpc_cred_shrinker = { | 819 | static struct shrinker rpc_cred_shrinker = { |
808 | .shrink = rpcauth_cache_shrinker, | 820 | .count_objects = rpcauth_cache_shrink_count, |
821 | .scan_objects = rpcauth_cache_shrink_scan, | ||
809 | .seeks = DEFAULT_SEEKS, | 822 | .seeks = DEFAULT_SEEKS, |
810 | }; | 823 | }; |
811 | 824 | ||