diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-12 18:01:38 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-12 18:01:38 -0400 |
| commit | 26935fb06ee88f1188789807687c03041f3c70d9 (patch) | |
| tree | 381c487716540b52348d78bee6555f8fa61d77ef | |
| parent | 3cc69b638e11bfda5d013c2b75b60934aa0e88a1 (diff) | |
| parent | bf2ba3bc185269eca274b458aac46ba1ad7c1121 (diff) | |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs pile 4 from Al Viro:
"list_lru pile, mostly"
This came out of Andrew's pile, Al ended up doing the merge work so that
Andrew didn't have to.
Additionally, a few fixes.
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (42 commits)
super: fix for destroy lrus
list_lru: dynamically adjust node arrays
shrinker: Kill old ->shrink API.
shrinker: convert remaining shrinkers to count/scan API
staging/lustre/libcfs: cleanup linux-mem.h
staging/lustre/ptlrpc: convert to new shrinker API
staging/lustre/obdclass: convert lu_object shrinker to count/scan API
staging/lustre/ldlm: convert to shrinkers to count/scan API
hugepage: convert huge zero page shrinker to new shrinker API
i915: bail out earlier when shrinker cannot acquire mutex
drivers: convert shrinkers to new count/scan API
fs: convert fs shrinkers to new scan/count API
xfs: fix dquot isolation hang
xfs-convert-dquot-cache-lru-to-list_lru-fix
xfs: convert dquot cache lru to list_lru
xfs: rework buffer dispose list tracking
xfs-convert-buftarg-lru-to-generic-code-fix
xfs: convert buftarg LRU to generic code
fs: convert inode and dentry shrinking to be node aware
vmscan: per-node deferred work
...
56 files changed, 1777 insertions, 1161 deletions
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 206a1bdc7321..f0890581f7f6 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting | |||
| @@ -451,3 +451,7 @@ in your dentry operations instead. | |||
| 451 | -- | 451 | -- |
| 452 | [mandatory] | 452 | [mandatory] |
| 453 | ->readdir() is gone now; switch to ->iterate() | 453 | ->readdir() is gone now; switch to ->iterate() |
| 454 | [mandatory] | ||
| 455 | vfs_follow_link has been removed. Filesystems must use nd_set_link | ||
| 456 | from ->follow_link for normal symlinks, or nd_jump_link for magic | ||
| 457 | /proc/<pid> style links. | ||
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6e2d2c8f230b..dce0df8150df 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -4421,13 +4421,12 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm) | |||
| 4421 | } | 4421 | } |
| 4422 | } | 4422 | } |
| 4423 | 4423 | ||
| 4424 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | 4424 | static unsigned long |
| 4425 | mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) | ||
| 4425 | { | 4426 | { |
| 4426 | struct kvm *kvm; | 4427 | struct kvm *kvm; |
| 4427 | int nr_to_scan = sc->nr_to_scan; | 4428 | int nr_to_scan = sc->nr_to_scan; |
| 4428 | 4429 | unsigned long freed = 0; | |
| 4429 | if (nr_to_scan == 0) | ||
| 4430 | goto out; | ||
| 4431 | 4430 | ||
| 4432 | raw_spin_lock(&kvm_lock); | 4431 | raw_spin_lock(&kvm_lock); |
| 4433 | 4432 | ||
| @@ -4462,25 +4461,37 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
| 4462 | goto unlock; | 4461 | goto unlock; |
| 4463 | } | 4462 | } |
| 4464 | 4463 | ||
| 4465 | prepare_zap_oldest_mmu_page(kvm, &invalid_list); | 4464 | if (prepare_zap_oldest_mmu_page(kvm, &invalid_list)) |
| 4465 | freed++; | ||
| 4466 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 4466 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
| 4467 | 4467 | ||
| 4468 | unlock: | 4468 | unlock: |
| 4469 | spin_unlock(&kvm->mmu_lock); | 4469 | spin_unlock(&kvm->mmu_lock); |
| 4470 | srcu_read_unlock(&kvm->srcu, idx); | 4470 | srcu_read_unlock(&kvm->srcu, idx); |
| 4471 | 4471 | ||
| 4472 | /* | ||
| 4473 | * unfair on small ones | ||
| 4474 | * per-vm shrinkers cry out | ||
| 4475 | * sadness comes quickly | ||
| 4476 | */ | ||
| 4472 | list_move_tail(&kvm->vm_list, &vm_list); | 4477 | list_move_tail(&kvm->vm_list, &vm_list); |
| 4473 | break; | 4478 | break; |
| 4474 | } | 4479 | } |
| 4475 | 4480 | ||
| 4476 | raw_spin_unlock(&kvm_lock); | 4481 | raw_spin_unlock(&kvm_lock); |
| 4482 | return freed; | ||
| 4477 | 4483 | ||
| 4478 | out: | 4484 | } |
| 4485 | |||
| 4486 | static unsigned long | ||
| 4487 | mmu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) | ||
| 4488 | { | ||
| 4479 | return percpu_counter_read_positive(&kvm_total_used_mmu_pages); | 4489 | return percpu_counter_read_positive(&kvm_total_used_mmu_pages); |
| 4480 | } | 4490 | } |
| 4481 | 4491 | ||
| 4482 | static struct shrinker mmu_shrinker = { | 4492 | static struct shrinker mmu_shrinker = { |
| 4483 | .shrink = mmu_shrink, | 4493 | .count_objects = mmu_shrink_count, |
| 4494 | .scan_objects = mmu_shrink_scan, | ||
| 4484 | .seeks = DEFAULT_SEEKS * 10, | 4495 | .seeks = DEFAULT_SEEKS * 10, |
| 4485 | }; | 4496 | }; |
| 4486 | 4497 | ||
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 9b265a4c6a3d..c27a21034a5e 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c | |||
| @@ -1676,7 +1676,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) | |||
| 1676 | return 0; | 1676 | return 0; |
| 1677 | 1677 | ||
| 1678 | out_gem_unload: | 1678 | out_gem_unload: |
| 1679 | if (dev_priv->mm.inactive_shrinker.shrink) | 1679 | if (dev_priv->mm.inactive_shrinker.scan_objects) |
| 1680 | unregister_shrinker(&dev_priv->mm.inactive_shrinker); | 1680 | unregister_shrinker(&dev_priv->mm.inactive_shrinker); |
| 1681 | 1681 | ||
| 1682 | if (dev->pdev->msi_enabled) | 1682 | if (dev->pdev->msi_enabled) |
| @@ -1715,7 +1715,7 @@ int i915_driver_unload(struct drm_device *dev) | |||
| 1715 | 1715 | ||
| 1716 | i915_teardown_sysfs(dev); | 1716 | i915_teardown_sysfs(dev); |
| 1717 | 1717 | ||
| 1718 | if (dev_priv->mm.inactive_shrinker.shrink) | 1718 | if (dev_priv->mm.inactive_shrinker.scan_objects) |
| 1719 | unregister_shrinker(&dev_priv->mm.inactive_shrinker); | 1719 | unregister_shrinker(&dev_priv->mm.inactive_shrinker); |
| 1720 | 1720 | ||
| 1721 | mutex_lock(&dev->struct_mutex); | 1721 | mutex_lock(&dev->struct_mutex); |
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d9e337feef14..8507c6d1e642 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c | |||
| @@ -57,10 +57,12 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, | |||
| 57 | struct drm_i915_fence_reg *fence, | 57 | struct drm_i915_fence_reg *fence, |
| 58 | bool enable); | 58 | bool enable); |
| 59 | 59 | ||
| 60 | static int i915_gem_inactive_shrink(struct shrinker *shrinker, | 60 | static unsigned long i915_gem_inactive_count(struct shrinker *shrinker, |
| 61 | struct shrink_control *sc); | 61 | struct shrink_control *sc); |
| 62 | static unsigned long i915_gem_inactive_scan(struct shrinker *shrinker, | ||
| 63 | struct shrink_control *sc); | ||
| 62 | static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); | 64 | static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); |
| 63 | static void i915_gem_shrink_all(struct drm_i915_private *dev_priv); | 65 | static long i915_gem_shrink_all(struct drm_i915_private *dev_priv); |
| 64 | static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); | 66 | static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); |
| 65 | 67 | ||
| 66 | static bool cpu_cache_is_coherent(struct drm_device *dev, | 68 | static bool cpu_cache_is_coherent(struct drm_device *dev, |
| @@ -1769,16 +1771,21 @@ i915_gem_purge(struct drm_i915_private *dev_priv, long target) | |||
| 1769 | return __i915_gem_shrink(dev_priv, target, true); | 1771 | return __i915_gem_shrink(dev_priv, target, true); |
| 1770 | } | 1772 | } |
| 1771 | 1773 | ||
| 1772 | static void | 1774 | static long |
| 1773 | i915_gem_shrink_all(struct drm_i915_private *dev_priv) | 1775 | i915_gem_shrink_all(struct drm_i915_private *dev_priv) |
| 1774 | { | 1776 | { |
| 1775 | struct drm_i915_gem_object *obj, *next; | 1777 | struct drm_i915_gem_object *obj, *next; |
| 1778 | long freed = 0; | ||
| 1776 | 1779 | ||
| 1777 | i915_gem_evict_everything(dev_priv->dev); | 1780 | i915_gem_evict_everything(dev_priv->dev); |
| 1778 | 1781 | ||
| 1779 | list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, | 1782 | list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, |
| 1780 | global_list) | 1783 | global_list) { |
| 1784 | if (obj->pages_pin_count == 0) | ||
| 1785 | freed += obj->base.size >> PAGE_SHIFT; | ||
| 1781 | i915_gem_object_put_pages(obj); | 1786 | i915_gem_object_put_pages(obj); |
| 1787 | } | ||
| 1788 | return freed; | ||
| 1782 | } | 1789 | } |
| 1783 | 1790 | ||
| 1784 | static int | 1791 | static int |
| @@ -4558,7 +4565,8 @@ i915_gem_load(struct drm_device *dev) | |||
| 4558 | 4565 | ||
| 4559 | dev_priv->mm.interruptible = true; | 4566 | dev_priv->mm.interruptible = true; |
| 4560 | 4567 | ||
| 4561 | dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; | 4568 | dev_priv->mm.inactive_shrinker.scan_objects = i915_gem_inactive_scan; |
| 4569 | dev_priv->mm.inactive_shrinker.count_objects = i915_gem_inactive_count; | ||
| 4562 | dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; | 4570 | dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; |
| 4563 | register_shrinker(&dev_priv->mm.inactive_shrinker); | 4571 | register_shrinker(&dev_priv->mm.inactive_shrinker); |
| 4564 | } | 4572 | } |
| @@ -4781,8 +4789,8 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) | |||
| 4781 | #endif | 4789 | #endif |
| 4782 | } | 4790 | } |
| 4783 | 4791 | ||
| 4784 | static int | 4792 | static unsigned long |
| 4785 | i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) | 4793 | i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc) |
| 4786 | { | 4794 | { |
| 4787 | struct drm_i915_private *dev_priv = | 4795 | struct drm_i915_private *dev_priv = |
| 4788 | container_of(shrinker, | 4796 | container_of(shrinker, |
| @@ -4790,45 +4798,35 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) | |||
| 4790 | mm.inactive_shrinker); | 4798 | mm.inactive_shrinker); |
| 4791 | struct drm_device *dev = dev_priv->dev; | 4799 | struct drm_device *dev = dev_priv->dev; |
| 4792 | struct drm_i915_gem_object *obj; | 4800 | struct drm_i915_gem_object *obj; |
| 4793 | int nr_to_scan = sc->nr_to_scan; | ||
| 4794 | bool unlock = true; | 4801 | bool unlock = true; |
| 4795 | int cnt; | 4802 | unsigned long count; |
| 4796 | 4803 | ||
| 4797 | if (!mutex_trylock(&dev->struct_mutex)) { | 4804 | if (!mutex_trylock(&dev->struct_mutex)) { |
| 4798 | if (!mutex_is_locked_by(&dev->struct_mutex, current)) | 4805 | if (!mutex_is_locked_by(&dev->struct_mutex, current)) |
| 4799 | return 0; | 4806 | return SHRINK_STOP; |
| 4800 | 4807 | ||
| 4801 | if (dev_priv->mm.shrinker_no_lock_stealing) | 4808 | if (dev_priv->mm.shrinker_no_lock_stealing) |
| 4802 | return 0; | 4809 | return SHRINK_STOP; |
| 4803 | 4810 | ||
| 4804 | unlock = false; | 4811 | unlock = false; |
| 4805 | } | 4812 | } |
| 4806 | 4813 | ||
| 4807 | if (nr_to_scan) { | 4814 | count = 0; |
| 4808 | nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan); | ||
| 4809 | if (nr_to_scan > 0) | ||
| 4810 | nr_to_scan -= __i915_gem_shrink(dev_priv, nr_to_scan, | ||
| 4811 | false); | ||
| 4812 | if (nr_to_scan > 0) | ||
| 4813 | i915_gem_shrink_all(dev_priv); | ||
| 4814 | } | ||
| 4815 | |||
| 4816 | cnt = 0; | ||
| 4817 | list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) | 4815 | list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) |
| 4818 | if (obj->pages_pin_count == 0) | 4816 | if (obj->pages_pin_count == 0) |
| 4819 | cnt += obj->base.size >> PAGE_SHIFT; | 4817 | count += obj->base.size >> PAGE_SHIFT; |
| 4820 | 4818 | ||
| 4821 | list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { | 4819 | list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { |
| 4822 | if (obj->active) | 4820 | if (obj->active) |
| 4823 | continue; | 4821 | continue; |
| 4824 | 4822 | ||
| 4825 | if (obj->pin_count == 0 && obj->pages_pin_count == 0) | 4823 | if (obj->pin_count == 0 && obj->pages_pin_count == 0) |
| 4826 | cnt += obj->base.size >> PAGE_SHIFT; | 4824 | count += obj->base.size >> PAGE_SHIFT; |
| 4827 | } | 4825 | } |
| 4828 | 4826 | ||
| 4829 | if (unlock) | 4827 | if (unlock) |
| 4830 | mutex_unlock(&dev->struct_mutex); | 4828 | mutex_unlock(&dev->struct_mutex); |
| 4831 | return cnt; | 4829 | return count; |
| 4832 | } | 4830 | } |
| 4833 | 4831 | ||
| 4834 | /* All the new VM stuff */ | 4832 | /* All the new VM stuff */ |
| @@ -4892,6 +4890,40 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, | |||
| 4892 | return 0; | 4890 | return 0; |
| 4893 | } | 4891 | } |
| 4894 | 4892 | ||
| 4893 | static unsigned long | ||
| 4894 | i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc) | ||
| 4895 | { | ||
| 4896 | struct drm_i915_private *dev_priv = | ||
| 4897 | container_of(shrinker, | ||
| 4898 | struct drm_i915_private, | ||
| 4899 | mm.inactive_shrinker); | ||
| 4900 | struct drm_device *dev = dev_priv->dev; | ||
| 4901 | int nr_to_scan = sc->nr_to_scan; | ||
| 4902 | unsigned long freed; | ||
| 4903 | bool unlock = true; | ||
| 4904 | |||
| 4905 | if (!mutex_trylock(&dev->struct_mutex)) { | ||
| 4906 | if (!mutex_is_locked_by(&dev->struct_mutex, current)) | ||
| 4907 | return 0; | ||
| 4908 | |||
| 4909 | if (dev_priv->mm.shrinker_no_lock_stealing) | ||
| 4910 | return 0; | ||
| 4911 | |||
| 4912 | unlock = false; | ||
| 4913 | } | ||
| 4914 | |||
| 4915 | freed = i915_gem_purge(dev_priv, nr_to_scan); | ||
| 4916 | if (freed < nr_to_scan) | ||
| 4917 | freed += __i915_gem_shrink(dev_priv, nr_to_scan, | ||
| 4918 | false); | ||
| 4919 | if (freed < nr_to_scan) | ||
| 4920 | freed += i915_gem_shrink_all(dev_priv); | ||
| 4921 | |||
| 4922 | if (unlock) | ||
| 4923 | mutex_unlock(&dev->struct_mutex); | ||
| 4924 | return freed; | ||
| 4925 | } | ||
| 4926 | |||
| 4895 | struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, | 4927 | struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, |
| 4896 | struct i915_address_space *vm) | 4928 | struct i915_address_space *vm) |
| 4897 | { | 4929 | { |
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index bd2a3b40cd12..863bef9f9234 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c | |||
| @@ -377,28 +377,26 @@ out: | |||
| 377 | return nr_free; | 377 | return nr_free; |
| 378 | } | 378 | } |
| 379 | 379 | ||
| 380 | /* Get good estimation how many pages are free in pools */ | ||
| 381 | static int ttm_pool_get_num_unused_pages(void) | ||
| 382 | { | ||
| 383 | unsigned i; | ||
| 384 | int total = 0; | ||
| 385 | for (i = 0; i < NUM_POOLS; ++i) | ||
| 386 | total += _manager->pools[i].npages; | ||
| 387 | |||
| 388 | return total; | ||
| 389 | } | ||
| 390 | |||
| 391 | /** | 380 | /** |
| 392 | * Callback for mm to request pool to reduce number of page held. | 381 | * Callback for mm to request pool to reduce number of page held. |
| 382 | * | ||
| 383 | * XXX: (dchinner) Deadlock warning! | ||
| 384 | * | ||
| 385 | * ttm_page_pool_free() does memory allocation using GFP_KERNEL. that means | ||
| 386 | * this can deadlock when called with a sc->gfp_mask that is not equal to | ||
| 387 | * GFP_KERNEL. | ||
| 388 | * | ||
| 389 | * This code is crying out for a shrinker per pool.... | ||
| 393 | */ | 390 | */ |
| 394 | static int ttm_pool_mm_shrink(struct shrinker *shrink, | 391 | static unsigned long |
| 395 | struct shrink_control *sc) | 392 | ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
| 396 | { | 393 | { |
| 397 | static atomic_t start_pool = ATOMIC_INIT(0); | 394 | static atomic_t start_pool = ATOMIC_INIT(0); |
| 398 | unsigned i; | 395 | unsigned i; |
| 399 | unsigned pool_offset = atomic_add_return(1, &start_pool); | 396 | unsigned pool_offset = atomic_add_return(1, &start_pool); |
| 400 | struct ttm_page_pool *pool; | 397 | struct ttm_page_pool *pool; |
| 401 | int shrink_pages = sc->nr_to_scan; | 398 | int shrink_pages = sc->nr_to_scan; |
| 399 | unsigned long freed = 0; | ||
| 402 | 400 | ||
| 403 | pool_offset = pool_offset % NUM_POOLS; | 401 | pool_offset = pool_offset % NUM_POOLS; |
| 404 | /* select start pool in round robin fashion */ | 402 | /* select start pool in round robin fashion */ |
| @@ -408,14 +406,28 @@ static int ttm_pool_mm_shrink(struct shrinker *shrink, | |||
| 408 | break; | 406 | break; |
| 409 | pool = &_manager->pools[(i + pool_offset)%NUM_POOLS]; | 407 | pool = &_manager->pools[(i + pool_offset)%NUM_POOLS]; |
| 410 | shrink_pages = ttm_page_pool_free(pool, nr_free); | 408 | shrink_pages = ttm_page_pool_free(pool, nr_free); |
| 409 | freed += nr_free - shrink_pages; | ||
| 411 | } | 410 | } |
| 412 | /* return estimated number of unused pages in pool */ | 411 | return freed; |
| 413 | return ttm_pool_get_num_unused_pages(); | 412 | } |
| 413 | |||
| 414 | |||
| 415 | static unsigned long | ||
| 416 | ttm_pool_shrink_count(struct shrinker *shrink, struct shrink_control *sc) | ||
| 417 | { | ||
| 418 | unsigned i; | ||
| 419 | unsigned long count = 0; | ||
| 420 | |||
| 421 | for (i = 0; i < NUM_POOLS; ++i) | ||
| 422 | count += _manager->pools[i].npages; | ||
| 423 | |||
| 424 | return count; | ||
| 414 | } | 425 | } |
| 415 | 426 | ||
| 416 | static void ttm_pool_mm_shrink_init(struct ttm_pool_manager *manager) | 427 | static void ttm_pool_mm_shrink_init(struct ttm_pool_manager *manager) |
| 417 | { | 428 | { |
| 418 | manager->mm_shrink.shrink = &ttm_pool_mm_shrink; | 429 | manager->mm_shrink.count_objects = ttm_pool_shrink_count; |
| 430 | manager->mm_shrink.scan_objects = ttm_pool_shrink_scan; | ||
| 419 | manager->mm_shrink.seeks = 1; | 431 | manager->mm_shrink.seeks = 1; |
| 420 | register_shrinker(&manager->mm_shrink); | 432 | register_shrinker(&manager->mm_shrink); |
| 421 | } | 433 | } |
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index b8b394319b45..7957beeeaf73 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | |||
| @@ -918,19 +918,6 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev) | |||
| 918 | } | 918 | } |
| 919 | EXPORT_SYMBOL_GPL(ttm_dma_populate); | 919 | EXPORT_SYMBOL_GPL(ttm_dma_populate); |
| 920 | 920 | ||
| 921 | /* Get good estimation how many pages are free in pools */ | ||
| 922 | static int ttm_dma_pool_get_num_unused_pages(void) | ||
| 923 | { | ||
| 924 | struct device_pools *p; | ||
| 925 | unsigned total = 0; | ||
| 926 | |||
| 927 | mutex_lock(&_manager->lock); | ||
| 928 | list_for_each_entry(p, &_manager->pools, pools) | ||
| 929 | total += p->pool->npages_free; | ||
| 930 | mutex_unlock(&_manager->lock); | ||
| 931 | return total; | ||
| 932 | } | ||
| 933 | |||
| 934 | /* Put all pages in pages list to correct pool to wait for reuse */ | 921 | /* Put all pages in pages list to correct pool to wait for reuse */ |
| 935 | void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) | 922 | void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) |
| 936 | { | 923 | { |
| @@ -1002,18 +989,29 @@ EXPORT_SYMBOL_GPL(ttm_dma_unpopulate); | |||
| 1002 | 989 | ||
| 1003 | /** | 990 | /** |
| 1004 | * Callback for mm to request pool to reduce number of page held. | 991 | * Callback for mm to request pool to reduce number of page held. |
| 992 | * | ||
| 993 | * XXX: (dchinner) Deadlock warning! | ||
| 994 | * | ||
| 995 | * ttm_dma_page_pool_free() does GFP_KERNEL memory allocation, and so attention | ||
| 996 | * needs to be paid to sc->gfp_mask to determine if this can be done or not. | ||
| 997 | * GFP_KERNEL memory allocation in a GFP_ATOMIC reclaim context would be really | ||
| 998 | * bad. | ||
| 999 | * | ||
| 1000 | * I'm getting sadder as I hear more pathetic whimpers about needing per-pool | ||
| 1001 | * shrinkers | ||
| 1005 | */ | 1002 | */ |
| 1006 | static int ttm_dma_pool_mm_shrink(struct shrinker *shrink, | 1003 | static unsigned long |
| 1007 | struct shrink_control *sc) | 1004 | ttm_dma_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
| 1008 | { | 1005 | { |
| 1009 | static atomic_t start_pool = ATOMIC_INIT(0); | 1006 | static atomic_t start_pool = ATOMIC_INIT(0); |
| 1010 | unsigned idx = 0; | 1007 | unsigned idx = 0; |
| 1011 | unsigned pool_offset = atomic_add_return(1, &start_pool); | 1008 | unsigned pool_offset = atomic_add_return(1, &start_pool); |
| 1012 | unsigned shrink_pages = sc->nr_to_scan; | 1009 | unsigned shrink_pages = sc->nr_to_scan; |
| 1013 | struct device_pools *p; | 1010 | struct device_pools *p; |
| 1011 | unsigned long freed = 0; | ||
| 1014 | 1012 | ||
| 1015 | if (list_empty(&_manager->pools)) | 1013 | if (list_empty(&_manager->pools)) |
| 1016 | return 0; | 1014 | return SHRINK_STOP; |
| 1017 | 1015 | ||
| 1018 | mutex_lock(&_manager->lock); | 1016 | mutex_lock(&_manager->lock); |
| 1019 | pool_offset = pool_offset % _manager->npools; | 1017 | pool_offset = pool_offset % _manager->npools; |
| @@ -1029,18 +1027,33 @@ static int ttm_dma_pool_mm_shrink(struct shrinker *shrink, | |||
| 1029 | continue; | 1027 | continue; |
| 1030 | nr_free = shrink_pages; | 1028 | nr_free = shrink_pages; |
| 1031 | shrink_pages = ttm_dma_page_pool_free(p->pool, nr_free); | 1029 | shrink_pages = ttm_dma_page_pool_free(p->pool, nr_free); |
| 1030 | freed += nr_free - shrink_pages; | ||
| 1031 | |||
| 1032 | pr_debug("%s: (%s:%d) Asked to shrink %d, have %d more to go\n", | 1032 | pr_debug("%s: (%s:%d) Asked to shrink %d, have %d more to go\n", |
| 1033 | p->pool->dev_name, p->pool->name, current->pid, | 1033 | p->pool->dev_name, p->pool->name, current->pid, |
| 1034 | nr_free, shrink_pages); | 1034 | nr_free, shrink_pages); |
| 1035 | } | 1035 | } |
| 1036 | mutex_unlock(&_manager->lock); | 1036 | mutex_unlock(&_manager->lock); |
| 1037 | /* return estimated number of unused pages in pool */ | 1037 | return freed; |
| 1038 | return ttm_dma_pool_get_num_unused_pages(); | 1038 | } |
| 1039 | |||
| 1040 | static unsigned long | ||
| 1041 | ttm_dma_pool_shrink_count(struct shrinker *shrink, struct shrink_control *sc) | ||
| 1042 | { | ||
| 1043 | struct device_pools *p; | ||
| 1044 | unsigned long count = 0; | ||
| 1045 | |||
| 1046 | mutex_lock(&_manager->lock); | ||
| 1047 | list_for_each_entry(p, &_manager->pools, pools) | ||
| 1048 | count += p->pool->npages_free; | ||
| 1049 | mutex_unlock(&_manager->lock); | ||
| 1050 | return count; | ||
| 1039 | } | 1051 | } |
| 1040 | 1052 | ||
| 1041 | static void ttm_dma_pool_mm_shrink_init(struct ttm_pool_manager *manager) | 1053 | static void ttm_dma_pool_mm_shrink_init(struct ttm_pool_manager *manager) |
| 1042 | { | 1054 | { |
| 1043 | manager->mm_shrink.shrink = &ttm_dma_pool_mm_shrink; | 1055 | manager->mm_shrink.count_objects = ttm_dma_pool_shrink_count; |
| 1056 | manager->mm_shrink.scan_objects = &ttm_dma_pool_shrink_scan; | ||
| 1044 | manager->mm_shrink.seeks = 1; | 1057 | manager->mm_shrink.seeks = 1; |
| 1045 | register_shrinker(&manager->mm_shrink); | 1058 | register_shrinker(&manager->mm_shrink); |
| 1046 | } | 1059 | } |
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index ee372884c405..f9764e61978b 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c | |||
| @@ -597,24 +597,19 @@ static int mca_reap(struct btree *b, struct closure *cl, unsigned min_order) | |||
| 597 | return 0; | 597 | return 0; |
| 598 | } | 598 | } |
| 599 | 599 | ||
| 600 | static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) | 600 | static unsigned long bch_mca_scan(struct shrinker *shrink, |
| 601 | struct shrink_control *sc) | ||
| 601 | { | 602 | { |
| 602 | struct cache_set *c = container_of(shrink, struct cache_set, shrink); | 603 | struct cache_set *c = container_of(shrink, struct cache_set, shrink); |
| 603 | struct btree *b, *t; | 604 | struct btree *b, *t; |
| 604 | unsigned long i, nr = sc->nr_to_scan; | 605 | unsigned long i, nr = sc->nr_to_scan; |
| 606 | unsigned long freed = 0; | ||
| 605 | 607 | ||
| 606 | if (c->shrinker_disabled) | 608 | if (c->shrinker_disabled) |
| 607 | return 0; | 609 | return SHRINK_STOP; |
| 608 | 610 | ||
| 609 | if (c->try_harder) | 611 | if (c->try_harder) |
| 610 | return 0; | 612 | return SHRINK_STOP; |
| 611 | |||
| 612 | /* | ||
| 613 | * If nr == 0, we're supposed to return the number of items we have | ||
| 614 | * cached. Not allowed to return -1. | ||
| 615 | */ | ||
| 616 | if (!nr) | ||
| 617 | return mca_can_free(c) * c->btree_pages; | ||
| 618 | 613 | ||
| 619 | /* Return -1 if we can't do anything right now */ | 614 | /* Return -1 if we can't do anything right now */ |
| 620 | if (sc->gfp_mask & __GFP_WAIT) | 615 | if (sc->gfp_mask & __GFP_WAIT) |
| @@ -634,14 +629,14 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
| 634 | 629 | ||
| 635 | i = 0; | 630 | i = 0; |
| 636 | list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) { | 631 | list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) { |
| 637 | if (!nr) | 632 | if (freed >= nr) |
| 638 | break; | 633 | break; |
| 639 | 634 | ||
| 640 | if (++i > 3 && | 635 | if (++i > 3 && |
| 641 | !mca_reap(b, NULL, 0)) { | 636 | !mca_reap(b, NULL, 0)) { |
| 642 | mca_data_free(b); | 637 | mca_data_free(b); |
| 643 | rw_unlock(true, b); | 638 | rw_unlock(true, b); |
| 644 | --nr; | 639 | freed++; |
| 645 | } | 640 | } |
| 646 | } | 641 | } |
| 647 | 642 | ||
| @@ -652,7 +647,7 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
| 652 | if (list_empty(&c->btree_cache)) | 647 | if (list_empty(&c->btree_cache)) |
| 653 | goto out; | 648 | goto out; |
| 654 | 649 | ||
| 655 | for (i = 0; nr && i < c->bucket_cache_used; i++) { | 650 | for (i = 0; (nr--) && i < c->bucket_cache_used; i++) { |
| 656 | b = list_first_entry(&c->btree_cache, struct btree, list); | 651 | b = list_first_entry(&c->btree_cache, struct btree, list); |
| 657 | list_rotate_left(&c->btree_cache); | 652 | list_rotate_left(&c->btree_cache); |
| 658 | 653 | ||
| @@ -661,14 +656,27 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
| 661 | mca_bucket_free(b); | 656 | mca_bucket_free(b); |
| 662 | mca_data_free(b); | 657 | mca_data_free(b); |
| 663 | rw_unlock(true, b); | 658 | rw_unlock(true, b); |
| 664 | --nr; | 659 | freed++; |
| 665 | } else | 660 | } else |
| 666 | b->accessed = 0; | 661 | b->accessed = 0; |
| 667 | } | 662 | } |
| 668 | out: | 663 | out: |
| 669 | nr = mca_can_free(c) * c->btree_pages; | ||
| 670 | mutex_unlock(&c->bucket_lock); | 664 | mutex_unlock(&c->bucket_lock); |
| 671 | return nr; | 665 | return freed; |
| 666 | } | ||
| 667 | |||
| 668 | static unsigned long bch_mca_count(struct shrinker *shrink, | ||
| 669 | struct shrink_control *sc) | ||
| 670 | { | ||
| 671 | struct cache_set *c = container_of(shrink, struct cache_set, shrink); | ||
| 672 | |||
| 673 | if (c->shrinker_disabled) | ||
| 674 | return 0; | ||
| 675 | |||
| 676 | if (c->try_harder) | ||
| 677 | return 0; | ||
| 678 | |||
| 679 | return mca_can_free(c) * c->btree_pages; | ||
| 672 | } | 680 | } |
| 673 | 681 | ||
| 674 | void bch_btree_cache_free(struct cache_set *c) | 682 | void bch_btree_cache_free(struct cache_set *c) |
| @@ -737,7 +745,8 @@ int bch_btree_cache_alloc(struct cache_set *c) | |||
| 737 | c->verify_data = NULL; | 745 | c->verify_data = NULL; |
| 738 | #endif | 746 | #endif |
| 739 | 747 | ||
| 740 | c->shrink.shrink = bch_mca_shrink; | 748 | c->shrink.count_objects = bch_mca_count; |
| 749 | c->shrink.scan_objects = bch_mca_scan; | ||
| 741 | c->shrink.seeks = 4; | 750 | c->shrink.seeks = 4; |
| 742 | c->shrink.batch = c->btree_pages * 2; | 751 | c->shrink.batch = c->btree_pages * 2; |
| 743 | register_shrinker(&c->shrink); | 752 | register_shrinker(&c->shrink); |
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 12a2c2846f99..4fe6ab2fbe2e 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c | |||
| @@ -556,7 +556,7 @@ STORE(__bch_cache_set) | |||
| 556 | struct shrink_control sc; | 556 | struct shrink_control sc; |
| 557 | sc.gfp_mask = GFP_KERNEL; | 557 | sc.gfp_mask = GFP_KERNEL; |
| 558 | sc.nr_to_scan = strtoul_or_return(buf); | 558 | sc.nr_to_scan = strtoul_or_return(buf); |
| 559 | c->shrink.shrink(&c->shrink, &sc); | 559 | c->shrink.scan_objects(&c->shrink, &sc); |
| 560 | } | 560 | } |
| 561 | 561 | ||
| 562 | sysfs_strtoul(congested_read_threshold_us, | 562 | sysfs_strtoul(congested_read_threshold_us, |
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 5227e079a6e3..173cbb20d104 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c | |||
| @@ -1425,62 +1425,75 @@ static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp, | |||
| 1425 | unsigned long max_jiffies) | 1425 | unsigned long max_jiffies) |
| 1426 | { | 1426 | { |
| 1427 | if (jiffies - b->last_accessed < max_jiffies) | 1427 | if (jiffies - b->last_accessed < max_jiffies) |
| 1428 | return 1; | 1428 | return 0; |
| 1429 | 1429 | ||
| 1430 | if (!(gfp & __GFP_IO)) { | 1430 | if (!(gfp & __GFP_IO)) { |
| 1431 | if (test_bit(B_READING, &b->state) || | 1431 | if (test_bit(B_READING, &b->state) || |
| 1432 | test_bit(B_WRITING, &b->state) || | 1432 | test_bit(B_WRITING, &b->state) || |
| 1433 | test_bit(B_DIRTY, &b->state)) | 1433 | test_bit(B_DIRTY, &b->state)) |
| 1434 | return 1; | 1434 | return 0; |
| 1435 | } | 1435 | } |
| 1436 | 1436 | ||
| 1437 | if (b->hold_count) | 1437 | if (b->hold_count) |
| 1438 | return 1; | 1438 | return 0; |
| 1439 | 1439 | ||
| 1440 | __make_buffer_clean(b); | 1440 | __make_buffer_clean(b); |
| 1441 | __unlink_buffer(b); | 1441 | __unlink_buffer(b); |
| 1442 | __free_buffer_wake(b); | 1442 | __free_buffer_wake(b); |
| 1443 | 1443 | ||
| 1444 | return 0; | 1444 | return 1; |
| 1445 | } | 1445 | } |
| 1446 | 1446 | ||
| 1447 | static void __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, | 1447 | static long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, |
| 1448 | struct shrink_control *sc) | 1448 | gfp_t gfp_mask) |
| 1449 | { | 1449 | { |
| 1450 | int l; | 1450 | int l; |
| 1451 | struct dm_buffer *b, *tmp; | 1451 | struct dm_buffer *b, *tmp; |
| 1452 | long freed = 0; | ||
| 1452 | 1453 | ||
| 1453 | for (l = 0; l < LIST_SIZE; l++) { | 1454 | for (l = 0; l < LIST_SIZE; l++) { |
| 1454 | list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) | 1455 | list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) { |
| 1455 | if (!__cleanup_old_buffer(b, sc->gfp_mask, 0) && | 1456 | freed += __cleanup_old_buffer(b, gfp_mask, 0); |
| 1456 | !--nr_to_scan) | 1457 | if (!--nr_to_scan) |
| 1457 | return; | 1458 | break; |
| 1459 | } | ||
| 1458 | dm_bufio_cond_resched(); | 1460 | dm_bufio_cond_resched(); |
| 1459 | } | 1461 | } |
| 1462 | return freed; | ||
| 1460 | } | 1463 | } |
| 1461 | 1464 | ||
| 1462 | static int shrink(struct shrinker *shrinker, struct shrink_control *sc) | 1465 | static unsigned long |
| 1466 | dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) | ||
| 1463 | { | 1467 | { |
| 1464 | struct dm_bufio_client *c = | 1468 | struct dm_bufio_client *c; |
| 1465 | container_of(shrinker, struct dm_bufio_client, shrinker); | 1469 | unsigned long freed; |
| 1466 | unsigned long r; | ||
| 1467 | unsigned long nr_to_scan = sc->nr_to_scan; | ||
| 1468 | 1470 | ||
| 1471 | c = container_of(shrink, struct dm_bufio_client, shrinker); | ||
| 1469 | if (sc->gfp_mask & __GFP_IO) | 1472 | if (sc->gfp_mask & __GFP_IO) |
| 1470 | dm_bufio_lock(c); | 1473 | dm_bufio_lock(c); |
| 1471 | else if (!dm_bufio_trylock(c)) | 1474 | else if (!dm_bufio_trylock(c)) |
| 1472 | return !nr_to_scan ? 0 : -1; | 1475 | return SHRINK_STOP; |
| 1473 | 1476 | ||
| 1474 | if (nr_to_scan) | 1477 | freed = __scan(c, sc->nr_to_scan, sc->gfp_mask); |
| 1475 | __scan(c, nr_to_scan, sc); | 1478 | dm_bufio_unlock(c); |
| 1479 | return freed; | ||
| 1480 | } | ||
| 1476 | 1481 | ||
| 1477 | r = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; | 1482 | static unsigned long |
| 1478 | if (r > INT_MAX) | 1483 | dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) |
| 1479 | r = INT_MAX; | 1484 | { |
| 1485 | struct dm_bufio_client *c; | ||
| 1486 | unsigned long count; | ||
| 1480 | 1487 | ||
| 1481 | dm_bufio_unlock(c); | 1488 | c = container_of(shrink, struct dm_bufio_client, shrinker); |
| 1489 | if (sc->gfp_mask & __GFP_IO) | ||
| 1490 | dm_bufio_lock(c); | ||
| 1491 | else if (!dm_bufio_trylock(c)) | ||
| 1492 | return 0; | ||
| 1482 | 1493 | ||
| 1483 | return r; | 1494 | count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; |
| 1495 | dm_bufio_unlock(c); | ||
| 1496 | return count; | ||
| 1484 | } | 1497 | } |
| 1485 | 1498 | ||
| 1486 | /* | 1499 | /* |
| @@ -1582,7 +1595,8 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign | |||
| 1582 | __cache_size_refresh(); | 1595 | __cache_size_refresh(); |
| 1583 | mutex_unlock(&dm_bufio_clients_lock); | 1596 | mutex_unlock(&dm_bufio_clients_lock); |
| 1584 | 1597 | ||
| 1585 | c->shrinker.shrink = shrink; | 1598 | c->shrinker.count_objects = dm_bufio_shrink_count; |
| 1599 | c->shrinker.scan_objects = dm_bufio_shrink_scan; | ||
| 1586 | c->shrinker.seeks = 1; | 1600 | c->shrinker.seeks = 1; |
| 1587 | c->shrinker.batch = 0; | 1601 | c->shrinker.batch = 0; |
| 1588 | register_shrinker(&c->shrinker); | 1602 | register_shrinker(&c->shrinker); |
| @@ -1669,7 +1683,7 @@ static void cleanup_old_buffers(void) | |||
| 1669 | struct dm_buffer *b; | 1683 | struct dm_buffer *b; |
| 1670 | b = list_entry(c->lru[LIST_CLEAN].prev, | 1684 | b = list_entry(c->lru[LIST_CLEAN].prev, |
| 1671 | struct dm_buffer, lru_list); | 1685 | struct dm_buffer, lru_list); |
| 1672 | if (__cleanup_old_buffer(b, 0, max_age * HZ)) | 1686 | if (!__cleanup_old_buffer(b, 0, max_age * HZ)) |
| 1673 | break; | 1687 | break; |
| 1674 | dm_bufio_cond_resched(); | 1688 | dm_bufio_cond_resched(); |
| 1675 | } | 1689 | } |
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 21a3f7250531..8e76ddca0999 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c | |||
| @@ -341,27 +341,26 @@ out: | |||
| 341 | /* | 341 | /* |
| 342 | * ashmem_shrink - our cache shrinker, called from mm/vmscan.c :: shrink_slab | 342 | * ashmem_shrink - our cache shrinker, called from mm/vmscan.c :: shrink_slab |
| 343 | * | 343 | * |
| 344 | * 'nr_to_scan' is the number of objects (pages) to prune, or 0 to query how | 344 | * 'nr_to_scan' is the number of objects to scan for freeing. |
| 345 | * many objects (pages) we have in total. | ||
| 346 | * | 345 | * |
| 347 | * 'gfp_mask' is the mask of the allocation that got us into this mess. | 346 | * 'gfp_mask' is the mask of the allocation that got us into this mess. |
| 348 | * | 347 | * |
| 349 | * Return value is the number of objects (pages) remaining, or -1 if we cannot | 348 | * Return value is the number of objects freed or -1 if we cannot |
| 350 | * proceed without risk of deadlock (due to gfp_mask). | 349 | * proceed without risk of deadlock (due to gfp_mask). |
| 351 | * | 350 | * |
| 352 | * We approximate LRU via least-recently-unpinned, jettisoning unpinned partial | 351 | * We approximate LRU via least-recently-unpinned, jettisoning unpinned partial |
| 353 | * chunks of ashmem regions LRU-wise one-at-a-time until we hit 'nr_to_scan' | 352 | * chunks of ashmem regions LRU-wise one-at-a-time until we hit 'nr_to_scan' |
| 354 | * pages freed. | 353 | * pages freed. |
| 355 | */ | 354 | */ |
| 356 | static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc) | 355 | static unsigned long |
| 356 | ashmem_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) | ||
| 357 | { | 357 | { |
| 358 | struct ashmem_range *range, *next; | 358 | struct ashmem_range *range, *next; |
| 359 | unsigned long freed = 0; | ||
| 359 | 360 | ||
| 360 | /* We might recurse into filesystem code, so bail out if necessary */ | 361 | /* We might recurse into filesystem code, so bail out if necessary */ |
| 361 | if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS)) | 362 | if (!(sc->gfp_mask & __GFP_FS)) |
| 362 | return -1; | 363 | return SHRINK_STOP; |
| 363 | if (!sc->nr_to_scan) | ||
| 364 | return lru_count; | ||
| 365 | 364 | ||
| 366 | mutex_lock(&ashmem_mutex); | 365 | mutex_lock(&ashmem_mutex); |
| 367 | list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) { | 366 | list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) { |
| @@ -374,17 +373,32 @@ static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc) | |||
| 374 | range->purged = ASHMEM_WAS_PURGED; | 373 | range->purged = ASHMEM_WAS_PURGED; |
| 375 | lru_del(range); | 374 | lru_del(range); |
| 376 | 375 | ||
| 377 | sc->nr_to_scan -= range_size(range); | 376 | freed += range_size(range); |
| 378 | if (sc->nr_to_scan <= 0) | 377 | if (--sc->nr_to_scan <= 0) |
| 379 | break; | 378 | break; |
| 380 | } | 379 | } |
| 381 | mutex_unlock(&ashmem_mutex); | 380 | mutex_unlock(&ashmem_mutex); |
| 381 | return freed; | ||
| 382 | } | ||
| 382 | 383 | ||
| 384 | static unsigned long | ||
| 385 | ashmem_shrink_count(struct shrinker *shrink, struct shrink_control *sc) | ||
| 386 | { | ||
| 387 | /* | ||
| 388 | * note that lru_count is count of pages on the lru, not a count of | ||
| 389 | * objects on the list. This means the scan function needs to return the | ||
| 390 | * number of pages freed, not the number of objects scanned. | ||
| 391 | */ | ||
| 383 | return lru_count; | 392 | return lru_count; |
| 384 | } | 393 | } |
| 385 | 394 | ||
| 386 | static struct shrinker ashmem_shrinker = { | 395 | static struct shrinker ashmem_shrinker = { |
| 387 | .shrink = ashmem_shrink, | 396 | .count_objects = ashmem_shrink_count, |
| 397 | .scan_objects = ashmem_shrink_scan, | ||
| 398 | /* | ||
| 399 | * XXX (dchinner): I wish people would comment on why they need on | ||
| 400 | * significant changes to the default value here | ||
| 401 | */ | ||
| 388 | .seeks = DEFAULT_SEEKS * 4, | 402 | .seeks = DEFAULT_SEEKS * 4, |
| 389 | }; | 403 | }; |
| 390 | 404 | ||
| @@ -690,11 +704,11 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
| 690 | if (capable(CAP_SYS_ADMIN)) { | 704 | if (capable(CAP_SYS_ADMIN)) { |
| 691 | struct shrink_control sc = { | 705 | struct shrink_control sc = { |
| 692 | .gfp_mask = GFP_KERNEL, | 706 | .gfp_mask = GFP_KERNEL, |
| 693 | .nr_to_scan = 0, | 707 | .nr_to_scan = LONG_MAX, |
| 694 | }; | 708 | }; |
| 695 | ret = ashmem_shrink(&ashmem_shrinker, &sc); | 709 | |
| 696 | sc.nr_to_scan = ret; | 710 | nodes_setall(sc.nodes_to_scan); |
| 697 | ashmem_shrink(&ashmem_shrinker, &sc); | 711 | ashmem_shrink_scan(&ashmem_shrinker, &sc); |
| 698 | } | 712 | } |
| 699 | break; | 713 | break; |
| 700 | } | 714 | } |
diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index fe74494868ef..6f094b37f1f1 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c | |||
| @@ -66,11 +66,20 @@ static unsigned long lowmem_deathpending_timeout; | |||
| 66 | pr_info(x); \ | 66 | pr_info(x); \ |
| 67 | } while (0) | 67 | } while (0) |
| 68 | 68 | ||
| 69 | static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) | 69 | static unsigned long lowmem_count(struct shrinker *s, |
| 70 | struct shrink_control *sc) | ||
| 71 | { | ||
| 72 | return global_page_state(NR_ACTIVE_ANON) + | ||
| 73 | global_page_state(NR_ACTIVE_FILE) + | ||
| 74 | global_page_state(NR_INACTIVE_ANON) + | ||
| 75 | global_page_state(NR_INACTIVE_FILE); | ||
| 76 | } | ||
| 77 | |||
| 78 | static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) | ||
| 70 | { | 79 | { |
| 71 | struct task_struct *tsk; | 80 | struct task_struct *tsk; |
| 72 | struct task_struct *selected = NULL; | 81 | struct task_struct *selected = NULL; |
| 73 | int rem = 0; | 82 | unsigned long rem = 0; |
| 74 | int tasksize; | 83 | int tasksize; |
| 75 | int i; | 84 | int i; |
| 76 | short min_score_adj = OOM_SCORE_ADJ_MAX + 1; | 85 | short min_score_adj = OOM_SCORE_ADJ_MAX + 1; |
| @@ -92,19 +101,17 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) | |||
| 92 | break; | 101 | break; |
| 93 | } | 102 | } |
| 94 | } | 103 | } |
| 95 | if (sc->nr_to_scan > 0) | 104 | |
| 96 | lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %hd\n", | 105 | lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n", |
| 97 | sc->nr_to_scan, sc->gfp_mask, other_free, | 106 | sc->nr_to_scan, sc->gfp_mask, other_free, |
| 98 | other_file, min_score_adj); | 107 | other_file, min_score_adj); |
| 99 | rem = global_page_state(NR_ACTIVE_ANON) + | 108 | |
| 100 | global_page_state(NR_ACTIVE_FILE) + | 109 | if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) { |
| 101 | global_page_state(NR_INACTIVE_ANON) + | 110 | lowmem_print(5, "lowmem_scan %lu, %x, return 0\n", |
| 102 | global_page_state(NR_INACTIVE_FILE); | 111 | sc->nr_to_scan, sc->gfp_mask); |
| 103 | if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { | 112 | return 0; |
| 104 | lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", | ||
| 105 | sc->nr_to_scan, sc->gfp_mask, rem); | ||
| 106 | return rem; | ||
| 107 | } | 113 | } |
| 114 | |||
| 108 | selected_oom_score_adj = min_score_adj; | 115 | selected_oom_score_adj = min_score_adj; |
| 109 | 116 | ||
| 110 | rcu_read_lock(); | 117 | rcu_read_lock(); |
| @@ -154,16 +161,18 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) | |||
| 154 | lowmem_deathpending_timeout = jiffies + HZ; | 161 | lowmem_deathpending_timeout = jiffies + HZ; |
| 155 | send_sig(SIGKILL, selected, 0); | 162 | send_sig(SIGKILL, selected, 0); |
| 156 | set_tsk_thread_flag(selected, TIF_MEMDIE); | 163 | set_tsk_thread_flag(selected, TIF_MEMDIE); |
| 157 | rem -= selected_tasksize; | 164 | rem += selected_tasksize; |
| 158 | } | 165 | } |
| 159 | lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", | 166 | |
| 167 | lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n", | ||
| 160 | sc->nr_to_scan, sc->gfp_mask, rem); | 168 | sc->nr_to_scan, sc->gfp_mask, rem); |
| 161 | rcu_read_unlock(); | 169 | rcu_read_unlock(); |
| 162 | return rem; | 170 | return rem; |
| 163 | } | 171 | } |
| 164 | 172 | ||
| 165 | static struct shrinker lowmem_shrinker = { | 173 | static struct shrinker lowmem_shrinker = { |
| 166 | .shrink = lowmem_shrink, | 174 | .scan_objects = lowmem_scan, |
| 175 | .count_objects = lowmem_count, | ||
| 167 | .seeks = DEFAULT_SEEKS * 16 | 176 | .seeks = DEFAULT_SEEKS * 16 |
| 168 | }; | 177 | }; |
| 169 | 178 | ||
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h index 63efb7b456c6..2af15d41e77a 100644 --- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h | |||
| @@ -79,42 +79,4 @@ | |||
| 79 | do { __oldfs = get_fs(); set_fs(get_ds());} while(0) | 79 | do { __oldfs = get_fs(); set_fs(get_ds());} while(0) |
| 80 | #define MMSPACE_CLOSE set_fs(__oldfs) | 80 | #define MMSPACE_CLOSE set_fs(__oldfs) |
| 81 | 81 | ||
| 82 | /* | ||
| 83 | * Shrinker | ||
| 84 | */ | ||
| 85 | |||
| 86 | # define SHRINKER_ARGS(sc, nr_to_scan, gfp_mask) \ | ||
| 87 | struct shrinker *shrinker, \ | ||
| 88 | struct shrink_control *sc | ||
| 89 | # define shrink_param(sc, var) ((sc)->var) | ||
| 90 | |||
| 91 | typedef int (*shrinker_t)(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)); | ||
| 92 | |||
| 93 | static inline | ||
| 94 | struct shrinker *set_shrinker(int seek, shrinker_t func) | ||
| 95 | { | ||
| 96 | struct shrinker *s; | ||
| 97 | |||
| 98 | s = kmalloc(sizeof(*s), GFP_KERNEL); | ||
| 99 | if (s == NULL) | ||
| 100 | return (NULL); | ||
| 101 | |||
| 102 | s->shrink = func; | ||
| 103 | s->seeks = seek; | ||
| 104 | |||
| 105 | register_shrinker(s); | ||
| 106 | |||
| 107 | return s; | ||
| 108 | } | ||
| 109 | |||
| 110 | static inline | ||
| 111 | void remove_shrinker(struct shrinker *shrinker) | ||
| 112 | { | ||
| 113 | if (shrinker == NULL) | ||
| 114 | return; | ||
| 115 | |||
| 116 | unregister_shrinker(shrinker); | ||
| 117 | kfree(shrinker); | ||
| 118 | } | ||
| 119 | |||
| 120 | #endif /* __LINUX_CFS_MEM_H__ */ | 82 | #endif /* __LINUX_CFS_MEM_H__ */ |
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c index 454027d68d54..0025ee6356da 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c | |||
| @@ -521,7 +521,7 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl, | |||
| 521 | int nr, unsigned int gfp_mask) | 521 | int nr, unsigned int gfp_mask) |
| 522 | { | 522 | { |
| 523 | struct ldlm_namespace *ns; | 523 | struct ldlm_namespace *ns; |
| 524 | int canceled = 0, unused; | 524 | int unused; |
| 525 | 525 | ||
| 526 | ns = ldlm_pl2ns(pl); | 526 | ns = ldlm_pl2ns(pl); |
| 527 | 527 | ||
| @@ -540,14 +540,10 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl, | |||
| 540 | unused = ns->ns_nr_unused; | 540 | unused = ns->ns_nr_unused; |
| 541 | spin_unlock(&ns->ns_lock); | 541 | spin_unlock(&ns->ns_lock); |
| 542 | 542 | ||
| 543 | if (nr) { | 543 | if (nr == 0) |
| 544 | canceled = ldlm_cancel_lru(ns, nr, LCF_ASYNC, | 544 | return (unused / 100) * sysctl_vfs_cache_pressure; |
| 545 | LDLM_CANCEL_SHRINK); | 545 | else |
| 546 | } | 546 | return ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_CANCEL_SHRINK); |
| 547 | /* | ||
| 548 | * Return the number of potentially reclaimable locks. | ||
| 549 | */ | ||
| 550 | return ((unused - canceled) / 100) * sysctl_vfs_cache_pressure; | ||
| 551 | } | 547 | } |
| 552 | 548 | ||
| 553 | struct ldlm_pool_ops ldlm_srv_pool_ops = { | 549 | struct ldlm_pool_ops ldlm_srv_pool_ops = { |
| @@ -601,9 +597,10 @@ int ldlm_pool_recalc(struct ldlm_pool *pl) | |||
| 601 | return recalc_interval_sec; | 597 | return recalc_interval_sec; |
| 602 | } | 598 | } |
| 603 | 599 | ||
| 604 | /** | 600 | /* |
| 605 | * Pool shrink wrapper. Will call either client or server pool recalc callback | 601 | * Pool shrink wrapper. Will call either client or server pool recalc callback |
| 606 | * depending what pool \a pl is used. | 602 | * depending what pool pl is used. When nr == 0, just return the number of |
| 603 | * freeable locks. Otherwise, return the number of canceled locks. | ||
| 607 | */ | 604 | */ |
| 608 | int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, | 605 | int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, |
| 609 | unsigned int gfp_mask) | 606 | unsigned int gfp_mask) |
| @@ -1017,29 +1014,24 @@ static int ldlm_pool_granted(struct ldlm_pool *pl) | |||
| 1017 | } | 1014 | } |
| 1018 | 1015 | ||
| 1019 | static struct ptlrpc_thread *ldlm_pools_thread; | 1016 | static struct ptlrpc_thread *ldlm_pools_thread; |
| 1020 | static struct shrinker *ldlm_pools_srv_shrinker; | ||
| 1021 | static struct shrinker *ldlm_pools_cli_shrinker; | ||
| 1022 | static struct completion ldlm_pools_comp; | 1017 | static struct completion ldlm_pools_comp; |
| 1023 | 1018 | ||
| 1024 | /* | 1019 | /* |
| 1025 | * Cancel \a nr locks from all namespaces (if possible). Returns number of | 1020 | * count locks from all namespaces (if possible). Returns number of |
| 1026 | * cached locks after shrink is finished. All namespaces are asked to | 1021 | * cached locks. |
| 1027 | * cancel approximately equal amount of locks to keep balancing. | ||
| 1028 | */ | 1022 | */ |
| 1029 | static int ldlm_pools_shrink(ldlm_side_t client, int nr, | 1023 | static unsigned long ldlm_pools_count(ldlm_side_t client, unsigned int gfp_mask) |
| 1030 | unsigned int gfp_mask) | ||
| 1031 | { | 1024 | { |
| 1032 | int total = 0, cached = 0, nr_ns; | 1025 | int total = 0, nr_ns; |
| 1033 | struct ldlm_namespace *ns; | 1026 | struct ldlm_namespace *ns; |
| 1034 | struct ldlm_namespace *ns_old = NULL; /* loop detection */ | 1027 | struct ldlm_namespace *ns_old = NULL; /* loop detection */ |
| 1035 | void *cookie; | 1028 | void *cookie; |
| 1036 | 1029 | ||
| 1037 | if (client == LDLM_NAMESPACE_CLIENT && nr != 0 && | 1030 | if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS)) |
| 1038 | !(gfp_mask & __GFP_FS)) | 1031 | return 0; |
| 1039 | return -1; | ||
| 1040 | 1032 | ||
| 1041 | CDEBUG(D_DLMTRACE, "Request to shrink %d %s locks from all pools\n", | 1033 | CDEBUG(D_DLMTRACE, "Request to count %s locks from all pools\n", |
| 1042 | nr, client == LDLM_NAMESPACE_CLIENT ? "client" : "server"); | 1034 | client == LDLM_NAMESPACE_CLIENT ? "client" : "server"); |
| 1043 | 1035 | ||
| 1044 | cookie = cl_env_reenter(); | 1036 | cookie = cl_env_reenter(); |
| 1045 | 1037 | ||
| @@ -1047,8 +1039,7 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, | |||
| 1047 | * Find out how many resources we may release. | 1039 | * Find out how many resources we may release. |
| 1048 | */ | 1040 | */ |
| 1049 | for (nr_ns = ldlm_namespace_nr_read(client); | 1041 | for (nr_ns = ldlm_namespace_nr_read(client); |
| 1050 | nr_ns > 0; nr_ns--) | 1042 | nr_ns > 0; nr_ns--) { |
| 1051 | { | ||
| 1052 | mutex_lock(ldlm_namespace_lock(client)); | 1043 | mutex_lock(ldlm_namespace_lock(client)); |
| 1053 | if (list_empty(ldlm_namespace_list(client))) { | 1044 | if (list_empty(ldlm_namespace_list(client))) { |
| 1054 | mutex_unlock(ldlm_namespace_lock(client)); | 1045 | mutex_unlock(ldlm_namespace_lock(client)); |
| @@ -1078,17 +1069,27 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, | |||
| 1078 | ldlm_namespace_put(ns); | 1069 | ldlm_namespace_put(ns); |
| 1079 | } | 1070 | } |
| 1080 | 1071 | ||
| 1081 | if (nr == 0 || total == 0) { | 1072 | cl_env_reexit(cookie); |
| 1082 | cl_env_reexit(cookie); | 1073 | return total; |
| 1083 | return total; | 1074 | } |
| 1084 | } | 1075 | |
| 1076 | static unsigned long ldlm_pools_scan(ldlm_side_t client, int nr, unsigned int gfp_mask) | ||
| 1077 | { | ||
| 1078 | unsigned long freed = 0; | ||
| 1079 | int tmp, nr_ns; | ||
| 1080 | struct ldlm_namespace *ns; | ||
| 1081 | void *cookie; | ||
| 1082 | |||
| 1083 | if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS)) | ||
| 1084 | return -1; | ||
| 1085 | |||
| 1086 | cookie = cl_env_reenter(); | ||
| 1085 | 1087 | ||
| 1086 | /* | 1088 | /* |
| 1087 | * Shrink at least ldlm_namespace_nr(client) namespaces. | 1089 | * Shrink at least ldlm_namespace_nr_read(client) namespaces. |
| 1088 | */ | 1090 | */ |
| 1089 | for (nr_ns = ldlm_namespace_nr_read(client) - nr_ns; | 1091 | for (tmp = nr_ns = ldlm_namespace_nr_read(client); |
| 1090 | nr_ns > 0; nr_ns--) | 1092 | tmp > 0; tmp--) { |
| 1091 | { | ||
| 1092 | int cancel, nr_locks; | 1093 | int cancel, nr_locks; |
| 1093 | 1094 | ||
| 1094 | /* | 1095 | /* |
| @@ -1097,12 +1098,6 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, | |||
| 1097 | mutex_lock(ldlm_namespace_lock(client)); | 1098 | mutex_lock(ldlm_namespace_lock(client)); |
| 1098 | if (list_empty(ldlm_namespace_list(client))) { | 1099 | if (list_empty(ldlm_namespace_list(client))) { |
| 1099 | mutex_unlock(ldlm_namespace_lock(client)); | 1100 | mutex_unlock(ldlm_namespace_lock(client)); |
| 1100 | /* | ||
| 1101 | * If list is empty, we can't return any @cached > 0, | ||
| 1102 | * that probably would cause needless shrinker | ||
| 1103 | * call. | ||
| 1104 | */ | ||
| 1105 | cached = 0; | ||
| 1106 | break; | 1101 | break; |
| 1107 | } | 1102 | } |
| 1108 | ns = ldlm_namespace_first_locked(client); | 1103 | ns = ldlm_namespace_first_locked(client); |
| @@ -1111,29 +1106,42 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, | |||
| 1111 | mutex_unlock(ldlm_namespace_lock(client)); | 1106 | mutex_unlock(ldlm_namespace_lock(client)); |
| 1112 | 1107 | ||
| 1113 | nr_locks = ldlm_pool_granted(&ns->ns_pool); | 1108 | nr_locks = ldlm_pool_granted(&ns->ns_pool); |
| 1114 | cancel = 1 + nr_locks * nr / total; | 1109 | /* |
| 1115 | ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask); | 1110 | * We use to shrink propotionally but with new shrinker API, |
| 1116 | cached += ldlm_pool_granted(&ns->ns_pool); | 1111 | * we lost the total number of freeable locks. |
| 1112 | */ | ||
| 1113 | cancel = 1 + min_t(int, nr_locks, nr / nr_ns); | ||
| 1114 | freed += ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask); | ||
| 1117 | ldlm_namespace_put(ns); | 1115 | ldlm_namespace_put(ns); |
| 1118 | } | 1116 | } |
| 1119 | cl_env_reexit(cookie); | 1117 | cl_env_reexit(cookie); |
| 1120 | /* we only decrease the SLV in server pools shrinker, return -1 to | 1118 | /* |
| 1121 | * kernel to avoid needless loop. LU-1128 */ | 1119 | * we only decrease the SLV in server pools shrinker, return |
| 1122 | return (client == LDLM_NAMESPACE_SERVER) ? -1 : cached; | 1120 | * SHRINK_STOP to kernel to avoid needless loop. LU-1128 |
| 1121 | */ | ||
| 1122 | return (client == LDLM_NAMESPACE_SERVER) ? SHRINK_STOP : freed; | ||
| 1123 | } | ||
| 1124 | |||
| 1125 | static unsigned long ldlm_pools_srv_count(struct shrinker *s, struct shrink_control *sc) | ||
| 1126 | { | ||
| 1127 | return ldlm_pools_count(LDLM_NAMESPACE_SERVER, sc->gfp_mask); | ||
| 1123 | } | 1128 | } |
| 1124 | 1129 | ||
| 1125 | static int ldlm_pools_srv_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) | 1130 | static unsigned long ldlm_pools_srv_scan(struct shrinker *s, struct shrink_control *sc) |
| 1126 | { | 1131 | { |
| 1127 | return ldlm_pools_shrink(LDLM_NAMESPACE_SERVER, | 1132 | return ldlm_pools_scan(LDLM_NAMESPACE_SERVER, sc->nr_to_scan, |
| 1128 | shrink_param(sc, nr_to_scan), | 1133 | sc->gfp_mask); |
| 1129 | shrink_param(sc, gfp_mask)); | ||
| 1130 | } | 1134 | } |
| 1131 | 1135 | ||
| 1132 | static int ldlm_pools_cli_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) | 1136 | static unsigned long ldlm_pools_cli_count(struct shrinker *s, struct shrink_control *sc) |
| 1133 | { | 1137 | { |
| 1134 | return ldlm_pools_shrink(LDLM_NAMESPACE_CLIENT, | 1138 | return ldlm_pools_count(LDLM_NAMESPACE_CLIENT, sc->gfp_mask); |
| 1135 | shrink_param(sc, nr_to_scan), | 1139 | } |
| 1136 | shrink_param(sc, gfp_mask)); | 1140 | |
| 1141 | static unsigned long ldlm_pools_cli_scan(struct shrinker *s, struct shrink_control *sc) | ||
| 1142 | { | ||
| 1143 | return ldlm_pools_scan(LDLM_NAMESPACE_CLIENT, sc->nr_to_scan, | ||
| 1144 | sc->gfp_mask); | ||
| 1137 | } | 1145 | } |
| 1138 | 1146 | ||
| 1139 | int ldlm_pools_recalc(ldlm_side_t client) | 1147 | int ldlm_pools_recalc(ldlm_side_t client) |
| @@ -1216,7 +1224,7 @@ int ldlm_pools_recalc(ldlm_side_t client) | |||
| 1216 | } | 1224 | } |
| 1217 | 1225 | ||
| 1218 | /* | 1226 | /* |
| 1219 | * Recalc at least ldlm_namespace_nr(client) namespaces. | 1227 | * Recalc at least ldlm_namespace_nr_read(client) namespaces. |
| 1220 | */ | 1228 | */ |
| 1221 | for (nr = ldlm_namespace_nr_read(client); nr > 0; nr--) { | 1229 | for (nr = ldlm_namespace_nr_read(client); nr > 0; nr--) { |
| 1222 | int skip; | 1230 | int skip; |
| @@ -1383,18 +1391,26 @@ static void ldlm_pools_thread_stop(void) | |||
| 1383 | ldlm_pools_thread = NULL; | 1391 | ldlm_pools_thread = NULL; |
| 1384 | } | 1392 | } |
| 1385 | 1393 | ||
| 1394 | static struct shrinker ldlm_pools_srv_shrinker = { | ||
| 1395 | .count_objects = ldlm_pools_srv_count, | ||
| 1396 | .scan_objects = ldlm_pools_srv_scan, | ||
| 1397 | .seeks = DEFAULT_SEEKS, | ||
| 1398 | }; | ||
| 1399 | |||
| 1400 | static struct shrinker ldlm_pools_cli_shrinker = { | ||
| 1401 | .count_objects = ldlm_pools_cli_count, | ||
| 1402 | .scan_objects = ldlm_pools_cli_scan, | ||
| 1403 | .seeks = DEFAULT_SEEKS, | ||
| 1404 | }; | ||
| 1405 | |||
| 1386 | int ldlm_pools_init(void) | 1406 | int ldlm_pools_init(void) |
| 1387 | { | 1407 | { |
| 1388 | int rc; | 1408 | int rc; |
| 1389 | 1409 | ||
| 1390 | rc = ldlm_pools_thread_start(); | 1410 | rc = ldlm_pools_thread_start(); |
| 1391 | if (rc == 0) { | 1411 | if (rc == 0) { |
| 1392 | ldlm_pools_srv_shrinker = | 1412 | register_shrinker(&ldlm_pools_srv_shrinker); |
| 1393 | set_shrinker(DEFAULT_SEEKS, | 1413 | register_shrinker(&ldlm_pools_cli_shrinker); |
| 1394 | ldlm_pools_srv_shrink); | ||
| 1395 | ldlm_pools_cli_shrinker = | ||
| 1396 | set_shrinker(DEFAULT_SEEKS, | ||
| 1397 | ldlm_pools_cli_shrink); | ||
| 1398 | } | 1414 | } |
| 1399 | return rc; | 1415 | return rc; |
| 1400 | } | 1416 | } |
| @@ -1402,14 +1418,8 @@ EXPORT_SYMBOL(ldlm_pools_init); | |||
| 1402 | 1418 | ||
| 1403 | void ldlm_pools_fini(void) | 1419 | void ldlm_pools_fini(void) |
| 1404 | { | 1420 | { |
| 1405 | if (ldlm_pools_srv_shrinker != NULL) { | 1421 | unregister_shrinker(&ldlm_pools_srv_shrinker); |
| 1406 | remove_shrinker(ldlm_pools_srv_shrinker); | 1422 | unregister_shrinker(&ldlm_pools_cli_shrinker); |
| 1407 | ldlm_pools_srv_shrinker = NULL; | ||
| 1408 | } | ||
| 1409 | if (ldlm_pools_cli_shrinker != NULL) { | ||
| 1410 | remove_shrinker(ldlm_pools_cli_shrinker); | ||
| 1411 | ldlm_pools_cli_shrinker = NULL; | ||
| 1412 | } | ||
| 1413 | ldlm_pools_thread_stop(); | 1423 | ldlm_pools_thread_stop(); |
| 1414 | } | 1424 | } |
| 1415 | EXPORT_SYMBOL(ldlm_pools_fini); | 1425 | EXPORT_SYMBOL(ldlm_pools_fini); |
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c index c29ac1c2defd..3a3d5bc5a628 100644 --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c | |||
| @@ -1779,7 +1779,6 @@ int lu_env_refill_by_tags(struct lu_env *env, __u32 ctags, | |||
| 1779 | } | 1779 | } |
| 1780 | EXPORT_SYMBOL(lu_env_refill_by_tags); | 1780 | EXPORT_SYMBOL(lu_env_refill_by_tags); |
| 1781 | 1781 | ||
| 1782 | static struct shrinker *lu_site_shrinker = NULL; | ||
| 1783 | 1782 | ||
| 1784 | typedef struct lu_site_stats{ | 1783 | typedef struct lu_site_stats{ |
| 1785 | unsigned lss_populated; | 1784 | unsigned lss_populated; |
| @@ -1835,61 +1834,68 @@ static void lu_site_stats_get(cfs_hash_t *hs, | |||
| 1835 | * objects without taking the lu_sites_guard lock, but this is not | 1834 | * objects without taking the lu_sites_guard lock, but this is not |
| 1836 | * possible in the current implementation. | 1835 | * possible in the current implementation. |
| 1837 | */ | 1836 | */ |
| 1838 | static int lu_cache_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) | 1837 | static unsigned long lu_cache_shrink_count(struct shrinker *sk, |
| 1838 | struct shrink_control *sc) | ||
| 1839 | { | 1839 | { |
| 1840 | lu_site_stats_t stats; | 1840 | lu_site_stats_t stats; |
| 1841 | struct lu_site *s; | 1841 | struct lu_site *s; |
| 1842 | struct lu_site *tmp; | 1842 | struct lu_site *tmp; |
| 1843 | int cached = 0; | 1843 | unsigned long cached = 0; |
| 1844 | int remain = shrink_param(sc, nr_to_scan); | ||
| 1845 | LIST_HEAD(splice); | ||
| 1846 | |||
| 1847 | if (!(shrink_param(sc, gfp_mask) & __GFP_FS)) { | ||
| 1848 | if (remain != 0) | ||
| 1849 | return -1; | ||
| 1850 | else | ||
| 1851 | /* We must not take the lu_sites_guard lock when | ||
| 1852 | * __GFP_FS is *not* set because of the deadlock | ||
| 1853 | * possibility detailed above. Additionally, | ||
| 1854 | * since we cannot determine the number of | ||
| 1855 | * objects in the cache without taking this | ||
| 1856 | * lock, we're in a particularly tough spot. As | ||
| 1857 | * a result, we'll just lie and say our cache is | ||
| 1858 | * empty. This _should_ be ok, as we can't | ||
| 1859 | * reclaim objects when __GFP_FS is *not* set | ||
| 1860 | * anyways. | ||
| 1861 | */ | ||
| 1862 | return 0; | ||
| 1863 | } | ||
| 1864 | 1844 | ||
| 1865 | CDEBUG(D_INODE, "Shrink %d objects\n", remain); | 1845 | if (!(sc->gfp_mask & __GFP_FS)) |
| 1846 | return 0; | ||
| 1866 | 1847 | ||
| 1867 | mutex_lock(&lu_sites_guard); | 1848 | mutex_lock(&lu_sites_guard); |
| 1868 | list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) { | 1849 | list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) { |
| 1869 | if (shrink_param(sc, nr_to_scan) != 0) { | ||
| 1870 | remain = lu_site_purge(&lu_shrink_env, s, remain); | ||
| 1871 | /* | ||
| 1872 | * Move just shrunk site to the tail of site list to | ||
| 1873 | * assure shrinking fairness. | ||
| 1874 | */ | ||
| 1875 | list_move_tail(&s->ls_linkage, &splice); | ||
| 1876 | } | ||
| 1877 | |||
| 1878 | memset(&stats, 0, sizeof(stats)); | 1850 | memset(&stats, 0, sizeof(stats)); |
| 1879 | lu_site_stats_get(s->ls_obj_hash, &stats, 0); | 1851 | lu_site_stats_get(s->ls_obj_hash, &stats, 0); |
| 1880 | cached += stats.lss_total - stats.lss_busy; | 1852 | cached += stats.lss_total - stats.lss_busy; |
| 1881 | if (shrink_param(sc, nr_to_scan) && remain <= 0) | ||
| 1882 | break; | ||
| 1883 | } | 1853 | } |
| 1884 | list_splice(&splice, lu_sites.prev); | ||
| 1885 | mutex_unlock(&lu_sites_guard); | 1854 | mutex_unlock(&lu_sites_guard); |
| 1886 | 1855 | ||
| 1887 | cached = (cached / 100) * sysctl_vfs_cache_pressure; | 1856 | cached = (cached / 100) * sysctl_vfs_cache_pressure; |
| 1888 | if (shrink_param(sc, nr_to_scan) == 0) | 1857 | CDEBUG(D_INODE, "%ld objects cached\n", cached); |
| 1889 | CDEBUG(D_INODE, "%d objects cached\n", cached); | ||
| 1890 | return cached; | 1858 | return cached; |
| 1891 | } | 1859 | } |
| 1892 | 1860 | ||
| 1861 | static unsigned long lu_cache_shrink_scan(struct shrinker *sk, | ||
| 1862 | struct shrink_control *sc) | ||
| 1863 | { | ||
| 1864 | struct lu_site *s; | ||
| 1865 | struct lu_site *tmp; | ||
| 1866 | unsigned long remain = sc->nr_to_scan, freed = 0; | ||
| 1867 | LIST_HEAD(splice); | ||
| 1868 | |||
| 1869 | if (!(sc->gfp_mask & __GFP_FS)) | ||
| 1870 | /* We must not take the lu_sites_guard lock when | ||
| 1871 | * __GFP_FS is *not* set because of the deadlock | ||
| 1872 | * possibility detailed above. Additionally, | ||
| 1873 | * since we cannot determine the number of | ||
| 1874 | * objects in the cache without taking this | ||
| 1875 | * lock, we're in a particularly tough spot. As | ||
| 1876 | * a result, we'll just lie and say our cache is | ||
| 1877 | * empty. This _should_ be ok, as we can't | ||
| 1878 | * reclaim objects when __GFP_FS is *not* set | ||
| 1879 | * anyways. | ||
| 1880 | */ | ||
| 1881 | return SHRINK_STOP; | ||
| 1882 | |||
| 1883 | mutex_lock(&lu_sites_guard); | ||
| 1884 | list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) { | ||
| 1885 | freed = lu_site_purge(&lu_shrink_env, s, remain); | ||
| 1886 | remain -= freed; | ||
| 1887 | /* | ||
| 1888 | * Move just shrunk site to the tail of site list to | ||
| 1889 | * assure shrinking fairness. | ||
| 1890 | */ | ||
| 1891 | list_move_tail(&s->ls_linkage, &splice); | ||
| 1892 | } | ||
| 1893 | list_splice(&splice, lu_sites.prev); | ||
| 1894 | mutex_unlock(&lu_sites_guard); | ||
| 1895 | |||
| 1896 | return sc->nr_to_scan - remain; | ||
| 1897 | } | ||
| 1898 | |||
| 1893 | /* | 1899 | /* |
| 1894 | * Debugging stuff. | 1900 | * Debugging stuff. |
| 1895 | */ | 1901 | */ |
| @@ -1913,6 +1919,12 @@ int lu_printk_printer(const struct lu_env *env, | |||
| 1913 | return 0; | 1919 | return 0; |
| 1914 | } | 1920 | } |
| 1915 | 1921 | ||
| 1922 | static struct shrinker lu_site_shrinker = { | ||
| 1923 | .count_objects = lu_cache_shrink_count, | ||
| 1924 | .scan_objects = lu_cache_shrink_scan, | ||
| 1925 | .seeks = DEFAULT_SEEKS, | ||
| 1926 | }; | ||
| 1927 | |||
| 1916 | /** | 1928 | /** |
| 1917 | * Initialization of global lu_* data. | 1929 | * Initialization of global lu_* data. |
| 1918 | */ | 1930 | */ |
| @@ -1947,9 +1959,7 @@ int lu_global_init(void) | |||
| 1947 | * inode, one for ea. Unfortunately setting this high value results in | 1959 | * inode, one for ea. Unfortunately setting this high value results in |
| 1948 | * lu_object/inode cache consuming all the memory. | 1960 | * lu_object/inode cache consuming all the memory. |
| 1949 | */ | 1961 | */ |
| 1950 | lu_site_shrinker = set_shrinker(DEFAULT_SEEKS, lu_cache_shrink); | 1962 | register_shrinker(&lu_site_shrinker); |
| 1951 | if (lu_site_shrinker == NULL) | ||
| 1952 | return -ENOMEM; | ||
| 1953 | 1963 | ||
| 1954 | return result; | 1964 | return result; |
| 1955 | } | 1965 | } |
| @@ -1959,11 +1969,7 @@ int lu_global_init(void) | |||
| 1959 | */ | 1969 | */ |
| 1960 | void lu_global_fini(void) | 1970 | void lu_global_fini(void) |
| 1961 | { | 1971 | { |
| 1962 | if (lu_site_shrinker != NULL) { | 1972 | unregister_shrinker(&lu_site_shrinker); |
| 1963 | remove_shrinker(lu_site_shrinker); | ||
| 1964 | lu_site_shrinker = NULL; | ||
| 1965 | } | ||
| 1966 | |||
| 1967 | lu_context_key_degister(&lu_global_key); | 1973 | lu_context_key_degister(&lu_global_key); |
| 1968 | 1974 | ||
| 1969 | /* | 1975 | /* |
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c index 9013745ab105..e90c8fb7da6a 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c | |||
| @@ -121,13 +121,6 @@ static struct ptlrpc_enc_page_pool { | |||
| 121 | } page_pools; | 121 | } page_pools; |
| 122 | 122 | ||
| 123 | /* | 123 | /* |
| 124 | * memory shrinker | ||
| 125 | */ | ||
| 126 | const int pools_shrinker_seeks = DEFAULT_SEEKS; | ||
| 127 | static struct shrinker *pools_shrinker = NULL; | ||
| 128 | |||
| 129 | |||
| 130 | /* | ||
| 131 | * /proc/fs/lustre/sptlrpc/encrypt_page_pools | 124 | * /proc/fs/lustre/sptlrpc/encrypt_page_pools |
| 132 | */ | 125 | */ |
| 133 | int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v) | 126 | int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v) |
| @@ -226,30 +219,46 @@ static void enc_pools_release_free_pages(long npages) | |||
| 226 | } | 219 | } |
| 227 | 220 | ||
| 228 | /* | 221 | /* |
| 229 | * could be called frequently for query (@nr_to_scan == 0). | ||
| 230 | * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. | 222 | * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. |
| 231 | */ | 223 | */ |
| 232 | static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) | 224 | static unsigned long enc_pools_shrink_count(struct shrinker *s, |
| 225 | struct shrink_control *sc) | ||
| 233 | { | 226 | { |
| 234 | if (unlikely(shrink_param(sc, nr_to_scan) != 0)) { | 227 | /* |
| 228 | * if no pool access for a long time, we consider it's fully idle. | ||
| 229 | * a little race here is fine. | ||
| 230 | */ | ||
| 231 | if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access > | ||
| 232 | CACHE_QUIESCENT_PERIOD)) { | ||
| 235 | spin_lock(&page_pools.epp_lock); | 233 | spin_lock(&page_pools.epp_lock); |
| 236 | shrink_param(sc, nr_to_scan) = min_t(unsigned long, | 234 | page_pools.epp_idle_idx = IDLE_IDX_MAX; |
| 237 | shrink_param(sc, nr_to_scan), | ||
| 238 | page_pools.epp_free_pages - | ||
| 239 | PTLRPC_MAX_BRW_PAGES); | ||
| 240 | if (shrink_param(sc, nr_to_scan) > 0) { | ||
| 241 | enc_pools_release_free_pages(shrink_param(sc, | ||
| 242 | nr_to_scan)); | ||
| 243 | CDEBUG(D_SEC, "released %ld pages, %ld left\n", | ||
| 244 | (long)shrink_param(sc, nr_to_scan), | ||
| 245 | page_pools.epp_free_pages); | ||
| 246 | |||
| 247 | page_pools.epp_st_shrinks++; | ||
| 248 | page_pools.epp_last_shrink = cfs_time_current_sec(); | ||
| 249 | } | ||
| 250 | spin_unlock(&page_pools.epp_lock); | 235 | spin_unlock(&page_pools.epp_lock); |
| 251 | } | 236 | } |
| 252 | 237 | ||
| 238 | LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); | ||
| 239 | return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) * | ||
| 240 | (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX; | ||
| 241 | } | ||
| 242 | |||
| 243 | /* | ||
| 244 | * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. | ||
| 245 | */ | ||
| 246 | static unsigned long enc_pools_shrink_scan(struct shrinker *s, | ||
| 247 | struct shrink_control *sc) | ||
| 248 | { | ||
| 249 | spin_lock(&page_pools.epp_lock); | ||
| 250 | sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan, | ||
| 251 | page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES); | ||
| 252 | if (sc->nr_to_scan > 0) { | ||
| 253 | enc_pools_release_free_pages(sc->nr_to_scan); | ||
| 254 | CDEBUG(D_SEC, "released %ld pages, %ld left\n", | ||
| 255 | (long)sc->nr_to_scan, page_pools.epp_free_pages); | ||
| 256 | |||
| 257 | page_pools.epp_st_shrinks++; | ||
| 258 | page_pools.epp_last_shrink = cfs_time_current_sec(); | ||
| 259 | } | ||
| 260 | spin_unlock(&page_pools.epp_lock); | ||
| 261 | |||
| 253 | /* | 262 | /* |
| 254 | * if no pool access for a long time, we consider it's fully idle. | 263 | * if no pool access for a long time, we consider it's fully idle. |
| 255 | * a little race here is fine. | 264 | * a little race here is fine. |
| @@ -262,8 +271,7 @@ static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) | |||
| 262 | } | 271 | } |
| 263 | 272 | ||
| 264 | LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); | 273 | LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); |
| 265 | return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) * | 274 | return sc->nr_to_scan; |
| 266 | (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX; | ||
| 267 | } | 275 | } |
| 268 | 276 | ||
| 269 | static inline | 277 | static inline |
| @@ -699,6 +707,12 @@ static inline void enc_pools_free(void) | |||
| 699 | sizeof(*page_pools.epp_pools)); | 707 | sizeof(*page_pools.epp_pools)); |
| 700 | } | 708 | } |
| 701 | 709 | ||
| 710 | static struct shrinker pools_shrinker = { | ||
| 711 | .count_objects = enc_pools_shrink_count, | ||
| 712 | .scan_objects = enc_pools_shrink_scan, | ||
| 713 | .seeks = DEFAULT_SEEKS, | ||
| 714 | }; | ||
| 715 | |||
| 702 | int sptlrpc_enc_pool_init(void) | 716 | int sptlrpc_enc_pool_init(void) |
| 703 | { | 717 | { |
| 704 | /* | 718 | /* |
| @@ -736,12 +750,7 @@ int sptlrpc_enc_pool_init(void) | |||
| 736 | if (page_pools.epp_pools == NULL) | 750 | if (page_pools.epp_pools == NULL) |
| 737 | return -ENOMEM; | 751 | return -ENOMEM; |
| 738 | 752 | ||
| 739 | pools_shrinker = set_shrinker(pools_shrinker_seeks, | 753 | register_shrinker(&pools_shrinker); |
| 740 | enc_pools_shrink); | ||
| 741 | if (pools_shrinker == NULL) { | ||
| 742 | enc_pools_free(); | ||
| 743 | return -ENOMEM; | ||
| 744 | } | ||
| 745 | 754 | ||
| 746 | return 0; | 755 | return 0; |
| 747 | } | 756 | } |
| @@ -750,11 +759,10 @@ void sptlrpc_enc_pool_fini(void) | |||
| 750 | { | 759 | { |
| 751 | unsigned long cleaned, npools; | 760 | unsigned long cleaned, npools; |
| 752 | 761 | ||
| 753 | LASSERT(pools_shrinker); | ||
| 754 | LASSERT(page_pools.epp_pools); | 762 | LASSERT(page_pools.epp_pools); |
| 755 | LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages); | 763 | LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages); |
| 756 | 764 | ||
| 757 | remove_shrinker(pools_shrinker); | 765 | unregister_shrinker(&pools_shrinker); |
| 758 | 766 | ||
| 759 | npools = npages_to_npools(page_pools.epp_total_pages); | 767 | npools = npages_to_npools(page_pools.epp_total_pages); |
| 760 | cleaned = enc_pools_cleanup(page_pools.epp_pools, npools); | 768 | cleaned = enc_pools_cleanup(page_pools.epp_pools, npools); |
diff --git a/fs/dcache.c b/fs/dcache.c index dddc67fed732..1bd4614ce93b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -37,6 +37,7 @@ | |||
| 37 | #include <linux/rculist_bl.h> | 37 | #include <linux/rculist_bl.h> |
| 38 | #include <linux/prefetch.h> | 38 | #include <linux/prefetch.h> |
| 39 | #include <linux/ratelimit.h> | 39 | #include <linux/ratelimit.h> |
| 40 | #include <linux/list_lru.h> | ||
| 40 | #include "internal.h" | 41 | #include "internal.h" |
| 41 | #include "mount.h" | 42 | #include "mount.h" |
| 42 | 43 | ||
| @@ -48,7 +49,7 @@ | |||
| 48 | * - the dcache hash table | 49 | * - the dcache hash table |
| 49 | * s_anon bl list spinlock protects: | 50 | * s_anon bl list spinlock protects: |
| 50 | * - the s_anon list (see __d_drop) | 51 | * - the s_anon list (see __d_drop) |
| 51 | * dcache_lru_lock protects: | 52 | * dentry->d_sb->s_dentry_lru_lock protects: |
| 52 | * - the dcache lru lists and counters | 53 | * - the dcache lru lists and counters |
| 53 | * d_lock protects: | 54 | * d_lock protects: |
| 54 | * - d_flags | 55 | * - d_flags |
| @@ -63,7 +64,7 @@ | |||
| 63 | * Ordering: | 64 | * Ordering: |
| 64 | * dentry->d_inode->i_lock | 65 | * dentry->d_inode->i_lock |
| 65 | * dentry->d_lock | 66 | * dentry->d_lock |
| 66 | * dcache_lru_lock | 67 | * dentry->d_sb->s_dentry_lru_lock |
| 67 | * dcache_hash_bucket lock | 68 | * dcache_hash_bucket lock |
| 68 | * s_anon lock | 69 | * s_anon lock |
| 69 | * | 70 | * |
| @@ -81,7 +82,6 @@ | |||
| 81 | int sysctl_vfs_cache_pressure __read_mostly = 100; | 82 | int sysctl_vfs_cache_pressure __read_mostly = 100; |
| 82 | EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); | 83 | EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); |
| 83 | 84 | ||
| 84 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); | ||
| 85 | __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); | 85 | __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); |
| 86 | 86 | ||
| 87 | EXPORT_SYMBOL(rename_lock); | 87 | EXPORT_SYMBOL(rename_lock); |
| @@ -146,23 +146,47 @@ struct dentry_stat_t dentry_stat = { | |||
| 146 | .age_limit = 45, | 146 | .age_limit = 45, |
| 147 | }; | 147 | }; |
| 148 | 148 | ||
| 149 | static DEFINE_PER_CPU(unsigned int, nr_dentry); | 149 | static DEFINE_PER_CPU(long, nr_dentry); |
| 150 | static DEFINE_PER_CPU(long, nr_dentry_unused); | ||
| 150 | 151 | ||
| 151 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) | 152 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) |
| 152 | static int get_nr_dentry(void) | 153 | |
| 154 | /* | ||
| 155 | * Here we resort to our own counters instead of using generic per-cpu counters | ||
| 156 | * for consistency with what the vfs inode code does. We are expected to harvest | ||
| 157 | * better code and performance by having our own specialized counters. | ||
| 158 | * | ||
| 159 | * Please note that the loop is done over all possible CPUs, not over all online | ||
| 160 | * CPUs. The reason for this is that we don't want to play games with CPUs going | ||
| 161 | * on and off. If one of them goes off, we will just keep their counters. | ||
| 162 | * | ||
| 163 | * glommer: See cffbc8a for details, and if you ever intend to change this, | ||
| 164 | * please update all vfs counters to match. | ||
| 165 | */ | ||
| 166 | static long get_nr_dentry(void) | ||
| 153 | { | 167 | { |
| 154 | int i; | 168 | int i; |
| 155 | int sum = 0; | 169 | long sum = 0; |
| 156 | for_each_possible_cpu(i) | 170 | for_each_possible_cpu(i) |
| 157 | sum += per_cpu(nr_dentry, i); | 171 | sum += per_cpu(nr_dentry, i); |
| 158 | return sum < 0 ? 0 : sum; | 172 | return sum < 0 ? 0 : sum; |
| 159 | } | 173 | } |
| 160 | 174 | ||
| 175 | static long get_nr_dentry_unused(void) | ||
| 176 | { | ||
| 177 | int i; | ||
| 178 | long sum = 0; | ||
| 179 | for_each_possible_cpu(i) | ||
| 180 | sum += per_cpu(nr_dentry_unused, i); | ||
| 181 | return sum < 0 ? 0 : sum; | ||
| 182 | } | ||
| 183 | |||
| 161 | int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, | 184 | int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, |
| 162 | size_t *lenp, loff_t *ppos) | 185 | size_t *lenp, loff_t *ppos) |
| 163 | { | 186 | { |
| 164 | dentry_stat.nr_dentry = get_nr_dentry(); | 187 | dentry_stat.nr_dentry = get_nr_dentry(); |
| 165 | return proc_dointvec(table, write, buffer, lenp, ppos); | 188 | dentry_stat.nr_unused = get_nr_dentry_unused(); |
| 189 | return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); | ||
| 166 | } | 190 | } |
| 167 | #endif | 191 | #endif |
| 168 | 192 | ||
| @@ -333,52 +357,35 @@ static void dentry_unlink_inode(struct dentry * dentry) | |||
| 333 | } | 357 | } |
| 334 | 358 | ||
| 335 | /* | 359 | /* |
| 336 | * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held. | 360 | * dentry_lru_(add|del)_list) must be called with d_lock held. |
| 337 | */ | 361 | */ |
| 338 | static void dentry_lru_add(struct dentry *dentry) | 362 | static void dentry_lru_add(struct dentry *dentry) |
| 339 | { | 363 | { |
| 340 | if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) { | 364 | if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) { |
| 341 | spin_lock(&dcache_lru_lock); | 365 | if (list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)) |
| 366 | this_cpu_inc(nr_dentry_unused); | ||
| 342 | dentry->d_flags |= DCACHE_LRU_LIST; | 367 | dentry->d_flags |= DCACHE_LRU_LIST; |
| 343 | list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); | ||
| 344 | dentry->d_sb->s_nr_dentry_unused++; | ||
| 345 | dentry_stat.nr_unused++; | ||
| 346 | spin_unlock(&dcache_lru_lock); | ||
| 347 | } | 368 | } |
| 348 | } | 369 | } |
| 349 | 370 | ||
| 350 | static void __dentry_lru_del(struct dentry *dentry) | ||
| 351 | { | ||
| 352 | list_del_init(&dentry->d_lru); | ||
| 353 | dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); | ||
| 354 | dentry->d_sb->s_nr_dentry_unused--; | ||
| 355 | dentry_stat.nr_unused--; | ||
| 356 | } | ||
| 357 | |||
| 358 | /* | 371 | /* |
| 359 | * Remove a dentry with references from the LRU. | 372 | * Remove a dentry with references from the LRU. |
| 373 | * | ||
| 374 | * If we are on the shrink list, then we can get to try_prune_one_dentry() and | ||
| 375 | * lose our last reference through the parent walk. In this case, we need to | ||
| 376 | * remove ourselves from the shrink list, not the LRU. | ||
| 360 | */ | 377 | */ |
| 361 | static void dentry_lru_del(struct dentry *dentry) | 378 | static void dentry_lru_del(struct dentry *dentry) |
| 362 | { | 379 | { |
| 363 | if (!list_empty(&dentry->d_lru)) { | 380 | if (dentry->d_flags & DCACHE_SHRINK_LIST) { |
| 364 | spin_lock(&dcache_lru_lock); | 381 | list_del_init(&dentry->d_lru); |
| 365 | __dentry_lru_del(dentry); | 382 | dentry->d_flags &= ~DCACHE_SHRINK_LIST; |
| 366 | spin_unlock(&dcache_lru_lock); | 383 | return; |
| 367 | } | 384 | } |
| 368 | } | ||
| 369 | 385 | ||
| 370 | static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list) | 386 | if (list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)) |
| 371 | { | 387 | this_cpu_dec(nr_dentry_unused); |
| 372 | spin_lock(&dcache_lru_lock); | 388 | dentry->d_flags &= ~DCACHE_LRU_LIST; |
| 373 | if (list_empty(&dentry->d_lru)) { | ||
| 374 | dentry->d_flags |= DCACHE_LRU_LIST; | ||
| 375 | list_add_tail(&dentry->d_lru, list); | ||
| 376 | dentry->d_sb->s_nr_dentry_unused++; | ||
| 377 | dentry_stat.nr_unused++; | ||
| 378 | } else { | ||
| 379 | list_move_tail(&dentry->d_lru, list); | ||
| 380 | } | ||
| 381 | spin_unlock(&dcache_lru_lock); | ||
| 382 | } | 389 | } |
| 383 | 390 | ||
| 384 | /** | 391 | /** |
| @@ -474,7 +481,8 @@ EXPORT_SYMBOL(d_drop); | |||
| 474 | * If ref is non-zero, then decrement the refcount too. | 481 | * If ref is non-zero, then decrement the refcount too. |
| 475 | * Returns dentry requiring refcount drop, or NULL if we're done. | 482 | * Returns dentry requiring refcount drop, or NULL if we're done. |
| 476 | */ | 483 | */ |
| 477 | static inline struct dentry *dentry_kill(struct dentry *dentry) | 484 | static inline struct dentry * |
| 485 | dentry_kill(struct dentry *dentry, int unlock_on_failure) | ||
| 478 | __releases(dentry->d_lock) | 486 | __releases(dentry->d_lock) |
| 479 | { | 487 | { |
| 480 | struct inode *inode; | 488 | struct inode *inode; |
| @@ -483,8 +491,10 @@ static inline struct dentry *dentry_kill(struct dentry *dentry) | |||
| 483 | inode = dentry->d_inode; | 491 | inode = dentry->d_inode; |
| 484 | if (inode && !spin_trylock(&inode->i_lock)) { | 492 | if (inode && !spin_trylock(&inode->i_lock)) { |
| 485 | relock: | 493 | relock: |
| 486 | spin_unlock(&dentry->d_lock); | 494 | if (unlock_on_failure) { |
| 487 | cpu_relax(); | 495 | spin_unlock(&dentry->d_lock); |
| 496 | cpu_relax(); | ||
| 497 | } | ||
| 488 | return dentry; /* try again with same dentry */ | 498 | return dentry; /* try again with same dentry */ |
| 489 | } | 499 | } |
| 490 | if (IS_ROOT(dentry)) | 500 | if (IS_ROOT(dentry)) |
| @@ -567,7 +577,7 @@ repeat: | |||
| 567 | return; | 577 | return; |
| 568 | 578 | ||
| 569 | kill_it: | 579 | kill_it: |
| 570 | dentry = dentry_kill(dentry); | 580 | dentry = dentry_kill(dentry, 1); |
| 571 | if (dentry) | 581 | if (dentry) |
| 572 | goto repeat; | 582 | goto repeat; |
| 573 | } | 583 | } |
| @@ -787,12 +797,12 @@ EXPORT_SYMBOL(d_prune_aliases); | |||
| 787 | * | 797 | * |
| 788 | * This may fail if locks cannot be acquired no problem, just try again. | 798 | * This may fail if locks cannot be acquired no problem, just try again. |
| 789 | */ | 799 | */ |
| 790 | static void try_prune_one_dentry(struct dentry *dentry) | 800 | static struct dentry * try_prune_one_dentry(struct dentry *dentry) |
| 791 | __releases(dentry->d_lock) | 801 | __releases(dentry->d_lock) |
| 792 | { | 802 | { |
| 793 | struct dentry *parent; | 803 | struct dentry *parent; |
| 794 | 804 | ||
| 795 | parent = dentry_kill(dentry); | 805 | parent = dentry_kill(dentry, 0); |
| 796 | /* | 806 | /* |
| 797 | * If dentry_kill returns NULL, we have nothing more to do. | 807 | * If dentry_kill returns NULL, we have nothing more to do. |
| 798 | * if it returns the same dentry, trylocks failed. In either | 808 | * if it returns the same dentry, trylocks failed. In either |
| @@ -804,17 +814,18 @@ static void try_prune_one_dentry(struct dentry *dentry) | |||
| 804 | * fragmentation. | 814 | * fragmentation. |
| 805 | */ | 815 | */ |
| 806 | if (!parent) | 816 | if (!parent) |
| 807 | return; | 817 | return NULL; |
| 808 | if (parent == dentry) | 818 | if (parent == dentry) |
| 809 | return; | 819 | return dentry; |
| 810 | 820 | ||
| 811 | /* Prune ancestors. */ | 821 | /* Prune ancestors. */ |
| 812 | dentry = parent; | 822 | dentry = parent; |
| 813 | while (dentry) { | 823 | while (dentry) { |
| 814 | if (lockref_put_or_lock(&dentry->d_lockref)) | 824 | if (lockref_put_or_lock(&dentry->d_lockref)) |
| 815 | return; | 825 | return NULL; |
| 816 | dentry = dentry_kill(dentry); | 826 | dentry = dentry_kill(dentry, 1); |
| 817 | } | 827 | } |
| 828 | return NULL; | ||
| 818 | } | 829 | } |
| 819 | 830 | ||
| 820 | static void shrink_dentry_list(struct list_head *list) | 831 | static void shrink_dentry_list(struct list_head *list) |
| @@ -833,76 +844,143 @@ static void shrink_dentry_list(struct list_head *list) | |||
| 833 | } | 844 | } |
| 834 | 845 | ||
| 835 | /* | 846 | /* |
| 847 | * The dispose list is isolated and dentries are not accounted | ||
| 848 | * to the LRU here, so we can simply remove it from the list | ||
| 849 | * here regardless of whether it is referenced or not. | ||
| 850 | */ | ||
| 851 | list_del_init(&dentry->d_lru); | ||
| 852 | dentry->d_flags &= ~DCACHE_SHRINK_LIST; | ||
| 853 | |||
| 854 | /* | ||
| 836 | * We found an inuse dentry which was not removed from | 855 | * We found an inuse dentry which was not removed from |
| 837 | * the LRU because of laziness during lookup. Do not free | 856 | * the LRU because of laziness during lookup. Do not free it. |
| 838 | * it - just keep it off the LRU list. | ||
| 839 | */ | 857 | */ |
| 840 | if (dentry->d_lockref.count) { | 858 | if (dentry->d_lockref.count) { |
| 841 | dentry_lru_del(dentry); | ||
| 842 | spin_unlock(&dentry->d_lock); | 859 | spin_unlock(&dentry->d_lock); |
| 843 | continue; | 860 | continue; |
| 844 | } | 861 | } |
| 845 | |||
| 846 | rcu_read_unlock(); | 862 | rcu_read_unlock(); |
| 847 | 863 | ||
| 848 | try_prune_one_dentry(dentry); | 864 | dentry = try_prune_one_dentry(dentry); |
| 849 | 865 | ||
| 850 | rcu_read_lock(); | 866 | rcu_read_lock(); |
| 867 | if (dentry) { | ||
| 868 | dentry->d_flags |= DCACHE_SHRINK_LIST; | ||
| 869 | list_add(&dentry->d_lru, list); | ||
| 870 | spin_unlock(&dentry->d_lock); | ||
| 871 | } | ||
| 851 | } | 872 | } |
| 852 | rcu_read_unlock(); | 873 | rcu_read_unlock(); |
| 853 | } | 874 | } |
| 854 | 875 | ||
| 876 | static enum lru_status | ||
| 877 | dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) | ||
| 878 | { | ||
| 879 | struct list_head *freeable = arg; | ||
| 880 | struct dentry *dentry = container_of(item, struct dentry, d_lru); | ||
| 881 | |||
| 882 | |||
| 883 | /* | ||
| 884 | * we are inverting the lru lock/dentry->d_lock here, | ||
| 885 | * so use a trylock. If we fail to get the lock, just skip | ||
| 886 | * it | ||
| 887 | */ | ||
| 888 | if (!spin_trylock(&dentry->d_lock)) | ||
| 889 | return LRU_SKIP; | ||
| 890 | |||
| 891 | /* | ||
| 892 | * Referenced dentries are still in use. If they have active | ||
| 893 | * counts, just remove them from the LRU. Otherwise give them | ||
| 894 | * another pass through the LRU. | ||
| 895 | */ | ||
| 896 | if (dentry->d_lockref.count) { | ||
| 897 | list_del_init(&dentry->d_lru); | ||
| 898 | spin_unlock(&dentry->d_lock); | ||
| 899 | return LRU_REMOVED; | ||
| 900 | } | ||
| 901 | |||
| 902 | if (dentry->d_flags & DCACHE_REFERENCED) { | ||
| 903 | dentry->d_flags &= ~DCACHE_REFERENCED; | ||
| 904 | spin_unlock(&dentry->d_lock); | ||
| 905 | |||
| 906 | /* | ||
| 907 | * The list move itself will be made by the common LRU code. At | ||
| 908 | * this point, we've dropped the dentry->d_lock but keep the | ||
| 909 | * lru lock. This is safe to do, since every list movement is | ||
| 910 | * protected by the lru lock even if both locks are held. | ||
| 911 | * | ||
| 912 | * This is guaranteed by the fact that all LRU management | ||
| 913 | * functions are intermediated by the LRU API calls like | ||
| 914 | * list_lru_add and list_lru_del. List movement in this file | ||
| 915 | * only ever occur through this functions or through callbacks | ||
| 916 | * like this one, that are called from the LRU API. | ||
| 917 | * | ||
| 918 | * The only exceptions to this are functions like | ||
| 919 | * shrink_dentry_list, and code that first checks for the | ||
| 920 | * DCACHE_SHRINK_LIST flag. Those are guaranteed to be | ||
| 921 | * operating only with stack provided lists after they are | ||
| 922 | * properly isolated from the main list. It is thus, always a | ||
| 923 | * local access. | ||
| 924 | */ | ||
| 925 | return LRU_ROTATE; | ||
| 926 | } | ||
| 927 | |||
| 928 | dentry->d_flags |= DCACHE_SHRINK_LIST; | ||
| 929 | list_move_tail(&dentry->d_lru, freeable); | ||
| 930 | this_cpu_dec(nr_dentry_unused); | ||
| 931 | spin_unlock(&dentry->d_lock); | ||
| 932 | |||
| 933 | return LRU_REMOVED; | ||
| 934 | } | ||
| 935 | |||
| 855 | /** | 936 | /** |
| 856 | * prune_dcache_sb - shrink the dcache | 937 | * prune_dcache_sb - shrink the dcache |
| 857 | * @sb: superblock | 938 | * @sb: superblock |
| 858 | * @count: number of entries to try to free | 939 | * @nr_to_scan : number of entries to try to free |
| 940 | * @nid: which node to scan for freeable entities | ||
| 859 | * | 941 | * |
| 860 | * Attempt to shrink the superblock dcache LRU by @count entries. This is | 942 | * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is |
| 861 | * done when we need more memory an called from the superblock shrinker | 943 | * done when we need more memory an called from the superblock shrinker |
| 862 | * function. | 944 | * function. |
| 863 | * | 945 | * |
| 864 | * This function may fail to free any resources if all the dentries are in | 946 | * This function may fail to free any resources if all the dentries are in |
| 865 | * use. | 947 | * use. |
| 866 | */ | 948 | */ |
| 867 | void prune_dcache_sb(struct super_block *sb, int count) | 949 | long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, |
| 950 | int nid) | ||
| 868 | { | 951 | { |
| 869 | struct dentry *dentry; | 952 | LIST_HEAD(dispose); |
| 870 | LIST_HEAD(referenced); | 953 | long freed; |
| 871 | LIST_HEAD(tmp); | ||
| 872 | 954 | ||
| 873 | relock: | 955 | freed = list_lru_walk_node(&sb->s_dentry_lru, nid, dentry_lru_isolate, |
| 874 | spin_lock(&dcache_lru_lock); | 956 | &dispose, &nr_to_scan); |
| 875 | while (!list_empty(&sb->s_dentry_lru)) { | 957 | shrink_dentry_list(&dispose); |
| 876 | dentry = list_entry(sb->s_dentry_lru.prev, | 958 | return freed; |
| 877 | struct dentry, d_lru); | 959 | } |
| 878 | BUG_ON(dentry->d_sb != sb); | ||
| 879 | |||
| 880 | if (!spin_trylock(&dentry->d_lock)) { | ||
| 881 | spin_unlock(&dcache_lru_lock); | ||
| 882 | cpu_relax(); | ||
| 883 | goto relock; | ||
| 884 | } | ||
| 885 | 960 | ||
| 886 | if (dentry->d_flags & DCACHE_REFERENCED) { | 961 | static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, |
| 887 | dentry->d_flags &= ~DCACHE_REFERENCED; | 962 | spinlock_t *lru_lock, void *arg) |
| 888 | list_move(&dentry->d_lru, &referenced); | 963 | { |
| 889 | spin_unlock(&dentry->d_lock); | 964 | struct list_head *freeable = arg; |
| 890 | } else { | 965 | struct dentry *dentry = container_of(item, struct dentry, d_lru); |
| 891 | list_move_tail(&dentry->d_lru, &tmp); | ||
| 892 | dentry->d_flags |= DCACHE_SHRINK_LIST; | ||
| 893 | spin_unlock(&dentry->d_lock); | ||
| 894 | if (!--count) | ||
| 895 | break; | ||
| 896 | } | ||
| 897 | cond_resched_lock(&dcache_lru_lock); | ||
| 898 | } | ||
| 899 | if (!list_empty(&referenced)) | ||
| 900 | list_splice(&referenced, &sb->s_dentry_lru); | ||
| 901 | spin_unlock(&dcache_lru_lock); | ||
| 902 | 966 | ||
| 903 | shrink_dentry_list(&tmp); | 967 | /* |
| 968 | * we are inverting the lru lock/dentry->d_lock here, | ||
| 969 | * so use a trylock. If we fail to get the lock, just skip | ||
| 970 | * it | ||
| 971 | */ | ||
| 972 | if (!spin_trylock(&dentry->d_lock)) | ||
| 973 | return LRU_SKIP; | ||
| 974 | |||
| 975 | dentry->d_flags |= DCACHE_SHRINK_LIST; | ||
| 976 | list_move_tail(&dentry->d_lru, freeable); | ||
| 977 | this_cpu_dec(nr_dentry_unused); | ||
| 978 | spin_unlock(&dentry->d_lock); | ||
| 979 | |||
| 980 | return LRU_REMOVED; | ||
| 904 | } | 981 | } |
| 905 | 982 | ||
| 983 | |||
| 906 | /** | 984 | /** |
| 907 | * shrink_dcache_sb - shrink dcache for a superblock | 985 | * shrink_dcache_sb - shrink dcache for a superblock |
| 908 | * @sb: superblock | 986 | * @sb: superblock |
| @@ -912,16 +990,17 @@ relock: | |||
| 912 | */ | 990 | */ |
| 913 | void shrink_dcache_sb(struct super_block *sb) | 991 | void shrink_dcache_sb(struct super_block *sb) |
| 914 | { | 992 | { |
| 915 | LIST_HEAD(tmp); | 993 | long freed; |
| 916 | 994 | ||
| 917 | spin_lock(&dcache_lru_lock); | 995 | do { |
| 918 | while (!list_empty(&sb->s_dentry_lru)) { | 996 | LIST_HEAD(dispose); |
| 919 | list_splice_init(&sb->s_dentry_lru, &tmp); | 997 | |
| 920 | spin_unlock(&dcache_lru_lock); | 998 | freed = list_lru_walk(&sb->s_dentry_lru, |
| 921 | shrink_dentry_list(&tmp); | 999 | dentry_lru_isolate_shrink, &dispose, UINT_MAX); |
| 922 | spin_lock(&dcache_lru_lock); | 1000 | |
| 923 | } | 1001 | this_cpu_sub(nr_dentry_unused, freed); |
| 924 | spin_unlock(&dcache_lru_lock); | 1002 | shrink_dentry_list(&dispose); |
| 1003 | } while (freed > 0); | ||
| 925 | } | 1004 | } |
| 926 | EXPORT_SYMBOL(shrink_dcache_sb); | 1005 | EXPORT_SYMBOL(shrink_dcache_sb); |
| 927 | 1006 | ||
| @@ -1283,7 +1362,8 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) | |||
| 1283 | if (dentry->d_lockref.count) { | 1362 | if (dentry->d_lockref.count) { |
| 1284 | dentry_lru_del(dentry); | 1363 | dentry_lru_del(dentry); |
| 1285 | } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { | 1364 | } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { |
| 1286 | dentry_lru_move_list(dentry, &data->dispose); | 1365 | dentry_lru_del(dentry); |
| 1366 | list_add_tail(&dentry->d_lru, &data->dispose); | ||
| 1287 | dentry->d_flags |= DCACHE_SHRINK_LIST; | 1367 | dentry->d_flags |= DCACHE_SHRINK_LIST; |
| 1288 | data->found++; | 1368 | data->found++; |
| 1289 | ret = D_WALK_NORETRY; | 1369 | ret = D_WALK_NORETRY; |
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index c00e055b6282..9fd702f5bfb2 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c | |||
| @@ -44,6 +44,7 @@ static void drop_slab(void) | |||
| 44 | .gfp_mask = GFP_KERNEL, | 44 | .gfp_mask = GFP_KERNEL, |
| 45 | }; | 45 | }; |
| 46 | 46 | ||
| 47 | nodes_setall(shrink.nodes_to_scan); | ||
| 47 | do { | 48 | do { |
| 48 | nr_objects = shrink_slab(&shrink, 1000, 1000); | 49 | nr_objects = shrink_slab(&shrink, 1000, 1000); |
| 49 | } while (nr_objects > 10); | 50 | } while (nr_objects > 10); |
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 2d1bdbe78c04..3981ff783950 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
| @@ -931,13 +931,15 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | |||
| 931 | struct ext4_inode_info *ei; | 931 | struct ext4_inode_info *ei; |
| 932 | struct list_head *cur, *tmp; | 932 | struct list_head *cur, *tmp; |
| 933 | LIST_HEAD(skipped); | 933 | LIST_HEAD(skipped); |
| 934 | int ret, nr_shrunk = 0; | 934 | int nr_shrunk = 0; |
| 935 | int retried = 0, skip_precached = 1, nr_skipped = 0; | 935 | int retried = 0, skip_precached = 1, nr_skipped = 0; |
| 936 | 936 | ||
| 937 | spin_lock(&sbi->s_es_lru_lock); | 937 | spin_lock(&sbi->s_es_lru_lock); |
| 938 | 938 | ||
| 939 | retry: | 939 | retry: |
| 940 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { | 940 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { |
| 941 | int shrunk; | ||
| 942 | |||
| 941 | /* | 943 | /* |
| 942 | * If we have already reclaimed all extents from extent | 944 | * If we have already reclaimed all extents from extent |
| 943 | * status tree, just stop the loop immediately. | 945 | * status tree, just stop the loop immediately. |
| @@ -964,13 +966,13 @@ retry: | |||
| 964 | continue; | 966 | continue; |
| 965 | 967 | ||
| 966 | write_lock(&ei->i_es_lock); | 968 | write_lock(&ei->i_es_lock); |
| 967 | ret = __es_try_to_reclaim_extents(ei, nr_to_scan); | 969 | shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); |
| 968 | if (ei->i_es_lru_nr == 0) | 970 | if (ei->i_es_lru_nr == 0) |
| 969 | list_del_init(&ei->i_es_lru); | 971 | list_del_init(&ei->i_es_lru); |
| 970 | write_unlock(&ei->i_es_lock); | 972 | write_unlock(&ei->i_es_lock); |
| 971 | 973 | ||
| 972 | nr_shrunk += ret; | 974 | nr_shrunk += shrunk; |
| 973 | nr_to_scan -= ret; | 975 | nr_to_scan -= shrunk; |
| 974 | if (nr_to_scan == 0) | 976 | if (nr_to_scan == 0) |
| 975 | break; | 977 | break; |
| 976 | } | 978 | } |
| @@ -1007,7 +1009,20 @@ retry: | |||
| 1007 | return nr_shrunk; | 1009 | return nr_shrunk; |
| 1008 | } | 1010 | } |
| 1009 | 1011 | ||
| 1010 | static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | 1012 | static unsigned long ext4_es_count(struct shrinker *shrink, |
| 1013 | struct shrink_control *sc) | ||
| 1014 | { | ||
| 1015 | unsigned long nr; | ||
| 1016 | struct ext4_sb_info *sbi; | ||
| 1017 | |||
| 1018 | sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); | ||
| 1019 | nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | ||
| 1020 | trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr); | ||
| 1021 | return nr; | ||
| 1022 | } | ||
| 1023 | |||
| 1024 | static unsigned long ext4_es_scan(struct shrinker *shrink, | ||
| 1025 | struct shrink_control *sc) | ||
| 1011 | { | 1026 | { |
| 1012 | struct ext4_sb_info *sbi = container_of(shrink, | 1027 | struct ext4_sb_info *sbi = container_of(shrink, |
| 1013 | struct ext4_sb_info, s_es_shrinker); | 1028 | struct ext4_sb_info, s_es_shrinker); |
| @@ -1022,9 +1037,8 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
| 1022 | 1037 | ||
| 1023 | nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); | 1038 | nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); |
| 1024 | 1039 | ||
| 1025 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | ||
| 1026 | trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); | 1040 | trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); |
| 1027 | return ret; | 1041 | return nr_shrunk; |
| 1028 | } | 1042 | } |
| 1029 | 1043 | ||
| 1030 | void ext4_es_register_shrinker(struct ext4_sb_info *sbi) | 1044 | void ext4_es_register_shrinker(struct ext4_sb_info *sbi) |
| @@ -1032,7 +1046,8 @@ void ext4_es_register_shrinker(struct ext4_sb_info *sbi) | |||
| 1032 | INIT_LIST_HEAD(&sbi->s_es_lru); | 1046 | INIT_LIST_HEAD(&sbi->s_es_lru); |
| 1033 | spin_lock_init(&sbi->s_es_lru_lock); | 1047 | spin_lock_init(&sbi->s_es_lru_lock); |
| 1034 | sbi->s_es_last_sorted = 0; | 1048 | sbi->s_es_last_sorted = 0; |
| 1035 | sbi->s_es_shrinker.shrink = ext4_es_shrink; | 1049 | sbi->s_es_shrinker.scan_objects = ext4_es_scan; |
| 1050 | sbi->s_es_shrinker.count_objects = ext4_es_count; | ||
| 1036 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; | 1051 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; |
| 1037 | register_shrinker(&sbi->s_es_shrinker); | 1052 | register_shrinker(&sbi->s_es_shrinker); |
| 1038 | } | 1053 | } |
| @@ -1076,7 +1091,7 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | |||
| 1076 | struct ext4_es_tree *tree = &ei->i_es_tree; | 1091 | struct ext4_es_tree *tree = &ei->i_es_tree; |
| 1077 | struct rb_node *node; | 1092 | struct rb_node *node; |
| 1078 | struct extent_status *es; | 1093 | struct extent_status *es; |
| 1079 | int nr_shrunk = 0; | 1094 | unsigned long nr_shrunk = 0; |
| 1080 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, | 1095 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, |
| 1081 | DEFAULT_RATELIMIT_BURST); | 1096 | DEFAULT_RATELIMIT_BURST); |
| 1082 | 1097 | ||
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 722329cac98f..c2f41b4d00b9 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
| @@ -1427,21 +1427,22 @@ __acquires(&lru_lock) | |||
| 1427 | * gfs2_dispose_glock_lru() above. | 1427 | * gfs2_dispose_glock_lru() above. |
| 1428 | */ | 1428 | */ |
| 1429 | 1429 | ||
| 1430 | static void gfs2_scan_glock_lru(int nr) | 1430 | static long gfs2_scan_glock_lru(int nr) |
| 1431 | { | 1431 | { |
| 1432 | struct gfs2_glock *gl; | 1432 | struct gfs2_glock *gl; |
| 1433 | LIST_HEAD(skipped); | 1433 | LIST_HEAD(skipped); |
| 1434 | LIST_HEAD(dispose); | 1434 | LIST_HEAD(dispose); |
| 1435 | long freed = 0; | ||
| 1435 | 1436 | ||
| 1436 | spin_lock(&lru_lock); | 1437 | spin_lock(&lru_lock); |
| 1437 | while(nr && !list_empty(&lru_list)) { | 1438 | while ((nr-- >= 0) && !list_empty(&lru_list)) { |
| 1438 | gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); | 1439 | gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); |
| 1439 | 1440 | ||
| 1440 | /* Test for being demotable */ | 1441 | /* Test for being demotable */ |
| 1441 | if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { | 1442 | if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { |
| 1442 | list_move(&gl->gl_lru, &dispose); | 1443 | list_move(&gl->gl_lru, &dispose); |
| 1443 | atomic_dec(&lru_count); | 1444 | atomic_dec(&lru_count); |
| 1444 | nr--; | 1445 | freed++; |
| 1445 | continue; | 1446 | continue; |
| 1446 | } | 1447 | } |
| 1447 | 1448 | ||
| @@ -1451,23 +1452,28 @@ static void gfs2_scan_glock_lru(int nr) | |||
| 1451 | if (!list_empty(&dispose)) | 1452 | if (!list_empty(&dispose)) |
| 1452 | gfs2_dispose_glock_lru(&dispose); | 1453 | gfs2_dispose_glock_lru(&dispose); |
| 1453 | spin_unlock(&lru_lock); | 1454 | spin_unlock(&lru_lock); |
| 1455 | |||
| 1456 | return freed; | ||
| 1454 | } | 1457 | } |
| 1455 | 1458 | ||
| 1456 | static int gfs2_shrink_glock_memory(struct shrinker *shrink, | 1459 | static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink, |
| 1457 | struct shrink_control *sc) | 1460 | struct shrink_control *sc) |
| 1458 | { | 1461 | { |
| 1459 | if (sc->nr_to_scan) { | 1462 | if (!(sc->gfp_mask & __GFP_FS)) |
| 1460 | if (!(sc->gfp_mask & __GFP_FS)) | 1463 | return SHRINK_STOP; |
| 1461 | return -1; | 1464 | return gfs2_scan_glock_lru(sc->nr_to_scan); |
| 1462 | gfs2_scan_glock_lru(sc->nr_to_scan); | 1465 | } |
| 1463 | } | ||
| 1464 | 1466 | ||
| 1465 | return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure; | 1467 | static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink, |
| 1468 | struct shrink_control *sc) | ||
| 1469 | { | ||
| 1470 | return vfs_pressure_ratio(atomic_read(&lru_count)); | ||
| 1466 | } | 1471 | } |
| 1467 | 1472 | ||
| 1468 | static struct shrinker glock_shrinker = { | 1473 | static struct shrinker glock_shrinker = { |
| 1469 | .shrink = gfs2_shrink_glock_memory, | ||
| 1470 | .seeks = DEFAULT_SEEKS, | 1474 | .seeks = DEFAULT_SEEKS, |
| 1475 | .count_objects = gfs2_glock_shrink_count, | ||
| 1476 | .scan_objects = gfs2_glock_shrink_scan, | ||
| 1471 | }; | 1477 | }; |
| 1472 | 1478 | ||
| 1473 | /** | 1479 | /** |
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 7b0f5043cf24..351586e24e30 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
| @@ -32,7 +32,8 @@ | |||
| 32 | struct workqueue_struct *gfs2_control_wq; | 32 | struct workqueue_struct *gfs2_control_wq; |
| 33 | 33 | ||
| 34 | static struct shrinker qd_shrinker = { | 34 | static struct shrinker qd_shrinker = { |
| 35 | .shrink = gfs2_shrink_qd_memory, | 35 | .count_objects = gfs2_qd_shrink_count, |
| 36 | .scan_objects = gfs2_qd_shrink_scan, | ||
| 36 | .seeks = DEFAULT_SEEKS, | 37 | .seeks = DEFAULT_SEEKS, |
| 37 | }; | 38 | }; |
| 38 | 39 | ||
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 3768c2f40e43..db441359ee8c 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
| @@ -75,17 +75,16 @@ static LIST_HEAD(qd_lru_list); | |||
| 75 | static atomic_t qd_lru_count = ATOMIC_INIT(0); | 75 | static atomic_t qd_lru_count = ATOMIC_INIT(0); |
| 76 | static DEFINE_SPINLOCK(qd_lru_lock); | 76 | static DEFINE_SPINLOCK(qd_lru_lock); |
| 77 | 77 | ||
| 78 | int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) | 78 | unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, |
| 79 | struct shrink_control *sc) | ||
| 79 | { | 80 | { |
| 80 | struct gfs2_quota_data *qd; | 81 | struct gfs2_quota_data *qd; |
| 81 | struct gfs2_sbd *sdp; | 82 | struct gfs2_sbd *sdp; |
| 82 | int nr_to_scan = sc->nr_to_scan; | 83 | int nr_to_scan = sc->nr_to_scan; |
| 83 | 84 | long freed = 0; | |
| 84 | if (nr_to_scan == 0) | ||
| 85 | goto out; | ||
| 86 | 85 | ||
| 87 | if (!(sc->gfp_mask & __GFP_FS)) | 86 | if (!(sc->gfp_mask & __GFP_FS)) |
| 88 | return -1; | 87 | return SHRINK_STOP; |
| 89 | 88 | ||
| 90 | spin_lock(&qd_lru_lock); | 89 | spin_lock(&qd_lru_lock); |
| 91 | while (nr_to_scan && !list_empty(&qd_lru_list)) { | 90 | while (nr_to_scan && !list_empty(&qd_lru_list)) { |
| @@ -110,11 +109,16 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) | |||
| 110 | kmem_cache_free(gfs2_quotad_cachep, qd); | 109 | kmem_cache_free(gfs2_quotad_cachep, qd); |
| 111 | spin_lock(&qd_lru_lock); | 110 | spin_lock(&qd_lru_lock); |
| 112 | nr_to_scan--; | 111 | nr_to_scan--; |
| 112 | freed++; | ||
| 113 | } | 113 | } |
| 114 | spin_unlock(&qd_lru_lock); | 114 | spin_unlock(&qd_lru_lock); |
| 115 | return freed; | ||
| 116 | } | ||
| 115 | 117 | ||
| 116 | out: | 118 | unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, |
| 117 | return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100; | 119 | struct shrink_control *sc) |
| 120 | { | ||
| 121 | return vfs_pressure_ratio(atomic_read(&qd_lru_count)); | ||
| 118 | } | 122 | } |
| 119 | 123 | ||
| 120 | static u64 qd2index(struct gfs2_quota_data *qd) | 124 | static u64 qd2index(struct gfs2_quota_data *qd) |
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 4f5e6e44ed83..0f64d9deb1b0 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h | |||
| @@ -53,8 +53,10 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) | |||
| 53 | return ret; | 53 | return ret; |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | extern int gfs2_shrink_qd_memory(struct shrinker *shrink, | 56 | extern unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, |
| 57 | struct shrink_control *sc); | 57 | struct shrink_control *sc); |
| 58 | extern unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, | ||
| 59 | struct shrink_control *sc); | ||
| 58 | extern const struct quotactl_ops gfs2_quotactl_ops; | 60 | extern const struct quotactl_ops gfs2_quotactl_ops; |
| 59 | 61 | ||
| 60 | #endif /* __QUOTA_DOT_H__ */ | 62 | #endif /* __QUOTA_DOT_H__ */ |
diff --git a/fs/inode.c b/fs/inode.c index 93a0625b46e4..b33ba8e021cc 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <linux/prefetch.h> | 17 | #include <linux/prefetch.h> |
| 18 | #include <linux/buffer_head.h> /* for inode_has_buffers */ | 18 | #include <linux/buffer_head.h> /* for inode_has_buffers */ |
| 19 | #include <linux/ratelimit.h> | 19 | #include <linux/ratelimit.h> |
| 20 | #include <linux/list_lru.h> | ||
| 20 | #include "internal.h" | 21 | #include "internal.h" |
| 21 | 22 | ||
| 22 | /* | 23 | /* |
| @@ -24,7 +25,7 @@ | |||
| 24 | * | 25 | * |
| 25 | * inode->i_lock protects: | 26 | * inode->i_lock protects: |
| 26 | * inode->i_state, inode->i_hash, __iget() | 27 | * inode->i_state, inode->i_hash, __iget() |
| 27 | * inode->i_sb->s_inode_lru_lock protects: | 28 | * Inode LRU list locks protect: |
| 28 | * inode->i_sb->s_inode_lru, inode->i_lru | 29 | * inode->i_sb->s_inode_lru, inode->i_lru |
| 29 | * inode_sb_list_lock protects: | 30 | * inode_sb_list_lock protects: |
| 30 | * sb->s_inodes, inode->i_sb_list | 31 | * sb->s_inodes, inode->i_sb_list |
| @@ -37,7 +38,7 @@ | |||
| 37 | * | 38 | * |
| 38 | * inode_sb_list_lock | 39 | * inode_sb_list_lock |
| 39 | * inode->i_lock | 40 | * inode->i_lock |
| 40 | * inode->i_sb->s_inode_lru_lock | 41 | * Inode LRU list locks |
| 41 | * | 42 | * |
| 42 | * bdi->wb.list_lock | 43 | * bdi->wb.list_lock |
| 43 | * inode->i_lock | 44 | * inode->i_lock |
| @@ -70,33 +71,33 @@ EXPORT_SYMBOL(empty_aops); | |||
| 70 | */ | 71 | */ |
| 71 | struct inodes_stat_t inodes_stat; | 72 | struct inodes_stat_t inodes_stat; |
| 72 | 73 | ||
| 73 | static DEFINE_PER_CPU(unsigned int, nr_inodes); | 74 | static DEFINE_PER_CPU(unsigned long, nr_inodes); |
| 74 | static DEFINE_PER_CPU(unsigned int, nr_unused); | 75 | static DEFINE_PER_CPU(unsigned long, nr_unused); |
| 75 | 76 | ||
| 76 | static struct kmem_cache *inode_cachep __read_mostly; | 77 | static struct kmem_cache *inode_cachep __read_mostly; |
| 77 | 78 | ||
| 78 | static int get_nr_inodes(void) | 79 | static long get_nr_inodes(void) |
| 79 | { | 80 | { |
| 80 | int i; | 81 | int i; |
| 81 | int sum = 0; | 82 | long sum = 0; |
| 82 | for_each_possible_cpu(i) | 83 | for_each_possible_cpu(i) |
| 83 | sum += per_cpu(nr_inodes, i); | 84 | sum += per_cpu(nr_inodes, i); |
| 84 | return sum < 0 ? 0 : sum; | 85 | return sum < 0 ? 0 : sum; |
| 85 | } | 86 | } |
| 86 | 87 | ||
| 87 | static inline int get_nr_inodes_unused(void) | 88 | static inline long get_nr_inodes_unused(void) |
| 88 | { | 89 | { |
| 89 | int i; | 90 | int i; |
| 90 | int sum = 0; | 91 | long sum = 0; |
| 91 | for_each_possible_cpu(i) | 92 | for_each_possible_cpu(i) |
| 92 | sum += per_cpu(nr_unused, i); | 93 | sum += per_cpu(nr_unused, i); |
| 93 | return sum < 0 ? 0 : sum; | 94 | return sum < 0 ? 0 : sum; |
| 94 | } | 95 | } |
| 95 | 96 | ||
| 96 | int get_nr_dirty_inodes(void) | 97 | long get_nr_dirty_inodes(void) |
| 97 | { | 98 | { |
| 98 | /* not actually dirty inodes, but a wild approximation */ | 99 | /* not actually dirty inodes, but a wild approximation */ |
| 99 | int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); | 100 | long nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); |
| 100 | return nr_dirty > 0 ? nr_dirty : 0; | 101 | return nr_dirty > 0 ? nr_dirty : 0; |
| 101 | } | 102 | } |
| 102 | 103 | ||
| @@ -109,7 +110,7 @@ int proc_nr_inodes(ctl_table *table, int write, | |||
| 109 | { | 110 | { |
| 110 | inodes_stat.nr_inodes = get_nr_inodes(); | 111 | inodes_stat.nr_inodes = get_nr_inodes(); |
| 111 | inodes_stat.nr_unused = get_nr_inodes_unused(); | 112 | inodes_stat.nr_unused = get_nr_inodes_unused(); |
| 112 | return proc_dointvec(table, write, buffer, lenp, ppos); | 113 | return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); |
| 113 | } | 114 | } |
| 114 | #endif | 115 | #endif |
| 115 | 116 | ||
| @@ -401,13 +402,8 @@ EXPORT_SYMBOL(ihold); | |||
| 401 | 402 | ||
| 402 | static void inode_lru_list_add(struct inode *inode) | 403 | static void inode_lru_list_add(struct inode *inode) |
| 403 | { | 404 | { |
| 404 | spin_lock(&inode->i_sb->s_inode_lru_lock); | 405 | if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru)) |
| 405 | if (list_empty(&inode->i_lru)) { | ||
| 406 | list_add(&inode->i_lru, &inode->i_sb->s_inode_lru); | ||
| 407 | inode->i_sb->s_nr_inodes_unused++; | ||
| 408 | this_cpu_inc(nr_unused); | 406 | this_cpu_inc(nr_unused); |
| 409 | } | ||
| 410 | spin_unlock(&inode->i_sb->s_inode_lru_lock); | ||
| 411 | } | 407 | } |
| 412 | 408 | ||
| 413 | /* | 409 | /* |
| @@ -425,13 +421,9 @@ void inode_add_lru(struct inode *inode) | |||
| 425 | 421 | ||
| 426 | static void inode_lru_list_del(struct inode *inode) | 422 | static void inode_lru_list_del(struct inode *inode) |
| 427 | { | 423 | { |
| 428 | spin_lock(&inode->i_sb->s_inode_lru_lock); | 424 | |
| 429 | if (!list_empty(&inode->i_lru)) { | 425 | if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru)) |
| 430 | list_del_init(&inode->i_lru); | ||
| 431 | inode->i_sb->s_nr_inodes_unused--; | ||
| 432 | this_cpu_dec(nr_unused); | 426 | this_cpu_dec(nr_unused); |
| 433 | } | ||
| 434 | spin_unlock(&inode->i_sb->s_inode_lru_lock); | ||
| 435 | } | 427 | } |
| 436 | 428 | ||
| 437 | /** | 429 | /** |
| @@ -675,24 +667,8 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) | |||
| 675 | return busy; | 667 | return busy; |
| 676 | } | 668 | } |
| 677 | 669 | ||
| 678 | static int can_unuse(struct inode *inode) | ||
| 679 | { | ||
| 680 | if (inode->i_state & ~I_REFERENCED) | ||
| 681 | return 0; | ||
| 682 | if (inode_has_buffers(inode)) | ||
| 683 | return 0; | ||
| 684 | if (atomic_read(&inode->i_count)) | ||
| 685 | return 0; | ||
| 686 | if (inode->i_data.nrpages) | ||
| 687 | return 0; | ||
| 688 | return 1; | ||
| 689 | } | ||
| 690 | |||
| 691 | /* | 670 | /* |
| 692 | * Walk the superblock inode LRU for freeable inodes and attempt to free them. | 671 | * Isolate the inode from the LRU in preparation for freeing it. |
| 693 | * This is called from the superblock shrinker function with a number of inodes | ||
| 694 | * to trim from the LRU. Inodes to be freed are moved to a temporary list and | ||
| 695 | * then are freed outside inode_lock by dispose_list(). | ||
| 696 | * | 672 | * |
| 697 | * Any inodes which are pinned purely because of attached pagecache have their | 673 | * Any inodes which are pinned purely because of attached pagecache have their |
| 698 | * pagecache removed. If the inode has metadata buffers attached to | 674 | * pagecache removed. If the inode has metadata buffers attached to |
| @@ -706,89 +682,82 @@ static int can_unuse(struct inode *inode) | |||
| 706 | * LRU does not have strict ordering. Hence we don't want to reclaim inodes | 682 | * LRU does not have strict ordering. Hence we don't want to reclaim inodes |
| 707 | * with this flag set because they are the inodes that are out of order. | 683 | * with this flag set because they are the inodes that are out of order. |
| 708 | */ | 684 | */ |
| 709 | void prune_icache_sb(struct super_block *sb, int nr_to_scan) | 685 | static enum lru_status |
| 686 | inode_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) | ||
| 710 | { | 687 | { |
| 711 | LIST_HEAD(freeable); | 688 | struct list_head *freeable = arg; |
| 712 | int nr_scanned; | 689 | struct inode *inode = container_of(item, struct inode, i_lru); |
| 713 | unsigned long reap = 0; | ||
| 714 | 690 | ||
| 715 | spin_lock(&sb->s_inode_lru_lock); | 691 | /* |
| 716 | for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) { | 692 | * we are inverting the lru lock/inode->i_lock here, so use a trylock. |
| 717 | struct inode *inode; | 693 | * If we fail to get the lock, just skip it. |
| 694 | */ | ||
| 695 | if (!spin_trylock(&inode->i_lock)) | ||
| 696 | return LRU_SKIP; | ||
| 718 | 697 | ||
| 719 | if (list_empty(&sb->s_inode_lru)) | 698 | /* |
| 720 | break; | 699 | * Referenced or dirty inodes are still in use. Give them another pass |
| 700 | * through the LRU as we canot reclaim them now. | ||
| 701 | */ | ||
| 702 | if (atomic_read(&inode->i_count) || | ||
| 703 | (inode->i_state & ~I_REFERENCED)) { | ||
| 704 | list_del_init(&inode->i_lru); | ||
| 705 | spin_unlock(&inode->i_lock); | ||
| 706 | this_cpu_dec(nr_unused); | ||
| 707 | return LRU_REMOVED; | ||
| 708 | } | ||
| 721 | 709 | ||
| 722 | inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru); | 710 | /* recently referenced inodes get one more pass */ |
| 711 | if (inode->i_state & I_REFERENCED) { | ||
| 712 | inode->i_state &= ~I_REFERENCED; | ||
| 713 | spin_unlock(&inode->i_lock); | ||
| 714 | return LRU_ROTATE; | ||
| 715 | } | ||
| 723 | 716 | ||
| 724 | /* | 717 | if (inode_has_buffers(inode) || inode->i_data.nrpages) { |
| 725 | * we are inverting the sb->s_inode_lru_lock/inode->i_lock here, | 718 | __iget(inode); |
| 726 | * so use a trylock. If we fail to get the lock, just move the | 719 | spin_unlock(&inode->i_lock); |
| 727 | * inode to the back of the list so we don't spin on it. | 720 | spin_unlock(lru_lock); |
| 728 | */ | 721 | if (remove_inode_buffers(inode)) { |
| 729 | if (!spin_trylock(&inode->i_lock)) { | 722 | unsigned long reap; |
| 730 | list_move(&inode->i_lru, &sb->s_inode_lru); | 723 | reap = invalidate_mapping_pages(&inode->i_data, 0, -1); |
| 731 | continue; | 724 | if (current_is_kswapd()) |
| 725 | __count_vm_events(KSWAPD_INODESTEAL, reap); | ||
| 726 | else | ||
| 727 | __count_vm_events(PGINODESTEAL, reap); | ||
| 728 | if (current->reclaim_state) | ||
| 729 | current->reclaim_state->reclaimed_slab += reap; | ||
| 732 | } | 730 | } |
| 731 | iput(inode); | ||
| 732 | spin_lock(lru_lock); | ||
| 733 | return LRU_RETRY; | ||
| 734 | } | ||
| 733 | 735 | ||
| 734 | /* | 736 | WARN_ON(inode->i_state & I_NEW); |
| 735 | * Referenced or dirty inodes are still in use. Give them | 737 | inode->i_state |= I_FREEING; |
| 736 | * another pass through the LRU as we canot reclaim them now. | 738 | list_move(&inode->i_lru, freeable); |
| 737 | */ | 739 | spin_unlock(&inode->i_lock); |
| 738 | if (atomic_read(&inode->i_count) || | ||
| 739 | (inode->i_state & ~I_REFERENCED)) { | ||
| 740 | list_del_init(&inode->i_lru); | ||
| 741 | spin_unlock(&inode->i_lock); | ||
| 742 | sb->s_nr_inodes_unused--; | ||
| 743 | this_cpu_dec(nr_unused); | ||
| 744 | continue; | ||
| 745 | } | ||
| 746 | 740 | ||
| 747 | /* recently referenced inodes get one more pass */ | 741 | this_cpu_dec(nr_unused); |
| 748 | if (inode->i_state & I_REFERENCED) { | 742 | return LRU_REMOVED; |
| 749 | inode->i_state &= ~I_REFERENCED; | 743 | } |
| 750 | list_move(&inode->i_lru, &sb->s_inode_lru); | ||
| 751 | spin_unlock(&inode->i_lock); | ||
| 752 | continue; | ||
| 753 | } | ||
| 754 | if (inode_has_buffers(inode) || inode->i_data.nrpages) { | ||
| 755 | __iget(inode); | ||
| 756 | spin_unlock(&inode->i_lock); | ||
| 757 | spin_unlock(&sb->s_inode_lru_lock); | ||
| 758 | if (remove_inode_buffers(inode)) | ||
| 759 | reap += invalidate_mapping_pages(&inode->i_data, | ||
| 760 | 0, -1); | ||
| 761 | iput(inode); | ||
| 762 | spin_lock(&sb->s_inode_lru_lock); | ||
| 763 | |||
| 764 | if (inode != list_entry(sb->s_inode_lru.next, | ||
| 765 | struct inode, i_lru)) | ||
| 766 | continue; /* wrong inode or list_empty */ | ||
| 767 | /* avoid lock inversions with trylock */ | ||
| 768 | if (!spin_trylock(&inode->i_lock)) | ||
| 769 | continue; | ||
| 770 | if (!can_unuse(inode)) { | ||
| 771 | spin_unlock(&inode->i_lock); | ||
| 772 | continue; | ||
| 773 | } | ||
| 774 | } | ||
| 775 | WARN_ON(inode->i_state & I_NEW); | ||
| 776 | inode->i_state |= I_FREEING; | ||
| 777 | spin_unlock(&inode->i_lock); | ||
| 778 | 744 | ||
| 779 | list_move(&inode->i_lru, &freeable); | 745 | /* |
| 780 | sb->s_nr_inodes_unused--; | 746 | * Walk the superblock inode LRU for freeable inodes and attempt to free them. |
| 781 | this_cpu_dec(nr_unused); | 747 | * This is called from the superblock shrinker function with a number of inodes |
| 782 | } | 748 | * to trim from the LRU. Inodes to be freed are moved to a temporary list and |
| 783 | if (current_is_kswapd()) | 749 | * then are freed outside inode_lock by dispose_list(). |
| 784 | __count_vm_events(KSWAPD_INODESTEAL, reap); | 750 | */ |
| 785 | else | 751 | long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan, |
| 786 | __count_vm_events(PGINODESTEAL, reap); | 752 | int nid) |
| 787 | spin_unlock(&sb->s_inode_lru_lock); | 753 | { |
| 788 | if (current->reclaim_state) | 754 | LIST_HEAD(freeable); |
| 789 | current->reclaim_state->reclaimed_slab += reap; | 755 | long freed; |
| 790 | 756 | ||
| 757 | freed = list_lru_walk_node(&sb->s_inode_lru, nid, inode_lru_isolate, | ||
| 758 | &freeable, &nr_to_scan); | ||
| 791 | dispose_list(&freeable); | 759 | dispose_list(&freeable); |
| 760 | return freed; | ||
| 792 | } | 761 | } |
| 793 | 762 | ||
| 794 | static void __wait_on_freeing_inode(struct inode *inode); | 763 | static void __wait_on_freeing_inode(struct inode *inode); |
diff --git a/fs/internal.h b/fs/internal.h index 2be46ea5dd0b..513e0d859a6c 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
| @@ -114,6 +114,8 @@ extern int open_check_o_direct(struct file *f); | |||
| 114 | * inode.c | 114 | * inode.c |
| 115 | */ | 115 | */ |
| 116 | extern spinlock_t inode_sb_list_lock; | 116 | extern spinlock_t inode_sb_list_lock; |
| 117 | extern long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan, | ||
| 118 | int nid); | ||
| 117 | extern void inode_add_lru(struct inode *inode); | 119 | extern void inode_add_lru(struct inode *inode); |
| 118 | 120 | ||
| 119 | /* | 121 | /* |
| @@ -121,7 +123,7 @@ extern void inode_add_lru(struct inode *inode); | |||
| 121 | */ | 123 | */ |
| 122 | extern void inode_wb_list_del(struct inode *inode); | 124 | extern void inode_wb_list_del(struct inode *inode); |
| 123 | 125 | ||
| 124 | extern int get_nr_dirty_inodes(void); | 126 | extern long get_nr_dirty_inodes(void); |
| 125 | extern void evict_inodes(struct super_block *); | 127 | extern void evict_inodes(struct super_block *); |
| 126 | extern int invalidate_inodes(struct super_block *, bool); | 128 | extern int invalidate_inodes(struct super_block *, bool); |
| 127 | 129 | ||
| @@ -130,6 +132,8 @@ extern int invalidate_inodes(struct super_block *, bool); | |||
| 130 | */ | 132 | */ |
| 131 | extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); | 133 | extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); |
| 132 | extern int d_set_mounted(struct dentry *dentry); | 134 | extern int d_set_mounted(struct dentry *dentry); |
| 135 | extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, | ||
| 136 | int nid); | ||
| 133 | 137 | ||
| 134 | /* | 138 | /* |
| 135 | * read_write.c | 139 | * read_write.c |
diff --git a/fs/mbcache.c b/fs/mbcache.c index 8c32ef3ba88e..e519e45bf673 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c | |||
| @@ -86,18 +86,6 @@ static LIST_HEAD(mb_cache_list); | |||
| 86 | static LIST_HEAD(mb_cache_lru_list); | 86 | static LIST_HEAD(mb_cache_lru_list); |
| 87 | static DEFINE_SPINLOCK(mb_cache_spinlock); | 87 | static DEFINE_SPINLOCK(mb_cache_spinlock); |
| 88 | 88 | ||
| 89 | /* | ||
| 90 | * What the mbcache registers as to get shrunk dynamically. | ||
| 91 | */ | ||
| 92 | |||
| 93 | static int mb_cache_shrink_fn(struct shrinker *shrink, | ||
| 94 | struct shrink_control *sc); | ||
| 95 | |||
| 96 | static struct shrinker mb_cache_shrinker = { | ||
| 97 | .shrink = mb_cache_shrink_fn, | ||
| 98 | .seeks = DEFAULT_SEEKS, | ||
| 99 | }; | ||
| 100 | |||
| 101 | static inline int | 89 | static inline int |
| 102 | __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) | 90 | __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) |
| 103 | { | 91 | { |
| @@ -151,7 +139,7 @@ forget: | |||
| 151 | 139 | ||
| 152 | 140 | ||
| 153 | /* | 141 | /* |
| 154 | * mb_cache_shrink_fn() memory pressure callback | 142 | * mb_cache_shrink_scan() memory pressure callback |
| 155 | * | 143 | * |
| 156 | * This function is called by the kernel memory management when memory | 144 | * This function is called by the kernel memory management when memory |
| 157 | * gets low. | 145 | * gets low. |
| @@ -159,17 +147,16 @@ forget: | |||
| 159 | * @shrink: (ignored) | 147 | * @shrink: (ignored) |
| 160 | * @sc: shrink_control passed from reclaim | 148 | * @sc: shrink_control passed from reclaim |
| 161 | * | 149 | * |
| 162 | * Returns the number of objects which are present in the cache. | 150 | * Returns the number of objects freed. |
| 163 | */ | 151 | */ |
| 164 | static int | 152 | static unsigned long |
| 165 | mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc) | 153 | mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
| 166 | { | 154 | { |
| 167 | LIST_HEAD(free_list); | 155 | LIST_HEAD(free_list); |
| 168 | struct mb_cache *cache; | ||
| 169 | struct mb_cache_entry *entry, *tmp; | 156 | struct mb_cache_entry *entry, *tmp; |
| 170 | int count = 0; | ||
| 171 | int nr_to_scan = sc->nr_to_scan; | 157 | int nr_to_scan = sc->nr_to_scan; |
| 172 | gfp_t gfp_mask = sc->gfp_mask; | 158 | gfp_t gfp_mask = sc->gfp_mask; |
| 159 | unsigned long freed = 0; | ||
| 173 | 160 | ||
| 174 | mb_debug("trying to free %d entries", nr_to_scan); | 161 | mb_debug("trying to free %d entries", nr_to_scan); |
| 175 | spin_lock(&mb_cache_spinlock); | 162 | spin_lock(&mb_cache_spinlock); |
| @@ -179,19 +166,37 @@ mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc) | |||
| 179 | struct mb_cache_entry, e_lru_list); | 166 | struct mb_cache_entry, e_lru_list); |
| 180 | list_move_tail(&ce->e_lru_list, &free_list); | 167 | list_move_tail(&ce->e_lru_list, &free_list); |
| 181 | __mb_cache_entry_unhash(ce); | 168 | __mb_cache_entry_unhash(ce); |
| 169 | freed++; | ||
| 170 | } | ||
| 171 | spin_unlock(&mb_cache_spinlock); | ||
| 172 | list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { | ||
| 173 | __mb_cache_entry_forget(entry, gfp_mask); | ||
| 182 | } | 174 | } |
| 175 | return freed; | ||
| 176 | } | ||
| 177 | |||
| 178 | static unsigned long | ||
| 179 | mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) | ||
| 180 | { | ||
| 181 | struct mb_cache *cache; | ||
| 182 | unsigned long count = 0; | ||
| 183 | |||
| 184 | spin_lock(&mb_cache_spinlock); | ||
| 183 | list_for_each_entry(cache, &mb_cache_list, c_cache_list) { | 185 | list_for_each_entry(cache, &mb_cache_list, c_cache_list) { |
| 184 | mb_debug("cache %s (%d)", cache->c_name, | 186 | mb_debug("cache %s (%d)", cache->c_name, |
| 185 | atomic_read(&cache->c_entry_count)); | 187 | atomic_read(&cache->c_entry_count)); |
| 186 | count += atomic_read(&cache->c_entry_count); | 188 | count += atomic_read(&cache->c_entry_count); |
| 187 | } | 189 | } |
| 188 | spin_unlock(&mb_cache_spinlock); | 190 | spin_unlock(&mb_cache_spinlock); |
| 189 | list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { | 191 | |
| 190 | __mb_cache_entry_forget(entry, gfp_mask); | 192 | return vfs_pressure_ratio(count); |
| 191 | } | ||
| 192 | return (count / 100) * sysctl_vfs_cache_pressure; | ||
| 193 | } | 193 | } |
| 194 | 194 | ||
| 195 | static struct shrinker mb_cache_shrinker = { | ||
| 196 | .count_objects = mb_cache_shrink_count, | ||
| 197 | .scan_objects = mb_cache_shrink_scan, | ||
| 198 | .seeks = DEFAULT_SEEKS, | ||
| 199 | }; | ||
| 195 | 200 | ||
| 196 | /* | 201 | /* |
| 197 | * mb_cache_create() create a new cache | 202 | * mb_cache_create() create a new cache |
diff --git a/fs/namei.c b/fs/namei.c index 409a441ba2ae..0dc4cbf21f37 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
| @@ -660,29 +660,6 @@ static __always_inline void set_root_rcu(struct nameidata *nd) | |||
| 660 | } | 660 | } |
| 661 | } | 661 | } |
| 662 | 662 | ||
| 663 | static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) | ||
| 664 | { | ||
| 665 | int ret; | ||
| 666 | |||
| 667 | if (IS_ERR(link)) | ||
| 668 | goto fail; | ||
| 669 | |||
| 670 | if (*link == '/') { | ||
| 671 | set_root(nd); | ||
| 672 | path_put(&nd->path); | ||
| 673 | nd->path = nd->root; | ||
| 674 | path_get(&nd->root); | ||
| 675 | nd->flags |= LOOKUP_JUMPED; | ||
| 676 | } | ||
| 677 | nd->inode = nd->path.dentry->d_inode; | ||
| 678 | |||
| 679 | ret = link_path_walk(link, nd); | ||
| 680 | return ret; | ||
| 681 | fail: | ||
| 682 | path_put(&nd->path); | ||
| 683 | return PTR_ERR(link); | ||
| 684 | } | ||
| 685 | |||
| 686 | static void path_put_conditional(struct path *path, struct nameidata *nd) | 663 | static void path_put_conditional(struct path *path, struct nameidata *nd) |
| 687 | { | 664 | { |
| 688 | dput(path->dentry); | 665 | dput(path->dentry); |
| @@ -874,7 +851,20 @@ follow_link(struct path *link, struct nameidata *nd, void **p) | |||
| 874 | error = 0; | 851 | error = 0; |
| 875 | s = nd_get_link(nd); | 852 | s = nd_get_link(nd); |
| 876 | if (s) { | 853 | if (s) { |
| 877 | error = __vfs_follow_link(nd, s); | 854 | if (unlikely(IS_ERR(s))) { |
| 855 | path_put(&nd->path); | ||
| 856 | put_link(nd, link, *p); | ||
| 857 | return PTR_ERR(s); | ||
| 858 | } | ||
| 859 | if (*s == '/') { | ||
| 860 | set_root(nd); | ||
| 861 | path_put(&nd->path); | ||
| 862 | nd->path = nd->root; | ||
| 863 | path_get(&nd->root); | ||
| 864 | nd->flags |= LOOKUP_JUMPED; | ||
| 865 | } | ||
| 866 | nd->inode = nd->path.dentry->d_inode; | ||
| 867 | error = link_path_walk(s, nd); | ||
| 878 | if (unlikely(error)) | 868 | if (unlikely(error)) |
| 879 | put_link(nd, link, *p); | 869 | put_link(nd, link, *p); |
| 880 | } | 870 | } |
| @@ -2271,12 +2261,15 @@ mountpoint_last(struct nameidata *nd, struct path *path) | |||
| 2271 | dentry = d_alloc(dir, &nd->last); | 2261 | dentry = d_alloc(dir, &nd->last); |
| 2272 | if (!dentry) { | 2262 | if (!dentry) { |
| 2273 | error = -ENOMEM; | 2263 | error = -ENOMEM; |
| 2264 | mutex_unlock(&dir->d_inode->i_mutex); | ||
| 2274 | goto out; | 2265 | goto out; |
| 2275 | } | 2266 | } |
| 2276 | dentry = lookup_real(dir->d_inode, dentry, nd->flags); | 2267 | dentry = lookup_real(dir->d_inode, dentry, nd->flags); |
| 2277 | error = PTR_ERR(dentry); | 2268 | error = PTR_ERR(dentry); |
| 2278 | if (IS_ERR(dentry)) | 2269 | if (IS_ERR(dentry)) { |
| 2270 | mutex_unlock(&dir->d_inode->i_mutex); | ||
| 2279 | goto out; | 2271 | goto out; |
| 2272 | } | ||
| 2280 | } | 2273 | } |
| 2281 | mutex_unlock(&dir->d_inode->i_mutex); | 2274 | mutex_unlock(&dir->d_inode->i_mutex); |
| 2282 | 2275 | ||
| @@ -4236,11 +4229,6 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) | |||
| 4236 | return res; | 4229 | return res; |
| 4237 | } | 4230 | } |
| 4238 | 4231 | ||
| 4239 | int vfs_follow_link(struct nameidata *nd, const char *link) | ||
| 4240 | { | ||
| 4241 | return __vfs_follow_link(nd, link); | ||
| 4242 | } | ||
| 4243 | |||
| 4244 | /* get the link contents into pagecache */ | 4232 | /* get the link contents into pagecache */ |
| 4245 | static char *page_getlink(struct dentry * dentry, struct page **ppage) | 4233 | static char *page_getlink(struct dentry * dentry, struct page **ppage) |
| 4246 | { | 4234 | { |
| @@ -4352,7 +4340,6 @@ EXPORT_SYMBOL(vfs_path_lookup); | |||
| 4352 | EXPORT_SYMBOL(inode_permission); | 4340 | EXPORT_SYMBOL(inode_permission); |
| 4353 | EXPORT_SYMBOL(unlock_rename); | 4341 | EXPORT_SYMBOL(unlock_rename); |
| 4354 | EXPORT_SYMBOL(vfs_create); | 4342 | EXPORT_SYMBOL(vfs_create); |
| 4355 | EXPORT_SYMBOL(vfs_follow_link); | ||
| 4356 | EXPORT_SYMBOL(vfs_link); | 4343 | EXPORT_SYMBOL(vfs_link); |
| 4357 | EXPORT_SYMBOL(vfs_mkdir); | 4344 | EXPORT_SYMBOL(vfs_mkdir); |
| 4358 | EXPORT_SYMBOL(vfs_mknod); | 4345 | EXPORT_SYMBOL(vfs_mknod); |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e79bc6ce828e..de434f309af0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
| @@ -2006,17 +2006,18 @@ static void nfs_access_free_list(struct list_head *head) | |||
| 2006 | } | 2006 | } |
| 2007 | } | 2007 | } |
| 2008 | 2008 | ||
| 2009 | int nfs_access_cache_shrinker(struct shrinker *shrink, | 2009 | unsigned long |
| 2010 | struct shrink_control *sc) | 2010 | nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc) |
| 2011 | { | 2011 | { |
| 2012 | LIST_HEAD(head); | 2012 | LIST_HEAD(head); |
| 2013 | struct nfs_inode *nfsi, *next; | 2013 | struct nfs_inode *nfsi, *next; |
| 2014 | struct nfs_access_entry *cache; | 2014 | struct nfs_access_entry *cache; |
| 2015 | int nr_to_scan = sc->nr_to_scan; | 2015 | int nr_to_scan = sc->nr_to_scan; |
| 2016 | gfp_t gfp_mask = sc->gfp_mask; | 2016 | gfp_t gfp_mask = sc->gfp_mask; |
| 2017 | long freed = 0; | ||
| 2017 | 2018 | ||
| 2018 | if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) | 2019 | if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) |
| 2019 | return (nr_to_scan == 0) ? 0 : -1; | 2020 | return SHRINK_STOP; |
| 2020 | 2021 | ||
| 2021 | spin_lock(&nfs_access_lru_lock); | 2022 | spin_lock(&nfs_access_lru_lock); |
| 2022 | list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { | 2023 | list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { |
| @@ -2032,6 +2033,7 @@ int nfs_access_cache_shrinker(struct shrinker *shrink, | |||
| 2032 | struct nfs_access_entry, lru); | 2033 | struct nfs_access_entry, lru); |
| 2033 | list_move(&cache->lru, &head); | 2034 | list_move(&cache->lru, &head); |
| 2034 | rb_erase(&cache->rb_node, &nfsi->access_cache); | 2035 | rb_erase(&cache->rb_node, &nfsi->access_cache); |
| 2036 | freed++; | ||
| 2035 | if (!list_empty(&nfsi->access_cache_entry_lru)) | 2037 | if (!list_empty(&nfsi->access_cache_entry_lru)) |
| 2036 | list_move_tail(&nfsi->access_cache_inode_lru, | 2038 | list_move_tail(&nfsi->access_cache_inode_lru, |
| 2037 | &nfs_access_lru_list); | 2039 | &nfs_access_lru_list); |
| @@ -2046,7 +2048,13 @@ remove_lru_entry: | |||
| 2046 | } | 2048 | } |
| 2047 | spin_unlock(&nfs_access_lru_lock); | 2049 | spin_unlock(&nfs_access_lru_lock); |
| 2048 | nfs_access_free_list(&head); | 2050 | nfs_access_free_list(&head); |
| 2049 | return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure; | 2051 | return freed; |
| 2052 | } | ||
| 2053 | |||
| 2054 | unsigned long | ||
| 2055 | nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc) | ||
| 2056 | { | ||
| 2057 | return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries)); | ||
| 2050 | } | 2058 | } |
| 2051 | 2059 | ||
| 2052 | static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head) | 2060 | static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head) |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d388302c005f..38da8c2b81ac 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
| @@ -273,8 +273,10 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp, | |||
| 273 | const char *ip_addr); | 273 | const char *ip_addr); |
| 274 | 274 | ||
| 275 | /* dir.c */ | 275 | /* dir.c */ |
| 276 | extern int nfs_access_cache_shrinker(struct shrinker *shrink, | 276 | extern unsigned long nfs_access_cache_count(struct shrinker *shrink, |
| 277 | struct shrink_control *sc); | 277 | struct shrink_control *sc); |
| 278 | extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, | ||
| 279 | struct shrink_control *sc); | ||
| 278 | struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); | 280 | struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); |
| 279 | int nfs_create(struct inode *, struct dentry *, umode_t, bool); | 281 | int nfs_create(struct inode *, struct dentry *, umode_t, bool); |
| 280 | int nfs_mkdir(struct inode *, struct dentry *, umode_t); | 282 | int nfs_mkdir(struct inode *, struct dentry *, umode_t); |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5793f24613c8..a03b9c6f9489 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
| @@ -360,7 +360,8 @@ static void unregister_nfs4_fs(void) | |||
| 360 | #endif | 360 | #endif |
| 361 | 361 | ||
| 362 | static struct shrinker acl_shrinker = { | 362 | static struct shrinker acl_shrinker = { |
| 363 | .shrink = nfs_access_cache_shrinker, | 363 | .count_objects = nfs_access_cache_count, |
| 364 | .scan_objects = nfs_access_cache_scan, | ||
| 364 | .seeks = DEFAULT_SEEKS, | 365 | .seeks = DEFAULT_SEEKS, |
| 365 | }; | 366 | }; |
| 366 | 367 | ||
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index e76244edd748..9186c7ce0b14 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c | |||
| @@ -59,11 +59,14 @@ static unsigned int longest_chain_cachesize; | |||
| 59 | 59 | ||
| 60 | static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); | 60 | static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); |
| 61 | static void cache_cleaner_func(struct work_struct *unused); | 61 | static void cache_cleaner_func(struct work_struct *unused); |
| 62 | static int nfsd_reply_cache_shrink(struct shrinker *shrink, | 62 | static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, |
| 63 | struct shrink_control *sc); | 63 | struct shrink_control *sc); |
| 64 | static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, | ||
| 65 | struct shrink_control *sc); | ||
| 64 | 66 | ||
| 65 | static struct shrinker nfsd_reply_cache_shrinker = { | 67 | static struct shrinker nfsd_reply_cache_shrinker = { |
| 66 | .shrink = nfsd_reply_cache_shrink, | 68 | .scan_objects = nfsd_reply_cache_scan, |
| 69 | .count_objects = nfsd_reply_cache_count, | ||
| 67 | .seeks = 1, | 70 | .seeks = 1, |
| 68 | }; | 71 | }; |
| 69 | 72 | ||
| @@ -232,16 +235,18 @@ nfsd_cache_entry_expired(struct svc_cacherep *rp) | |||
| 232 | * Walk the LRU list and prune off entries that are older than RC_EXPIRE. | 235 | * Walk the LRU list and prune off entries that are older than RC_EXPIRE. |
| 233 | * Also prune the oldest ones when the total exceeds the max number of entries. | 236 | * Also prune the oldest ones when the total exceeds the max number of entries. |
| 234 | */ | 237 | */ |
| 235 | static void | 238 | static long |
| 236 | prune_cache_entries(void) | 239 | prune_cache_entries(void) |
| 237 | { | 240 | { |
| 238 | struct svc_cacherep *rp, *tmp; | 241 | struct svc_cacherep *rp, *tmp; |
| 242 | long freed = 0; | ||
| 239 | 243 | ||
| 240 | list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { | 244 | list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { |
| 241 | if (!nfsd_cache_entry_expired(rp) && | 245 | if (!nfsd_cache_entry_expired(rp) && |
| 242 | num_drc_entries <= max_drc_entries) | 246 | num_drc_entries <= max_drc_entries) |
| 243 | break; | 247 | break; |
| 244 | nfsd_reply_cache_free_locked(rp); | 248 | nfsd_reply_cache_free_locked(rp); |
| 249 | freed++; | ||
| 245 | } | 250 | } |
| 246 | 251 | ||
| 247 | /* | 252 | /* |
| @@ -254,6 +259,7 @@ prune_cache_entries(void) | |||
| 254 | cancel_delayed_work(&cache_cleaner); | 259 | cancel_delayed_work(&cache_cleaner); |
| 255 | else | 260 | else |
| 256 | mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); | 261 | mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); |
| 262 | return freed; | ||
| 257 | } | 263 | } |
| 258 | 264 | ||
| 259 | static void | 265 | static void |
| @@ -264,20 +270,28 @@ cache_cleaner_func(struct work_struct *unused) | |||
| 264 | spin_unlock(&cache_lock); | 270 | spin_unlock(&cache_lock); |
| 265 | } | 271 | } |
| 266 | 272 | ||
| 267 | static int | 273 | static unsigned long |
| 268 | nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc) | 274 | nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) |
| 269 | { | 275 | { |
| 270 | unsigned int num; | 276 | unsigned long num; |
| 271 | 277 | ||
| 272 | spin_lock(&cache_lock); | 278 | spin_lock(&cache_lock); |
| 273 | if (sc->nr_to_scan) | ||
| 274 | prune_cache_entries(); | ||
| 275 | num = num_drc_entries; | 279 | num = num_drc_entries; |
| 276 | spin_unlock(&cache_lock); | 280 | spin_unlock(&cache_lock); |
| 277 | 281 | ||
| 278 | return num; | 282 | return num; |
| 279 | } | 283 | } |
| 280 | 284 | ||
| 285 | static unsigned long | ||
| 286 | nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) | ||
| 287 | { | ||
| 288 | unsigned long freed; | ||
| 289 | |||
| 290 | spin_lock(&cache_lock); | ||
| 291 | freed = prune_cache_entries(); | ||
| 292 | spin_unlock(&cache_lock); | ||
| 293 | return freed; | ||
| 294 | } | ||
| 281 | /* | 295 | /* |
| 282 | * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes | 296 | * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes |
| 283 | */ | 297 | */ |
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 9a702e193538..831d49a4111f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
| @@ -687,45 +687,37 @@ int dquot_quota_sync(struct super_block *sb, int type) | |||
| 687 | } | 687 | } |
| 688 | EXPORT_SYMBOL(dquot_quota_sync); | 688 | EXPORT_SYMBOL(dquot_quota_sync); |
| 689 | 689 | ||
| 690 | /* Free unused dquots from cache */ | 690 | static unsigned long |
| 691 | static void prune_dqcache(int count) | 691 | dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
| 692 | { | 692 | { |
| 693 | struct list_head *head; | 693 | struct list_head *head; |
| 694 | struct dquot *dquot; | 694 | struct dquot *dquot; |
| 695 | unsigned long freed = 0; | ||
| 695 | 696 | ||
| 696 | head = free_dquots.prev; | 697 | head = free_dquots.prev; |
| 697 | while (head != &free_dquots && count) { | 698 | while (head != &free_dquots && sc->nr_to_scan) { |
| 698 | dquot = list_entry(head, struct dquot, dq_free); | 699 | dquot = list_entry(head, struct dquot, dq_free); |
| 699 | remove_dquot_hash(dquot); | 700 | remove_dquot_hash(dquot); |
| 700 | remove_free_dquot(dquot); | 701 | remove_free_dquot(dquot); |
| 701 | remove_inuse(dquot); | 702 | remove_inuse(dquot); |
| 702 | do_destroy_dquot(dquot); | 703 | do_destroy_dquot(dquot); |
| 703 | count--; | 704 | sc->nr_to_scan--; |
| 705 | freed++; | ||
| 704 | head = free_dquots.prev; | 706 | head = free_dquots.prev; |
| 705 | } | 707 | } |
| 708 | return freed; | ||
| 706 | } | 709 | } |
| 707 | 710 | ||
| 708 | /* | 711 | static unsigned long |
| 709 | * This is called from kswapd when we think we need some | 712 | dqcache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) |
| 710 | * more memory | ||
| 711 | */ | ||
| 712 | static int shrink_dqcache_memory(struct shrinker *shrink, | ||
| 713 | struct shrink_control *sc) | ||
| 714 | { | 713 | { |
| 715 | int nr = sc->nr_to_scan; | 714 | return vfs_pressure_ratio( |
| 716 | 715 | percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS])); | |
| 717 | if (nr) { | ||
| 718 | spin_lock(&dq_list_lock); | ||
| 719 | prune_dqcache(nr); | ||
| 720 | spin_unlock(&dq_list_lock); | ||
| 721 | } | ||
| 722 | return ((unsigned) | ||
| 723 | percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS]) | ||
| 724 | /100) * sysctl_vfs_cache_pressure; | ||
| 725 | } | 716 | } |
| 726 | 717 | ||
| 727 | static struct shrinker dqcache_shrinker = { | 718 | static struct shrinker dqcache_shrinker = { |
| 728 | .shrink = shrink_dqcache_memory, | 719 | .count_objects = dqcache_shrink_count, |
| 720 | .scan_objects = dqcache_shrink_scan, | ||
| 729 | .seeks = DEFAULT_SEEKS, | 721 | .seeks = DEFAULT_SEEKS, |
| 730 | }; | 722 | }; |
| 731 | 723 | ||
diff --git a/fs/super.c b/fs/super.c index f6961ea84c56..3a96c9783a8b 100644 --- a/fs/super.c +++ b/fs/super.c | |||
| @@ -53,11 +53,15 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = { | |||
| 53 | * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we | 53 | * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we |
| 54 | * take a passive reference to the superblock to avoid this from occurring. | 54 | * take a passive reference to the superblock to avoid this from occurring. |
| 55 | */ | 55 | */ |
| 56 | static int prune_super(struct shrinker *shrink, struct shrink_control *sc) | 56 | static unsigned long super_cache_scan(struct shrinker *shrink, |
| 57 | struct shrink_control *sc) | ||
| 57 | { | 58 | { |
| 58 | struct super_block *sb; | 59 | struct super_block *sb; |
| 59 | int fs_objects = 0; | 60 | long fs_objects = 0; |
| 60 | int total_objects; | 61 | long total_objects; |
| 62 | long freed = 0; | ||
| 63 | long dentries; | ||
| 64 | long inodes; | ||
| 61 | 65 | ||
| 62 | sb = container_of(shrink, struct super_block, s_shrink); | 66 | sb = container_of(shrink, struct super_block, s_shrink); |
| 63 | 67 | ||
| @@ -65,46 +69,62 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc) | |||
| 65 | * Deadlock avoidance. We may hold various FS locks, and we don't want | 69 | * Deadlock avoidance. We may hold various FS locks, and we don't want |
| 66 | * to recurse into the FS that called us in clear_inode() and friends.. | 70 | * to recurse into the FS that called us in clear_inode() and friends.. |
| 67 | */ | 71 | */ |
| 68 | if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS)) | 72 | if (!(sc->gfp_mask & __GFP_FS)) |
| 69 | return -1; | 73 | return SHRINK_STOP; |
| 70 | 74 | ||
| 71 | if (!grab_super_passive(sb)) | 75 | if (!grab_super_passive(sb)) |
| 72 | return -1; | 76 | return SHRINK_STOP; |
| 73 | 77 | ||
| 74 | if (sb->s_op->nr_cached_objects) | 78 | if (sb->s_op->nr_cached_objects) |
| 75 | fs_objects = sb->s_op->nr_cached_objects(sb); | 79 | fs_objects = sb->s_op->nr_cached_objects(sb, sc->nid); |
| 76 | |||
| 77 | total_objects = sb->s_nr_dentry_unused + | ||
| 78 | sb->s_nr_inodes_unused + fs_objects + 1; | ||
| 79 | |||
| 80 | if (sc->nr_to_scan) { | ||
| 81 | int dentries; | ||
| 82 | int inodes; | ||
| 83 | |||
| 84 | /* proportion the scan between the caches */ | ||
| 85 | dentries = (sc->nr_to_scan * sb->s_nr_dentry_unused) / | ||
| 86 | total_objects; | ||
| 87 | inodes = (sc->nr_to_scan * sb->s_nr_inodes_unused) / | ||
| 88 | total_objects; | ||
| 89 | if (fs_objects) | ||
| 90 | fs_objects = (sc->nr_to_scan * fs_objects) / | ||
| 91 | total_objects; | ||
| 92 | /* | ||
| 93 | * prune the dcache first as the icache is pinned by it, then | ||
| 94 | * prune the icache, followed by the filesystem specific caches | ||
| 95 | */ | ||
| 96 | prune_dcache_sb(sb, dentries); | ||
| 97 | prune_icache_sb(sb, inodes); | ||
| 98 | 80 | ||
| 99 | if (fs_objects && sb->s_op->free_cached_objects) { | 81 | inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid); |
| 100 | sb->s_op->free_cached_objects(sb, fs_objects); | 82 | dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid); |
| 101 | fs_objects = sb->s_op->nr_cached_objects(sb); | 83 | total_objects = dentries + inodes + fs_objects + 1; |
| 102 | } | 84 | |
| 103 | total_objects = sb->s_nr_dentry_unused + | 85 | /* proportion the scan between the caches */ |
| 104 | sb->s_nr_inodes_unused + fs_objects; | 86 | dentries = mult_frac(sc->nr_to_scan, dentries, total_objects); |
| 87 | inodes = mult_frac(sc->nr_to_scan, inodes, total_objects); | ||
| 88 | |||
| 89 | /* | ||
| 90 | * prune the dcache first as the icache is pinned by it, then | ||
| 91 | * prune the icache, followed by the filesystem specific caches | ||
| 92 | */ | ||
| 93 | freed = prune_dcache_sb(sb, dentries, sc->nid); | ||
| 94 | freed += prune_icache_sb(sb, inodes, sc->nid); | ||
| 95 | |||
| 96 | if (fs_objects) { | ||
| 97 | fs_objects = mult_frac(sc->nr_to_scan, fs_objects, | ||
| 98 | total_objects); | ||
| 99 | freed += sb->s_op->free_cached_objects(sb, fs_objects, | ||
| 100 | sc->nid); | ||
| 105 | } | 101 | } |
| 106 | 102 | ||
| 107 | total_objects = (total_objects / 100) * sysctl_vfs_cache_pressure; | 103 | drop_super(sb); |
| 104 | return freed; | ||
| 105 | } | ||
| 106 | |||
| 107 | static unsigned long super_cache_count(struct shrinker *shrink, | ||
| 108 | struct shrink_control *sc) | ||
| 109 | { | ||
| 110 | struct super_block *sb; | ||
| 111 | long total_objects = 0; | ||
| 112 | |||
| 113 | sb = container_of(shrink, struct super_block, s_shrink); | ||
| 114 | |||
| 115 | if (!grab_super_passive(sb)) | ||
| 116 | return 0; | ||
| 117 | |||
| 118 | if (sb->s_op && sb->s_op->nr_cached_objects) | ||
| 119 | total_objects = sb->s_op->nr_cached_objects(sb, | ||
| 120 | sc->nid); | ||
| 121 | |||
| 122 | total_objects += list_lru_count_node(&sb->s_dentry_lru, | ||
| 123 | sc->nid); | ||
| 124 | total_objects += list_lru_count_node(&sb->s_inode_lru, | ||
| 125 | sc->nid); | ||
| 126 | |||
| 127 | total_objects = vfs_pressure_ratio(total_objects); | ||
| 108 | drop_super(sb); | 128 | drop_super(sb); |
| 109 | return total_objects; | 129 | return total_objects; |
| 110 | } | 130 | } |
| @@ -175,9 +195,12 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
| 175 | INIT_HLIST_NODE(&s->s_instances); | 195 | INIT_HLIST_NODE(&s->s_instances); |
| 176 | INIT_HLIST_BL_HEAD(&s->s_anon); | 196 | INIT_HLIST_BL_HEAD(&s->s_anon); |
| 177 | INIT_LIST_HEAD(&s->s_inodes); | 197 | INIT_LIST_HEAD(&s->s_inodes); |
| 178 | INIT_LIST_HEAD(&s->s_dentry_lru); | 198 | |
| 179 | INIT_LIST_HEAD(&s->s_inode_lru); | 199 | if (list_lru_init(&s->s_dentry_lru)) |
| 180 | spin_lock_init(&s->s_inode_lru_lock); | 200 | goto err_out; |
| 201 | if (list_lru_init(&s->s_inode_lru)) | ||
| 202 | goto err_out_dentry_lru; | ||
| 203 | |||
| 181 | INIT_LIST_HEAD(&s->s_mounts); | 204 | INIT_LIST_HEAD(&s->s_mounts); |
| 182 | init_rwsem(&s->s_umount); | 205 | init_rwsem(&s->s_umount); |
| 183 | lockdep_set_class(&s->s_umount, &type->s_umount_key); | 206 | lockdep_set_class(&s->s_umount, &type->s_umount_key); |
| @@ -210,11 +233,16 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
| 210 | s->cleancache_poolid = -1; | 233 | s->cleancache_poolid = -1; |
| 211 | 234 | ||
| 212 | s->s_shrink.seeks = DEFAULT_SEEKS; | 235 | s->s_shrink.seeks = DEFAULT_SEEKS; |
| 213 | s->s_shrink.shrink = prune_super; | 236 | s->s_shrink.scan_objects = super_cache_scan; |
| 237 | s->s_shrink.count_objects = super_cache_count; | ||
| 214 | s->s_shrink.batch = 1024; | 238 | s->s_shrink.batch = 1024; |
| 239 | s->s_shrink.flags = SHRINKER_NUMA_AWARE; | ||
| 215 | } | 240 | } |
| 216 | out: | 241 | out: |
| 217 | return s; | 242 | return s; |
| 243 | |||
| 244 | err_out_dentry_lru: | ||
| 245 | list_lru_destroy(&s->s_dentry_lru); | ||
| 218 | err_out: | 246 | err_out: |
| 219 | security_sb_free(s); | 247 | security_sb_free(s); |
| 220 | #ifdef CONFIG_SMP | 248 | #ifdef CONFIG_SMP |
| @@ -295,6 +323,9 @@ void deactivate_locked_super(struct super_block *s) | |||
| 295 | 323 | ||
| 296 | /* caches are now gone, we can safely kill the shrinker now */ | 324 | /* caches are now gone, we can safely kill the shrinker now */ |
| 297 | unregister_shrinker(&s->s_shrink); | 325 | unregister_shrinker(&s->s_shrink); |
| 326 | list_lru_destroy(&s->s_dentry_lru); | ||
| 327 | list_lru_destroy(&s->s_inode_lru); | ||
| 328 | |||
| 298 | put_filesystem(fs); | 329 | put_filesystem(fs); |
| 299 | put_super(s); | 330 | put_super(s); |
| 300 | } else { | 331 | } else { |
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index 9e1d05666fed..f35135e28e96 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c | |||
| @@ -277,18 +277,25 @@ static int kick_a_thread(void) | |||
| 277 | return 0; | 277 | return 0; |
| 278 | } | 278 | } |
| 279 | 279 | ||
| 280 | int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) | 280 | unsigned long ubifs_shrink_count(struct shrinker *shrink, |
| 281 | struct shrink_control *sc) | ||
| 281 | { | 282 | { |
| 282 | int nr = sc->nr_to_scan; | ||
| 283 | int freed, contention = 0; | ||
| 284 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); | 283 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); |
| 285 | 284 | ||
| 286 | if (nr == 0) | 285 | /* |
| 287 | /* | 286 | * Due to the way UBIFS updates the clean znode counter it may |
| 288 | * Due to the way UBIFS updates the clean znode counter it may | 287 | * temporarily be negative. |
| 289 | * temporarily be negative. | 288 | */ |
| 290 | */ | 289 | return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; |
| 291 | return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; | 290 | } |
| 291 | |||
| 292 | unsigned long ubifs_shrink_scan(struct shrinker *shrink, | ||
| 293 | struct shrink_control *sc) | ||
| 294 | { | ||
| 295 | unsigned long nr = sc->nr_to_scan; | ||
| 296 | int contention = 0; | ||
| 297 | unsigned long freed; | ||
| 298 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); | ||
| 292 | 299 | ||
| 293 | if (!clean_zn_cnt) { | 300 | if (!clean_zn_cnt) { |
| 294 | /* | 301 | /* |
| @@ -316,10 +323,10 @@ int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) | |||
| 316 | 323 | ||
| 317 | if (!freed && contention) { | 324 | if (!freed && contention) { |
| 318 | dbg_tnc("freed nothing, but contention"); | 325 | dbg_tnc("freed nothing, but contention"); |
| 319 | return -1; | 326 | return SHRINK_STOP; |
| 320 | } | 327 | } |
| 321 | 328 | ||
| 322 | out: | 329 | out: |
| 323 | dbg_tnc("%d znodes were freed, requested %d", freed, nr); | 330 | dbg_tnc("%lu znodes were freed, requested %lu", freed, nr); |
| 324 | return freed; | 331 | return freed; |
| 325 | } | 332 | } |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 879b9976c12b..3e4aa7281e04 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
| @@ -49,7 +49,8 @@ struct kmem_cache *ubifs_inode_slab; | |||
| 49 | 49 | ||
| 50 | /* UBIFS TNC shrinker description */ | 50 | /* UBIFS TNC shrinker description */ |
| 51 | static struct shrinker ubifs_shrinker_info = { | 51 | static struct shrinker ubifs_shrinker_info = { |
| 52 | .shrink = ubifs_shrinker, | 52 | .scan_objects = ubifs_shrink_scan, |
| 53 | .count_objects = ubifs_shrink_count, | ||
| 53 | .seeks = DEFAULT_SEEKS, | 54 | .seeks = DEFAULT_SEEKS, |
| 54 | }; | 55 | }; |
| 55 | 56 | ||
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index b2babce4d70f..e8c8cfe1435c 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
| @@ -1624,7 +1624,10 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); | |||
| 1624 | int ubifs_tnc_end_commit(struct ubifs_info *c); | 1624 | int ubifs_tnc_end_commit(struct ubifs_info *c); |
| 1625 | 1625 | ||
| 1626 | /* shrinker.c */ | 1626 | /* shrinker.c */ |
| 1627 | int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc); | 1627 | unsigned long ubifs_shrink_scan(struct shrinker *shrink, |
| 1628 | struct shrink_control *sc); | ||
| 1629 | unsigned long ubifs_shrink_count(struct shrinker *shrink, | ||
| 1630 | struct shrink_control *sc); | ||
| 1628 | 1631 | ||
| 1629 | /* commit.c */ | 1632 | /* commit.c */ |
| 1630 | int ubifs_bg_thread(void *info); | 1633 | int ubifs_bg_thread(void *info); |
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index c06823fe10d3..263470075ea2 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
| @@ -81,54 +81,6 @@ xfs_buf_vmap_len( | |||
| 81 | } | 81 | } |
| 82 | 82 | ||
| 83 | /* | 83 | /* |
| 84 | * xfs_buf_lru_add - add a buffer to the LRU. | ||
| 85 | * | ||
| 86 | * The LRU takes a new reference to the buffer so that it will only be freed | ||
| 87 | * once the shrinker takes the buffer off the LRU. | ||
| 88 | */ | ||
| 89 | STATIC void | ||
| 90 | xfs_buf_lru_add( | ||
| 91 | struct xfs_buf *bp) | ||
| 92 | { | ||
| 93 | struct xfs_buftarg *btp = bp->b_target; | ||
| 94 | |||
| 95 | spin_lock(&btp->bt_lru_lock); | ||
| 96 | if (list_empty(&bp->b_lru)) { | ||
| 97 | atomic_inc(&bp->b_hold); | ||
| 98 | list_add_tail(&bp->b_lru, &btp->bt_lru); | ||
| 99 | btp->bt_lru_nr++; | ||
| 100 | bp->b_lru_flags &= ~_XBF_LRU_DISPOSE; | ||
| 101 | } | ||
| 102 | spin_unlock(&btp->bt_lru_lock); | ||
| 103 | } | ||
| 104 | |||
| 105 | /* | ||
| 106 | * xfs_buf_lru_del - remove a buffer from the LRU | ||
| 107 | * | ||
| 108 | * The unlocked check is safe here because it only occurs when there are not | ||
| 109 | * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there | ||
| 110 | * to optimise the shrinker removing the buffer from the LRU and calling | ||
| 111 | * xfs_buf_free(). i.e. it removes an unnecessary round trip on the | ||
| 112 | * bt_lru_lock. | ||
| 113 | */ | ||
| 114 | STATIC void | ||
| 115 | xfs_buf_lru_del( | ||
| 116 | struct xfs_buf *bp) | ||
| 117 | { | ||
| 118 | struct xfs_buftarg *btp = bp->b_target; | ||
| 119 | |||
| 120 | if (list_empty(&bp->b_lru)) | ||
| 121 | return; | ||
| 122 | |||
| 123 | spin_lock(&btp->bt_lru_lock); | ||
| 124 | if (!list_empty(&bp->b_lru)) { | ||
| 125 | list_del_init(&bp->b_lru); | ||
| 126 | btp->bt_lru_nr--; | ||
| 127 | } | ||
| 128 | spin_unlock(&btp->bt_lru_lock); | ||
| 129 | } | ||
| 130 | |||
| 131 | /* | ||
| 132 | * When we mark a buffer stale, we remove the buffer from the LRU and clear the | 84 | * When we mark a buffer stale, we remove the buffer from the LRU and clear the |
| 133 | * b_lru_ref count so that the buffer is freed immediately when the buffer | 85 | * b_lru_ref count so that the buffer is freed immediately when the buffer |
| 134 | * reference count falls to zero. If the buffer is already on the LRU, we need | 86 | * reference count falls to zero. If the buffer is already on the LRU, we need |
| @@ -151,20 +103,14 @@ xfs_buf_stale( | |||
| 151 | */ | 103 | */ |
| 152 | bp->b_flags &= ~_XBF_DELWRI_Q; | 104 | bp->b_flags &= ~_XBF_DELWRI_Q; |
| 153 | 105 | ||
| 154 | atomic_set(&(bp)->b_lru_ref, 0); | 106 | spin_lock(&bp->b_lock); |
| 155 | if (!list_empty(&bp->b_lru)) { | 107 | atomic_set(&bp->b_lru_ref, 0); |
| 156 | struct xfs_buftarg *btp = bp->b_target; | 108 | if (!(bp->b_state & XFS_BSTATE_DISPOSE) && |
| 109 | (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru))) | ||
| 110 | atomic_dec(&bp->b_hold); | ||
| 157 | 111 | ||
| 158 | spin_lock(&btp->bt_lru_lock); | ||
| 159 | if (!list_empty(&bp->b_lru) && | ||
| 160 | !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) { | ||
| 161 | list_del_init(&bp->b_lru); | ||
| 162 | btp->bt_lru_nr--; | ||
| 163 | atomic_dec(&bp->b_hold); | ||
| 164 | } | ||
| 165 | spin_unlock(&btp->bt_lru_lock); | ||
| 166 | } | ||
| 167 | ASSERT(atomic_read(&bp->b_hold) >= 1); | 112 | ASSERT(atomic_read(&bp->b_hold) >= 1); |
| 113 | spin_unlock(&bp->b_lock); | ||
| 168 | } | 114 | } |
| 169 | 115 | ||
| 170 | static int | 116 | static int |
| @@ -228,6 +174,7 @@ _xfs_buf_alloc( | |||
| 228 | INIT_LIST_HEAD(&bp->b_list); | 174 | INIT_LIST_HEAD(&bp->b_list); |
| 229 | RB_CLEAR_NODE(&bp->b_rbnode); | 175 | RB_CLEAR_NODE(&bp->b_rbnode); |
| 230 | sema_init(&bp->b_sema, 0); /* held, no waiters */ | 176 | sema_init(&bp->b_sema, 0); /* held, no waiters */ |
| 177 | spin_lock_init(&bp->b_lock); | ||
| 231 | XB_SET_OWNER(bp); | 178 | XB_SET_OWNER(bp); |
| 232 | bp->b_target = target; | 179 | bp->b_target = target; |
| 233 | bp->b_flags = flags; | 180 | bp->b_flags = flags; |
| @@ -917,12 +864,33 @@ xfs_buf_rele( | |||
| 917 | 864 | ||
| 918 | ASSERT(atomic_read(&bp->b_hold) > 0); | 865 | ASSERT(atomic_read(&bp->b_hold) > 0); |
| 919 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { | 866 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { |
| 920 | if (!(bp->b_flags & XBF_STALE) && | 867 | spin_lock(&bp->b_lock); |
| 921 | atomic_read(&bp->b_lru_ref)) { | 868 | if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { |
| 922 | xfs_buf_lru_add(bp); | 869 | /* |
| 870 | * If the buffer is added to the LRU take a new | ||
| 871 | * reference to the buffer for the LRU and clear the | ||
| 872 | * (now stale) dispose list state flag | ||
| 873 | */ | ||
| 874 | if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) { | ||
| 875 | bp->b_state &= ~XFS_BSTATE_DISPOSE; | ||
| 876 | atomic_inc(&bp->b_hold); | ||
| 877 | } | ||
| 878 | spin_unlock(&bp->b_lock); | ||
| 923 | spin_unlock(&pag->pag_buf_lock); | 879 | spin_unlock(&pag->pag_buf_lock); |
| 924 | } else { | 880 | } else { |
| 925 | xfs_buf_lru_del(bp); | 881 | /* |
| 882 | * most of the time buffers will already be removed from | ||
| 883 | * the LRU, so optimise that case by checking for the | ||
| 884 | * XFS_BSTATE_DISPOSE flag indicating the last list the | ||
| 885 | * buffer was on was the disposal list | ||
| 886 | */ | ||
| 887 | if (!(bp->b_state & XFS_BSTATE_DISPOSE)) { | ||
| 888 | list_lru_del(&bp->b_target->bt_lru, &bp->b_lru); | ||
| 889 | } else { | ||
| 890 | ASSERT(list_empty(&bp->b_lru)); | ||
| 891 | } | ||
| 892 | spin_unlock(&bp->b_lock); | ||
| 893 | |||
| 926 | ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); | 894 | ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); |
| 927 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); | 895 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); |
| 928 | spin_unlock(&pag->pag_buf_lock); | 896 | spin_unlock(&pag->pag_buf_lock); |
| @@ -1502,83 +1470,121 @@ xfs_buf_iomove( | |||
| 1502 | * returned. These buffers will have an elevated hold count, so wait on those | 1470 | * returned. These buffers will have an elevated hold count, so wait on those |
| 1503 | * while freeing all the buffers only held by the LRU. | 1471 | * while freeing all the buffers only held by the LRU. |
| 1504 | */ | 1472 | */ |
| 1473 | static enum lru_status | ||
| 1474 | xfs_buftarg_wait_rele( | ||
| 1475 | struct list_head *item, | ||
| 1476 | spinlock_t *lru_lock, | ||
| 1477 | void *arg) | ||
| 1478 | |||
| 1479 | { | ||
| 1480 | struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru); | ||
| 1481 | struct list_head *dispose = arg; | ||
| 1482 | |||
| 1483 | if (atomic_read(&bp->b_hold) > 1) { | ||
| 1484 | /* need to wait, so skip it this pass */ | ||
| 1485 | trace_xfs_buf_wait_buftarg(bp, _RET_IP_); | ||
| 1486 | return LRU_SKIP; | ||
| 1487 | } | ||
| 1488 | if (!spin_trylock(&bp->b_lock)) | ||
| 1489 | return LRU_SKIP; | ||
| 1490 | |||
| 1491 | /* | ||
| 1492 | * clear the LRU reference count so the buffer doesn't get | ||
| 1493 | * ignored in xfs_buf_rele(). | ||
| 1494 | */ | ||
| 1495 | atomic_set(&bp->b_lru_ref, 0); | ||
| 1496 | bp->b_state |= XFS_BSTATE_DISPOSE; | ||
| 1497 | list_move(item, dispose); | ||
| 1498 | spin_unlock(&bp->b_lock); | ||
| 1499 | return LRU_REMOVED; | ||
| 1500 | } | ||
| 1501 | |||
| 1505 | void | 1502 | void |
| 1506 | xfs_wait_buftarg( | 1503 | xfs_wait_buftarg( |
| 1507 | struct xfs_buftarg *btp) | 1504 | struct xfs_buftarg *btp) |
| 1508 | { | 1505 | { |
| 1509 | struct xfs_buf *bp; | 1506 | LIST_HEAD(dispose); |
| 1507 | int loop = 0; | ||
| 1510 | 1508 | ||
| 1511 | restart: | 1509 | /* loop until there is nothing left on the lru list. */ |
| 1512 | spin_lock(&btp->bt_lru_lock); | 1510 | while (list_lru_count(&btp->bt_lru)) { |
| 1513 | while (!list_empty(&btp->bt_lru)) { | 1511 | list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele, |
| 1514 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); | 1512 | &dispose, LONG_MAX); |
| 1515 | if (atomic_read(&bp->b_hold) > 1) { | 1513 | |
| 1516 | trace_xfs_buf_wait_buftarg(bp, _RET_IP_); | 1514 | while (!list_empty(&dispose)) { |
| 1517 | list_move_tail(&bp->b_lru, &btp->bt_lru); | 1515 | struct xfs_buf *bp; |
| 1518 | spin_unlock(&btp->bt_lru_lock); | 1516 | bp = list_first_entry(&dispose, struct xfs_buf, b_lru); |
| 1519 | delay(100); | 1517 | list_del_init(&bp->b_lru); |
| 1520 | goto restart; | 1518 | xfs_buf_rele(bp); |
| 1521 | } | 1519 | } |
| 1522 | /* | 1520 | if (loop++ != 0) |
| 1523 | * clear the LRU reference count so the buffer doesn't get | 1521 | delay(100); |
| 1524 | * ignored in xfs_buf_rele(). | ||
| 1525 | */ | ||
| 1526 | atomic_set(&bp->b_lru_ref, 0); | ||
| 1527 | spin_unlock(&btp->bt_lru_lock); | ||
| 1528 | xfs_buf_rele(bp); | ||
| 1529 | spin_lock(&btp->bt_lru_lock); | ||
| 1530 | } | 1522 | } |
| 1531 | spin_unlock(&btp->bt_lru_lock); | ||
| 1532 | } | 1523 | } |
| 1533 | 1524 | ||
| 1534 | int | 1525 | static enum lru_status |
| 1535 | xfs_buftarg_shrink( | 1526 | xfs_buftarg_isolate( |
| 1527 | struct list_head *item, | ||
| 1528 | spinlock_t *lru_lock, | ||
| 1529 | void *arg) | ||
| 1530 | { | ||
| 1531 | struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru); | ||
| 1532 | struct list_head *dispose = arg; | ||
| 1533 | |||
| 1534 | /* | ||
| 1535 | * we are inverting the lru lock/bp->b_lock here, so use a trylock. | ||
| 1536 | * If we fail to get the lock, just skip it. | ||
| 1537 | */ | ||
| 1538 | if (!spin_trylock(&bp->b_lock)) | ||
| 1539 | return LRU_SKIP; | ||
| 1540 | /* | ||
| 1541 | * Decrement the b_lru_ref count unless the value is already | ||
| 1542 | * zero. If the value is already zero, we need to reclaim the | ||
| 1543 | * buffer, otherwise it gets another trip through the LRU. | ||
| 1544 | */ | ||
| 1545 | if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { | ||
| 1546 | spin_unlock(&bp->b_lock); | ||
| 1547 | return LRU_ROTATE; | ||
| 1548 | } | ||
| 1549 | |||
| 1550 | bp->b_state |= XFS_BSTATE_DISPOSE; | ||
| 1551 | list_move(item, dispose); | ||
| 1552 | spin_unlock(&bp->b_lock); | ||
| 1553 | return LRU_REMOVED; | ||
| 1554 | } | ||
| 1555 | |||
| 1556 | static unsigned long | ||
| 1557 | xfs_buftarg_shrink_scan( | ||
| 1536 | struct shrinker *shrink, | 1558 | struct shrinker *shrink, |
| 1537 | struct shrink_control *sc) | 1559 | struct shrink_control *sc) |
| 1538 | { | 1560 | { |
| 1539 | struct xfs_buftarg *btp = container_of(shrink, | 1561 | struct xfs_buftarg *btp = container_of(shrink, |
| 1540 | struct xfs_buftarg, bt_shrinker); | 1562 | struct xfs_buftarg, bt_shrinker); |
| 1541 | struct xfs_buf *bp; | ||
| 1542 | int nr_to_scan = sc->nr_to_scan; | ||
| 1543 | LIST_HEAD(dispose); | 1563 | LIST_HEAD(dispose); |
| 1564 | unsigned long freed; | ||
| 1565 | unsigned long nr_to_scan = sc->nr_to_scan; | ||
| 1544 | 1566 | ||
| 1545 | if (!nr_to_scan) | 1567 | freed = list_lru_walk_node(&btp->bt_lru, sc->nid, xfs_buftarg_isolate, |
| 1546 | return btp->bt_lru_nr; | 1568 | &dispose, &nr_to_scan); |
| 1547 | |||
| 1548 | spin_lock(&btp->bt_lru_lock); | ||
| 1549 | while (!list_empty(&btp->bt_lru)) { | ||
| 1550 | if (nr_to_scan-- <= 0) | ||
| 1551 | break; | ||
| 1552 | |||
| 1553 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); | ||
| 1554 | |||
| 1555 | /* | ||
| 1556 | * Decrement the b_lru_ref count unless the value is already | ||
| 1557 | * zero. If the value is already zero, we need to reclaim the | ||
| 1558 | * buffer, otherwise it gets another trip through the LRU. | ||
| 1559 | */ | ||
| 1560 | if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { | ||
| 1561 | list_move_tail(&bp->b_lru, &btp->bt_lru); | ||
| 1562 | continue; | ||
| 1563 | } | ||
| 1564 | |||
| 1565 | /* | ||
| 1566 | * remove the buffer from the LRU now to avoid needing another | ||
| 1567 | * lock round trip inside xfs_buf_rele(). | ||
| 1568 | */ | ||
| 1569 | list_move(&bp->b_lru, &dispose); | ||
| 1570 | btp->bt_lru_nr--; | ||
| 1571 | bp->b_lru_flags |= _XBF_LRU_DISPOSE; | ||
| 1572 | } | ||
| 1573 | spin_unlock(&btp->bt_lru_lock); | ||
| 1574 | 1569 | ||
| 1575 | while (!list_empty(&dispose)) { | 1570 | while (!list_empty(&dispose)) { |
| 1571 | struct xfs_buf *bp; | ||
| 1576 | bp = list_first_entry(&dispose, struct xfs_buf, b_lru); | 1572 | bp = list_first_entry(&dispose, struct xfs_buf, b_lru); |
| 1577 | list_del_init(&bp->b_lru); | 1573 | list_del_init(&bp->b_lru); |
| 1578 | xfs_buf_rele(bp); | 1574 | xfs_buf_rele(bp); |
| 1579 | } | 1575 | } |
| 1580 | 1576 | ||
| 1581 | return btp->bt_lru_nr; | 1577 | return freed; |
| 1578 | } | ||
| 1579 | |||
| 1580 | static unsigned long | ||
| 1581 | xfs_buftarg_shrink_count( | ||
| 1582 | struct shrinker *shrink, | ||
| 1583 | struct shrink_control *sc) | ||
| 1584 | { | ||
| 1585 | struct xfs_buftarg *btp = container_of(shrink, | ||
| 1586 | struct xfs_buftarg, bt_shrinker); | ||
| 1587 | return list_lru_count_node(&btp->bt_lru, sc->nid); | ||
| 1582 | } | 1588 | } |
| 1583 | 1589 | ||
| 1584 | void | 1590 | void |
| @@ -1587,6 +1593,7 @@ xfs_free_buftarg( | |||
| 1587 | struct xfs_buftarg *btp) | 1593 | struct xfs_buftarg *btp) |
| 1588 | { | 1594 | { |
| 1589 | unregister_shrinker(&btp->bt_shrinker); | 1595 | unregister_shrinker(&btp->bt_shrinker); |
| 1596 | list_lru_destroy(&btp->bt_lru); | ||
| 1590 | 1597 | ||
| 1591 | if (mp->m_flags & XFS_MOUNT_BARRIER) | 1598 | if (mp->m_flags & XFS_MOUNT_BARRIER) |
| 1592 | xfs_blkdev_issue_flush(btp); | 1599 | xfs_blkdev_issue_flush(btp); |
| @@ -1660,12 +1667,16 @@ xfs_alloc_buftarg( | |||
| 1660 | if (!btp->bt_bdi) | 1667 | if (!btp->bt_bdi) |
| 1661 | goto error; | 1668 | goto error; |
| 1662 | 1669 | ||
| 1663 | INIT_LIST_HEAD(&btp->bt_lru); | ||
| 1664 | spin_lock_init(&btp->bt_lru_lock); | ||
| 1665 | if (xfs_setsize_buftarg_early(btp, bdev)) | 1670 | if (xfs_setsize_buftarg_early(btp, bdev)) |
| 1666 | goto error; | 1671 | goto error; |
| 1667 | btp->bt_shrinker.shrink = xfs_buftarg_shrink; | 1672 | |
| 1673 | if (list_lru_init(&btp->bt_lru)) | ||
| 1674 | goto error; | ||
| 1675 | |||
| 1676 | btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count; | ||
| 1677 | btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; | ||
| 1668 | btp->bt_shrinker.seeks = DEFAULT_SEEKS; | 1678 | btp->bt_shrinker.seeks = DEFAULT_SEEKS; |
| 1679 | btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; | ||
| 1669 | register_shrinker(&btp->bt_shrinker); | 1680 | register_shrinker(&btp->bt_shrinker); |
| 1670 | return btp; | 1681 | return btp; |
| 1671 | 1682 | ||
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 433a12ed7b17..e65683361017 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #include <linux/fs.h> | 25 | #include <linux/fs.h> |
| 26 | #include <linux/buffer_head.h> | 26 | #include <linux/buffer_head.h> |
| 27 | #include <linux/uio.h> | 27 | #include <linux/uio.h> |
| 28 | #include <linux/list_lru.h> | ||
| 28 | 29 | ||
| 29 | /* | 30 | /* |
| 30 | * Base types | 31 | * Base types |
| @@ -59,7 +60,6 @@ typedef enum { | |||
| 59 | #define _XBF_KMEM (1 << 21)/* backed by heap memory */ | 60 | #define _XBF_KMEM (1 << 21)/* backed by heap memory */ |
| 60 | #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ | 61 | #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ |
| 61 | #define _XBF_COMPOUND (1 << 23)/* compound buffer */ | 62 | #define _XBF_COMPOUND (1 << 23)/* compound buffer */ |
| 62 | #define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */ | ||
| 63 | 63 | ||
| 64 | typedef unsigned int xfs_buf_flags_t; | 64 | typedef unsigned int xfs_buf_flags_t; |
| 65 | 65 | ||
| @@ -78,8 +78,12 @@ typedef unsigned int xfs_buf_flags_t; | |||
| 78 | { _XBF_PAGES, "PAGES" }, \ | 78 | { _XBF_PAGES, "PAGES" }, \ |
| 79 | { _XBF_KMEM, "KMEM" }, \ | 79 | { _XBF_KMEM, "KMEM" }, \ |
| 80 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ | 80 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ |
| 81 | { _XBF_COMPOUND, "COMPOUND" }, \ | 81 | { _XBF_COMPOUND, "COMPOUND" } |
| 82 | { _XBF_LRU_DISPOSE, "LRU_DISPOSE" } | 82 | |
| 83 | /* | ||
| 84 | * Internal state flags. | ||
| 85 | */ | ||
| 86 | #define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ | ||
| 83 | 87 | ||
| 84 | typedef struct xfs_buftarg { | 88 | typedef struct xfs_buftarg { |
| 85 | dev_t bt_dev; | 89 | dev_t bt_dev; |
| @@ -92,9 +96,7 @@ typedef struct xfs_buftarg { | |||
| 92 | 96 | ||
| 93 | /* LRU control structures */ | 97 | /* LRU control structures */ |
| 94 | struct shrinker bt_shrinker; | 98 | struct shrinker bt_shrinker; |
| 95 | struct list_head bt_lru; | 99 | struct list_lru bt_lru; |
| 96 | spinlock_t bt_lru_lock; | ||
| 97 | unsigned int bt_lru_nr; | ||
| 98 | } xfs_buftarg_t; | 100 | } xfs_buftarg_t; |
| 99 | 101 | ||
| 100 | struct xfs_buf; | 102 | struct xfs_buf; |
| @@ -137,7 +139,8 @@ typedef struct xfs_buf { | |||
| 137 | * bt_lru_lock and not by b_sema | 139 | * bt_lru_lock and not by b_sema |
| 138 | */ | 140 | */ |
| 139 | struct list_head b_lru; /* lru list */ | 141 | struct list_head b_lru; /* lru list */ |
| 140 | xfs_buf_flags_t b_lru_flags; /* internal lru status flags */ | 142 | spinlock_t b_lock; /* internal state lock */ |
| 143 | unsigned int b_state; /* internal state flags */ | ||
| 141 | wait_queue_head_t b_waiters; /* unpin waiters */ | 144 | wait_queue_head_t b_waiters; /* unpin waiters */ |
| 142 | struct list_head b_list; | 145 | struct list_head b_list; |
| 143 | struct xfs_perag *b_pag; /* contains rbtree root */ | 146 | struct xfs_perag *b_pag; /* contains rbtree root */ |
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 251c66632e5e..71520e6e5d65 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c | |||
| @@ -940,13 +940,8 @@ xfs_qm_dqput_final( | |||
| 940 | 940 | ||
| 941 | trace_xfs_dqput_free(dqp); | 941 | trace_xfs_dqput_free(dqp); |
| 942 | 942 | ||
| 943 | mutex_lock(&qi->qi_lru_lock); | 943 | if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) |
| 944 | if (list_empty(&dqp->q_lru)) { | ||
| 945 | list_add_tail(&dqp->q_lru, &qi->qi_lru_list); | ||
| 946 | qi->qi_lru_count++; | ||
| 947 | XFS_STATS_INC(xs_qm_dquot_unused); | 944 | XFS_STATS_INC(xs_qm_dquot_unused); |
| 948 | } | ||
| 949 | mutex_unlock(&qi->qi_lru_lock); | ||
| 950 | 945 | ||
| 951 | /* | 946 | /* |
| 952 | * If we just added a udquot to the freelist, then we want to release | 947 | * If we just added a udquot to the freelist, then we want to release |
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 16219b9c6790..73b62a24ceac 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c | |||
| @@ -1167,7 +1167,7 @@ xfs_reclaim_inodes( | |||
| 1167 | * them to be cleaned, which we hope will not be very long due to the | 1167 | * them to be cleaned, which we hope will not be very long due to the |
| 1168 | * background walker having already kicked the IO off on those dirty inodes. | 1168 | * background walker having already kicked the IO off on those dirty inodes. |
| 1169 | */ | 1169 | */ |
| 1170 | void | 1170 | long |
| 1171 | xfs_reclaim_inodes_nr( | 1171 | xfs_reclaim_inodes_nr( |
| 1172 | struct xfs_mount *mp, | 1172 | struct xfs_mount *mp, |
| 1173 | int nr_to_scan) | 1173 | int nr_to_scan) |
| @@ -1176,7 +1176,7 @@ xfs_reclaim_inodes_nr( | |||
| 1176 | xfs_reclaim_work_queue(mp); | 1176 | xfs_reclaim_work_queue(mp); |
| 1177 | xfs_ail_push_all(mp->m_ail); | 1177 | xfs_ail_push_all(mp->m_ail); |
| 1178 | 1178 | ||
| 1179 | xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); | 1179 | return xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); |
| 1180 | } | 1180 | } |
| 1181 | 1181 | ||
| 1182 | /* | 1182 | /* |
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 8a89f7d791bd..456f0144e1b6 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h | |||
| @@ -46,7 +46,7 @@ void xfs_reclaim_worker(struct work_struct *work); | |||
| 46 | 46 | ||
| 47 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); | 47 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); |
| 48 | int xfs_reclaim_inodes_count(struct xfs_mount *mp); | 48 | int xfs_reclaim_inodes_count(struct xfs_mount *mp); |
| 49 | void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); | 49 | long xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); |
| 50 | 50 | ||
| 51 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); | 51 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); |
| 52 | 52 | ||
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 6218a0aeeeea..3e6c2e6c9cd2 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
| @@ -51,8 +51,9 @@ | |||
| 51 | */ | 51 | */ |
| 52 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); | 52 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); |
| 53 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); | 53 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); |
| 54 | STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); | ||
| 55 | 54 | ||
| 55 | |||
| 56 | STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp); | ||
| 56 | /* | 57 | /* |
| 57 | * We use the batch lookup interface to iterate over the dquots as it | 58 | * We use the batch lookup interface to iterate over the dquots as it |
| 58 | * currently is the only interface into the radix tree code that allows | 59 | * currently is the only interface into the radix tree code that allows |
| @@ -203,12 +204,9 @@ xfs_qm_dqpurge( | |||
| 203 | * We move dquots to the freelist as soon as their reference count | 204 | * We move dquots to the freelist as soon as their reference count |
| 204 | * hits zero, so it really should be on the freelist here. | 205 | * hits zero, so it really should be on the freelist here. |
| 205 | */ | 206 | */ |
| 206 | mutex_lock(&qi->qi_lru_lock); | ||
| 207 | ASSERT(!list_empty(&dqp->q_lru)); | 207 | ASSERT(!list_empty(&dqp->q_lru)); |
| 208 | list_del_init(&dqp->q_lru); | 208 | list_lru_del(&qi->qi_lru, &dqp->q_lru); |
| 209 | qi->qi_lru_count--; | ||
| 210 | XFS_STATS_DEC(xs_qm_dquot_unused); | 209 | XFS_STATS_DEC(xs_qm_dquot_unused); |
| 211 | mutex_unlock(&qi->qi_lru_lock); | ||
| 212 | 210 | ||
| 213 | xfs_qm_dqdestroy(dqp); | 211 | xfs_qm_dqdestroy(dqp); |
| 214 | 212 | ||
| @@ -680,6 +678,143 @@ xfs_qm_calc_dquots_per_chunk( | |||
| 680 | return ndquots; | 678 | return ndquots; |
| 681 | } | 679 | } |
| 682 | 680 | ||
| 681 | struct xfs_qm_isolate { | ||
| 682 | struct list_head buffers; | ||
| 683 | struct list_head dispose; | ||
| 684 | }; | ||
| 685 | |||
| 686 | static enum lru_status | ||
| 687 | xfs_qm_dquot_isolate( | ||
| 688 | struct list_head *item, | ||
| 689 | spinlock_t *lru_lock, | ||
| 690 | void *arg) | ||
| 691 | { | ||
| 692 | struct xfs_dquot *dqp = container_of(item, | ||
| 693 | struct xfs_dquot, q_lru); | ||
| 694 | struct xfs_qm_isolate *isol = arg; | ||
| 695 | |||
| 696 | if (!xfs_dqlock_nowait(dqp)) | ||
| 697 | goto out_miss_busy; | ||
| 698 | |||
| 699 | /* | ||
| 700 | * This dquot has acquired a reference in the meantime remove it from | ||
| 701 | * the freelist and try again. | ||
| 702 | */ | ||
| 703 | if (dqp->q_nrefs) { | ||
| 704 | xfs_dqunlock(dqp); | ||
| 705 | XFS_STATS_INC(xs_qm_dqwants); | ||
| 706 | |||
| 707 | trace_xfs_dqreclaim_want(dqp); | ||
| 708 | list_del_init(&dqp->q_lru); | ||
| 709 | XFS_STATS_DEC(xs_qm_dquot_unused); | ||
| 710 | return LRU_REMOVED; | ||
| 711 | } | ||
| 712 | |||
| 713 | /* | ||
| 714 | * If the dquot is dirty, flush it. If it's already being flushed, just | ||
| 715 | * skip it so there is time for the IO to complete before we try to | ||
| 716 | * reclaim it again on the next LRU pass. | ||
| 717 | */ | ||
| 718 | if (!xfs_dqflock_nowait(dqp)) { | ||
| 719 | xfs_dqunlock(dqp); | ||
| 720 | goto out_miss_busy; | ||
| 721 | } | ||
| 722 | |||
| 723 | if (XFS_DQ_IS_DIRTY(dqp)) { | ||
| 724 | struct xfs_buf *bp = NULL; | ||
| 725 | int error; | ||
| 726 | |||
| 727 | trace_xfs_dqreclaim_dirty(dqp); | ||
| 728 | |||
| 729 | /* we have to drop the LRU lock to flush the dquot */ | ||
| 730 | spin_unlock(lru_lock); | ||
| 731 | |||
| 732 | error = xfs_qm_dqflush(dqp, &bp); | ||
| 733 | if (error) { | ||
| 734 | xfs_warn(dqp->q_mount, "%s: dquot %p flush failed", | ||
| 735 | __func__, dqp); | ||
| 736 | goto out_unlock_dirty; | ||
| 737 | } | ||
| 738 | |||
| 739 | xfs_buf_delwri_queue(bp, &isol->buffers); | ||
| 740 | xfs_buf_relse(bp); | ||
| 741 | goto out_unlock_dirty; | ||
| 742 | } | ||
| 743 | xfs_dqfunlock(dqp); | ||
| 744 | |||
| 745 | /* | ||
| 746 | * Prevent lookups now that we are past the point of no return. | ||
| 747 | */ | ||
| 748 | dqp->dq_flags |= XFS_DQ_FREEING; | ||
| 749 | xfs_dqunlock(dqp); | ||
| 750 | |||
| 751 | ASSERT(dqp->q_nrefs == 0); | ||
| 752 | list_move_tail(&dqp->q_lru, &isol->dispose); | ||
| 753 | XFS_STATS_DEC(xs_qm_dquot_unused); | ||
| 754 | trace_xfs_dqreclaim_done(dqp); | ||
| 755 | XFS_STATS_INC(xs_qm_dqreclaims); | ||
| 756 | return LRU_REMOVED; | ||
| 757 | |||
| 758 | out_miss_busy: | ||
| 759 | trace_xfs_dqreclaim_busy(dqp); | ||
| 760 | XFS_STATS_INC(xs_qm_dqreclaim_misses); | ||
| 761 | return LRU_SKIP; | ||
| 762 | |||
| 763 | out_unlock_dirty: | ||
| 764 | trace_xfs_dqreclaim_busy(dqp); | ||
| 765 | XFS_STATS_INC(xs_qm_dqreclaim_misses); | ||
| 766 | xfs_dqunlock(dqp); | ||
| 767 | spin_lock(lru_lock); | ||
| 768 | return LRU_RETRY; | ||
| 769 | } | ||
| 770 | |||
| 771 | static unsigned long | ||
| 772 | xfs_qm_shrink_scan( | ||
| 773 | struct shrinker *shrink, | ||
| 774 | struct shrink_control *sc) | ||
| 775 | { | ||
| 776 | struct xfs_quotainfo *qi = container_of(shrink, | ||
| 777 | struct xfs_quotainfo, qi_shrinker); | ||
| 778 | struct xfs_qm_isolate isol; | ||
| 779 | unsigned long freed; | ||
| 780 | int error; | ||
| 781 | unsigned long nr_to_scan = sc->nr_to_scan; | ||
| 782 | |||
| 783 | if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) | ||
| 784 | return 0; | ||
| 785 | |||
| 786 | INIT_LIST_HEAD(&isol.buffers); | ||
| 787 | INIT_LIST_HEAD(&isol.dispose); | ||
| 788 | |||
| 789 | freed = list_lru_walk_node(&qi->qi_lru, sc->nid, xfs_qm_dquot_isolate, &isol, | ||
| 790 | &nr_to_scan); | ||
| 791 | |||
| 792 | error = xfs_buf_delwri_submit(&isol.buffers); | ||
| 793 | if (error) | ||
| 794 | xfs_warn(NULL, "%s: dquot reclaim failed", __func__); | ||
| 795 | |||
| 796 | while (!list_empty(&isol.dispose)) { | ||
| 797 | struct xfs_dquot *dqp; | ||
| 798 | |||
| 799 | dqp = list_first_entry(&isol.dispose, struct xfs_dquot, q_lru); | ||
| 800 | list_del_init(&dqp->q_lru); | ||
| 801 | xfs_qm_dqfree_one(dqp); | ||
| 802 | } | ||
| 803 | |||
| 804 | return freed; | ||
| 805 | } | ||
| 806 | |||
| 807 | static unsigned long | ||
| 808 | xfs_qm_shrink_count( | ||
| 809 | struct shrinker *shrink, | ||
| 810 | struct shrink_control *sc) | ||
| 811 | { | ||
| 812 | struct xfs_quotainfo *qi = container_of(shrink, | ||
| 813 | struct xfs_quotainfo, qi_shrinker); | ||
| 814 | |||
| 815 | return list_lru_count_node(&qi->qi_lru, sc->nid); | ||
| 816 | } | ||
| 817 | |||
| 683 | /* | 818 | /* |
| 684 | * This initializes all the quota information that's kept in the | 819 | * This initializes all the quota information that's kept in the |
| 685 | * mount structure | 820 | * mount structure |
| @@ -696,11 +831,18 @@ xfs_qm_init_quotainfo( | |||
| 696 | 831 | ||
| 697 | qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); | 832 | qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); |
| 698 | 833 | ||
| 834 | if ((error = list_lru_init(&qinf->qi_lru))) { | ||
| 835 | kmem_free(qinf); | ||
| 836 | mp->m_quotainfo = NULL; | ||
| 837 | return error; | ||
| 838 | } | ||
| 839 | |||
| 699 | /* | 840 | /* |
| 700 | * See if quotainodes are setup, and if not, allocate them, | 841 | * See if quotainodes are setup, and if not, allocate them, |
| 701 | * and change the superblock accordingly. | 842 | * and change the superblock accordingly. |
| 702 | */ | 843 | */ |
| 703 | if ((error = xfs_qm_init_quotainos(mp))) { | 844 | if ((error = xfs_qm_init_quotainos(mp))) { |
| 845 | list_lru_destroy(&qinf->qi_lru); | ||
| 704 | kmem_free(qinf); | 846 | kmem_free(qinf); |
| 705 | mp->m_quotainfo = NULL; | 847 | mp->m_quotainfo = NULL; |
| 706 | return error; | 848 | return error; |
| @@ -711,10 +853,6 @@ xfs_qm_init_quotainfo( | |||
| 711 | INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS); | 853 | INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS); |
| 712 | mutex_init(&qinf->qi_tree_lock); | 854 | mutex_init(&qinf->qi_tree_lock); |
| 713 | 855 | ||
| 714 | INIT_LIST_HEAD(&qinf->qi_lru_list); | ||
| 715 | qinf->qi_lru_count = 0; | ||
| 716 | mutex_init(&qinf->qi_lru_lock); | ||
| 717 | |||
| 718 | /* mutex used to serialize quotaoffs */ | 856 | /* mutex used to serialize quotaoffs */ |
| 719 | mutex_init(&qinf->qi_quotaofflock); | 857 | mutex_init(&qinf->qi_quotaofflock); |
| 720 | 858 | ||
| @@ -779,8 +917,10 @@ xfs_qm_init_quotainfo( | |||
| 779 | qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; | 917 | qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; |
| 780 | } | 918 | } |
| 781 | 919 | ||
| 782 | qinf->qi_shrinker.shrink = xfs_qm_shake; | 920 | qinf->qi_shrinker.count_objects = xfs_qm_shrink_count; |
| 921 | qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan; | ||
| 783 | qinf->qi_shrinker.seeks = DEFAULT_SEEKS; | 922 | qinf->qi_shrinker.seeks = DEFAULT_SEEKS; |
| 923 | qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; | ||
| 784 | register_shrinker(&qinf->qi_shrinker); | 924 | register_shrinker(&qinf->qi_shrinker); |
| 785 | return 0; | 925 | return 0; |
| 786 | } | 926 | } |
| @@ -801,6 +941,7 @@ xfs_qm_destroy_quotainfo( | |||
| 801 | ASSERT(qi != NULL); | 941 | ASSERT(qi != NULL); |
| 802 | 942 | ||
| 803 | unregister_shrinker(&qi->qi_shrinker); | 943 | unregister_shrinker(&qi->qi_shrinker); |
| 944 | list_lru_destroy(&qi->qi_lru); | ||
| 804 | 945 | ||
| 805 | if (qi->qi_uquotaip) { | 946 | if (qi->qi_uquotaip) { |
| 806 | IRELE(qi->qi_uquotaip); | 947 | IRELE(qi->qi_uquotaip); |
| @@ -1599,132 +1740,6 @@ xfs_qm_dqfree_one( | |||
| 1599 | xfs_qm_dqdestroy(dqp); | 1740 | xfs_qm_dqdestroy(dqp); |
| 1600 | } | 1741 | } |
| 1601 | 1742 | ||
| 1602 | STATIC void | ||
| 1603 | xfs_qm_dqreclaim_one( | ||
| 1604 | struct xfs_dquot *dqp, | ||
| 1605 | struct list_head *buffer_list, | ||
| 1606 | struct list_head *dispose_list) | ||
| 1607 | { | ||
| 1608 | struct xfs_mount *mp = dqp->q_mount; | ||
| 1609 | struct xfs_quotainfo *qi = mp->m_quotainfo; | ||
| 1610 | int error; | ||
| 1611 | |||
| 1612 | if (!xfs_dqlock_nowait(dqp)) | ||
| 1613 | goto out_move_tail; | ||
| 1614 | |||
| 1615 | /* | ||
| 1616 | * This dquot has acquired a reference in the meantime remove it from | ||
| 1617 | * the freelist and try again. | ||
| 1618 | */ | ||
| 1619 | if (dqp->q_nrefs) { | ||
| 1620 | xfs_dqunlock(dqp); | ||
| 1621 | |||
| 1622 | trace_xfs_dqreclaim_want(dqp); | ||
| 1623 | XFS_STATS_INC(xs_qm_dqwants); | ||
| 1624 | |||
| 1625 | list_del_init(&dqp->q_lru); | ||
| 1626 | qi->qi_lru_count--; | ||
| 1627 | XFS_STATS_DEC(xs_qm_dquot_unused); | ||
| 1628 | return; | ||
| 1629 | } | ||
| 1630 | |||
| 1631 | /* | ||
| 1632 | * Try to grab the flush lock. If this dquot is in the process of | ||
| 1633 | * getting flushed to disk, we don't want to reclaim it. | ||
| 1634 | */ | ||
| 1635 | if (!xfs_dqflock_nowait(dqp)) | ||
| 1636 | goto out_unlock_move_tail; | ||
| 1637 | |||
| 1638 | if (XFS_DQ_IS_DIRTY(dqp)) { | ||
| 1639 | struct xfs_buf *bp = NULL; | ||
| 1640 | |||
| 1641 | trace_xfs_dqreclaim_dirty(dqp); | ||
| 1642 | |||
| 1643 | error = xfs_qm_dqflush(dqp, &bp); | ||
| 1644 | if (error) { | ||
| 1645 | xfs_warn(mp, "%s: dquot %p flush failed", | ||
| 1646 | __func__, dqp); | ||
| 1647 | goto out_unlock_move_tail; | ||
| 1648 | } | ||
| 1649 | |||
| 1650 | xfs_buf_delwri_queue(bp, buffer_list); | ||
| 1651 | xfs_buf_relse(bp); | ||
| 1652 | /* | ||
| 1653 | * Give the dquot another try on the freelist, as the | ||
| 1654 | * flushing will take some time. | ||
| 1655 | */ | ||
| 1656 | goto out_unlock_move_tail; | ||
| 1657 | } | ||
| 1658 | xfs_dqfunlock(dqp); | ||
| 1659 | |||
| 1660 | /* | ||
| 1661 | * Prevent lookups now that we are past the point of no return. | ||
| 1662 | */ | ||
| 1663 | dqp->dq_flags |= XFS_DQ_FREEING; | ||
| 1664 | xfs_dqunlock(dqp); | ||
| 1665 | |||
| 1666 | ASSERT(dqp->q_nrefs == 0); | ||
| 1667 | list_move_tail(&dqp->q_lru, dispose_list); | ||
| 1668 | qi->qi_lru_count--; | ||
| 1669 | XFS_STATS_DEC(xs_qm_dquot_unused); | ||
| 1670 | |||
| 1671 | trace_xfs_dqreclaim_done(dqp); | ||
| 1672 | XFS_STATS_INC(xs_qm_dqreclaims); | ||
| 1673 | return; | ||
| 1674 | |||
| 1675 | /* | ||
| 1676 | * Move the dquot to the tail of the list so that we don't spin on it. | ||
| 1677 | */ | ||
| 1678 | out_unlock_move_tail: | ||
| 1679 | xfs_dqunlock(dqp); | ||
| 1680 | out_move_tail: | ||
| 1681 | list_move_tail(&dqp->q_lru, &qi->qi_lru_list); | ||
| 1682 | trace_xfs_dqreclaim_busy(dqp); | ||
| 1683 | XFS_STATS_INC(xs_qm_dqreclaim_misses); | ||
| 1684 | } | ||
| 1685 | |||
| 1686 | STATIC int | ||
| 1687 | xfs_qm_shake( | ||
| 1688 | struct shrinker *shrink, | ||
| 1689 | struct shrink_control *sc) | ||
| 1690 | { | ||
| 1691 | struct xfs_quotainfo *qi = | ||
| 1692 | container_of(shrink, struct xfs_quotainfo, qi_shrinker); | ||
| 1693 | int nr_to_scan = sc->nr_to_scan; | ||
| 1694 | LIST_HEAD (buffer_list); | ||
| 1695 | LIST_HEAD (dispose_list); | ||
| 1696 | struct xfs_dquot *dqp; | ||
| 1697 | int error; | ||
| 1698 | |||
| 1699 | if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) | ||
| 1700 | return 0; | ||
| 1701 | if (!nr_to_scan) | ||
| 1702 | goto out; | ||
| 1703 | |||
| 1704 | mutex_lock(&qi->qi_lru_lock); | ||
| 1705 | while (!list_empty(&qi->qi_lru_list)) { | ||
| 1706 | if (nr_to_scan-- <= 0) | ||
| 1707 | break; | ||
| 1708 | dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot, | ||
| 1709 | q_lru); | ||
| 1710 | xfs_qm_dqreclaim_one(dqp, &buffer_list, &dispose_list); | ||
| 1711 | } | ||
| 1712 | mutex_unlock(&qi->qi_lru_lock); | ||
| 1713 | |||
| 1714 | error = xfs_buf_delwri_submit(&buffer_list); | ||
| 1715 | if (error) | ||
| 1716 | xfs_warn(NULL, "%s: dquot reclaim failed", __func__); | ||
| 1717 | |||
| 1718 | while (!list_empty(&dispose_list)) { | ||
| 1719 | dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru); | ||
| 1720 | list_del_init(&dqp->q_lru); | ||
| 1721 | xfs_qm_dqfree_one(dqp); | ||
| 1722 | } | ||
| 1723 | |||
| 1724 | out: | ||
| 1725 | return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure; | ||
| 1726 | } | ||
| 1727 | |||
| 1728 | /* | 1743 | /* |
| 1729 | * Start a transaction and write the incore superblock changes to | 1744 | * Start a transaction and write the incore superblock changes to |
| 1730 | * disk. flags parameter indicates which fields have changed. | 1745 | * disk. flags parameter indicates which fields have changed. |
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 670cd4464070..2b602df9c242 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h | |||
| @@ -49,9 +49,7 @@ typedef struct xfs_quotainfo { | |||
| 49 | struct xfs_inode *qi_uquotaip; /* user quota inode */ | 49 | struct xfs_inode *qi_uquotaip; /* user quota inode */ |
| 50 | struct xfs_inode *qi_gquotaip; /* group quota inode */ | 50 | struct xfs_inode *qi_gquotaip; /* group quota inode */ |
| 51 | struct xfs_inode *qi_pquotaip; /* project quota inode */ | 51 | struct xfs_inode *qi_pquotaip; /* project quota inode */ |
| 52 | struct list_head qi_lru_list; | 52 | struct list_lru qi_lru; |
| 53 | struct mutex qi_lru_lock; | ||
| 54 | int qi_lru_count; | ||
| 55 | int qi_dquots; | 53 | int qi_dquots; |
| 56 | time_t qi_btimelimit; /* limit for blks timer */ | 54 | time_t qi_btimelimit; /* limit for blks timer */ |
| 57 | time_t qi_itimelimit; /* limit for inodes timer */ | 55 | time_t qi_itimelimit; /* limit for inodes timer */ |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 979a77d4b87d..15188cc99449 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
| @@ -1535,19 +1535,21 @@ xfs_fs_mount( | |||
| 1535 | return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); | 1535 | return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); |
| 1536 | } | 1536 | } |
| 1537 | 1537 | ||
| 1538 | static int | 1538 | static long |
| 1539 | xfs_fs_nr_cached_objects( | 1539 | xfs_fs_nr_cached_objects( |
| 1540 | struct super_block *sb) | 1540 | struct super_block *sb, |
| 1541 | int nid) | ||
| 1541 | { | 1542 | { |
| 1542 | return xfs_reclaim_inodes_count(XFS_M(sb)); | 1543 | return xfs_reclaim_inodes_count(XFS_M(sb)); |
| 1543 | } | 1544 | } |
| 1544 | 1545 | ||
| 1545 | static void | 1546 | static long |
| 1546 | xfs_fs_free_cached_objects( | 1547 | xfs_fs_free_cached_objects( |
| 1547 | struct super_block *sb, | 1548 | struct super_block *sb, |
| 1548 | int nr_to_scan) | 1549 | long nr_to_scan, |
| 1550 | int nid) | ||
| 1549 | { | 1551 | { |
| 1550 | xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); | 1552 | return xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); |
| 1551 | } | 1553 | } |
| 1552 | 1554 | ||
| 1553 | static const struct super_operations xfs_super_operations = { | 1555 | static const struct super_operations xfs_super_operations = { |
diff --git a/include/linux/dcache.h b/include/linux/dcache.h index feaa8d88eef7..59066e0b4ff1 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h | |||
| @@ -55,11 +55,11 @@ struct qstr { | |||
| 55 | #define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) | 55 | #define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) |
| 56 | 56 | ||
| 57 | struct dentry_stat_t { | 57 | struct dentry_stat_t { |
| 58 | int nr_dentry; | 58 | long nr_dentry; |
| 59 | int nr_unused; | 59 | long nr_unused; |
| 60 | int age_limit; /* age in seconds */ | 60 | long age_limit; /* age in seconds */ |
| 61 | int want_pages; /* pages requested by system */ | 61 | long want_pages; /* pages requested by system */ |
| 62 | int dummy[2]; | 62 | long dummy[2]; |
| 63 | }; | 63 | }; |
| 64 | extern struct dentry_stat_t dentry_stat; | 64 | extern struct dentry_stat_t dentry_stat; |
| 65 | 65 | ||
| @@ -395,4 +395,8 @@ static inline bool d_mountpoint(const struct dentry *dentry) | |||
| 395 | 395 | ||
| 396 | extern int sysctl_vfs_cache_pressure; | 396 | extern int sysctl_vfs_cache_pressure; |
| 397 | 397 | ||
| 398 | static inline unsigned long vfs_pressure_ratio(unsigned long val) | ||
| 399 | { | ||
| 400 | return mult_frac(val, sysctl_vfs_cache_pressure, 100); | ||
| 401 | } | ||
| 398 | #endif /* __LINUX_DCACHE_H */ | 402 | #endif /* __LINUX_DCACHE_H */ |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 529d8711baba..a4acd3c61190 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <linux/stat.h> | 10 | #include <linux/stat.h> |
| 11 | #include <linux/cache.h> | 11 | #include <linux/cache.h> |
| 12 | #include <linux/list.h> | 12 | #include <linux/list.h> |
| 13 | #include <linux/list_lru.h> | ||
| 13 | #include <linux/llist.h> | 14 | #include <linux/llist.h> |
| 14 | #include <linux/radix-tree.h> | 15 | #include <linux/radix-tree.h> |
| 15 | #include <linux/rbtree.h> | 16 | #include <linux/rbtree.h> |
| @@ -1269,15 +1270,6 @@ struct super_block { | |||
| 1269 | struct list_head s_files; | 1270 | struct list_head s_files; |
| 1270 | #endif | 1271 | #endif |
| 1271 | struct list_head s_mounts; /* list of mounts; _not_ for fs use */ | 1272 | struct list_head s_mounts; /* list of mounts; _not_ for fs use */ |
| 1272 | /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */ | ||
| 1273 | struct list_head s_dentry_lru; /* unused dentry lru */ | ||
| 1274 | int s_nr_dentry_unused; /* # of dentry on lru */ | ||
| 1275 | |||
| 1276 | /* s_inode_lru_lock protects s_inode_lru and s_nr_inodes_unused */ | ||
| 1277 | spinlock_t s_inode_lru_lock ____cacheline_aligned_in_smp; | ||
| 1278 | struct list_head s_inode_lru; /* unused inode lru */ | ||
| 1279 | int s_nr_inodes_unused; /* # of inodes on lru */ | ||
| 1280 | |||
| 1281 | struct block_device *s_bdev; | 1273 | struct block_device *s_bdev; |
| 1282 | struct backing_dev_info *s_bdi; | 1274 | struct backing_dev_info *s_bdi; |
| 1283 | struct mtd_info *s_mtd; | 1275 | struct mtd_info *s_mtd; |
| @@ -1331,11 +1323,14 @@ struct super_block { | |||
| 1331 | 1323 | ||
| 1332 | /* AIO completions deferred from interrupt context */ | 1324 | /* AIO completions deferred from interrupt context */ |
| 1333 | struct workqueue_struct *s_dio_done_wq; | 1325 | struct workqueue_struct *s_dio_done_wq; |
| 1334 | }; | ||
| 1335 | 1326 | ||
| 1336 | /* superblock cache pruning functions */ | 1327 | /* |
| 1337 | extern void prune_icache_sb(struct super_block *sb, int nr_to_scan); | 1328 | * Keep the lru lists last in the structure so they always sit on their |
| 1338 | extern void prune_dcache_sb(struct super_block *sb, int nr_to_scan); | 1329 | * own individual cachelines. |
| 1330 | */ | ||
| 1331 | struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; | ||
| 1332 | struct list_lru s_inode_lru ____cacheline_aligned_in_smp; | ||
| 1333 | }; | ||
| 1339 | 1334 | ||
| 1340 | extern struct timespec current_fs_time(struct super_block *sb); | 1335 | extern struct timespec current_fs_time(struct super_block *sb); |
| 1341 | 1336 | ||
| @@ -1629,8 +1624,8 @@ struct super_operations { | |||
| 1629 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); | 1624 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); |
| 1630 | #endif | 1625 | #endif |
| 1631 | int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); | 1626 | int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); |
| 1632 | int (*nr_cached_objects)(struct super_block *); | 1627 | long (*nr_cached_objects)(struct super_block *, int); |
| 1633 | void (*free_cached_objects)(struct super_block *, int); | 1628 | long (*free_cached_objects)(struct super_block *, long, int); |
| 1634 | }; | 1629 | }; |
| 1635 | 1630 | ||
| 1636 | /* | 1631 | /* |
| @@ -2494,7 +2489,6 @@ extern const struct file_operations generic_ro_fops; | |||
| 2494 | #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) | 2489 | #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) |
| 2495 | 2490 | ||
| 2496 | extern int vfs_readlink(struct dentry *, char __user *, int, const char *); | 2491 | extern int vfs_readlink(struct dentry *, char __user *, int, const char *); |
| 2497 | extern int vfs_follow_link(struct nameidata *, const char *); | ||
| 2498 | extern int page_readlink(struct dentry *, char __user *, int); | 2492 | extern int page_readlink(struct dentry *, char __user *, int); |
| 2499 | extern void *page_follow_link_light(struct dentry *, struct nameidata *); | 2493 | extern void *page_follow_link_light(struct dentry *, struct nameidata *); |
| 2500 | extern void page_put_link(struct dentry *, struct nameidata *, void *); | 2494 | extern void page_put_link(struct dentry *, struct nameidata *, void *); |
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h new file mode 100644 index 000000000000..3ce541753c88 --- /dev/null +++ b/include/linux/list_lru.h | |||
| @@ -0,0 +1,131 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved. | ||
| 3 | * Authors: David Chinner and Glauber Costa | ||
| 4 | * | ||
| 5 | * Generic LRU infrastructure | ||
| 6 | */ | ||
| 7 | #ifndef _LRU_LIST_H | ||
| 8 | #define _LRU_LIST_H | ||
| 9 | |||
| 10 | #include <linux/list.h> | ||
| 11 | #include <linux/nodemask.h> | ||
| 12 | |||
| 13 | /* list_lru_walk_cb has to always return one of those */ | ||
| 14 | enum lru_status { | ||
| 15 | LRU_REMOVED, /* item removed from list */ | ||
| 16 | LRU_ROTATE, /* item referenced, give another pass */ | ||
| 17 | LRU_SKIP, /* item cannot be locked, skip */ | ||
| 18 | LRU_RETRY, /* item not freeable. May drop the lock | ||
| 19 | internally, but has to return locked. */ | ||
| 20 | }; | ||
| 21 | |||
| 22 | struct list_lru_node { | ||
| 23 | spinlock_t lock; | ||
| 24 | struct list_head list; | ||
| 25 | /* kept as signed so we can catch imbalance bugs */ | ||
| 26 | long nr_items; | ||
| 27 | } ____cacheline_aligned_in_smp; | ||
| 28 | |||
| 29 | struct list_lru { | ||
| 30 | struct list_lru_node *node; | ||
| 31 | nodemask_t active_nodes; | ||
| 32 | }; | ||
| 33 | |||
| 34 | void list_lru_destroy(struct list_lru *lru); | ||
| 35 | int list_lru_init(struct list_lru *lru); | ||
| 36 | |||
| 37 | /** | ||
| 38 | * list_lru_add: add an element to the lru list's tail | ||
| 39 | * @list_lru: the lru pointer | ||
| 40 | * @item: the item to be added. | ||
| 41 | * | ||
| 42 | * If the element is already part of a list, this function returns doing | ||
| 43 | * nothing. Therefore the caller does not need to keep state about whether or | ||
| 44 | * not the element already belongs in the list and is allowed to lazy update | ||
| 45 | * it. Note however that this is valid for *a* list, not *this* list. If | ||
| 46 | * the caller organizes itself in a way that elements can be in more than | ||
| 47 | * one type of list, it is up to the caller to fully remove the item from | ||
| 48 | * the previous list (with list_lru_del() for instance) before moving it | ||
| 49 | * to @list_lru | ||
| 50 | * | ||
| 51 | * Return value: true if the list was updated, false otherwise | ||
| 52 | */ | ||
| 53 | bool list_lru_add(struct list_lru *lru, struct list_head *item); | ||
| 54 | |||
| 55 | /** | ||
| 56 | * list_lru_del: delete an element from the lru list | ||
| 57 | * @list_lru: the lru pointer | ||
| 58 | * @item: the item to be deleted. | ||
| 59 | * | ||
| 60 | * This function works analogously as list_lru_add in terms of list | ||
| 61 | * manipulation. The comments about an element already pertaining to | ||
| 62 | * a list are also valid for list_lru_del. | ||
| 63 | * | ||
| 64 | * Return value: true if the list was updated, false otherwise | ||
| 65 | */ | ||
| 66 | bool list_lru_del(struct list_lru *lru, struct list_head *item); | ||
| 67 | |||
| 68 | /** | ||
| 69 | * list_lru_count_node: return the number of objects currently held by @lru | ||
| 70 | * @lru: the lru pointer. | ||
| 71 | * @nid: the node id to count from. | ||
| 72 | * | ||
| 73 | * Always return a non-negative number, 0 for empty lists. There is no | ||
| 74 | * guarantee that the list is not updated while the count is being computed. | ||
| 75 | * Callers that want such a guarantee need to provide an outer lock. | ||
| 76 | */ | ||
| 77 | unsigned long list_lru_count_node(struct list_lru *lru, int nid); | ||
| 78 | static inline unsigned long list_lru_count(struct list_lru *lru) | ||
| 79 | { | ||
| 80 | long count = 0; | ||
| 81 | int nid; | ||
| 82 | |||
| 83 | for_each_node_mask(nid, lru->active_nodes) | ||
| 84 | count += list_lru_count_node(lru, nid); | ||
| 85 | |||
| 86 | return count; | ||
| 87 | } | ||
| 88 | |||
| 89 | typedef enum lru_status | ||
| 90 | (*list_lru_walk_cb)(struct list_head *item, spinlock_t *lock, void *cb_arg); | ||
| 91 | /** | ||
| 92 | * list_lru_walk_node: walk a list_lru, isolating and disposing freeable items. | ||
| 93 | * @lru: the lru pointer. | ||
| 94 | * @nid: the node id to scan from. | ||
| 95 | * @isolate: callback function that is responsible for deciding what to do with | ||
| 96 | * the item currently being scanned | ||
| 97 | * @cb_arg: opaque type that will be passed to @isolate | ||
| 98 | * @nr_to_walk: how many items to scan. | ||
| 99 | * | ||
| 100 | * This function will scan all elements in a particular list_lru, calling the | ||
| 101 | * @isolate callback for each of those items, along with the current list | ||
| 102 | * spinlock and a caller-provided opaque. The @isolate callback can choose to | ||
| 103 | * drop the lock internally, but *must* return with the lock held. The callback | ||
| 104 | * will return an enum lru_status telling the list_lru infrastructure what to | ||
| 105 | * do with the object being scanned. | ||
| 106 | * | ||
| 107 | * Please note that nr_to_walk does not mean how many objects will be freed, | ||
| 108 | * just how many objects will be scanned. | ||
| 109 | * | ||
| 110 | * Return value: the number of objects effectively removed from the LRU. | ||
| 111 | */ | ||
| 112 | unsigned long list_lru_walk_node(struct list_lru *lru, int nid, | ||
| 113 | list_lru_walk_cb isolate, void *cb_arg, | ||
| 114 | unsigned long *nr_to_walk); | ||
| 115 | |||
| 116 | static inline unsigned long | ||
| 117 | list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate, | ||
| 118 | void *cb_arg, unsigned long nr_to_walk) | ||
| 119 | { | ||
| 120 | long isolated = 0; | ||
| 121 | int nid; | ||
| 122 | |||
| 123 | for_each_node_mask(nid, lru->active_nodes) { | ||
| 124 | isolated += list_lru_walk_node(lru, nid, isolate, | ||
| 125 | cb_arg, &nr_to_walk); | ||
| 126 | if (nr_to_walk <= 0) | ||
| 127 | break; | ||
| 128 | } | ||
| 129 | return isolated; | ||
| 130 | } | ||
| 131 | #endif /* _LRU_LIST_H */ | ||
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index ac6b8ee07825..68c097077ef0 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h | |||
| @@ -4,39 +4,67 @@ | |||
| 4 | /* | 4 | /* |
| 5 | * This struct is used to pass information from page reclaim to the shrinkers. | 5 | * This struct is used to pass information from page reclaim to the shrinkers. |
| 6 | * We consolidate the values for easier extension later. | 6 | * We consolidate the values for easier extension later. |
| 7 | * | ||
| 8 | * The 'gfpmask' refers to the allocation we are currently trying to | ||
| 9 | * fulfil. | ||
| 7 | */ | 10 | */ |
| 8 | struct shrink_control { | 11 | struct shrink_control { |
| 9 | gfp_t gfp_mask; | 12 | gfp_t gfp_mask; |
| 10 | 13 | ||
| 11 | /* How many slab objects shrinker() should scan and try to reclaim */ | 14 | /* |
| 15 | * How many objects scan_objects should scan and try to reclaim. | ||
| 16 | * This is reset before every call, so it is safe for callees | ||
| 17 | * to modify. | ||
| 18 | */ | ||
| 12 | unsigned long nr_to_scan; | 19 | unsigned long nr_to_scan; |
| 20 | |||
| 21 | /* shrink from these nodes */ | ||
| 22 | nodemask_t nodes_to_scan; | ||
| 23 | /* current node being shrunk (for NUMA aware shrinkers) */ | ||
| 24 | int nid; | ||
| 13 | }; | 25 | }; |
| 14 | 26 | ||
| 27 | #define SHRINK_STOP (~0UL) | ||
| 15 | /* | 28 | /* |
| 16 | * A callback you can register to apply pressure to ageable caches. | 29 | * A callback you can register to apply pressure to ageable caches. |
| 17 | * | 30 | * |
| 18 | * 'sc' is passed shrink_control which includes a count 'nr_to_scan' | 31 | * @count_objects should return the number of freeable items in the cache. If |
| 19 | * and a 'gfpmask'. It should look through the least-recently-used | 32 | * there are no objects to free or the number of freeable items cannot be |
| 20 | * 'nr_to_scan' entries and attempt to free them up. It should return | 33 | * determined, it should return 0. No deadlock checks should be done during the |
| 21 | * the number of objects which remain in the cache. If it returns -1, it means | 34 | * count callback - the shrinker relies on aggregating scan counts that couldn't |
| 22 | * it cannot do any scanning at this time (eg. there is a risk of deadlock). | 35 | * be executed due to potential deadlocks to be run at a later call when the |
| 36 | * deadlock condition is no longer pending. | ||
| 23 | * | 37 | * |
| 24 | * The 'gfpmask' refers to the allocation we are currently trying to | 38 | * @scan_objects will only be called if @count_objects returned a non-zero |
| 25 | * fulfil. | 39 | * value for the number of freeable objects. The callout should scan the cache |
| 40 | * and attempt to free items from the cache. It should then return the number | ||
| 41 | * of objects freed during the scan, or SHRINK_STOP if progress cannot be made | ||
| 42 | * due to potential deadlocks. If SHRINK_STOP is returned, then no further | ||
| 43 | * attempts to call the @scan_objects will be made from the current reclaim | ||
| 44 | * context. | ||
| 26 | * | 45 | * |
| 27 | * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is | 46 | * @flags determine the shrinker abilities, like numa awareness |
| 28 | * querying the cache size, so a fastpath for that case is appropriate. | ||
| 29 | */ | 47 | */ |
| 30 | struct shrinker { | 48 | struct shrinker { |
| 31 | int (*shrink)(struct shrinker *, struct shrink_control *sc); | 49 | unsigned long (*count_objects)(struct shrinker *, |
| 50 | struct shrink_control *sc); | ||
| 51 | unsigned long (*scan_objects)(struct shrinker *, | ||
| 52 | struct shrink_control *sc); | ||
| 53 | |||
| 32 | int seeks; /* seeks to recreate an obj */ | 54 | int seeks; /* seeks to recreate an obj */ |
| 33 | long batch; /* reclaim batch size, 0 = default */ | 55 | long batch; /* reclaim batch size, 0 = default */ |
| 56 | unsigned long flags; | ||
| 34 | 57 | ||
| 35 | /* These are for internal use */ | 58 | /* These are for internal use */ |
| 36 | struct list_head list; | 59 | struct list_head list; |
| 37 | atomic_long_t nr_in_batch; /* objs pending delete */ | 60 | /* objs pending delete, per node */ |
| 61 | atomic_long_t *nr_deferred; | ||
| 38 | }; | 62 | }; |
| 39 | #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ | 63 | #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ |
| 40 | extern void register_shrinker(struct shrinker *); | 64 | |
| 65 | /* Flags */ | ||
| 66 | #define SHRINKER_NUMA_AWARE (1 << 0) | ||
| 67 | |||
| 68 | extern int register_shrinker(struct shrinker *); | ||
| 41 | extern void unregister_shrinker(struct shrinker *); | 69 | extern void unregister_shrinker(struct shrinker *); |
| 42 | #endif | 70 | #endif |
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 63cfcccaebb3..132a985aba8b 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h | |||
| @@ -202,7 +202,7 @@ TRACE_EVENT(mm_shrink_slab_start, | |||
| 202 | 202 | ||
| 203 | TP_fast_assign( | 203 | TP_fast_assign( |
| 204 | __entry->shr = shr; | 204 | __entry->shr = shr; |
| 205 | __entry->shrink = shr->shrink; | 205 | __entry->shrink = shr->scan_objects; |
| 206 | __entry->nr_objects_to_shrink = nr_objects_to_shrink; | 206 | __entry->nr_objects_to_shrink = nr_objects_to_shrink; |
| 207 | __entry->gfp_flags = sc->gfp_mask; | 207 | __entry->gfp_flags = sc->gfp_mask; |
| 208 | __entry->pgs_scanned = pgs_scanned; | 208 | __entry->pgs_scanned = pgs_scanned; |
| @@ -241,7 +241,7 @@ TRACE_EVENT(mm_shrink_slab_end, | |||
| 241 | 241 | ||
| 242 | TP_fast_assign( | 242 | TP_fast_assign( |
| 243 | __entry->shr = shr; | 243 | __entry->shr = shr; |
| 244 | __entry->shrink = shr->shrink; | 244 | __entry->shrink = shr->scan_objects; |
| 245 | __entry->unused_scan = unused_scan_cnt; | 245 | __entry->unused_scan = unused_scan_cnt; |
| 246 | __entry->new_scan = new_scan_cnt; | 246 | __entry->new_scan = new_scan_cnt; |
| 247 | __entry->retval = shrinker_retval; | 247 | __entry->retval = shrinker_retval; |
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index a4ed56cf0eac..6c28b61bb690 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h | |||
| @@ -49,9 +49,9 @@ struct files_stat_struct { | |||
| 49 | }; | 49 | }; |
| 50 | 50 | ||
| 51 | struct inodes_stat_t { | 51 | struct inodes_stat_t { |
| 52 | int nr_inodes; | 52 | long nr_inodes; |
| 53 | int nr_unused; | 53 | long nr_unused; |
| 54 | int dummy[5]; /* padding for sysctl ABI compatibility */ | 54 | long dummy[5]; /* padding for sysctl ABI compatibility */ |
| 55 | }; | 55 | }; |
| 56 | 56 | ||
| 57 | 57 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index dc69093a8ec4..b2f06f3c6a3f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -1471,14 +1471,14 @@ static struct ctl_table fs_table[] = { | |||
| 1471 | { | 1471 | { |
| 1472 | .procname = "inode-nr", | 1472 | .procname = "inode-nr", |
| 1473 | .data = &inodes_stat, | 1473 | .data = &inodes_stat, |
| 1474 | .maxlen = 2*sizeof(int), | 1474 | .maxlen = 2*sizeof(long), |
| 1475 | .mode = 0444, | 1475 | .mode = 0444, |
| 1476 | .proc_handler = proc_nr_inodes, | 1476 | .proc_handler = proc_nr_inodes, |
| 1477 | }, | 1477 | }, |
| 1478 | { | 1478 | { |
| 1479 | .procname = "inode-state", | 1479 | .procname = "inode-state", |
| 1480 | .data = &inodes_stat, | 1480 | .data = &inodes_stat, |
| 1481 | .maxlen = 7*sizeof(int), | 1481 | .maxlen = 7*sizeof(long), |
| 1482 | .mode = 0444, | 1482 | .mode = 0444, |
| 1483 | .proc_handler = proc_nr_inodes, | 1483 | .proc_handler = proc_nr_inodes, |
| 1484 | }, | 1484 | }, |
| @@ -1508,7 +1508,7 @@ static struct ctl_table fs_table[] = { | |||
| 1508 | { | 1508 | { |
| 1509 | .procname = "dentry-state", | 1509 | .procname = "dentry-state", |
| 1510 | .data = &dentry_stat, | 1510 | .data = &dentry_stat, |
| 1511 | .maxlen = 6*sizeof(int), | 1511 | .maxlen = 6*sizeof(long), |
| 1512 | .mode = 0444, | 1512 | .mode = 0444, |
| 1513 | .proc_handler = proc_nr_dentry, | 1513 | .proc_handler = proc_nr_dentry, |
| 1514 | }, | 1514 | }, |
diff --git a/mm/Makefile b/mm/Makefile index f00803386a67..305d10acd081 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
| @@ -17,7 +17,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ | |||
| 17 | util.o mmzone.o vmstat.o backing-dev.o \ | 17 | util.o mmzone.o vmstat.o backing-dev.o \ |
| 18 | mm_init.o mmu_context.o percpu.o slab_common.o \ | 18 | mm_init.o mmu_context.o percpu.o slab_common.o \ |
| 19 | compaction.o balloon_compaction.o \ | 19 | compaction.o balloon_compaction.o \ |
| 20 | interval_tree.o $(mmu-y) | 20 | interval_tree.o list_lru.o $(mmu-y) |
| 21 | 21 | ||
| 22 | obj-y += init-mm.o | 22 | obj-y += init-mm.o |
| 23 | 23 | ||
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 963e14c0486f..d66010e0049d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
| @@ -211,24 +211,29 @@ static void put_huge_zero_page(void) | |||
| 211 | BUG_ON(atomic_dec_and_test(&huge_zero_refcount)); | 211 | BUG_ON(atomic_dec_and_test(&huge_zero_refcount)); |
| 212 | } | 212 | } |
| 213 | 213 | ||
| 214 | static int shrink_huge_zero_page(struct shrinker *shrink, | 214 | static unsigned long shrink_huge_zero_page_count(struct shrinker *shrink, |
| 215 | struct shrink_control *sc) | 215 | struct shrink_control *sc) |
| 216 | { | 216 | { |
| 217 | if (!sc->nr_to_scan) | 217 | /* we can free zero page only if last reference remains */ |
| 218 | /* we can free zero page only if last reference remains */ | 218 | return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0; |
| 219 | return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0; | 219 | } |
| 220 | 220 | ||
| 221 | static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink, | ||
| 222 | struct shrink_control *sc) | ||
| 223 | { | ||
| 221 | if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { | 224 | if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { |
| 222 | struct page *zero_page = xchg(&huge_zero_page, NULL); | 225 | struct page *zero_page = xchg(&huge_zero_page, NULL); |
| 223 | BUG_ON(zero_page == NULL); | 226 | BUG_ON(zero_page == NULL); |
| 224 | __free_page(zero_page); | 227 | __free_page(zero_page); |
| 228 | return HPAGE_PMD_NR; | ||
| 225 | } | 229 | } |
| 226 | 230 | ||
| 227 | return 0; | 231 | return 0; |
| 228 | } | 232 | } |
| 229 | 233 | ||
| 230 | static struct shrinker huge_zero_page_shrinker = { | 234 | static struct shrinker huge_zero_page_shrinker = { |
| 231 | .shrink = shrink_huge_zero_page, | 235 | .count_objects = shrink_huge_zero_page_count, |
| 236 | .scan_objects = shrink_huge_zero_page_scan, | ||
| 232 | .seeks = DEFAULT_SEEKS, | 237 | .seeks = DEFAULT_SEEKS, |
| 233 | }; | 238 | }; |
| 234 | 239 | ||
diff --git a/mm/list_lru.c b/mm/list_lru.c new file mode 100644 index 000000000000..72467914b856 --- /dev/null +++ b/mm/list_lru.c | |||
| @@ -0,0 +1,139 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved. | ||
| 3 | * Authors: David Chinner and Glauber Costa | ||
| 4 | * | ||
| 5 | * Generic LRU infrastructure | ||
| 6 | */ | ||
| 7 | #include <linux/kernel.h> | ||
| 8 | #include <linux/module.h> | ||
| 9 | #include <linux/mm.h> | ||
| 10 | #include <linux/list_lru.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | |||
| 13 | bool list_lru_add(struct list_lru *lru, struct list_head *item) | ||
| 14 | { | ||
| 15 | int nid = page_to_nid(virt_to_page(item)); | ||
| 16 | struct list_lru_node *nlru = &lru->node[nid]; | ||
| 17 | |||
| 18 | spin_lock(&nlru->lock); | ||
| 19 | WARN_ON_ONCE(nlru->nr_items < 0); | ||
| 20 | if (list_empty(item)) { | ||
| 21 | list_add_tail(item, &nlru->list); | ||
| 22 | if (nlru->nr_items++ == 0) | ||
| 23 | node_set(nid, lru->active_nodes); | ||
| 24 | spin_unlock(&nlru->lock); | ||
| 25 | return true; | ||
| 26 | } | ||
| 27 | spin_unlock(&nlru->lock); | ||
| 28 | return false; | ||
| 29 | } | ||
| 30 | EXPORT_SYMBOL_GPL(list_lru_add); | ||
| 31 | |||
| 32 | bool list_lru_del(struct list_lru *lru, struct list_head *item) | ||
| 33 | { | ||
| 34 | int nid = page_to_nid(virt_to_page(item)); | ||
| 35 | struct list_lru_node *nlru = &lru->node[nid]; | ||
| 36 | |||
| 37 | spin_lock(&nlru->lock); | ||
| 38 | if (!list_empty(item)) { | ||
| 39 | list_del_init(item); | ||
| 40 | if (--nlru->nr_items == 0) | ||
| 41 | node_clear(nid, lru->active_nodes); | ||
| 42 | WARN_ON_ONCE(nlru->nr_items < 0); | ||
| 43 | spin_unlock(&nlru->lock); | ||
| 44 | return true; | ||
| 45 | } | ||
| 46 | spin_unlock(&nlru->lock); | ||
| 47 | return false; | ||
| 48 | } | ||
| 49 | EXPORT_SYMBOL_GPL(list_lru_del); | ||
| 50 | |||
| 51 | unsigned long | ||
| 52 | list_lru_count_node(struct list_lru *lru, int nid) | ||
| 53 | { | ||
| 54 | unsigned long count = 0; | ||
| 55 | struct list_lru_node *nlru = &lru->node[nid]; | ||
| 56 | |||
| 57 | spin_lock(&nlru->lock); | ||
| 58 | WARN_ON_ONCE(nlru->nr_items < 0); | ||
| 59 | count += nlru->nr_items; | ||
| 60 | spin_unlock(&nlru->lock); | ||
| 61 | |||
| 62 | return count; | ||
| 63 | } | ||
| 64 | EXPORT_SYMBOL_GPL(list_lru_count_node); | ||
| 65 | |||
| 66 | unsigned long | ||
| 67 | list_lru_walk_node(struct list_lru *lru, int nid, list_lru_walk_cb isolate, | ||
| 68 | void *cb_arg, unsigned long *nr_to_walk) | ||
| 69 | { | ||
| 70 | |||
| 71 | struct list_lru_node *nlru = &lru->node[nid]; | ||
| 72 | struct list_head *item, *n; | ||
| 73 | unsigned long isolated = 0; | ||
| 74 | |||
| 75 | spin_lock(&nlru->lock); | ||
| 76 | restart: | ||
| 77 | list_for_each_safe(item, n, &nlru->list) { | ||
| 78 | enum lru_status ret; | ||
| 79 | |||
| 80 | /* | ||
| 81 | * decrement nr_to_walk first so that we don't livelock if we | ||
| 82 | * get stuck on large numbers of LRU_RETRY items | ||
| 83 | */ | ||
| 84 | if (--(*nr_to_walk) == 0) | ||
| 85 | break; | ||
| 86 | |||
| 87 | ret = isolate(item, &nlru->lock, cb_arg); | ||
| 88 | switch (ret) { | ||
| 89 | case LRU_REMOVED: | ||
| 90 | if (--nlru->nr_items == 0) | ||
| 91 | node_clear(nid, lru->active_nodes); | ||
| 92 | WARN_ON_ONCE(nlru->nr_items < 0); | ||
| 93 | isolated++; | ||
| 94 | break; | ||
| 95 | case LRU_ROTATE: | ||
| 96 | list_move_tail(item, &nlru->list); | ||
| 97 | break; | ||
| 98 | case LRU_SKIP: | ||
| 99 | break; | ||
| 100 | case LRU_RETRY: | ||
| 101 | /* | ||
| 102 | * The lru lock has been dropped, our list traversal is | ||
| 103 | * now invalid and so we have to restart from scratch. | ||
| 104 | */ | ||
| 105 | goto restart; | ||
| 106 | default: | ||
| 107 | BUG(); | ||
| 108 | } | ||
| 109 | } | ||
| 110 | |||
| 111 | spin_unlock(&nlru->lock); | ||
| 112 | return isolated; | ||
| 113 | } | ||
| 114 | EXPORT_SYMBOL_GPL(list_lru_walk_node); | ||
| 115 | |||
| 116 | int list_lru_init(struct list_lru *lru) | ||
| 117 | { | ||
| 118 | int i; | ||
| 119 | size_t size = sizeof(*lru->node) * nr_node_ids; | ||
| 120 | |||
| 121 | lru->node = kzalloc(size, GFP_KERNEL); | ||
| 122 | if (!lru->node) | ||
| 123 | return -ENOMEM; | ||
| 124 | |||
| 125 | nodes_clear(lru->active_nodes); | ||
| 126 | for (i = 0; i < nr_node_ids; i++) { | ||
| 127 | spin_lock_init(&lru->node[i].lock); | ||
| 128 | INIT_LIST_HEAD(&lru->node[i].list); | ||
| 129 | lru->node[i].nr_items = 0; | ||
| 130 | } | ||
| 131 | return 0; | ||
| 132 | } | ||
| 133 | EXPORT_SYMBOL_GPL(list_lru_init); | ||
| 134 | |||
| 135 | void list_lru_destroy(struct list_lru *lru) | ||
| 136 | { | ||
| 137 | kfree(lru->node); | ||
| 138 | } | ||
| 139 | EXPORT_SYMBOL_GPL(list_lru_destroy); | ||
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index d472e14c6808..947ed5413279 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
| @@ -248,10 +248,12 @@ void shake_page(struct page *p, int access) | |||
| 248 | */ | 248 | */ |
| 249 | if (access) { | 249 | if (access) { |
| 250 | int nr; | 250 | int nr; |
| 251 | int nid = page_to_nid(p); | ||
| 251 | do { | 252 | do { |
| 252 | struct shrink_control shrink = { | 253 | struct shrink_control shrink = { |
| 253 | .gfp_mask = GFP_KERNEL, | 254 | .gfp_mask = GFP_KERNEL, |
| 254 | }; | 255 | }; |
| 256 | node_set(nid, shrink.nodes_to_scan); | ||
| 255 | 257 | ||
| 256 | nr = shrink_slab(&shrink, 1000, 1000); | 258 | nr = shrink_slab(&shrink, 1000, 1000); |
| 257 | if (page_count(p) == 1) | 259 | if (page_count(p) == 1) |
diff --git a/mm/vmscan.c b/mm/vmscan.c index fe715daeb8bc..beb35778c69f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
| @@ -174,14 +174,31 @@ static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) | |||
| 174 | } | 174 | } |
| 175 | 175 | ||
| 176 | /* | 176 | /* |
| 177 | * Add a shrinker callback to be called from the vm | 177 | * Add a shrinker callback to be called from the vm. |
| 178 | */ | 178 | */ |
| 179 | void register_shrinker(struct shrinker *shrinker) | 179 | int register_shrinker(struct shrinker *shrinker) |
| 180 | { | 180 | { |
| 181 | atomic_long_set(&shrinker->nr_in_batch, 0); | 181 | size_t size = sizeof(*shrinker->nr_deferred); |
| 182 | |||
| 183 | /* | ||
| 184 | * If we only have one possible node in the system anyway, save | ||
| 185 | * ourselves the trouble and disable NUMA aware behavior. This way we | ||
| 186 | * will save memory and some small loop time later. | ||
| 187 | */ | ||
| 188 | if (nr_node_ids == 1) | ||
| 189 | shrinker->flags &= ~SHRINKER_NUMA_AWARE; | ||
| 190 | |||
| 191 | if (shrinker->flags & SHRINKER_NUMA_AWARE) | ||
| 192 | size *= nr_node_ids; | ||
| 193 | |||
| 194 | shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); | ||
| 195 | if (!shrinker->nr_deferred) | ||
| 196 | return -ENOMEM; | ||
| 197 | |||
| 182 | down_write(&shrinker_rwsem); | 198 | down_write(&shrinker_rwsem); |
| 183 | list_add_tail(&shrinker->list, &shrinker_list); | 199 | list_add_tail(&shrinker->list, &shrinker_list); |
| 184 | up_write(&shrinker_rwsem); | 200 | up_write(&shrinker_rwsem); |
| 201 | return 0; | ||
| 185 | } | 202 | } |
| 186 | EXPORT_SYMBOL(register_shrinker); | 203 | EXPORT_SYMBOL(register_shrinker); |
| 187 | 204 | ||
| @@ -196,15 +213,102 @@ void unregister_shrinker(struct shrinker *shrinker) | |||
| 196 | } | 213 | } |
| 197 | EXPORT_SYMBOL(unregister_shrinker); | 214 | EXPORT_SYMBOL(unregister_shrinker); |
| 198 | 215 | ||
| 199 | static inline int do_shrinker_shrink(struct shrinker *shrinker, | 216 | #define SHRINK_BATCH 128 |
| 200 | struct shrink_control *sc, | 217 | |
| 201 | unsigned long nr_to_scan) | 218 | static unsigned long |
| 202 | { | 219 | shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker, |
| 203 | sc->nr_to_scan = nr_to_scan; | 220 | unsigned long nr_pages_scanned, unsigned long lru_pages) |
| 204 | return (*shrinker->shrink)(shrinker, sc); | 221 | { |
| 222 | unsigned long freed = 0; | ||
| 223 | unsigned long long delta; | ||
| 224 | long total_scan; | ||
| 225 | long max_pass; | ||
| 226 | long nr; | ||
| 227 | long new_nr; | ||
| 228 | int nid = shrinkctl->nid; | ||
| 229 | long batch_size = shrinker->batch ? shrinker->batch | ||
| 230 | : SHRINK_BATCH; | ||
| 231 | |||
| 232 | max_pass = shrinker->count_objects(shrinker, shrinkctl); | ||
| 233 | if (max_pass == 0) | ||
| 234 | return 0; | ||
| 235 | |||
| 236 | /* | ||
| 237 | * copy the current shrinker scan count into a local variable | ||
| 238 | * and zero it so that other concurrent shrinker invocations | ||
| 239 | * don't also do this scanning work. | ||
| 240 | */ | ||
| 241 | nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0); | ||
| 242 | |||
| 243 | total_scan = nr; | ||
| 244 | delta = (4 * nr_pages_scanned) / shrinker->seeks; | ||
| 245 | delta *= max_pass; | ||
| 246 | do_div(delta, lru_pages + 1); | ||
| 247 | total_scan += delta; | ||
| 248 | if (total_scan < 0) { | ||
| 249 | printk(KERN_ERR | ||
| 250 | "shrink_slab: %pF negative objects to delete nr=%ld\n", | ||
| 251 | shrinker->scan_objects, total_scan); | ||
| 252 | total_scan = max_pass; | ||
| 253 | } | ||
| 254 | |||
| 255 | /* | ||
| 256 | * We need to avoid excessive windup on filesystem shrinkers | ||
| 257 | * due to large numbers of GFP_NOFS allocations causing the | ||
| 258 | * shrinkers to return -1 all the time. This results in a large | ||
| 259 | * nr being built up so when a shrink that can do some work | ||
| 260 | * comes along it empties the entire cache due to nr >>> | ||
| 261 | * max_pass. This is bad for sustaining a working set in | ||
| 262 | * memory. | ||
| 263 | * | ||
| 264 | * Hence only allow the shrinker to scan the entire cache when | ||
| 265 | * a large delta change is calculated directly. | ||
| 266 | */ | ||
| 267 | if (delta < max_pass / 4) | ||
| 268 | total_scan = min(total_scan, max_pass / 2); | ||
| 269 | |||
| 270 | /* | ||
| 271 | * Avoid risking looping forever due to too large nr value: | ||
| 272 | * never try to free more than twice the estimate number of | ||
| 273 | * freeable entries. | ||
| 274 | */ | ||
| 275 | if (total_scan > max_pass * 2) | ||
| 276 | total_scan = max_pass * 2; | ||
| 277 | |||
| 278 | trace_mm_shrink_slab_start(shrinker, shrinkctl, nr, | ||
| 279 | nr_pages_scanned, lru_pages, | ||
| 280 | max_pass, delta, total_scan); | ||
| 281 | |||
| 282 | while (total_scan >= batch_size) { | ||
| 283 | unsigned long ret; | ||
| 284 | |||
| 285 | shrinkctl->nr_to_scan = batch_size; | ||
| 286 | ret = shrinker->scan_objects(shrinker, shrinkctl); | ||
| 287 | if (ret == SHRINK_STOP) | ||
| 288 | break; | ||
| 289 | freed += ret; | ||
| 290 | |||
| 291 | count_vm_events(SLABS_SCANNED, batch_size); | ||
| 292 | total_scan -= batch_size; | ||
| 293 | |||
| 294 | cond_resched(); | ||
| 295 | } | ||
| 296 | |||
| 297 | /* | ||
| 298 | * move the unused scan count back into the shrinker in a | ||
| 299 | * manner that handles concurrent updates. If we exhausted the | ||
| 300 | * scan, there is no need to do an update. | ||
| 301 | */ | ||
| 302 | if (total_scan > 0) | ||
| 303 | new_nr = atomic_long_add_return(total_scan, | ||
| 304 | &shrinker->nr_deferred[nid]); | ||
| 305 | else | ||
| 306 | new_nr = atomic_long_read(&shrinker->nr_deferred[nid]); | ||
| 307 | |||
| 308 | trace_mm_shrink_slab_end(shrinker, freed, nr, new_nr); | ||
| 309 | return freed; | ||
| 205 | } | 310 | } |
| 206 | 311 | ||
| 207 | #define SHRINK_BATCH 128 | ||
| 208 | /* | 312 | /* |
| 209 | * Call the shrink functions to age shrinkable caches | 313 | * Call the shrink functions to age shrinkable caches |
| 210 | * | 314 | * |
| @@ -224,115 +328,45 @@ static inline int do_shrinker_shrink(struct shrinker *shrinker, | |||
| 224 | * | 328 | * |
| 225 | * Returns the number of slab objects which we shrunk. | 329 | * Returns the number of slab objects which we shrunk. |
| 226 | */ | 330 | */ |
| 227 | unsigned long shrink_slab(struct shrink_control *shrink, | 331 | unsigned long shrink_slab(struct shrink_control *shrinkctl, |
| 228 | unsigned long nr_pages_scanned, | 332 | unsigned long nr_pages_scanned, |
| 229 | unsigned long lru_pages) | 333 | unsigned long lru_pages) |
| 230 | { | 334 | { |
| 231 | struct shrinker *shrinker; | 335 | struct shrinker *shrinker; |
| 232 | unsigned long ret = 0; | 336 | unsigned long freed = 0; |
| 233 | 337 | ||
| 234 | if (nr_pages_scanned == 0) | 338 | if (nr_pages_scanned == 0) |
| 235 | nr_pages_scanned = SWAP_CLUSTER_MAX; | 339 | nr_pages_scanned = SWAP_CLUSTER_MAX; |
| 236 | 340 | ||
| 237 | if (!down_read_trylock(&shrinker_rwsem)) { | 341 | if (!down_read_trylock(&shrinker_rwsem)) { |
| 238 | /* Assume we'll be able to shrink next time */ | 342 | /* |
| 239 | ret = 1; | 343 | * If we would return 0, our callers would understand that we |
| 344 | * have nothing else to shrink and give up trying. By returning | ||
| 345 | * 1 we keep it going and assume we'll be able to shrink next | ||
| 346 | * time. | ||
| 347 | */ | ||
| 348 | freed = 1; | ||
| 240 | goto out; | 349 | goto out; |
| 241 | } | 350 | } |
| 242 | 351 | ||
| 243 | list_for_each_entry(shrinker, &shrinker_list, list) { | 352 | list_for_each_entry(shrinker, &shrinker_list, list) { |
| 244 | unsigned long long delta; | 353 | for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) { |
| 245 | long total_scan; | 354 | if (!node_online(shrinkctl->nid)) |
| 246 | long max_pass; | 355 | continue; |
| 247 | int shrink_ret = 0; | ||
| 248 | long nr; | ||
| 249 | long new_nr; | ||
| 250 | long batch_size = shrinker->batch ? shrinker->batch | ||
| 251 | : SHRINK_BATCH; | ||
| 252 | |||
| 253 | max_pass = do_shrinker_shrink(shrinker, shrink, 0); | ||
| 254 | if (max_pass <= 0) | ||
| 255 | continue; | ||
| 256 | |||
| 257 | /* | ||
| 258 | * copy the current shrinker scan count into a local variable | ||
| 259 | * and zero it so that other concurrent shrinker invocations | ||
| 260 | * don't also do this scanning work. | ||
| 261 | */ | ||
| 262 | nr = atomic_long_xchg(&shrinker->nr_in_batch, 0); | ||
| 263 | |||
| 264 | total_scan = nr; | ||
| 265 | delta = (4 * nr_pages_scanned) / shrinker->seeks; | ||
| 266 | delta *= max_pass; | ||
| 267 | do_div(delta, lru_pages + 1); | ||
| 268 | total_scan += delta; | ||
| 269 | if (total_scan < 0) { | ||
| 270 | printk(KERN_ERR "shrink_slab: %pF negative objects to " | ||
| 271 | "delete nr=%ld\n", | ||
| 272 | shrinker->shrink, total_scan); | ||
| 273 | total_scan = max_pass; | ||
| 274 | } | ||
| 275 | |||
| 276 | /* | ||
| 277 | * We need to avoid excessive windup on filesystem shrinkers | ||
| 278 | * due to large numbers of GFP_NOFS allocations causing the | ||
| 279 | * shrinkers to return -1 all the time. This results in a large | ||
| 280 | * nr being built up so when a shrink that can do some work | ||
| 281 | * comes along it empties the entire cache due to nr >>> | ||
| 282 | * max_pass. This is bad for sustaining a working set in | ||
| 283 | * memory. | ||
| 284 | * | ||
| 285 | * Hence only allow the shrinker to scan the entire cache when | ||
| 286 | * a large delta change is calculated directly. | ||
| 287 | */ | ||
| 288 | if (delta < max_pass / 4) | ||
| 289 | total_scan = min(total_scan, max_pass / 2); | ||
| 290 | |||
| 291 | /* | ||
| 292 | * Avoid risking looping forever due to too large nr value: | ||
| 293 | * never try to free more than twice the estimate number of | ||
| 294 | * freeable entries. | ||
| 295 | */ | ||
| 296 | if (total_scan > max_pass * 2) | ||
| 297 | total_scan = max_pass * 2; | ||
| 298 | |||
| 299 | trace_mm_shrink_slab_start(shrinker, shrink, nr, | ||
| 300 | nr_pages_scanned, lru_pages, | ||
| 301 | max_pass, delta, total_scan); | ||
| 302 | |||
| 303 | while (total_scan >= batch_size) { | ||
| 304 | int nr_before; | ||
| 305 | 356 | ||
| 306 | nr_before = do_shrinker_shrink(shrinker, shrink, 0); | 357 | if (!(shrinker->flags & SHRINKER_NUMA_AWARE) && |
| 307 | shrink_ret = do_shrinker_shrink(shrinker, shrink, | 358 | (shrinkctl->nid != 0)) |
| 308 | batch_size); | ||
| 309 | if (shrink_ret == -1) | ||
| 310 | break; | 359 | break; |
| 311 | if (shrink_ret < nr_before) | ||
| 312 | ret += nr_before - shrink_ret; | ||
| 313 | count_vm_events(SLABS_SCANNED, batch_size); | ||
| 314 | total_scan -= batch_size; | ||
| 315 | 360 | ||
| 316 | cond_resched(); | 361 | freed += shrink_slab_node(shrinkctl, shrinker, |
| 317 | } | 362 | nr_pages_scanned, lru_pages); |
| 318 | 363 | ||
| 319 | /* | 364 | } |
| 320 | * move the unused scan count back into the shrinker in a | ||
| 321 | * manner that handles concurrent updates. If we exhausted the | ||
| 322 | * scan, there is no need to do an update. | ||
| 323 | */ | ||
| 324 | if (total_scan > 0) | ||
| 325 | new_nr = atomic_long_add_return(total_scan, | ||
| 326 | &shrinker->nr_in_batch); | ||
| 327 | else | ||
| 328 | new_nr = atomic_long_read(&shrinker->nr_in_batch); | ||
| 329 | |||
| 330 | trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr); | ||
| 331 | } | 365 | } |
| 332 | up_read(&shrinker_rwsem); | 366 | up_read(&shrinker_rwsem); |
| 333 | out: | 367 | out: |
| 334 | cond_resched(); | 368 | cond_resched(); |
| 335 | return ret; | 369 | return freed; |
| 336 | } | 370 | } |
| 337 | 371 | ||
| 338 | static inline int is_page_cache_freeable(struct page *page) | 372 | static inline int is_page_cache_freeable(struct page *page) |
| @@ -2368,12 +2402,16 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
| 2368 | */ | 2402 | */ |
| 2369 | if (global_reclaim(sc)) { | 2403 | if (global_reclaim(sc)) { |
| 2370 | unsigned long lru_pages = 0; | 2404 | unsigned long lru_pages = 0; |
| 2405 | |||
| 2406 | nodes_clear(shrink->nodes_to_scan); | ||
| 2371 | for_each_zone_zonelist(zone, z, zonelist, | 2407 | for_each_zone_zonelist(zone, z, zonelist, |
| 2372 | gfp_zone(sc->gfp_mask)) { | 2408 | gfp_zone(sc->gfp_mask)) { |
| 2373 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) | 2409 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) |
| 2374 | continue; | 2410 | continue; |
| 2375 | 2411 | ||
| 2376 | lru_pages += zone_reclaimable_pages(zone); | 2412 | lru_pages += zone_reclaimable_pages(zone); |
| 2413 | node_set(zone_to_nid(zone), | ||
| 2414 | shrink->nodes_to_scan); | ||
| 2377 | } | 2415 | } |
| 2378 | 2416 | ||
| 2379 | shrink_slab(shrink, sc->nr_scanned, lru_pages); | 2417 | shrink_slab(shrink, sc->nr_scanned, lru_pages); |
| @@ -2829,6 +2867,8 @@ static bool kswapd_shrink_zone(struct zone *zone, | |||
| 2829 | return true; | 2867 | return true; |
| 2830 | 2868 | ||
| 2831 | shrink_zone(zone, sc); | 2869 | shrink_zone(zone, sc); |
| 2870 | nodes_clear(shrink.nodes_to_scan); | ||
| 2871 | node_set(zone_to_nid(zone), shrink.nodes_to_scan); | ||
| 2832 | 2872 | ||
| 2833 | reclaim_state->reclaimed_slab = 0; | 2873 | reclaim_state->reclaimed_slab = 0; |
| 2834 | shrink_slab(&shrink, sc->nr_scanned, lru_pages); | 2874 | shrink_slab(&shrink, sc->nr_scanned, lru_pages); |
| @@ -3520,10 +3560,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
| 3520 | * number of slab pages and shake the slab until it is reduced | 3560 | * number of slab pages and shake the slab until it is reduced |
| 3521 | * by the same nr_pages that we used for reclaiming unmapped | 3561 | * by the same nr_pages that we used for reclaiming unmapped |
| 3522 | * pages. | 3562 | * pages. |
| 3523 | * | ||
| 3524 | * Note that shrink_slab will free memory on all zones and may | ||
| 3525 | * take a long time. | ||
| 3526 | */ | 3563 | */ |
| 3564 | nodes_clear(shrink.nodes_to_scan); | ||
| 3565 | node_set(zone_to_nid(zone), shrink.nodes_to_scan); | ||
| 3527 | for (;;) { | 3566 | for (;;) { |
| 3528 | unsigned long lru_pages = zone_reclaimable_pages(zone); | 3567 | unsigned long lru_pages = zone_reclaimable_pages(zone); |
| 3529 | 3568 | ||
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 415159061cd0..5285ead196c0 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c | |||
| @@ -434,12 +434,13 @@ EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache); | |||
| 434 | /* | 434 | /* |
| 435 | * Remove stale credentials. Avoid sleeping inside the loop. | 435 | * Remove stale credentials. Avoid sleeping inside the loop. |
| 436 | */ | 436 | */ |
| 437 | static int | 437 | static long |
| 438 | rpcauth_prune_expired(struct list_head *free, int nr_to_scan) | 438 | rpcauth_prune_expired(struct list_head *free, int nr_to_scan) |
| 439 | { | 439 | { |
| 440 | spinlock_t *cache_lock; | 440 | spinlock_t *cache_lock; |
| 441 | struct rpc_cred *cred, *next; | 441 | struct rpc_cred *cred, *next; |
| 442 | unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM; | 442 | unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM; |
| 443 | long freed = 0; | ||
| 443 | 444 | ||
| 444 | list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { | 445 | list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { |
| 445 | 446 | ||
| @@ -451,10 +452,11 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) | |||
| 451 | */ | 452 | */ |
| 452 | if (time_in_range(cred->cr_expire, expired, jiffies) && | 453 | if (time_in_range(cred->cr_expire, expired, jiffies) && |
| 453 | test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) | 454 | test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) |
| 454 | return 0; | 455 | break; |
| 455 | 456 | ||
| 456 | list_del_init(&cred->cr_lru); | 457 | list_del_init(&cred->cr_lru); |
| 457 | number_cred_unused--; | 458 | number_cred_unused--; |
| 459 | freed++; | ||
| 458 | if (atomic_read(&cred->cr_count) != 0) | 460 | if (atomic_read(&cred->cr_count) != 0) |
| 459 | continue; | 461 | continue; |
| 460 | 462 | ||
| @@ -467,29 +469,39 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) | |||
| 467 | } | 469 | } |
| 468 | spin_unlock(cache_lock); | 470 | spin_unlock(cache_lock); |
| 469 | } | 471 | } |
| 470 | return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; | 472 | return freed; |
| 471 | } | 473 | } |
| 472 | 474 | ||
| 473 | /* | 475 | /* |
| 474 | * Run memory cache shrinker. | 476 | * Run memory cache shrinker. |
| 475 | */ | 477 | */ |
| 476 | static int | 478 | static unsigned long |
| 477 | rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc) | 479 | rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) |
| 480 | |||
| 478 | { | 481 | { |
| 479 | LIST_HEAD(free); | 482 | LIST_HEAD(free); |
| 480 | int res; | 483 | unsigned long freed; |
| 481 | int nr_to_scan = sc->nr_to_scan; | 484 | |
| 482 | gfp_t gfp_mask = sc->gfp_mask; | 485 | if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL) |
| 486 | return SHRINK_STOP; | ||
| 483 | 487 | ||
| 484 | if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) | 488 | /* nothing left, don't come back */ |
| 485 | return (nr_to_scan == 0) ? 0 : -1; | ||
| 486 | if (list_empty(&cred_unused)) | 489 | if (list_empty(&cred_unused)) |
| 487 | return 0; | 490 | return SHRINK_STOP; |
| 491 | |||
| 488 | spin_lock(&rpc_credcache_lock); | 492 | spin_lock(&rpc_credcache_lock); |
| 489 | res = rpcauth_prune_expired(&free, nr_to_scan); | 493 | freed = rpcauth_prune_expired(&free, sc->nr_to_scan); |
| 490 | spin_unlock(&rpc_credcache_lock); | 494 | spin_unlock(&rpc_credcache_lock); |
| 491 | rpcauth_destroy_credlist(&free); | 495 | rpcauth_destroy_credlist(&free); |
| 492 | return res; | 496 | |
| 497 | return freed; | ||
| 498 | } | ||
| 499 | |||
| 500 | static unsigned long | ||
| 501 | rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) | ||
| 502 | |||
| 503 | { | ||
| 504 | return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; | ||
| 493 | } | 505 | } |
| 494 | 506 | ||
| 495 | /* | 507 | /* |
| @@ -805,7 +817,8 @@ rpcauth_uptodatecred(struct rpc_task *task) | |||
| 805 | } | 817 | } |
| 806 | 818 | ||
| 807 | static struct shrinker rpc_cred_shrinker = { | 819 | static struct shrinker rpc_cred_shrinker = { |
| 808 | .shrink = rpcauth_cache_shrinker, | 820 | .count_objects = rpcauth_cache_shrink_count, |
| 821 | .scan_objects = rpcauth_cache_shrink_scan, | ||
| 809 | .seeks = DEFAULT_SEEKS, | 822 | .seeks = DEFAULT_SEEKS, |
| 810 | }; | 823 | }; |
| 811 | 824 | ||
