aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/kvm/mmu.c25
-rw-r--r--drivers/gpu/drm/i915/i915_dma.c4
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c82
-rw-r--r--drivers/gpu/drm/ttm/ttm_page_alloc.c44
-rw-r--r--drivers/gpu/drm/ttm/ttm_page_alloc_dma.c51
-rw-r--r--drivers/md/bcache/btree.c43
-rw-r--r--drivers/md/bcache/sysfs.c2
-rw-r--r--drivers/md/dm-bufio.c64
-rw-r--r--drivers/staging/android/ashmem.c44
-rw-r--r--drivers/staging/android/lowmemorykiller.c43
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h38
-rw-r--r--drivers/staging/lustre/lustre/ldlm/ldlm_pool.c148
-rw-r--r--drivers/staging/lustre/lustre/obdclass/lu_object.c98
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c76
-rw-r--r--fs/dcache.c276
-rw-r--r--fs/drop_caches.c1
-rw-r--r--fs/ext4/extents_status.c33
-rw-r--r--fs/gfs2/glock.c30
-rw-r--r--fs/gfs2/main.c3
-rw-r--r--fs/gfs2/quota.c18
-rw-r--r--fs/gfs2/quota.h6
-rw-r--r--fs/inode.c193
-rw-r--r--fs/internal.h6
-rw-r--r--fs/mbcache.c49
-rw-r--r--fs/nfs/dir.c16
-rw-r--r--fs/nfs/internal.h6
-rw-r--r--fs/nfs/super.c3
-rw-r--r--fs/nfsd/nfscache.c32
-rw-r--r--fs/quota/dquot.c34
-rw-r--r--fs/super.c111
-rw-r--r--fs/ubifs/shrinker.c29
-rw-r--r--fs/ubifs/super.c3
-rw-r--r--fs/ubifs/ubifs.h5
-rw-r--r--fs/xfs/xfs_buf.c253
-rw-r--r--fs/xfs/xfs_buf.h17
-rw-r--r--fs/xfs/xfs_dquot.c7
-rw-r--r--fs/xfs/xfs_icache.c4
-rw-r--r--fs/xfs/xfs_icache.h2
-rw-r--r--fs/xfs/xfs_qm.c287
-rw-r--r--fs/xfs/xfs_qm.h4
-rw-r--r--fs/xfs/xfs_super.c12
-rw-r--r--include/linux/dcache.h14
-rw-r--r--include/linux/fs.h25
-rw-r--r--include/linux/list_lru.h131
-rw-r--r--include/linux/shrinker.h54
-rw-r--r--include/trace/events/vmscan.h4
-rw-r--r--include/uapi/linux/fs.h6
-rw-r--r--kernel/sysctl.c6
-rw-r--r--mm/Makefile2
-rw-r--r--mm/huge_memory.c17
-rw-r--r--mm/list_lru.c139
-rw-r--r--mm/memory-failure.c2
-rw-r--r--mm/vmscan.c241
-rw-r--r--net/sunrpc/auth.c41
54 files changed, 1755 insertions, 1129 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6e2d2c8f230b..dce0df8150df 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4421,13 +4421,12 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
4421 } 4421 }
4422} 4422}
4423 4423
4424static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) 4424static unsigned long
4425mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
4425{ 4426{
4426 struct kvm *kvm; 4427 struct kvm *kvm;
4427 int nr_to_scan = sc->nr_to_scan; 4428 int nr_to_scan = sc->nr_to_scan;
4428 4429 unsigned long freed = 0;
4429 if (nr_to_scan == 0)
4430 goto out;
4431 4430
4432 raw_spin_lock(&kvm_lock); 4431 raw_spin_lock(&kvm_lock);
4433 4432
@@ -4462,25 +4461,37 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
4462 goto unlock; 4461 goto unlock;
4463 } 4462 }
4464 4463
4465 prepare_zap_oldest_mmu_page(kvm, &invalid_list); 4464 if (prepare_zap_oldest_mmu_page(kvm, &invalid_list))
4465 freed++;
4466 kvm_mmu_commit_zap_page(kvm, &invalid_list); 4466 kvm_mmu_commit_zap_page(kvm, &invalid_list);
4467 4467
4468unlock: 4468unlock:
4469 spin_unlock(&kvm->mmu_lock); 4469 spin_unlock(&kvm->mmu_lock);
4470 srcu_read_unlock(&kvm->srcu, idx); 4470 srcu_read_unlock(&kvm->srcu, idx);
4471 4471
4472 /*
4473 * unfair on small ones
4474 * per-vm shrinkers cry out
4475 * sadness comes quickly
4476 */
4472 list_move_tail(&kvm->vm_list, &vm_list); 4477 list_move_tail(&kvm->vm_list, &vm_list);
4473 break; 4478 break;
4474 } 4479 }
4475 4480
4476 raw_spin_unlock(&kvm_lock); 4481 raw_spin_unlock(&kvm_lock);
4482 return freed;
4477 4483
4478out: 4484}
4485
4486static unsigned long
4487mmu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
4488{
4479 return percpu_counter_read_positive(&kvm_total_used_mmu_pages); 4489 return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
4480} 4490}
4481 4491
4482static struct shrinker mmu_shrinker = { 4492static struct shrinker mmu_shrinker = {
4483 .shrink = mmu_shrink, 4493 .count_objects = mmu_shrink_count,
4494 .scan_objects = mmu_shrink_scan,
4484 .seeks = DEFAULT_SEEKS * 10, 4495 .seeks = DEFAULT_SEEKS * 10,
4485}; 4496};
4486 4497
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index fdaa0915ce56..d5c784d48671 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1667,7 +1667,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
1667 return 0; 1667 return 0;
1668 1668
1669out_gem_unload: 1669out_gem_unload:
1670 if (dev_priv->mm.inactive_shrinker.shrink) 1670 if (dev_priv->mm.inactive_shrinker.scan_objects)
1671 unregister_shrinker(&dev_priv->mm.inactive_shrinker); 1671 unregister_shrinker(&dev_priv->mm.inactive_shrinker);
1672 1672
1673 if (dev->pdev->msi_enabled) 1673 if (dev->pdev->msi_enabled)
@@ -1706,7 +1706,7 @@ int i915_driver_unload(struct drm_device *dev)
1706 1706
1707 i915_teardown_sysfs(dev); 1707 i915_teardown_sysfs(dev);
1708 1708
1709 if (dev_priv->mm.inactive_shrinker.shrink) 1709 if (dev_priv->mm.inactive_shrinker.scan_objects)
1710 unregister_shrinker(&dev_priv->mm.inactive_shrinker); 1710 unregister_shrinker(&dev_priv->mm.inactive_shrinker);
1711 1711
1712 mutex_lock(&dev->struct_mutex); 1712 mutex_lock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2d1cb10d846f..a7ff3db4f607 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -57,10 +57,12 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
57 struct drm_i915_fence_reg *fence, 57 struct drm_i915_fence_reg *fence,
58 bool enable); 58 bool enable);
59 59
60static int i915_gem_inactive_shrink(struct shrinker *shrinker, 60static unsigned long i915_gem_inactive_count(struct shrinker *shrinker,
61 struct shrink_control *sc); 61 struct shrink_control *sc);
62static unsigned long i915_gem_inactive_scan(struct shrinker *shrinker,
63 struct shrink_control *sc);
62static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); 64static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
63static void i915_gem_shrink_all(struct drm_i915_private *dev_priv); 65static long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
64static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); 66static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
65 67
66static bool cpu_cache_is_coherent(struct drm_device *dev, 68static bool cpu_cache_is_coherent(struct drm_device *dev,
@@ -1736,16 +1738,21 @@ i915_gem_purge(struct drm_i915_private *dev_priv, long target)
1736 return __i915_gem_shrink(dev_priv, target, true); 1738 return __i915_gem_shrink(dev_priv, target, true);
1737} 1739}
1738 1740
1739static void 1741static long
1740i915_gem_shrink_all(struct drm_i915_private *dev_priv) 1742i915_gem_shrink_all(struct drm_i915_private *dev_priv)
1741{ 1743{
1742 struct drm_i915_gem_object *obj, *next; 1744 struct drm_i915_gem_object *obj, *next;
1745 long freed = 0;
1743 1746
1744 i915_gem_evict_everything(dev_priv->dev); 1747 i915_gem_evict_everything(dev_priv->dev);
1745 1748
1746 list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, 1749 list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list,
1747 global_list) 1750 global_list) {
1751 if (obj->pages_pin_count == 0)
1752 freed += obj->base.size >> PAGE_SHIFT;
1748 i915_gem_object_put_pages(obj); 1753 i915_gem_object_put_pages(obj);
1754 }
1755 return freed;
1749} 1756}
1750 1757
1751static int 1758static int
@@ -4526,7 +4533,8 @@ i915_gem_load(struct drm_device *dev)
4526 4533
4527 dev_priv->mm.interruptible = true; 4534 dev_priv->mm.interruptible = true;
4528 4535
4529 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; 4536 dev_priv->mm.inactive_shrinker.scan_objects = i915_gem_inactive_scan;
4537 dev_priv->mm.inactive_shrinker.count_objects = i915_gem_inactive_count;
4530 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; 4538 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
4531 register_shrinker(&dev_priv->mm.inactive_shrinker); 4539 register_shrinker(&dev_priv->mm.inactive_shrinker);
4532} 4540}
@@ -4749,8 +4757,8 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
4749#endif 4757#endif
4750} 4758}
4751 4759
4752static int 4760static unsigned long
4753i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) 4761i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc)
4754{ 4762{
4755 struct drm_i915_private *dev_priv = 4763 struct drm_i915_private *dev_priv =
4756 container_of(shrinker, 4764 container_of(shrinker,
@@ -4758,45 +4766,35 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
4758 mm.inactive_shrinker); 4766 mm.inactive_shrinker);
4759 struct drm_device *dev = dev_priv->dev; 4767 struct drm_device *dev = dev_priv->dev;
4760 struct drm_i915_gem_object *obj; 4768 struct drm_i915_gem_object *obj;
4761 int nr_to_scan = sc->nr_to_scan;
4762 bool unlock = true; 4769 bool unlock = true;
4763 int cnt; 4770 unsigned long count;
4764 4771
4765 if (!mutex_trylock(&dev->struct_mutex)) { 4772 if (!mutex_trylock(&dev->struct_mutex)) {
4766 if (!mutex_is_locked_by(&dev->struct_mutex, current)) 4773 if (!mutex_is_locked_by(&dev->struct_mutex, current))
4767 return 0; 4774 return SHRINK_STOP;
4768 4775
4769 if (dev_priv->mm.shrinker_no_lock_stealing) 4776 if (dev_priv->mm.shrinker_no_lock_stealing)
4770 return 0; 4777 return SHRINK_STOP;
4771 4778
4772 unlock = false; 4779 unlock = false;
4773 } 4780 }
4774 4781
4775 if (nr_to_scan) { 4782 count = 0;
4776 nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
4777 if (nr_to_scan > 0)
4778 nr_to_scan -= __i915_gem_shrink(dev_priv, nr_to_scan,
4779 false);
4780 if (nr_to_scan > 0)
4781 i915_gem_shrink_all(dev_priv);
4782 }
4783
4784 cnt = 0;
4785 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) 4783 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
4786 if (obj->pages_pin_count == 0) 4784 if (obj->pages_pin_count == 0)
4787 cnt += obj->base.size >> PAGE_SHIFT; 4785 count += obj->base.size >> PAGE_SHIFT;
4788 4786
4789 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 4787 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
4790 if (obj->active) 4788 if (obj->active)
4791 continue; 4789 continue;
4792 4790
4793 if (obj->pin_count == 0 && obj->pages_pin_count == 0) 4791 if (obj->pin_count == 0 && obj->pages_pin_count == 0)
4794 cnt += obj->base.size >> PAGE_SHIFT; 4792 count += obj->base.size >> PAGE_SHIFT;
4795 } 4793 }
4796 4794
4797 if (unlock) 4795 if (unlock)
4798 mutex_unlock(&dev->struct_mutex); 4796 mutex_unlock(&dev->struct_mutex);
4799 return cnt; 4797 return count;
4800} 4798}
4801 4799
4802/* All the new VM stuff */ 4800/* All the new VM stuff */
@@ -4860,6 +4858,40 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
4860 return 0; 4858 return 0;
4861} 4859}
4862 4860
4861static unsigned long
4862i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc)
4863{
4864 struct drm_i915_private *dev_priv =
4865 container_of(shrinker,
4866 struct drm_i915_private,
4867 mm.inactive_shrinker);
4868 struct drm_device *dev = dev_priv->dev;
4869 int nr_to_scan = sc->nr_to_scan;
4870 unsigned long freed;
4871 bool unlock = true;
4872
4873 if (!mutex_trylock(&dev->struct_mutex)) {
4874 if (!mutex_is_locked_by(&dev->struct_mutex, current))
4875 return 0;
4876
4877 if (dev_priv->mm.shrinker_no_lock_stealing)
4878 return 0;
4879
4880 unlock = false;
4881 }
4882
4883 freed = i915_gem_purge(dev_priv, nr_to_scan);
4884 if (freed < nr_to_scan)
4885 freed += __i915_gem_shrink(dev_priv, nr_to_scan,
4886 false);
4887 if (freed < nr_to_scan)
4888 freed += i915_gem_shrink_all(dev_priv);
4889
4890 if (unlock)
4891 mutex_unlock(&dev->struct_mutex);
4892 return freed;
4893}
4894
4863struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4895struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
4864 struct i915_address_space *vm) 4896 struct i915_address_space *vm)
4865{ 4897{
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index bd2a3b40cd12..863bef9f9234 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -377,28 +377,26 @@ out:
377 return nr_free; 377 return nr_free;
378} 378}
379 379
380/* Get good estimation how many pages are free in pools */
381static int ttm_pool_get_num_unused_pages(void)
382{
383 unsigned i;
384 int total = 0;
385 for (i = 0; i < NUM_POOLS; ++i)
386 total += _manager->pools[i].npages;
387
388 return total;
389}
390
391/** 380/**
392 * Callback for mm to request pool to reduce number of page held. 381 * Callback for mm to request pool to reduce number of page held.
382 *
383 * XXX: (dchinner) Deadlock warning!
384 *
385 * ttm_page_pool_free() does memory allocation using GFP_KERNEL. that means
386 * this can deadlock when called a sc->gfp_mask that is not equal to
387 * GFP_KERNEL.
388 *
389 * This code is crying out for a shrinker per pool....
393 */ 390 */
394static int ttm_pool_mm_shrink(struct shrinker *shrink, 391static unsigned long
395 struct shrink_control *sc) 392ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
396{ 393{
397 static atomic_t start_pool = ATOMIC_INIT(0); 394 static atomic_t start_pool = ATOMIC_INIT(0);
398 unsigned i; 395 unsigned i;
399 unsigned pool_offset = atomic_add_return(1, &start_pool); 396 unsigned pool_offset = atomic_add_return(1, &start_pool);
400 struct ttm_page_pool *pool; 397 struct ttm_page_pool *pool;
401 int shrink_pages = sc->nr_to_scan; 398 int shrink_pages = sc->nr_to_scan;
399 unsigned long freed = 0;
402 400
403 pool_offset = pool_offset % NUM_POOLS; 401 pool_offset = pool_offset % NUM_POOLS;
404 /* select start pool in round robin fashion */ 402 /* select start pool in round robin fashion */
@@ -408,14 +406,28 @@ static int ttm_pool_mm_shrink(struct shrinker *shrink,
408 break; 406 break;
409 pool = &_manager->pools[(i + pool_offset)%NUM_POOLS]; 407 pool = &_manager->pools[(i + pool_offset)%NUM_POOLS];
410 shrink_pages = ttm_page_pool_free(pool, nr_free); 408 shrink_pages = ttm_page_pool_free(pool, nr_free);
409 freed += nr_free - shrink_pages;
411 } 410 }
412 /* return estimated number of unused pages in pool */ 411 return freed;
413 return ttm_pool_get_num_unused_pages(); 412}
413
414
415static unsigned long
416ttm_pool_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
417{
418 unsigned i;
419 unsigned long count = 0;
420
421 for (i = 0; i < NUM_POOLS; ++i)
422 count += _manager->pools[i].npages;
423
424 return count;
414} 425}
415 426
416static void ttm_pool_mm_shrink_init(struct ttm_pool_manager *manager) 427static void ttm_pool_mm_shrink_init(struct ttm_pool_manager *manager)
417{ 428{
418 manager->mm_shrink.shrink = &ttm_pool_mm_shrink; 429 manager->mm_shrink.count_objects = ttm_pool_shrink_count;
430 manager->mm_shrink.scan_objects = ttm_pool_shrink_scan;
419 manager->mm_shrink.seeks = 1; 431 manager->mm_shrink.seeks = 1;
420 register_shrinker(&manager->mm_shrink); 432 register_shrinker(&manager->mm_shrink);
421} 433}
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index b8b394319b45..7957beeeaf73 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -918,19 +918,6 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev)
918} 918}
919EXPORT_SYMBOL_GPL(ttm_dma_populate); 919EXPORT_SYMBOL_GPL(ttm_dma_populate);
920 920
921/* Get good estimation how many pages are free in pools */
922static int ttm_dma_pool_get_num_unused_pages(void)
923{
924 struct device_pools *p;
925 unsigned total = 0;
926
927 mutex_lock(&_manager->lock);
928 list_for_each_entry(p, &_manager->pools, pools)
929 total += p->pool->npages_free;
930 mutex_unlock(&_manager->lock);
931 return total;
932}
933
934/* Put all pages in pages list to correct pool to wait for reuse */ 921/* Put all pages in pages list to correct pool to wait for reuse */
935void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) 922void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
936{ 923{
@@ -1002,18 +989,29 @@ EXPORT_SYMBOL_GPL(ttm_dma_unpopulate);
1002 989
1003/** 990/**
1004 * Callback for mm to request pool to reduce number of page held. 991 * Callback for mm to request pool to reduce number of page held.
992 *
993 * XXX: (dchinner) Deadlock warning!
994 *
995 * ttm_dma_page_pool_free() does GFP_KERNEL memory allocation, and so attention
996 * needs to be paid to sc->gfp_mask to determine if this can be done or not.
997 * GFP_KERNEL memory allocation in a GFP_ATOMIC reclaim context woul dbe really
998 * bad.
999 *
1000 * I'm getting sadder as I hear more pathetical whimpers about needing per-pool
1001 * shrinkers
1005 */ 1002 */
1006static int ttm_dma_pool_mm_shrink(struct shrinker *shrink, 1003static unsigned long
1007 struct shrink_control *sc) 1004ttm_dma_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
1008{ 1005{
1009 static atomic_t start_pool = ATOMIC_INIT(0); 1006 static atomic_t start_pool = ATOMIC_INIT(0);
1010 unsigned idx = 0; 1007 unsigned idx = 0;
1011 unsigned pool_offset = atomic_add_return(1, &start_pool); 1008 unsigned pool_offset = atomic_add_return(1, &start_pool);
1012 unsigned shrink_pages = sc->nr_to_scan; 1009 unsigned shrink_pages = sc->nr_to_scan;
1013 struct device_pools *p; 1010 struct device_pools *p;
1011 unsigned long freed = 0;
1014 1012
1015 if (list_empty(&_manager->pools)) 1013 if (list_empty(&_manager->pools))
1016 return 0; 1014 return SHRINK_STOP;
1017 1015
1018 mutex_lock(&_manager->lock); 1016 mutex_lock(&_manager->lock);
1019 pool_offset = pool_offset % _manager->npools; 1017 pool_offset = pool_offset % _manager->npools;
@@ -1029,18 +1027,33 @@ static int ttm_dma_pool_mm_shrink(struct shrinker *shrink,
1029 continue; 1027 continue;
1030 nr_free = shrink_pages; 1028 nr_free = shrink_pages;
1031 shrink_pages = ttm_dma_page_pool_free(p->pool, nr_free); 1029 shrink_pages = ttm_dma_page_pool_free(p->pool, nr_free);
1030 freed += nr_free - shrink_pages;
1031
1032 pr_debug("%s: (%s:%d) Asked to shrink %d, have %d more to go\n", 1032 pr_debug("%s: (%s:%d) Asked to shrink %d, have %d more to go\n",
1033 p->pool->dev_name, p->pool->name, current->pid, 1033 p->pool->dev_name, p->pool->name, current->pid,
1034 nr_free, shrink_pages); 1034 nr_free, shrink_pages);
1035 } 1035 }
1036 mutex_unlock(&_manager->lock); 1036 mutex_unlock(&_manager->lock);
1037 /* return estimated number of unused pages in pool */ 1037 return freed;
1038 return ttm_dma_pool_get_num_unused_pages(); 1038}
1039
1040static unsigned long
1041ttm_dma_pool_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
1042{
1043 struct device_pools *p;
1044 unsigned long count = 0;
1045
1046 mutex_lock(&_manager->lock);
1047 list_for_each_entry(p, &_manager->pools, pools)
1048 count += p->pool->npages_free;
1049 mutex_unlock(&_manager->lock);
1050 return count;
1039} 1051}
1040 1052
1041static void ttm_dma_pool_mm_shrink_init(struct ttm_pool_manager *manager) 1053static void ttm_dma_pool_mm_shrink_init(struct ttm_pool_manager *manager)
1042{ 1054{
1043 manager->mm_shrink.shrink = &ttm_dma_pool_mm_shrink; 1055 manager->mm_shrink.count_objects = ttm_dma_pool_shrink_count;
1056 manager->mm_shrink.scan_objects = &ttm_dma_pool_shrink_scan;
1044 manager->mm_shrink.seeks = 1; 1057 manager->mm_shrink.seeks = 1;
1045 register_shrinker(&manager->mm_shrink); 1058 register_shrinker(&manager->mm_shrink);
1046} 1059}
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index ee372884c405..f9764e61978b 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -597,24 +597,19 @@ static int mca_reap(struct btree *b, struct closure *cl, unsigned min_order)
597 return 0; 597 return 0;
598} 598}
599 599
600static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) 600static unsigned long bch_mca_scan(struct shrinker *shrink,
601 struct shrink_control *sc)
601{ 602{
602 struct cache_set *c = container_of(shrink, struct cache_set, shrink); 603 struct cache_set *c = container_of(shrink, struct cache_set, shrink);
603 struct btree *b, *t; 604 struct btree *b, *t;
604 unsigned long i, nr = sc->nr_to_scan; 605 unsigned long i, nr = sc->nr_to_scan;
606 unsigned long freed = 0;
605 607
606 if (c->shrinker_disabled) 608 if (c->shrinker_disabled)
607 return 0; 609 return SHRINK_STOP;
608 610
609 if (c->try_harder) 611 if (c->try_harder)
610 return 0; 612 return SHRINK_STOP;
611
612 /*
613 * If nr == 0, we're supposed to return the number of items we have
614 * cached. Not allowed to return -1.
615 */
616 if (!nr)
617 return mca_can_free(c) * c->btree_pages;
618 613
619 /* Return -1 if we can't do anything right now */ 614 /* Return -1 if we can't do anything right now */
620 if (sc->gfp_mask & __GFP_WAIT) 615 if (sc->gfp_mask & __GFP_WAIT)
@@ -634,14 +629,14 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc)
634 629
635 i = 0; 630 i = 0;
636 list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) { 631 list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
637 if (!nr) 632 if (freed >= nr)
638 break; 633 break;
639 634
640 if (++i > 3 && 635 if (++i > 3 &&
641 !mca_reap(b, NULL, 0)) { 636 !mca_reap(b, NULL, 0)) {
642 mca_data_free(b); 637 mca_data_free(b);
643 rw_unlock(true, b); 638 rw_unlock(true, b);
644 --nr; 639 freed++;
645 } 640 }
646 } 641 }
647 642
@@ -652,7 +647,7 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc)
652 if (list_empty(&c->btree_cache)) 647 if (list_empty(&c->btree_cache))
653 goto out; 648 goto out;
654 649
655 for (i = 0; nr && i < c->bucket_cache_used; i++) { 650 for (i = 0; (nr--) && i < c->bucket_cache_used; i++) {
656 b = list_first_entry(&c->btree_cache, struct btree, list); 651 b = list_first_entry(&c->btree_cache, struct btree, list);
657 list_rotate_left(&c->btree_cache); 652 list_rotate_left(&c->btree_cache);
658 653
@@ -661,14 +656,27 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc)
661 mca_bucket_free(b); 656 mca_bucket_free(b);
662 mca_data_free(b); 657 mca_data_free(b);
663 rw_unlock(true, b); 658 rw_unlock(true, b);
664 --nr; 659 freed++;
665 } else 660 } else
666 b->accessed = 0; 661 b->accessed = 0;
667 } 662 }
668out: 663out:
669 nr = mca_can_free(c) * c->btree_pages;
670 mutex_unlock(&c->bucket_lock); 664 mutex_unlock(&c->bucket_lock);
671 return nr; 665 return freed;
666}
667
668static unsigned long bch_mca_count(struct shrinker *shrink,
669 struct shrink_control *sc)
670{
671 struct cache_set *c = container_of(shrink, struct cache_set, shrink);
672
673 if (c->shrinker_disabled)
674 return 0;
675
676 if (c->try_harder)
677 return 0;
678
679 return mca_can_free(c) * c->btree_pages;
672} 680}
673 681
674void bch_btree_cache_free(struct cache_set *c) 682void bch_btree_cache_free(struct cache_set *c)
@@ -737,7 +745,8 @@ int bch_btree_cache_alloc(struct cache_set *c)
737 c->verify_data = NULL; 745 c->verify_data = NULL;
738#endif 746#endif
739 747
740 c->shrink.shrink = bch_mca_shrink; 748 c->shrink.count_objects = bch_mca_count;
749 c->shrink.scan_objects = bch_mca_scan;
741 c->shrink.seeks = 4; 750 c->shrink.seeks = 4;
742 c->shrink.batch = c->btree_pages * 2; 751 c->shrink.batch = c->btree_pages * 2;
743 register_shrinker(&c->shrink); 752 register_shrinker(&c->shrink);
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 12a2c2846f99..4fe6ab2fbe2e 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -556,7 +556,7 @@ STORE(__bch_cache_set)
556 struct shrink_control sc; 556 struct shrink_control sc;
557 sc.gfp_mask = GFP_KERNEL; 557 sc.gfp_mask = GFP_KERNEL;
558 sc.nr_to_scan = strtoul_or_return(buf); 558 sc.nr_to_scan = strtoul_or_return(buf);
559 c->shrink.shrink(&c->shrink, &sc); 559 c->shrink.scan_objects(&c->shrink, &sc);
560 } 560 }
561 561
562 sysfs_strtoul(congested_read_threshold_us, 562 sysfs_strtoul(congested_read_threshold_us,
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 5227e079a6e3..173cbb20d104 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1425,62 +1425,75 @@ static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp,
1425 unsigned long max_jiffies) 1425 unsigned long max_jiffies)
1426{ 1426{
1427 if (jiffies - b->last_accessed < max_jiffies) 1427 if (jiffies - b->last_accessed < max_jiffies)
1428 return 1; 1428 return 0;
1429 1429
1430 if (!(gfp & __GFP_IO)) { 1430 if (!(gfp & __GFP_IO)) {
1431 if (test_bit(B_READING, &b->state) || 1431 if (test_bit(B_READING, &b->state) ||
1432 test_bit(B_WRITING, &b->state) || 1432 test_bit(B_WRITING, &b->state) ||
1433 test_bit(B_DIRTY, &b->state)) 1433 test_bit(B_DIRTY, &b->state))
1434 return 1; 1434 return 0;
1435 } 1435 }
1436 1436
1437 if (b->hold_count) 1437 if (b->hold_count)
1438 return 1; 1438 return 0;
1439 1439
1440 __make_buffer_clean(b); 1440 __make_buffer_clean(b);
1441 __unlink_buffer(b); 1441 __unlink_buffer(b);
1442 __free_buffer_wake(b); 1442 __free_buffer_wake(b);
1443 1443
1444 return 0; 1444 return 1;
1445} 1445}
1446 1446
1447static void __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, 1447static long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
1448 struct shrink_control *sc) 1448 gfp_t gfp_mask)
1449{ 1449{
1450 int l; 1450 int l;
1451 struct dm_buffer *b, *tmp; 1451 struct dm_buffer *b, *tmp;
1452 long freed = 0;
1452 1453
1453 for (l = 0; l < LIST_SIZE; l++) { 1454 for (l = 0; l < LIST_SIZE; l++) {
1454 list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) 1455 list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
1455 if (!__cleanup_old_buffer(b, sc->gfp_mask, 0) && 1456 freed += __cleanup_old_buffer(b, gfp_mask, 0);
1456 !--nr_to_scan) 1457 if (!--nr_to_scan)
1457 return; 1458 break;
1459 }
1458 dm_bufio_cond_resched(); 1460 dm_bufio_cond_resched();
1459 } 1461 }
1462 return freed;
1460} 1463}
1461 1464
1462static int shrink(struct shrinker *shrinker, struct shrink_control *sc) 1465static unsigned long
1466dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
1463{ 1467{
1464 struct dm_bufio_client *c = 1468 struct dm_bufio_client *c;
1465 container_of(shrinker, struct dm_bufio_client, shrinker); 1469 unsigned long freed;
1466 unsigned long r;
1467 unsigned long nr_to_scan = sc->nr_to_scan;
1468 1470
1471 c = container_of(shrink, struct dm_bufio_client, shrinker);
1469 if (sc->gfp_mask & __GFP_IO) 1472 if (sc->gfp_mask & __GFP_IO)
1470 dm_bufio_lock(c); 1473 dm_bufio_lock(c);
1471 else if (!dm_bufio_trylock(c)) 1474 else if (!dm_bufio_trylock(c))
1472 return !nr_to_scan ? 0 : -1; 1475 return SHRINK_STOP;
1473 1476
1474 if (nr_to_scan) 1477 freed = __scan(c, sc->nr_to_scan, sc->gfp_mask);
1475 __scan(c, nr_to_scan, sc); 1478 dm_bufio_unlock(c);
1479 return freed;
1480}
1476 1481
1477 r = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; 1482static unsigned long
1478 if (r > INT_MAX) 1483dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
1479 r = INT_MAX; 1484{
1485 struct dm_bufio_client *c;
1486 unsigned long count;
1480 1487
1481 dm_bufio_unlock(c); 1488 c = container_of(shrink, struct dm_bufio_client, shrinker);
1489 if (sc->gfp_mask & __GFP_IO)
1490 dm_bufio_lock(c);
1491 else if (!dm_bufio_trylock(c))
1492 return 0;
1482 1493
1483 return r; 1494 count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
1495 dm_bufio_unlock(c);
1496 return count;
1484} 1497}
1485 1498
1486/* 1499/*
@@ -1582,7 +1595,8 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
1582 __cache_size_refresh(); 1595 __cache_size_refresh();
1583 mutex_unlock(&dm_bufio_clients_lock); 1596 mutex_unlock(&dm_bufio_clients_lock);
1584 1597
1585 c->shrinker.shrink = shrink; 1598 c->shrinker.count_objects = dm_bufio_shrink_count;
1599 c->shrinker.scan_objects = dm_bufio_shrink_scan;
1586 c->shrinker.seeks = 1; 1600 c->shrinker.seeks = 1;
1587 c->shrinker.batch = 0; 1601 c->shrinker.batch = 0;
1588 register_shrinker(&c->shrinker); 1602 register_shrinker(&c->shrinker);
@@ -1669,7 +1683,7 @@ static void cleanup_old_buffers(void)
1669 struct dm_buffer *b; 1683 struct dm_buffer *b;
1670 b = list_entry(c->lru[LIST_CLEAN].prev, 1684 b = list_entry(c->lru[LIST_CLEAN].prev,
1671 struct dm_buffer, lru_list); 1685 struct dm_buffer, lru_list);
1672 if (__cleanup_old_buffer(b, 0, max_age * HZ)) 1686 if (!__cleanup_old_buffer(b, 0, max_age * HZ))
1673 break; 1687 break;
1674 dm_bufio_cond_resched(); 1688 dm_bufio_cond_resched();
1675 } 1689 }
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index 21a3f7250531..8e76ddca0999 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -341,27 +341,26 @@ out:
341/* 341/*
342 * ashmem_shrink - our cache shrinker, called from mm/vmscan.c :: shrink_slab 342 * ashmem_shrink - our cache shrinker, called from mm/vmscan.c :: shrink_slab
343 * 343 *
344 * 'nr_to_scan' is the number of objects (pages) to prune, or 0 to query how 344 * 'nr_to_scan' is the number of objects to scan for freeing.
345 * many objects (pages) we have in total.
346 * 345 *
347 * 'gfp_mask' is the mask of the allocation that got us into this mess. 346 * 'gfp_mask' is the mask of the allocation that got us into this mess.
348 * 347 *
349 * Return value is the number of objects (pages) remaining, or -1 if we cannot 348 * Return value is the number of objects freed or -1 if we cannot
350 * proceed without risk of deadlock (due to gfp_mask). 349 * proceed without risk of deadlock (due to gfp_mask).
351 * 350 *
352 * We approximate LRU via least-recently-unpinned, jettisoning unpinned partial 351 * We approximate LRU via least-recently-unpinned, jettisoning unpinned partial
353 * chunks of ashmem regions LRU-wise one-at-a-time until we hit 'nr_to_scan' 352 * chunks of ashmem regions LRU-wise one-at-a-time until we hit 'nr_to_scan'
354 * pages freed. 353 * pages freed.
355 */ 354 */
356static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc) 355static unsigned long
356ashmem_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
357{ 357{
358 struct ashmem_range *range, *next; 358 struct ashmem_range *range, *next;
359 unsigned long freed = 0;
359 360
360 /* We might recurse into filesystem code, so bail out if necessary */ 361 /* We might recurse into filesystem code, so bail out if necessary */
361 if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS)) 362 if (!(sc->gfp_mask & __GFP_FS))
362 return -1; 363 return SHRINK_STOP;
363 if (!sc->nr_to_scan)
364 return lru_count;
365 364
366 mutex_lock(&ashmem_mutex); 365 mutex_lock(&ashmem_mutex);
367 list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) { 366 list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) {
@@ -374,17 +373,32 @@ static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc)
374 range->purged = ASHMEM_WAS_PURGED; 373 range->purged = ASHMEM_WAS_PURGED;
375 lru_del(range); 374 lru_del(range);
376 375
377 sc->nr_to_scan -= range_size(range); 376 freed += range_size(range);
378 if (sc->nr_to_scan <= 0) 377 if (--sc->nr_to_scan <= 0)
379 break; 378 break;
380 } 379 }
381 mutex_unlock(&ashmem_mutex); 380 mutex_unlock(&ashmem_mutex);
381 return freed;
382}
382 383
384static unsigned long
385ashmem_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
386{
387 /*
388 * note that lru_count is count of pages on the lru, not a count of
389 * objects on the list. This means the scan function needs to return the
390 * number of pages freed, not the number of objects scanned.
391 */
383 return lru_count; 392 return lru_count;
384} 393}
385 394
386static struct shrinker ashmem_shrinker = { 395static struct shrinker ashmem_shrinker = {
387 .shrink = ashmem_shrink, 396 .count_objects = ashmem_shrink_count,
397 .scan_objects = ashmem_shrink_scan,
398 /*
399 * XXX (dchinner): I wish people would comment on why they need on
400 * significant changes to the default value here
401 */
388 .seeks = DEFAULT_SEEKS * 4, 402 .seeks = DEFAULT_SEEKS * 4,
389}; 403};
390 404
@@ -690,11 +704,11 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
690 if (capable(CAP_SYS_ADMIN)) { 704 if (capable(CAP_SYS_ADMIN)) {
691 struct shrink_control sc = { 705 struct shrink_control sc = {
692 .gfp_mask = GFP_KERNEL, 706 .gfp_mask = GFP_KERNEL,
693 .nr_to_scan = 0, 707 .nr_to_scan = LONG_MAX,
694 }; 708 };
695 ret = ashmem_shrink(&ashmem_shrinker, &sc); 709
696 sc.nr_to_scan = ret; 710 nodes_setall(sc.nodes_to_scan);
697 ashmem_shrink(&ashmem_shrinker, &sc); 711 ashmem_shrink_scan(&ashmem_shrinker, &sc);
698 } 712 }
699 break; 713 break;
700 } 714 }
diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c
index fe74494868ef..6f094b37f1f1 100644
--- a/drivers/staging/android/lowmemorykiller.c
+++ b/drivers/staging/android/lowmemorykiller.c
@@ -66,11 +66,20 @@ static unsigned long lowmem_deathpending_timeout;
66 pr_info(x); \ 66 pr_info(x); \
67 } while (0) 67 } while (0)
68 68
69static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) 69static unsigned long lowmem_count(struct shrinker *s,
70 struct shrink_control *sc)
71{
72 return global_page_state(NR_ACTIVE_ANON) +
73 global_page_state(NR_ACTIVE_FILE) +
74 global_page_state(NR_INACTIVE_ANON) +
75 global_page_state(NR_INACTIVE_FILE);
76}
77
78static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
70{ 79{
71 struct task_struct *tsk; 80 struct task_struct *tsk;
72 struct task_struct *selected = NULL; 81 struct task_struct *selected = NULL;
73 int rem = 0; 82 unsigned long rem = 0;
74 int tasksize; 83 int tasksize;
75 int i; 84 int i;
76 short min_score_adj = OOM_SCORE_ADJ_MAX + 1; 85 short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
@@ -92,19 +101,17 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
92 break; 101 break;
93 } 102 }
94 } 103 }
95 if (sc->nr_to_scan > 0) 104
96 lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %hd\n", 105 lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n",
97 sc->nr_to_scan, sc->gfp_mask, other_free, 106 sc->nr_to_scan, sc->gfp_mask, other_free,
98 other_file, min_score_adj); 107 other_file, min_score_adj);
99 rem = global_page_state(NR_ACTIVE_ANON) + 108
100 global_page_state(NR_ACTIVE_FILE) + 109 if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
101 global_page_state(NR_INACTIVE_ANON) + 110 lowmem_print(5, "lowmem_scan %lu, %x, return 0\n",
102 global_page_state(NR_INACTIVE_FILE); 111 sc->nr_to_scan, sc->gfp_mask);
103 if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { 112 return 0;
104 lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n",
105 sc->nr_to_scan, sc->gfp_mask, rem);
106 return rem;
107 } 113 }
114
108 selected_oom_score_adj = min_score_adj; 115 selected_oom_score_adj = min_score_adj;
109 116
110 rcu_read_lock(); 117 rcu_read_lock();
@@ -154,16 +161,18 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
154 lowmem_deathpending_timeout = jiffies + HZ; 161 lowmem_deathpending_timeout = jiffies + HZ;
155 send_sig(SIGKILL, selected, 0); 162 send_sig(SIGKILL, selected, 0);
156 set_tsk_thread_flag(selected, TIF_MEMDIE); 163 set_tsk_thread_flag(selected, TIF_MEMDIE);
157 rem -= selected_tasksize; 164 rem += selected_tasksize;
158 } 165 }
159 lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", 166
167 lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n",
160 sc->nr_to_scan, sc->gfp_mask, rem); 168 sc->nr_to_scan, sc->gfp_mask, rem);
161 rcu_read_unlock(); 169 rcu_read_unlock();
162 return rem; 170 return rem;
163} 171}
164 172
165static struct shrinker lowmem_shrinker = { 173static struct shrinker lowmem_shrinker = {
166 .shrink = lowmem_shrink, 174 .scan_objects = lowmem_scan,
175 .count_objects = lowmem_count,
167 .seeks = DEFAULT_SEEKS * 16 176 .seeks = DEFAULT_SEEKS * 16
168}; 177};
169 178
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
index 63efb7b456c6..2af15d41e77a 100644
--- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
@@ -79,42 +79,4 @@
79 do { __oldfs = get_fs(); set_fs(get_ds());} while(0) 79 do { __oldfs = get_fs(); set_fs(get_ds());} while(0)
80#define MMSPACE_CLOSE set_fs(__oldfs) 80#define MMSPACE_CLOSE set_fs(__oldfs)
81 81
82/*
83 * Shrinker
84 */
85
86# define SHRINKER_ARGS(sc, nr_to_scan, gfp_mask) \
87 struct shrinker *shrinker, \
88 struct shrink_control *sc
89# define shrink_param(sc, var) ((sc)->var)
90
91typedef int (*shrinker_t)(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask));
92
93static inline
94struct shrinker *set_shrinker(int seek, shrinker_t func)
95{
96 struct shrinker *s;
97
98 s = kmalloc(sizeof(*s), GFP_KERNEL);
99 if (s == NULL)
100 return (NULL);
101
102 s->shrink = func;
103 s->seeks = seek;
104
105 register_shrinker(s);
106
107 return s;
108}
109
110static inline
111void remove_shrinker(struct shrinker *shrinker)
112{
113 if (shrinker == NULL)
114 return;
115
116 unregister_shrinker(shrinker);
117 kfree(shrinker);
118}
119
120#endif /* __LINUX_CFS_MEM_H__ */ 82#endif /* __LINUX_CFS_MEM_H__ */
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
index 454027d68d54..0025ee6356da 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
@@ -521,7 +521,7 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
521 int nr, unsigned int gfp_mask) 521 int nr, unsigned int gfp_mask)
522{ 522{
523 struct ldlm_namespace *ns; 523 struct ldlm_namespace *ns;
524 int canceled = 0, unused; 524 int unused;
525 525
526 ns = ldlm_pl2ns(pl); 526 ns = ldlm_pl2ns(pl);
527 527
@@ -540,14 +540,10 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
540 unused = ns->ns_nr_unused; 540 unused = ns->ns_nr_unused;
541 spin_unlock(&ns->ns_lock); 541 spin_unlock(&ns->ns_lock);
542 542
543 if (nr) { 543 if (nr == 0)
544 canceled = ldlm_cancel_lru(ns, nr, LCF_ASYNC, 544 return (unused / 100) * sysctl_vfs_cache_pressure;
545 LDLM_CANCEL_SHRINK); 545 else
546 } 546 return ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_CANCEL_SHRINK);
547 /*
548 * Return the number of potentially reclaimable locks.
549 */
550 return ((unused - canceled) / 100) * sysctl_vfs_cache_pressure;
551} 547}
552 548
553struct ldlm_pool_ops ldlm_srv_pool_ops = { 549struct ldlm_pool_ops ldlm_srv_pool_ops = {
@@ -601,9 +597,10 @@ int ldlm_pool_recalc(struct ldlm_pool *pl)
601 return recalc_interval_sec; 597 return recalc_interval_sec;
602} 598}
603 599
604/** 600/*
605 * Pool shrink wrapper. Will call either client or server pool recalc callback 601 * Pool shrink wrapper. Will call either client or server pool recalc callback
606 * depending what pool \a pl is used. 602 * depending what pool pl is used. When nr == 0, just return the number of
603 * freeable locks. Otherwise, return the number of canceled locks.
607 */ 604 */
608int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, 605int ldlm_pool_shrink(struct ldlm_pool *pl, int nr,
609 unsigned int gfp_mask) 606 unsigned int gfp_mask)
@@ -1017,29 +1014,24 @@ static int ldlm_pool_granted(struct ldlm_pool *pl)
1017} 1014}
1018 1015
1019static struct ptlrpc_thread *ldlm_pools_thread; 1016static struct ptlrpc_thread *ldlm_pools_thread;
1020static struct shrinker *ldlm_pools_srv_shrinker;
1021static struct shrinker *ldlm_pools_cli_shrinker;
1022static struct completion ldlm_pools_comp; 1017static struct completion ldlm_pools_comp;
1023 1018
1024/* 1019/*
1025 * Cancel \a nr locks from all namespaces (if possible). Returns number of 1020 * count locks from all namespaces (if possible). Returns number of
1026 * cached locks after shrink is finished. All namespaces are asked to 1021 * cached locks.
1027 * cancel approximately equal amount of locks to keep balancing.
1028 */ 1022 */
1029static int ldlm_pools_shrink(ldlm_side_t client, int nr, 1023static unsigned long ldlm_pools_count(ldlm_side_t client, unsigned int gfp_mask)
1030 unsigned int gfp_mask)
1031{ 1024{
1032 int total = 0, cached = 0, nr_ns; 1025 int total = 0, nr_ns;
1033 struct ldlm_namespace *ns; 1026 struct ldlm_namespace *ns;
1034 struct ldlm_namespace *ns_old = NULL; /* loop detection */ 1027 struct ldlm_namespace *ns_old = NULL; /* loop detection */
1035 void *cookie; 1028 void *cookie;
1036 1029
1037 if (client == LDLM_NAMESPACE_CLIENT && nr != 0 && 1030 if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
1038 !(gfp_mask & __GFP_FS)) 1031 return 0;
1039 return -1;
1040 1032
1041 CDEBUG(D_DLMTRACE, "Request to shrink %d %s locks from all pools\n", 1033 CDEBUG(D_DLMTRACE, "Request to count %s locks from all pools\n",
1042 nr, client == LDLM_NAMESPACE_CLIENT ? "client" : "server"); 1034 client == LDLM_NAMESPACE_CLIENT ? "client" : "server");
1043 1035
1044 cookie = cl_env_reenter(); 1036 cookie = cl_env_reenter();
1045 1037
@@ -1047,8 +1039,7 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
1047 * Find out how many resources we may release. 1039 * Find out how many resources we may release.
1048 */ 1040 */
1049 for (nr_ns = ldlm_namespace_nr_read(client); 1041 for (nr_ns = ldlm_namespace_nr_read(client);
1050 nr_ns > 0; nr_ns--) 1042 nr_ns > 0; nr_ns--) {
1051 {
1052 mutex_lock(ldlm_namespace_lock(client)); 1043 mutex_lock(ldlm_namespace_lock(client));
1053 if (list_empty(ldlm_namespace_list(client))) { 1044 if (list_empty(ldlm_namespace_list(client))) {
1054 mutex_unlock(ldlm_namespace_lock(client)); 1045 mutex_unlock(ldlm_namespace_lock(client));
@@ -1078,17 +1069,27 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
1078 ldlm_namespace_put(ns); 1069 ldlm_namespace_put(ns);
1079 } 1070 }
1080 1071
1081 if (nr == 0 || total == 0) { 1072 cl_env_reexit(cookie);
1082 cl_env_reexit(cookie); 1073 return total;
1083 return total; 1074}
1084 } 1075
1076static unsigned long ldlm_pools_scan(ldlm_side_t client, int nr, unsigned int gfp_mask)
1077{
1078 unsigned long freed = 0;
1079 int tmp, nr_ns;
1080 struct ldlm_namespace *ns;
1081 void *cookie;
1082
1083 if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
1084 return -1;
1085
1086 cookie = cl_env_reenter();
1085 1087
1086 /* 1088 /*
1087 * Shrink at least ldlm_namespace_nr(client) namespaces. 1089 * Shrink at least ldlm_namespace_nr_read(client) namespaces.
1088 */ 1090 */
1089 for (nr_ns = ldlm_namespace_nr_read(client) - nr_ns; 1091 for (tmp = nr_ns = ldlm_namespace_nr_read(client);
1090 nr_ns > 0; nr_ns--) 1092 tmp > 0; tmp--) {
1091 {
1092 int cancel, nr_locks; 1093 int cancel, nr_locks;
1093 1094
1094 /* 1095 /*
@@ -1097,12 +1098,6 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
1097 mutex_lock(ldlm_namespace_lock(client)); 1098 mutex_lock(ldlm_namespace_lock(client));
1098 if (list_empty(ldlm_namespace_list(client))) { 1099 if (list_empty(ldlm_namespace_list(client))) {
1099 mutex_unlock(ldlm_namespace_lock(client)); 1100 mutex_unlock(ldlm_namespace_lock(client));
1100 /*
1101 * If list is empty, we can't return any @cached > 0,
1102 * that probably would cause needless shrinker
1103 * call.
1104 */
1105 cached = 0;
1106 break; 1101 break;
1107 } 1102 }
1108 ns = ldlm_namespace_first_locked(client); 1103 ns = ldlm_namespace_first_locked(client);
@@ -1111,29 +1106,42 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
1111 mutex_unlock(ldlm_namespace_lock(client)); 1106 mutex_unlock(ldlm_namespace_lock(client));
1112 1107
1113 nr_locks = ldlm_pool_granted(&ns->ns_pool); 1108 nr_locks = ldlm_pool_granted(&ns->ns_pool);
1114 cancel = 1 + nr_locks * nr / total; 1109 /*
1115 ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask); 1110 * We use to shrink propotionally but with new shrinker API,
1116 cached += ldlm_pool_granted(&ns->ns_pool); 1111 * we lost the total number of freeable locks.
1112 */
1113 cancel = 1 + min_t(int, nr_locks, nr / nr_ns);
1114 freed += ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask);
1117 ldlm_namespace_put(ns); 1115 ldlm_namespace_put(ns);
1118 } 1116 }
1119 cl_env_reexit(cookie); 1117 cl_env_reexit(cookie);
1120 /* we only decrease the SLV in server pools shrinker, return -1 to 1118 /*
1121 * kernel to avoid needless loop. LU-1128 */ 1119 * we only decrease the SLV in server pools shrinker, return
1122 return (client == LDLM_NAMESPACE_SERVER) ? -1 : cached; 1120 * SHRINK_STOP to kernel to avoid needless loop. LU-1128
1121 */
1122 return (client == LDLM_NAMESPACE_SERVER) ? SHRINK_STOP : freed;
1123}
1124
1125static unsigned long ldlm_pools_srv_count(struct shrinker *s, struct shrink_control *sc)
1126{
1127 return ldlm_pools_count(LDLM_NAMESPACE_SERVER, sc->gfp_mask);
1123} 1128}
1124 1129
1125static int ldlm_pools_srv_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) 1130static unsigned long ldlm_pools_srv_scan(struct shrinker *s, struct shrink_control *sc)
1126{ 1131{
1127 return ldlm_pools_shrink(LDLM_NAMESPACE_SERVER, 1132 return ldlm_pools_scan(LDLM_NAMESPACE_SERVER, sc->nr_to_scan,
1128 shrink_param(sc, nr_to_scan), 1133 sc->gfp_mask);
1129 shrink_param(sc, gfp_mask));
1130} 1134}
1131 1135
1132static int ldlm_pools_cli_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) 1136static unsigned long ldlm_pools_cli_count(struct shrinker *s, struct shrink_control *sc)
1133{ 1137{
1134 return ldlm_pools_shrink(LDLM_NAMESPACE_CLIENT, 1138 return ldlm_pools_count(LDLM_NAMESPACE_CLIENT, sc->gfp_mask);
1135 shrink_param(sc, nr_to_scan), 1139}
1136 shrink_param(sc, gfp_mask)); 1140
1141static unsigned long ldlm_pools_cli_scan(struct shrinker *s, struct shrink_control *sc)
1142{
1143 return ldlm_pools_scan(LDLM_NAMESPACE_CLIENT, sc->nr_to_scan,
1144 sc->gfp_mask);
1137} 1145}
1138 1146
1139int ldlm_pools_recalc(ldlm_side_t client) 1147int ldlm_pools_recalc(ldlm_side_t client)
@@ -1216,7 +1224,7 @@ int ldlm_pools_recalc(ldlm_side_t client)
1216 } 1224 }
1217 1225
1218 /* 1226 /*
1219 * Recalc at least ldlm_namespace_nr(client) namespaces. 1227 * Recalc at least ldlm_namespace_nr_read(client) namespaces.
1220 */ 1228 */
1221 for (nr = ldlm_namespace_nr_read(client); nr > 0; nr--) { 1229 for (nr = ldlm_namespace_nr_read(client); nr > 0; nr--) {
1222 int skip; 1230 int skip;
@@ -1383,18 +1391,26 @@ static void ldlm_pools_thread_stop(void)
1383 ldlm_pools_thread = NULL; 1391 ldlm_pools_thread = NULL;
1384} 1392}
1385 1393
1394static struct shrinker ldlm_pools_srv_shrinker = {
1395 .count_objects = ldlm_pools_srv_count,
1396 .scan_objects = ldlm_pools_srv_scan,
1397 .seeks = DEFAULT_SEEKS,
1398};
1399
1400static struct shrinker ldlm_pools_cli_shrinker = {
1401 .count_objects = ldlm_pools_cli_count,
1402 .scan_objects = ldlm_pools_cli_scan,
1403 .seeks = DEFAULT_SEEKS,
1404};
1405
1386int ldlm_pools_init(void) 1406int ldlm_pools_init(void)
1387{ 1407{
1388 int rc; 1408 int rc;
1389 1409
1390 rc = ldlm_pools_thread_start(); 1410 rc = ldlm_pools_thread_start();
1391 if (rc == 0) { 1411 if (rc == 0) {
1392 ldlm_pools_srv_shrinker = 1412 register_shrinker(&ldlm_pools_srv_shrinker);
1393 set_shrinker(DEFAULT_SEEKS, 1413 register_shrinker(&ldlm_pools_cli_shrinker);
1394 ldlm_pools_srv_shrink);
1395 ldlm_pools_cli_shrinker =
1396 set_shrinker(DEFAULT_SEEKS,
1397 ldlm_pools_cli_shrink);
1398 } 1414 }
1399 return rc; 1415 return rc;
1400} 1416}
@@ -1402,14 +1418,8 @@ EXPORT_SYMBOL(ldlm_pools_init);
1402 1418
1403void ldlm_pools_fini(void) 1419void ldlm_pools_fini(void)
1404{ 1420{
1405 if (ldlm_pools_srv_shrinker != NULL) { 1421 unregister_shrinker(&ldlm_pools_srv_shrinker);
1406 remove_shrinker(ldlm_pools_srv_shrinker); 1422 unregister_shrinker(&ldlm_pools_cli_shrinker);
1407 ldlm_pools_srv_shrinker = NULL;
1408 }
1409 if (ldlm_pools_cli_shrinker != NULL) {
1410 remove_shrinker(ldlm_pools_cli_shrinker);
1411 ldlm_pools_cli_shrinker = NULL;
1412 }
1413 ldlm_pools_thread_stop(); 1423 ldlm_pools_thread_stop();
1414} 1424}
1415EXPORT_SYMBOL(ldlm_pools_fini); 1425EXPORT_SYMBOL(ldlm_pools_fini);
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index c29ac1c2defd..3a3d5bc5a628 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -1779,7 +1779,6 @@ int lu_env_refill_by_tags(struct lu_env *env, __u32 ctags,
1779} 1779}
1780EXPORT_SYMBOL(lu_env_refill_by_tags); 1780EXPORT_SYMBOL(lu_env_refill_by_tags);
1781 1781
1782static struct shrinker *lu_site_shrinker = NULL;
1783 1782
1784typedef struct lu_site_stats{ 1783typedef struct lu_site_stats{
1785 unsigned lss_populated; 1784 unsigned lss_populated;
@@ -1835,61 +1834,68 @@ static void lu_site_stats_get(cfs_hash_t *hs,
1835 * objects without taking the lu_sites_guard lock, but this is not 1834 * objects without taking the lu_sites_guard lock, but this is not
1836 * possible in the current implementation. 1835 * possible in the current implementation.
1837 */ 1836 */
1838static int lu_cache_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) 1837static unsigned long lu_cache_shrink_count(struct shrinker *sk,
1838 struct shrink_control *sc)
1839{ 1839{
1840 lu_site_stats_t stats; 1840 lu_site_stats_t stats;
1841 struct lu_site *s; 1841 struct lu_site *s;
1842 struct lu_site *tmp; 1842 struct lu_site *tmp;
1843 int cached = 0; 1843 unsigned long cached = 0;
1844 int remain = shrink_param(sc, nr_to_scan);
1845 LIST_HEAD(splice);
1846
1847 if (!(shrink_param(sc, gfp_mask) & __GFP_FS)) {
1848 if (remain != 0)
1849 return -1;
1850 else
1851 /* We must not take the lu_sites_guard lock when
1852 * __GFP_FS is *not* set because of the deadlock
1853 * possibility detailed above. Additionally,
1854 * since we cannot determine the number of
1855 * objects in the cache without taking this
1856 * lock, we're in a particularly tough spot. As
1857 * a result, we'll just lie and say our cache is
1858 * empty. This _should_ be ok, as we can't
1859 * reclaim objects when __GFP_FS is *not* set
1860 * anyways.
1861 */
1862 return 0;
1863 }
1864 1844
1865 CDEBUG(D_INODE, "Shrink %d objects\n", remain); 1845 if (!(sc->gfp_mask & __GFP_FS))
1846 return 0;
1866 1847
1867 mutex_lock(&lu_sites_guard); 1848 mutex_lock(&lu_sites_guard);
1868 list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) { 1849 list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
1869 if (shrink_param(sc, nr_to_scan) != 0) {
1870 remain = lu_site_purge(&lu_shrink_env, s, remain);
1871 /*
1872 * Move just shrunk site to the tail of site list to
1873 * assure shrinking fairness.
1874 */
1875 list_move_tail(&s->ls_linkage, &splice);
1876 }
1877
1878 memset(&stats, 0, sizeof(stats)); 1850 memset(&stats, 0, sizeof(stats));
1879 lu_site_stats_get(s->ls_obj_hash, &stats, 0); 1851 lu_site_stats_get(s->ls_obj_hash, &stats, 0);
1880 cached += stats.lss_total - stats.lss_busy; 1852 cached += stats.lss_total - stats.lss_busy;
1881 if (shrink_param(sc, nr_to_scan) && remain <= 0)
1882 break;
1883 } 1853 }
1884 list_splice(&splice, lu_sites.prev);
1885 mutex_unlock(&lu_sites_guard); 1854 mutex_unlock(&lu_sites_guard);
1886 1855
1887 cached = (cached / 100) * sysctl_vfs_cache_pressure; 1856 cached = (cached / 100) * sysctl_vfs_cache_pressure;
1888 if (shrink_param(sc, nr_to_scan) == 0) 1857 CDEBUG(D_INODE, "%ld objects cached\n", cached);
1889 CDEBUG(D_INODE, "%d objects cached\n", cached);
1890 return cached; 1858 return cached;
1891} 1859}
1892 1860
1861static unsigned long lu_cache_shrink_scan(struct shrinker *sk,
1862 struct shrink_control *sc)
1863{
1864 struct lu_site *s;
1865 struct lu_site *tmp;
1866 unsigned long remain = sc->nr_to_scan, freed = 0;
1867 LIST_HEAD(splice);
1868
1869 if (!(sc->gfp_mask & __GFP_FS))
1870 /* We must not take the lu_sites_guard lock when
1871 * __GFP_FS is *not* set because of the deadlock
1872 * possibility detailed above. Additionally,
1873 * since we cannot determine the number of
1874 * objects in the cache without taking this
1875 * lock, we're in a particularly tough spot. As
1876 * a result, we'll just lie and say our cache is
1877 * empty. This _should_ be ok, as we can't
1878 * reclaim objects when __GFP_FS is *not* set
1879 * anyways.
1880 */
1881 return SHRINK_STOP;
1882
1883 mutex_lock(&lu_sites_guard);
1884 list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
1885 freed = lu_site_purge(&lu_shrink_env, s, remain);
1886 remain -= freed;
1887 /*
1888 * Move just shrunk site to the tail of site list to
1889 * assure shrinking fairness.
1890 */
1891 list_move_tail(&s->ls_linkage, &splice);
1892 }
1893 list_splice(&splice, lu_sites.prev);
1894 mutex_unlock(&lu_sites_guard);
1895
1896 return sc->nr_to_scan - remain;
1897}
1898
1893/* 1899/*
1894 * Debugging stuff. 1900 * Debugging stuff.
1895 */ 1901 */
@@ -1913,6 +1919,12 @@ int lu_printk_printer(const struct lu_env *env,
1913 return 0; 1919 return 0;
1914} 1920}
1915 1921
1922static struct shrinker lu_site_shrinker = {
1923 .count_objects = lu_cache_shrink_count,
1924 .scan_objects = lu_cache_shrink_scan,
1925 .seeks = DEFAULT_SEEKS,
1926};
1927
1916/** 1928/**
1917 * Initialization of global lu_* data. 1929 * Initialization of global lu_* data.
1918 */ 1930 */
@@ -1947,9 +1959,7 @@ int lu_global_init(void)
1947 * inode, one for ea. Unfortunately setting this high value results in 1959 * inode, one for ea. Unfortunately setting this high value results in
1948 * lu_object/inode cache consuming all the memory. 1960 * lu_object/inode cache consuming all the memory.
1949 */ 1961 */
1950 lu_site_shrinker = set_shrinker(DEFAULT_SEEKS, lu_cache_shrink); 1962 register_shrinker(&lu_site_shrinker);
1951 if (lu_site_shrinker == NULL)
1952 return -ENOMEM;
1953 1963
1954 return result; 1964 return result;
1955} 1965}
@@ -1959,11 +1969,7 @@ int lu_global_init(void)
1959 */ 1969 */
1960void lu_global_fini(void) 1970void lu_global_fini(void)
1961{ 1971{
1962 if (lu_site_shrinker != NULL) { 1972 unregister_shrinker(&lu_site_shrinker);
1963 remove_shrinker(lu_site_shrinker);
1964 lu_site_shrinker = NULL;
1965 }
1966
1967 lu_context_key_degister(&lu_global_key); 1973 lu_context_key_degister(&lu_global_key);
1968 1974
1969 /* 1975 /*
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
index 9013745ab105..e90c8fb7da6a 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
@@ -121,13 +121,6 @@ static struct ptlrpc_enc_page_pool {
121} page_pools; 121} page_pools;
122 122
123/* 123/*
124 * memory shrinker
125 */
126const int pools_shrinker_seeks = DEFAULT_SEEKS;
127static struct shrinker *pools_shrinker = NULL;
128
129
130/*
131 * /proc/fs/lustre/sptlrpc/encrypt_page_pools 124 * /proc/fs/lustre/sptlrpc/encrypt_page_pools
132 */ 125 */
133int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v) 126int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
@@ -226,30 +219,46 @@ static void enc_pools_release_free_pages(long npages)
226} 219}
227 220
228/* 221/*
229 * could be called frequently for query (@nr_to_scan == 0).
230 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. 222 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
231 */ 223 */
232static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) 224static unsigned long enc_pools_shrink_count(struct shrinker *s,
225 struct shrink_control *sc)
233{ 226{
234 if (unlikely(shrink_param(sc, nr_to_scan) != 0)) { 227 /*
228 * if no pool access for a long time, we consider it's fully idle.
229 * a little race here is fine.
230 */
231 if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access >
232 CACHE_QUIESCENT_PERIOD)) {
235 spin_lock(&page_pools.epp_lock); 233 spin_lock(&page_pools.epp_lock);
236 shrink_param(sc, nr_to_scan) = min_t(unsigned long, 234 page_pools.epp_idle_idx = IDLE_IDX_MAX;
237 shrink_param(sc, nr_to_scan),
238 page_pools.epp_free_pages -
239 PTLRPC_MAX_BRW_PAGES);
240 if (shrink_param(sc, nr_to_scan) > 0) {
241 enc_pools_release_free_pages(shrink_param(sc,
242 nr_to_scan));
243 CDEBUG(D_SEC, "released %ld pages, %ld left\n",
244 (long)shrink_param(sc, nr_to_scan),
245 page_pools.epp_free_pages);
246
247 page_pools.epp_st_shrinks++;
248 page_pools.epp_last_shrink = cfs_time_current_sec();
249 }
250 spin_unlock(&page_pools.epp_lock); 235 spin_unlock(&page_pools.epp_lock);
251 } 236 }
252 237
238 LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
239 return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
240 (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
241}
242
243/*
244 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
245 */
246static unsigned long enc_pools_shrink_scan(struct shrinker *s,
247 struct shrink_control *sc)
248{
249 spin_lock(&page_pools.epp_lock);
250 sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan,
251 page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES);
252 if (sc->nr_to_scan > 0) {
253 enc_pools_release_free_pages(sc->nr_to_scan);
254 CDEBUG(D_SEC, "released %ld pages, %ld left\n",
255 (long)sc->nr_to_scan, page_pools.epp_free_pages);
256
257 page_pools.epp_st_shrinks++;
258 page_pools.epp_last_shrink = cfs_time_current_sec();
259 }
260 spin_unlock(&page_pools.epp_lock);
261
253 /* 262 /*
254 * if no pool access for a long time, we consider it's fully idle. 263 * if no pool access for a long time, we consider it's fully idle.
255 * a little race here is fine. 264 * a little race here is fine.
@@ -262,8 +271,7 @@ static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
262 } 271 }
263 272
264 LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); 273 LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
265 return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) * 274 return sc->nr_to_scan;
266 (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
267} 275}
268 276
269static inline 277static inline
@@ -699,6 +707,12 @@ static inline void enc_pools_free(void)
699 sizeof(*page_pools.epp_pools)); 707 sizeof(*page_pools.epp_pools));
700} 708}
701 709
710static struct shrinker pools_shrinker = {
711 .count_objects = enc_pools_shrink_count,
712 .scan_objects = enc_pools_shrink_scan,
713 .seeks = DEFAULT_SEEKS,
714};
715
702int sptlrpc_enc_pool_init(void) 716int sptlrpc_enc_pool_init(void)
703{ 717{
704 /* 718 /*
@@ -736,12 +750,7 @@ int sptlrpc_enc_pool_init(void)
736 if (page_pools.epp_pools == NULL) 750 if (page_pools.epp_pools == NULL)
737 return -ENOMEM; 751 return -ENOMEM;
738 752
739 pools_shrinker = set_shrinker(pools_shrinker_seeks, 753 register_shrinker(&pools_shrinker);
740 enc_pools_shrink);
741 if (pools_shrinker == NULL) {
742 enc_pools_free();
743 return -ENOMEM;
744 }
745 754
746 return 0; 755 return 0;
747} 756}
@@ -750,11 +759,10 @@ void sptlrpc_enc_pool_fini(void)
750{ 759{
751 unsigned long cleaned, npools; 760 unsigned long cleaned, npools;
752 761
753 LASSERT(pools_shrinker);
754 LASSERT(page_pools.epp_pools); 762 LASSERT(page_pools.epp_pools);
755 LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages); 763 LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages);
756 764
757 remove_shrinker(pools_shrinker); 765 unregister_shrinker(&pools_shrinker);
758 766
759 npools = npages_to_npools(page_pools.epp_total_pages); 767 npools = npages_to_npools(page_pools.epp_total_pages);
760 cleaned = enc_pools_cleanup(page_pools.epp_pools, npools); 768 cleaned = enc_pools_cleanup(page_pools.epp_pools, npools);
diff --git a/fs/dcache.c b/fs/dcache.c
index 4d9df3c940e6..c932ed32c77b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -37,6 +37,7 @@
37#include <linux/rculist_bl.h> 37#include <linux/rculist_bl.h>
38#include <linux/prefetch.h> 38#include <linux/prefetch.h>
39#include <linux/ratelimit.h> 39#include <linux/ratelimit.h>
40#include <linux/list_lru.h>
40#include "internal.h" 41#include "internal.h"
41#include "mount.h" 42#include "mount.h"
42 43
@@ -48,7 +49,7 @@
48 * - the dcache hash table 49 * - the dcache hash table
49 * s_anon bl list spinlock protects: 50 * s_anon bl list spinlock protects:
50 * - the s_anon list (see __d_drop) 51 * - the s_anon list (see __d_drop)
51 * dcache_lru_lock protects: 52 * dentry->d_sb->s_dentry_lru_lock protects:
52 * - the dcache lru lists and counters 53 * - the dcache lru lists and counters
53 * d_lock protects: 54 * d_lock protects:
54 * - d_flags 55 * - d_flags
@@ -63,7 +64,7 @@
63 * Ordering: 64 * Ordering:
64 * dentry->d_inode->i_lock 65 * dentry->d_inode->i_lock
65 * dentry->d_lock 66 * dentry->d_lock
66 * dcache_lru_lock 67 * dentry->d_sb->s_dentry_lru_lock
67 * dcache_hash_bucket lock 68 * dcache_hash_bucket lock
68 * s_anon lock 69 * s_anon lock
69 * 70 *
@@ -81,7 +82,6 @@
81int sysctl_vfs_cache_pressure __read_mostly = 100; 82int sysctl_vfs_cache_pressure __read_mostly = 100;
82EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); 83EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
83 84
84static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
85__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); 85__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
86 86
87EXPORT_SYMBOL(rename_lock); 87EXPORT_SYMBOL(rename_lock);
@@ -146,23 +146,47 @@ struct dentry_stat_t dentry_stat = {
146 .age_limit = 45, 146 .age_limit = 45,
147}; 147};
148 148
149static DEFINE_PER_CPU(unsigned int, nr_dentry); 149static DEFINE_PER_CPU(long, nr_dentry);
150static DEFINE_PER_CPU(long, nr_dentry_unused);
150 151
151#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) 152#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
152static int get_nr_dentry(void) 153
154/*
155 * Here we resort to our own counters instead of using generic per-cpu counters
156 * for consistency with what the vfs inode code does. We are expected to harvest
157 * better code and performance by having our own specialized counters.
158 *
159 * Please note that the loop is done over all possible CPUs, not over all online
160 * CPUs. The reason for this is that we don't want to play games with CPUs going
161 * on and off. If one of them goes off, we will just keep their counters.
162 *
163 * glommer: See cffbc8a for details, and if you ever intend to change this,
164 * please update all vfs counters to match.
165 */
166static long get_nr_dentry(void)
153{ 167{
154 int i; 168 int i;
155 int sum = 0; 169 long sum = 0;
156 for_each_possible_cpu(i) 170 for_each_possible_cpu(i)
157 sum += per_cpu(nr_dentry, i); 171 sum += per_cpu(nr_dentry, i);
158 return sum < 0 ? 0 : sum; 172 return sum < 0 ? 0 : sum;
159} 173}
160 174
175static long get_nr_dentry_unused(void)
176{
177 int i;
178 long sum = 0;
179 for_each_possible_cpu(i)
180 sum += per_cpu(nr_dentry_unused, i);
181 return sum < 0 ? 0 : sum;
182}
183
161int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, 184int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
162 size_t *lenp, loff_t *ppos) 185 size_t *lenp, loff_t *ppos)
163{ 186{
164 dentry_stat.nr_dentry = get_nr_dentry(); 187 dentry_stat.nr_dentry = get_nr_dentry();
165 return proc_dointvec(table, write, buffer, lenp, ppos); 188 dentry_stat.nr_unused = get_nr_dentry_unused();
189 return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
166} 190}
167#endif 191#endif
168 192
@@ -333,52 +357,35 @@ static void dentry_unlink_inode(struct dentry * dentry)
333} 357}
334 358
335/* 359/*
336 * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held. 360 * dentry_lru_(add|del)_list) must be called with d_lock held.
337 */ 361 */
338static void dentry_lru_add(struct dentry *dentry) 362static void dentry_lru_add(struct dentry *dentry)
339{ 363{
340 if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) { 364 if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) {
341 spin_lock(&dcache_lru_lock); 365 if (list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru))
366 this_cpu_inc(nr_dentry_unused);
342 dentry->d_flags |= DCACHE_LRU_LIST; 367 dentry->d_flags |= DCACHE_LRU_LIST;
343 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
344 dentry->d_sb->s_nr_dentry_unused++;
345 dentry_stat.nr_unused++;
346 spin_unlock(&dcache_lru_lock);
347 } 368 }
348} 369}
349 370
350static void __dentry_lru_del(struct dentry *dentry)
351{
352 list_del_init(&dentry->d_lru);
353 dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
354 dentry->d_sb->s_nr_dentry_unused--;
355 dentry_stat.nr_unused--;
356}
357
358/* 371/*
359 * Remove a dentry with references from the LRU. 372 * Remove a dentry with references from the LRU.
373 *
374 * If we are on the shrink list, then we can get to try_prune_one_dentry() and
375 * lose our last reference through the parent walk. In this case, we need to
376 * remove ourselves from the shrink list, not the LRU.
360 */ 377 */
361static void dentry_lru_del(struct dentry *dentry) 378static void dentry_lru_del(struct dentry *dentry)
362{ 379{
363 if (!list_empty(&dentry->d_lru)) { 380 if (dentry->d_flags & DCACHE_SHRINK_LIST) {
364 spin_lock(&dcache_lru_lock); 381 list_del_init(&dentry->d_lru);
365 __dentry_lru_del(dentry); 382 dentry->d_flags &= ~DCACHE_SHRINK_LIST;
366 spin_unlock(&dcache_lru_lock); 383 return;
367 } 384 }
368}
369 385
370static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list) 386 if (list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru))
371{ 387 this_cpu_dec(nr_dentry_unused);
372 spin_lock(&dcache_lru_lock); 388 dentry->d_flags &= ~DCACHE_LRU_LIST;
373 if (list_empty(&dentry->d_lru)) {
374 dentry->d_flags |= DCACHE_LRU_LIST;
375 list_add_tail(&dentry->d_lru, list);
376 dentry->d_sb->s_nr_dentry_unused++;
377 dentry_stat.nr_unused++;
378 } else {
379 list_move_tail(&dentry->d_lru, list);
380 }
381 spin_unlock(&dcache_lru_lock);
382} 389}
383 390
384/** 391/**
@@ -474,7 +481,8 @@ EXPORT_SYMBOL(d_drop);
474 * If ref is non-zero, then decrement the refcount too. 481 * If ref is non-zero, then decrement the refcount too.
475 * Returns dentry requiring refcount drop, or NULL if we're done. 482 * Returns dentry requiring refcount drop, or NULL if we're done.
476 */ 483 */
477static inline struct dentry *dentry_kill(struct dentry *dentry) 484static inline struct dentry *
485dentry_kill(struct dentry *dentry, int unlock_on_failure)
478 __releases(dentry->d_lock) 486 __releases(dentry->d_lock)
479{ 487{
480 struct inode *inode; 488 struct inode *inode;
@@ -483,8 +491,10 @@ static inline struct dentry *dentry_kill(struct dentry *dentry)
483 inode = dentry->d_inode; 491 inode = dentry->d_inode;
484 if (inode && !spin_trylock(&inode->i_lock)) { 492 if (inode && !spin_trylock(&inode->i_lock)) {
485relock: 493relock:
486 spin_unlock(&dentry->d_lock); 494 if (unlock_on_failure) {
487 cpu_relax(); 495 spin_unlock(&dentry->d_lock);
496 cpu_relax();
497 }
488 return dentry; /* try again with same dentry */ 498 return dentry; /* try again with same dentry */
489 } 499 }
490 if (IS_ROOT(dentry)) 500 if (IS_ROOT(dentry))
@@ -567,7 +577,7 @@ repeat:
567 return; 577 return;
568 578
569kill_it: 579kill_it:
570 dentry = dentry_kill(dentry); 580 dentry = dentry_kill(dentry, 1);
571 if (dentry) 581 if (dentry)
572 goto repeat; 582 goto repeat;
573} 583}
@@ -787,12 +797,12 @@ EXPORT_SYMBOL(d_prune_aliases);
787 * 797 *
788 * This may fail if locks cannot be acquired no problem, just try again. 798 * This may fail if locks cannot be acquired no problem, just try again.
789 */ 799 */
790static void try_prune_one_dentry(struct dentry *dentry) 800static struct dentry * try_prune_one_dentry(struct dentry *dentry)
791 __releases(dentry->d_lock) 801 __releases(dentry->d_lock)
792{ 802{
793 struct dentry *parent; 803 struct dentry *parent;
794 804
795 parent = dentry_kill(dentry); 805 parent = dentry_kill(dentry, 0);
796 /* 806 /*
797 * If dentry_kill returns NULL, we have nothing more to do. 807 * If dentry_kill returns NULL, we have nothing more to do.
798 * if it returns the same dentry, trylocks failed. In either 808 * if it returns the same dentry, trylocks failed. In either
@@ -804,17 +814,18 @@ static void try_prune_one_dentry(struct dentry *dentry)
804 * fragmentation. 814 * fragmentation.
805 */ 815 */
806 if (!parent) 816 if (!parent)
807 return; 817 return NULL;
808 if (parent == dentry) 818 if (parent == dentry)
809 return; 819 return dentry;
810 820
811 /* Prune ancestors. */ 821 /* Prune ancestors. */
812 dentry = parent; 822 dentry = parent;
813 while (dentry) { 823 while (dentry) {
814 if (lockref_put_or_lock(&dentry->d_lockref)) 824 if (lockref_put_or_lock(&dentry->d_lockref))
815 return; 825 return NULL;
816 dentry = dentry_kill(dentry); 826 dentry = dentry_kill(dentry, 1);
817 } 827 }
828 return NULL;
818} 829}
819 830
820static void shrink_dentry_list(struct list_head *list) 831static void shrink_dentry_list(struct list_head *list)
@@ -833,76 +844,143 @@ static void shrink_dentry_list(struct list_head *list)
833 } 844 }
834 845
835 /* 846 /*
847 * The dispose list is isolated and dentries are not accounted
848 * to the LRU here, so we can simply remove it from the list
849 * here regardless of whether it is referenced or not.
850 */
851 list_del_init(&dentry->d_lru);
852 dentry->d_flags &= ~DCACHE_SHRINK_LIST;
853
854 /*
836 * We found an inuse dentry which was not removed from 855 * We found an inuse dentry which was not removed from
837 * the LRU because of laziness during lookup. Do not free 856 * the LRU because of laziness during lookup. Do not free it.
838 * it - just keep it off the LRU list.
839 */ 857 */
840 if (dentry->d_lockref.count) { 858 if (dentry->d_lockref.count) {
841 dentry_lru_del(dentry);
842 spin_unlock(&dentry->d_lock); 859 spin_unlock(&dentry->d_lock);
843 continue; 860 continue;
844 } 861 }
845
846 rcu_read_unlock(); 862 rcu_read_unlock();
847 863
848 try_prune_one_dentry(dentry); 864 dentry = try_prune_one_dentry(dentry);
849 865
850 rcu_read_lock(); 866 rcu_read_lock();
867 if (dentry) {
868 dentry->d_flags |= DCACHE_SHRINK_LIST;
869 list_add(&dentry->d_lru, list);
870 spin_unlock(&dentry->d_lock);
871 }
851 } 872 }
852 rcu_read_unlock(); 873 rcu_read_unlock();
853} 874}
854 875
876static enum lru_status
877dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg)
878{
879 struct list_head *freeable = arg;
880 struct dentry *dentry = container_of(item, struct dentry, d_lru);
881
882
883 /*
884 * we are inverting the lru lock/dentry->d_lock here,
885 * so use a trylock. If we fail to get the lock, just skip
886 * it
887 */
888 if (!spin_trylock(&dentry->d_lock))
889 return LRU_SKIP;
890
891 /*
892 * Referenced dentries are still in use. If they have active
893 * counts, just remove them from the LRU. Otherwise give them
894 * another pass through the LRU.
895 */
896 if (dentry->d_lockref.count) {
897 list_del_init(&dentry->d_lru);
898 spin_unlock(&dentry->d_lock);
899 return LRU_REMOVED;
900 }
901
902 if (dentry->d_flags & DCACHE_REFERENCED) {
903 dentry->d_flags &= ~DCACHE_REFERENCED;
904 spin_unlock(&dentry->d_lock);
905
906 /*
907 * The list move itself will be made by the common LRU code. At
908 * this point, we've dropped the dentry->d_lock but keep the
909 * lru lock. This is safe to do, since every list movement is
910 * protected by the lru lock even if both locks are held.
911 *
912 * This is guaranteed by the fact that all LRU management
913 * functions are intermediated by the LRU API calls like
914 * list_lru_add and list_lru_del. List movement in this file
915 * only ever occur through this functions or through callbacks
916 * like this one, that are called from the LRU API.
917 *
918 * The only exceptions to this are functions like
919 * shrink_dentry_list, and code that first checks for the
920 * DCACHE_SHRINK_LIST flag. Those are guaranteed to be
921 * operating only with stack provided lists after they are
922 * properly isolated from the main list. It is thus, always a
923 * local access.
924 */
925 return LRU_ROTATE;
926 }
927
928 dentry->d_flags |= DCACHE_SHRINK_LIST;
929 list_move_tail(&dentry->d_lru, freeable);
930 this_cpu_dec(nr_dentry_unused);
931 spin_unlock(&dentry->d_lock);
932
933 return LRU_REMOVED;
934}
935
855/** 936/**
856 * prune_dcache_sb - shrink the dcache 937 * prune_dcache_sb - shrink the dcache
857 * @sb: superblock 938 * @sb: superblock
858 * @count: number of entries to try to free 939 * @nr_to_scan : number of entries to try to free
940 * @nid: which node to scan for freeable entities
859 * 941 *
860 * Attempt to shrink the superblock dcache LRU by @count entries. This is 942 * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
861 * done when we need more memory an called from the superblock shrinker 943 * done when we need more memory an called from the superblock shrinker
862 * function. 944 * function.
863 * 945 *
864 * This function may fail to free any resources if all the dentries are in 946 * This function may fail to free any resources if all the dentries are in
865 * use. 947 * use.
866 */ 948 */
867void prune_dcache_sb(struct super_block *sb, int count) 949long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan,
950 int nid)
868{ 951{
869 struct dentry *dentry; 952 LIST_HEAD(dispose);
870 LIST_HEAD(referenced); 953 long freed;
871 LIST_HEAD(tmp);
872 954
873relock: 955 freed = list_lru_walk_node(&sb->s_dentry_lru, nid, dentry_lru_isolate,
874 spin_lock(&dcache_lru_lock); 956 &dispose, &nr_to_scan);
875 while (!list_empty(&sb->s_dentry_lru)) { 957 shrink_dentry_list(&dispose);
876 dentry = list_entry(sb->s_dentry_lru.prev, 958 return freed;
877 struct dentry, d_lru); 959}
878 BUG_ON(dentry->d_sb != sb);
879
880 if (!spin_trylock(&dentry->d_lock)) {
881 spin_unlock(&dcache_lru_lock);
882 cpu_relax();
883 goto relock;
884 }
885 960
886 if (dentry->d_flags & DCACHE_REFERENCED) { 961static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
887 dentry->d_flags &= ~DCACHE_REFERENCED; 962 spinlock_t *lru_lock, void *arg)
888 list_move(&dentry->d_lru, &referenced); 963{
889 spin_unlock(&dentry->d_lock); 964 struct list_head *freeable = arg;
890 } else { 965 struct dentry *dentry = container_of(item, struct dentry, d_lru);
891 list_move_tail(&dentry->d_lru, &tmp);
892 dentry->d_flags |= DCACHE_SHRINK_LIST;
893 spin_unlock(&dentry->d_lock);
894 if (!--count)
895 break;
896 }
897 cond_resched_lock(&dcache_lru_lock);
898 }
899 if (!list_empty(&referenced))
900 list_splice(&referenced, &sb->s_dentry_lru);
901 spin_unlock(&dcache_lru_lock);
902 966
903 shrink_dentry_list(&tmp); 967 /*
968 * we are inverting the lru lock/dentry->d_lock here,
969 * so use a trylock. If we fail to get the lock, just skip
970 * it
971 */
972 if (!spin_trylock(&dentry->d_lock))
973 return LRU_SKIP;
974
975 dentry->d_flags |= DCACHE_SHRINK_LIST;
976 list_move_tail(&dentry->d_lru, freeable);
977 this_cpu_dec(nr_dentry_unused);
978 spin_unlock(&dentry->d_lock);
979
980 return LRU_REMOVED;
904} 981}
905 982
983
906/** 984/**
907 * shrink_dcache_sb - shrink dcache for a superblock 985 * shrink_dcache_sb - shrink dcache for a superblock
908 * @sb: superblock 986 * @sb: superblock
@@ -912,16 +990,17 @@ relock:
912 */ 990 */
913void shrink_dcache_sb(struct super_block *sb) 991void shrink_dcache_sb(struct super_block *sb)
914{ 992{
915 LIST_HEAD(tmp); 993 long freed;
916 994
917 spin_lock(&dcache_lru_lock); 995 do {
918 while (!list_empty(&sb->s_dentry_lru)) { 996 LIST_HEAD(dispose);
919 list_splice_init(&sb->s_dentry_lru, &tmp); 997
920 spin_unlock(&dcache_lru_lock); 998 freed = list_lru_walk(&sb->s_dentry_lru,
921 shrink_dentry_list(&tmp); 999 dentry_lru_isolate_shrink, &dispose, UINT_MAX);
922 spin_lock(&dcache_lru_lock); 1000
923 } 1001 this_cpu_sub(nr_dentry_unused, freed);
924 spin_unlock(&dcache_lru_lock); 1002 shrink_dentry_list(&dispose);
1003 } while (freed > 0);
925} 1004}
926EXPORT_SYMBOL(shrink_dcache_sb); 1005EXPORT_SYMBOL(shrink_dcache_sb);
927 1006
@@ -1283,7 +1362,8 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
1283 if (dentry->d_lockref.count) { 1362 if (dentry->d_lockref.count) {
1284 dentry_lru_del(dentry); 1363 dentry_lru_del(dentry);
1285 } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { 1364 } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1286 dentry_lru_move_list(dentry, &data->dispose); 1365 dentry_lru_del(dentry);
1366 list_add_tail(&dentry->d_lru, &data->dispose);
1287 dentry->d_flags |= DCACHE_SHRINK_LIST; 1367 dentry->d_flags |= DCACHE_SHRINK_LIST;
1288 data->found++; 1368 data->found++;
1289 ret = D_WALK_NORETRY; 1369 ret = D_WALK_NORETRY;
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index c00e055b6282..9fd702f5bfb2 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -44,6 +44,7 @@ static void drop_slab(void)
44 .gfp_mask = GFP_KERNEL, 44 .gfp_mask = GFP_KERNEL,
45 }; 45 };
46 46
47 nodes_setall(shrink.nodes_to_scan);
47 do { 48 do {
48 nr_objects = shrink_slab(&shrink, 1000, 1000); 49 nr_objects = shrink_slab(&shrink, 1000, 1000);
49 } while (nr_objects > 10); 50 } while (nr_objects > 10);
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 2d1bdbe78c04..3981ff783950 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -931,13 +931,15 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
931 struct ext4_inode_info *ei; 931 struct ext4_inode_info *ei;
932 struct list_head *cur, *tmp; 932 struct list_head *cur, *tmp;
933 LIST_HEAD(skipped); 933 LIST_HEAD(skipped);
934 int ret, nr_shrunk = 0; 934 int nr_shrunk = 0;
935 int retried = 0, skip_precached = 1, nr_skipped = 0; 935 int retried = 0, skip_precached = 1, nr_skipped = 0;
936 936
937 spin_lock(&sbi->s_es_lru_lock); 937 spin_lock(&sbi->s_es_lru_lock);
938 938
939retry: 939retry:
940 list_for_each_safe(cur, tmp, &sbi->s_es_lru) { 940 list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
941 int shrunk;
942
941 /* 943 /*
942 * If we have already reclaimed all extents from extent 944 * If we have already reclaimed all extents from extent
943 * status tree, just stop the loop immediately. 945 * status tree, just stop the loop immediately.
@@ -964,13 +966,13 @@ retry:
964 continue; 966 continue;
965 967
966 write_lock(&ei->i_es_lock); 968 write_lock(&ei->i_es_lock);
967 ret = __es_try_to_reclaim_extents(ei, nr_to_scan); 969 shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
968 if (ei->i_es_lru_nr == 0) 970 if (ei->i_es_lru_nr == 0)
969 list_del_init(&ei->i_es_lru); 971 list_del_init(&ei->i_es_lru);
970 write_unlock(&ei->i_es_lock); 972 write_unlock(&ei->i_es_lock);
971 973
972 nr_shrunk += ret; 974 nr_shrunk += shrunk;
973 nr_to_scan -= ret; 975 nr_to_scan -= shrunk;
974 if (nr_to_scan == 0) 976 if (nr_to_scan == 0)
975 break; 977 break;
976 } 978 }
@@ -1007,7 +1009,20 @@ retry:
1007 return nr_shrunk; 1009 return nr_shrunk;
1008} 1010}
1009 1011
1010static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) 1012static unsigned long ext4_es_count(struct shrinker *shrink,
1013 struct shrink_control *sc)
1014{
1015 unsigned long nr;
1016 struct ext4_sb_info *sbi;
1017
1018 sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
1019 nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
1020 trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr);
1021 return nr;
1022}
1023
1024static unsigned long ext4_es_scan(struct shrinker *shrink,
1025 struct shrink_control *sc)
1011{ 1026{
1012 struct ext4_sb_info *sbi = container_of(shrink, 1027 struct ext4_sb_info *sbi = container_of(shrink,
1013 struct ext4_sb_info, s_es_shrinker); 1028 struct ext4_sb_info, s_es_shrinker);
@@ -1022,9 +1037,8 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
1022 1037
1023 nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); 1038 nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
1024 1039
1025 ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
1026 trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); 1040 trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
1027 return ret; 1041 return nr_shrunk;
1028} 1042}
1029 1043
1030void ext4_es_register_shrinker(struct ext4_sb_info *sbi) 1044void ext4_es_register_shrinker(struct ext4_sb_info *sbi)
@@ -1032,7 +1046,8 @@ void ext4_es_register_shrinker(struct ext4_sb_info *sbi)
1032 INIT_LIST_HEAD(&sbi->s_es_lru); 1046 INIT_LIST_HEAD(&sbi->s_es_lru);
1033 spin_lock_init(&sbi->s_es_lru_lock); 1047 spin_lock_init(&sbi->s_es_lru_lock);
1034 sbi->s_es_last_sorted = 0; 1048 sbi->s_es_last_sorted = 0;
1035 sbi->s_es_shrinker.shrink = ext4_es_shrink; 1049 sbi->s_es_shrinker.scan_objects = ext4_es_scan;
1050 sbi->s_es_shrinker.count_objects = ext4_es_count;
1036 sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; 1051 sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
1037 register_shrinker(&sbi->s_es_shrinker); 1052 register_shrinker(&sbi->s_es_shrinker);
1038} 1053}
@@ -1076,7 +1091,7 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
1076 struct ext4_es_tree *tree = &ei->i_es_tree; 1091 struct ext4_es_tree *tree = &ei->i_es_tree;
1077 struct rb_node *node; 1092 struct rb_node *node;
1078 struct extent_status *es; 1093 struct extent_status *es;
1079 int nr_shrunk = 0; 1094 unsigned long nr_shrunk = 0;
1080 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, 1095 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
1081 DEFAULT_RATELIMIT_BURST); 1096 DEFAULT_RATELIMIT_BURST);
1082 1097
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 722329cac98f..c2f41b4d00b9 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1427,21 +1427,22 @@ __acquires(&lru_lock)
1427 * gfs2_dispose_glock_lru() above. 1427 * gfs2_dispose_glock_lru() above.
1428 */ 1428 */
1429 1429
1430static void gfs2_scan_glock_lru(int nr) 1430static long gfs2_scan_glock_lru(int nr)
1431{ 1431{
1432 struct gfs2_glock *gl; 1432 struct gfs2_glock *gl;
1433 LIST_HEAD(skipped); 1433 LIST_HEAD(skipped);
1434 LIST_HEAD(dispose); 1434 LIST_HEAD(dispose);
1435 long freed = 0;
1435 1436
1436 spin_lock(&lru_lock); 1437 spin_lock(&lru_lock);
1437 while(nr && !list_empty(&lru_list)) { 1438 while ((nr-- >= 0) && !list_empty(&lru_list)) {
1438 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); 1439 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
1439 1440
1440 /* Test for being demotable */ 1441 /* Test for being demotable */
1441 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 1442 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
1442 list_move(&gl->gl_lru, &dispose); 1443 list_move(&gl->gl_lru, &dispose);
1443 atomic_dec(&lru_count); 1444 atomic_dec(&lru_count);
1444 nr--; 1445 freed++;
1445 continue; 1446 continue;
1446 } 1447 }
1447 1448
@@ -1451,23 +1452,28 @@ static void gfs2_scan_glock_lru(int nr)
1451 if (!list_empty(&dispose)) 1452 if (!list_empty(&dispose))
1452 gfs2_dispose_glock_lru(&dispose); 1453 gfs2_dispose_glock_lru(&dispose);
1453 spin_unlock(&lru_lock); 1454 spin_unlock(&lru_lock);
1455
1456 return freed;
1454} 1457}
1455 1458
1456static int gfs2_shrink_glock_memory(struct shrinker *shrink, 1459static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink,
1457 struct shrink_control *sc) 1460 struct shrink_control *sc)
1458{ 1461{
1459 if (sc->nr_to_scan) { 1462 if (!(sc->gfp_mask & __GFP_FS))
1460 if (!(sc->gfp_mask & __GFP_FS)) 1463 return SHRINK_STOP;
1461 return -1; 1464 return gfs2_scan_glock_lru(sc->nr_to_scan);
1462 gfs2_scan_glock_lru(sc->nr_to_scan); 1465}
1463 }
1464 1466
1465 return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure; 1467static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink,
1468 struct shrink_control *sc)
1469{
1470 return vfs_pressure_ratio(atomic_read(&lru_count));
1466} 1471}
1467 1472
1468static struct shrinker glock_shrinker = { 1473static struct shrinker glock_shrinker = {
1469 .shrink = gfs2_shrink_glock_memory,
1470 .seeks = DEFAULT_SEEKS, 1474 .seeks = DEFAULT_SEEKS,
1475 .count_objects = gfs2_glock_shrink_count,
1476 .scan_objects = gfs2_glock_shrink_scan,
1471}; 1477};
1472 1478
1473/** 1479/**
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 7b0f5043cf24..351586e24e30 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -32,7 +32,8 @@
32struct workqueue_struct *gfs2_control_wq; 32struct workqueue_struct *gfs2_control_wq;
33 33
34static struct shrinker qd_shrinker = { 34static struct shrinker qd_shrinker = {
35 .shrink = gfs2_shrink_qd_memory, 35 .count_objects = gfs2_qd_shrink_count,
36 .scan_objects = gfs2_qd_shrink_scan,
36 .seeks = DEFAULT_SEEKS, 37 .seeks = DEFAULT_SEEKS,
37}; 38};
38 39
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 3768c2f40e43..db441359ee8c 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -75,17 +75,16 @@ static LIST_HEAD(qd_lru_list);
75static atomic_t qd_lru_count = ATOMIC_INIT(0); 75static atomic_t qd_lru_count = ATOMIC_INIT(0);
76static DEFINE_SPINLOCK(qd_lru_lock); 76static DEFINE_SPINLOCK(qd_lru_lock);
77 77
78int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) 78unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
79 struct shrink_control *sc)
79{ 80{
80 struct gfs2_quota_data *qd; 81 struct gfs2_quota_data *qd;
81 struct gfs2_sbd *sdp; 82 struct gfs2_sbd *sdp;
82 int nr_to_scan = sc->nr_to_scan; 83 int nr_to_scan = sc->nr_to_scan;
83 84 long freed = 0;
84 if (nr_to_scan == 0)
85 goto out;
86 85
87 if (!(sc->gfp_mask & __GFP_FS)) 86 if (!(sc->gfp_mask & __GFP_FS))
88 return -1; 87 return SHRINK_STOP;
89 88
90 spin_lock(&qd_lru_lock); 89 spin_lock(&qd_lru_lock);
91 while (nr_to_scan && !list_empty(&qd_lru_list)) { 90 while (nr_to_scan && !list_empty(&qd_lru_list)) {
@@ -110,11 +109,16 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc)
110 kmem_cache_free(gfs2_quotad_cachep, qd); 109 kmem_cache_free(gfs2_quotad_cachep, qd);
111 spin_lock(&qd_lru_lock); 110 spin_lock(&qd_lru_lock);
112 nr_to_scan--; 111 nr_to_scan--;
112 freed++;
113 } 113 }
114 spin_unlock(&qd_lru_lock); 114 spin_unlock(&qd_lru_lock);
115 return freed;
116}
115 117
116out: 118unsigned long gfs2_qd_shrink_count(struct shrinker *shrink,
117 return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100; 119 struct shrink_control *sc)
120{
121 return vfs_pressure_ratio(atomic_read(&qd_lru_count));
118} 122}
119 123
120static u64 qd2index(struct gfs2_quota_data *qd) 124static u64 qd2index(struct gfs2_quota_data *qd)
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 4f5e6e44ed83..0f64d9deb1b0 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -53,8 +53,10 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
53 return ret; 53 return ret;
54} 54}
55 55
56extern int gfs2_shrink_qd_memory(struct shrinker *shrink, 56extern unsigned long gfs2_qd_shrink_count(struct shrinker *shrink,
57 struct shrink_control *sc); 57 struct shrink_control *sc);
58extern unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
59 struct shrink_control *sc);
58extern const struct quotactl_ops gfs2_quotactl_ops; 60extern const struct quotactl_ops gfs2_quotactl_ops;
59 61
60#endif /* __QUOTA_DOT_H__ */ 62#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/inode.c b/fs/inode.c
index 93a0625b46e4..b33ba8e021cc 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -17,6 +17,7 @@
17#include <linux/prefetch.h> 17#include <linux/prefetch.h>
18#include <linux/buffer_head.h> /* for inode_has_buffers */ 18#include <linux/buffer_head.h> /* for inode_has_buffers */
19#include <linux/ratelimit.h> 19#include <linux/ratelimit.h>
20#include <linux/list_lru.h>
20#include "internal.h" 21#include "internal.h"
21 22
22/* 23/*
@@ -24,7 +25,7 @@
24 * 25 *
25 * inode->i_lock protects: 26 * inode->i_lock protects:
26 * inode->i_state, inode->i_hash, __iget() 27 * inode->i_state, inode->i_hash, __iget()
27 * inode->i_sb->s_inode_lru_lock protects: 28 * Inode LRU list locks protect:
28 * inode->i_sb->s_inode_lru, inode->i_lru 29 * inode->i_sb->s_inode_lru, inode->i_lru
29 * inode_sb_list_lock protects: 30 * inode_sb_list_lock protects:
30 * sb->s_inodes, inode->i_sb_list 31 * sb->s_inodes, inode->i_sb_list
@@ -37,7 +38,7 @@
37 * 38 *
38 * inode_sb_list_lock 39 * inode_sb_list_lock
39 * inode->i_lock 40 * inode->i_lock
40 * inode->i_sb->s_inode_lru_lock 41 * Inode LRU list locks
41 * 42 *
42 * bdi->wb.list_lock 43 * bdi->wb.list_lock
43 * inode->i_lock 44 * inode->i_lock
@@ -70,33 +71,33 @@ EXPORT_SYMBOL(empty_aops);
70 */ 71 */
71struct inodes_stat_t inodes_stat; 72struct inodes_stat_t inodes_stat;
72 73
73static DEFINE_PER_CPU(unsigned int, nr_inodes); 74static DEFINE_PER_CPU(unsigned long, nr_inodes);
74static DEFINE_PER_CPU(unsigned int, nr_unused); 75static DEFINE_PER_CPU(unsigned long, nr_unused);
75 76
76static struct kmem_cache *inode_cachep __read_mostly; 77static struct kmem_cache *inode_cachep __read_mostly;
77 78
78static int get_nr_inodes(void) 79static long get_nr_inodes(void)
79{ 80{
80 int i; 81 int i;
81 int sum = 0; 82 long sum = 0;
82 for_each_possible_cpu(i) 83 for_each_possible_cpu(i)
83 sum += per_cpu(nr_inodes, i); 84 sum += per_cpu(nr_inodes, i);
84 return sum < 0 ? 0 : sum; 85 return sum < 0 ? 0 : sum;
85} 86}
86 87
87static inline int get_nr_inodes_unused(void) 88static inline long get_nr_inodes_unused(void)
88{ 89{
89 int i; 90 int i;
90 int sum = 0; 91 long sum = 0;
91 for_each_possible_cpu(i) 92 for_each_possible_cpu(i)
92 sum += per_cpu(nr_unused, i); 93 sum += per_cpu(nr_unused, i);
93 return sum < 0 ? 0 : sum; 94 return sum < 0 ? 0 : sum;
94} 95}
95 96
96int get_nr_dirty_inodes(void) 97long get_nr_dirty_inodes(void)
97{ 98{
98 /* not actually dirty inodes, but a wild approximation */ 99 /* not actually dirty inodes, but a wild approximation */
99 int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); 100 long nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
100 return nr_dirty > 0 ? nr_dirty : 0; 101 return nr_dirty > 0 ? nr_dirty : 0;
101} 102}
102 103
@@ -109,7 +110,7 @@ int proc_nr_inodes(ctl_table *table, int write,
109{ 110{
110 inodes_stat.nr_inodes = get_nr_inodes(); 111 inodes_stat.nr_inodes = get_nr_inodes();
111 inodes_stat.nr_unused = get_nr_inodes_unused(); 112 inodes_stat.nr_unused = get_nr_inodes_unused();
112 return proc_dointvec(table, write, buffer, lenp, ppos); 113 return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
113} 114}
114#endif 115#endif
115 116
@@ -401,13 +402,8 @@ EXPORT_SYMBOL(ihold);
401 402
402static void inode_lru_list_add(struct inode *inode) 403static void inode_lru_list_add(struct inode *inode)
403{ 404{
404 spin_lock(&inode->i_sb->s_inode_lru_lock); 405 if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru))
405 if (list_empty(&inode->i_lru)) {
406 list_add(&inode->i_lru, &inode->i_sb->s_inode_lru);
407 inode->i_sb->s_nr_inodes_unused++;
408 this_cpu_inc(nr_unused); 406 this_cpu_inc(nr_unused);
409 }
410 spin_unlock(&inode->i_sb->s_inode_lru_lock);
411} 407}
412 408
413/* 409/*
@@ -425,13 +421,9 @@ void inode_add_lru(struct inode *inode)
425 421
426static void inode_lru_list_del(struct inode *inode) 422static void inode_lru_list_del(struct inode *inode)
427{ 423{
428 spin_lock(&inode->i_sb->s_inode_lru_lock); 424
429 if (!list_empty(&inode->i_lru)) { 425 if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru))
430 list_del_init(&inode->i_lru);
431 inode->i_sb->s_nr_inodes_unused--;
432 this_cpu_dec(nr_unused); 426 this_cpu_dec(nr_unused);
433 }
434 spin_unlock(&inode->i_sb->s_inode_lru_lock);
435} 427}
436 428
437/** 429/**
@@ -675,24 +667,8 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
675 return busy; 667 return busy;
676} 668}
677 669
678static int can_unuse(struct inode *inode)
679{
680 if (inode->i_state & ~I_REFERENCED)
681 return 0;
682 if (inode_has_buffers(inode))
683 return 0;
684 if (atomic_read(&inode->i_count))
685 return 0;
686 if (inode->i_data.nrpages)
687 return 0;
688 return 1;
689}
690
691/* 670/*
692 * Walk the superblock inode LRU for freeable inodes and attempt to free them. 671 * Isolate the inode from the LRU in preparation for freeing it.
693 * This is called from the superblock shrinker function with a number of inodes
694 * to trim from the LRU. Inodes to be freed are moved to a temporary list and
695 * then are freed outside inode_lock by dispose_list().
696 * 672 *
697 * Any inodes which are pinned purely because of attached pagecache have their 673 * Any inodes which are pinned purely because of attached pagecache have their
698 * pagecache removed. If the inode has metadata buffers attached to 674 * pagecache removed. If the inode has metadata buffers attached to
@@ -706,89 +682,82 @@ static int can_unuse(struct inode *inode)
706 * LRU does not have strict ordering. Hence we don't want to reclaim inodes 682 * LRU does not have strict ordering. Hence we don't want to reclaim inodes
707 * with this flag set because they are the inodes that are out of order. 683 * with this flag set because they are the inodes that are out of order.
708 */ 684 */
709void prune_icache_sb(struct super_block *sb, int nr_to_scan) 685static enum lru_status
686inode_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg)
710{ 687{
711 LIST_HEAD(freeable); 688 struct list_head *freeable = arg;
712 int nr_scanned; 689 struct inode *inode = container_of(item, struct inode, i_lru);
713 unsigned long reap = 0;
714 690
715 spin_lock(&sb->s_inode_lru_lock); 691 /*
716 for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) { 692 * we are inverting the lru lock/inode->i_lock here, so use a trylock.
717 struct inode *inode; 693 * If we fail to get the lock, just skip it.
694 */
695 if (!spin_trylock(&inode->i_lock))
696 return LRU_SKIP;
718 697
719 if (list_empty(&sb->s_inode_lru)) 698 /*
720 break; 699 * Referenced or dirty inodes are still in use. Give them another pass
700 * through the LRU as we canot reclaim them now.
701 */
702 if (atomic_read(&inode->i_count) ||
703 (inode->i_state & ~I_REFERENCED)) {
704 list_del_init(&inode->i_lru);
705 spin_unlock(&inode->i_lock);
706 this_cpu_dec(nr_unused);
707 return LRU_REMOVED;
708 }
721 709
722 inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru); 710 /* recently referenced inodes get one more pass */
711 if (inode->i_state & I_REFERENCED) {
712 inode->i_state &= ~I_REFERENCED;
713 spin_unlock(&inode->i_lock);
714 return LRU_ROTATE;
715 }
723 716
724 /* 717 if (inode_has_buffers(inode) || inode->i_data.nrpages) {
725 * we are inverting the sb->s_inode_lru_lock/inode->i_lock here, 718 __iget(inode);
726 * so use a trylock. If we fail to get the lock, just move the 719 spin_unlock(&inode->i_lock);
727 * inode to the back of the list so we don't spin on it. 720 spin_unlock(lru_lock);
728 */ 721 if (remove_inode_buffers(inode)) {
729 if (!spin_trylock(&inode->i_lock)) { 722 unsigned long reap;
730 list_move(&inode->i_lru, &sb->s_inode_lru); 723 reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
731 continue; 724 if (current_is_kswapd())
725 __count_vm_events(KSWAPD_INODESTEAL, reap);
726 else
727 __count_vm_events(PGINODESTEAL, reap);
728 if (current->reclaim_state)
729 current->reclaim_state->reclaimed_slab += reap;
732 } 730 }
731 iput(inode);
732 spin_lock(lru_lock);
733 return LRU_RETRY;
734 }
733 735
734 /* 736 WARN_ON(inode->i_state & I_NEW);
735 * Referenced or dirty inodes are still in use. Give them 737 inode->i_state |= I_FREEING;
736 * another pass through the LRU as we canot reclaim them now. 738 list_move(&inode->i_lru, freeable);
737 */ 739 spin_unlock(&inode->i_lock);
738 if (atomic_read(&inode->i_count) ||
739 (inode->i_state & ~I_REFERENCED)) {
740 list_del_init(&inode->i_lru);
741 spin_unlock(&inode->i_lock);
742 sb->s_nr_inodes_unused--;
743 this_cpu_dec(nr_unused);
744 continue;
745 }
746 740
747 /* recently referenced inodes get one more pass */ 741 this_cpu_dec(nr_unused);
748 if (inode->i_state & I_REFERENCED) { 742 return LRU_REMOVED;
749 inode->i_state &= ~I_REFERENCED; 743}
750 list_move(&inode->i_lru, &sb->s_inode_lru);
751 spin_unlock(&inode->i_lock);
752 continue;
753 }
754 if (inode_has_buffers(inode) || inode->i_data.nrpages) {
755 __iget(inode);
756 spin_unlock(&inode->i_lock);
757 spin_unlock(&sb->s_inode_lru_lock);
758 if (remove_inode_buffers(inode))
759 reap += invalidate_mapping_pages(&inode->i_data,
760 0, -1);
761 iput(inode);
762 spin_lock(&sb->s_inode_lru_lock);
763
764 if (inode != list_entry(sb->s_inode_lru.next,
765 struct inode, i_lru))
766 continue; /* wrong inode or list_empty */
767 /* avoid lock inversions with trylock */
768 if (!spin_trylock(&inode->i_lock))
769 continue;
770 if (!can_unuse(inode)) {
771 spin_unlock(&inode->i_lock);
772 continue;
773 }
774 }
775 WARN_ON(inode->i_state & I_NEW);
776 inode->i_state |= I_FREEING;
777 spin_unlock(&inode->i_lock);
778 744
779 list_move(&inode->i_lru, &freeable); 745/*
780 sb->s_nr_inodes_unused--; 746 * Walk the superblock inode LRU for freeable inodes and attempt to free them.
781 this_cpu_dec(nr_unused); 747 * This is called from the superblock shrinker function with a number of inodes
782 } 748 * to trim from the LRU. Inodes to be freed are moved to a temporary list and
783 if (current_is_kswapd()) 749 * then are freed outside inode_lock by dispose_list().
784 __count_vm_events(KSWAPD_INODESTEAL, reap); 750 */
785 else 751long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan,
786 __count_vm_events(PGINODESTEAL, reap); 752 int nid)
787 spin_unlock(&sb->s_inode_lru_lock); 753{
788 if (current->reclaim_state) 754 LIST_HEAD(freeable);
789 current->reclaim_state->reclaimed_slab += reap; 755 long freed;
790 756
757 freed = list_lru_walk_node(&sb->s_inode_lru, nid, inode_lru_isolate,
758 &freeable, &nr_to_scan);
791 dispose_list(&freeable); 759 dispose_list(&freeable);
760 return freed;
792} 761}
793 762
794static void __wait_on_freeing_inode(struct inode *inode); 763static void __wait_on_freeing_inode(struct inode *inode);
diff --git a/fs/internal.h b/fs/internal.h
index 2be46ea5dd0b..513e0d859a6c 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -114,6 +114,8 @@ extern int open_check_o_direct(struct file *f);
114 * inode.c 114 * inode.c
115 */ 115 */
116extern spinlock_t inode_sb_list_lock; 116extern spinlock_t inode_sb_list_lock;
117extern long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan,
118 int nid);
117extern void inode_add_lru(struct inode *inode); 119extern void inode_add_lru(struct inode *inode);
118 120
119/* 121/*
@@ -121,7 +123,7 @@ extern void inode_add_lru(struct inode *inode);
121 */ 123 */
122extern void inode_wb_list_del(struct inode *inode); 124extern void inode_wb_list_del(struct inode *inode);
123 125
124extern int get_nr_dirty_inodes(void); 126extern long get_nr_dirty_inodes(void);
125extern void evict_inodes(struct super_block *); 127extern void evict_inodes(struct super_block *);
126extern int invalidate_inodes(struct super_block *, bool); 128extern int invalidate_inodes(struct super_block *, bool);
127 129
@@ -130,6 +132,8 @@ extern int invalidate_inodes(struct super_block *, bool);
130 */ 132 */
131extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); 133extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
132extern int d_set_mounted(struct dentry *dentry); 134extern int d_set_mounted(struct dentry *dentry);
135extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan,
136 int nid);
133 137
134/* 138/*
135 * read_write.c 139 * read_write.c
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 8c32ef3ba88e..e519e45bf673 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -86,18 +86,6 @@ static LIST_HEAD(mb_cache_list);
86static LIST_HEAD(mb_cache_lru_list); 86static LIST_HEAD(mb_cache_lru_list);
87static DEFINE_SPINLOCK(mb_cache_spinlock); 87static DEFINE_SPINLOCK(mb_cache_spinlock);
88 88
89/*
90 * What the mbcache registers as to get shrunk dynamically.
91 */
92
93static int mb_cache_shrink_fn(struct shrinker *shrink,
94 struct shrink_control *sc);
95
96static struct shrinker mb_cache_shrinker = {
97 .shrink = mb_cache_shrink_fn,
98 .seeks = DEFAULT_SEEKS,
99};
100
101static inline int 89static inline int
102__mb_cache_entry_is_hashed(struct mb_cache_entry *ce) 90__mb_cache_entry_is_hashed(struct mb_cache_entry *ce)
103{ 91{
@@ -151,7 +139,7 @@ forget:
151 139
152 140
153/* 141/*
154 * mb_cache_shrink_fn() memory pressure callback 142 * mb_cache_shrink_scan() memory pressure callback
155 * 143 *
156 * This function is called by the kernel memory management when memory 144 * This function is called by the kernel memory management when memory
157 * gets low. 145 * gets low.
@@ -159,17 +147,16 @@ forget:
159 * @shrink: (ignored) 147 * @shrink: (ignored)
160 * @sc: shrink_control passed from reclaim 148 * @sc: shrink_control passed from reclaim
161 * 149 *
162 * Returns the number of objects which are present in the cache. 150 * Returns the number of objects freed.
163 */ 151 */
164static int 152static unsigned long
165mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc) 153mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
166{ 154{
167 LIST_HEAD(free_list); 155 LIST_HEAD(free_list);
168 struct mb_cache *cache;
169 struct mb_cache_entry *entry, *tmp; 156 struct mb_cache_entry *entry, *tmp;
170 int count = 0;
171 int nr_to_scan = sc->nr_to_scan; 157 int nr_to_scan = sc->nr_to_scan;
172 gfp_t gfp_mask = sc->gfp_mask; 158 gfp_t gfp_mask = sc->gfp_mask;
159 unsigned long freed = 0;
173 160
174 mb_debug("trying to free %d entries", nr_to_scan); 161 mb_debug("trying to free %d entries", nr_to_scan);
175 spin_lock(&mb_cache_spinlock); 162 spin_lock(&mb_cache_spinlock);
@@ -179,19 +166,37 @@ mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc)
179 struct mb_cache_entry, e_lru_list); 166 struct mb_cache_entry, e_lru_list);
180 list_move_tail(&ce->e_lru_list, &free_list); 167 list_move_tail(&ce->e_lru_list, &free_list);
181 __mb_cache_entry_unhash(ce); 168 __mb_cache_entry_unhash(ce);
169 freed++;
170 }
171 spin_unlock(&mb_cache_spinlock);
172 list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
173 __mb_cache_entry_forget(entry, gfp_mask);
182 } 174 }
175 return freed;
176}
177
178static unsigned long
179mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
180{
181 struct mb_cache *cache;
182 unsigned long count = 0;
183
184 spin_lock(&mb_cache_spinlock);
183 list_for_each_entry(cache, &mb_cache_list, c_cache_list) { 185 list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
184 mb_debug("cache %s (%d)", cache->c_name, 186 mb_debug("cache %s (%d)", cache->c_name,
185 atomic_read(&cache->c_entry_count)); 187 atomic_read(&cache->c_entry_count));
186 count += atomic_read(&cache->c_entry_count); 188 count += atomic_read(&cache->c_entry_count);
187 } 189 }
188 spin_unlock(&mb_cache_spinlock); 190 spin_unlock(&mb_cache_spinlock);
189 list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { 191
190 __mb_cache_entry_forget(entry, gfp_mask); 192 return vfs_pressure_ratio(count);
191 }
192 return (count / 100) * sysctl_vfs_cache_pressure;
193} 193}
194 194
195static struct shrinker mb_cache_shrinker = {
196 .count_objects = mb_cache_shrink_count,
197 .scan_objects = mb_cache_shrink_scan,
198 .seeks = DEFAULT_SEEKS,
199};
195 200
196/* 201/*
197 * mb_cache_create() create a new cache 202 * mb_cache_create() create a new cache
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e79bc6ce828e..de434f309af0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2006,17 +2006,18 @@ static void nfs_access_free_list(struct list_head *head)
2006 } 2006 }
2007} 2007}
2008 2008
2009int nfs_access_cache_shrinker(struct shrinker *shrink, 2009unsigned long
2010 struct shrink_control *sc) 2010nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
2011{ 2011{
2012 LIST_HEAD(head); 2012 LIST_HEAD(head);
2013 struct nfs_inode *nfsi, *next; 2013 struct nfs_inode *nfsi, *next;
2014 struct nfs_access_entry *cache; 2014 struct nfs_access_entry *cache;
2015 int nr_to_scan = sc->nr_to_scan; 2015 int nr_to_scan = sc->nr_to_scan;
2016 gfp_t gfp_mask = sc->gfp_mask; 2016 gfp_t gfp_mask = sc->gfp_mask;
2017 long freed = 0;
2017 2018
2018 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) 2019 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
2019 return (nr_to_scan == 0) ? 0 : -1; 2020 return SHRINK_STOP;
2020 2021
2021 spin_lock(&nfs_access_lru_lock); 2022 spin_lock(&nfs_access_lru_lock);
2022 list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { 2023 list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
@@ -2032,6 +2033,7 @@ int nfs_access_cache_shrinker(struct shrinker *shrink,
2032 struct nfs_access_entry, lru); 2033 struct nfs_access_entry, lru);
2033 list_move(&cache->lru, &head); 2034 list_move(&cache->lru, &head);
2034 rb_erase(&cache->rb_node, &nfsi->access_cache); 2035 rb_erase(&cache->rb_node, &nfsi->access_cache);
2036 freed++;
2035 if (!list_empty(&nfsi->access_cache_entry_lru)) 2037 if (!list_empty(&nfsi->access_cache_entry_lru))
2036 list_move_tail(&nfsi->access_cache_inode_lru, 2038 list_move_tail(&nfsi->access_cache_inode_lru,
2037 &nfs_access_lru_list); 2039 &nfs_access_lru_list);
@@ -2046,7 +2048,13 @@ remove_lru_entry:
2046 } 2048 }
2047 spin_unlock(&nfs_access_lru_lock); 2049 spin_unlock(&nfs_access_lru_lock);
2048 nfs_access_free_list(&head); 2050 nfs_access_free_list(&head);
2049 return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure; 2051 return freed;
2052}
2053
2054unsigned long
2055nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
2056{
2057 return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
2050} 2058}
2051 2059
2052static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head) 2060static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d388302c005f..38da8c2b81ac 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -273,8 +273,10 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
273 const char *ip_addr); 273 const char *ip_addr);
274 274
275/* dir.c */ 275/* dir.c */
276extern int nfs_access_cache_shrinker(struct shrinker *shrink, 276extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
277 struct shrink_control *sc); 277 struct shrink_control *sc);
278extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
279 struct shrink_control *sc);
278struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); 280struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
279int nfs_create(struct inode *, struct dentry *, umode_t, bool); 281int nfs_create(struct inode *, struct dentry *, umode_t, bool);
280int nfs_mkdir(struct inode *, struct dentry *, umode_t); 282int nfs_mkdir(struct inode *, struct dentry *, umode_t);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 5793f24613c8..a03b9c6f9489 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -360,7 +360,8 @@ static void unregister_nfs4_fs(void)
360#endif 360#endif
361 361
362static struct shrinker acl_shrinker = { 362static struct shrinker acl_shrinker = {
363 .shrink = nfs_access_cache_shrinker, 363 .count_objects = nfs_access_cache_count,
364 .scan_objects = nfs_access_cache_scan,
364 .seeks = DEFAULT_SEEKS, 365 .seeks = DEFAULT_SEEKS,
365}; 366};
366 367
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index e76244edd748..9186c7ce0b14 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -59,11 +59,14 @@ static unsigned int longest_chain_cachesize;
59 59
60static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); 60static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
61static void cache_cleaner_func(struct work_struct *unused); 61static void cache_cleaner_func(struct work_struct *unused);
62static int nfsd_reply_cache_shrink(struct shrinker *shrink, 62static unsigned long nfsd_reply_cache_count(struct shrinker *shrink,
63 struct shrink_control *sc); 63 struct shrink_control *sc);
64static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink,
65 struct shrink_control *sc);
64 66
65static struct shrinker nfsd_reply_cache_shrinker = { 67static struct shrinker nfsd_reply_cache_shrinker = {
66 .shrink = nfsd_reply_cache_shrink, 68 .scan_objects = nfsd_reply_cache_scan,
69 .count_objects = nfsd_reply_cache_count,
67 .seeks = 1, 70 .seeks = 1,
68}; 71};
69 72
@@ -232,16 +235,18 @@ nfsd_cache_entry_expired(struct svc_cacherep *rp)
232 * Walk the LRU list and prune off entries that are older than RC_EXPIRE. 235 * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
233 * Also prune the oldest ones when the total exceeds the max number of entries. 236 * Also prune the oldest ones when the total exceeds the max number of entries.
234 */ 237 */
235static void 238static long
236prune_cache_entries(void) 239prune_cache_entries(void)
237{ 240{
238 struct svc_cacherep *rp, *tmp; 241 struct svc_cacherep *rp, *tmp;
242 long freed = 0;
239 243
240 list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { 244 list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) {
241 if (!nfsd_cache_entry_expired(rp) && 245 if (!nfsd_cache_entry_expired(rp) &&
242 num_drc_entries <= max_drc_entries) 246 num_drc_entries <= max_drc_entries)
243 break; 247 break;
244 nfsd_reply_cache_free_locked(rp); 248 nfsd_reply_cache_free_locked(rp);
249 freed++;
245 } 250 }
246 251
247 /* 252 /*
@@ -254,6 +259,7 @@ prune_cache_entries(void)
254 cancel_delayed_work(&cache_cleaner); 259 cancel_delayed_work(&cache_cleaner);
255 else 260 else
256 mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); 261 mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE);
262 return freed;
257} 263}
258 264
259static void 265static void
@@ -264,20 +270,28 @@ cache_cleaner_func(struct work_struct *unused)
264 spin_unlock(&cache_lock); 270 spin_unlock(&cache_lock);
265} 271}
266 272
267static int 273static unsigned long
268nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc) 274nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
269{ 275{
270 unsigned int num; 276 unsigned long num;
271 277
272 spin_lock(&cache_lock); 278 spin_lock(&cache_lock);
273 if (sc->nr_to_scan)
274 prune_cache_entries();
275 num = num_drc_entries; 279 num = num_drc_entries;
276 spin_unlock(&cache_lock); 280 spin_unlock(&cache_lock);
277 281
278 return num; 282 return num;
279} 283}
280 284
285static unsigned long
286nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
287{
288 unsigned long freed;
289
290 spin_lock(&cache_lock);
291 freed = prune_cache_entries();
292 spin_unlock(&cache_lock);
293 return freed;
294}
281/* 295/*
282 * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes 296 * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
283 */ 297 */
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 9a702e193538..831d49a4111f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -687,45 +687,37 @@ int dquot_quota_sync(struct super_block *sb, int type)
687} 687}
688EXPORT_SYMBOL(dquot_quota_sync); 688EXPORT_SYMBOL(dquot_quota_sync);
689 689
690/* Free unused dquots from cache */ 690static unsigned long
691static void prune_dqcache(int count) 691dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
692{ 692{
693 struct list_head *head; 693 struct list_head *head;
694 struct dquot *dquot; 694 struct dquot *dquot;
695 unsigned long freed = 0;
695 696
696 head = free_dquots.prev; 697 head = free_dquots.prev;
697 while (head != &free_dquots && count) { 698 while (head != &free_dquots && sc->nr_to_scan) {
698 dquot = list_entry(head, struct dquot, dq_free); 699 dquot = list_entry(head, struct dquot, dq_free);
699 remove_dquot_hash(dquot); 700 remove_dquot_hash(dquot);
700 remove_free_dquot(dquot); 701 remove_free_dquot(dquot);
701 remove_inuse(dquot); 702 remove_inuse(dquot);
702 do_destroy_dquot(dquot); 703 do_destroy_dquot(dquot);
703 count--; 704 sc->nr_to_scan--;
705 freed++;
704 head = free_dquots.prev; 706 head = free_dquots.prev;
705 } 707 }
708 return freed;
706} 709}
707 710
708/* 711static unsigned long
709 * This is called from kswapd when we think we need some 712dqcache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
710 * more memory
711 */
712static int shrink_dqcache_memory(struct shrinker *shrink,
713 struct shrink_control *sc)
714{ 713{
715 int nr = sc->nr_to_scan; 714 return vfs_pressure_ratio(
716 715 percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS]));
717 if (nr) {
718 spin_lock(&dq_list_lock);
719 prune_dqcache(nr);
720 spin_unlock(&dq_list_lock);
721 }
722 return ((unsigned)
723 percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS])
724 /100) * sysctl_vfs_cache_pressure;
725} 716}
726 717
727static struct shrinker dqcache_shrinker = { 718static struct shrinker dqcache_shrinker = {
728 .shrink = shrink_dqcache_memory, 719 .count_objects = dqcache_shrink_count,
720 .scan_objects = dqcache_shrink_scan,
729 .seeks = DEFAULT_SEEKS, 721 .seeks = DEFAULT_SEEKS,
730}; 722};
731 723
diff --git a/fs/super.c b/fs/super.c
index f6961ea84c56..3a96c9783a8b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -53,11 +53,15 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
53 * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we 53 * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
54 * take a passive reference to the superblock to avoid this from occurring. 54 * take a passive reference to the superblock to avoid this from occurring.
55 */ 55 */
56static int prune_super(struct shrinker *shrink, struct shrink_control *sc) 56static unsigned long super_cache_scan(struct shrinker *shrink,
57 struct shrink_control *sc)
57{ 58{
58 struct super_block *sb; 59 struct super_block *sb;
59 int fs_objects = 0; 60 long fs_objects = 0;
60 int total_objects; 61 long total_objects;
62 long freed = 0;
63 long dentries;
64 long inodes;
61 65
62 sb = container_of(shrink, struct super_block, s_shrink); 66 sb = container_of(shrink, struct super_block, s_shrink);
63 67
@@ -65,46 +69,62 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
65 * Deadlock avoidance. We may hold various FS locks, and we don't want 69 * Deadlock avoidance. We may hold various FS locks, and we don't want
66 * to recurse into the FS that called us in clear_inode() and friends.. 70 * to recurse into the FS that called us in clear_inode() and friends..
67 */ 71 */
68 if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS)) 72 if (!(sc->gfp_mask & __GFP_FS))
69 return -1; 73 return SHRINK_STOP;
70 74
71 if (!grab_super_passive(sb)) 75 if (!grab_super_passive(sb))
72 return -1; 76 return SHRINK_STOP;
73 77
74 if (sb->s_op->nr_cached_objects) 78 if (sb->s_op->nr_cached_objects)
75 fs_objects = sb->s_op->nr_cached_objects(sb); 79 fs_objects = sb->s_op->nr_cached_objects(sb, sc->nid);
76
77 total_objects = sb->s_nr_dentry_unused +
78 sb->s_nr_inodes_unused + fs_objects + 1;
79
80 if (sc->nr_to_scan) {
81 int dentries;
82 int inodes;
83
84 /* proportion the scan between the caches */
85 dentries = (sc->nr_to_scan * sb->s_nr_dentry_unused) /
86 total_objects;
87 inodes = (sc->nr_to_scan * sb->s_nr_inodes_unused) /
88 total_objects;
89 if (fs_objects)
90 fs_objects = (sc->nr_to_scan * fs_objects) /
91 total_objects;
92 /*
93 * prune the dcache first as the icache is pinned by it, then
94 * prune the icache, followed by the filesystem specific caches
95 */
96 prune_dcache_sb(sb, dentries);
97 prune_icache_sb(sb, inodes);
98 80
99 if (fs_objects && sb->s_op->free_cached_objects) { 81 inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid);
100 sb->s_op->free_cached_objects(sb, fs_objects); 82 dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid);
101 fs_objects = sb->s_op->nr_cached_objects(sb); 83 total_objects = dentries + inodes + fs_objects + 1;
102 } 84
103 total_objects = sb->s_nr_dentry_unused + 85 /* proportion the scan between the caches */
104 sb->s_nr_inodes_unused + fs_objects; 86 dentries = mult_frac(sc->nr_to_scan, dentries, total_objects);
87 inodes = mult_frac(sc->nr_to_scan, inodes, total_objects);
88
89 /*
90 * prune the dcache first as the icache is pinned by it, then
91 * prune the icache, followed by the filesystem specific caches
92 */
93 freed = prune_dcache_sb(sb, dentries, sc->nid);
94 freed += prune_icache_sb(sb, inodes, sc->nid);
95
96 if (fs_objects) {
97 fs_objects = mult_frac(sc->nr_to_scan, fs_objects,
98 total_objects);
99 freed += sb->s_op->free_cached_objects(sb, fs_objects,
100 sc->nid);
105 } 101 }
106 102
107 total_objects = (total_objects / 100) * sysctl_vfs_cache_pressure; 103 drop_super(sb);
104 return freed;
105}
106
107static unsigned long super_cache_count(struct shrinker *shrink,
108 struct shrink_control *sc)
109{
110 struct super_block *sb;
111 long total_objects = 0;
112
113 sb = container_of(shrink, struct super_block, s_shrink);
114
115 if (!grab_super_passive(sb))
116 return 0;
117
118 if (sb->s_op && sb->s_op->nr_cached_objects)
119 total_objects = sb->s_op->nr_cached_objects(sb,
120 sc->nid);
121
122 total_objects += list_lru_count_node(&sb->s_dentry_lru,
123 sc->nid);
124 total_objects += list_lru_count_node(&sb->s_inode_lru,
125 sc->nid);
126
127 total_objects = vfs_pressure_ratio(total_objects);
108 drop_super(sb); 128 drop_super(sb);
109 return total_objects; 129 return total_objects;
110} 130}
@@ -175,9 +195,12 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
175 INIT_HLIST_NODE(&s->s_instances); 195 INIT_HLIST_NODE(&s->s_instances);
176 INIT_HLIST_BL_HEAD(&s->s_anon); 196 INIT_HLIST_BL_HEAD(&s->s_anon);
177 INIT_LIST_HEAD(&s->s_inodes); 197 INIT_LIST_HEAD(&s->s_inodes);
178 INIT_LIST_HEAD(&s->s_dentry_lru); 198
179 INIT_LIST_HEAD(&s->s_inode_lru); 199 if (list_lru_init(&s->s_dentry_lru))
180 spin_lock_init(&s->s_inode_lru_lock); 200 goto err_out;
201 if (list_lru_init(&s->s_inode_lru))
202 goto err_out_dentry_lru;
203
181 INIT_LIST_HEAD(&s->s_mounts); 204 INIT_LIST_HEAD(&s->s_mounts);
182 init_rwsem(&s->s_umount); 205 init_rwsem(&s->s_umount);
183 lockdep_set_class(&s->s_umount, &type->s_umount_key); 206 lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -210,11 +233,16 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
210 s->cleancache_poolid = -1; 233 s->cleancache_poolid = -1;
211 234
212 s->s_shrink.seeks = DEFAULT_SEEKS; 235 s->s_shrink.seeks = DEFAULT_SEEKS;
213 s->s_shrink.shrink = prune_super; 236 s->s_shrink.scan_objects = super_cache_scan;
237 s->s_shrink.count_objects = super_cache_count;
214 s->s_shrink.batch = 1024; 238 s->s_shrink.batch = 1024;
239 s->s_shrink.flags = SHRINKER_NUMA_AWARE;
215 } 240 }
216out: 241out:
217 return s; 242 return s;
243
244err_out_dentry_lru:
245 list_lru_destroy(&s->s_dentry_lru);
218err_out: 246err_out:
219 security_sb_free(s); 247 security_sb_free(s);
220#ifdef CONFIG_SMP 248#ifdef CONFIG_SMP
@@ -295,6 +323,9 @@ void deactivate_locked_super(struct super_block *s)
295 323
296 /* caches are now gone, we can safely kill the shrinker now */ 324 /* caches are now gone, we can safely kill the shrinker now */
297 unregister_shrinker(&s->s_shrink); 325 unregister_shrinker(&s->s_shrink);
326 list_lru_destroy(&s->s_dentry_lru);
327 list_lru_destroy(&s->s_inode_lru);
328
298 put_filesystem(fs); 329 put_filesystem(fs);
299 put_super(s); 330 put_super(s);
300 } else { 331 } else {
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 9e1d05666fed..f35135e28e96 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -277,18 +277,25 @@ static int kick_a_thread(void)
277 return 0; 277 return 0;
278} 278}
279 279
280int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) 280unsigned long ubifs_shrink_count(struct shrinker *shrink,
281 struct shrink_control *sc)
281{ 282{
282 int nr = sc->nr_to_scan;
283 int freed, contention = 0;
284 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); 283 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
285 284
286 if (nr == 0) 285 /*
287 /* 286 * Due to the way UBIFS updates the clean znode counter it may
288 * Due to the way UBIFS updates the clean znode counter it may 287 * temporarily be negative.
289 * temporarily be negative. 288 */
290 */ 289 return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
291 return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; 290}
291
292unsigned long ubifs_shrink_scan(struct shrinker *shrink,
293 struct shrink_control *sc)
294{
295 unsigned long nr = sc->nr_to_scan;
296 int contention = 0;
297 unsigned long freed;
298 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
292 299
293 if (!clean_zn_cnt) { 300 if (!clean_zn_cnt) {
294 /* 301 /*
@@ -316,10 +323,10 @@ int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc)
316 323
317 if (!freed && contention) { 324 if (!freed && contention) {
318 dbg_tnc("freed nothing, but contention"); 325 dbg_tnc("freed nothing, but contention");
319 return -1; 326 return SHRINK_STOP;
320 } 327 }
321 328
322out: 329out:
323 dbg_tnc("%d znodes were freed, requested %d", freed, nr); 330 dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
324 return freed; 331 return freed;
325} 332}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 879b9976c12b..3e4aa7281e04 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -49,7 +49,8 @@ struct kmem_cache *ubifs_inode_slab;
49 49
50/* UBIFS TNC shrinker description */ 50/* UBIFS TNC shrinker description */
51static struct shrinker ubifs_shrinker_info = { 51static struct shrinker ubifs_shrinker_info = {
52 .shrink = ubifs_shrinker, 52 .scan_objects = ubifs_shrink_scan,
53 .count_objects = ubifs_shrink_count,
53 .seeks = DEFAULT_SEEKS, 54 .seeks = DEFAULT_SEEKS,
54}; 55};
55 56
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index b2babce4d70f..e8c8cfe1435c 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1624,7 +1624,10 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
1624int ubifs_tnc_end_commit(struct ubifs_info *c); 1624int ubifs_tnc_end_commit(struct ubifs_info *c);
1625 1625
1626/* shrinker.c */ 1626/* shrinker.c */
1627int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc); 1627unsigned long ubifs_shrink_scan(struct shrinker *shrink,
1628 struct shrink_control *sc);
1629unsigned long ubifs_shrink_count(struct shrinker *shrink,
1630 struct shrink_control *sc);
1628 1631
1629/* commit.c */ 1632/* commit.c */
1630int ubifs_bg_thread(void *info); 1633int ubifs_bg_thread(void *info);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index c06823fe10d3..263470075ea2 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -81,54 +81,6 @@ xfs_buf_vmap_len(
81} 81}
82 82
83/* 83/*
84 * xfs_buf_lru_add - add a buffer to the LRU.
85 *
86 * The LRU takes a new reference to the buffer so that it will only be freed
87 * once the shrinker takes the buffer off the LRU.
88 */
89STATIC void
90xfs_buf_lru_add(
91 struct xfs_buf *bp)
92{
93 struct xfs_buftarg *btp = bp->b_target;
94
95 spin_lock(&btp->bt_lru_lock);
96 if (list_empty(&bp->b_lru)) {
97 atomic_inc(&bp->b_hold);
98 list_add_tail(&bp->b_lru, &btp->bt_lru);
99 btp->bt_lru_nr++;
100 bp->b_lru_flags &= ~_XBF_LRU_DISPOSE;
101 }
102 spin_unlock(&btp->bt_lru_lock);
103}
104
105/*
106 * xfs_buf_lru_del - remove a buffer from the LRU
107 *
108 * The unlocked check is safe here because it only occurs when there are not
109 * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
110 * to optimise the shrinker removing the buffer from the LRU and calling
111 * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
112 * bt_lru_lock.
113 */
114STATIC void
115xfs_buf_lru_del(
116 struct xfs_buf *bp)
117{
118 struct xfs_buftarg *btp = bp->b_target;
119
120 if (list_empty(&bp->b_lru))
121 return;
122
123 spin_lock(&btp->bt_lru_lock);
124 if (!list_empty(&bp->b_lru)) {
125 list_del_init(&bp->b_lru);
126 btp->bt_lru_nr--;
127 }
128 spin_unlock(&btp->bt_lru_lock);
129}
130
131/*
132 * When we mark a buffer stale, we remove the buffer from the LRU and clear the 84 * When we mark a buffer stale, we remove the buffer from the LRU and clear the
133 * b_lru_ref count so that the buffer is freed immediately when the buffer 85 * b_lru_ref count so that the buffer is freed immediately when the buffer
134 * reference count falls to zero. If the buffer is already on the LRU, we need 86 * reference count falls to zero. If the buffer is already on the LRU, we need
@@ -151,20 +103,14 @@ xfs_buf_stale(
151 */ 103 */
152 bp->b_flags &= ~_XBF_DELWRI_Q; 104 bp->b_flags &= ~_XBF_DELWRI_Q;
153 105
154 atomic_set(&(bp)->b_lru_ref, 0); 106 spin_lock(&bp->b_lock);
155 if (!list_empty(&bp->b_lru)) { 107 atomic_set(&bp->b_lru_ref, 0);
156 struct xfs_buftarg *btp = bp->b_target; 108 if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
109 (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
110 atomic_dec(&bp->b_hold);
157 111
158 spin_lock(&btp->bt_lru_lock);
159 if (!list_empty(&bp->b_lru) &&
160 !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) {
161 list_del_init(&bp->b_lru);
162 btp->bt_lru_nr--;
163 atomic_dec(&bp->b_hold);
164 }
165 spin_unlock(&btp->bt_lru_lock);
166 }
167 ASSERT(atomic_read(&bp->b_hold) >= 1); 112 ASSERT(atomic_read(&bp->b_hold) >= 1);
113 spin_unlock(&bp->b_lock);
168} 114}
169 115
170static int 116static int
@@ -228,6 +174,7 @@ _xfs_buf_alloc(
228 INIT_LIST_HEAD(&bp->b_list); 174 INIT_LIST_HEAD(&bp->b_list);
229 RB_CLEAR_NODE(&bp->b_rbnode); 175 RB_CLEAR_NODE(&bp->b_rbnode);
230 sema_init(&bp->b_sema, 0); /* held, no waiters */ 176 sema_init(&bp->b_sema, 0); /* held, no waiters */
177 spin_lock_init(&bp->b_lock);
231 XB_SET_OWNER(bp); 178 XB_SET_OWNER(bp);
232 bp->b_target = target; 179 bp->b_target = target;
233 bp->b_flags = flags; 180 bp->b_flags = flags;
@@ -917,12 +864,33 @@ xfs_buf_rele(
917 864
918 ASSERT(atomic_read(&bp->b_hold) > 0); 865 ASSERT(atomic_read(&bp->b_hold) > 0);
919 if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { 866 if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
920 if (!(bp->b_flags & XBF_STALE) && 867 spin_lock(&bp->b_lock);
921 atomic_read(&bp->b_lru_ref)) { 868 if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
922 xfs_buf_lru_add(bp); 869 /*
870 * If the buffer is added to the LRU take a new
871 * reference to the buffer for the LRU and clear the
872 * (now stale) dispose list state flag
873 */
874 if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) {
875 bp->b_state &= ~XFS_BSTATE_DISPOSE;
876 atomic_inc(&bp->b_hold);
877 }
878 spin_unlock(&bp->b_lock);
923 spin_unlock(&pag->pag_buf_lock); 879 spin_unlock(&pag->pag_buf_lock);
924 } else { 880 } else {
925 xfs_buf_lru_del(bp); 881 /*
882 * most of the time buffers will already be removed from
883 * the LRU, so optimise that case by checking for the
884 * XFS_BSTATE_DISPOSE flag indicating the last list the
885 * buffer was on was the disposal list
886 */
887 if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
888 list_lru_del(&bp->b_target->bt_lru, &bp->b_lru);
889 } else {
890 ASSERT(list_empty(&bp->b_lru));
891 }
892 spin_unlock(&bp->b_lock);
893
926 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); 894 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
927 rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); 895 rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
928 spin_unlock(&pag->pag_buf_lock); 896 spin_unlock(&pag->pag_buf_lock);
@@ -1502,83 +1470,121 @@ xfs_buf_iomove(
1502 * returned. These buffers will have an elevated hold count, so wait on those 1470 * returned. These buffers will have an elevated hold count, so wait on those
1503 * while freeing all the buffers only held by the LRU. 1471 * while freeing all the buffers only held by the LRU.
1504 */ 1472 */
1473static enum lru_status
1474xfs_buftarg_wait_rele(
1475 struct list_head *item,
1476 spinlock_t *lru_lock,
1477 void *arg)
1478
1479{
1480 struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
1481 struct list_head *dispose = arg;
1482
1483 if (atomic_read(&bp->b_hold) > 1) {
1484 /* need to wait, so skip it this pass */
1485 trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
1486 return LRU_SKIP;
1487 }
1488 if (!spin_trylock(&bp->b_lock))
1489 return LRU_SKIP;
1490
1491 /*
1492 * clear the LRU reference count so the buffer doesn't get
1493 * ignored in xfs_buf_rele().
1494 */
1495 atomic_set(&bp->b_lru_ref, 0);
1496 bp->b_state |= XFS_BSTATE_DISPOSE;
1497 list_move(item, dispose);
1498 spin_unlock(&bp->b_lock);
1499 return LRU_REMOVED;
1500}
1501
1505void 1502void
1506xfs_wait_buftarg( 1503xfs_wait_buftarg(
1507 struct xfs_buftarg *btp) 1504 struct xfs_buftarg *btp)
1508{ 1505{
1509 struct xfs_buf *bp; 1506 LIST_HEAD(dispose);
1507 int loop = 0;
1510 1508
1511restart: 1509 /* loop until there is nothing left on the lru list. */
1512 spin_lock(&btp->bt_lru_lock); 1510 while (list_lru_count(&btp->bt_lru)) {
1513 while (!list_empty(&btp->bt_lru)) { 1511 list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele,
1514 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); 1512 &dispose, LONG_MAX);
1515 if (atomic_read(&bp->b_hold) > 1) { 1513
1516 trace_xfs_buf_wait_buftarg(bp, _RET_IP_); 1514 while (!list_empty(&dispose)) {
1517 list_move_tail(&bp->b_lru, &btp->bt_lru); 1515 struct xfs_buf *bp;
1518 spin_unlock(&btp->bt_lru_lock); 1516 bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
1519 delay(100); 1517 list_del_init(&bp->b_lru);
1520 goto restart; 1518 xfs_buf_rele(bp);
1521 } 1519 }
1522 /* 1520 if (loop++ != 0)
1523 * clear the LRU reference count so the buffer doesn't get 1521 delay(100);
1524 * ignored in xfs_buf_rele().
1525 */
1526 atomic_set(&bp->b_lru_ref, 0);
1527 spin_unlock(&btp->bt_lru_lock);
1528 xfs_buf_rele(bp);
1529 spin_lock(&btp->bt_lru_lock);
1530 } 1522 }
1531 spin_unlock(&btp->bt_lru_lock);
1532} 1523}
1533 1524
1534int 1525static enum lru_status
1535xfs_buftarg_shrink( 1526xfs_buftarg_isolate(
1527 struct list_head *item,
1528 spinlock_t *lru_lock,
1529 void *arg)
1530{
1531 struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
1532 struct list_head *dispose = arg;
1533
1534 /*
1535 * we are inverting the lru lock/bp->b_lock here, so use a trylock.
1536 * If we fail to get the lock, just skip it.
1537 */
1538 if (!spin_trylock(&bp->b_lock))
1539 return LRU_SKIP;
1540 /*
1541 * Decrement the b_lru_ref count unless the value is already
1542 * zero. If the value is already zero, we need to reclaim the
1543 * buffer, otherwise it gets another trip through the LRU.
1544 */
1545 if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
1546 spin_unlock(&bp->b_lock);
1547 return LRU_ROTATE;
1548 }
1549
1550 bp->b_state |= XFS_BSTATE_DISPOSE;
1551 list_move(item, dispose);
1552 spin_unlock(&bp->b_lock);
1553 return LRU_REMOVED;
1554}
1555
1556static unsigned long
1557xfs_buftarg_shrink_scan(
1536 struct shrinker *shrink, 1558 struct shrinker *shrink,
1537 struct shrink_control *sc) 1559 struct shrink_control *sc)
1538{ 1560{
1539 struct xfs_buftarg *btp = container_of(shrink, 1561 struct xfs_buftarg *btp = container_of(shrink,
1540 struct xfs_buftarg, bt_shrinker); 1562 struct xfs_buftarg, bt_shrinker);
1541 struct xfs_buf *bp;
1542 int nr_to_scan = sc->nr_to_scan;
1543 LIST_HEAD(dispose); 1563 LIST_HEAD(dispose);
1564 unsigned long freed;
1565 unsigned long nr_to_scan = sc->nr_to_scan;
1544 1566
1545 if (!nr_to_scan) 1567 freed = list_lru_walk_node(&btp->bt_lru, sc->nid, xfs_buftarg_isolate,
1546 return btp->bt_lru_nr; 1568 &dispose, &nr_to_scan);
1547
1548 spin_lock(&btp->bt_lru_lock);
1549 while (!list_empty(&btp->bt_lru)) {
1550 if (nr_to_scan-- <= 0)
1551 break;
1552
1553 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
1554
1555 /*
1556 * Decrement the b_lru_ref count unless the value is already
1557 * zero. If the value is already zero, we need to reclaim the
1558 * buffer, otherwise it gets another trip through the LRU.
1559 */
1560 if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
1561 list_move_tail(&bp->b_lru, &btp->bt_lru);
1562 continue;
1563 }
1564
1565 /*
1566 * remove the buffer from the LRU now to avoid needing another
1567 * lock round trip inside xfs_buf_rele().
1568 */
1569 list_move(&bp->b_lru, &dispose);
1570 btp->bt_lru_nr--;
1571 bp->b_lru_flags |= _XBF_LRU_DISPOSE;
1572 }
1573 spin_unlock(&btp->bt_lru_lock);
1574 1569
1575 while (!list_empty(&dispose)) { 1570 while (!list_empty(&dispose)) {
1571 struct xfs_buf *bp;
1576 bp = list_first_entry(&dispose, struct xfs_buf, b_lru); 1572 bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
1577 list_del_init(&bp->b_lru); 1573 list_del_init(&bp->b_lru);
1578 xfs_buf_rele(bp); 1574 xfs_buf_rele(bp);
1579 } 1575 }
1580 1576
1581 return btp->bt_lru_nr; 1577 return freed;
1578}
1579
1580static unsigned long
1581xfs_buftarg_shrink_count(
1582 struct shrinker *shrink,
1583 struct shrink_control *sc)
1584{
1585 struct xfs_buftarg *btp = container_of(shrink,
1586 struct xfs_buftarg, bt_shrinker);
1587 return list_lru_count_node(&btp->bt_lru, sc->nid);
1582} 1588}
1583 1589
1584void 1590void
@@ -1587,6 +1593,7 @@ xfs_free_buftarg(
1587 struct xfs_buftarg *btp) 1593 struct xfs_buftarg *btp)
1588{ 1594{
1589 unregister_shrinker(&btp->bt_shrinker); 1595 unregister_shrinker(&btp->bt_shrinker);
1596 list_lru_destroy(&btp->bt_lru);
1590 1597
1591 if (mp->m_flags & XFS_MOUNT_BARRIER) 1598 if (mp->m_flags & XFS_MOUNT_BARRIER)
1592 xfs_blkdev_issue_flush(btp); 1599 xfs_blkdev_issue_flush(btp);
@@ -1660,12 +1667,16 @@ xfs_alloc_buftarg(
1660 if (!btp->bt_bdi) 1667 if (!btp->bt_bdi)
1661 goto error; 1668 goto error;
1662 1669
1663 INIT_LIST_HEAD(&btp->bt_lru);
1664 spin_lock_init(&btp->bt_lru_lock);
1665 if (xfs_setsize_buftarg_early(btp, bdev)) 1670 if (xfs_setsize_buftarg_early(btp, bdev))
1666 goto error; 1671 goto error;
1667 btp->bt_shrinker.shrink = xfs_buftarg_shrink; 1672
1673 if (list_lru_init(&btp->bt_lru))
1674 goto error;
1675
1676 btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
1677 btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
1668 btp->bt_shrinker.seeks = DEFAULT_SEEKS; 1678 btp->bt_shrinker.seeks = DEFAULT_SEEKS;
1679 btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
1669 register_shrinker(&btp->bt_shrinker); 1680 register_shrinker(&btp->bt_shrinker);
1670 return btp; 1681 return btp;
1671 1682
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 433a12ed7b17..e65683361017 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -25,6 +25,7 @@
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/buffer_head.h> 26#include <linux/buffer_head.h>
27#include <linux/uio.h> 27#include <linux/uio.h>
28#include <linux/list_lru.h>
28 29
29/* 30/*
30 * Base types 31 * Base types
@@ -59,7 +60,6 @@ typedef enum {
59#define _XBF_KMEM (1 << 21)/* backed by heap memory */ 60#define _XBF_KMEM (1 << 21)/* backed by heap memory */
60#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ 61#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
61#define _XBF_COMPOUND (1 << 23)/* compound buffer */ 62#define _XBF_COMPOUND (1 << 23)/* compound buffer */
62#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */
63 63
64typedef unsigned int xfs_buf_flags_t; 64typedef unsigned int xfs_buf_flags_t;
65 65
@@ -78,8 +78,12 @@ typedef unsigned int xfs_buf_flags_t;
78 { _XBF_PAGES, "PAGES" }, \ 78 { _XBF_PAGES, "PAGES" }, \
79 { _XBF_KMEM, "KMEM" }, \ 79 { _XBF_KMEM, "KMEM" }, \
80 { _XBF_DELWRI_Q, "DELWRI_Q" }, \ 80 { _XBF_DELWRI_Q, "DELWRI_Q" }, \
81 { _XBF_COMPOUND, "COMPOUND" }, \ 81 { _XBF_COMPOUND, "COMPOUND" }
82 { _XBF_LRU_DISPOSE, "LRU_DISPOSE" } 82
83/*
84 * Internal state flags.
85 */
86#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */
83 87
84typedef struct xfs_buftarg { 88typedef struct xfs_buftarg {
85 dev_t bt_dev; 89 dev_t bt_dev;
@@ -92,9 +96,7 @@ typedef struct xfs_buftarg {
92 96
93 /* LRU control structures */ 97 /* LRU control structures */
94 struct shrinker bt_shrinker; 98 struct shrinker bt_shrinker;
95 struct list_head bt_lru; 99 struct list_lru bt_lru;
96 spinlock_t bt_lru_lock;
97 unsigned int bt_lru_nr;
98} xfs_buftarg_t; 100} xfs_buftarg_t;
99 101
100struct xfs_buf; 102struct xfs_buf;
@@ -137,7 +139,8 @@ typedef struct xfs_buf {
137 * bt_lru_lock and not by b_sema 139 * bt_lru_lock and not by b_sema
138 */ 140 */
139 struct list_head b_lru; /* lru list */ 141 struct list_head b_lru; /* lru list */
140 xfs_buf_flags_t b_lru_flags; /* internal lru status flags */ 142 spinlock_t b_lock; /* internal state lock */
143 unsigned int b_state; /* internal state flags */
141 wait_queue_head_t b_waiters; /* unpin waiters */ 144 wait_queue_head_t b_waiters; /* unpin waiters */
142 struct list_head b_list; 145 struct list_head b_list;
143 struct xfs_perag *b_pag; /* contains rbtree root */ 146 struct xfs_perag *b_pag; /* contains rbtree root */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 251c66632e5e..71520e6e5d65 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -940,13 +940,8 @@ xfs_qm_dqput_final(
940 940
941 trace_xfs_dqput_free(dqp); 941 trace_xfs_dqput_free(dqp);
942 942
943 mutex_lock(&qi->qi_lru_lock); 943 if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
944 if (list_empty(&dqp->q_lru)) {
945 list_add_tail(&dqp->q_lru, &qi->qi_lru_list);
946 qi->qi_lru_count++;
947 XFS_STATS_INC(xs_qm_dquot_unused); 944 XFS_STATS_INC(xs_qm_dquot_unused);
948 }
949 mutex_unlock(&qi->qi_lru_lock);
950 945
951 /* 946 /*
952 * If we just added a udquot to the freelist, then we want to release 947 * If we just added a udquot to the freelist, then we want to release
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 16219b9c6790..73b62a24ceac 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1167,7 +1167,7 @@ xfs_reclaim_inodes(
1167 * them to be cleaned, which we hope will not be very long due to the 1167 * them to be cleaned, which we hope will not be very long due to the
1168 * background walker having already kicked the IO off on those dirty inodes. 1168 * background walker having already kicked the IO off on those dirty inodes.
1169 */ 1169 */
1170void 1170long
1171xfs_reclaim_inodes_nr( 1171xfs_reclaim_inodes_nr(
1172 struct xfs_mount *mp, 1172 struct xfs_mount *mp,
1173 int nr_to_scan) 1173 int nr_to_scan)
@@ -1176,7 +1176,7 @@ xfs_reclaim_inodes_nr(
1176 xfs_reclaim_work_queue(mp); 1176 xfs_reclaim_work_queue(mp);
1177 xfs_ail_push_all(mp->m_ail); 1177 xfs_ail_push_all(mp->m_ail);
1178 1178
1179 xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); 1179 return xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
1180} 1180}
1181 1181
1182/* 1182/*
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 8a89f7d791bd..456f0144e1b6 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -46,7 +46,7 @@ void xfs_reclaim_worker(struct work_struct *work);
46 46
47int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 47int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
48int xfs_reclaim_inodes_count(struct xfs_mount *mp); 48int xfs_reclaim_inodes_count(struct xfs_mount *mp);
49void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); 49long xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
50 50
51void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); 51void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
52 52
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 6218a0aeeeea..3e6c2e6c9cd2 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -51,8 +51,9 @@
51 */ 51 */
52STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 52STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
53STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 53STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
54STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *);
55 54
55
56STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp);
56/* 57/*
57 * We use the batch lookup interface to iterate over the dquots as it 58 * We use the batch lookup interface to iterate over the dquots as it
58 * currently is the only interface into the radix tree code that allows 59 * currently is the only interface into the radix tree code that allows
@@ -203,12 +204,9 @@ xfs_qm_dqpurge(
203 * We move dquots to the freelist as soon as their reference count 204 * We move dquots to the freelist as soon as their reference count
204 * hits zero, so it really should be on the freelist here. 205 * hits zero, so it really should be on the freelist here.
205 */ 206 */
206 mutex_lock(&qi->qi_lru_lock);
207 ASSERT(!list_empty(&dqp->q_lru)); 207 ASSERT(!list_empty(&dqp->q_lru));
208 list_del_init(&dqp->q_lru); 208 list_lru_del(&qi->qi_lru, &dqp->q_lru);
209 qi->qi_lru_count--;
210 XFS_STATS_DEC(xs_qm_dquot_unused); 209 XFS_STATS_DEC(xs_qm_dquot_unused);
211 mutex_unlock(&qi->qi_lru_lock);
212 210
213 xfs_qm_dqdestroy(dqp); 211 xfs_qm_dqdestroy(dqp);
214 212
@@ -680,6 +678,143 @@ xfs_qm_calc_dquots_per_chunk(
680 return ndquots; 678 return ndquots;
681} 679}
682 680
681struct xfs_qm_isolate {
682 struct list_head buffers;
683 struct list_head dispose;
684};
685
686static enum lru_status
687xfs_qm_dquot_isolate(
688 struct list_head *item,
689 spinlock_t *lru_lock,
690 void *arg)
691{
692 struct xfs_dquot *dqp = container_of(item,
693 struct xfs_dquot, q_lru);
694 struct xfs_qm_isolate *isol = arg;
695
696 if (!xfs_dqlock_nowait(dqp))
697 goto out_miss_busy;
698
699 /*
700 * This dquot has acquired a reference in the meantime remove it from
701 * the freelist and try again.
702 */
703 if (dqp->q_nrefs) {
704 xfs_dqunlock(dqp);
705 XFS_STATS_INC(xs_qm_dqwants);
706
707 trace_xfs_dqreclaim_want(dqp);
708 list_del_init(&dqp->q_lru);
709 XFS_STATS_DEC(xs_qm_dquot_unused);
710 return LRU_REMOVED;
711 }
712
713 /*
714 * If the dquot is dirty, flush it. If it's already being flushed, just
715 * skip it so there is time for the IO to complete before we try to
716 * reclaim it again on the next LRU pass.
717 */
718 if (!xfs_dqflock_nowait(dqp)) {
719 xfs_dqunlock(dqp);
720 goto out_miss_busy;
721 }
722
723 if (XFS_DQ_IS_DIRTY(dqp)) {
724 struct xfs_buf *bp = NULL;
725 int error;
726
727 trace_xfs_dqreclaim_dirty(dqp);
728
729 /* we have to drop the LRU lock to flush the dquot */
730 spin_unlock(lru_lock);
731
732 error = xfs_qm_dqflush(dqp, &bp);
733 if (error) {
734 xfs_warn(dqp->q_mount, "%s: dquot %p flush failed",
735 __func__, dqp);
736 goto out_unlock_dirty;
737 }
738
739 xfs_buf_delwri_queue(bp, &isol->buffers);
740 xfs_buf_relse(bp);
741 goto out_unlock_dirty;
742 }
743 xfs_dqfunlock(dqp);
744
745 /*
746 * Prevent lookups now that we are past the point of no return.
747 */
748 dqp->dq_flags |= XFS_DQ_FREEING;
749 xfs_dqunlock(dqp);
750
751 ASSERT(dqp->q_nrefs == 0);
752 list_move_tail(&dqp->q_lru, &isol->dispose);
753 XFS_STATS_DEC(xs_qm_dquot_unused);
754 trace_xfs_dqreclaim_done(dqp);
755 XFS_STATS_INC(xs_qm_dqreclaims);
756 return LRU_REMOVED;
757
758out_miss_busy:
759 trace_xfs_dqreclaim_busy(dqp);
760 XFS_STATS_INC(xs_qm_dqreclaim_misses);
761 return LRU_SKIP;
762
763out_unlock_dirty:
764 trace_xfs_dqreclaim_busy(dqp);
765 XFS_STATS_INC(xs_qm_dqreclaim_misses);
766 xfs_dqunlock(dqp);
767 spin_lock(lru_lock);
768 return LRU_RETRY;
769}
770
771static unsigned long
772xfs_qm_shrink_scan(
773 struct shrinker *shrink,
774 struct shrink_control *sc)
775{
776 struct xfs_quotainfo *qi = container_of(shrink,
777 struct xfs_quotainfo, qi_shrinker);
778 struct xfs_qm_isolate isol;
779 unsigned long freed;
780 int error;
781 unsigned long nr_to_scan = sc->nr_to_scan;
782
783 if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
784 return 0;
785
786 INIT_LIST_HEAD(&isol.buffers);
787 INIT_LIST_HEAD(&isol.dispose);
788
789 freed = list_lru_walk_node(&qi->qi_lru, sc->nid, xfs_qm_dquot_isolate, &isol,
790 &nr_to_scan);
791
792 error = xfs_buf_delwri_submit(&isol.buffers);
793 if (error)
794 xfs_warn(NULL, "%s: dquot reclaim failed", __func__);
795
796 while (!list_empty(&isol.dispose)) {
797 struct xfs_dquot *dqp;
798
799 dqp = list_first_entry(&isol.dispose, struct xfs_dquot, q_lru);
800 list_del_init(&dqp->q_lru);
801 xfs_qm_dqfree_one(dqp);
802 }
803
804 return freed;
805}
806
807static unsigned long
808xfs_qm_shrink_count(
809 struct shrinker *shrink,
810 struct shrink_control *sc)
811{
812 struct xfs_quotainfo *qi = container_of(shrink,
813 struct xfs_quotainfo, qi_shrinker);
814
815 return list_lru_count_node(&qi->qi_lru, sc->nid);
816}
817
683/* 818/*
684 * This initializes all the quota information that's kept in the 819 * This initializes all the quota information that's kept in the
685 * mount structure 820 * mount structure
@@ -696,11 +831,18 @@ xfs_qm_init_quotainfo(
696 831
697 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); 832 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
698 833
834 if ((error = list_lru_init(&qinf->qi_lru))) {
835 kmem_free(qinf);
836 mp->m_quotainfo = NULL;
837 return error;
838 }
839
699 /* 840 /*
700 * See if quotainodes are setup, and if not, allocate them, 841 * See if quotainodes are setup, and if not, allocate them,
701 * and change the superblock accordingly. 842 * and change the superblock accordingly.
702 */ 843 */
703 if ((error = xfs_qm_init_quotainos(mp))) { 844 if ((error = xfs_qm_init_quotainos(mp))) {
845 list_lru_destroy(&qinf->qi_lru);
704 kmem_free(qinf); 846 kmem_free(qinf);
705 mp->m_quotainfo = NULL; 847 mp->m_quotainfo = NULL;
706 return error; 848 return error;
@@ -711,10 +853,6 @@ xfs_qm_init_quotainfo(
711 INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS); 853 INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS);
712 mutex_init(&qinf->qi_tree_lock); 854 mutex_init(&qinf->qi_tree_lock);
713 855
714 INIT_LIST_HEAD(&qinf->qi_lru_list);
715 qinf->qi_lru_count = 0;
716 mutex_init(&qinf->qi_lru_lock);
717
718 /* mutex used to serialize quotaoffs */ 856 /* mutex used to serialize quotaoffs */
719 mutex_init(&qinf->qi_quotaofflock); 857 mutex_init(&qinf->qi_quotaofflock);
720 858
@@ -779,8 +917,10 @@ xfs_qm_init_quotainfo(
779 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; 917 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
780 } 918 }
781 919
782 qinf->qi_shrinker.shrink = xfs_qm_shake; 920 qinf->qi_shrinker.count_objects = xfs_qm_shrink_count;
921 qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
783 qinf->qi_shrinker.seeks = DEFAULT_SEEKS; 922 qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
923 qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
784 register_shrinker(&qinf->qi_shrinker); 924 register_shrinker(&qinf->qi_shrinker);
785 return 0; 925 return 0;
786} 926}
@@ -801,6 +941,7 @@ xfs_qm_destroy_quotainfo(
801 ASSERT(qi != NULL); 941 ASSERT(qi != NULL);
802 942
803 unregister_shrinker(&qi->qi_shrinker); 943 unregister_shrinker(&qi->qi_shrinker);
944 list_lru_destroy(&qi->qi_lru);
804 945
805 if (qi->qi_uquotaip) { 946 if (qi->qi_uquotaip) {
806 IRELE(qi->qi_uquotaip); 947 IRELE(qi->qi_uquotaip);
@@ -1599,132 +1740,6 @@ xfs_qm_dqfree_one(
1599 xfs_qm_dqdestroy(dqp); 1740 xfs_qm_dqdestroy(dqp);
1600} 1741}
1601 1742
1602STATIC void
1603xfs_qm_dqreclaim_one(
1604 struct xfs_dquot *dqp,
1605 struct list_head *buffer_list,
1606 struct list_head *dispose_list)
1607{
1608 struct xfs_mount *mp = dqp->q_mount;
1609 struct xfs_quotainfo *qi = mp->m_quotainfo;
1610 int error;
1611
1612 if (!xfs_dqlock_nowait(dqp))
1613 goto out_move_tail;
1614
1615 /*
1616 * This dquot has acquired a reference in the meantime remove it from
1617 * the freelist and try again.
1618 */
1619 if (dqp->q_nrefs) {
1620 xfs_dqunlock(dqp);
1621
1622 trace_xfs_dqreclaim_want(dqp);
1623 XFS_STATS_INC(xs_qm_dqwants);
1624
1625 list_del_init(&dqp->q_lru);
1626 qi->qi_lru_count--;
1627 XFS_STATS_DEC(xs_qm_dquot_unused);
1628 return;
1629 }
1630
1631 /*
1632 * Try to grab the flush lock. If this dquot is in the process of
1633 * getting flushed to disk, we don't want to reclaim it.
1634 */
1635 if (!xfs_dqflock_nowait(dqp))
1636 goto out_unlock_move_tail;
1637
1638 if (XFS_DQ_IS_DIRTY(dqp)) {
1639 struct xfs_buf *bp = NULL;
1640
1641 trace_xfs_dqreclaim_dirty(dqp);
1642
1643 error = xfs_qm_dqflush(dqp, &bp);
1644 if (error) {
1645 xfs_warn(mp, "%s: dquot %p flush failed",
1646 __func__, dqp);
1647 goto out_unlock_move_tail;
1648 }
1649
1650 xfs_buf_delwri_queue(bp, buffer_list);
1651 xfs_buf_relse(bp);
1652 /*
1653 * Give the dquot another try on the freelist, as the
1654 * flushing will take some time.
1655 */
1656 goto out_unlock_move_tail;
1657 }
1658 xfs_dqfunlock(dqp);
1659
1660 /*
1661 * Prevent lookups now that we are past the point of no return.
1662 */
1663 dqp->dq_flags |= XFS_DQ_FREEING;
1664 xfs_dqunlock(dqp);
1665
1666 ASSERT(dqp->q_nrefs == 0);
1667 list_move_tail(&dqp->q_lru, dispose_list);
1668 qi->qi_lru_count--;
1669 XFS_STATS_DEC(xs_qm_dquot_unused);
1670
1671 trace_xfs_dqreclaim_done(dqp);
1672 XFS_STATS_INC(xs_qm_dqreclaims);
1673 return;
1674
1675 /*
1676 * Move the dquot to the tail of the list so that we don't spin on it.
1677 */
1678out_unlock_move_tail:
1679 xfs_dqunlock(dqp);
1680out_move_tail:
1681 list_move_tail(&dqp->q_lru, &qi->qi_lru_list);
1682 trace_xfs_dqreclaim_busy(dqp);
1683 XFS_STATS_INC(xs_qm_dqreclaim_misses);
1684}
1685
1686STATIC int
1687xfs_qm_shake(
1688 struct shrinker *shrink,
1689 struct shrink_control *sc)
1690{
1691 struct xfs_quotainfo *qi =
1692 container_of(shrink, struct xfs_quotainfo, qi_shrinker);
1693 int nr_to_scan = sc->nr_to_scan;
1694 LIST_HEAD (buffer_list);
1695 LIST_HEAD (dispose_list);
1696 struct xfs_dquot *dqp;
1697 int error;
1698
1699 if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
1700 return 0;
1701 if (!nr_to_scan)
1702 goto out;
1703
1704 mutex_lock(&qi->qi_lru_lock);
1705 while (!list_empty(&qi->qi_lru_list)) {
1706 if (nr_to_scan-- <= 0)
1707 break;
1708 dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot,
1709 q_lru);
1710 xfs_qm_dqreclaim_one(dqp, &buffer_list, &dispose_list);
1711 }
1712 mutex_unlock(&qi->qi_lru_lock);
1713
1714 error = xfs_buf_delwri_submit(&buffer_list);
1715 if (error)
1716 xfs_warn(NULL, "%s: dquot reclaim failed", __func__);
1717
1718 while (!list_empty(&dispose_list)) {
1719 dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru);
1720 list_del_init(&dqp->q_lru);
1721 xfs_qm_dqfree_one(dqp);
1722 }
1723
1724out:
1725 return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure;
1726}
1727
1728/* 1743/*
1729 * Start a transaction and write the incore superblock changes to 1744 * Start a transaction and write the incore superblock changes to
1730 * disk. flags parameter indicates which fields have changed. 1745 * disk. flags parameter indicates which fields have changed.
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 670cd4464070..2b602df9c242 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -49,9 +49,7 @@ typedef struct xfs_quotainfo {
49 struct xfs_inode *qi_uquotaip; /* user quota inode */ 49 struct xfs_inode *qi_uquotaip; /* user quota inode */
50 struct xfs_inode *qi_gquotaip; /* group quota inode */ 50 struct xfs_inode *qi_gquotaip; /* group quota inode */
51 struct xfs_inode *qi_pquotaip; /* project quota inode */ 51 struct xfs_inode *qi_pquotaip; /* project quota inode */
52 struct list_head qi_lru_list; 52 struct list_lru qi_lru;
53 struct mutex qi_lru_lock;
54 int qi_lru_count;
55 int qi_dquots; 53 int qi_dquots;
56 time_t qi_btimelimit; /* limit for blks timer */ 54 time_t qi_btimelimit; /* limit for blks timer */
57 time_t qi_itimelimit; /* limit for inodes timer */ 55 time_t qi_itimelimit; /* limit for inodes timer */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 979a77d4b87d..15188cc99449 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1535,19 +1535,21 @@ xfs_fs_mount(
1535 return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); 1535 return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
1536} 1536}
1537 1537
1538static int 1538static long
1539xfs_fs_nr_cached_objects( 1539xfs_fs_nr_cached_objects(
1540 struct super_block *sb) 1540 struct super_block *sb,
1541 int nid)
1541{ 1542{
1542 return xfs_reclaim_inodes_count(XFS_M(sb)); 1543 return xfs_reclaim_inodes_count(XFS_M(sb));
1543} 1544}
1544 1545
1545static void 1546static long
1546xfs_fs_free_cached_objects( 1547xfs_fs_free_cached_objects(
1547 struct super_block *sb, 1548 struct super_block *sb,
1548 int nr_to_scan) 1549 long nr_to_scan,
1550 int nid)
1549{ 1551{
1550 xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); 1552 return xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
1551} 1553}
1552 1554
1553static const struct super_operations xfs_super_operations = { 1555static const struct super_operations xfs_super_operations = {
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index feaa8d88eef7..59066e0b4ff1 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -55,11 +55,11 @@ struct qstr {
55#define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) 55#define hashlen_len(hashlen) ((u32)((hashlen) >> 32))
56 56
57struct dentry_stat_t { 57struct dentry_stat_t {
58 int nr_dentry; 58 long nr_dentry;
59 int nr_unused; 59 long nr_unused;
60 int age_limit; /* age in seconds */ 60 long age_limit; /* age in seconds */
61 int want_pages; /* pages requested by system */ 61 long want_pages; /* pages requested by system */
62 int dummy[2]; 62 long dummy[2];
63}; 63};
64extern struct dentry_stat_t dentry_stat; 64extern struct dentry_stat_t dentry_stat;
65 65
@@ -395,4 +395,8 @@ static inline bool d_mountpoint(const struct dentry *dentry)
395 395
396extern int sysctl_vfs_cache_pressure; 396extern int sysctl_vfs_cache_pressure;
397 397
398static inline unsigned long vfs_pressure_ratio(unsigned long val)
399{
400 return mult_frac(val, sysctl_vfs_cache_pressure, 100);
401}
398#endif /* __LINUX_DCACHE_H */ 402#endif /* __LINUX_DCACHE_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 49e71b0f0e9f..a4acd3c61190 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -10,6 +10,7 @@
10#include <linux/stat.h> 10#include <linux/stat.h>
11#include <linux/cache.h> 11#include <linux/cache.h>
12#include <linux/list.h> 12#include <linux/list.h>
13#include <linux/list_lru.h>
13#include <linux/llist.h> 14#include <linux/llist.h>
14#include <linux/radix-tree.h> 15#include <linux/radix-tree.h>
15#include <linux/rbtree.h> 16#include <linux/rbtree.h>
@@ -1269,15 +1270,6 @@ struct super_block {
1269 struct list_head s_files; 1270 struct list_head s_files;
1270#endif 1271#endif
1271 struct list_head s_mounts; /* list of mounts; _not_ for fs use */ 1272 struct list_head s_mounts; /* list of mounts; _not_ for fs use */
1272 /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
1273 struct list_head s_dentry_lru; /* unused dentry lru */
1274 int s_nr_dentry_unused; /* # of dentry on lru */
1275
1276 /* s_inode_lru_lock protects s_inode_lru and s_nr_inodes_unused */
1277 spinlock_t s_inode_lru_lock ____cacheline_aligned_in_smp;
1278 struct list_head s_inode_lru; /* unused inode lru */
1279 int s_nr_inodes_unused; /* # of inodes on lru */
1280
1281 struct block_device *s_bdev; 1273 struct block_device *s_bdev;
1282 struct backing_dev_info *s_bdi; 1274 struct backing_dev_info *s_bdi;
1283 struct mtd_info *s_mtd; 1275 struct mtd_info *s_mtd;
@@ -1331,11 +1323,14 @@ struct super_block {
1331 1323
1332 /* AIO completions deferred from interrupt context */ 1324 /* AIO completions deferred from interrupt context */
1333 struct workqueue_struct *s_dio_done_wq; 1325 struct workqueue_struct *s_dio_done_wq;
1334};
1335 1326
1336/* superblock cache pruning functions */ 1327 /*
1337extern void prune_icache_sb(struct super_block *sb, int nr_to_scan); 1328 * Keep the lru lists last in the structure so they always sit on their
1338extern void prune_dcache_sb(struct super_block *sb, int nr_to_scan); 1329 * own individual cachelines.
1330 */
1331 struct list_lru s_dentry_lru ____cacheline_aligned_in_smp;
1332 struct list_lru s_inode_lru ____cacheline_aligned_in_smp;
1333};
1339 1334
1340extern struct timespec current_fs_time(struct super_block *sb); 1335extern struct timespec current_fs_time(struct super_block *sb);
1341 1336
@@ -1629,8 +1624,8 @@ struct super_operations {
1629 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); 1624 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
1630#endif 1625#endif
1631 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); 1626 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
1632 int (*nr_cached_objects)(struct super_block *); 1627 long (*nr_cached_objects)(struct super_block *, int);
1633 void (*free_cached_objects)(struct super_block *, int); 1628 long (*free_cached_objects)(struct super_block *, long, int);
1634}; 1629};
1635 1630
1636/* 1631/*
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
new file mode 100644
index 000000000000..3ce541753c88
--- /dev/null
+++ b/include/linux/list_lru.h
@@ -0,0 +1,131 @@
1/*
2 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
3 * Authors: David Chinner and Glauber Costa
4 *
5 * Generic LRU infrastructure
6 */
7#ifndef _LRU_LIST_H
8#define _LRU_LIST_H
9
10#include <linux/list.h>
11#include <linux/nodemask.h>
12
13/* list_lru_walk_cb has to always return one of those */
14enum lru_status {
15 LRU_REMOVED, /* item removed from list */
16 LRU_ROTATE, /* item referenced, give another pass */
17 LRU_SKIP, /* item cannot be locked, skip */
18 LRU_RETRY, /* item not freeable. May drop the lock
19 internally, but has to return locked. */
20};
21
22struct list_lru_node {
23 spinlock_t lock;
24 struct list_head list;
25 /* kept as signed so we can catch imbalance bugs */
26 long nr_items;
27} ____cacheline_aligned_in_smp;
28
29struct list_lru {
30 struct list_lru_node *node;
31 nodemask_t active_nodes;
32};
33
34void list_lru_destroy(struct list_lru *lru);
35int list_lru_init(struct list_lru *lru);
36
37/**
38 * list_lru_add: add an element to the lru list's tail
39 * @list_lru: the lru pointer
40 * @item: the item to be added.
41 *
42 * If the element is already part of a list, this function returns doing
43 * nothing. Therefore the caller does not need to keep state about whether or
44 * not the element already belongs in the list and is allowed to lazy update
45 * it. Note however that this is valid for *a* list, not *this* list. If
46 * the caller organize itself in a way that elements can be in more than
47 * one type of list, it is up to the caller to fully remove the item from
48 * the previous list (with list_lru_del() for instance) before moving it
49 * to @list_lru
50 *
51 * Return value: true if the list was updated, false otherwise
52 */
53bool list_lru_add(struct list_lru *lru, struct list_head *item);
54
55/**
 56 * list_lru_del: delete an element from the lru list
57 * @list_lru: the lru pointer
58 * @item: the item to be deleted.
59 *
60 * This function works analogously as list_lru_add in terms of list
61 * manipulation. The comments about an element already pertaining to
62 * a list are also valid for list_lru_del.
63 *
64 * Return value: true if the list was updated, false otherwise
65 */
66bool list_lru_del(struct list_lru *lru, struct list_head *item);
67
68/**
69 * list_lru_count_node: return the number of objects currently held by @lru
70 * @lru: the lru pointer.
71 * @nid: the node id to count from.
72 *
 73 * Always returns a non-negative number, 0 for empty lists. There is no
74 * guarantee that the list is not updated while the count is being computed.
75 * Callers that want such a guarantee need to provide an outer lock.
76 */
77unsigned long list_lru_count_node(struct list_lru *lru, int nid);
78static inline unsigned long list_lru_count(struct list_lru *lru)
79{
80 long count = 0;
81 int nid;
82
83 for_each_node_mask(nid, lru->active_nodes)
84 count += list_lru_count_node(lru, nid);
85
86 return count;
87}
88
89typedef enum lru_status
90(*list_lru_walk_cb)(struct list_head *item, spinlock_t *lock, void *cb_arg);
91/**
92 * list_lru_walk_node: walk a list_lru, isolating and disposing freeable items.
93 * @lru: the lru pointer.
94 * @nid: the node id to scan from.
 95 * @isolate: callback function that is responsible for deciding what to do with
96 * the item currently being scanned
97 * @cb_arg: opaque type that will be passed to @isolate
98 * @nr_to_walk: how many items to scan.
99 *
100 * This function will scan all elements in a particular list_lru, calling the
101 * @isolate callback for each of those items, along with the current list
102 * spinlock and a caller-provided opaque. The @isolate callback can choose to
103 * drop the lock internally, but *must* return with the lock held. The callback
104 * will return an enum lru_status telling the list_lru infrastructure what to
105 * do with the object being scanned.
106 *
107 * Please note that nr_to_walk does not mean how many objects will be freed,
108 * just how many objects will be scanned.
109 *
110 * Return value: the number of objects effectively removed from the LRU.
111 */
112unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
113 list_lru_walk_cb isolate, void *cb_arg,
114 unsigned long *nr_to_walk);
115
116static inline unsigned long
117list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate,
118 void *cb_arg, unsigned long nr_to_walk)
119{
120 long isolated = 0;
121 int nid;
122
123 for_each_node_mask(nid, lru->active_nodes) {
124 isolated += list_lru_walk_node(lru, nid, isolate,
125 cb_arg, &nr_to_walk);
126 if (nr_to_walk <= 0)
127 break;
128 }
129 return isolated;
130}
131#endif /* _LRU_LIST_H */
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index ac6b8ee07825..68c097077ef0 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -4,39 +4,67 @@
4/* 4/*
5 * This struct is used to pass information from page reclaim to the shrinkers. 5 * This struct is used to pass information from page reclaim to the shrinkers.
 6 * We consolidate the values for easier extension later. 6 * We consolidate the values for easier extension later.
7 *
8 * The 'gfpmask' refers to the allocation we are currently trying to
9 * fulfil.
7 */ 10 */
8struct shrink_control { 11struct shrink_control {
9 gfp_t gfp_mask; 12 gfp_t gfp_mask;
10 13
11 /* How many slab objects shrinker() should scan and try to reclaim */ 14 /*
15 * How many objects scan_objects should scan and try to reclaim.
16 * This is reset before every call, so it is safe for callees
17 * to modify.
18 */
12 unsigned long nr_to_scan; 19 unsigned long nr_to_scan;
20
21 /* shrink from these nodes */
22 nodemask_t nodes_to_scan;
23 /* current node being shrunk (for NUMA aware shrinkers) */
24 int nid;
13}; 25};
14 26
27#define SHRINK_STOP (~0UL)
15/* 28/*
16 * A callback you can register to apply pressure to ageable caches. 29 * A callback you can register to apply pressure to ageable caches.
17 * 30 *
18 * 'sc' is passed shrink_control which includes a count 'nr_to_scan' 31 * @count_objects should return the number of freeable items in the cache. If
19 * and a 'gfpmask'. It should look through the least-recently-used 32 * there are no objects to free or the number of freeable items cannot be
20 * 'nr_to_scan' entries and attempt to free them up. It should return 33 * determined, it should return 0. No deadlock checks should be done during the
21 * the number of objects which remain in the cache. If it returns -1, it means 34 * count callback - the shrinker relies on aggregating scan counts that couldn't
22 * it cannot do any scanning at this time (eg. there is a risk of deadlock). 35 * be executed due to potential deadlocks to be run at a later call when the
36 * deadlock condition is no longer pending.
23 * 37 *
24 * The 'gfpmask' refers to the allocation we are currently trying to 38 * @scan_objects will only be called if @count_objects returned a non-zero
25 * fulfil. 39 * value for the number of freeable objects. The callout should scan the cache
40 * and attempt to free items from the cache. It should then return the number
41 * of objects freed during the scan, or SHRINK_STOP if progress cannot be made
42 * due to potential deadlocks. If SHRINK_STOP is returned, then no further
43 * attempts to call the @scan_objects will be made from the current reclaim
44 * context.
26 * 45 *
27 * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is 46 * @flags determine the shrinker abilities, like numa awareness
28 * querying the cache size, so a fastpath for that case is appropriate.
29 */ 47 */
30struct shrinker { 48struct shrinker {
31 int (*shrink)(struct shrinker *, struct shrink_control *sc); 49 unsigned long (*count_objects)(struct shrinker *,
50 struct shrink_control *sc);
51 unsigned long (*scan_objects)(struct shrinker *,
52 struct shrink_control *sc);
53
32 int seeks; /* seeks to recreate an obj */ 54 int seeks; /* seeks to recreate an obj */
33 long batch; /* reclaim batch size, 0 = default */ 55 long batch; /* reclaim batch size, 0 = default */
56 unsigned long flags;
34 57
35 /* These are for internal use */ 58 /* These are for internal use */
36 struct list_head list; 59 struct list_head list;
37 atomic_long_t nr_in_batch; /* objs pending delete */ 60 /* objs pending delete, per node */
61 atomic_long_t *nr_deferred;
38}; 62};
39#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ 63#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
40extern void register_shrinker(struct shrinker *); 64
65/* Flags */
66#define SHRINKER_NUMA_AWARE (1 << 0)
67
68extern int register_shrinker(struct shrinker *);
41extern void unregister_shrinker(struct shrinker *); 69extern void unregister_shrinker(struct shrinker *);
42#endif 70#endif
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 63cfcccaebb3..132a985aba8b 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -202,7 +202,7 @@ TRACE_EVENT(mm_shrink_slab_start,
202 202
203 TP_fast_assign( 203 TP_fast_assign(
204 __entry->shr = shr; 204 __entry->shr = shr;
205 __entry->shrink = shr->shrink; 205 __entry->shrink = shr->scan_objects;
206 __entry->nr_objects_to_shrink = nr_objects_to_shrink; 206 __entry->nr_objects_to_shrink = nr_objects_to_shrink;
207 __entry->gfp_flags = sc->gfp_mask; 207 __entry->gfp_flags = sc->gfp_mask;
208 __entry->pgs_scanned = pgs_scanned; 208 __entry->pgs_scanned = pgs_scanned;
@@ -241,7 +241,7 @@ TRACE_EVENT(mm_shrink_slab_end,
241 241
242 TP_fast_assign( 242 TP_fast_assign(
243 __entry->shr = shr; 243 __entry->shr = shr;
244 __entry->shrink = shr->shrink; 244 __entry->shrink = shr->scan_objects;
245 __entry->unused_scan = unused_scan_cnt; 245 __entry->unused_scan = unused_scan_cnt;
246 __entry->new_scan = new_scan_cnt; 246 __entry->new_scan = new_scan_cnt;
247 __entry->retval = shrinker_retval; 247 __entry->retval = shrinker_retval;
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index a4ed56cf0eac..6c28b61bb690 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -49,9 +49,9 @@ struct files_stat_struct {
49}; 49};
50 50
51struct inodes_stat_t { 51struct inodes_stat_t {
52 int nr_inodes; 52 long nr_inodes;
53 int nr_unused; 53 long nr_unused;
54 int dummy[5]; /* padding for sysctl ABI compatibility */ 54 long dummy[5]; /* padding for sysctl ABI compatibility */
55}; 55};
56 56
57 57
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 07f6fc468e17..7822cd88a95c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1471,14 +1471,14 @@ static struct ctl_table fs_table[] = {
1471 { 1471 {
1472 .procname = "inode-nr", 1472 .procname = "inode-nr",
1473 .data = &inodes_stat, 1473 .data = &inodes_stat,
1474 .maxlen = 2*sizeof(int), 1474 .maxlen = 2*sizeof(long),
1475 .mode = 0444, 1475 .mode = 0444,
1476 .proc_handler = proc_nr_inodes, 1476 .proc_handler = proc_nr_inodes,
1477 }, 1477 },
1478 { 1478 {
1479 .procname = "inode-state", 1479 .procname = "inode-state",
1480 .data = &inodes_stat, 1480 .data = &inodes_stat,
1481 .maxlen = 7*sizeof(int), 1481 .maxlen = 7*sizeof(long),
1482 .mode = 0444, 1482 .mode = 0444,
1483 .proc_handler = proc_nr_inodes, 1483 .proc_handler = proc_nr_inodes,
1484 }, 1484 },
@@ -1508,7 +1508,7 @@ static struct ctl_table fs_table[] = {
1508 { 1508 {
1509 .procname = "dentry-state", 1509 .procname = "dentry-state",
1510 .data = &dentry_stat, 1510 .data = &dentry_stat,
1511 .maxlen = 6*sizeof(int), 1511 .maxlen = 6*sizeof(long),
1512 .mode = 0444, 1512 .mode = 0444,
1513 .proc_handler = proc_nr_dentry, 1513 .proc_handler = proc_nr_dentry,
1514 }, 1514 },
diff --git a/mm/Makefile b/mm/Makefile
index f00803386a67..305d10acd081 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -17,7 +17,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
17 util.o mmzone.o vmstat.o backing-dev.o \ 17 util.o mmzone.o vmstat.o backing-dev.o \
18 mm_init.o mmu_context.o percpu.o slab_common.o \ 18 mm_init.o mmu_context.o percpu.o slab_common.o \
19 compaction.o balloon_compaction.o \ 19 compaction.o balloon_compaction.o \
20 interval_tree.o $(mmu-y) 20 interval_tree.o list_lru.o $(mmu-y)
21 21
22obj-y += init-mm.o 22obj-y += init-mm.o
23 23
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a92012a71702..d94f7dee3997 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -211,24 +211,29 @@ static void put_huge_zero_page(void)
211 BUG_ON(atomic_dec_and_test(&huge_zero_refcount)); 211 BUG_ON(atomic_dec_and_test(&huge_zero_refcount));
212} 212}
213 213
214static int shrink_huge_zero_page(struct shrinker *shrink, 214static unsigned long shrink_huge_zero_page_count(struct shrinker *shrink,
215 struct shrink_control *sc) 215 struct shrink_control *sc)
216{ 216{
217 if (!sc->nr_to_scan) 217 /* we can free zero page only if last reference remains */
218 /* we can free zero page only if last reference remains */ 218 return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0;
219 return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0; 219}
220 220
221static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
222 struct shrink_control *sc)
223{
221 if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { 224 if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
222 struct page *zero_page = xchg(&huge_zero_page, NULL); 225 struct page *zero_page = xchg(&huge_zero_page, NULL);
223 BUG_ON(zero_page == NULL); 226 BUG_ON(zero_page == NULL);
224 __free_page(zero_page); 227 __free_page(zero_page);
228 return HPAGE_PMD_NR;
225 } 229 }
226 230
227 return 0; 231 return 0;
228} 232}
229 233
230static struct shrinker huge_zero_page_shrinker = { 234static struct shrinker huge_zero_page_shrinker = {
231 .shrink = shrink_huge_zero_page, 235 .count_objects = shrink_huge_zero_page_count,
236 .scan_objects = shrink_huge_zero_page_scan,
232 .seeks = DEFAULT_SEEKS, 237 .seeks = DEFAULT_SEEKS,
233}; 238};
234 239
diff --git a/mm/list_lru.c b/mm/list_lru.c
new file mode 100644
index 000000000000..72467914b856
--- /dev/null
+++ b/mm/list_lru.c
@@ -0,0 +1,139 @@
1/*
2 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
3 * Authors: David Chinner and Glauber Costa
4 *
5 * Generic LRU infrastructure
6 */
7#include <linux/kernel.h>
8#include <linux/module.h>
9#include <linux/mm.h>
10#include <linux/list_lru.h>
11#include <linux/slab.h>
12
13bool list_lru_add(struct list_lru *lru, struct list_head *item)
14{
15 int nid = page_to_nid(virt_to_page(item));
16 struct list_lru_node *nlru = &lru->node[nid];
17
18 spin_lock(&nlru->lock);
19 WARN_ON_ONCE(nlru->nr_items < 0);
20 if (list_empty(item)) {
21 list_add_tail(item, &nlru->list);
22 if (nlru->nr_items++ == 0)
23 node_set(nid, lru->active_nodes);
24 spin_unlock(&nlru->lock);
25 return true;
26 }
27 spin_unlock(&nlru->lock);
28 return false;
29}
30EXPORT_SYMBOL_GPL(list_lru_add);
31
32bool list_lru_del(struct list_lru *lru, struct list_head *item)
33{
34 int nid = page_to_nid(virt_to_page(item));
35 struct list_lru_node *nlru = &lru->node[nid];
36
37 spin_lock(&nlru->lock);
38 if (!list_empty(item)) {
39 list_del_init(item);
40 if (--nlru->nr_items == 0)
41 node_clear(nid, lru->active_nodes);
42 WARN_ON_ONCE(nlru->nr_items < 0);
43 spin_unlock(&nlru->lock);
44 return true;
45 }
46 spin_unlock(&nlru->lock);
47 return false;
48}
49EXPORT_SYMBOL_GPL(list_lru_del);
50
51unsigned long
52list_lru_count_node(struct list_lru *lru, int nid)
53{
54 unsigned long count = 0;
55 struct list_lru_node *nlru = &lru->node[nid];
56
57 spin_lock(&nlru->lock);
58 WARN_ON_ONCE(nlru->nr_items < 0);
59 count += nlru->nr_items;
60 spin_unlock(&nlru->lock);
61
62 return count;
63}
64EXPORT_SYMBOL_GPL(list_lru_count_node);
65
66unsigned long
67list_lru_walk_node(struct list_lru *lru, int nid, list_lru_walk_cb isolate,
68 void *cb_arg, unsigned long *nr_to_walk)
69{
70
71 struct list_lru_node *nlru = &lru->node[nid];
72 struct list_head *item, *n;
73 unsigned long isolated = 0;
74
75 spin_lock(&nlru->lock);
76restart:
77 list_for_each_safe(item, n, &nlru->list) {
78 enum lru_status ret;
79
80 /*
81 * decrement nr_to_walk first so that we don't livelock if we
 82 * get stuck on large numbers of LRU_RETRY items
83 */
84 if (--(*nr_to_walk) == 0)
85 break;
86
87 ret = isolate(item, &nlru->lock, cb_arg);
88 switch (ret) {
89 case LRU_REMOVED:
90 if (--nlru->nr_items == 0)
91 node_clear(nid, lru->active_nodes);
92 WARN_ON_ONCE(nlru->nr_items < 0);
93 isolated++;
94 break;
95 case LRU_ROTATE:
96 list_move_tail(item, &nlru->list);
97 break;
98 case LRU_SKIP:
99 break;
100 case LRU_RETRY:
101 /*
102 * The lru lock has been dropped, our list traversal is
103 * now invalid and so we have to restart from scratch.
104 */
105 goto restart;
106 default:
107 BUG();
108 }
109 }
110
111 spin_unlock(&nlru->lock);
112 return isolated;
113}
114EXPORT_SYMBOL_GPL(list_lru_walk_node);
115
116int list_lru_init(struct list_lru *lru)
117{
118 int i;
119 size_t size = sizeof(*lru->node) * nr_node_ids;
120
121 lru->node = kzalloc(size, GFP_KERNEL);
122 if (!lru->node)
123 return -ENOMEM;
124
125 nodes_clear(lru->active_nodes);
126 for (i = 0; i < nr_node_ids; i++) {
127 spin_lock_init(&lru->node[i].lock);
128 INIT_LIST_HEAD(&lru->node[i].list);
129 lru->node[i].nr_items = 0;
130 }
131 return 0;
132}
133EXPORT_SYMBOL_GPL(list_lru_init);
134
135void list_lru_destroy(struct list_lru *lru)
136{
137 kfree(lru->node);
138}
139EXPORT_SYMBOL_GPL(list_lru_destroy);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index d84c5e5331bb..baa4e0a45dec 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -248,10 +248,12 @@ void shake_page(struct page *p, int access)
248 */ 248 */
249 if (access) { 249 if (access) {
250 int nr; 250 int nr;
251 int nid = page_to_nid(p);
251 do { 252 do {
252 struct shrink_control shrink = { 253 struct shrink_control shrink = {
253 .gfp_mask = GFP_KERNEL, 254 .gfp_mask = GFP_KERNEL,
254 }; 255 };
256 node_set(nid, shrink.nodes_to_scan);
255 257
256 nr = shrink_slab(&shrink, 1000, 1000); 258 nr = shrink_slab(&shrink, 1000, 1000);
257 if (page_count(p) == 1) 259 if (page_count(p) == 1)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2cff0d491c6d..e36454220614 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -155,14 +155,31 @@ static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
155} 155}
156 156
157/* 157/*
158 * Add a shrinker callback to be called from the vm 158 * Add a shrinker callback to be called from the vm.
159 */ 159 */
160void register_shrinker(struct shrinker *shrinker) 160int register_shrinker(struct shrinker *shrinker)
161{ 161{
162 atomic_long_set(&shrinker->nr_in_batch, 0); 162 size_t size = sizeof(*shrinker->nr_deferred);
163
164 /*
165 * If we only have one possible node in the system anyway, save
166 * ourselves the trouble and disable NUMA aware behavior. This way we
167 * will save memory and some small loop time later.
168 */
169 if (nr_node_ids == 1)
170 shrinker->flags &= ~SHRINKER_NUMA_AWARE;
171
172 if (shrinker->flags & SHRINKER_NUMA_AWARE)
173 size *= nr_node_ids;
174
175 shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
176 if (!shrinker->nr_deferred)
177 return -ENOMEM;
178
163 down_write(&shrinker_rwsem); 179 down_write(&shrinker_rwsem);
164 list_add_tail(&shrinker->list, &shrinker_list); 180 list_add_tail(&shrinker->list, &shrinker_list);
165 up_write(&shrinker_rwsem); 181 up_write(&shrinker_rwsem);
182 return 0;
166} 183}
167EXPORT_SYMBOL(register_shrinker); 184EXPORT_SYMBOL(register_shrinker);
168 185
@@ -177,15 +194,102 @@ void unregister_shrinker(struct shrinker *shrinker)
177} 194}
178EXPORT_SYMBOL(unregister_shrinker); 195EXPORT_SYMBOL(unregister_shrinker);
179 196
180static inline int do_shrinker_shrink(struct shrinker *shrinker, 197#define SHRINK_BATCH 128
181 struct shrink_control *sc, 198
182 unsigned long nr_to_scan) 199static unsigned long
183{ 200shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
184 sc->nr_to_scan = nr_to_scan; 201 unsigned long nr_pages_scanned, unsigned long lru_pages)
185 return (*shrinker->shrink)(shrinker, sc); 202{
203 unsigned long freed = 0;
204 unsigned long long delta;
205 long total_scan;
206 long max_pass;
207 long nr;
208 long new_nr;
209 int nid = shrinkctl->nid;
210 long batch_size = shrinker->batch ? shrinker->batch
211 : SHRINK_BATCH;
212
213 max_pass = shrinker->count_objects(shrinker, shrinkctl);
214 if (max_pass == 0)
215 return 0;
216
217 /*
218 * copy the current shrinker scan count into a local variable
219 * and zero it so that other concurrent shrinker invocations
220 * don't also do this scanning work.
221 */
222 nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
223
224 total_scan = nr;
225 delta = (4 * nr_pages_scanned) / shrinker->seeks;
226 delta *= max_pass;
227 do_div(delta, lru_pages + 1);
228 total_scan += delta;
229 if (total_scan < 0) {
230 printk(KERN_ERR
231 "shrink_slab: %pF negative objects to delete nr=%ld\n",
232 shrinker->scan_objects, total_scan);
233 total_scan = max_pass;
234 }
235
236 /*
237 * We need to avoid excessive windup on filesystem shrinkers
238 * due to large numbers of GFP_NOFS allocations causing the
239 * shrinkers to return -1 all the time. This results in a large
240 * nr being built up so when a shrink that can do some work
241 * comes along it empties the entire cache due to nr >>>
242 * max_pass. This is bad for sustaining a working set in
243 * memory.
244 *
245 * Hence only allow the shrinker to scan the entire cache when
246 * a large delta change is calculated directly.
247 */
248 if (delta < max_pass / 4)
249 total_scan = min(total_scan, max_pass / 2);
250
251 /*
252 * Avoid risking looping forever due to too large nr value:
253 * never try to free more than twice the estimate number of
254 * freeable entries.
255 */
256 if (total_scan > max_pass * 2)
257 total_scan = max_pass * 2;
258
259 trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
260 nr_pages_scanned, lru_pages,
261 max_pass, delta, total_scan);
262
263 while (total_scan >= batch_size) {
264 unsigned long ret;
265
266 shrinkctl->nr_to_scan = batch_size;
267 ret = shrinker->scan_objects(shrinker, shrinkctl);
268 if (ret == SHRINK_STOP)
269 break;
270 freed += ret;
271
272 count_vm_events(SLABS_SCANNED, batch_size);
273 total_scan -= batch_size;
274
275 cond_resched();
276 }
277
278 /*
279 * move the unused scan count back into the shrinker in a
280 * manner that handles concurrent updates. If we exhausted the
281 * scan, there is no need to do an update.
282 */
283 if (total_scan > 0)
284 new_nr = atomic_long_add_return(total_scan,
285 &shrinker->nr_deferred[nid]);
286 else
287 new_nr = atomic_long_read(&shrinker->nr_deferred[nid]);
288
289 trace_mm_shrink_slab_end(shrinker, freed, nr, new_nr);
290 return freed;
186} 291}
187 292
188#define SHRINK_BATCH 128
189/* 293/*
190 * Call the shrink functions to age shrinkable caches 294 * Call the shrink functions to age shrinkable caches
191 * 295 *
@@ -205,115 +309,45 @@ static inline int do_shrinker_shrink(struct shrinker *shrinker,
205 * 309 *
206 * Returns the number of slab objects which we shrunk. 310 * Returns the number of slab objects which we shrunk.
207 */ 311 */
208unsigned long shrink_slab(struct shrink_control *shrink, 312unsigned long shrink_slab(struct shrink_control *shrinkctl,
209 unsigned long nr_pages_scanned, 313 unsigned long nr_pages_scanned,
210 unsigned long lru_pages) 314 unsigned long lru_pages)
211{ 315{
212 struct shrinker *shrinker; 316 struct shrinker *shrinker;
213 unsigned long ret = 0; 317 unsigned long freed = 0;
214 318
215 if (nr_pages_scanned == 0) 319 if (nr_pages_scanned == 0)
216 nr_pages_scanned = SWAP_CLUSTER_MAX; 320 nr_pages_scanned = SWAP_CLUSTER_MAX;
217 321
218 if (!down_read_trylock(&shrinker_rwsem)) { 322 if (!down_read_trylock(&shrinker_rwsem)) {
219 /* Assume we'll be able to shrink next time */ 323 /*
220 ret = 1; 324 * If we would return 0, our callers would understand that we
325 * have nothing else to shrink and give up trying. By returning
326 * 1 we keep it going and assume we'll be able to shrink next
327 * time.
328 */
329 freed = 1;
221 goto out; 330 goto out;
222 } 331 }
223 332
224 list_for_each_entry(shrinker, &shrinker_list, list) { 333 list_for_each_entry(shrinker, &shrinker_list, list) {
225 unsigned long long delta; 334 for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
226 long total_scan; 335 if (!node_online(shrinkctl->nid))
227 long max_pass; 336 continue;
228 int shrink_ret = 0;
229 long nr;
230 long new_nr;
231 long batch_size = shrinker->batch ? shrinker->batch
232 : SHRINK_BATCH;
233
234 max_pass = do_shrinker_shrink(shrinker, shrink, 0);
235 if (max_pass <= 0)
236 continue;
237
238 /*
239 * copy the current shrinker scan count into a local variable
240 * and zero it so that other concurrent shrinker invocations
241 * don't also do this scanning work.
242 */
243 nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
244
245 total_scan = nr;
246 delta = (4 * nr_pages_scanned) / shrinker->seeks;
247 delta *= max_pass;
248 do_div(delta, lru_pages + 1);
249 total_scan += delta;
250 if (total_scan < 0) {
251 printk(KERN_ERR "shrink_slab: %pF negative objects to "
252 "delete nr=%ld\n",
253 shrinker->shrink, total_scan);
254 total_scan = max_pass;
255 }
256
257 /*
258 * We need to avoid excessive windup on filesystem shrinkers
259 * due to large numbers of GFP_NOFS allocations causing the
260 * shrinkers to return -1 all the time. This results in a large
261 * nr being built up so when a shrink that can do some work
262 * comes along it empties the entire cache due to nr >>>
263 * max_pass. This is bad for sustaining a working set in
264 * memory.
265 *
266 * Hence only allow the shrinker to scan the entire cache when
267 * a large delta change is calculated directly.
268 */
269 if (delta < max_pass / 4)
270 total_scan = min(total_scan, max_pass / 2);
271
272 /*
273 * Avoid risking looping forever due to too large nr value:
274 * never try to free more than twice the estimate number of
275 * freeable entries.
276 */
277 if (total_scan > max_pass * 2)
278 total_scan = max_pass * 2;
279
280 trace_mm_shrink_slab_start(shrinker, shrink, nr,
281 nr_pages_scanned, lru_pages,
282 max_pass, delta, total_scan);
283
284 while (total_scan >= batch_size) {
285 int nr_before;
286 337
287 nr_before = do_shrinker_shrink(shrinker, shrink, 0); 338 if (!(shrinker->flags & SHRINKER_NUMA_AWARE) &&
288 shrink_ret = do_shrinker_shrink(shrinker, shrink, 339 (shrinkctl->nid != 0))
289 batch_size);
290 if (shrink_ret == -1)
291 break; 340 break;
292 if (shrink_ret < nr_before)
293 ret += nr_before - shrink_ret;
294 count_vm_events(SLABS_SCANNED, batch_size);
295 total_scan -= batch_size;
296 341
297 cond_resched(); 342 freed += shrink_slab_node(shrinkctl, shrinker,
298 } 343 nr_pages_scanned, lru_pages);
299 344
300 /* 345 }
301 * move the unused scan count back into the shrinker in a
302 * manner that handles concurrent updates. If we exhausted the
303 * scan, there is no need to do an update.
304 */
305 if (total_scan > 0)
306 new_nr = atomic_long_add_return(total_scan,
307 &shrinker->nr_in_batch);
308 else
309 new_nr = atomic_long_read(&shrinker->nr_in_batch);
310
311 trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
312 } 346 }
313 up_read(&shrinker_rwsem); 347 up_read(&shrinker_rwsem);
314out: 348out:
315 cond_resched(); 349 cond_resched();
316 return ret; 350 return freed;
317} 351}
318 352
319static inline int is_page_cache_freeable(struct page *page) 353static inline int is_page_cache_freeable(struct page *page)
@@ -2354,12 +2388,16 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
2354 */ 2388 */
2355 if (global_reclaim(sc)) { 2389 if (global_reclaim(sc)) {
2356 unsigned long lru_pages = 0; 2390 unsigned long lru_pages = 0;
2391
2392 nodes_clear(shrink->nodes_to_scan);
2357 for_each_zone_zonelist(zone, z, zonelist, 2393 for_each_zone_zonelist(zone, z, zonelist,
2358 gfp_zone(sc->gfp_mask)) { 2394 gfp_zone(sc->gfp_mask)) {
2359 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) 2395 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
2360 continue; 2396 continue;
2361 2397
2362 lru_pages += zone_reclaimable_pages(zone); 2398 lru_pages += zone_reclaimable_pages(zone);
2399 node_set(zone_to_nid(zone),
2400 shrink->nodes_to_scan);
2363 } 2401 }
2364 2402
2365 shrink_slab(shrink, sc->nr_scanned, lru_pages); 2403 shrink_slab(shrink, sc->nr_scanned, lru_pages);
@@ -2816,6 +2854,8 @@ static bool kswapd_shrink_zone(struct zone *zone,
2816 return true; 2854 return true;
2817 2855
2818 shrink_zone(zone, sc); 2856 shrink_zone(zone, sc);
2857 nodes_clear(shrink.nodes_to_scan);
2858 node_set(zone_to_nid(zone), shrink.nodes_to_scan);
2819 2859
2820 reclaim_state->reclaimed_slab = 0; 2860 reclaim_state->reclaimed_slab = 0;
2821 nr_slab = shrink_slab(&shrink, sc->nr_scanned, lru_pages); 2861 nr_slab = shrink_slab(&shrink, sc->nr_scanned, lru_pages);
@@ -3524,10 +3564,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
3524 * number of slab pages and shake the slab until it is reduced 3564 * number of slab pages and shake the slab until it is reduced
3525 * by the same nr_pages that we used for reclaiming unmapped 3565 * by the same nr_pages that we used for reclaiming unmapped
3526 * pages. 3566 * pages.
3527 *
3528 * Note that shrink_slab will free memory on all zones and may
3529 * take a long time.
3530 */ 3567 */
3568 nodes_clear(shrink.nodes_to_scan);
3569 node_set(zone_to_nid(zone), shrink.nodes_to_scan);
3531 for (;;) { 3570 for (;;) {
3532 unsigned long lru_pages = zone_reclaimable_pages(zone); 3571 unsigned long lru_pages = zone_reclaimable_pages(zone);
3533 3572
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 415159061cd0..5285ead196c0 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -434,12 +434,13 @@ EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache);
434/* 434/*
435 * Remove stale credentials. Avoid sleeping inside the loop. 435 * Remove stale credentials. Avoid sleeping inside the loop.
436 */ 436 */
437static int 437static long
438rpcauth_prune_expired(struct list_head *free, int nr_to_scan) 438rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
439{ 439{
440 spinlock_t *cache_lock; 440 spinlock_t *cache_lock;
441 struct rpc_cred *cred, *next; 441 struct rpc_cred *cred, *next;
442 unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM; 442 unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM;
443 long freed = 0;
443 444
444 list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { 445 list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
445 446
@@ -451,10 +452,11 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
451 */ 452 */
452 if (time_in_range(cred->cr_expire, expired, jiffies) && 453 if (time_in_range(cred->cr_expire, expired, jiffies) &&
453 test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) 454 test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
454 return 0; 455 break;
455 456
456 list_del_init(&cred->cr_lru); 457 list_del_init(&cred->cr_lru);
457 number_cred_unused--; 458 number_cred_unused--;
459 freed++;
458 if (atomic_read(&cred->cr_count) != 0) 460 if (atomic_read(&cred->cr_count) != 0)
459 continue; 461 continue;
460 462
@@ -467,29 +469,39 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
467 } 469 }
468 spin_unlock(cache_lock); 470 spin_unlock(cache_lock);
469 } 471 }
470 return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; 472 return freed;
471} 473}
472 474
473/* 475/*
474 * Run memory cache shrinker. 476 * Run memory cache shrinker.
475 */ 477 */
476static int 478static unsigned long
477rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc) 479rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
480
478{ 481{
479 LIST_HEAD(free); 482 LIST_HEAD(free);
480 int res; 483 unsigned long freed;
481 int nr_to_scan = sc->nr_to_scan; 484
482 gfp_t gfp_mask = sc->gfp_mask; 485 if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL)
486 return SHRINK_STOP;
483 487
484 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) 488 /* nothing left, don't come back */
485 return (nr_to_scan == 0) ? 0 : -1;
486 if (list_empty(&cred_unused)) 489 if (list_empty(&cred_unused))
487 return 0; 490 return SHRINK_STOP;
491
488 spin_lock(&rpc_credcache_lock); 492 spin_lock(&rpc_credcache_lock);
489 res = rpcauth_prune_expired(&free, nr_to_scan); 493 freed = rpcauth_prune_expired(&free, sc->nr_to_scan);
490 spin_unlock(&rpc_credcache_lock); 494 spin_unlock(&rpc_credcache_lock);
491 rpcauth_destroy_credlist(&free); 495 rpcauth_destroy_credlist(&free);
492 return res; 496
497 return freed;
498}
499
500static unsigned long
501rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
502
503{
504 return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
493} 505}
494 506
495/* 507/*
@@ -805,7 +817,8 @@ rpcauth_uptodatecred(struct rpc_task *task)
805} 817}
806 818
807static struct shrinker rpc_cred_shrinker = { 819static struct shrinker rpc_cred_shrinker = {
808 .shrink = rpcauth_cache_shrinker, 820 .count_objects = rpcauth_cache_shrink_count,
821 .scan_objects = rpcauth_cache_shrink_scan,
809 .seeks = DEFAULT_SEEKS, 822 .seeks = DEFAULT_SEEKS,
810}; 823};
811 824