Diffstat (limited to 'mm/z3fold.c')
-rw-r--r--	mm/z3fold.c	104
1 file changed, 102 insertions(+), 2 deletions(-)
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 1a029a7432ee..75b7962439ff 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -41,6 +41,7 @@
 #include <linux/workqueue.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/wait.h>
 #include <linux/zpool.h>
 #include <linux/magic.h>
 
@@ -145,6 +146,8 @@ struct z3fold_header {
  * @release_wq:	workqueue for safe page release
  * @work:	work_struct for safe page release
  * @inode:	inode for z3fold pseudo filesystem
+ * @destroying: bool to stop migration once we start destruction
+ * @isolated: int to count the number of pages currently in isolation
  *
  * This structure is allocated at pool creation time and maintains metadata
  * pertaining to a particular z3fold pool.
@@ -163,8 +166,11 @@ struct z3fold_pool {
 	const struct zpool_ops *zpool_ops;
 	struct workqueue_struct *compact_wq;
 	struct workqueue_struct *release_wq;
+	struct wait_queue_head isolate_wait;
 	struct work_struct work;
 	struct inode *inode;
+	bool destroying;
+	int isolated;
 };
 
 /*
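
Taken together, the three new fields implement a small teardown handshake: isolated counts pages currently detached for migration, destroying tells the isolation path to refuse new work once teardown has begun, and isolate_wait lets z3fold_destroy_pool() sleep until the count drains to zero, with the existing pool->lock serializing all of it. Below is a minimal userspace analogue of the same pattern, sketched with pthreads rather than the kernel's waitqueue API (the demo_pool type and demo_* helpers are illustrative only, not part of z3fold):

#include <pthread.h>
#include <stdbool.h>

/* Illustrative stand-ins for the fields added to struct z3fold_pool. */
struct demo_pool {
	pthread_mutex_t lock;		/* plays the role of pool->lock */
	pthread_cond_t isolate_wait;	/* plays the role of pool->isolate_wait */
	bool destroying;
	int isolated;
};

/* Isolation path: refuse new isolations once teardown has begun. */
static bool demo_isolate(struct demo_pool *pool)
{
	bool ok;

	pthread_mutex_lock(&pool->lock);
	ok = !pool->destroying;
	if (ok)
		pool->isolated++;
	pthread_mutex_unlock(&pool->lock);
	return ok;
}

/* Migration/putback path: drop the count, wake the destroyer at zero. */
static void demo_unisolate(struct demo_pool *pool)
{
	pthread_mutex_lock(&pool->lock);
	if (--pool->isolated == 0)
		pthread_cond_broadcast(&pool->isolate_wait);
	pthread_mutex_unlock(&pool->lock);
}

/* Teardown path: close the gate, then sleep until in-flight work drains. */
static void demo_destroy(struct demo_pool *pool)
{
	pthread_mutex_lock(&pool->lock);
	pool->destroying = true;
	while (pool->isolated > 0)
		pthread_cond_wait(&pool->isolate_wait, &pool->lock);
	pthread_mutex_unlock(&pool->lock);
	/* no isolations remain or can start; resources can be freed */
}
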
@@ -769,6 +775,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 		goto out_c;
 	spin_lock_init(&pool->lock);
 	spin_lock_init(&pool->stale_lock);
+	init_waitqueue_head(&pool->isolate_wait);
 	pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
 	if (!pool->unbuddied)
 		goto out_pool;
@@ -808,6 +815,15 @@ out:
 	return NULL;
 }
 
+static bool pool_isolated_are_drained(struct z3fold_pool *pool)
+{
+	bool ret;
+
+	spin_lock(&pool->lock);
+	ret = pool->isolated == 0;
+	spin_unlock(&pool->lock);
+	return ret;
+}
 /**
  * z3fold_destroy_pool() - destroys an existing z3fold pool
  * @pool: the z3fold pool to be destroyed
@@ -817,9 +833,35 @@ out:
 static void z3fold_destroy_pool(struct z3fold_pool *pool)
 {
 	kmem_cache_destroy(pool->c_handle);
-	z3fold_unregister_migration(pool);
-	destroy_workqueue(pool->release_wq);
+	/*
+	 * We set pool->destroying under lock to ensure that
+	 * z3fold_page_isolate() sees any changes to destroying. This way we
+	 * avoid the need for any memory barriers.
+	 */
+
+	spin_lock(&pool->lock);
+	pool->destroying = true;
+	spin_unlock(&pool->lock);
+
+	/*
+	 * We need to ensure that no pages are being migrated while we destroy
+	 * these workqueues, as migration can queue work on either of the
+	 * workqueues.
+	 */
+	wait_event(pool->isolate_wait, pool_isolated_are_drained(pool));
+
+	/*
+	 * We need to destroy pool->compact_wq before pool->release_wq,
+	 * as any pending work on pool->compact_wq will call
+	 * queue_work(pool->release_wq, &pool->work).
+	 *
+	 * There are still outstanding pages until both workqueues are drained,
+	 * so we cannot unregister migration until then.
+	 */
+
 	destroy_workqueue(pool->compact_wq);
+	destroy_workqueue(pool->release_wq);
+	z3fold_unregister_migration(pool);
 	kfree(pool);
 }
 
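
The reordering at the bottom of this hunk follows from destroy_workqueue() semantics: it flushes all pending work before freeing the queue, so a queue must outlive every queue whose work items enqueue into it. Since compact work can call queue_work(pool->release_wq, ...), compact_wq has to be drained first. A toy sequential model of that constraint (real workqueues are concurrent; the demo_* names are hypothetical):

#include <stdbool.h>
#include <stdio.h>

struct demo_wq {
	const char *name;
	bool alive;
	int pending;
	struct demo_wq *feeds;	/* queue that our work items enqueue into */
};

/* Like destroy_workqueue(): drain pending work, then free the queue. */
static void demo_destroy_workqueue(struct demo_wq *wq)
{
	while (wq->pending > 0) {
		wq->pending--;
		if (wq->feeds) {
			if (!wq->feeds->alive)
				printf("BUG: %s queued work on dead %s\n",
				       wq->name, wq->feeds->name);
			else
				wq->feeds->pending++;
		}
	}
	wq->alive = false;
}

int main(void)
{
	struct demo_wq release = { "release_wq", true, 0, NULL };
	struct demo_wq compact = { "compact_wq", true, 2, &release };

	/* Correct order: drain the producer before the consumer it feeds. */
	demo_destroy_workqueue(&compact);	/* pushes its jobs to release */
	demo_destroy_workqueue(&release);	/* then drains them safely */
	return 0;
}
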
@@ -1297,6 +1339,28 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
 	return atomic64_read(&pool->pages_nr);
 }
 
+/*
+ * z3fold_dec_isolated() expects to be called while pool->lock is held.
+ */
+static void z3fold_dec_isolated(struct z3fold_pool *pool)
+{
+	assert_spin_locked(&pool->lock);
+	VM_BUG_ON(pool->isolated <= 0);
+	pool->isolated--;
+
+	/*
+	 * If we have no more isolated pages, we have to see if
+	 * z3fold_destroy_pool() is waiting for a signal.
+	 */
+	if (pool->isolated == 0 && waitqueue_active(&pool->isolate_wait))
+		wake_up_all(&pool->isolate_wait);
+}
+
+static void z3fold_inc_isolated(struct z3fold_pool *pool)
+{
+	pool->isolated++;
+}
+
 static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
 {
 	struct z3fold_header *zhdr;
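
Two details in this hunk are worth noting. waitqueue_active() skips the relatively expensive wake_up_all() when nobody is sleeping, and that check is only race-free because both this counter update and the condition re-check inside wait_event() run under pool->lock. assert_spin_locked(), like the userspace approximation below, verifies only that the lock is held by someone, not specifically by the caller. A sketch of encoding that locking contract in portable C (demo_* names are illustrative, not the kernel API):

#include <assert.h>
#include <pthread.h>

struct demo_pool {
	pthread_mutex_t lock;
	pthread_cond_t isolate_wait;
	int isolated;
};

/* Caller must hold pool->lock, mirroring assert_spin_locked(). */
static void demo_dec_isolated(struct demo_pool *pool)
{
	/*
	 * pthread_mutex_trylock() on a locked (non-recursive) mutex
	 * returns EBUSY without blocking, so a nonzero return is a
	 * cheap "some thread holds the lock" check -- the same weak
	 * guarantee assert_spin_locked() gives on SMP.
	 */
	assert(pthread_mutex_trylock(&pool->lock) != 0);
	assert(pool->isolated > 0);	/* mirrors VM_BUG_ON(<= 0) */

	if (--pool->isolated == 0)
		pthread_cond_broadcast(&pool->isolate_wait);
}
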
@@ -1323,6 +1387,34 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
 	spin_lock(&pool->lock);
 	if (!list_empty(&page->lru))
 		list_del(&page->lru);
+	/*
+	 * We need to check for destruction while holding pool->lock, as
+	 * otherwise destruction could see 0 isolated pages, and
+	 * proceed.
+	 */
+	if (unlikely(pool->destroying)) {
+		spin_unlock(&pool->lock);
+		/*
+		 * If this page isn't stale, somebody else holds a
+		 * reference to it. Let's drop our refcount so that they
+		 * can call the release logic.
+		 */
+		if (unlikely(kref_put(&zhdr->refcount,
+				      release_z3fold_page_locked))) {
+			/*
+			 * If we get here we have kref problems, so we
+			 * should freak out.
+			 */
+			WARN(1, "Z3fold is experiencing kref problems\n");
+			z3fold_page_unlock(zhdr);
+			return false;
+		}
+		z3fold_page_unlock(zhdr);
+		return false;
+	}
+
+
+	z3fold_inc_isolated(pool);
 	spin_unlock(&pool->lock);
 	z3fold_page_unlock(zhdr);
 	return true;
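
The kref_put() in the bail-out path deserves a note: it drops this caller's reference and returns true only if that was the last reference, in which case the release callback has already run. During isolation somebody else still holds a reference, so a true return indicates the refcounting is broken, hence the WARN(). A minimal single-threaded sketch of that contract (demo_* names are illustrative; the real kref uses atomic refcount_t operations):

#include <stdbool.h>
#include <stdio.h>

struct demo_kref { int refcount; };

/* Mirrors kref_put(): returns true iff this put released the object. */
static bool demo_kref_put(struct demo_kref *kref,
			  void (*release)(struct demo_kref *))
{
	if (--kref->refcount == 0) {
		release(kref);
		return true;
	}
	return false;
}

static void demo_release(struct demo_kref *kref)
{
	printf("last reference dropped, object released\n");
}

int main(void)
{
	struct demo_kref ref = { .refcount = 2 };

	/*
	 * Another holder still has a reference, so this put returns
	 * false and the remaining holder runs the release logic later.
	 * A true return here would mean the refcount was corrupted --
	 * the situation the z3fold code WARN()s about.
	 */
	if (demo_kref_put(&ref, demo_release))
		printf("unexpected: we were the last holder\n");
	return 0;
}
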
@@ -1391,6 +1483,10 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
 
 	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
 
+	spin_lock(&pool->lock);
+	z3fold_dec_isolated(pool);
+	spin_unlock(&pool->lock);
+
 	page_mapcount_reset(page);
 	put_page(page);
 	return 0;
@@ -1410,10 +1506,14 @@ static void z3fold_page_putback(struct page *page)
 	INIT_LIST_HEAD(&page->lru);
 	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
 		atomic64_dec(&pool->pages_nr);
+		spin_lock(&pool->lock);
+		z3fold_dec_isolated(pool);
+		spin_unlock(&pool->lock);
 		return;
 	}
 	spin_lock(&pool->lock);
 	list_add(&page->lru, &pool->lru);
+	z3fold_dec_isolated(pool);
 	spin_unlock(&pool->lock);
 	z3fold_page_unlock(zhdr);
 }