-rw-r--r--  include/linux/mm_types.h |   7
-rw-r--r--  mm/slub.c                | 154
2 files changed, 123 insertions(+), 38 deletions(-)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e30687bad075..d5bb1796e12b 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -50,13 +50,16 @@ struct page {
 		spinlock_t ptl;
 #endif
 		struct {	/* SLUB uses */
-			struct page *first_page;	/* Compound pages */
+			void **lockless_freelist;
 			struct kmem_cache *slab;	/* Pointer to slab */
 		};
+		struct {
+			struct page *first_page;	/* Compound pages */
+		};
 	};
 	union {
 		pgoff_t index;		/* Our offset within mapping. */
-		void *freelist;		/* SLUB: pointer to free object */
+		void *freelist;		/* SLUB: freelist req. slab lock */
 	};
 	struct list_head lru;		/* Pageout list, eg. active_list
 					 * protected by zone->lru_lock !
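An aside on the hunk above: the new SLUB field does not grow struct page, because first_page is moved into a second anonymous struct inside the same union and therefore overlays lockless_freelist. A minimal, standalone C sketch of that layout (toy_page and its field types are made up for illustration, not taken from the patch):

#include <stdio.h>
#include <stddef.h>

struct toy_page {
	union {
		struct {			/* SLUB uses */
			void **lockless_freelist;
			void *slab;
		};
		struct {
			struct toy_page *first_page;	/* Compound pages */
		};
	};
};

int main(void)
{
	/* Both anonymous structs start at offset 0 of the union, so
	 * first_page shares storage with lockless_freelist and the
	 * union stays two words wide. */
	printf("sizeof = %zu, lockless_freelist @ %zu, first_page @ %zu\n",
	       sizeof(struct toy_page),
	       offsetof(struct toy_page, lockless_freelist),
	       offsetof(struct toy_page, first_page));
	return 0;
}

The freelist comment change also matters for the rest of the patch: the regular freelist is only touched under the slab lock, while the lockless freelist is private to the cpu that owns the slab.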
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -81,10 +81,14 @@
  * PageActive		The slab is used as a cpu cache. Allocations
  *			may be performed from the slab. The slab is not
  *			on any slab list and cannot be moved onto one.
+ *			The cpu slab may be equipped with an additional
+ *			lockless_freelist that allows lockless access to
+ *			free objects in addition to the regular freelist
+ *			that requires the slab lock.
  *
  * PageError		Slab requires special handling due to debug
  *			options set. This moves slab handling out of
- *			the fast path.
+ *			the fast path and disables lockless freelists.
  */
 
 static inline int SlabDebug(struct page *page)
@@ -1014,6 +1018,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	set_freepointer(s, last, NULL);
 
 	page->freelist = start;
+	page->lockless_freelist = NULL;
 	page->inuse = 0;
 out:
 	if (flags & __GFP_WAIT)
@@ -1276,6 +1281,23 @@ static void putback_slab(struct kmem_cache *s, struct page *page)
  */
 static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu)
 {
+	/*
+	 * Merge cpu freelist into freelist. Typically we get here
+	 * because both freelists are empty. So this is unlikely
+	 * to occur.
+	 */
+	while (unlikely(page->lockless_freelist)) {
+		void **object;
+
+		/* Retrieve object from cpu_freelist */
+		object = page->lockless_freelist;
+		page->lockless_freelist = page->lockless_freelist[page->offset];
+
+		/* And put onto the regular freelist */
+		object[page->offset] = page->freelist;
+		page->freelist = object;
+		page->inuse--;
+	}
 	s->cpu_slab[cpu] = NULL;
 	ClearPageActive(page);
 
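The loop added to deactivate_slab() above relies on SLUB storing the next-free pointer inside the free object itself, at word index page->offset. A rough userspace sketch of that splice, with made-up names and a fixed OFFSET standing in for page->offset (this is not the kernel code):

#include <stdio.h>
#include <stdlib.h>

#define NOBJ	4
#define OFFSET	0	/* word index of the next-free pointer inside an object */

/* Pop every object off 'from' and push it onto 'to', the way the new
 * deactivate_slab() merges the lockless freelist back into the regular one. */
static void merge_freelists(void ***from, void ***to, int offset)
{
	while (*from) {
		void **object = *from;

		*from = object[offset];		/* advance to the next free object */
		object[offset] = *to;		/* link object into 'to' */
		*to = object;
	}
}

int main(void)
{
	void **objects[NOBJ];
	void **lockless = NULL, **regular = NULL;
	int i, n = 0;

	/* Build a lockless freelist: each free "object" holds the pointer to
	 * the next free object at word OFFSET, just as SLUB reuses the free
	 * object's own memory for the freelist link. */
	for (i = 0; i < NOBJ; i++) {
		objects[i] = calloc(2, sizeof(void *));
		objects[i][OFFSET] = lockless;
		lockless = objects[i];
	}

	merge_freelists(&lockless, &regular, OFFSET);

	for (; regular; regular = regular[OFFSET])
		n++;
	printf("objects moved to the regular freelist: %d\n", n);

	for (i = 0; i < NOBJ; i++)
		free(objects[i]);
	return 0;
}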
@@ -1322,47 +1344,46 @@ static void flush_all(struct kmem_cache *s)
 }
 
 /*
- * slab_alloc is optimized to only modify two cachelines on the fast path
- * (aside from the stack):
+ * Slow path. The lockless freelist is empty or we need to perform
+ * debugging duties.
+ *
+ * Interrupts are disabled.
  *
- * 1. The page struct
- * 2. The first cacheline of the object to be allocated.
+ * Processing is still very fast if new objects have been freed to the
+ * regular freelist. In that case we simply take over the regular freelist
+ * as the lockless freelist and zap the regular freelist.
  *
- * The only other cache lines that are read (apart from code) is the
- * per cpu array in the kmem_cache struct.
+ * If that is not working then we fall back to the partial lists. We take the
+ * first element of the freelist as the object to allocate now and move the
+ * rest of the freelist to the lockless freelist.
  *
- * Fastpath is not possible if we need to get a new slab or have
- * debugging enabled (which means all slabs are marked with SlabDebug)
+ * And if we were unable to get a new slab from the partial slab lists then
+ * we need to allocate a new slab. This is the slowest path since we may sleep.
  */
-static void *slab_alloc(struct kmem_cache *s,
-		gfp_t gfpflags, int node, void *addr)
+static void *__slab_alloc(struct kmem_cache *s,
+		gfp_t gfpflags, int node, void *addr, struct page *page)
 {
-	struct page *page;
 	void **object;
-	unsigned long flags;
-	int cpu;
+	int cpu = smp_processor_id();
 
-	local_irq_save(flags);
-	cpu = smp_processor_id();
-	page = s->cpu_slab[cpu];
 	if (!page)
 		goto new_slab;
 
 	slab_lock(page);
 	if (unlikely(node != -1 && page_to_nid(page) != node))
 		goto another_slab;
-redo:
+load_freelist:
 	object = page->freelist;
 	if (unlikely(!object))
 		goto another_slab;
 	if (unlikely(SlabDebug(page)))
 		goto debug;
 
-have_object:
-	page->inuse++;
-	page->freelist = object[page->offset];
+	object = page->freelist;
+	page->lockless_freelist = object[page->offset];
+	page->inuse = s->objects;
+	page->freelist = NULL;
 	slab_unlock(page);
-	local_irq_restore(flags);
 	return object;
 
 another_slab:
@@ -1370,11 +1391,11 @@ another_slab:
 
 new_slab:
 	page = get_partial(s, gfpflags, node);
-	if (likely(page)) {
+	if (page) {
 have_slab:
 		s->cpu_slab[cpu] = page;
 		SetPageActive(page);
-		goto redo;
+		goto load_freelist;
 	}
 
 	page = new_slab(s, gfpflags, node);
@@ -1397,7 +1418,7 @@ have_slab:
 			discard_slab(s, page);
 			page = s->cpu_slab[cpu];
 			slab_lock(page);
-			goto redo;
+			goto load_freelist;
 		}
 		/* New slab does not fit our expectations */
 		flush_slab(s, s->cpu_slab[cpu], cpu);
@@ -1405,16 +1426,52 @@ have_slab:
 		slab_lock(page);
 		goto have_slab;
 	}
-	local_irq_restore(flags);
 	return NULL;
 debug:
+	object = page->freelist;
 	if (!alloc_object_checks(s, page, object))
 		goto another_slab;
 	if (s->flags & SLAB_STORE_USER)
 		set_track(s, object, TRACK_ALLOC, addr);
 	trace(s, page, object, 1);
 	init_object(s, object, 1);
-	goto have_object;
+
+	page->inuse++;
+	page->freelist = object[page->offset];
+	slab_unlock(page);
+	return object;
+}
+
+/*
+ * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
+ * have the fastpath folded into their functions. So no function call
+ * overhead for requests that can be satisfied on the fastpath.
+ *
+ * The fastpath works by first checking if the lockless freelist can be used.
+ * If not then __slab_alloc is called for slow processing.
+ *
+ * Otherwise we can simply pick the next object from the lockless freelist.
+ */
+static void __always_inline *slab_alloc(struct kmem_cache *s,
+		gfp_t gfpflags, int node, void *addr)
+{
+	struct page *page;
+	void **object;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	page = s->cpu_slab[smp_processor_id()];
+	if (unlikely(!page || !page->lockless_freelist ||
+			(node != -1 && page_to_nid(page) != node)))
+
+		object = __slab_alloc(s, gfpflags, node, addr, page);
+
+	else {
+		object = page->lockless_freelist;
+		page->lockless_freelist = object[page->offset];
+	}
+	local_irq_restore(flags);
+	return object;
 }
 
 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
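The new shape of the allocator (a forced-inline slab_alloc() wrapper that only touches page->lockless_freelist, with __slab_alloc() kept out of line for refills and debugging) is a classic fastpath/slowpath split. A very rough userspace sketch of that shape, with hypothetical toy_* names and a mutex standing in for the slab lock and interrupt disabling:

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

struct toy_cache {
	void **lockless_freelist;	/* consumed lock-free on the fast path */
	void **freelist;		/* refill source, protected by the lock */
	pthread_mutex_t lock;
};

/* Out-of-line slow path: take the lock, hand out the first object and move
 * the rest of the regular freelist over to the lockless freelist. */
static void *toy_alloc_slow(struct toy_cache *c)
{
	void **object;

	pthread_mutex_lock(&c->lock);
	object = c->freelist;
	if (object) {
		c->lockless_freelist = object[0];
		c->freelist = NULL;
	}
	pthread_mutex_unlock(&c->lock);
	return object;
}

/* Inlined fast path: a single pointer pop when the lockless list is populated. */
static inline void *toy_alloc(struct toy_cache *c)
{
	void **object = c->lockless_freelist;

	if (!object)
		return toy_alloc_slow(c);
	c->lockless_freelist = object[0];
	return object;
}

int main(void)
{
	struct toy_cache c = { NULL, NULL, PTHREAD_MUTEX_INITIALIZER };
	void **objs[3];
	int i;

	for (i = 0; i < 3; i++) {	/* seed the regular freelist */
		objs[i] = calloc(2, sizeof(void *));
		objs[i][0] = c.freelist;
		c.freelist = objs[i];
	}

	/* The first call refills via the slow path; the next two stay lock-free. */
	for (i = 0; i < 3; i++)
		printf("alloc -> %p\n", toy_alloc(&c));

	for (i = 0; i < 3; i++)
		free(objs[i]);
	return 0;
}

In the patch itself it is disabled interrupts, not a lock, that keep the per-cpu lockless freelist consistent, which is why slab_alloc() only needs local_irq_save() around the pop.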
@@ -1432,20 +1489,19 @@ EXPORT_SYMBOL(kmem_cache_alloc_node);
 #endif
 
 /*
- * The fastpath only writes the cacheline of the page struct and the first
- * cacheline of the object.
+ * Slow path handling. This may still be called frequently since objects
+ * have a longer lifetime than the cpu slabs in most processing loads.
  *
- * We read the cpu_slab cacheline to check if the slab is the per cpu
- * slab for this processor.
+ * So we still attempt to reduce cache line usage. Just take the slab
+ * lock and free the item. If there is no additional partial page
+ * handling required then we can return immediately.
  */
-static void slab_free(struct kmem_cache *s, struct page *page,
+static void __slab_free(struct kmem_cache *s, struct page *page,
 			void *x, void *addr)
 {
 	void *prior;
 	void **object = (void *)x;
-	unsigned long flags;
 
-	local_irq_save(flags);
 	slab_lock(page);
 
 	if (unlikely(SlabDebug(page)))
@@ -1475,7 +1531,6 @@ checks_ok:
 
 out_unlock:
 	slab_unlock(page);
-	local_irq_restore(flags);
 	return;
 
 slab_empty:
@@ -1487,7 +1542,6 @@ slab_empty:
 
 	slab_unlock(page);
 	discard_slab(s, page);
-	local_irq_restore(flags);
 	return;
 
 debug:
@@ -1502,6 +1556,34 @@ debug:
 	goto checks_ok;
 }
 
+/*
+ * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
+ * can perform fastpath freeing without additional function calls.
+ *
+ * The fastpath is only possible if we are freeing to the current cpu slab
+ * of this processor. This is typically the case if we have just allocated
+ * the item before.
+ *
+ * If the fastpath is not possible then we fall back to __slab_free where we
+ * deal with all sorts of special processing.
+ */
+static void __always_inline slab_free(struct kmem_cache *s,
+			struct page *page, void *x, void *addr)
+{
+	void **object = (void *)x;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	if (likely(page == s->cpu_slab[smp_processor_id()] &&
+						!SlabDebug(page))) {
+		object[page->offset] = page->lockless_freelist;
+		page->lockless_freelist = object;
+	} else
+		__slab_free(s, page, x, addr);
+
+	local_irq_restore(flags);
+}
+
 void kmem_cache_free(struct kmem_cache *s, void *x)
 {
 	struct page *page;
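For symmetry, a similarly rough sketch of the free side (again userspace, all names hypothetical): freeing into the current cpu slab is a two-store push onto the lockless freelist, while anything else drops into a locked slow path, mirroring the slab_free()/__slab_free() split above.

#include <stdio.h>
#include <pthread.h>

struct toy_slab {
	void **lockless_freelist;	/* fast-path list, owning cpu only */
	void **freelist;		/* regular list, needs the lock */
	pthread_mutex_t lock;
};

static struct toy_slab *toy_cpu_slab;	/* stand-in for s->cpu_slab[cpu] */

/* Out-of-line slow path: lock the slab and use the regular freelist. */
static void toy_free_slow(struct toy_slab *slab, void **object)
{
	pthread_mutex_lock(&slab->lock);
	object[0] = slab->freelist;
	slab->freelist = object;
	pthread_mutex_unlock(&slab->lock);
}

/* Inlined fast path: push onto the lockless freelist when the object
 * belongs to the current cpu slab, otherwise fall back. */
static inline void toy_free(struct toy_slab *slab, void *x)
{
	void **object = x;

	if (slab == toy_cpu_slab) {
		object[0] = slab->lockless_freelist;
		slab->lockless_freelist = object;
	} else {
		toy_free_slow(slab, object);
	}
}

int main(void)
{
	struct toy_slab a = { NULL, NULL, PTHREAD_MUTEX_INITIALIZER };
	struct toy_slab b = { NULL, NULL, PTHREAD_MUTEX_INITIALIZER };
	void *obj_a[2] = { NULL }, *obj_b[2] = { NULL };

	toy_cpu_slab = &a;
	toy_free(&a, obj_a);	/* fast: lockless push */
	toy_free(&b, obj_b);	/* slow: locked regular freelist */
	printf("a.lockless_freelist=%p b.freelist=%p\n",
	       (void *)a.lockless_freelist, (void *)b.freelist);
	return 0;
}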