-rw-r--r--   include/linux/mm_types.h |   7
-rw-r--r--   mm/slub.c                | 154
2 files changed, 123 insertions(+), 38 deletions(-)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e30687bad075..d5bb1796e12b 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -50,13 +50,16 @@ struct page {
 		spinlock_t ptl;
 #endif
 		struct {			/* SLUB uses */
-			struct page *first_page;	/* Compound pages */
+			void **lockless_freelist;
 			struct kmem_cache *slab;	/* Pointer to slab */
 		};
+		struct {
+			struct page *first_page;	/* Compound pages */
+		};
 	};
 	union {
 		pgoff_t index;		/* Our offset within mapping. */
-		void *freelist;		/* SLUB: pointer to free object */
+		void *freelist;		/* SLUB: freelist req. slab lock */
 	};
 	struct list_head lru;		/* Pageout list, eg. active_list
 					 * protected by zone->lru_lock !
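
The hunk above overlays a second freelist head on the SLUB part of struct page. As a rough illustration (user-space toy code, not kernel code; toy_slab and its fields are invented names), a slab page now carries a lock-protected list head and a cpu-local one, with free objects chained through a pointer stored inside each free object at word index offset:

/* Toy model of the two freelist heads; all names here are illustrative. */
#include <stdio.h>
#include <stdlib.h>

struct toy_slab {
	void **freelist;		/* shared list, needs the slab lock */
	void **lockless_freelist;	/* cpu-local list, used with irqs off */
	unsigned int offset;		/* word index of the in-object link */
};

int main(void)
{
	struct toy_slab slab = { .offset = 0 };
	void **a = calloc(4, sizeof(void *));
	void **b = calloc(4, sizeof(void *));

	/* Chain b -> a -> NULL onto the lock-protected list. */
	a[slab.offset] = NULL;
	b[slab.offset] = a;
	slab.freelist = b;

	printf("head=%p next=%p\n", (void *)slab.freelist,
	       slab.freelist[slab.offset]);
	free(a);
	free(b);
	return 0;
}
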
diff --git a/mm/slub.c b/mm/slub.c
index bd2efae02bcd..b07a1cab4f28 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -81,10 +81,14 @@
  * PageActive		The slab is used as a cpu cache. Allocations
  *			may be performed from the slab. The slab is not
  *			on any slab list and cannot be moved onto one.
+ *			The cpu slab may be equipped with an additional
+ *			lockless_freelist that allows lockless access to
+ *			free objects in addition to the regular freelist
+ *			that requires the slab lock.
  *
  * PageError		Slab requires special handling due to debug
  *			options set. This moves slab handling out of
- *			the fast path.
+ *			the fast path and disables lockless freelists.
  */
 
 static inline int SlabDebug(struct page *page)
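
A minimal sketch of the rule these comments describe, on the same toy model (hypothetical helper, not the kernel's API): the lockless list may only be consulted for the active cpu slab, and never for a slab with debugging enabled.

#include <stdbool.h>
#include <stddef.h>

struct toy_slab {
	void **lockless_freelist;
	bool active;	/* stands in for PageActive */
	bool debug;	/* stands in for PageError / SlabDebug() */
};

/* The lockless fastpath is legal only for the active, non-debug cpu slab. */
static bool lockless_path_allowed(const struct toy_slab *slab)
{
	return slab && slab->active && !slab->debug &&
	       slab->lockless_freelist != NULL;
}

int main(void)
{
	void *dummy = NULL;
	struct toy_slab slab = { .lockless_freelist = &dummy, .active = true };

	return lockless_path_allowed(&slab) ? 0 : 1;
}
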
@@ -1014,6 +1018,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	set_freepointer(s, last, NULL);
 
 	page->freelist = start;
+	page->lockless_freelist = NULL;
 	page->inuse = 0;
 out:
 	if (flags & __GFP_WAIT)
@@ -1276,6 +1281,23 @@ static void putback_slab(struct kmem_cache *s, struct page *page)
  */
 static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu)
 {
+	/*
+	 * Merge cpu freelist into freelist. Typically we get here
+	 * because both freelists are empty. So this is unlikely
+	 * to occur.
+	 */
+	while (unlikely(page->lockless_freelist)) {
+		void **object;
+
+		/* Retrieve object from cpu_freelist */
+		object = page->lockless_freelist;
+		page->lockless_freelist = page->lockless_freelist[page->offset];
+
+		/* And put onto the regular freelist */
+		object[page->offset] = page->freelist;
+		page->freelist = object;
+		page->inuse--;
+	}
 	s->cpu_slab[cpu] = NULL;
 	ClearPageActive(page);
 
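
The merge loop added to deactivate_slab() can be modelled outside the kernel roughly as follows (merge_lockless is an invented name): each object still sitting on the cpu-local list is unlinked and pushed back onto the lock-protected list, and the in-use count drops with it.

#include <stddef.h>

struct toy_slab {
	void **freelist;
	void **lockless_freelist;
	unsigned int offset;
	unsigned int inuse;
};

/* Drain the cpu-local list back into the shared list. */
static void merge_lockless(struct toy_slab *slab)
{
	while (slab->lockless_freelist) {
		void **object = slab->lockless_freelist;

		/* Unlink from the cpu-local list ... */
		slab->lockless_freelist = object[slab->offset];
		/* ... and push onto the regular list. */
		object[slab->offset] = slab->freelist;
		slab->freelist = object;
		slab->inuse--;
	}
}

int main(void)
{
	void *obj[1] = { NULL };
	struct toy_slab slab = { .lockless_freelist = obj, .inuse = 1 };

	merge_lockless(&slab);
	return (slab.freelist == obj && slab.inuse == 0) ? 0 : 1;
}
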
@@ -1322,47 +1344,46 @@ static void flush_all(struct kmem_cache *s)
 }
 
 /*
- * slab_alloc is optimized to only modify two cachelines on the fast path
- * (aside from the stack):
+ * Slow path. The lockless freelist is empty or we need to perform
+ * debugging duties.
+ *
+ * Interrupts are disabled.
  *
- * 1. The page struct
- * 2. The first cacheline of the object to be allocated.
+ * Processing is still very fast if new objects have been freed to the
+ * regular freelist. In that case we simply take over the regular freelist
+ * as the lockless freelist and zap the regular freelist.
  *
- * The only other cache lines that are read (apart from code) is the
- * per cpu array in the kmem_cache struct.
+ * If that does not work then we fall back to the partial lists. We take the
+ * first element of the freelist as the object to allocate now and move the
+ * rest of the freelist to the lockless freelist.
  *
- * Fastpath is not possible if we need to get a new slab or have
- * debugging enabled (which means all slabs are marked with SlabDebug)
+ * And if we were unable to get a new slab from the partial slab lists then
+ * we need to allocate a new slab. This is the slowest path since we may sleep.
  */
-static void *slab_alloc(struct kmem_cache *s,
-		gfp_t gfpflags, int node, void *addr)
+static void *__slab_alloc(struct kmem_cache *s,
+		gfp_t gfpflags, int node, void *addr, struct page *page)
 {
-	struct page *page;
 	void **object;
-	unsigned long flags;
-	int cpu;
+	int cpu = smp_processor_id();
 
-	local_irq_save(flags);
-	cpu = smp_processor_id();
-	page = s->cpu_slab[cpu];
 	if (!page)
 		goto new_slab;
 
 	slab_lock(page);
 	if (unlikely(node != -1 && page_to_nid(page) != node))
 		goto another_slab;
-redo:
+load_freelist:
 	object = page->freelist;
 	if (unlikely(!object))
 		goto another_slab;
 	if (unlikely(SlabDebug(page)))
 		goto debug;
 
-have_object:
-	page->inuse++;
-	page->freelist = object[page->offset];
+	object = page->freelist;
+	page->lockless_freelist = object[page->offset];
+	page->inuse = s->objects;
+	page->freelist = NULL;
 	slab_unlock(page);
-	local_irq_restore(flags);
 	return object;
 
 another_slab:
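
A compact model of the refill step the new comment describes (user-space sketch, invented names): when the cpu-local list is empty but the regular freelist is not, the regular list's head becomes the allocation, the remainder becomes the cpu-local list, and every object is accounted as in use.

#include <stddef.h>

struct toy_slab {
	void **freelist;
	void **lockless_freelist;
	unsigned int offset;
	unsigned int inuse;
	unsigned int objects;	/* capacity of the slab */
};

/* Shared list -> cpu-local list; the first object goes to the caller. */
static void *refill_and_alloc(struct toy_slab *slab)
{
	void **object = slab->freelist;

	if (!object)
		return NULL;	/* caller must look for another slab */

	slab->lockless_freelist = object[slab->offset];
	slab->inuse = slab->objects;	/* every object now owned by the cpu */
	slab->freelist = NULL;
	return object;
}

int main(void)
{
	void *a[1] = { NULL };
	struct toy_slab slab = { .freelist = a, .objects = 1 };

	return (refill_and_alloc(&slab) == (void *)a) ? 0 : 1;
}
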
@@ -1370,11 +1391,11 @@ another_slab:
 
 new_slab:
 	page = get_partial(s, gfpflags, node);
-	if (likely(page)) {
+	if (page) {
 have_slab:
 		s->cpu_slab[cpu] = page;
 		SetPageActive(page);
-		goto redo;
+		goto load_freelist;
 	}
 
 	page = new_slab(s, gfpflags, node);
@@ -1397,7 +1418,7 @@ have_slab:
 			discard_slab(s, page);
 			page = s->cpu_slab[cpu];
 			slab_lock(page);
-			goto redo;
+			goto load_freelist;
 		}
 		/* New slab does not fit our expectations */
 		flush_slab(s, s->cpu_slab[cpu], cpu);
@@ -1405,16 +1426,52 @@ have_slab:
 		slab_lock(page);
 		goto have_slab;
 	}
-	local_irq_restore(flags);
 	return NULL;
 debug:
+	object = page->freelist;
 	if (!alloc_object_checks(s, page, object))
 		goto another_slab;
 	if (s->flags & SLAB_STORE_USER)
 		set_track(s, object, TRACK_ALLOC, addr);
 	trace(s, page, object, 1);
 	init_object(s, object, 1);
-	goto have_object;
+
+	page->inuse++;
+	page->freelist = object[page->offset];
+	slab_unlock(page);
+	return object;
+}
+
+/*
+ * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
+ * have the fastpath folded into their functions. So no function call
+ * overhead for requests that can be satisfied on the fastpath.
+ *
+ * The fastpath works by first checking if the lockless freelist can be used.
+ * If not then __slab_alloc is called for slow processing.
+ *
+ * Otherwise we can simply pick the next object from the lockless free list.
+ */
+static void __always_inline *slab_alloc(struct kmem_cache *s,
+		gfp_t gfpflags, int node, void *addr)
+{
+	struct page *page;
+	void **object;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	page = s->cpu_slab[smp_processor_id()];
+	if (unlikely(!page || !page->lockless_freelist ||
+			(node != -1 && page_to_nid(page) != node)))
+
+		object = __slab_alloc(s, gfpflags, node, addr, page);
+
+	else {
+		object = page->lockless_freelist;
+		page->lockless_freelist = object[page->offset];
+	}
+	local_irq_restore(flags);
+	return object;
 }
 
 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
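
The inlined fastpath added above reduces to a lock-free pop from the cpu-local list while interrupts are off, with one branch deciding between it and __slab_alloc. A standalone sketch of that decision (illustrative names; slow_alloc stands in for __slab_alloc):

#include <stddef.h>

struct toy_slab {
	void **lockless_freelist;
	unsigned int offset;
};

static void *slow_alloc(struct toy_slab *slab)
{
	(void)slab;		/* refill / new-slab handling would live here */
	return NULL;
}

/* The fastpath: pop from the cpu-local list, no lock taken. */
static void *fast_alloc(struct toy_slab *slab)
{
	void **object;

	if (!slab || !slab->lockless_freelist)
		return slow_alloc(slab);

	object = slab->lockless_freelist;
	slab->lockless_freelist = object[slab->offset];
	return object;
}

int main(void)
{
	void *a[1] = { NULL };
	struct toy_slab slab = { .lockless_freelist = a };

	return (fast_alloc(&slab) == (void *)a) ? 0 : 1;
}
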
@@ -1432,20 +1489,19 @@ EXPORT_SYMBOL(kmem_cache_alloc_node);
 #endif
 
 /*
- * The fastpath only writes the cacheline of the page struct and the first
- * cacheline of the object.
+ * Slow path handling. This may still be called frequently since objects
+ * have a longer lifetime than the cpu slabs in most processing loads.
  *
- * We read the cpu_slab cacheline to check if the slab is the per cpu
- * slab for this processor.
+ * So we still attempt to reduce cache line usage. Just take the slab
+ * lock and free the item. If there is no additional partial page
+ * handling required then we can return immediately.
  */
-static void slab_free(struct kmem_cache *s, struct page *page,
+static void __slab_free(struct kmem_cache *s, struct page *page,
 			void *x, void *addr)
 {
 	void *prior;
 	void **object = (void *)x;
-	unsigned long flags;
 
-	local_irq_save(flags);
 	slab_lock(page);
 
 	if (unlikely(SlabDebug(page)))
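
A toy version of the locked free path described above (illustrative names; locking reduced to a comment): the object is linked back onto the shared freelist, and the surrounding code then decides whether the slab needs partial-list handling or, as in the slab_empty path further down, can be discarded once it holds no allocated objects.

#include <stddef.h>

struct toy_slab {
	void **freelist;
	unsigned int offset;
	unsigned int inuse;
};

/* Locked free: push the object back onto the shared list. */
static void slow_free(struct toy_slab *slab, void *x)
{
	void **object = x;
	void *prior = slab->freelist;	/* NULL means the slab was full */

	/* In the kernel the slab lock is held around this. */
	object[slab->offset] = prior;
	slab->freelist = object;
	slab->inuse--;
}

int main(void)
{
	void *a[1] = { NULL };
	struct toy_slab slab = { .inuse = 1 };

	slow_free(&slab, a);
	return (slab.freelist == a && slab.inuse == 0) ? 0 : 1;
}
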
@@ -1475,7 +1531,6 @@ checks_ok:
 
 out_unlock:
 	slab_unlock(page);
-	local_irq_restore(flags);
 	return;
 
 slab_empty:
@@ -1487,7 +1542,6 @@ slab_empty:
 
 	slab_unlock(page);
 	discard_slab(s, page);
-	local_irq_restore(flags);
 	return;
 
 debug:
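
The next hunk adds the mirror-image freeing fastpath: with interrupts disabled, an object freed to the current cpu's slab is pushed onto the cpu-local list without taking the slab lock; anything else (wrong slab, or a debug slab) goes through __slab_free. A sketch of that decision (illustrative names; locked_free stands in for __slab_free):

#include <stdbool.h>
#include <stddef.h>

struct toy_slab {
	void **lockless_freelist;
	unsigned int offset;
	bool debug;
};

static void locked_free(struct toy_slab *slab, void *x)
{
	(void)slab;
	(void)x;	/* slab-lock protected freeing would live here */
}

/* The freeing fastpath: push onto the cpu-local list, no lock taken. */
static void fast_free(struct toy_slab *cpu_slab, struct toy_slab *page,
		      void *x)
{
	void **object = x;

	if (page == cpu_slab && !page->debug) {
		object[page->offset] = page->lockless_freelist;
		page->lockless_freelist = object;
	} else {
		locked_free(page, x);
	}
}

int main(void)
{
	void *a[1] = { NULL };
	struct toy_slab slab = { .offset = 0 };

	fast_free(&slab, &slab, a);
	return (slab.lockless_freelist == a) ? 0 : 1;
}
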
@@ -1502,6 +1556,34 @@ debug:
 	goto checks_ok;
 }
 
+/*
+ * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
+ * can perform fastpath freeing without additional function calls.
+ *
+ * The fastpath is only possible if we are freeing to the current cpu slab
+ * of this processor. This is typically the case if we have just allocated
+ * the item before.
+ *
+ * If the fastpath is not possible then fall back to __slab_free where we
+ * deal with all sorts of special processing.
+ */
+static void __always_inline slab_free(struct kmem_cache *s,
+			struct page *page, void *x, void *addr)
+{
+	void **object = (void *)x;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	if (likely(page == s->cpu_slab[smp_processor_id()] &&
+						!SlabDebug(page))) {
+		object[page->offset] = page->lockless_freelist;
+		page->lockless_freelist = object;
+	} else
+		__slab_free(s, page, x, addr);
+
+	local_irq_restore(flags);
+}
+
 void kmem_cache_free(struct kmem_cache *s, void *x)
 {
 	struct page *page;