-rw-r--r--  include/linux/mm_types.h |   7
-rw-r--r--  mm/slub.c                | 154
2 files changed, 123 insertions(+), 38 deletions(-)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e30687bad075..d5bb1796e12b 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -50,13 +50,16 @@ struct page {
 		spinlock_t ptl;
 #endif
 		struct {	/* SLUB uses */
-			struct page *first_page;	/* Compound pages */
+			void **lockless_freelist;
 			struct kmem_cache *slab;	/* Pointer to slab */
 		};
+		struct {
+			struct page *first_page;	/* Compound pages */
+		};
 	};
 	union {
 		pgoff_t index;		/* Our offset within mapping. */
-		void *freelist;		/* SLUB: pointer to free object */
+		void *freelist;		/* SLUB: freelist req. slab lock */
 	};
 	struct list_head lru;		/* Pageout list, eg. active_list
 					 * protected by zone->lru_lock !
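An aside on the hunk above: the new SLUB field does not grow struct page, because first_page is moved into a second anonymous struct inside the same union and therefore overlays lockless_freelist. A minimal, standalone C sketch of that layout (toy_page and its field types are made up for illustration, not taken from the patch):

#include <stdio.h>
#include <stddef.h>

struct toy_page {
	union {
		struct {			/* SLUB uses */
			void **lockless_freelist;
			void *slab;
		};
		struct {
			struct toy_page *first_page;	/* Compound pages */
		};
	};
};

int main(void)
{
	/* Both anonymous structs start at offset 0 of the union, so
	 * first_page shares storage with lockless_freelist and the
	 * union stays two words wide. */
	printf("sizeof = %zu, lockless_freelist @ %zu, first_page @ %zu\n",
	       sizeof(struct toy_page),
	       offsetof(struct toy_page, lockless_freelist),
	       offsetof(struct toy_page, first_page));
	return 0;
}

The freelist comment change also matters for the rest of the patch: the regular freelist is only touched under the slab lock, while the lockless freelist is private to the cpu that owns the slab.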
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -81,10 +81,14 @@
  * PageActive		The slab is used as a cpu cache. Allocations
  *			may be performed from the slab. The slab is not
  *			on any slab list and cannot be moved onto one.
+ *			The cpu slab may be equipped with an additional
+ *			lockless_freelist that allows lockless access to
+ *			free objects in addition to the regular freelist
+ *			that requires the slab lock.
  *
  * PageError		Slab requires special handling due to debug
  *			options set. This moves slab handling out of
- *			the fast path.
+ *			the fast path and disables lockless freelists.
  */
 
 static inline int SlabDebug(struct page *page)
@@ -1014,6 +1018,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	set_freepointer(s, last, NULL);
 
 	page->freelist = start;
+	page->lockless_freelist = NULL;
 	page->inuse = 0;
 out:
 	if (flags & __GFP_WAIT)
@@ -1276,6 +1281,23 @@ static void putback_slab(struct kmem_cache *s, struct page *page)
  */
 static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu)
 {
+	/*
+	 * Merge cpu freelist into freelist. Typically we get here
+	 * because both freelists are empty. So this is unlikely
+	 * to occur.
+	 */
+	while (unlikely(page->lockless_freelist)) {
+		void **object;
+
+		/* Retrieve object from cpu_freelist */
+		object = page->lockless_freelist;
+		page->lockless_freelist = page->lockless_freelist[page->offset];
+
+		/* And put onto the regular freelist */
+		object[page->offset] = page->freelist;
+		page->freelist = object;
+		page->inuse--;
+	}
 	s->cpu_slab[cpu] = NULL;
 	ClearPageActive(page);
 
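The loop added to deactivate_slab() above relies on SLUB storing the next-free pointer inside the free object itself, at word index page->offset. A rough userspace sketch of that splice, with made-up names and a fixed OFFSET standing in for page->offset (this is not the kernel code):

#include <stdio.h>
#include <stdlib.h>

#define NOBJ	4
#define OFFSET	0	/* word index of the next-free pointer inside an object */

/* Pop every object off 'from' and push it onto 'to', the way the new
 * deactivate_slab() merges the lockless freelist back into the regular one. */
static void merge_freelists(void ***from, void ***to, int offset)
{
	while (*from) {
		void **object = *from;

		*from = object[offset];		/* advance to the next free object */
		object[offset] = *to;		/* link object into 'to' */
		*to = object;
	}
}

int main(void)
{
	void **objects[NOBJ];
	void **lockless = NULL, **regular = NULL;
	int i, n = 0;

	/* Build a lockless freelist: each free "object" holds the pointer to
	 * the next free object at word OFFSET, just as SLUB reuses the free
	 * object's own memory for the freelist link. */
	for (i = 0; i < NOBJ; i++) {
		objects[i] = calloc(2, sizeof(void *));
		objects[i][OFFSET] = lockless;
		lockless = objects[i];
	}

	merge_freelists(&lockless, &regular, OFFSET);

	for (; regular; regular = regular[OFFSET])
		n++;
	printf("objects moved to the regular freelist: %d\n", n);

	for (i = 0; i < NOBJ; i++)
		free(objects[i]);
	return 0;
}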
@@ -1322,47 +1344,46 @@ static void flush_all(struct kmem_cache *s)
 }
 
 /*
- * slab_alloc is optimized to only modify two cachelines on the fast path
- * (aside from the stack):
+ * Slow path. The lockless freelist is empty or we need to perform
+ * debugging duties.
+ *
+ * Interrupts are disabled.
  *
- * 1. The page struct
- * 2. The first cacheline of the object to be allocated.
+ * Processing is still very fast if new objects have been freed to the
+ * regular freelist. In that case we simply take over the regular freelist
+ * as the lockless freelist and zap the regular freelist.
  *
- * The only other cache lines that are read (apart from code) is the
- * per cpu array in the kmem_cache struct.
+ * If that is not working then we fall back to the partial lists. We take the
+ * first element of the freelist as the object to allocate now and move the
+ * rest of the freelist to the lockless freelist.
  *
- * Fastpath is not possible if we need to get a new slab or have
- * debugging enabled (which means all slabs are marked with SlabDebug)
+ * And if we were unable to get a new slab from the partial slab lists then
+ * we need to allocate a new slab. This is the slowest path since we may sleep.
  */
-static void *slab_alloc(struct kmem_cache *s,
-		gfp_t gfpflags, int node, void *addr)
+static void *__slab_alloc(struct kmem_cache *s,
+		gfp_t gfpflags, int node, void *addr, struct page *page)
 {
-	struct page *page;
 	void **object;
-	unsigned long flags;
-	int cpu;
+	int cpu = smp_processor_id();
 
-	local_irq_save(flags);
-	cpu = smp_processor_id();
-	page = s->cpu_slab[cpu];
 	if (!page)
 		goto new_slab;
 
 	slab_lock(page);
 	if (unlikely(node != -1 && page_to_nid(page) != node))
 		goto another_slab;
-redo:
+load_freelist:
 	object = page->freelist;
 	if (unlikely(!object))
 		goto another_slab;
 	if (unlikely(SlabDebug(page)))
 		goto debug;
 
-have_object:
-	page->inuse++;
-	page->freelist = object[page->offset];
+	object = page->freelist;
+	page->lockless_freelist = object[page->offset];
+	page->inuse = s->objects;
+	page->freelist = NULL;
 	slab_unlock(page);
-	local_irq_restore(flags);
 	return object;
 
 another_slab:
@@ -1370,11 +1391,11 @@ another_slab:
 
 new_slab:
 	page = get_partial(s, gfpflags, node);
-	if (likely(page)) {
+	if (page) {
 have_slab:
 		s->cpu_slab[cpu] = page;
 		SetPageActive(page);
-		goto redo;
+		goto load_freelist;
 	}
 
 	page = new_slab(s, gfpflags, node);
@@ -1397,7 +1418,7 @@ have_slab:
 			discard_slab(s, page);
 			page = s->cpu_slab[cpu];
 			slab_lock(page);
-			goto redo;
+			goto load_freelist;
 		}
 		/* New slab does not fit our expectations */
 		flush_slab(s, s->cpu_slab[cpu], cpu);
@@ -1405,16 +1426,52 @@ have_slab:
 		slab_lock(page);
 		goto have_slab;
 	}
-	local_irq_restore(flags);
 	return NULL;
 debug:
+	object = page->freelist;
 	if (!alloc_object_checks(s, page, object))
 		goto another_slab;
 	if (s->flags & SLAB_STORE_USER)
 		set_track(s, object, TRACK_ALLOC, addr);
 	trace(s, page, object, 1);
 	init_object(s, object, 1);
-	goto have_object;
+
+	page->inuse++;
+	page->freelist = object[page->offset];
+	slab_unlock(page);
+	return object;
+}
+
+/*
+ * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
+ * have the fastpath folded into their functions. So no function call
+ * overhead for requests that can be satisfied on the fastpath.
+ *
+ * The fastpath works by first checking if the lockless freelist can be used.
+ * If not then __slab_alloc is called for slow processing.
+ *
+ * Otherwise we can simply pick the next object from the lockless freelist.
+ */
+static void __always_inline *slab_alloc(struct kmem_cache *s,
+		gfp_t gfpflags, int node, void *addr)
+{
+	struct page *page;
+	void **object;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	page = s->cpu_slab[smp_processor_id()];
+	if (unlikely(!page || !page->lockless_freelist ||
+			(node != -1 && page_to_nid(page) != node)))
+
+		object = __slab_alloc(s, gfpflags, node, addr, page);
+
+	else {
+		object = page->lockless_freelist;
+		page->lockless_freelist = object[page->offset];
+	}
+	local_irq_restore(flags);
+	return object;
 }
 
 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
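The new shape of the allocator (a forced-inline slab_alloc() wrapper that only touches page->lockless_freelist, with __slab_alloc() kept out of line for refills and debugging) is a classic fastpath/slowpath split. A very rough userspace sketch of that shape, with hypothetical toy_* names and a mutex standing in for the slab lock and interrupt disabling:

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

struct toy_cache {
	void **lockless_freelist;	/* consumed lock-free on the fast path */
	void **freelist;		/* refill source, protected by the lock */
	pthread_mutex_t lock;
};

/* Out-of-line slow path: take the lock, hand out the first object and move
 * the rest of the regular freelist over to the lockless freelist. */
static void *toy_alloc_slow(struct toy_cache *c)
{
	void **object;

	pthread_mutex_lock(&c->lock);
	object = c->freelist;
	if (object) {
		c->lockless_freelist = object[0];
		c->freelist = NULL;
	}
	pthread_mutex_unlock(&c->lock);
	return object;
}

/* Inlined fast path: a single pointer pop when the lockless list is populated. */
static inline void *toy_alloc(struct toy_cache *c)
{
	void **object = c->lockless_freelist;

	if (!object)
		return toy_alloc_slow(c);
	c->lockless_freelist = object[0];
	return object;
}

int main(void)
{
	struct toy_cache c = { NULL, NULL, PTHREAD_MUTEX_INITIALIZER };
	void **objs[3];
	int i;

	for (i = 0; i < 3; i++) {	/* seed the regular freelist */
		objs[i] = calloc(2, sizeof(void *));
		objs[i][0] = c.freelist;
		c.freelist = objs[i];
	}

	/* The first call refills via the slow path; the next two stay lock-free. */
	for (i = 0; i < 3; i++)
		printf("alloc -> %p\n", toy_alloc(&c));

	for (i = 0; i < 3; i++)
		free(objs[i]);
	return 0;
}

In the patch itself it is disabled interrupts, not a lock, that keep the per-cpu lockless freelist consistent, which is why slab_alloc() only needs local_irq_save() around the pop.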
@@ -1432,20 +1489,19 @@ EXPORT_SYMBOL(kmem_cache_alloc_node);
 #endif
 
 /*
- * The fastpath only writes the cacheline of the page struct and the first
- * cacheline of the object.
+ * Slow path handling. This may still be called frequently since objects
+ * have a longer lifetime than the cpu slabs in most processing loads.
  *
- * We read the cpu_slab cacheline to check if the slab is the per cpu
- * slab for this processor.
+ * So we still attempt to reduce cache line usage. Just take the slab
+ * lock and free the item. If there is no additional partial page
+ * handling required then we can return immediately.
  */
-static void slab_free(struct kmem_cache *s, struct page *page,
+static void __slab_free(struct kmem_cache *s, struct page *page,
 			void *x, void *addr)
 {
 	void *prior;
 	void **object = (void *)x;
-	unsigned long flags;
 
-	local_irq_save(flags);
 	slab_lock(page);
 
 	if (unlikely(SlabDebug(page)))
@@ -1475,7 +1531,6 @@ checks_ok:
 
 out_unlock:
 	slab_unlock(page);
-	local_irq_restore(flags);
 	return;
 
 slab_empty:
@@ -1487,7 +1542,6 @@ slab_empty:
 
 	slab_unlock(page);
 	discard_slab(s, page);
-	local_irq_restore(flags);
 	return;
 
 debug:
@@ -1502,6 +1556,34 @@ debug:
 	goto checks_ok;
 }
 
+/*
+ * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
+ * can perform fastpath freeing without additional function calls.
+ *
+ * The fastpath is only possible if we are freeing to the current cpu slab
+ * of this processor. This is typically the case if we have just allocated
+ * the item before.
+ *
+ * If the fastpath is not possible then we fall back to __slab_free where we
+ * deal with all sorts of special processing.
+ */
+static void __always_inline slab_free(struct kmem_cache *s,
+			struct page *page, void *x, void *addr)
+{
+	void **object = (void *)x;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	if (likely(page == s->cpu_slab[smp_processor_id()] &&
+						!SlabDebug(page))) {
+		object[page->offset] = page->lockless_freelist;
+		page->lockless_freelist = object;
+	} else
+		__slab_free(s, page, x, addr);
+
+	local_irq_restore(flags);
+}
+
 void kmem_cache_free(struct kmem_cache *s, void *x)
 {
 	struct page *page;
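For symmetry, a similarly rough sketch of the free side (again userspace, all names hypothetical): freeing into the current cpu slab is a two-store push onto the lockless freelist, while anything else drops into a locked slow path, mirroring the slab_free()/__slab_free() split above.

#include <stdio.h>
#include <pthread.h>

struct toy_slab {
	void **lockless_freelist;	/* fast-path list, owning cpu only */
	void **freelist;		/* regular list, needs the lock */
	pthread_mutex_t lock;
};

static struct toy_slab *toy_cpu_slab;	/* stand-in for s->cpu_slab[cpu] */

/* Out-of-line slow path: lock the slab and use the regular freelist. */
static void toy_free_slow(struct toy_slab *slab, void **object)
{
	pthread_mutex_lock(&slab->lock);
	object[0] = slab->freelist;
	slab->freelist = object;
	pthread_mutex_unlock(&slab->lock);
}

/* Inlined fast path: push onto the lockless freelist when the object
 * belongs to the current cpu slab, otherwise fall back. */
static inline void toy_free(struct toy_slab *slab, void *x)
{
	void **object = x;

	if (slab == toy_cpu_slab) {
		object[0] = slab->lockless_freelist;
		slab->lockless_freelist = object;
	} else {
		toy_free_slow(slab, object);
	}
}

int main(void)
{
	struct toy_slab a = { NULL, NULL, PTHREAD_MUTEX_INITIALIZER };
	struct toy_slab b = { NULL, NULL, PTHREAD_MUTEX_INITIALIZER };
	void *obj_a[2] = { NULL }, *obj_b[2] = { NULL };

	toy_cpu_slab = &a;
	toy_free(&a, obj_a);	/* fast: lockless push */
	toy_free(&b, obj_b);	/* slow: locked regular freelist */
	printf("a.lockless_freelist=%p b.freelist=%p\n",
	       (void *)a.lockless_freelist, (void *)b.freelist);
	return 0;
}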