Diffstat (limited to 'mm/slub.c')
| -rw-r--r-- | mm/slub.c | 159 |
1 files changed, 120 insertions, 39 deletions
| @@ -81,10 +81,14 @@ | |||
| 81 | * PageActive The slab is used as a cpu cache. Allocations | 81 | * PageActive The slab is used as a cpu cache. Allocations |
| 82 | * may be performed from the slab. The slab is not | 82 | * may be performed from the slab. The slab is not |
| 83 | * on any slab list and cannot be moved onto one. | 83 | * on any slab list and cannot be moved onto one. |
| 84 | * The cpu slab may be equipped with an additional | ||
| 85 | * lockless_freelist that allows lockless access to | ||
| 86 | * free objects in addition to the regular freelist | ||
| 87 | * that requires the slab lock. | ||
| 84 | * | 88 | * |
| 85 | * PageError Slab requires special handling due to debug | 89 | * PageError Slab requires special handling due to debug |
| 86 | * options set. This moves slab handling out of | 90 | * options set. This moves slab handling out of |
| 87 | * the fast path. | 91 | * the fast path and disables lockless freelists. |
| 88 | */ | 92 | */ |
| 89 | 93 | ||
| 90 | static inline int SlabDebug(struct page *page) | 94 | static inline int SlabDebug(struct page *page) |
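The comment added above is the heart of the change: a cpu slab now carries a second, cpu-private freelist next to the lock-protected one, and both lists are threaded through the free objects themselves. A minimal sketch of that layout, with toy types rather than the kernel's struct page:

struct toy_slab {
	void *freelist;            /* shared list, requires the slab lock      */
	void *lockless_freelist;   /* cpu-private list, owning cpu only        */
	int offset;                /* word index of the link inside an object  */
	int inuse;                 /* objects currently handed out             */
};

/* Pop the head of either list; the next pointer lives inside the object. */
static void *toy_pop(void **head, int offset)
{
	void **object = *head;

	if (object)
		*head = object[offset];
	return object;
}

Popping either list is the same two-pointer update; the difference is purely who may touch the head: toy_pop(&slab->lockless_freelist, slab->offset) corresponds to the fastpath, while the same operation on &slab->freelist is only legal with the slab lock held.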
| @@ -1014,6 +1018,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
| 1014 | set_freepointer(s, last, NULL); | 1018 | set_freepointer(s, last, NULL); |
| 1015 | 1019 | ||
| 1016 | page->freelist = start; | 1020 | page->freelist = start; |
| 1021 | page->lockless_freelist = NULL; | ||
| 1017 | page->inuse = 0; | 1022 | page->inuse = 0; |
| 1018 | out: | 1023 | out: |
| 1019 | if (flags & __GFP_WAIT) | 1024 | if (flags & __GFP_WAIT) |
| @@ -1276,6 +1281,23 @@ static void putback_slab(struct kmem_cache *s, struct page *page) | |||
| 1276 | */ | 1281 | */ |
| 1277 | static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu) | 1282 | static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu) |
| 1278 | { | 1283 | { |
| 1284 | /* | ||
| 1285 | * Merge cpu freelist into freelist. Typically we get here | ||
| 1286 | * because both freelists are empty. So this is unlikely | ||
| 1287 | * to occur. | ||
| 1288 | */ | ||
| 1289 | while (unlikely(page->lockless_freelist)) { | ||
| 1290 | void **object; | ||
| 1291 | |||
| 1292 | /* Retrieve object from cpu_freelist */ | ||
| 1293 | object = page->lockless_freelist; | ||
| 1294 | page->lockless_freelist = page->lockless_freelist[page->offset]; | ||
| 1295 | |||
| 1296 | /* And put onto the regular freelist */ | ||
| 1297 | object[page->offset] = page->freelist; | ||
| 1298 | page->freelist = object; | ||
| 1299 | page->inuse--; | ||
| 1300 | } | ||
| 1279 | s->cpu_slab[cpu] = NULL; | 1301 | s->cpu_slab[cpu] = NULL; |
| 1280 | ClearPageActive(page); | 1302 | ClearPageActive(page); |
| 1281 | 1303 | ||
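The loop added to deactivate_slab() walks the cpu-private list and pushes each object back onto the shared freelist so the slab can leave the cpu. A simplified version of the same pointer juggling, reusing the toy_slab structure from the earlier sketch:

/* Drain the cpu-private list back onto the shared list, mirroring the
 * loop added to deactivate_slab() above. */
static void toy_drain_lockless(struct toy_slab *s)
{
	while (s->lockless_freelist) {
		void **object = s->lockless_freelist;

		/* unlink from the cpu-private list ... */
		s->lockless_freelist = object[s->offset];

		/* ... and push onto the shared freelist */
		object[s->offset] = s->freelist;
		s->freelist = object;
		s->inuse--;
	}
}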
| @@ -1322,47 +1344,46 @@ static void flush_all(struct kmem_cache *s) | |||
| 1322 | } | 1344 | } |
| 1323 | 1345 | ||
| 1324 | /* | 1346 | /* |
| 1325 | * slab_alloc is optimized to only modify two cachelines on the fast path | 1347 | * Slow path. The lockless freelist is empty or we need to perform |
| 1326 | * (aside from the stack): | 1348 | * debugging duties. |
| 1349 | * | ||
| 1350 | * Interrupts are disabled. | ||
| 1327 | * | 1351 | * |
| 1328 | * 1. The page struct | 1352 | * Processing is still very fast if new objects have been freed to the |
| 1329 | * 2. The first cacheline of the object to be allocated. | 1353 | * regular freelist. In that case we simply take over the regular freelist |
| 1354 | * as the lockless freelist and zap the regular freelist. | ||
| 1330 | * | 1355 | * |
| 1331 | * The only other cache lines that are read (apart from code) is the | 1356 | * If that is not working then we fall back to the partial lists. We take the |
| 1332 | * per cpu array in the kmem_cache struct. | 1357 | * first element of the freelist as the object to allocate now and move the |
| 1358 | * rest of the freelist to the lockless freelist. | ||
| 1333 | * | 1359 | * |
| 1334 | * Fastpath is not possible if we need to get a new slab or have | 1360 | * And if we were unable to get a new slab from the partial slab lists then |
| 1335 | * debugging enabled (which means all slabs are marked with SlabDebug) | 1361 | * we need to allocate a new slab. This is the slowest path since we may sleep. |
| 1336 | */ | 1362 | */ |
| 1337 | static void *slab_alloc(struct kmem_cache *s, | 1363 | static void *__slab_alloc(struct kmem_cache *s, |
| 1338 | gfp_t gfpflags, int node, void *addr) | 1364 | gfp_t gfpflags, int node, void *addr, struct page *page) |
| 1339 | { | 1365 | { |
| 1340 | struct page *page; | ||
| 1341 | void **object; | 1366 | void **object; |
| 1342 | unsigned long flags; | 1367 | int cpu = smp_processor_id(); |
| 1343 | int cpu; | ||
| 1344 | 1368 | ||
| 1345 | local_irq_save(flags); | ||
| 1346 | cpu = smp_processor_id(); | ||
| 1347 | page = s->cpu_slab[cpu]; | ||
| 1348 | if (!page) | 1369 | if (!page) |
| 1349 | goto new_slab; | 1370 | goto new_slab; |
| 1350 | 1371 | ||
| 1351 | slab_lock(page); | 1372 | slab_lock(page); |
| 1352 | if (unlikely(node != -1 && page_to_nid(page) != node)) | 1373 | if (unlikely(node != -1 && page_to_nid(page) != node)) |
| 1353 | goto another_slab; | 1374 | goto another_slab; |
| 1354 | redo: | 1375 | load_freelist: |
| 1355 | object = page->freelist; | 1376 | object = page->freelist; |
| 1356 | if (unlikely(!object)) | 1377 | if (unlikely(!object)) |
| 1357 | goto another_slab; | 1378 | goto another_slab; |
| 1358 | if (unlikely(SlabDebug(page))) | 1379 | if (unlikely(SlabDebug(page))) |
| 1359 | goto debug; | 1380 | goto debug; |
| 1360 | 1381 | ||
| 1361 | have_object: | 1382 | object = page->freelist; |
| 1362 | page->inuse++; | 1383 | page->lockless_freelist = object[page->offset]; |
| 1363 | page->freelist = object[page->offset]; | 1384 | page->inuse = s->objects; |
| 1385 | page->freelist = NULL; | ||
| 1364 | slab_unlock(page); | 1386 | slab_unlock(page); |
| 1365 | local_irq_restore(flags); | ||
| 1366 | return object; | 1387 | return object; |
| 1367 | 1388 | ||
| 1368 | another_slab: | 1389 | another_slab: |
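The load_freelist step the comment describes hands the entire shared freelist over to the cpu-private list under the slab lock: the first object goes to the caller, the remainder becomes the lockless freelist, and inuse is raised to the slab's full object count up front. Roughly, with the toy types from the earlier sketch:

/* Take over the shared freelist as the cpu-private list, as the
 * load_freelist block above does. */
static void *toy_load_freelist(struct toy_slab *s, int objects)
{
	void **object = s->freelist;              /* first object for the caller */

	s->lockless_freelist = object[s->offset]; /* rest is now cpu-private     */
	s->inuse = objects;                       /* account for all objects now */
	s->freelist = NULL;                       /* shared list is zapped       */
	return object;
}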
| @@ -1370,11 +1391,11 @@ another_slab: | |||
| 1370 | 1391 | ||
| 1371 | new_slab: | 1392 | new_slab: |
| 1372 | page = get_partial(s, gfpflags, node); | 1393 | page = get_partial(s, gfpflags, node); |
| 1373 | if (likely(page)) { | 1394 | if (page) { |
| 1374 | have_slab: | 1395 | have_slab: |
| 1375 | s->cpu_slab[cpu] = page; | 1396 | s->cpu_slab[cpu] = page; |
| 1376 | SetPageActive(page); | 1397 | SetPageActive(page); |
| 1377 | goto redo; | 1398 | goto load_freelist; |
| 1378 | } | 1399 | } |
| 1379 | 1400 | ||
| 1380 | page = new_slab(s, gfpflags, node); | 1401 | page = new_slab(s, gfpflags, node); |
| @@ -1397,7 +1418,7 @@ have_slab: | |||
| 1397 | discard_slab(s, page); | 1418 | discard_slab(s, page); |
| 1398 | page = s->cpu_slab[cpu]; | 1419 | page = s->cpu_slab[cpu]; |
| 1399 | slab_lock(page); | 1420 | slab_lock(page); |
| 1400 | goto redo; | 1421 | goto load_freelist; |
| 1401 | } | 1422 | } |
| 1402 | /* New slab does not fit our expectations */ | 1423 | /* New slab does not fit our expectations */ |
| 1403 | flush_slab(s, s->cpu_slab[cpu], cpu); | 1424 | flush_slab(s, s->cpu_slab[cpu], cpu); |
| @@ -1405,16 +1426,52 @@ have_slab: | |||
| 1405 | slab_lock(page); | 1426 | slab_lock(page); |
| 1406 | goto have_slab; | 1427 | goto have_slab; |
| 1407 | } | 1428 | } |
| 1408 | local_irq_restore(flags); | ||
| 1409 | return NULL; | 1429 | return NULL; |
| 1410 | debug: | 1430 | debug: |
| 1431 | object = page->freelist; | ||
| 1411 | if (!alloc_object_checks(s, page, object)) | 1432 | if (!alloc_object_checks(s, page, object)) |
| 1412 | goto another_slab; | 1433 | goto another_slab; |
| 1413 | if (s->flags & SLAB_STORE_USER) | 1434 | if (s->flags & SLAB_STORE_USER) |
| 1414 | set_track(s, object, TRACK_ALLOC, addr); | 1435 | set_track(s, object, TRACK_ALLOC, addr); |
| 1415 | trace(s, page, object, 1); | 1436 | trace(s, page, object, 1); |
| 1416 | init_object(s, object, 1); | 1437 | init_object(s, object, 1); |
| 1417 | goto have_object; | 1438 | |
| 1439 | page->inuse++; | ||
| 1440 | page->freelist = object[page->offset]; | ||
| 1441 | slab_unlock(page); | ||
| 1442 | return object; | ||
| 1443 | } | ||
| 1444 | |||
| 1445 | /* | ||
| 1446 | * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) | ||
| 1447 | * have the fastpath folded into their functions. So no function call | ||
| 1448 | * overhead for requests that can be satisfied on the fastpath. | ||
| 1449 | * | ||
| 1450 | * The fastpath works by first checking if the lockless freelist can be used. | ||
| 1451 | * If not then __slab_alloc is called for slow processing. | ||
| 1452 | * | ||
| 1453 | * Otherwise we can simply pick the next object from the lockless free list. | ||
| 1454 | */ | ||
| 1455 | static void __always_inline *slab_alloc(struct kmem_cache *s, | ||
| 1456 | gfp_t gfpflags, int node, void *addr) | ||
| 1457 | { | ||
| 1458 | struct page *page; | ||
| 1459 | void **object; | ||
| 1460 | unsigned long flags; | ||
| 1461 | |||
| 1462 | local_irq_save(flags); | ||
| 1463 | page = s->cpu_slab[smp_processor_id()]; | ||
| 1464 | if (unlikely(!page || !page->lockless_freelist || | ||
| 1465 | (node != -1 && page_to_nid(page) != node))) | ||
| 1466 | |||
| 1467 | object = __slab_alloc(s, gfpflags, node, addr, page); | ||
| 1468 | |||
| 1469 | else { | ||
| 1470 | object = page->lockless_freelist; | ||
| 1471 | page->lockless_freelist = object[page->offset]; | ||
| 1472 | } | ||
| 1473 | local_irq_restore(flags); | ||
| 1474 | return object; | ||
| 1418 | } | 1475 | } |
| 1419 | 1476 | ||
| 1420 | void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) | 1477 | void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) |
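Taken together, the new slab_alloc() fastpath is just a lockless pop with a fallback into __slab_alloc(). A compact sketch of that split, again with the toy types; the stub stands in for the slow path, and in the kernel the whole sequence runs with interrupts disabled and the cpu slab selected via smp_processor_id():

static void *toy_slow_alloc(struct toy_slab *s)
{
	/* refill from the shared freelist, the partial lists or the page
	 * allocator; omitted in this sketch */
	return (void *)0;
}

static void *toy_alloc(struct toy_slab *s)
{
	void **object = s->lockless_freelist;

	if (!object)                        /* cpu-private list is empty     */
		return toy_slow_alloc(s);   /* fall back to the locked path  */

	s->lockless_freelist = object[s->offset];   /* lockless pop          */
	return object;
}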
| @@ -1432,20 +1489,19 @@ EXPORT_SYMBOL(kmem_cache_alloc_node); | |||
| 1432 | #endif | 1489 | #endif |
| 1433 | 1490 | ||
| 1434 | /* | 1491 | /* |
| 1435 | * The fastpath only writes the cacheline of the page struct and the first | 1492 | * Slow path handling. This may still be called frequently since objects |
| 1436 | * cacheline of the object. | 1493 | * have a longer lifetime than the cpu slabs in most processing loads. |
| 1437 | * | 1494 | * |
| 1438 | * We read the cpu_slab cacheline to check if the slab is the per cpu | 1495 | * So we still attempt to reduce cache line usage. Just take the slab |
| 1439 | * slab for this processor. | 1496 | * lock and free the item. If there is no additional partial page |
| 1497 | * handling required then we can return immediately. | ||
| 1440 | */ | 1498 | */ |
| 1441 | static void slab_free(struct kmem_cache *s, struct page *page, | 1499 | static void __slab_free(struct kmem_cache *s, struct page *page, |
| 1442 | void *x, void *addr) | 1500 | void *x, void *addr) |
| 1443 | { | 1501 | { |
| 1444 | void *prior; | 1502 | void *prior; |
| 1445 | void **object = (void *)x; | 1503 | void **object = (void *)x; |
| 1446 | unsigned long flags; | ||
| 1447 | 1504 | ||
| 1448 | local_irq_save(flags); | ||
| 1449 | slab_lock(page); | 1505 | slab_lock(page); |
| 1450 | 1506 | ||
| 1451 | if (unlikely(SlabDebug(page))) | 1507 | if (unlikely(SlabDebug(page))) |
| @@ -1475,7 +1531,6 @@ checks_ok: | |||
| 1475 | 1531 | ||
| 1476 | out_unlock: | 1532 | out_unlock: |
| 1477 | slab_unlock(page); | 1533 | slab_unlock(page); |
| 1478 | local_irq_restore(flags); | ||
| 1479 | return; | 1534 | return; |
| 1480 | 1535 | ||
| 1481 | slab_empty: | 1536 | slab_empty: |
| @@ -1487,7 +1542,6 @@ slab_empty: | |||
| 1487 | 1542 | ||
| 1488 | slab_unlock(page); | 1543 | slab_unlock(page); |
| 1489 | discard_slab(s, page); | 1544 | discard_slab(s, page); |
| 1490 | local_irq_restore(flags); | ||
| 1491 | return; | 1545 | return; |
| 1492 | 1546 | ||
| 1493 | debug: | 1547 | debug: |
| @@ -1502,6 +1556,34 @@ debug: | |||
| 1502 | goto checks_ok; | 1556 | goto checks_ok; |
| 1503 | } | 1557 | } |
| 1504 | 1558 | ||
| 1559 | /* | ||
| 1560 | * Fastpath with forced inlining to produce a kfree and kmem_cache_free that | ||
| 1561 | * can perform fastpath freeing without additional function calls. | ||
| 1562 | * | ||
| 1563 | * The fastpath is only possible if we are freeing to the current cpu slab | ||
| 1564 | * of this processor. This is typically the case if we have just allocated | ||
| 1565 | * the item before. | ||
| 1566 | * | ||
| 1567 | * If fastpath is not possible then fall back to __slab_free where we deal | ||
| 1568 | * with all sorts of special processing. | ||
| 1569 | */ | ||
| 1570 | static void __always_inline slab_free(struct kmem_cache *s, | ||
| 1571 | struct page *page, void *x, void *addr) | ||
| 1572 | { | ||
| 1573 | void **object = (void *)x; | ||
| 1574 | unsigned long flags; | ||
| 1575 | |||
| 1576 | local_irq_save(flags); | ||
| 1577 | if (likely(page == s->cpu_slab[smp_processor_id()] && | ||
| 1578 | !SlabDebug(page))) { | ||
| 1579 | object[page->offset] = page->lockless_freelist; | ||
| 1580 | page->lockless_freelist = object; | ||
| 1581 | } else | ||
| 1582 | __slab_free(s, page, x, addr); | ||
| 1583 | |||
| 1584 | local_irq_restore(flags); | ||
| 1585 | } | ||
| 1586 | |||
| 1505 | void kmem_cache_free(struct kmem_cache *s, void *x) | 1587 | void kmem_cache_free(struct kmem_cache *s, void *x) |
| 1506 | { | 1588 | { |
| 1507 | struct page *page; | 1589 | struct page *page; |
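The freeing side mirrors the allocation fastpath: if the object belongs to the slab this cpu currently owns (and debugging is off for that slab), it is pushed onto the cpu-private list without taking the slab lock; everything else goes through __slab_free(). Sketched with the same toy types, with a stub in place of the locked path and ignoring the interrupt disabling the real code does:

static void toy_slow_free(struct toy_slab *slab, void *x)
{
	/* locked freeing plus partial/empty slab handling; omitted here */
}

static void toy_free(struct toy_slab *slab, struct toy_slab *cpu_slab, void *x)
{
	void **object = x;

	if (slab == cpu_slab) {                 /* object's slab is the cpu slab */
		object[slab->offset] = slab->lockless_freelist;
		slab->lockless_freelist = object;   /* lockless push             */
	} else {
		toy_slow_free(slab, x);             /* fall back to the locked path */
	}
}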
| @@ -2363,9 +2445,8 @@ void __init kmem_cache_init(void) | |||
| 2363 | register_cpu_notifier(&slab_notifier); | 2445 | register_cpu_notifier(&slab_notifier); |
| 2364 | #endif | 2446 | #endif |
| 2365 | 2447 | ||
| 2366 | if (nr_cpu_ids) /* Remove when nr_cpu_ids is fixed upstream ! */ | 2448 | kmem_size = offsetof(struct kmem_cache, cpu_slab) + |
| 2367 | kmem_size = offsetof(struct kmem_cache, cpu_slab) | 2449 | nr_cpu_ids * sizeof(struct page *); |
| 2368 | + nr_cpu_ids * sizeof(struct page *); | ||
| 2369 | 2450 | ||
| 2370 | printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," | 2451 | printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," |
| 2371 | " Processors=%d, Nodes=%d\n", | 2452 | " Processors=%d, Nodes=%d\n", |
