 -rw-r--r--  include/linux/mm_types.h |  24
 -rw-r--r--  include/linux/slab.h     |   9
 -rw-r--r--  include/linux/slab_def.h |   4
 -rw-r--r--  mm/slab.c                | 571
 4 files changed, 243 insertions, 365 deletions
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index faf4b7c1ad12..95bf0c5a7eb9 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
| @@ -42,18 +42,22 @@ struct page { | |||
| 42 | /* First double word block */ | 42 | /* First double word block */ |
| 43 | unsigned long flags; /* Atomic flags, some possibly | 43 | unsigned long flags; /* Atomic flags, some possibly |
| 44 | * updated asynchronously */ | 44 | * updated asynchronously */ |
| 45 | struct address_space *mapping; /* If low bit clear, points to | 45 | union { |
| 46 | * inode address_space, or NULL. | 46 | struct address_space *mapping; /* If low bit clear, points to |
| 47 | * If page mapped as anonymous | 47 | * inode address_space, or NULL. |
| 48 | * memory, low bit is set, and | 48 | * If page mapped as anonymous |
| 49 | * it points to anon_vma object: | 49 | * memory, low bit is set, and |
| 50 | * see PAGE_MAPPING_ANON below. | 50 | * it points to anon_vma object: |
| 51 | */ | 51 | * see PAGE_MAPPING_ANON below. |
| 52 | */ | ||
| 53 | void *s_mem; /* slab first object */ | ||
| 54 | }; | ||
| 55 | |||
| 52 | /* Second double word */ | 56 | /* Second double word */ |
| 53 | struct { | 57 | struct { |
| 54 | union { | 58 | union { |
| 55 | pgoff_t index; /* Our offset within mapping. */ | 59 | pgoff_t index; /* Our offset within mapping. */ |
| 56 | void *freelist; /* slub/slob first free object */ | 60 | void *freelist; /* sl[aou]b first free object */ |
| 57 | bool pfmemalloc; /* If set by the page allocator, | 61 | bool pfmemalloc; /* If set by the page allocator, |
| 58 | * ALLOC_NO_WATERMARKS was set | 62 | * ALLOC_NO_WATERMARKS was set |
| 59 | * and the low watermark was not | 63 | * and the low watermark was not |
| @@ -109,6 +113,7 @@ struct page { | |||
| 109 | }; | 113 | }; |
| 110 | atomic_t _count; /* Usage count, see below. */ | 114 | atomic_t _count; /* Usage count, see below. */ |
| 111 | }; | 115 | }; |
| 116 | unsigned int active; /* SLAB */ | ||
| 112 | }; | 117 | }; |
| 113 | }; | 118 | }; |
| 114 | 119 | ||
| @@ -130,6 +135,9 @@ struct page { | |||
| 130 | 135 | ||
| 131 | struct list_head list; /* slobs list of pages */ | 136 | struct list_head list; /* slobs list of pages */ |
| 132 | struct slab *slab_page; /* slab fields */ | 137 | struct slab *slab_page; /* slab fields */ |
| 138 | struct rcu_head rcu_head; /* Used by SLAB | ||
| 139 | * when destroying via RCU | ||
| 140 | */ | ||
| 133 | }; | 141 | }; |
| 134 | 142 | ||
| 135 | /* Remainder is not double word aligned */ | 143 | /* Remainder is not double word aligned */ |
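With these union additions, SLAB no longer needs a separate struct slab descriptor: s_mem points at the first object of a slab, freelist (shared with the other allocators) at the per-slab bookkeeping, active counts the objects currently handed out, and rcu_head overlays the otherwise unused lru entry while a slab is freed via RCU. A minimal user-space sketch of how an object address and index are derived from these fields follows; struct page_stub and the helper names are illustrative stand-ins, not the real kernel definitions.

#include <stddef.h>

/* Illustrative stand-in for the slab-related fields of struct page. */
struct page_stub {
        void *s_mem;            /* address of the first object in the slab */
        unsigned int *freelist; /* array of free object indices */
        unsigned int active;    /* objects currently allocated from this slab */
};

/* Mirrors index_to_obj(): object address = s_mem + size * index. */
static inline void *index_to_obj_stub(const struct page_stub *page,
                                      size_t obj_size, unsigned int idx)
{
        return (char *)page->s_mem + obj_size * idx;
}

/* Mirrors obj_to_index(), without the reciprocal-divide optimisation. */
static inline unsigned int obj_to_index_stub(const struct page_stub *page,
                                             size_t obj_size, const void *obj)
{
        return (unsigned int)(((const char *)obj -
                               (const char *)page->s_mem) / obj_size);
}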
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 6c5cc0ea8713..caaad51fee1f 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
| @@ -51,7 +51,14 @@ | |||
| 51 | * } | 51 | * } |
| 52 | * rcu_read_unlock(); | 52 | * rcu_read_unlock(); |
| 53 | * | 53 | * |
| 54 | * See also the comment on struct slab_rcu in mm/slab.c. | 54 | * This is useful if we need to approach a kernel structure obliquely, |
| 55 | * from its address obtained without the usual locking. We can lock | ||
| 56 | * the structure to stabilize it and check it's still at the given address, | ||
| 57 | * only if we can be sure that the memory has not been meanwhile reused | ||
| 58 | * for some other kind of object (which our subsystem's lock might corrupt). | ||
| 59 | * | ||
| 60 | * rcu_read_lock before reading the address, then rcu_read_unlock after | ||
| 61 | * taking the spinlock within the structure expected at that address. | ||
| 55 | */ | 62 | */ |
| 56 | #define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ | 63 | #define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ |
| 57 | #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ | 64 | #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ |
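The expanded comment above describes the usage pattern SLAB_DESTROY_BY_RCU is meant for. A hedged sketch of that pattern follows; struct thing, lookup_thing() and the key field are hypothetical names used only to illustrate the revalidation step, not an existing kernel API.

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct thing {
        spinlock_t lock;
        int key;
        /* ... payload ... */
};

/* Hypothetical lockless index whose entries come from a cache created
 * with SLAB_DESTROY_BY_RCU: under rcu_read_lock() the memory stays a
 * struct thing (possibly a recycled one), so it is safe to touch. */
struct thing *lookup_thing(int key);

static struct thing *lookup_and_lock(int key)
{
        struct thing *t;

        rcu_read_lock();
        t = lookup_thing(key);
        if (t) {
                spin_lock(&t->lock);
                /* Revalidate: the object may have been freed and reused
                 * for a different thing while we looked it up. */
                if (t->key != key) {
                        spin_unlock(&t->lock);
                        t = NULL;
                }
        }
        rcu_read_unlock();
        return t;               /* locked on success, NULL otherwise */
}

The slab page itself may only go back to the page allocator after a grace period, which is what the deferred kmem_freepages() via the page's rcu_head later in this patch provides.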
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index cd401580bdd3..ca82e8ff89fa 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
| @@ -41,8 +41,8 @@ struct kmem_cache { | |||
| 41 | 41 | ||
| 42 | size_t colour; /* cache colouring range */ | 42 | size_t colour; /* cache colouring range */ |
| 43 | unsigned int colour_off; /* colour offset */ | 43 | unsigned int colour_off; /* colour offset */ |
| 44 | struct kmem_cache *slabp_cache; | 44 | struct kmem_cache *freelist_cache; |
| 45 | unsigned int slab_size; | 45 | unsigned int freelist_size; |
| 46 | 46 | ||
| 47 | /* constructor func */ | 47 | /* constructor func */ |
| 48 | void (*ctor)(void *obj); | 48 | void (*ctor)(void *obj); |
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
| @@ -164,72 +164,6 @@ | |||
| 164 | static bool pfmemalloc_active __read_mostly; | 164 | static bool pfmemalloc_active __read_mostly; |
| 165 | 165 | ||
| 166 | /* | 166 | /* |
| 167 | * kmem_bufctl_t: | ||
| 168 | * | ||
| 169 | * Bufctl's are used for linking objs within a slab | ||
| 170 | * linked offsets. | ||
| 171 | * | ||
| 172 | * This implementation relies on "struct page" for locating the cache & | ||
| 173 | * slab an object belongs to. | ||
| 174 | * This allows the bufctl structure to be small (one int), but limits | ||
| 175 | * the number of objects a slab (not a cache) can contain when off-slab | ||
| 176 | * bufctls are used. The limit is the size of the largest general cache | ||
| 177 | * that does not use off-slab slabs. | ||
| 178 | * For 32bit archs with 4 kB pages, is this 56. | ||
| 179 | * This is not serious, as it is only for large objects, when it is unwise | ||
| 180 | * to have too many per slab. | ||
| 181 | * Note: This limit can be raised by introducing a general cache whose size | ||
| 182 | * is less than 512 (PAGE_SIZE<<3), but greater than 256. | ||
| 183 | */ | ||
| 184 | |||
| 185 | typedef unsigned int kmem_bufctl_t; | ||
| 186 | #define BUFCTL_END (((kmem_bufctl_t)(~0U))-0) | ||
| 187 | #define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1) | ||
| 188 | #define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2) | ||
| 189 | #define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3) | ||
| 190 | |||
| 191 | /* | ||
| 192 | * struct slab_rcu | ||
| 193 | * | ||
| 194 | * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to | ||
| 195 | * arrange for kmem_freepages to be called via RCU. This is useful if | ||
| 196 | * we need to approach a kernel structure obliquely, from its address | ||
| 197 | * obtained without the usual locking. We can lock the structure to | ||
| 198 | * stabilize it and check it's still at the given address, only if we | ||
| 199 | * can be sure that the memory has not been meanwhile reused for some | ||
| 200 | * other kind of object (which our subsystem's lock might corrupt). | ||
| 201 | * | ||
| 202 | * rcu_read_lock before reading the address, then rcu_read_unlock after | ||
| 203 | * taking the spinlock within the structure expected at that address. | ||
| 204 | */ | ||
| 205 | struct slab_rcu { | ||
| 206 | struct rcu_head head; | ||
| 207 | struct kmem_cache *cachep; | ||
| 208 | void *addr; | ||
| 209 | }; | ||
| 210 | |||
| 211 | /* | ||
| 212 | * struct slab | ||
| 213 | * | ||
| 214 | * Manages the objs in a slab. Placed either at the beginning of mem allocated | ||
| 215 | * for a slab, or allocated from an general cache. | ||
| 216 | * Slabs are chained into three list: fully used, partial, fully free slabs. | ||
| 217 | */ | ||
| 218 | struct slab { | ||
| 219 | union { | ||
| 220 | struct { | ||
| 221 | struct list_head list; | ||
| 222 | unsigned long colouroff; | ||
| 223 | void *s_mem; /* including colour offset */ | ||
| 224 | unsigned int inuse; /* num of objs active in slab */ | ||
| 225 | kmem_bufctl_t free; | ||
| 226 | unsigned short nodeid; | ||
| 227 | }; | ||
| 228 | struct slab_rcu __slab_cover_slab_rcu; | ||
| 229 | }; | ||
| 230 | }; | ||
| 231 | |||
| 232 | /* | ||
| 233 | * struct array_cache | 167 | * struct array_cache |
| 234 | * | 168 | * |
| 235 | * Purpose: | 169 | * Purpose: |
| @@ -456,18 +390,10 @@ static inline struct kmem_cache *virt_to_cache(const void *obj) | |||
| 456 | return page->slab_cache; | 390 | return page->slab_cache; |
| 457 | } | 391 | } |
| 458 | 392 | ||
| 459 | static inline struct slab *virt_to_slab(const void *obj) | 393 | static inline void *index_to_obj(struct kmem_cache *cache, struct page *page, |
| 460 | { | ||
| 461 | struct page *page = virt_to_head_page(obj); | ||
| 462 | |||
| 463 | VM_BUG_ON(!PageSlab(page)); | ||
| 464 | return page->slab_page; | ||
| 465 | } | ||
| 466 | |||
| 467 | static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, | ||
| 468 | unsigned int idx) | 394 | unsigned int idx) |
| 469 | { | 395 | { |
| 470 | return slab->s_mem + cache->size * idx; | 396 | return page->s_mem + cache->size * idx; |
| 471 | } | 397 | } |
| 472 | 398 | ||
| 473 | /* | 399 | /* |
| @@ -477,9 +403,9 @@ static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, | |||
| 477 | * reciprocal_divide(offset, cache->reciprocal_buffer_size) | 403 | * reciprocal_divide(offset, cache->reciprocal_buffer_size) |
| 478 | */ | 404 | */ |
| 479 | static inline unsigned int obj_to_index(const struct kmem_cache *cache, | 405 | static inline unsigned int obj_to_index(const struct kmem_cache *cache, |
| 480 | const struct slab *slab, void *obj) | 406 | const struct page *page, void *obj) |
| 481 | { | 407 | { |
| 482 | u32 offset = (obj - slab->s_mem); | 408 | u32 offset = (obj - page->s_mem); |
| 483 | return reciprocal_divide(offset, cache->reciprocal_buffer_size); | 409 | return reciprocal_divide(offset, cache->reciprocal_buffer_size); |
| 484 | } | 410 | } |
| 485 | 411 | ||
| @@ -641,7 +567,7 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) | |||
| 641 | 567 | ||
| 642 | static size_t slab_mgmt_size(size_t nr_objs, size_t align) | 568 | static size_t slab_mgmt_size(size_t nr_objs, size_t align) |
| 643 | { | 569 | { |
| 644 | return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); | 570 | return ALIGN(nr_objs * sizeof(unsigned int), align); |
| 645 | } | 571 | } |
| 646 | 572 | ||
| 647 | /* | 573 | /* |
| @@ -660,8 +586,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, | |||
| 660 | * on it. For the latter case, the memory allocated for a | 586 | * on it. For the latter case, the memory allocated for a |
| 661 | * slab is used for: | 587 | * slab is used for: |
| 662 | * | 588 | * |
| 663 | * - The struct slab | 589 | * - One unsigned int for each object |
| 664 | * - One kmem_bufctl_t for each object | ||
| 665 | * - Padding to respect alignment of @align | 590 | * - Padding to respect alignment of @align |
| 666 | * - @buffer_size bytes for each object | 591 | * - @buffer_size bytes for each object |
| 667 | * | 592 | * |
| @@ -674,8 +599,6 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, | |||
| 674 | mgmt_size = 0; | 599 | mgmt_size = 0; |
| 675 | nr_objs = slab_size / buffer_size; | 600 | nr_objs = slab_size / buffer_size; |
| 676 | 601 | ||
| 677 | if (nr_objs > SLAB_LIMIT) | ||
| 678 | nr_objs = SLAB_LIMIT; | ||
| 679 | } else { | 602 | } else { |
| 680 | /* | 603 | /* |
| 681 | * Ignore padding for the initial guess. The padding | 604 | * Ignore padding for the initial guess. The padding |
| @@ -685,8 +608,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, | |||
| 685 | * into the memory allocation when taking the padding | 608 | * into the memory allocation when taking the padding |
| 686 | * into account. | 609 | * into account. |
| 687 | */ | 610 | */ |
| 688 | nr_objs = (slab_size - sizeof(struct slab)) / | 611 | nr_objs = (slab_size) / (buffer_size + sizeof(unsigned int)); |
| 689 | (buffer_size + sizeof(kmem_bufctl_t)); | ||
| 690 | 612 | ||
| 691 | /* | 613 | /* |
| 692 | * This calculated number will be either the right | 614 | * This calculated number will be either the right |
| @@ -696,9 +618,6 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, | |||
| 696 | > slab_size) | 618 | > slab_size) |
| 697 | nr_objs--; | 619 | nr_objs--; |
| 698 | 620 | ||
| 699 | if (nr_objs > SLAB_LIMIT) | ||
| 700 | nr_objs = SLAB_LIMIT; | ||
| 701 | |||
| 702 | mgmt_size = slab_mgmt_size(nr_objs, align); | 621 | mgmt_size = slab_mgmt_size(nr_objs, align); |
| 703 | } | 622 | } |
| 704 | *num = nr_objs; | 623 | *num = nr_objs; |
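As a worked example of the arithmetic above (the slab size, object size and alignment are illustrative, not taken from the patch): for a 4096-byte slab with 120-byte objects and an on-slab freelist, the first guess is 4096 / (120 + 4) = 33 objects; ALIGN(33 * 4, 8) = 136 bytes of freelist plus 33 * 120 = 3960 bytes of objects exactly fills the slab, so the guess stands. A small user-space re-implementation of the on-slab estimate:

#include <stdio.h>
#include <stddef.h>

#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((size_t)(a) - 1))

/* Rough re-implementation of the new on-slab cache_estimate() logic;
 * the call below uses illustrative sizes, not values from the patch. */
static void estimate(size_t slab_size, size_t buffer_size, size_t align)
{
        size_t nr_objs, mgmt;

        /* First guess: ignore the alignment padding of the freelist. */
        nr_objs = slab_size / (buffer_size + sizeof(unsigned int));

        /* The guess can be at most one too high once padding counts. */
        if (ALIGN_UP(nr_objs * sizeof(unsigned int), align)
            + nr_objs * buffer_size > slab_size)
                nr_objs--;

        mgmt = ALIGN_UP(nr_objs * sizeof(unsigned int), align);
        printf("%zu objects, %zu freelist bytes, %zu bytes left over\n",
               nr_objs, mgmt, slab_size - mgmt - nr_objs * buffer_size);
}

int main(void)
{
        estimate(4096, 120, 8);         /* prints: 33 objects, 136 ..., 0 ... */
        return 0;
}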
| @@ -829,10 +748,8 @@ static struct array_cache *alloc_arraycache(int node, int entries, | |||
| 829 | return nc; | 748 | return nc; |
| 830 | } | 749 | } |
| 831 | 750 | ||
| 832 | static inline bool is_slab_pfmemalloc(struct slab *slabp) | 751 | static inline bool is_slab_pfmemalloc(struct page *page) |
| 833 | { | 752 | { |
| 834 | struct page *page = virt_to_page(slabp->s_mem); | ||
| 835 | |||
| 836 | return PageSlabPfmemalloc(page); | 753 | return PageSlabPfmemalloc(page); |
| 837 | } | 754 | } |
| 838 | 755 | ||
| @@ -841,23 +758,23 @@ static void recheck_pfmemalloc_active(struct kmem_cache *cachep, | |||
| 841 | struct array_cache *ac) | 758 | struct array_cache *ac) |
| 842 | { | 759 | { |
| 843 | struct kmem_cache_node *n = cachep->node[numa_mem_id()]; | 760 | struct kmem_cache_node *n = cachep->node[numa_mem_id()]; |
| 844 | struct slab *slabp; | 761 | struct page *page; |
| 845 | unsigned long flags; | 762 | unsigned long flags; |
| 846 | 763 | ||
| 847 | if (!pfmemalloc_active) | 764 | if (!pfmemalloc_active) |
| 848 | return; | 765 | return; |
| 849 | 766 | ||
| 850 | spin_lock_irqsave(&n->list_lock, flags); | 767 | spin_lock_irqsave(&n->list_lock, flags); |
| 851 | list_for_each_entry(slabp, &n->slabs_full, list) | 768 | list_for_each_entry(page, &n->slabs_full, lru) |
| 852 | if (is_slab_pfmemalloc(slabp)) | 769 | if (is_slab_pfmemalloc(page)) |
| 853 | goto out; | 770 | goto out; |
| 854 | 771 | ||
| 855 | list_for_each_entry(slabp, &n->slabs_partial, list) | 772 | list_for_each_entry(page, &n->slabs_partial, lru) |
| 856 | if (is_slab_pfmemalloc(slabp)) | 773 | if (is_slab_pfmemalloc(page)) |
| 857 | goto out; | 774 | goto out; |
| 858 | 775 | ||
| 859 | list_for_each_entry(slabp, &n->slabs_free, list) | 776 | list_for_each_entry(page, &n->slabs_free, lru) |
| 860 | if (is_slab_pfmemalloc(slabp)) | 777 | if (is_slab_pfmemalloc(page)) |
| 861 | goto out; | 778 | goto out; |
| 862 | 779 | ||
| 863 | pfmemalloc_active = false; | 780 | pfmemalloc_active = false; |
| @@ -897,8 +814,8 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, | |||
| 897 | */ | 814 | */ |
| 898 | n = cachep->node[numa_mem_id()]; | 815 | n = cachep->node[numa_mem_id()]; |
| 899 | if (!list_empty(&n->slabs_free) && force_refill) { | 816 | if (!list_empty(&n->slabs_free) && force_refill) { |
| 900 | struct slab *slabp = virt_to_slab(objp); | 817 | struct page *page = virt_to_head_page(objp); |
| 901 | ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem)); | 818 | ClearPageSlabPfmemalloc(page); |
| 902 | clear_obj_pfmemalloc(&objp); | 819 | clear_obj_pfmemalloc(&objp); |
| 903 | recheck_pfmemalloc_active(cachep, ac); | 820 | recheck_pfmemalloc_active(cachep, ac); |
| 904 | return objp; | 821 | return objp; |
| @@ -1099,8 +1016,7 @@ static void drain_alien_cache(struct kmem_cache *cachep, | |||
| 1099 | 1016 | ||
| 1100 | static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | 1017 | static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) |
| 1101 | { | 1018 | { |
| 1102 | struct slab *slabp = virt_to_slab(objp); | 1019 | int nodeid = page_to_nid(virt_to_page(objp)); |
| 1103 | int nodeid = slabp->nodeid; | ||
| 1104 | struct kmem_cache_node *n; | 1020 | struct kmem_cache_node *n; |
| 1105 | struct array_cache *alien = NULL; | 1021 | struct array_cache *alien = NULL; |
| 1106 | int node; | 1022 | int node; |
| @@ -1111,7 +1027,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | |||
| 1111 | * Make sure we are not freeing a object from another node to the array | 1027 | * Make sure we are not freeing a object from another node to the array |
| 1112 | * cache on this cpu. | 1028 | * cache on this cpu. |
| 1113 | */ | 1029 | */ |
| 1114 | if (likely(slabp->nodeid == node)) | 1030 | if (likely(nodeid == node)) |
| 1115 | return 0; | 1031 | return 0; |
| 1116 | 1032 | ||
| 1117 | n = cachep->node[node]; | 1033 | n = cachep->node[node]; |
| @@ -1512,6 +1428,8 @@ void __init kmem_cache_init(void) | |||
| 1512 | { | 1428 | { |
| 1513 | int i; | 1429 | int i; |
| 1514 | 1430 | ||
| 1431 | BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) < | ||
| 1432 | sizeof(struct rcu_head)); | ||
| 1515 | kmem_cache = &kmem_cache_boot; | 1433 | kmem_cache = &kmem_cache_boot; |
| 1516 | setup_node_pointer(kmem_cache); | 1434 | setup_node_pointer(kmem_cache); |
| 1517 | 1435 | ||
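The new BUILD_BUG_ON documents the overlay introduced in mm_types.h: the rcu_head used for deferred freeing must fit inside the lru list head it replaces. For reference, a user-space sanity check with simplified stand-in types (sizes assume an LP64 build; these are not the kernel structures themselves):

#include <stdio.h>

/* Simplified stand-ins: both are two pointer-sized words on LP64. */
struct list_head_stub { void *next, *prev; };
struct rcu_head_stub  { void *next; void (*func)(void *); };

int main(void)
{
        /* Mirrors BUILD_BUG_ON(sizeof(page->lru) < sizeof(struct rcu_head)). */
        printf("list_head: %zu bytes, rcu_head: %zu bytes\n",
               sizeof(struct list_head_stub), sizeof(struct rcu_head_stub));
        return sizeof(struct rcu_head_stub) > sizeof(struct list_head_stub);
}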
| @@ -1687,7 +1605,7 @@ static noinline void | |||
| 1687 | slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) | 1605 | slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) |
| 1688 | { | 1606 | { |
| 1689 | struct kmem_cache_node *n; | 1607 | struct kmem_cache_node *n; |
| 1690 | struct slab *slabp; | 1608 | struct page *page; |
| 1691 | unsigned long flags; | 1609 | unsigned long flags; |
| 1692 | int node; | 1610 | int node; |
| 1693 | 1611 | ||
| @@ -1706,15 +1624,15 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) | |||
| 1706 | continue; | 1624 | continue; |
| 1707 | 1625 | ||
| 1708 | spin_lock_irqsave(&n->list_lock, flags); | 1626 | spin_lock_irqsave(&n->list_lock, flags); |
| 1709 | list_for_each_entry(slabp, &n->slabs_full, list) { | 1627 | list_for_each_entry(page, &n->slabs_full, lru) { |
| 1710 | active_objs += cachep->num; | 1628 | active_objs += cachep->num; |
| 1711 | active_slabs++; | 1629 | active_slabs++; |
| 1712 | } | 1630 | } |
| 1713 | list_for_each_entry(slabp, &n->slabs_partial, list) { | 1631 | list_for_each_entry(page, &n->slabs_partial, lru) { |
| 1714 | active_objs += slabp->inuse; | 1632 | active_objs += page->active; |
| 1715 | active_slabs++; | 1633 | active_slabs++; |
| 1716 | } | 1634 | } |
| 1717 | list_for_each_entry(slabp, &n->slabs_free, list) | 1635 | list_for_each_entry(page, &n->slabs_free, lru) |
| 1718 | num_slabs++; | 1636 | num_slabs++; |
| 1719 | 1637 | ||
| 1720 | free_objects += n->free_objects; | 1638 | free_objects += n->free_objects; |
| @@ -1736,19 +1654,11 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) | |||
| 1736 | * did not request dmaable memory, we might get it, but that | 1654 | * did not request dmaable memory, we might get it, but that |
| 1737 | * would be relatively rare and ignorable. | 1655 | * would be relatively rare and ignorable. |
| 1738 | */ | 1656 | */ |
| 1739 | static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | 1657 | static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, |
| 1658 | int nodeid) | ||
| 1740 | { | 1659 | { |
| 1741 | struct page *page; | 1660 | struct page *page; |
| 1742 | int nr_pages; | 1661 | int nr_pages; |
| 1743 | int i; | ||
| 1744 | |||
| 1745 | #ifndef CONFIG_MMU | ||
| 1746 | /* | ||
| 1747 | * Nommu uses slab's for process anonymous memory allocations, and thus | ||
| 1748 | * requires __GFP_COMP to properly refcount higher order allocations | ||
| 1749 | */ | ||
| 1750 | flags |= __GFP_COMP; | ||
| 1751 | #endif | ||
| 1752 | 1662 | ||
| 1753 | flags |= cachep->allocflags; | 1663 | flags |= cachep->allocflags; |
| 1754 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) | 1664 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) |
| @@ -1772,12 +1682,9 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
| 1772 | else | 1682 | else |
| 1773 | add_zone_page_state(page_zone(page), | 1683 | add_zone_page_state(page_zone(page), |
| 1774 | NR_SLAB_UNRECLAIMABLE, nr_pages); | 1684 | NR_SLAB_UNRECLAIMABLE, nr_pages); |
| 1775 | for (i = 0; i < nr_pages; i++) { | 1685 | __SetPageSlab(page); |
| 1776 | __SetPageSlab(page + i); | 1686 | if (page->pfmemalloc) |
| 1777 | 1687 | SetPageSlabPfmemalloc(page); | |
| 1778 | if (page->pfmemalloc) | ||
| 1779 | SetPageSlabPfmemalloc(page + i); | ||
| 1780 | } | ||
| 1781 | memcg_bind_pages(cachep, cachep->gfporder); | 1688 | memcg_bind_pages(cachep, cachep->gfporder); |
| 1782 | 1689 | ||
| 1783 | if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { | 1690 | if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { |
| @@ -1789,17 +1696,15 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
| 1789 | kmemcheck_mark_unallocated_pages(page, nr_pages); | 1696 | kmemcheck_mark_unallocated_pages(page, nr_pages); |
| 1790 | } | 1697 | } |
| 1791 | 1698 | ||
| 1792 | return page_address(page); | 1699 | return page; |
| 1793 | } | 1700 | } |
| 1794 | 1701 | ||
| 1795 | /* | 1702 | /* |
| 1796 | * Interface to system's page release. | 1703 | * Interface to system's page release. |
| 1797 | */ | 1704 | */ |
| 1798 | static void kmem_freepages(struct kmem_cache *cachep, void *addr) | 1705 | static void kmem_freepages(struct kmem_cache *cachep, struct page *page) |
| 1799 | { | 1706 | { |
| 1800 | unsigned long i = (1 << cachep->gfporder); | 1707 | const unsigned long nr_freed = (1 << cachep->gfporder); |
| 1801 | struct page *page = virt_to_page(addr); | ||
| 1802 | const unsigned long nr_freed = i; | ||
| 1803 | 1708 | ||
| 1804 | kmemcheck_free_shadow(page, cachep->gfporder); | 1709 | kmemcheck_free_shadow(page, cachep->gfporder); |
| 1805 | 1710 | ||
| @@ -1809,27 +1714,28 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) | |||
| 1809 | else | 1714 | else |
| 1810 | sub_zone_page_state(page_zone(page), | 1715 | sub_zone_page_state(page_zone(page), |
| 1811 | NR_SLAB_UNRECLAIMABLE, nr_freed); | 1716 | NR_SLAB_UNRECLAIMABLE, nr_freed); |
| 1812 | while (i--) { | 1717 | |
| 1813 | BUG_ON(!PageSlab(page)); | 1718 | BUG_ON(!PageSlab(page)); |
| 1814 | __ClearPageSlabPfmemalloc(page); | 1719 | __ClearPageSlabPfmemalloc(page); |
| 1815 | __ClearPageSlab(page); | 1720 | __ClearPageSlab(page); |
| 1816 | page++; | 1721 | page_mapcount_reset(page); |
| 1817 | } | 1722 | page->mapping = NULL; |
| 1818 | 1723 | ||
| 1819 | memcg_release_pages(cachep, cachep->gfporder); | 1724 | memcg_release_pages(cachep, cachep->gfporder); |
| 1820 | if (current->reclaim_state) | 1725 | if (current->reclaim_state) |
| 1821 | current->reclaim_state->reclaimed_slab += nr_freed; | 1726 | current->reclaim_state->reclaimed_slab += nr_freed; |
| 1822 | free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder); | 1727 | __free_memcg_kmem_pages(page, cachep->gfporder); |
| 1823 | } | 1728 | } |
| 1824 | 1729 | ||
| 1825 | static void kmem_rcu_free(struct rcu_head *head) | 1730 | static void kmem_rcu_free(struct rcu_head *head) |
| 1826 | { | 1731 | { |
| 1827 | struct slab_rcu *slab_rcu = (struct slab_rcu *)head; | 1732 | struct kmem_cache *cachep; |
| 1828 | struct kmem_cache *cachep = slab_rcu->cachep; | 1733 | struct page *page; |
| 1829 | 1734 | ||
| 1830 | kmem_freepages(cachep, slab_rcu->addr); | 1735 | page = container_of(head, struct page, rcu_head); |
| 1831 | if (OFF_SLAB(cachep)) | 1736 | cachep = page->slab_cache; |
| 1832 | kmem_cache_free(cachep->slabp_cache, slab_rcu); | 1737 | |
| 1738 | kmem_freepages(cachep, page); | ||
| 1833 | } | 1739 | } |
| 1834 | 1740 | ||
| 1835 | #if DEBUG | 1741 | #if DEBUG |
| @@ -1978,19 +1884,19 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
| 1978 | /* Print some data about the neighboring objects, if they | 1884 | /* Print some data about the neighboring objects, if they |
| 1979 | * exist: | 1885 | * exist: |
| 1980 | */ | 1886 | */ |
| 1981 | struct slab *slabp = virt_to_slab(objp); | 1887 | struct page *page = virt_to_head_page(objp); |
| 1982 | unsigned int objnr; | 1888 | unsigned int objnr; |
| 1983 | 1889 | ||
| 1984 | objnr = obj_to_index(cachep, slabp, objp); | 1890 | objnr = obj_to_index(cachep, page, objp); |
| 1985 | if (objnr) { | 1891 | if (objnr) { |
| 1986 | objp = index_to_obj(cachep, slabp, objnr - 1); | 1892 | objp = index_to_obj(cachep, page, objnr - 1); |
| 1987 | realobj = (char *)objp + obj_offset(cachep); | 1893 | realobj = (char *)objp + obj_offset(cachep); |
| 1988 | printk(KERN_ERR "Prev obj: start=%p, len=%d\n", | 1894 | printk(KERN_ERR "Prev obj: start=%p, len=%d\n", |
| 1989 | realobj, size); | 1895 | realobj, size); |
| 1990 | print_objinfo(cachep, objp, 2); | 1896 | print_objinfo(cachep, objp, 2); |
| 1991 | } | 1897 | } |
| 1992 | if (objnr + 1 < cachep->num) { | 1898 | if (objnr + 1 < cachep->num) { |
| 1993 | objp = index_to_obj(cachep, slabp, objnr + 1); | 1899 | objp = index_to_obj(cachep, page, objnr + 1); |
| 1994 | realobj = (char *)objp + obj_offset(cachep); | 1900 | realobj = (char *)objp + obj_offset(cachep); |
| 1995 | printk(KERN_ERR "Next obj: start=%p, len=%d\n", | 1901 | printk(KERN_ERR "Next obj: start=%p, len=%d\n", |
| 1996 | realobj, size); | 1902 | realobj, size); |
| @@ -2001,11 +1907,12 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
| 2001 | #endif | 1907 | #endif |
| 2002 | 1908 | ||
| 2003 | #if DEBUG | 1909 | #if DEBUG |
| 2004 | static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp) | 1910 | static void slab_destroy_debugcheck(struct kmem_cache *cachep, |
| 1911 | struct page *page) | ||
| 2005 | { | 1912 | { |
| 2006 | int i; | 1913 | int i; |
| 2007 | for (i = 0; i < cachep->num; i++) { | 1914 | for (i = 0; i < cachep->num; i++) { |
| 2008 | void *objp = index_to_obj(cachep, slabp, i); | 1915 | void *objp = index_to_obj(cachep, page, i); |
| 2009 | 1916 | ||
| 2010 | if (cachep->flags & SLAB_POISON) { | 1917 | if (cachep->flags & SLAB_POISON) { |
| 2011 | #ifdef CONFIG_DEBUG_PAGEALLOC | 1918 | #ifdef CONFIG_DEBUG_PAGEALLOC |
| @@ -2030,7 +1937,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab | |||
| 2030 | } | 1937 | } |
| 2031 | } | 1938 | } |
| 2032 | #else | 1939 | #else |
| 2033 | static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp) | 1940 | static void slab_destroy_debugcheck(struct kmem_cache *cachep, |
| 1941 | struct page *page) | ||
| 2034 | { | 1942 | { |
| 2035 | } | 1943 | } |
| 2036 | #endif | 1944 | #endif |
| @@ -2044,23 +1952,34 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab | |||
| 2044 | * Before calling the slab must have been unlinked from the cache. The | 1952 | * Before calling the slab must have been unlinked from the cache. The |
| 2045 | * cache-lock is not held/needed. | 1953 | * cache-lock is not held/needed. |
| 2046 | */ | 1954 | */ |
| 2047 | static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) | 1955 | static void slab_destroy(struct kmem_cache *cachep, struct page *page) |
| 2048 | { | 1956 | { |
| 2049 | void *addr = slabp->s_mem - slabp->colouroff; | 1957 | void *freelist; |
| 2050 | 1958 | ||
| 2051 | slab_destroy_debugcheck(cachep, slabp); | 1959 | freelist = page->freelist; |
| 1960 | slab_destroy_debugcheck(cachep, page); | ||
| 2052 | if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { | 1961 | if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { |
| 2053 | struct slab_rcu *slab_rcu; | 1962 | struct rcu_head *head; |
| 1963 | |||
| 1964 | /* | ||
| 1965 | * RCU free overloads the RCU head over the LRU. | ||
| 1966 | * slab_page has been overloaded over the LRU, | ||
| 1967 | * however it is not used from now on so that | ||
| 1968 | * we can use it safely. | ||
| 1969 | */ | ||
| 1970 | head = (void *)&page->rcu_head; | ||
| 1971 | call_rcu(head, kmem_rcu_free); | ||
| 2054 | 1972 | ||
| 2055 | slab_rcu = (struct slab_rcu *)slabp; | ||
| 2056 | slab_rcu->cachep = cachep; | ||
| 2057 | slab_rcu->addr = addr; | ||
| 2058 | call_rcu(&slab_rcu->head, kmem_rcu_free); | ||
| 2059 | } else { | 1973 | } else { |
| 2060 | kmem_freepages(cachep, addr); | 1974 | kmem_freepages(cachep, page); |
| 2061 | if (OFF_SLAB(cachep)) | ||
| 2062 | kmem_cache_free(cachep->slabp_cache, slabp); | ||
| 2063 | } | 1975 | } |
| 1976 | |||
| 1977 | /* | ||
| 1978 | * From now on, we don't use freelist | ||
| 1979 | * although the actual page can be freed in rcu context | ||
| 1980 | */ | ||
| 1981 | if (OFF_SLAB(cachep)) | ||
| 1982 | kmem_cache_free(cachep->freelist_cache, freelist); | ||
| 2064 | } | 1983 | } |
| 2065 | 1984 | ||
| 2066 | /** | 1985 | /** |
| @@ -2097,8 +2016,8 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, | |||
| 2097 | * use off-slab slabs. Needed to avoid a possible | 2016 | * use off-slab slabs. Needed to avoid a possible |
| 2098 | * looping condition in cache_grow(). | 2017 | * looping condition in cache_grow(). |
| 2099 | */ | 2018 | */ |
| 2100 | offslab_limit = size - sizeof(struct slab); | 2019 | offslab_limit = size; |
| 2101 | offslab_limit /= sizeof(kmem_bufctl_t); | 2020 | offslab_limit /= sizeof(unsigned int); |
| 2102 | 2021 | ||
| 2103 | if (num > offslab_limit) | 2022 | if (num > offslab_limit) |
| 2104 | break; | 2023 | break; |
| @@ -2220,7 +2139,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) | |||
| 2220 | int | 2139 | int |
| 2221 | __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | 2140 | __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) |
| 2222 | { | 2141 | { |
| 2223 | size_t left_over, slab_size, ralign; | 2142 | size_t left_over, freelist_size, ralign; |
| 2224 | gfp_t gfp; | 2143 | gfp_t gfp; |
| 2225 | int err; | 2144 | int err; |
| 2226 | size_t size = cachep->size; | 2145 | size_t size = cachep->size; |
| @@ -2339,22 +2258,21 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
| 2339 | if (!cachep->num) | 2258 | if (!cachep->num) |
| 2340 | return -E2BIG; | 2259 | return -E2BIG; |
| 2341 | 2260 | ||
| 2342 | slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) | 2261 | freelist_size = |
| 2343 | + sizeof(struct slab), cachep->align); | 2262 | ALIGN(cachep->num * sizeof(unsigned int), cachep->align); |
| 2344 | 2263 | ||
| 2345 | /* | 2264 | /* |
| 2346 | * If the slab has been placed off-slab, and we have enough space then | 2265 | * If the slab has been placed off-slab, and we have enough space then |
| 2347 | * move it on-slab. This is at the expense of any extra colouring. | 2266 | * move it on-slab. This is at the expense of any extra colouring. |
| 2348 | */ | 2267 | */ |
| 2349 | if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) { | 2268 | if (flags & CFLGS_OFF_SLAB && left_over >= freelist_size) { |
| 2350 | flags &= ~CFLGS_OFF_SLAB; | 2269 | flags &= ~CFLGS_OFF_SLAB; |
| 2351 | left_over -= slab_size; | 2270 | left_over -= freelist_size; |
| 2352 | } | 2271 | } |
| 2353 | 2272 | ||
| 2354 | if (flags & CFLGS_OFF_SLAB) { | 2273 | if (flags & CFLGS_OFF_SLAB) { |
| 2355 | /* really off slab. No need for manual alignment */ | 2274 | /* really off slab. No need for manual alignment */ |
| 2356 | slab_size = | 2275 | freelist_size = cachep->num * sizeof(unsigned int); |
| 2357 | cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab); | ||
| 2358 | 2276 | ||
| 2359 | #ifdef CONFIG_PAGE_POISONING | 2277 | #ifdef CONFIG_PAGE_POISONING |
| 2360 | /* If we're going to use the generic kernel_map_pages() | 2278 | /* If we're going to use the generic kernel_map_pages() |
| @@ -2371,16 +2289,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
| 2371 | if (cachep->colour_off < cachep->align) | 2289 | if (cachep->colour_off < cachep->align) |
| 2372 | cachep->colour_off = cachep->align; | 2290 | cachep->colour_off = cachep->align; |
| 2373 | cachep->colour = left_over / cachep->colour_off; | 2291 | cachep->colour = left_over / cachep->colour_off; |
| 2374 | cachep->slab_size = slab_size; | 2292 | cachep->freelist_size = freelist_size; |
| 2375 | cachep->flags = flags; | 2293 | cachep->flags = flags; |
| 2376 | cachep->allocflags = 0; | 2294 | cachep->allocflags = __GFP_COMP; |
| 2377 | if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) | 2295 | if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) |
| 2378 | cachep->allocflags |= GFP_DMA; | 2296 | cachep->allocflags |= GFP_DMA; |
| 2379 | cachep->size = size; | 2297 | cachep->size = size; |
| 2380 | cachep->reciprocal_buffer_size = reciprocal_value(size); | 2298 | cachep->reciprocal_buffer_size = reciprocal_value(size); |
| 2381 | 2299 | ||
| 2382 | if (flags & CFLGS_OFF_SLAB) { | 2300 | if (flags & CFLGS_OFF_SLAB) { |
| 2383 | cachep->slabp_cache = kmalloc_slab(slab_size, 0u); | 2301 | cachep->freelist_cache = kmalloc_slab(freelist_size, 0u); |
| 2384 | /* | 2302 | /* |
| 2385 | * This is a possibility for one of the malloc_sizes caches. | 2303 | * This is a possibility for one of the malloc_sizes caches. |
| 2386 | * But since we go off slab only for object size greater than | 2304 | * But since we go off slab only for object size greater than |
| @@ -2388,7 +2306,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
| 2388 | * this should not happen at all. | 2306 | * this should not happen at all. |
| 2389 | * But leave a BUG_ON for some lucky dude. | 2307 | * But leave a BUG_ON for some lucky dude. |
| 2390 | */ | 2308 | */ |
| 2391 | BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache)); | 2309 | BUG_ON(ZERO_OR_NULL_PTR(cachep->freelist_cache)); |
| 2392 | } | 2310 | } |
| 2393 | 2311 | ||
| 2394 | err = setup_cpu_cache(cachep, gfp); | 2312 | err = setup_cpu_cache(cachep, gfp); |
| @@ -2494,7 +2412,7 @@ static int drain_freelist(struct kmem_cache *cache, | |||
| 2494 | { | 2412 | { |
| 2495 | struct list_head *p; | 2413 | struct list_head *p; |
| 2496 | int nr_freed; | 2414 | int nr_freed; |
| 2497 | struct slab *slabp; | 2415 | struct page *page; |
| 2498 | 2416 | ||
| 2499 | nr_freed = 0; | 2417 | nr_freed = 0; |
| 2500 | while (nr_freed < tofree && !list_empty(&n->slabs_free)) { | 2418 | while (nr_freed < tofree && !list_empty(&n->slabs_free)) { |
| @@ -2506,18 +2424,18 @@ static int drain_freelist(struct kmem_cache *cache, | |||
| 2506 | goto out; | 2424 | goto out; |
| 2507 | } | 2425 | } |
| 2508 | 2426 | ||
| 2509 | slabp = list_entry(p, struct slab, list); | 2427 | page = list_entry(p, struct page, lru); |
| 2510 | #if DEBUG | 2428 | #if DEBUG |
| 2511 | BUG_ON(slabp->inuse); | 2429 | BUG_ON(page->active); |
| 2512 | #endif | 2430 | #endif |
| 2513 | list_del(&slabp->list); | 2431 | list_del(&page->lru); |
| 2514 | /* | 2432 | /* |
| 2515 | * Safe to drop the lock. The slab is no longer linked | 2433 | * Safe to drop the lock. The slab is no longer linked |
| 2516 | * to the cache. | 2434 | * to the cache. |
| 2517 | */ | 2435 | */ |
| 2518 | n->free_objects -= cache->num; | 2436 | n->free_objects -= cache->num; |
| 2519 | spin_unlock_irq(&n->list_lock); | 2437 | spin_unlock_irq(&n->list_lock); |
| 2520 | slab_destroy(cache, slabp); | 2438 | slab_destroy(cache, page); |
| 2521 | nr_freed++; | 2439 | nr_freed++; |
| 2522 | } | 2440 | } |
| 2523 | out: | 2441 | out: |
| @@ -2600,52 +2518,42 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep) | |||
| 2600 | * descriptors in kmem_cache_create, we search through the malloc_sizes array. | 2518 | * descriptors in kmem_cache_create, we search through the malloc_sizes array. |
| 2601 | * If we are creating a malloc_sizes cache here it would not be visible to | 2519 | * If we are creating a malloc_sizes cache here it would not be visible to |
| 2602 | * kmem_find_general_cachep till the initialization is complete. | 2520 | * kmem_find_general_cachep till the initialization is complete. |
| 2603 | * Hence we cannot have slabp_cache same as the original cache. | 2521 | * Hence we cannot have freelist_cache same as the original cache. |
| 2604 | */ | 2522 | */ |
| 2605 | static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, | 2523 | static void *alloc_slabmgmt(struct kmem_cache *cachep, |
| 2606 | int colour_off, gfp_t local_flags, | 2524 | struct page *page, int colour_off, |
| 2607 | int nodeid) | 2525 | gfp_t local_flags, int nodeid) |
| 2608 | { | 2526 | { |
| 2609 | struct slab *slabp; | 2527 | void *freelist; |
| 2528 | void *addr = page_address(page); | ||
| 2610 | 2529 | ||
| 2611 | if (OFF_SLAB(cachep)) { | 2530 | if (OFF_SLAB(cachep)) { |
| 2612 | /* Slab management obj is off-slab. */ | 2531 | /* Slab management obj is off-slab. */ |
| 2613 | slabp = kmem_cache_alloc_node(cachep->slabp_cache, | 2532 | freelist = kmem_cache_alloc_node(cachep->freelist_cache, |
| 2614 | local_flags, nodeid); | 2533 | local_flags, nodeid); |
| 2615 | /* | 2534 | if (!freelist) |
| 2616 | * If the first object in the slab is leaked (it's allocated | ||
| 2617 | * but no one has a reference to it), we want to make sure | ||
| 2618 | * kmemleak does not treat the ->s_mem pointer as a reference | ||
| 2619 | * to the object. Otherwise we will not report the leak. | ||
| 2620 | */ | ||
| 2621 | kmemleak_scan_area(&slabp->list, sizeof(struct list_head), | ||
| 2622 | local_flags); | ||
| 2623 | if (!slabp) | ||
| 2624 | return NULL; | 2535 | return NULL; |
| 2625 | } else { | 2536 | } else { |
| 2626 | slabp = objp + colour_off; | 2537 | freelist = addr + colour_off; |
| 2627 | colour_off += cachep->slab_size; | 2538 | colour_off += cachep->freelist_size; |
| 2628 | } | 2539 | } |
| 2629 | slabp->inuse = 0; | 2540 | page->active = 0; |
| 2630 | slabp->colouroff = colour_off; | 2541 | page->s_mem = addr + colour_off; |
| 2631 | slabp->s_mem = objp + colour_off; | 2542 | return freelist; |
| 2632 | slabp->nodeid = nodeid; | ||
| 2633 | slabp->free = 0; | ||
| 2634 | return slabp; | ||
| 2635 | } | 2543 | } |
| 2636 | 2544 | ||
| 2637 | static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) | 2545 | static inline unsigned int *slab_freelist(struct page *page) |
| 2638 | { | 2546 | { |
| 2639 | return (kmem_bufctl_t *) (slabp + 1); | 2547 | return (unsigned int *)(page->freelist); |
| 2640 | } | 2548 | } |
| 2641 | 2549 | ||
| 2642 | static void cache_init_objs(struct kmem_cache *cachep, | 2550 | static void cache_init_objs(struct kmem_cache *cachep, |
| 2643 | struct slab *slabp) | 2551 | struct page *page) |
| 2644 | { | 2552 | { |
| 2645 | int i; | 2553 | int i; |
| 2646 | 2554 | ||
| 2647 | for (i = 0; i < cachep->num; i++) { | 2555 | for (i = 0; i < cachep->num; i++) { |
| 2648 | void *objp = index_to_obj(cachep, slabp, i); | 2556 | void *objp = index_to_obj(cachep, page, i); |
| 2649 | #if DEBUG | 2557 | #if DEBUG |
| 2650 | /* need to poison the objs? */ | 2558 | /* need to poison the objs? */ |
| 2651 | if (cachep->flags & SLAB_POISON) | 2559 | if (cachep->flags & SLAB_POISON) |
| @@ -2681,9 +2589,8 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
| 2681 | if (cachep->ctor) | 2589 | if (cachep->ctor) |
| 2682 | cachep->ctor(objp); | 2590 | cachep->ctor(objp); |
| 2683 | #endif | 2591 | #endif |
| 2684 | slab_bufctl(slabp)[i] = i + 1; | 2592 | slab_freelist(page)[i] = i; |
| 2685 | } | 2593 | } |
| 2686 | slab_bufctl(slabp)[i - 1] = BUFCTL_END; | ||
| 2687 | } | 2594 | } |
| 2688 | 2595 | ||
| 2689 | static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) | 2596 | static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) |
| @@ -2696,41 +2603,41 @@ static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) | |||
| 2696 | } | 2603 | } |
| 2697 | } | 2604 | } |
| 2698 | 2605 | ||
| 2699 | static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, | 2606 | static void *slab_get_obj(struct kmem_cache *cachep, struct page *page, |
| 2700 | int nodeid) | 2607 | int nodeid) |
| 2701 | { | 2608 | { |
| 2702 | void *objp = index_to_obj(cachep, slabp, slabp->free); | 2609 | void *objp; |
| 2703 | kmem_bufctl_t next; | ||
| 2704 | 2610 | ||
| 2705 | slabp->inuse++; | 2611 | objp = index_to_obj(cachep, page, slab_freelist(page)[page->active]); |
| 2706 | next = slab_bufctl(slabp)[slabp->free]; | 2612 | page->active++; |
| 2707 | #if DEBUG | 2613 | #if DEBUG |
| 2708 | slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; | 2614 | WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid); |
| 2709 | WARN_ON(slabp->nodeid != nodeid); | ||
| 2710 | #endif | 2615 | #endif |
| 2711 | slabp->free = next; | ||
| 2712 | 2616 | ||
| 2713 | return objp; | 2617 | return objp; |
| 2714 | } | 2618 | } |
| 2715 | 2619 | ||
| 2716 | static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, | 2620 | static void slab_put_obj(struct kmem_cache *cachep, struct page *page, |
| 2717 | void *objp, int nodeid) | 2621 | void *objp, int nodeid) |
| 2718 | { | 2622 | { |
| 2719 | unsigned int objnr = obj_to_index(cachep, slabp, objp); | 2623 | unsigned int objnr = obj_to_index(cachep, page, objp); |
| 2720 | |||
| 2721 | #if DEBUG | 2624 | #if DEBUG |
| 2625 | unsigned int i; | ||
| 2626 | |||
| 2722 | /* Verify that the slab belongs to the intended node */ | 2627 | /* Verify that the slab belongs to the intended node */ |
| 2723 | WARN_ON(slabp->nodeid != nodeid); | 2628 | WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid); |
| 2724 | 2629 | ||
| 2725 | if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) { | 2630 | /* Verify double free bug */ |
| 2726 | printk(KERN_ERR "slab: double free detected in cache " | 2631 | for (i = page->active; i < cachep->num; i++) { |
| 2727 | "'%s', objp %p\n", cachep->name, objp); | 2632 | if (slab_freelist(page)[i] == objnr) { |
| 2728 | BUG(); | 2633 | printk(KERN_ERR "slab: double free detected in cache " |
| 2634 | "'%s', objp %p\n", cachep->name, objp); | ||
| 2635 | BUG(); | ||
| 2636 | } | ||
| 2729 | } | 2637 | } |
| 2730 | #endif | 2638 | #endif |
| 2731 | slab_bufctl(slabp)[objnr] = slabp->free; | 2639 | page->active--; |
| 2732 | slabp->free = objnr; | 2640 | slab_freelist(page)[page->active] = objnr; |
| 2733 | slabp->inuse--; | ||
| 2734 | } | 2641 | } |
| 2735 | 2642 | ||
| 2736 | /* | 2643 | /* |
| @@ -2738,23 +2645,11 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, | |||
| 2738 | * for the slab allocator to be able to lookup the cache and slab of a | 2645 | * for the slab allocator to be able to lookup the cache and slab of a |
| 2739 | * virtual address for kfree, ksize, and slab debugging. | 2646 | * virtual address for kfree, ksize, and slab debugging. |
| 2740 | */ | 2647 | */ |
| 2741 | static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, | 2648 | static void slab_map_pages(struct kmem_cache *cache, struct page *page, |
| 2742 | void *addr) | 2649 | void *freelist) |
| 2743 | { | 2650 | { |
| 2744 | int nr_pages; | 2651 | page->slab_cache = cache; |
| 2745 | struct page *page; | 2652 | page->freelist = freelist; |
| 2746 | |||
| 2747 | page = virt_to_page(addr); | ||
| 2748 | |||
| 2749 | nr_pages = 1; | ||
| 2750 | if (likely(!PageCompound(page))) | ||
| 2751 | nr_pages <<= cache->gfporder; | ||
| 2752 | |||
| 2753 | do { | ||
| 2754 | page->slab_cache = cache; | ||
| 2755 | page->slab_page = slab; | ||
| 2756 | page++; | ||
| 2757 | } while (--nr_pages); | ||
| 2758 | } | 2653 | } |
| 2759 | 2654 | ||
| 2760 | /* | 2655 | /* |
| @@ -2762,9 +2657,9 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, | |||
| 2762 | * kmem_cache_alloc() when there are no active objs left in a cache. | 2657 | * kmem_cache_alloc() when there are no active objs left in a cache. |
| 2763 | */ | 2658 | */ |
| 2764 | static int cache_grow(struct kmem_cache *cachep, | 2659 | static int cache_grow(struct kmem_cache *cachep, |
| 2765 | gfp_t flags, int nodeid, void *objp) | 2660 | gfp_t flags, int nodeid, struct page *page) |
| 2766 | { | 2661 | { |
| 2767 | struct slab *slabp; | 2662 | void *freelist; |
| 2768 | size_t offset; | 2663 | size_t offset; |
| 2769 | gfp_t local_flags; | 2664 | gfp_t local_flags; |
| 2770 | struct kmem_cache_node *n; | 2665 | struct kmem_cache_node *n; |
| @@ -2805,20 +2700,20 @@ static int cache_grow(struct kmem_cache *cachep, | |||
| 2805 | * Get mem for the objs. Attempt to allocate a physical page from | 2700 | * Get mem for the objs. Attempt to allocate a physical page from |
| 2806 | * 'nodeid'. | 2701 | * 'nodeid'. |
| 2807 | */ | 2702 | */ |
| 2808 | if (!objp) | 2703 | if (!page) |
| 2809 | objp = kmem_getpages(cachep, local_flags, nodeid); | 2704 | page = kmem_getpages(cachep, local_flags, nodeid); |
| 2810 | if (!objp) | 2705 | if (!page) |
| 2811 | goto failed; | 2706 | goto failed; |
| 2812 | 2707 | ||
| 2813 | /* Get slab management. */ | 2708 | /* Get slab management. */ |
| 2814 | slabp = alloc_slabmgmt(cachep, objp, offset, | 2709 | freelist = alloc_slabmgmt(cachep, page, offset, |
| 2815 | local_flags & ~GFP_CONSTRAINT_MASK, nodeid); | 2710 | local_flags & ~GFP_CONSTRAINT_MASK, nodeid); |
| 2816 | if (!slabp) | 2711 | if (!freelist) |
| 2817 | goto opps1; | 2712 | goto opps1; |
| 2818 | 2713 | ||
| 2819 | slab_map_pages(cachep, slabp, objp); | 2714 | slab_map_pages(cachep, page, freelist); |
| 2820 | 2715 | ||
| 2821 | cache_init_objs(cachep, slabp); | 2716 | cache_init_objs(cachep, page); |
| 2822 | 2717 | ||
| 2823 | if (local_flags & __GFP_WAIT) | 2718 | if (local_flags & __GFP_WAIT) |
| 2824 | local_irq_disable(); | 2719 | local_irq_disable(); |
| @@ -2826,13 +2721,13 @@ static int cache_grow(struct kmem_cache *cachep, | |||
| 2826 | spin_lock(&n->list_lock); | 2721 | spin_lock(&n->list_lock); |
| 2827 | 2722 | ||
| 2828 | /* Make slab active. */ | 2723 | /* Make slab active. */ |
| 2829 | list_add_tail(&slabp->list, &(n->slabs_free)); | 2724 | list_add_tail(&page->lru, &(n->slabs_free)); |
| 2830 | STATS_INC_GROWN(cachep); | 2725 | STATS_INC_GROWN(cachep); |
| 2831 | n->free_objects += cachep->num; | 2726 | n->free_objects += cachep->num; |
| 2832 | spin_unlock(&n->list_lock); | 2727 | spin_unlock(&n->list_lock); |
| 2833 | return 1; | 2728 | return 1; |
| 2834 | opps1: | 2729 | opps1: |
| 2835 | kmem_freepages(cachep, objp); | 2730 | kmem_freepages(cachep, page); |
| 2836 | failed: | 2731 | failed: |
| 2837 | if (local_flags & __GFP_WAIT) | 2732 | if (local_flags & __GFP_WAIT) |
| 2838 | local_irq_disable(); | 2733 | local_irq_disable(); |
| @@ -2880,9 +2775,8 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj) | |||
| 2880 | static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | 2775 | static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, |
| 2881 | unsigned long caller) | 2776 | unsigned long caller) |
| 2882 | { | 2777 | { |
| 2883 | struct page *page; | ||
| 2884 | unsigned int objnr; | 2778 | unsigned int objnr; |
| 2885 | struct slab *slabp; | 2779 | struct page *page; |
| 2886 | 2780 | ||
| 2887 | BUG_ON(virt_to_cache(objp) != cachep); | 2781 | BUG_ON(virt_to_cache(objp) != cachep); |
| 2888 | 2782 | ||
| @@ -2890,8 +2784,6 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
| 2890 | kfree_debugcheck(objp); | 2784 | kfree_debugcheck(objp); |
| 2891 | page = virt_to_head_page(objp); | 2785 | page = virt_to_head_page(objp); |
| 2892 | 2786 | ||
| 2893 | slabp = page->slab_page; | ||
| 2894 | |||
| 2895 | if (cachep->flags & SLAB_RED_ZONE) { | 2787 | if (cachep->flags & SLAB_RED_ZONE) { |
| 2896 | verify_redzone_free(cachep, objp); | 2788 | verify_redzone_free(cachep, objp); |
| 2897 | *dbg_redzone1(cachep, objp) = RED_INACTIVE; | 2789 | *dbg_redzone1(cachep, objp) = RED_INACTIVE; |
| @@ -2900,14 +2792,11 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
| 2900 | if (cachep->flags & SLAB_STORE_USER) | 2792 | if (cachep->flags & SLAB_STORE_USER) |
| 2901 | *dbg_userword(cachep, objp) = (void *)caller; | 2793 | *dbg_userword(cachep, objp) = (void *)caller; |
| 2902 | 2794 | ||
| 2903 | objnr = obj_to_index(cachep, slabp, objp); | 2795 | objnr = obj_to_index(cachep, page, objp); |
| 2904 | 2796 | ||
| 2905 | BUG_ON(objnr >= cachep->num); | 2797 | BUG_ON(objnr >= cachep->num); |
| 2906 | BUG_ON(objp != index_to_obj(cachep, slabp, objnr)); | 2798 | BUG_ON(objp != index_to_obj(cachep, page, objnr)); |
| 2907 | 2799 | ||
| 2908 | #ifdef CONFIG_DEBUG_SLAB_LEAK | ||
| 2909 | slab_bufctl(slabp)[objnr] = BUFCTL_FREE; | ||
| 2910 | #endif | ||
| 2911 | if (cachep->flags & SLAB_POISON) { | 2800 | if (cachep->flags & SLAB_POISON) { |
| 2912 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2801 | #ifdef CONFIG_DEBUG_PAGEALLOC |
| 2913 | if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { | 2802 | if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { |
| @@ -2924,33 +2813,9 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
| 2924 | return objp; | 2813 | return objp; |
| 2925 | } | 2814 | } |
| 2926 | 2815 | ||
| 2927 | static void check_slabp(struct kmem_cache *cachep, struct slab *slabp) | ||
| 2928 | { | ||
| 2929 | kmem_bufctl_t i; | ||
| 2930 | int entries = 0; | ||
| 2931 | |||
| 2932 | /* Check slab's freelist to see if this obj is there. */ | ||
| 2933 | for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { | ||
| 2934 | entries++; | ||
| 2935 | if (entries > cachep->num || i >= cachep->num) | ||
| 2936 | goto bad; | ||
| 2937 | } | ||
| 2938 | if (entries != cachep->num - slabp->inuse) { | ||
| 2939 | bad: | ||
| 2940 | printk(KERN_ERR "slab: Internal list corruption detected in " | ||
| 2941 | "cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n", | ||
| 2942 | cachep->name, cachep->num, slabp, slabp->inuse, | ||
| 2943 | print_tainted()); | ||
| 2944 | print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp, | ||
| 2945 | sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t), | ||
| 2946 | 1); | ||
| 2947 | BUG(); | ||
| 2948 | } | ||
| 2949 | } | ||
| 2950 | #else | 2816 | #else |
| 2951 | #define kfree_debugcheck(x) do { } while(0) | 2817 | #define kfree_debugcheck(x) do { } while(0) |
| 2952 | #define cache_free_debugcheck(x,objp,z) (objp) | 2818 | #define cache_free_debugcheck(x,objp,z) (objp) |
| 2953 | #define check_slabp(x,y) do { } while(0) | ||
| 2954 | #endif | 2819 | #endif |
| 2955 | 2820 | ||
| 2956 | static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, | 2821 | static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, |
| @@ -2989,7 +2854,7 @@ retry: | |||
| 2989 | 2854 | ||
| 2990 | while (batchcount > 0) { | 2855 | while (batchcount > 0) { |
| 2991 | struct list_head *entry; | 2856 | struct list_head *entry; |
| 2992 | struct slab *slabp; | 2857 | struct page *page; |
| 2993 | /* Get slab alloc is to come from. */ | 2858 | /* Get slab alloc is to come from. */ |
| 2994 | entry = n->slabs_partial.next; | 2859 | entry = n->slabs_partial.next; |
| 2995 | if (entry == &n->slabs_partial) { | 2860 | if (entry == &n->slabs_partial) { |
| @@ -2999,8 +2864,7 @@ retry: | |||
| 2999 | goto must_grow; | 2864 | goto must_grow; |
| 3000 | } | 2865 | } |
| 3001 | 2866 | ||
| 3002 | slabp = list_entry(entry, struct slab, list); | 2867 | page = list_entry(entry, struct page, lru); |
| 3003 | check_slabp(cachep, slabp); | ||
| 3004 | check_spinlock_acquired(cachep); | 2868 | check_spinlock_acquired(cachep); |
| 3005 | 2869 | ||
| 3006 | /* | 2870 | /* |
| @@ -3008,24 +2872,23 @@ retry: | |||
| 3008 | * there must be at least one object available for | 2872 | * there must be at least one object available for |
| 3009 | * allocation. | 2873 | * allocation. |
| 3010 | */ | 2874 | */ |
| 3011 | BUG_ON(slabp->inuse >= cachep->num); | 2875 | BUG_ON(page->active >= cachep->num); |
| 3012 | 2876 | ||
| 3013 | while (slabp->inuse < cachep->num && batchcount--) { | 2877 | while (page->active < cachep->num && batchcount--) { |
| 3014 | STATS_INC_ALLOCED(cachep); | 2878 | STATS_INC_ALLOCED(cachep); |
| 3015 | STATS_INC_ACTIVE(cachep); | 2879 | STATS_INC_ACTIVE(cachep); |
| 3016 | STATS_SET_HIGH(cachep); | 2880 | STATS_SET_HIGH(cachep); |
| 3017 | 2881 | ||
| 3018 | ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp, | 2882 | ac_put_obj(cachep, ac, slab_get_obj(cachep, page, |
| 3019 | node)); | 2883 | node)); |
| 3020 | } | 2884 | } |
| 3021 | check_slabp(cachep, slabp); | ||
| 3022 | 2885 | ||
| 3023 | /* move slabp to correct slabp list: */ | 2886 | /* move slabp to correct slabp list: */ |
| 3024 | list_del(&slabp->list); | 2887 | list_del(&page->lru); |
| 3025 | if (slabp->free == BUFCTL_END) | 2888 | if (page->active == cachep->num) |
| 3026 | list_add(&slabp->list, &n->slabs_full); | 2889 | list_add(&page->lru, &n->slabs_full); |
| 3027 | else | 2890 | else |
| 3028 | list_add(&slabp->list, &n->slabs_partial); | 2891 | list_add(&page->lru, &n->slabs_partial); |
| 3029 | } | 2892 | } |
| 3030 | 2893 | ||
| 3031 | must_grow: | 2894 | must_grow: |
| @@ -3097,16 +2960,6 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, | |||
| 3097 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; | 2960 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; |
| 3098 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; | 2961 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; |
| 3099 | } | 2962 | } |
| 3100 | #ifdef CONFIG_DEBUG_SLAB_LEAK | ||
| 3101 | { | ||
| 3102 | struct slab *slabp; | ||
| 3103 | unsigned objnr; | ||
| 3104 | |||
| 3105 | slabp = virt_to_head_page(objp)->slab_page; | ||
| 3106 | objnr = (unsigned)(objp - slabp->s_mem) / cachep->size; | ||
| 3107 | slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE; | ||
| 3108 | } | ||
| 3109 | #endif | ||
| 3110 | objp += obj_offset(cachep); | 2963 | objp += obj_offset(cachep); |
| 3111 | if (cachep->ctor && cachep->flags & SLAB_POISON) | 2964 | if (cachep->ctor && cachep->flags & SLAB_POISON) |
| 3112 | cachep->ctor(objp); | 2965 | cachep->ctor(objp); |
| @@ -3248,18 +3101,20 @@ retry: | |||
| 3248 | * We may trigger various forms of reclaim on the allowed | 3101 | * We may trigger various forms of reclaim on the allowed |
| 3249 | * set and go into memory reserves if necessary. | 3102 | * set and go into memory reserves if necessary. |
| 3250 | */ | 3103 | */ |
| 3104 | struct page *page; | ||
| 3105 | |||
| 3251 | if (local_flags & __GFP_WAIT) | 3106 | if (local_flags & __GFP_WAIT) |
| 3252 | local_irq_enable(); | 3107 | local_irq_enable(); |
| 3253 | kmem_flagcheck(cache, flags); | 3108 | kmem_flagcheck(cache, flags); |
| 3254 | obj = kmem_getpages(cache, local_flags, numa_mem_id()); | 3109 | page = kmem_getpages(cache, local_flags, numa_mem_id()); |
| 3255 | if (local_flags & __GFP_WAIT) | 3110 | if (local_flags & __GFP_WAIT) |
| 3256 | local_irq_disable(); | 3111 | local_irq_disable(); |
| 3257 | if (obj) { | 3112 | if (page) { |
| 3258 | /* | 3113 | /* |
| 3259 | * Insert into the appropriate per node queues | 3114 | * Insert into the appropriate per node queues |
| 3260 | */ | 3115 | */ |
| 3261 | nid = page_to_nid(virt_to_page(obj)); | 3116 | nid = page_to_nid(page); |
| 3262 | if (cache_grow(cache, flags, nid, obj)) { | 3117 | if (cache_grow(cache, flags, nid, page)) { |
| 3263 | obj = ____cache_alloc_node(cache, | 3118 | obj = ____cache_alloc_node(cache, |
| 3264 | flags | GFP_THISNODE, nid); | 3119 | flags | GFP_THISNODE, nid); |
| 3265 | if (!obj) | 3120 | if (!obj) |
| @@ -3288,7 +3143,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, | |||
| 3288 | int nodeid) | 3143 | int nodeid) |
| 3289 | { | 3144 | { |
| 3290 | struct list_head *entry; | 3145 | struct list_head *entry; |
| 3291 | struct slab *slabp; | 3146 | struct page *page; |
| 3292 | struct kmem_cache_node *n; | 3147 | struct kmem_cache_node *n; |
| 3293 | void *obj; | 3148 | void *obj; |
| 3294 | int x; | 3149 | int x; |
| @@ -3308,26 +3163,24 @@ retry: | |||
| 3308 | goto must_grow; | 3163 | goto must_grow; |
| 3309 | } | 3164 | } |
| 3310 | 3165 | ||
| 3311 | slabp = list_entry(entry, struct slab, list); | 3166 | page = list_entry(entry, struct page, lru); |
| 3312 | check_spinlock_acquired_node(cachep, nodeid); | 3167 | check_spinlock_acquired_node(cachep, nodeid); |
| 3313 | check_slabp(cachep, slabp); | ||
| 3314 | 3168 | ||
| 3315 | STATS_INC_NODEALLOCS(cachep); | 3169 | STATS_INC_NODEALLOCS(cachep); |
| 3316 | STATS_INC_ACTIVE(cachep); | 3170 | STATS_INC_ACTIVE(cachep); |
| 3317 | STATS_SET_HIGH(cachep); | 3171 | STATS_SET_HIGH(cachep); |
| 3318 | 3172 | ||
| 3319 | BUG_ON(slabp->inuse == cachep->num); | 3173 | BUG_ON(page->active == cachep->num); |
| 3320 | 3174 | ||
| 3321 | obj = slab_get_obj(cachep, slabp, nodeid); | 3175 | obj = slab_get_obj(cachep, page, nodeid); |
| 3322 | check_slabp(cachep, slabp); | ||
| 3323 | n->free_objects--; | 3176 | n->free_objects--; |
| 3324 | /* move slabp to correct slabp list: */ | 3177 | /* move slabp to correct slabp list: */ |
| 3325 | list_del(&slabp->list); | 3178 | list_del(&page->lru); |
| 3326 | 3179 | ||
| 3327 | if (slabp->free == BUFCTL_END) | 3180 | if (page->active == cachep->num) |
| 3328 | list_add(&slabp->list, &n->slabs_full); | 3181 | list_add(&page->lru, &n->slabs_full); |
| 3329 | else | 3182 | else |
| 3330 | list_add(&slabp->list, &n->slabs_partial); | 3183 | list_add(&page->lru, &n->slabs_partial); |
| 3331 | 3184 | ||
| 3332 | spin_unlock(&n->list_lock); | 3185 | spin_unlock(&n->list_lock); |
| 3333 | goto done; | 3186 | goto done; |
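
With the freelist kept on the page, "this slab is full" is simply page->active == cachep->num, which is what the hunk above tests when deciding between slabs_full and slabs_partial (previously slabp->free == BUFCTL_END). A small self-contained model of the allocation-side bookkeeping, assuming a freelist of object indices whose first `active` entries have already been handed out; the names are made up, not kernel API.

#include <assert.h>
#include <stdio.h>

/* Userspace model of a slab page: an index freelist plus an "active" count. */
struct page_model {
        unsigned int active;    /* objects handed out so far */
        unsigned int num;       /* objects per slab */
        unsigned int *freelist; /* object indices; [active..num-1] are free */
        char *s_mem;            /* first object */
        size_t size;            /* object size */
};

static void *model_get_obj(struct page_model *page)
{
        unsigned int objnr;

        assert(page->active < page->num);       /* caller checked "not full" */
        objnr = page->freelist[page->active];
        page->active++;
        return page->s_mem + (size_t)objnr * page->size;
}

static int model_is_full(const struct page_model *page)
{
        return page->active == page->num;       /* belongs on slabs_full */
}

int main(void)
{
        unsigned int fl[4] = { 0, 1, 2, 3 };
        char mem[4 * 32];
        struct page_model page = {
                .active = 0, .num = 4, .freelist = fl, .s_mem = mem, .size = 32,
        };

        while (!model_is_full(&page))
                printf("got object at offset %td\n",
                       (char *)model_get_obj(&page) - mem);
        printf("slab is now full (active == num)\n");
        return 0;
}
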
| @@ -3477,23 +3330,21 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, | |||
| 3477 | 3330 | ||
| 3478 | for (i = 0; i < nr_objects; i++) { | 3331 | for (i = 0; i < nr_objects; i++) { |
| 3479 | void *objp; | 3332 | void *objp; |
| 3480 | struct slab *slabp; | 3333 | struct page *page; |
| 3481 | 3334 | ||
| 3482 | clear_obj_pfmemalloc(&objpp[i]); | 3335 | clear_obj_pfmemalloc(&objpp[i]); |
| 3483 | objp = objpp[i]; | 3336 | objp = objpp[i]; |
| 3484 | 3337 | ||
| 3485 | slabp = virt_to_slab(objp); | 3338 | page = virt_to_head_page(objp); |
| 3486 | n = cachep->node[node]; | 3339 | n = cachep->node[node]; |
| 3487 | list_del(&slabp->list); | 3340 | list_del(&page->lru); |
| 3488 | check_spinlock_acquired_node(cachep, node); | 3341 | check_spinlock_acquired_node(cachep, node); |
| 3489 | check_slabp(cachep, slabp); | 3342 | slab_put_obj(cachep, page, objp, node); |
| 3490 | slab_put_obj(cachep, slabp, objp, node); | ||
| 3491 | STATS_DEC_ACTIVE(cachep); | 3343 | STATS_DEC_ACTIVE(cachep); |
| 3492 | n->free_objects++; | 3344 | n->free_objects++; |
| 3493 | check_slabp(cachep, slabp); | ||
| 3494 | 3345 | ||
| 3495 | /* fixup slab chains */ | 3346 | /* fixup slab chains */ |
| 3496 | if (slabp->inuse == 0) { | 3347 | if (page->active == 0) { |
| 3497 | if (n->free_objects > n->free_limit) { | 3348 | if (n->free_objects > n->free_limit) { |
| 3498 | n->free_objects -= cachep->num; | 3349 | n->free_objects -= cachep->num; |
| 3499 | /* No need to drop any previously held | 3350 | /* No need to drop any previously held |
| @@ -3502,16 +3353,16 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, | |||
| 3502 | * a different cache, refer to comments before | 3353 | * a different cache, refer to comments before |
| 3503 | * alloc_slabmgmt. | 3354 | * alloc_slabmgmt. |
| 3504 | */ | 3355 | */ |
| 3505 | slab_destroy(cachep, slabp); | 3356 | slab_destroy(cachep, page); |
| 3506 | } else { | 3357 | } else { |
| 3507 | list_add(&slabp->list, &n->slabs_free); | 3358 | list_add(&page->lru, &n->slabs_free); |
| 3508 | } | 3359 | } |
| 3509 | } else { | 3360 | } else { |
| 3510 | /* Unconditionally move a slab to the end of the | 3361 | /* Unconditionally move a slab to the end of the |
| 3511 | * partial list on free - maximum time for the | 3362 | * partial list on free - maximum time for the |
| 3512 | * other objects to be freed, too. | 3363 | * other objects to be freed, too. |
| 3513 | */ | 3364 | */ |
| 3514 | list_add_tail(&slabp->list, &n->slabs_partial); | 3365 | list_add_tail(&page->lru, &n->slabs_partial); |
| 3515 | } | 3366 | } |
| 3516 | } | 3367 | } |
| 3517 | } | 3368 | } |
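
On the free side the object's index goes back onto the freelist and page->active drops; when it reaches zero the slab is either destroyed (if the node already holds more than free_limit free objects) or parked on slabs_free, and otherwise the slab is queued at the tail of slabs_partial. A self-contained sketch of that bookkeeping and list decision, mirroring the structure of free_block() above with invented names.

#include <assert.h>
#include <stdio.h>

/* Userspace model of the free path: return an index to the freelist. */
struct page_model {
        unsigned int active;
        unsigned int num;
        unsigned int freelist[4];
};

static void model_put_obj(struct page_model *page, unsigned int objnr)
{
        assert(page->active > 0);
        page->active--;
        page->freelist[page->active] = objnr;   /* slot becomes free again */
}

/* Decide where the slab belongs after a free, as in the hunk above. */
static const char *model_fixup_list(const struct page_model *page,
                                    unsigned int free_objects,
                                    unsigned int free_limit)
{
        if (page->active == 0)
                return free_objects > free_limit ? "destroy" : "slabs_free";
        return "slabs_partial (tail)";
}

int main(void)
{
        struct page_model page = { .active = 4, .num = 4 };

        model_put_obj(&page, 2);
        printf("after one free:  %s\n", model_fixup_list(&page, 10, 100));
        model_put_obj(&page, 0);
        model_put_obj(&page, 1);
        model_put_obj(&page, 3);
        printf("after all frees: %s\n", model_fixup_list(&page, 10, 100));
        printf("over the limit:  %s\n", model_fixup_list(&page, 200, 100));
        return 0;
}
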
| @@ -3551,10 +3402,10 @@ free_done: | |||
| 3551 | 3402 | ||
| 3552 | p = n->slabs_free.next; | 3403 | p = n->slabs_free.next; |
| 3553 | while (p != &(n->slabs_free)) { | 3404 | while (p != &(n->slabs_free)) { |
| 3554 | struct slab *slabp; | 3405 | struct page *page; |
| 3555 | 3406 | ||
| 3556 | slabp = list_entry(p, struct slab, list); | 3407 | page = list_entry(p, struct page, lru); |
| 3557 | BUG_ON(slabp->inuse); | 3408 | BUG_ON(page->active); |
| 3558 | 3409 | ||
| 3559 | i++; | 3410 | i++; |
| 3560 | p = p->next; | 3411 | p = p->next; |
| @@ -4158,7 +4009,7 @@ out: | |||
| 4158 | #ifdef CONFIG_SLABINFO | 4009 | #ifdef CONFIG_SLABINFO |
| 4159 | void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) | 4010 | void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) |
| 4160 | { | 4011 | { |
| 4161 | struct slab *slabp; | 4012 | struct page *page; |
| 4162 | unsigned long active_objs; | 4013 | unsigned long active_objs; |
| 4163 | unsigned long num_objs; | 4014 | unsigned long num_objs; |
| 4164 | unsigned long active_slabs = 0; | 4015 | unsigned long active_slabs = 0; |
| @@ -4178,23 +4029,23 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) | |||
| 4178 | check_irq_on(); | 4029 | check_irq_on(); |
| 4179 | spin_lock_irq(&n->list_lock); | 4030 | spin_lock_irq(&n->list_lock); |
| 4180 | 4031 | ||
| 4181 | list_for_each_entry(slabp, &n->slabs_full, list) { | 4032 | list_for_each_entry(page, &n->slabs_full, lru) { |
| 4182 | if (slabp->inuse != cachep->num && !error) | 4033 | if (page->active != cachep->num && !error) |
| 4183 | error = "slabs_full accounting error"; | 4034 | error = "slabs_full accounting error"; |
| 4184 | active_objs += cachep->num; | 4035 | active_objs += cachep->num; |
| 4185 | active_slabs++; | 4036 | active_slabs++; |
| 4186 | } | 4037 | } |
| 4187 | list_for_each_entry(slabp, &n->slabs_partial, list) { | 4038 | list_for_each_entry(page, &n->slabs_partial, lru) { |
| 4188 | if (slabp->inuse == cachep->num && !error) | 4039 | if (page->active == cachep->num && !error) |
| 4189 | error = "slabs_partial inuse accounting error"; | 4040 | error = "slabs_partial accounting error"; |
| 4190 | if (!slabp->inuse && !error) | 4041 | if (!page->active && !error) |
| 4191 | error = "slabs_partial/inuse accounting error"; | 4042 | error = "slabs_partial accounting error"; |
| 4192 | active_objs += slabp->inuse; | 4043 | active_objs += page->active; |
| 4193 | active_slabs++; | 4044 | active_slabs++; |
| 4194 | } | 4045 | } |
| 4195 | list_for_each_entry(slabp, &n->slabs_free, list) { | 4046 | list_for_each_entry(page, &n->slabs_free, lru) { |
| 4196 | if (slabp->inuse && !error) | 4047 | if (page->active && !error) |
| 4197 | error = "slabs_free/inuse accounting error"; | 4048 | error = "slabs_free accounting error"; |
| 4198 | num_slabs++; | 4049 | num_slabs++; |
| 4199 | } | 4050 | } |
| 4200 | free_objects += n->free_objects; | 4051 | free_objects += n->free_objects; |
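
The slabinfo accounting follows directly from the new counter: each slab on slabs_full contributes cachep->num objects, each slab on slabs_partial contributes its page->active, and a non-zero active on slabs_free (or an out-of-range value on the other lists) is reported as an accounting error. A standalone model of that summation, with plain arrays standing in for the three per-node lists.

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

int main(void)
{
        const unsigned int num = 8;                     /* objects per slab */
        const unsigned int full[]    = { 8, 8 };        /* slabs_full: active == num */
        const unsigned int partial[] = { 3, 5, 7 };     /* slabs_partial: 0 < active < num */
        const unsigned int freed[]   = { 0, 0 };        /* slabs_free: active == 0 */
        unsigned long active_objs = 0, active_slabs = 0, num_slabs = 0;
        const char *error = NULL;
        size_t i;

        for (i = 0; i < ARRAY_SIZE(full); i++) {
                if (full[i] != num && !error)
                        error = "slabs_full accounting error";
                active_objs += num;             /* full slab: all objects live */
                active_slabs++;
        }
        for (i = 0; i < ARRAY_SIZE(partial); i++) {
                if ((partial[i] == num || partial[i] == 0) && !error)
                        error = "slabs_partial accounting error";
                active_objs += partial[i];      /* partial slab: its active count */
                active_slabs++;
        }
        for (i = 0; i < ARRAY_SIZE(freed); i++) {
                if (freed[i] && !error)
                        error = "slabs_free accounting error";
                num_slabs++;
        }
        num_slabs += active_slabs;

        printf("active_objs=%lu num_objs=%lu error=%s\n",
               active_objs, num_slabs * num, error ? error : "none");
        return 0;
}
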
| @@ -4346,15 +4197,27 @@ static inline int add_caller(unsigned long *n, unsigned long v) | |||
| 4346 | return 1; | 4197 | return 1; |
| 4347 | } | 4198 | } |
| 4348 | 4199 | ||
| 4349 | static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s) | 4200 | static void handle_slab(unsigned long *n, struct kmem_cache *c, |
| 4201 | struct page *page) | ||
| 4350 | { | 4202 | { |
| 4351 | void *p; | 4203 | void *p; |
| 4352 | int i; | 4204 | int i, j; |
| 4205 | |||
| 4353 | if (n[0] == n[1]) | 4206 | if (n[0] == n[1]) |
| 4354 | return; | 4207 | return; |
| 4355 | for (i = 0, p = s->s_mem; i < c->num; i++, p += c->size) { | 4208 | for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) { |
| 4356 | if (slab_bufctl(s)[i] != BUFCTL_ACTIVE) | 4209 | bool active = true; |
| 4210 | |||
| 4211 | for (j = page->active; j < c->num; j++) { | ||
| 4212 | /* Skip freed item */ | ||
| 4213 | if (slab_freelist(page)[j] == i) { | ||
| 4214 | active = false; | ||
| 4215 | break; | ||
| 4216 | } | ||
| 4217 | } | ||
| 4218 | if (!active) | ||
| 4357 | continue; | 4219 | continue; |
| 4220 | |||
| 4358 | if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) | 4221 | if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) |
| 4359 | return; | 4222 | return; |
| 4360 | } | 4223 | } |
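
With slab_bufctl() gone, the leak scanner has to decide liveness from the freelist itself: positions page->active .. c->num - 1 hold the indices of the objects that are currently free, so object i is treated as active only if its index does not appear in that tail. A standalone model of that test; the names are invented, and in the kernel the array is reached through slab_freelist(page).

#include <stdbool.h>
#include <stdio.h>

/*
 * Userspace model of the liveness test: the freelist holds object
 * indices, and only the entries from "active" onward refer to free
 * objects.
 */
static bool object_is_active(const unsigned int *freelist, unsigned int active,
                             unsigned int num, unsigned int objnr)
{
        unsigned int j;

        for (j = active; j < num; j++)
                if (freelist[j] == objnr)       /* found among the free ones */
                        return false;
        return true;
}

int main(void)
{
        /* 4-object slab: objects 2 and 0 were allocated, 3 and 1 are free. */
        unsigned int freelist[4] = { 2, 0, 3, 1 };
        unsigned int active = 2, num = 4, i;

        for (i = 0; i < num; i++)
                printf("object %u: %s\n", i,
                       object_is_active(freelist, active, num, i) ?
                       "active (record its caller)" : "free (skip)");
        return 0;
}

The scan is O(num) per object, which is tolerable here because it only runs for the CONFIG_DEBUG_SLAB_LEAK reporting path driven by leaks_show() below.
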
| @@ -4379,7 +4242,7 @@ static void show_symbol(struct seq_file *m, unsigned long address) | |||
| 4379 | static int leaks_show(struct seq_file *m, void *p) | 4242 | static int leaks_show(struct seq_file *m, void *p) |
| 4380 | { | 4243 | { |
| 4381 | struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); | 4244 | struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); |
| 4382 | struct slab *slabp; | 4245 | struct page *page; |
| 4383 | struct kmem_cache_node *n; | 4246 | struct kmem_cache_node *n; |
| 4384 | const char *name; | 4247 | const char *name; |
| 4385 | unsigned long *x = m->private; | 4248 | unsigned long *x = m->private; |
| @@ -4403,10 +4266,10 @@ static int leaks_show(struct seq_file *m, void *p) | |||
| 4403 | check_irq_on(); | 4266 | check_irq_on(); |
| 4404 | spin_lock_irq(&n->list_lock); | 4267 | spin_lock_irq(&n->list_lock); |
| 4405 | 4268 | ||
| 4406 | list_for_each_entry(slabp, &n->slabs_full, list) | 4269 | list_for_each_entry(page, &n->slabs_full, lru) |
| 4407 | handle_slab(x, cachep, slabp); | 4270 | handle_slab(x, cachep, page); |
| 4408 | list_for_each_entry(slabp, &n->slabs_partial, list) | 4271 | list_for_each_entry(page, &n->slabs_partial, lru) |
| 4409 | handle_slab(x, cachep, slabp); | 4272 | handle_slab(x, cachep, page); |
| 4410 | spin_unlock_irq(&n->list_lock); | 4273 | spin_unlock_irq(&n->list_lock); |
| 4411 | } | 4274 | } |
| 4412 | name = cachep->name; | 4275 | name = cachep->name; |
