diff options
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/Makefile | 2 | ||||
| -rw-r--r-- | mm/allocpercpu.c | 15 | ||||
| -rw-r--r-- | mm/hugetlb.c | 43 | ||||
| -rw-r--r-- | mm/memcontrol.c | 365 | ||||
| -rw-r--r-- | mm/memory.c | 13 | ||||
| -rw-r--r-- | mm/migrate.c | 19 | ||||
| -rw-r--r-- | mm/oom_kill.c | 2 | ||||
| -rw-r--r-- | mm/page_alloc.c | 21 | ||||
| -rw-r--r-- | mm/rmap.c | 4 | ||||
| -rw-r--r-- | mm/shmem.c | 9 | ||||
| -rw-r--r-- | mm/slub.c | 204 | ||||
| -rw-r--r-- | mm/swap.c | 2 | ||||
| -rw-r--r-- | mm/truncate.c | 3 | ||||
| -rw-r--r-- | mm/vmscan.c | 9 |
14 files changed, 326 insertions, 385 deletions
diff --git a/mm/Makefile b/mm/Makefile index 9f117bab5322..a5b0dd93427a 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
| @@ -32,5 +32,5 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o | |||
| 32 | obj-$(CONFIG_MIGRATION) += migrate.o | 32 | obj-$(CONFIG_MIGRATION) += migrate.o |
| 33 | obj-$(CONFIG_SMP) += allocpercpu.o | 33 | obj-$(CONFIG_SMP) += allocpercpu.o |
| 34 | obj-$(CONFIG_QUICKLIST) += quicklist.o | 34 | obj-$(CONFIG_QUICKLIST) += quicklist.o |
| 35 | obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o | 35 | obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o |
| 36 | 36 | ||
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index 7e58322b7134..b0012e27fea8 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c | |||
| @@ -6,6 +6,10 @@ | |||
| 6 | #include <linux/mm.h> | 6 | #include <linux/mm.h> |
| 7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
| 8 | 8 | ||
| 9 | #ifndef cache_line_size | ||
| 10 | #define cache_line_size() L1_CACHE_BYTES | ||
| 11 | #endif | ||
| 12 | |||
| 9 | /** | 13 | /** |
| 10 | * percpu_depopulate - depopulate per-cpu data for given cpu | 14 | * percpu_depopulate - depopulate per-cpu data for given cpu |
| 11 | * @__pdata: per-cpu data to depopulate | 15 | * @__pdata: per-cpu data to depopulate |
| @@ -52,6 +56,11 @@ void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu) | |||
| 52 | struct percpu_data *pdata = __percpu_disguise(__pdata); | 56 | struct percpu_data *pdata = __percpu_disguise(__pdata); |
| 53 | int node = cpu_to_node(cpu); | 57 | int node = cpu_to_node(cpu); |
| 54 | 58 | ||
| 59 | /* | ||
| 60 | * We should make sure each CPU gets private memory. | ||
| 61 | */ | ||
| 62 | size = roundup(size, cache_line_size()); | ||
| 63 | |||
| 55 | BUG_ON(pdata->ptrs[cpu]); | 64 | BUG_ON(pdata->ptrs[cpu]); |
| 56 | if (node_online(node)) | 65 | if (node_online(node)) |
| 57 | pdata->ptrs[cpu] = kmalloc_node(size, gfp|__GFP_ZERO, node); | 66 | pdata->ptrs[cpu] = kmalloc_node(size, gfp|__GFP_ZERO, node); |
| @@ -98,7 +107,11 @@ EXPORT_SYMBOL_GPL(__percpu_populate_mask); | |||
| 98 | */ | 107 | */ |
| 99 | void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) | 108 | void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) |
| 100 | { | 109 | { |
| 101 | void *pdata = kzalloc(nr_cpu_ids * sizeof(void *), gfp); | 110 | /* |
| 111 | * We allocate whole cache lines to avoid false sharing | ||
| 112 | */ | ||
| 113 | size_t sz = roundup(nr_cpu_ids * sizeof(void *), cache_line_size()); | ||
| 114 | void *pdata = kzalloc(sz, gfp); | ||
| 102 | void *__pdata = __percpu_disguise(pdata); | 115 | void *__pdata = __percpu_disguise(pdata); |
| 103 | 116 | ||
| 104 | if (unlikely(!pdata)) | 117 | if (unlikely(!pdata)) |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 89e6286a7f57..dcacc811e70e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
| @@ -71,7 +71,25 @@ static void enqueue_huge_page(struct page *page) | |||
| 71 | free_huge_pages_node[nid]++; | 71 | free_huge_pages_node[nid]++; |
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | static struct page *dequeue_huge_page(struct vm_area_struct *vma, | 74 | static struct page *dequeue_huge_page(void) |
| 75 | { | ||
| 76 | int nid; | ||
| 77 | struct page *page = NULL; | ||
| 78 | |||
| 79 | for (nid = 0; nid < MAX_NUMNODES; ++nid) { | ||
| 80 | if (!list_empty(&hugepage_freelists[nid])) { | ||
| 81 | page = list_entry(hugepage_freelists[nid].next, | ||
| 82 | struct page, lru); | ||
| 83 | list_del(&page->lru); | ||
| 84 | free_huge_pages--; | ||
| 85 | free_huge_pages_node[nid]--; | ||
| 86 | break; | ||
| 87 | } | ||
| 88 | } | ||
| 89 | return page; | ||
| 90 | } | ||
| 91 | |||
| 92 | static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma, | ||
| 75 | unsigned long address) | 93 | unsigned long address) |
| 76 | { | 94 | { |
| 77 | int nid; | 95 | int nid; |
| @@ -296,8 +314,10 @@ static int gather_surplus_pages(int delta) | |||
| 296 | int needed, allocated; | 314 | int needed, allocated; |
| 297 | 315 | ||
| 298 | needed = (resv_huge_pages + delta) - free_huge_pages; | 316 | needed = (resv_huge_pages + delta) - free_huge_pages; |
| 299 | if (needed <= 0) | 317 | if (needed <= 0) { |
| 318 | resv_huge_pages += delta; | ||
| 300 | return 0; | 319 | return 0; |
| 320 | } | ||
| 301 | 321 | ||
| 302 | allocated = 0; | 322 | allocated = 0; |
| 303 | INIT_LIST_HEAD(&surplus_list); | 323 | INIT_LIST_HEAD(&surplus_list); |
| @@ -335,9 +355,12 @@ retry: | |||
| 335 | * The surplus_list now contains _at_least_ the number of extra pages | 355 | * The surplus_list now contains _at_least_ the number of extra pages |
| 336 | * needed to accommodate the reservation. Add the appropriate number | 356 | * needed to accommodate the reservation. Add the appropriate number |
| 337 | * of pages to the hugetlb pool and free the extras back to the buddy | 357 | * of pages to the hugetlb pool and free the extras back to the buddy |
| 338 | * allocator. | 358 | * allocator. Commit the entire reservation here to prevent another |
| 359 | * process from stealing the pages as they are added to the pool but | ||
| 360 | * before they are reserved. | ||
| 339 | */ | 361 | */ |
| 340 | needed += allocated; | 362 | needed += allocated; |
| 363 | resv_huge_pages += delta; | ||
| 341 | ret = 0; | 364 | ret = 0; |
| 342 | free: | 365 | free: |
| 343 | list_for_each_entry_safe(page, tmp, &surplus_list, lru) { | 366 | list_for_each_entry_safe(page, tmp, &surplus_list, lru) { |
| @@ -371,6 +394,9 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages) | |||
| 371 | struct page *page; | 394 | struct page *page; |
| 372 | unsigned long nr_pages; | 395 | unsigned long nr_pages; |
| 373 | 396 | ||
| 397 | /* Uncommit the reservation */ | ||
| 398 | resv_huge_pages -= unused_resv_pages; | ||
| 399 | |||
| 374 | nr_pages = min(unused_resv_pages, surplus_huge_pages); | 400 | nr_pages = min(unused_resv_pages, surplus_huge_pages); |
| 375 | 401 | ||
| 376 | while (nr_pages) { | 402 | while (nr_pages) { |
| @@ -402,7 +428,7 @@ static struct page *alloc_huge_page_shared(struct vm_area_struct *vma, | |||
| 402 | struct page *page; | 428 | struct page *page; |
| 403 | 429 | ||
| 404 | spin_lock(&hugetlb_lock); | 430 | spin_lock(&hugetlb_lock); |
| 405 | page = dequeue_huge_page(vma, addr); | 431 | page = dequeue_huge_page_vma(vma, addr); |
| 406 | spin_unlock(&hugetlb_lock); | 432 | spin_unlock(&hugetlb_lock); |
| 407 | return page ? page : ERR_PTR(-VM_FAULT_OOM); | 433 | return page ? page : ERR_PTR(-VM_FAULT_OOM); |
| 408 | } | 434 | } |
| @@ -417,7 +443,7 @@ static struct page *alloc_huge_page_private(struct vm_area_struct *vma, | |||
| 417 | 443 | ||
| 418 | spin_lock(&hugetlb_lock); | 444 | spin_lock(&hugetlb_lock); |
| 419 | if (free_huge_pages > resv_huge_pages) | 445 | if (free_huge_pages > resv_huge_pages) |
| 420 | page = dequeue_huge_page(vma, addr); | 446 | page = dequeue_huge_page_vma(vma, addr); |
| 421 | spin_unlock(&hugetlb_lock); | 447 | spin_unlock(&hugetlb_lock); |
| 422 | if (!page) { | 448 | if (!page) { |
| 423 | page = alloc_buddy_huge_page(vma, addr); | 449 | page = alloc_buddy_huge_page(vma, addr); |
| @@ -570,7 +596,7 @@ static unsigned long set_max_huge_pages(unsigned long count) | |||
| 570 | min_count = max(count, min_count); | 596 | min_count = max(count, min_count); |
| 571 | try_to_free_low(min_count); | 597 | try_to_free_low(min_count); |
| 572 | while (min_count < persistent_huge_pages) { | 598 | while (min_count < persistent_huge_pages) { |
| 573 | struct page *page = dequeue_huge_page(NULL, 0); | 599 | struct page *page = dequeue_huge_page(); |
| 574 | if (!page) | 600 | if (!page) |
| 575 | break; | 601 | break; |
| 576 | update_and_free_page(page); | 602 | update_and_free_page(page); |
| @@ -1205,12 +1231,13 @@ static int hugetlb_acct_memory(long delta) | |||
| 1205 | if (gather_surplus_pages(delta) < 0) | 1231 | if (gather_surplus_pages(delta) < 0) |
| 1206 | goto out; | 1232 | goto out; |
| 1207 | 1233 | ||
| 1208 | if (delta > cpuset_mems_nr(free_huge_pages_node)) | 1234 | if (delta > cpuset_mems_nr(free_huge_pages_node)) { |
| 1235 | return_unused_surplus_pages(delta); | ||
| 1209 | goto out; | 1236 | goto out; |
| 1237 | } | ||
| 1210 | } | 1238 | } |
| 1211 | 1239 | ||
| 1212 | ret = 0; | 1240 | ret = 0; |
| 1213 | resv_huge_pages += delta; | ||
| 1214 | if (delta < 0) | 1241 | if (delta < 0) |
| 1215 | return_unused_surplus_pages((unsigned long) -delta); | 1242 | return_unused_surplus_pages((unsigned long) -delta); |
| 1216 | 1243 | ||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 631002d085d1..8b9f6cae938e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
| @@ -137,14 +137,21 @@ struct mem_cgroup { | |||
| 137 | */ | 137 | */ |
| 138 | struct mem_cgroup_stat stat; | 138 | struct mem_cgroup_stat stat; |
| 139 | }; | 139 | }; |
| 140 | static struct mem_cgroup init_mem_cgroup; | ||
| 140 | 141 | ||
| 141 | /* | 142 | /* |
| 142 | * We use the lower bit of the page->page_cgroup pointer as a bit spin | 143 | * We use the lower bit of the page->page_cgroup pointer as a bit spin |
| 143 | * lock. We need to ensure that page->page_cgroup is atleast two | 144 | * lock. We need to ensure that page->page_cgroup is at least two |
| 144 | * byte aligned (based on comments from Nick Piggin) | 145 | * byte aligned (based on comments from Nick Piggin). But since |
| 146 | * bit_spin_lock doesn't actually set that lock bit in a non-debug | ||
| 147 | * uniprocessor kernel, we should avoid setting it here too. | ||
| 145 | */ | 148 | */ |
| 146 | #define PAGE_CGROUP_LOCK_BIT 0x0 | 149 | #define PAGE_CGROUP_LOCK_BIT 0x0 |
| 147 | #define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) | 150 | #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) |
| 151 | #define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) | ||
| 152 | #else | ||
| 153 | #define PAGE_CGROUP_LOCK 0x0 | ||
| 154 | #endif | ||
| 148 | 155 | ||
| 149 | /* | 156 | /* |
| 150 | * A page_cgroup page is associated with every page descriptor. The | 157 | * A page_cgroup page is associated with every page descriptor. The |
| @@ -154,37 +161,27 @@ struct page_cgroup { | |||
| 154 | struct list_head lru; /* per cgroup LRU list */ | 161 | struct list_head lru; /* per cgroup LRU list */ |
| 155 | struct page *page; | 162 | struct page *page; |
| 156 | struct mem_cgroup *mem_cgroup; | 163 | struct mem_cgroup *mem_cgroup; |
| 157 | atomic_t ref_cnt; /* Helpful when pages move b/w */ | 164 | int ref_cnt; /* cached, mapped, migrating */ |
| 158 | /* mapped and cached states */ | 165 | int flags; |
| 159 | int flags; | ||
| 160 | }; | 166 | }; |
| 161 | #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ | 167 | #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ |
| 162 | #define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */ | 168 | #define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */ |
| 163 | 169 | ||
| 164 | static inline int page_cgroup_nid(struct page_cgroup *pc) | 170 | static int page_cgroup_nid(struct page_cgroup *pc) |
| 165 | { | 171 | { |
| 166 | return page_to_nid(pc->page); | 172 | return page_to_nid(pc->page); |
| 167 | } | 173 | } |
| 168 | 174 | ||
| 169 | static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc) | 175 | static enum zone_type page_cgroup_zid(struct page_cgroup *pc) |
| 170 | { | 176 | { |
| 171 | return page_zonenum(pc->page); | 177 | return page_zonenum(pc->page); |
| 172 | } | 178 | } |
| 173 | 179 | ||
| 174 | enum { | ||
| 175 | MEM_CGROUP_TYPE_UNSPEC = 0, | ||
| 176 | MEM_CGROUP_TYPE_MAPPED, | ||
| 177 | MEM_CGROUP_TYPE_CACHED, | ||
| 178 | MEM_CGROUP_TYPE_ALL, | ||
| 179 | MEM_CGROUP_TYPE_MAX, | ||
| 180 | }; | ||
| 181 | |||
| 182 | enum charge_type { | 180 | enum charge_type { |
| 183 | MEM_CGROUP_CHARGE_TYPE_CACHE = 0, | 181 | MEM_CGROUP_CHARGE_TYPE_CACHE = 0, |
| 184 | MEM_CGROUP_CHARGE_TYPE_MAPPED, | 182 | MEM_CGROUP_CHARGE_TYPE_MAPPED, |
| 185 | }; | 183 | }; |
| 186 | 184 | ||
| 187 | |||
| 188 | /* | 185 | /* |
| 189 | * Always modified under the lru lock, so preempt_disable() is not necessary. | 186 | * Always modified under the lru lock, so preempt_disable() is not necessary. |
| 190 | */ | 187 | */ |
| @@ -193,23 +190,21 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags, | |||
| 193 | { | 190 | { |
| 194 | int val = (charge)? 1 : -1; | 191 | int val = (charge)? 1 : -1; |
| 195 | struct mem_cgroup_stat *stat = &mem->stat; | 192 | struct mem_cgroup_stat *stat = &mem->stat; |
| 196 | VM_BUG_ON(!irqs_disabled()); | ||
| 197 | 193 | ||
| 194 | VM_BUG_ON(!irqs_disabled()); | ||
| 198 | if (flags & PAGE_CGROUP_FLAG_CACHE) | 195 | if (flags & PAGE_CGROUP_FLAG_CACHE) |
| 199 | __mem_cgroup_stat_add_safe(stat, | 196 | __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_CACHE, val); |
| 200 | MEM_CGROUP_STAT_CACHE, val); | ||
| 201 | else | 197 | else |
| 202 | __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val); | 198 | __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val); |
| 203 | } | 199 | } |
| 204 | 200 | ||
| 205 | static inline struct mem_cgroup_per_zone * | 201 | static struct mem_cgroup_per_zone * |
| 206 | mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) | 202 | mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) |
| 207 | { | 203 | { |
| 208 | BUG_ON(!mem->info.nodeinfo[nid]); | ||
| 209 | return &mem->info.nodeinfo[nid]->zoneinfo[zid]; | 204 | return &mem->info.nodeinfo[nid]->zoneinfo[zid]; |
| 210 | } | 205 | } |
| 211 | 206 | ||
| 212 | static inline struct mem_cgroup_per_zone * | 207 | static struct mem_cgroup_per_zone * |
| 213 | page_cgroup_zoneinfo(struct page_cgroup *pc) | 208 | page_cgroup_zoneinfo(struct page_cgroup *pc) |
| 214 | { | 209 | { |
| 215 | struct mem_cgroup *mem = pc->mem_cgroup; | 210 | struct mem_cgroup *mem = pc->mem_cgroup; |
| @@ -234,18 +229,14 @@ static unsigned long mem_cgroup_get_all_zonestat(struct mem_cgroup *mem, | |||
| 234 | return total; | 229 | return total; |
| 235 | } | 230 | } |
| 236 | 231 | ||
| 237 | static struct mem_cgroup init_mem_cgroup; | 232 | static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) |
| 238 | |||
| 239 | static inline | ||
| 240 | struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) | ||
| 241 | { | 233 | { |
| 242 | return container_of(cgroup_subsys_state(cont, | 234 | return container_of(cgroup_subsys_state(cont, |
| 243 | mem_cgroup_subsys_id), struct mem_cgroup, | 235 | mem_cgroup_subsys_id), struct mem_cgroup, |
| 244 | css); | 236 | css); |
| 245 | } | 237 | } |
| 246 | 238 | ||
| 247 | static inline | 239 | static struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) |
| 248 | struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) | ||
| 249 | { | 240 | { |
| 250 | return container_of(task_subsys_state(p, mem_cgroup_subsys_id), | 241 | return container_of(task_subsys_state(p, mem_cgroup_subsys_id), |
| 251 | struct mem_cgroup, css); | 242 | struct mem_cgroup, css); |
| @@ -267,81 +258,33 @@ void mm_free_cgroup(struct mm_struct *mm) | |||
| 267 | 258 | ||
| 268 | static inline int page_cgroup_locked(struct page *page) | 259 | static inline int page_cgroup_locked(struct page *page) |
| 269 | { | 260 | { |
| 270 | return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, | 261 | return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); |
| 271 | &page->page_cgroup); | ||
| 272 | } | 262 | } |
| 273 | 263 | ||
| 274 | void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) | 264 | static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) |
| 275 | { | 265 | { |
| 276 | int locked; | 266 | VM_BUG_ON(!page_cgroup_locked(page)); |
| 277 | 267 | page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK); | |
| 278 | /* | ||
| 279 | * While resetting the page_cgroup we might not hold the | ||
| 280 | * page_cgroup lock. free_hot_cold_page() is an example | ||
| 281 | * of such a scenario | ||
| 282 | */ | ||
| 283 | if (pc) | ||
| 284 | VM_BUG_ON(!page_cgroup_locked(page)); | ||
| 285 | locked = (page->page_cgroup & PAGE_CGROUP_LOCK); | ||
| 286 | page->page_cgroup = ((unsigned long)pc | locked); | ||
| 287 | } | 268 | } |
| 288 | 269 | ||
| 289 | struct page_cgroup *page_get_page_cgroup(struct page *page) | 270 | struct page_cgroup *page_get_page_cgroup(struct page *page) |
| 290 | { | 271 | { |
| 291 | return (struct page_cgroup *) | 272 | return (struct page_cgroup *) (page->page_cgroup & ~PAGE_CGROUP_LOCK); |
| 292 | (page->page_cgroup & ~PAGE_CGROUP_LOCK); | ||
| 293 | } | 273 | } |
| 294 | 274 | ||
| 295 | static void __always_inline lock_page_cgroup(struct page *page) | 275 | static void lock_page_cgroup(struct page *page) |
| 296 | { | 276 | { |
| 297 | bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); | 277 | bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); |
| 298 | VM_BUG_ON(!page_cgroup_locked(page)); | ||
| 299 | } | ||
| 300 | |||
| 301 | static void __always_inline unlock_page_cgroup(struct page *page) | ||
| 302 | { | ||
| 303 | bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); | ||
| 304 | } | 278 | } |
| 305 | 279 | ||
| 306 | /* | 280 | static int try_lock_page_cgroup(struct page *page) |
| 307 | * Tie new page_cgroup to struct page under lock_page_cgroup() | ||
| 308 | * This can fail if the page has been tied to a page_cgroup. | ||
| 309 | * If success, returns 0. | ||
| 310 | */ | ||
| 311 | static int page_cgroup_assign_new_page_cgroup(struct page *page, | ||
| 312 | struct page_cgroup *pc) | ||
| 313 | { | 281 | { |
| 314 | int ret = 0; | 282 | return bit_spin_trylock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); |
| 315 | |||
| 316 | lock_page_cgroup(page); | ||
| 317 | if (!page_get_page_cgroup(page)) | ||
| 318 | page_assign_page_cgroup(page, pc); | ||
| 319 | else /* A page is tied to other pc. */ | ||
| 320 | ret = 1; | ||
| 321 | unlock_page_cgroup(page); | ||
| 322 | return ret; | ||
| 323 | } | 283 | } |
| 324 | 284 | ||
| 325 | /* | 285 | static void unlock_page_cgroup(struct page *page) |
| 326 | * Clear page->page_cgroup member under lock_page_cgroup(). | ||
| 327 | * If given "pc" value is different from one page->page_cgroup, | ||
| 328 | * page->cgroup is not cleared. | ||
| 329 | * Returns a value of page->page_cgroup at lock taken. | ||
| 330 | * A can can detect failure of clearing by following | ||
| 331 | * clear_page_cgroup(page, pc) == pc | ||
| 332 | */ | ||
| 333 | |||
| 334 | static struct page_cgroup *clear_page_cgroup(struct page *page, | ||
| 335 | struct page_cgroup *pc) | ||
| 336 | { | 286 | { |
| 337 | struct page_cgroup *ret; | 287 | bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); |
| 338 | /* lock and clear */ | ||
| 339 | lock_page_cgroup(page); | ||
| 340 | ret = page_get_page_cgroup(page); | ||
| 341 | if (likely(ret == pc)) | ||
| 342 | page_assign_page_cgroup(page, NULL); | ||
| 343 | unlock_page_cgroup(page); | ||
| 344 | return ret; | ||
| 345 | } | 288 | } |
| 346 | 289 | ||
| 347 | static void __mem_cgroup_remove_list(struct page_cgroup *pc) | 290 | static void __mem_cgroup_remove_list(struct page_cgroup *pc) |
| @@ -399,7 +342,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) | |||
| 399 | int ret; | 342 | int ret; |
| 400 | 343 | ||
| 401 | task_lock(task); | 344 | task_lock(task); |
| 402 | ret = task->mm && vm_match_cgroup(task->mm, mem); | 345 | ret = task->mm && mm_match_cgroup(task->mm, mem); |
| 403 | task_unlock(task); | 346 | task_unlock(task); |
| 404 | return ret; | 347 | return ret; |
| 405 | } | 348 | } |
| @@ -407,18 +350,30 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) | |||
| 407 | /* | 350 | /* |
| 408 | * This routine assumes that the appropriate zone's lru lock is already held | 351 | * This routine assumes that the appropriate zone's lru lock is already held |
| 409 | */ | 352 | */ |
| 410 | void mem_cgroup_move_lists(struct page_cgroup *pc, bool active) | 353 | void mem_cgroup_move_lists(struct page *page, bool active) |
| 411 | { | 354 | { |
| 355 | struct page_cgroup *pc; | ||
| 412 | struct mem_cgroup_per_zone *mz; | 356 | struct mem_cgroup_per_zone *mz; |
| 413 | unsigned long flags; | 357 | unsigned long flags; |
| 414 | 358 | ||
| 415 | if (!pc) | 359 | /* |
| 360 | * We cannot lock_page_cgroup while holding zone's lru_lock, | ||
| 361 | * because other holders of lock_page_cgroup can be interrupted | ||
| 362 | * with an attempt to rotate_reclaimable_page. But we cannot | ||
| 363 | * safely get to page_cgroup without it, so just try_lock it: | ||
| 364 | * mem_cgroup_isolate_pages allows for page left on wrong list. | ||
| 365 | */ | ||
| 366 | if (!try_lock_page_cgroup(page)) | ||
| 416 | return; | 367 | return; |
| 417 | 368 | ||
| 418 | mz = page_cgroup_zoneinfo(pc); | 369 | pc = page_get_page_cgroup(page); |
| 419 | spin_lock_irqsave(&mz->lru_lock, flags); | 370 | if (pc) { |
| 420 | __mem_cgroup_move_lists(pc, active); | 371 | mz = page_cgroup_zoneinfo(pc); |
| 421 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 372 | spin_lock_irqsave(&mz->lru_lock, flags); |
| 373 | __mem_cgroup_move_lists(pc, active); | ||
| 374 | spin_unlock_irqrestore(&mz->lru_lock, flags); | ||
| 375 | } | ||
| 376 | unlock_page_cgroup(page); | ||
| 422 | } | 377 | } |
| 423 | 378 | ||
| 424 | /* | 379 | /* |
| @@ -437,6 +392,7 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem) | |||
| 437 | rss = (long)mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS); | 392 | rss = (long)mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS); |
| 438 | return (int)((rss * 100L) / total); | 393 | return (int)((rss * 100L) / total); |
| 439 | } | 394 | } |
| 395 | |||
| 440 | /* | 396 | /* |
| 441 | * This function is called from vmscan.c. In the page reclaim loop, the balance | 397 | * This function is called from vmscan.c. In the page reclaim loop, the balance |
| 442 | * between the active and inactive lists is calculated. For memory controller | 398 | * between the active and inactive lists is calculated. For memory controller |
| @@ -500,7 +456,6 @@ long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem, | |||
| 500 | struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid); | 456 | struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid); |
| 501 | 457 | ||
| 502 | nr_inactive = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE); | 458 | nr_inactive = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE); |
| 503 | |||
| 504 | return (nr_inactive >> priority); | 459 | return (nr_inactive >> priority); |
| 505 | } | 460 | } |
| 506 | 461 | ||
| @@ -586,26 +541,21 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, | |||
| 586 | * with it | 541 | * with it |
| 587 | */ | 542 | */ |
| 588 | retry: | 543 | retry: |
| 589 | if (page) { | 544 | lock_page_cgroup(page); |
| 590 | lock_page_cgroup(page); | 545 | pc = page_get_page_cgroup(page); |
| 591 | pc = page_get_page_cgroup(page); | 546 | /* |
| 592 | /* | 547 | * The page_cgroup exists and |
| 593 | * The page_cgroup exists and | 548 | * the page has already been accounted. |
| 594 | * the page has already been accounted. | 549 | */ |
| 595 | */ | 550 | if (pc) { |
| 596 | if (pc) { | 551 | VM_BUG_ON(pc->page != page); |
| 597 | if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) { | 552 | VM_BUG_ON(pc->ref_cnt <= 0); |
| 598 | /* this page is under being uncharged ? */ | 553 | |
| 599 | unlock_page_cgroup(page); | 554 | pc->ref_cnt++; |
| 600 | cpu_relax(); | ||
| 601 | goto retry; | ||
| 602 | } else { | ||
| 603 | unlock_page_cgroup(page); | ||
| 604 | goto done; | ||
| 605 | } | ||
| 606 | } | ||
| 607 | unlock_page_cgroup(page); | 555 | unlock_page_cgroup(page); |
| 556 | goto done; | ||
| 608 | } | 557 | } |
| 558 | unlock_page_cgroup(page); | ||
| 609 | 559 | ||
| 610 | pc = kzalloc(sizeof(struct page_cgroup), gfp_mask); | 560 | pc = kzalloc(sizeof(struct page_cgroup), gfp_mask); |
| 611 | if (pc == NULL) | 561 | if (pc == NULL) |
| @@ -623,16 +573,11 @@ retry: | |||
| 623 | rcu_read_lock(); | 573 | rcu_read_lock(); |
| 624 | mem = rcu_dereference(mm->mem_cgroup); | 574 | mem = rcu_dereference(mm->mem_cgroup); |
| 625 | /* | 575 | /* |
| 626 | * For every charge from the cgroup, increment reference | 576 | * For every charge from the cgroup, increment reference count |
| 627 | * count | ||
| 628 | */ | 577 | */ |
| 629 | css_get(&mem->css); | 578 | css_get(&mem->css); |
| 630 | rcu_read_unlock(); | 579 | rcu_read_unlock(); |
| 631 | 580 | ||
| 632 | /* | ||
| 633 | * If we created the page_cgroup, we should free it on exceeding | ||
| 634 | * the cgroup limit. | ||
| 635 | */ | ||
| 636 | while (res_counter_charge(&mem->res, PAGE_SIZE)) { | 581 | while (res_counter_charge(&mem->res, PAGE_SIZE)) { |
| 637 | if (!(gfp_mask & __GFP_WAIT)) | 582 | if (!(gfp_mask & __GFP_WAIT)) |
| 638 | goto out; | 583 | goto out; |
| @@ -641,12 +586,12 @@ retry: | |||
| 641 | continue; | 586 | continue; |
| 642 | 587 | ||
| 643 | /* | 588 | /* |
| 644 | * try_to_free_mem_cgroup_pages() might not give us a full | 589 | * try_to_free_mem_cgroup_pages() might not give us a full |
| 645 | * picture of reclaim. Some pages are reclaimed and might be | 590 | * picture of reclaim. Some pages are reclaimed and might be |
| 646 | * moved to swap cache or just unmapped from the cgroup. | 591 | * moved to swap cache or just unmapped from the cgroup. |
| 647 | * Check the limit again to see if the reclaim reduced the | 592 | * Check the limit again to see if the reclaim reduced the |
| 648 | * current usage of the cgroup before giving up | 593 | * current usage of the cgroup before giving up |
| 649 | */ | 594 | */ |
| 650 | if (res_counter_check_under_limit(&mem->res)) | 595 | if (res_counter_check_under_limit(&mem->res)) |
| 651 | continue; | 596 | continue; |
| 652 | 597 | ||
| @@ -657,14 +602,16 @@ retry: | |||
| 657 | congestion_wait(WRITE, HZ/10); | 602 | congestion_wait(WRITE, HZ/10); |
| 658 | } | 603 | } |
| 659 | 604 | ||
| 660 | atomic_set(&pc->ref_cnt, 1); | 605 | pc->ref_cnt = 1; |
| 661 | pc->mem_cgroup = mem; | 606 | pc->mem_cgroup = mem; |
| 662 | pc->page = page; | 607 | pc->page = page; |
| 663 | pc->flags = PAGE_CGROUP_FLAG_ACTIVE; | 608 | pc->flags = PAGE_CGROUP_FLAG_ACTIVE; |
| 664 | if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) | 609 | if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) |
| 665 | pc->flags |= PAGE_CGROUP_FLAG_CACHE; | 610 | pc->flags |= PAGE_CGROUP_FLAG_CACHE; |
| 666 | 611 | ||
| 667 | if (!page || page_cgroup_assign_new_page_cgroup(page, pc)) { | 612 | lock_page_cgroup(page); |
| 613 | if (page_get_page_cgroup(page)) { | ||
| 614 | unlock_page_cgroup(page); | ||
| 668 | /* | 615 | /* |
| 669 | * Another charge has been added to this page already. | 616 | * Another charge has been added to this page already. |
| 670 | * We take lock_page_cgroup(page) again and read | 617 | * We take lock_page_cgroup(page) again and read |
| @@ -673,17 +620,16 @@ retry: | |||
| 673 | res_counter_uncharge(&mem->res, PAGE_SIZE); | 620 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
| 674 | css_put(&mem->css); | 621 | css_put(&mem->css); |
| 675 | kfree(pc); | 622 | kfree(pc); |
| 676 | if (!page) | ||
| 677 | goto done; | ||
| 678 | goto retry; | 623 | goto retry; |
| 679 | } | 624 | } |
| 625 | page_assign_page_cgroup(page, pc); | ||
| 680 | 626 | ||
| 681 | mz = page_cgroup_zoneinfo(pc); | 627 | mz = page_cgroup_zoneinfo(pc); |
| 682 | spin_lock_irqsave(&mz->lru_lock, flags); | 628 | spin_lock_irqsave(&mz->lru_lock, flags); |
| 683 | /* Update statistics vector */ | ||
| 684 | __mem_cgroup_add_list(pc); | 629 | __mem_cgroup_add_list(pc); |
| 685 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 630 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
| 686 | 631 | ||
| 632 | unlock_page_cgroup(page); | ||
| 687 | done: | 633 | done: |
| 688 | return 0; | 634 | return 0; |
| 689 | out: | 635 | out: |
| @@ -693,70 +639,61 @@ err: | |||
| 693 | return -ENOMEM; | 639 | return -ENOMEM; |
| 694 | } | 640 | } |
| 695 | 641 | ||
| 696 | int mem_cgroup_charge(struct page *page, struct mm_struct *mm, | 642 | int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) |
| 697 | gfp_t gfp_mask) | ||
| 698 | { | 643 | { |
| 699 | return mem_cgroup_charge_common(page, mm, gfp_mask, | 644 | return mem_cgroup_charge_common(page, mm, gfp_mask, |
| 700 | MEM_CGROUP_CHARGE_TYPE_MAPPED); | 645 | MEM_CGROUP_CHARGE_TYPE_MAPPED); |
| 701 | } | 646 | } |
| 702 | 647 | ||
| 703 | /* | ||
| 704 | * See if the cached pages should be charged at all? | ||
| 705 | */ | ||
| 706 | int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, | 648 | int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, |
| 707 | gfp_t gfp_mask) | 649 | gfp_t gfp_mask) |
| 708 | { | 650 | { |
| 709 | int ret = 0; | ||
| 710 | if (!mm) | 651 | if (!mm) |
| 711 | mm = &init_mm; | 652 | mm = &init_mm; |
| 712 | 653 | return mem_cgroup_charge_common(page, mm, gfp_mask, | |
| 713 | ret = mem_cgroup_charge_common(page, mm, gfp_mask, | ||
| 714 | MEM_CGROUP_CHARGE_TYPE_CACHE); | 654 | MEM_CGROUP_CHARGE_TYPE_CACHE); |
| 715 | return ret; | ||
| 716 | } | 655 | } |
| 717 | 656 | ||
| 718 | /* | 657 | /* |
| 719 | * Uncharging is always a welcome operation, we never complain, simply | 658 | * Uncharging is always a welcome operation, we never complain, simply |
| 720 | * uncharge. This routine should be called with lock_page_cgroup held | 659 | * uncharge. |
| 721 | */ | 660 | */ |
| 722 | void mem_cgroup_uncharge(struct page_cgroup *pc) | 661 | void mem_cgroup_uncharge_page(struct page *page) |
| 723 | { | 662 | { |
| 663 | struct page_cgroup *pc; | ||
| 724 | struct mem_cgroup *mem; | 664 | struct mem_cgroup *mem; |
| 725 | struct mem_cgroup_per_zone *mz; | 665 | struct mem_cgroup_per_zone *mz; |
| 726 | struct page *page; | ||
| 727 | unsigned long flags; | 666 | unsigned long flags; |
| 728 | 667 | ||
| 729 | /* | 668 | /* |
| 730 | * Check if our page_cgroup is valid | 669 | * Check if our page_cgroup is valid |
| 731 | */ | 670 | */ |
| 671 | lock_page_cgroup(page); | ||
| 672 | pc = page_get_page_cgroup(page); | ||
| 732 | if (!pc) | 673 | if (!pc) |
| 733 | return; | 674 | goto unlock; |
| 734 | 675 | ||
| 735 | if (atomic_dec_and_test(&pc->ref_cnt)) { | 676 | VM_BUG_ON(pc->page != page); |
| 736 | page = pc->page; | 677 | VM_BUG_ON(pc->ref_cnt <= 0); |
| 678 | |||
| 679 | if (--(pc->ref_cnt) == 0) { | ||
| 737 | mz = page_cgroup_zoneinfo(pc); | 680 | mz = page_cgroup_zoneinfo(pc); |
| 738 | /* | 681 | spin_lock_irqsave(&mz->lru_lock, flags); |
| 739 | * get page->cgroup and clear it under lock. | 682 | __mem_cgroup_remove_list(pc); |
| 740 | * force_empty can drop page->cgroup without checking refcnt. | 683 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
| 741 | */ | 684 | |
| 685 | page_assign_page_cgroup(page, NULL); | ||
| 742 | unlock_page_cgroup(page); | 686 | unlock_page_cgroup(page); |
| 743 | if (clear_page_cgroup(page, pc) == pc) { | 687 | |
| 744 | mem = pc->mem_cgroup; | 688 | mem = pc->mem_cgroup; |
| 745 | css_put(&mem->css); | 689 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
| 746 | res_counter_uncharge(&mem->res, PAGE_SIZE); | 690 | css_put(&mem->css); |
| 747 | spin_lock_irqsave(&mz->lru_lock, flags); | 691 | |
| 748 | __mem_cgroup_remove_list(pc); | 692 | kfree(pc); |
| 749 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 693 | return; |
| 750 | kfree(pc); | ||
| 751 | } | ||
| 752 | lock_page_cgroup(page); | ||
| 753 | } | 694 | } |
| 754 | } | ||
| 755 | 695 | ||
| 756 | void mem_cgroup_uncharge_page(struct page *page) | 696 | unlock: |
| 757 | { | ||
| 758 | lock_page_cgroup(page); | ||
| 759 | mem_cgroup_uncharge(page_get_page_cgroup(page)); | ||
| 760 | unlock_page_cgroup(page); | 697 | unlock_page_cgroup(page); |
| 761 | } | 698 | } |
| 762 | 699 | ||
| @@ -764,63 +701,59 @@ void mem_cgroup_uncharge_page(struct page *page) | |||
| 764 | * Returns non-zero if a page (under migration) has valid page_cgroup member. | 701 | * Returns non-zero if a page (under migration) has valid page_cgroup member. |
| 765 | * Refcnt of page_cgroup is incremented. | 702 | * Refcnt of page_cgroup is incremented. |
| 766 | */ | 703 | */ |
| 767 | |||
| 768 | int mem_cgroup_prepare_migration(struct page *page) | 704 | int mem_cgroup_prepare_migration(struct page *page) |
| 769 | { | 705 | { |
| 770 | struct page_cgroup *pc; | 706 | struct page_cgroup *pc; |
| 771 | int ret = 0; | 707 | |
| 772 | lock_page_cgroup(page); | 708 | lock_page_cgroup(page); |
| 773 | pc = page_get_page_cgroup(page); | 709 | pc = page_get_page_cgroup(page); |
| 774 | if (pc && atomic_inc_not_zero(&pc->ref_cnt)) | 710 | if (pc) |
| 775 | ret = 1; | 711 | pc->ref_cnt++; |
| 776 | unlock_page_cgroup(page); | 712 | unlock_page_cgroup(page); |
| 777 | return ret; | 713 | return pc != NULL; |
| 778 | } | 714 | } |
| 779 | 715 | ||
| 780 | void mem_cgroup_end_migration(struct page *page) | 716 | void mem_cgroup_end_migration(struct page *page) |
| 781 | { | 717 | { |
| 782 | struct page_cgroup *pc; | 718 | mem_cgroup_uncharge_page(page); |
| 783 | |||
| 784 | lock_page_cgroup(page); | ||
| 785 | pc = page_get_page_cgroup(page); | ||
| 786 | mem_cgroup_uncharge(pc); | ||
| 787 | unlock_page_cgroup(page); | ||
| 788 | } | 719 | } |
| 720 | |||
| 789 | /* | 721 | /* |
| 790 | * We know both *page* and *newpage* are now not-on-LRU and Pg_locked. | 722 | * We know both *page* and *newpage* are now not-on-LRU and PG_locked. |
| 791 | * And no race with uncharge() routines because page_cgroup for *page* | 723 | * And no race with uncharge() routines because page_cgroup for *page* |
| 792 | * has one extra reference taken by mem_cgroup_prepare_migration. | 724 | * has one extra reference taken by mem_cgroup_prepare_migration. |
| 793 | */ | 725 | */ |
| 794 | |||
| 795 | void mem_cgroup_page_migration(struct page *page, struct page *newpage) | 726 | void mem_cgroup_page_migration(struct page *page, struct page *newpage) |
| 796 | { | 727 | { |
| 797 | struct page_cgroup *pc; | 728 | struct page_cgroup *pc; |
| 798 | struct mem_cgroup *mem; | ||
| 799 | unsigned long flags; | ||
| 800 | struct mem_cgroup_per_zone *mz; | 729 | struct mem_cgroup_per_zone *mz; |
| 801 | retry: | 730 | unsigned long flags; |
| 731 | |||
| 732 | lock_page_cgroup(page); | ||
| 802 | pc = page_get_page_cgroup(page); | 733 | pc = page_get_page_cgroup(page); |
| 803 | if (!pc) | 734 | if (!pc) { |
| 735 | unlock_page_cgroup(page); | ||
| 804 | return; | 736 | return; |
| 805 | mem = pc->mem_cgroup; | 737 | } |
| 738 | |||
| 806 | mz = page_cgroup_zoneinfo(pc); | 739 | mz = page_cgroup_zoneinfo(pc); |
| 807 | if (clear_page_cgroup(page, pc) != pc) | ||
| 808 | goto retry; | ||
| 809 | spin_lock_irqsave(&mz->lru_lock, flags); | 740 | spin_lock_irqsave(&mz->lru_lock, flags); |
| 810 | |||
| 811 | __mem_cgroup_remove_list(pc); | 741 | __mem_cgroup_remove_list(pc); |
| 812 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 742 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
| 813 | 743 | ||
| 744 | page_assign_page_cgroup(page, NULL); | ||
| 745 | unlock_page_cgroup(page); | ||
| 746 | |||
| 814 | pc->page = newpage; | 747 | pc->page = newpage; |
| 815 | lock_page_cgroup(newpage); | 748 | lock_page_cgroup(newpage); |
| 816 | page_assign_page_cgroup(newpage, pc); | 749 | page_assign_page_cgroup(newpage, pc); |
| 817 | unlock_page_cgroup(newpage); | ||
| 818 | 750 | ||
| 819 | mz = page_cgroup_zoneinfo(pc); | 751 | mz = page_cgroup_zoneinfo(pc); |
| 820 | spin_lock_irqsave(&mz->lru_lock, flags); | 752 | spin_lock_irqsave(&mz->lru_lock, flags); |
| 821 | __mem_cgroup_add_list(pc); | 753 | __mem_cgroup_add_list(pc); |
| 822 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 754 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
| 823 | return; | 755 | |
| 756 | unlock_page_cgroup(newpage); | ||
| 824 | } | 757 | } |
| 825 | 758 | ||
| 826 | /* | 759 | /* |
| @@ -829,14 +762,13 @@ retry: | |||
| 829 | * *And* this routine doesn't reclaim page itself, just removes page_cgroup. | 762 | * *And* this routine doesn't reclaim page itself, just removes page_cgroup. |
| 830 | */ | 763 | */ |
| 831 | #define FORCE_UNCHARGE_BATCH (128) | 764 | #define FORCE_UNCHARGE_BATCH (128) |
| 832 | static void | 765 | static void mem_cgroup_force_empty_list(struct mem_cgroup *mem, |
| 833 | mem_cgroup_force_empty_list(struct mem_cgroup *mem, | ||
| 834 | struct mem_cgroup_per_zone *mz, | 766 | struct mem_cgroup_per_zone *mz, |
| 835 | int active) | 767 | int active) |
| 836 | { | 768 | { |
| 837 | struct page_cgroup *pc; | 769 | struct page_cgroup *pc; |
| 838 | struct page *page; | 770 | struct page *page; |
| 839 | int count; | 771 | int count = FORCE_UNCHARGE_BATCH; |
| 840 | unsigned long flags; | 772 | unsigned long flags; |
| 841 | struct list_head *list; | 773 | struct list_head *list; |
| 842 | 774 | ||
| @@ -845,46 +777,36 @@ mem_cgroup_force_empty_list(struct mem_cgroup *mem, | |||
| 845 | else | 777 | else |
| 846 | list = &mz->inactive_list; | 778 | list = &mz->inactive_list; |
| 847 | 779 | ||
| 848 | if (list_empty(list)) | ||
| 849 | return; | ||
| 850 | retry: | ||
| 851 | count = FORCE_UNCHARGE_BATCH; | ||
| 852 | spin_lock_irqsave(&mz->lru_lock, flags); | 780 | spin_lock_irqsave(&mz->lru_lock, flags); |
| 853 | 781 | while (!list_empty(list)) { | |
| 854 | while (--count && !list_empty(list)) { | ||
| 855 | pc = list_entry(list->prev, struct page_cgroup, lru); | 782 | pc = list_entry(list->prev, struct page_cgroup, lru); |
| 856 | page = pc->page; | 783 | page = pc->page; |
| 857 | /* Avoid race with charge */ | 784 | get_page(page); |
| 858 | atomic_set(&pc->ref_cnt, 0); | 785 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
| 859 | if (clear_page_cgroup(page, pc) == pc) { | 786 | mem_cgroup_uncharge_page(page); |
| 860 | css_put(&mem->css); | 787 | put_page(page); |
| 861 | res_counter_uncharge(&mem->res, PAGE_SIZE); | 788 | if (--count <= 0) { |
| 862 | __mem_cgroup_remove_list(pc); | 789 | count = FORCE_UNCHARGE_BATCH; |
| 863 | kfree(pc); | 790 | cond_resched(); |
| 864 | } else /* being uncharged ? ...do relax */ | 791 | } |
| 865 | break; | 792 | spin_lock_irqsave(&mz->lru_lock, flags); |
| 866 | } | 793 | } |
| 867 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 794 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
| 868 | if (!list_empty(list)) { | ||
| 869 | cond_resched(); | ||
| 870 | goto retry; | ||
| 871 | } | ||
| 872 | return; | ||
| 873 | } | 795 | } |
| 874 | 796 | ||
| 875 | /* | 797 | /* |
| 876 | * make mem_cgroup's charge to be 0 if there is no task. | 798 | * make mem_cgroup's charge to be 0 if there is no task. |
| 877 | * This enables deleting this mem_cgroup. | 799 | * This enables deleting this mem_cgroup. |
| 878 | */ | 800 | */ |
| 879 | 801 | static int mem_cgroup_force_empty(struct mem_cgroup *mem) | |
| 880 | int mem_cgroup_force_empty(struct mem_cgroup *mem) | ||
| 881 | { | 802 | { |
| 882 | int ret = -EBUSY; | 803 | int ret = -EBUSY; |
| 883 | int node, zid; | 804 | int node, zid; |
| 805 | |||
| 884 | css_get(&mem->css); | 806 | css_get(&mem->css); |
| 885 | /* | 807 | /* |
| 886 | * page reclaim code (kswapd etc..) will move pages between | 808 | * page reclaim code (kswapd etc..) will move pages between |
| 887 | ` * active_list <-> inactive_list while we don't take a lock. | 809 | * active_list <-> inactive_list while we don't take a lock. |
| 888 | * So, we have to loop here until all lists are empty. | 810 | * So, we have to loop here until all lists are empty. |
| 889 | */ | 811 | */ |
| 890 | while (mem->res.usage > 0) { | 812 | while (mem->res.usage > 0) { |
| @@ -906,9 +828,7 @@ out: | |||
| 906 | return ret; | 828 | return ret; |
| 907 | } | 829 | } |
| 908 | 830 | ||
| 909 | 831 | static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp) | |
| 910 | |||
| 911 | int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp) | ||
| 912 | { | 832 | { |
| 913 | *tmp = memparse(buf, &buf); | 833 | *tmp = memparse(buf, &buf); |
| 914 | if (*buf != '\0') | 834 | if (*buf != '\0') |
| @@ -945,8 +865,7 @@ static ssize_t mem_force_empty_write(struct cgroup *cont, | |||
| 945 | size_t nbytes, loff_t *ppos) | 865 | size_t nbytes, loff_t *ppos) |
| 946 | { | 866 | { |
| 947 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); | 867 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); |
| 948 | int ret; | 868 | int ret = mem_cgroup_force_empty(mem); |
| 949 | ret = mem_cgroup_force_empty(mem); | ||
| 950 | if (!ret) | 869 | if (!ret) |
| 951 | ret = nbytes; | 870 | ret = nbytes; |
| 952 | return ret; | 871 | return ret; |
| @@ -955,7 +874,6 @@ static ssize_t mem_force_empty_write(struct cgroup *cont, | |||
| 955 | /* | 874 | /* |
| 956 | * Note: This should be removed if cgroup supports write-only file. | 875 | * Note: This should be removed if cgroup supports write-only file. |
| 957 | */ | 876 | */ |
| 958 | |||
| 959 | static ssize_t mem_force_empty_read(struct cgroup *cont, | 877 | static ssize_t mem_force_empty_read(struct cgroup *cont, |
| 960 | struct cftype *cft, | 878 | struct cftype *cft, |
| 961 | struct file *file, char __user *userbuf, | 879 | struct file *file, char __user *userbuf, |
| @@ -964,7 +882,6 @@ static ssize_t mem_force_empty_read(struct cgroup *cont, | |||
| 964 | return -EINVAL; | 882 | return -EINVAL; |
| 965 | } | 883 | } |
| 966 | 884 | ||
| 967 | |||
| 968 | static const struct mem_cgroup_stat_desc { | 885 | static const struct mem_cgroup_stat_desc { |
| 969 | const char *msg; | 886 | const char *msg; |
| 970 | u64 unit; | 887 | u64 unit; |
| @@ -1017,8 +934,6 @@ static int mem_control_stat_open(struct inode *unused, struct file *file) | |||
| 1017 | return single_open(file, mem_control_stat_show, cont); | 934 | return single_open(file, mem_control_stat_show, cont); |
| 1018 | } | 935 | } |
| 1019 | 936 | ||
| 1020 | |||
| 1021 | |||
| 1022 | static struct cftype mem_cgroup_files[] = { | 937 | static struct cftype mem_cgroup_files[] = { |
| 1023 | { | 938 | { |
| 1024 | .name = "usage_in_bytes", | 939 | .name = "usage_in_bytes", |
| @@ -1084,9 +999,6 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) | |||
| 1084 | kfree(mem->info.nodeinfo[node]); | 999 | kfree(mem->info.nodeinfo[node]); |
| 1085 | } | 1000 | } |
| 1086 | 1001 | ||
| 1087 | |||
| 1088 | static struct mem_cgroup init_mem_cgroup; | ||
| 1089 | |||
| 1090 | static struct cgroup_subsys_state * | 1002 | static struct cgroup_subsys_state * |
| 1091 | mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | 1003 | mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) |
| 1092 | { | 1004 | { |
| @@ -1176,7 +1088,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, | |||
| 1176 | 1088 | ||
| 1177 | out: | 1089 | out: |
| 1178 | mmput(mm); | 1090 | mmput(mm); |
| 1179 | return; | ||
| 1180 | } | 1091 | } |
| 1181 | 1092 | ||
| 1182 | struct cgroup_subsys mem_cgroup_subsys = { | 1093 | struct cgroup_subsys mem_cgroup_subsys = { |
diff --git a/mm/memory.c b/mm/memory.c index ce3c9e4492d8..0d14d1e58a5f 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
| @@ -1711,7 +1711,7 @@ unlock: | |||
| 1711 | } | 1711 | } |
| 1712 | return ret; | 1712 | return ret; |
| 1713 | oom_free_new: | 1713 | oom_free_new: |
| 1714 | __free_page(new_page); | 1714 | page_cache_release(new_page); |
| 1715 | oom: | 1715 | oom: |
| 1716 | if (old_page) | 1716 | if (old_page) |
| 1717 | page_cache_release(old_page); | 1717 | page_cache_release(old_page); |
| @@ -2093,12 +2093,9 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 2093 | unlock_page(page); | 2093 | unlock_page(page); |
| 2094 | 2094 | ||
| 2095 | if (write_access) { | 2095 | if (write_access) { |
| 2096 | /* XXX: We could OR the do_wp_page code with this one? */ | 2096 | ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte); |
| 2097 | if (do_wp_page(mm, vma, address, | 2097 | if (ret & VM_FAULT_ERROR) |
| 2098 | page_table, pmd, ptl, pte) & VM_FAULT_OOM) { | 2098 | ret &= VM_FAULT_ERROR; |
| 2099 | mem_cgroup_uncharge_page(page); | ||
| 2100 | ret = VM_FAULT_OOM; | ||
| 2101 | } | ||
| 2102 | goto out; | 2099 | goto out; |
| 2103 | } | 2100 | } |
| 2104 | 2101 | ||
| @@ -2163,7 +2160,7 @@ release: | |||
| 2163 | page_cache_release(page); | 2160 | page_cache_release(page); |
| 2164 | goto unlock; | 2161 | goto unlock; |
| 2165 | oom_free_page: | 2162 | oom_free_page: |
| 2166 | __free_page(page); | 2163 | page_cache_release(page); |
| 2167 | oom: | 2164 | oom: |
| 2168 | return VM_FAULT_OOM; | 2165 | return VM_FAULT_OOM; |
| 2169 | } | 2166 | } |
diff --git a/mm/migrate.c b/mm/migrate.c index a73504ff5ab9..4e0eccca5e26 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
| @@ -153,11 +153,6 @@ static void remove_migration_pte(struct vm_area_struct *vma, | |||
| 153 | return; | 153 | return; |
| 154 | } | 154 | } |
| 155 | 155 | ||
| 156 | if (mem_cgroup_charge(new, mm, GFP_KERNEL)) { | ||
| 157 | pte_unmap(ptep); | ||
| 158 | return; | ||
| 159 | } | ||
| 160 | |||
| 161 | ptl = pte_lockptr(mm, pmd); | 156 | ptl = pte_lockptr(mm, pmd); |
| 162 | spin_lock(ptl); | 157 | spin_lock(ptl); |
| 163 | pte = *ptep; | 158 | pte = *ptep; |
| @@ -169,6 +164,20 @@ static void remove_migration_pte(struct vm_area_struct *vma, | |||
| 169 | if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old) | 164 | if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old) |
| 170 | goto out; | 165 | goto out; |
| 171 | 166 | ||
| 167 | /* | ||
| 168 | * Yes, ignore the return value from a GFP_ATOMIC mem_cgroup_charge. | ||
| 169 | * Failure is not an option here: we're now expected to remove every | ||
| 170 | * migration pte, and will cause crashes otherwise. Normally this | ||
| 171 | * is not an issue: mem_cgroup_prepare_migration bumped up the old | ||
| 172 | * page_cgroup count for safety, that's now attached to the new page, | ||
| 173 | * so this charge should just be another incrementation of the count, | ||
| 174 | * to keep in balance with rmap.c's mem_cgroup_uncharging. But if | ||
| 175 | * there's been a force_empty, those reference counts may no longer | ||
| 176 | * be reliable, and this charge can actually fail: oh well, we don't | ||
| 177 | * make the situation any worse by proceeding as if it had succeeded. | ||
| 178 | */ | ||
| 179 | mem_cgroup_charge(new, mm, GFP_ATOMIC); | ||
| 180 | |||
| 172 | get_page(new); | 181 | get_page(new); |
| 173 | pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); | 182 | pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); |
| 174 | if (is_write_migration_entry(entry)) | 183 | if (is_write_migration_entry(entry)) |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 4194b9db0104..44b2da11bf43 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
| @@ -412,7 +412,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, | |||
| 412 | return oom_kill_task(p); | 412 | return oom_kill_task(p); |
| 413 | } | 413 | } |
| 414 | 414 | ||
| 415 | #ifdef CONFIG_CGROUP_MEM_CONT | 415 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR |
| 416 | void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask) | 416 | void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask) |
| 417 | { | 417 | { |
| 418 | unsigned long points = 0; | 418 | unsigned long points = 0; |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8896e874a67d..402a504f1228 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include <linux/swap.h> | 19 | #include <linux/swap.h> |
| 20 | #include <linux/interrupt.h> | 20 | #include <linux/interrupt.h> |
| 21 | #include <linux/pagemap.h> | 21 | #include <linux/pagemap.h> |
| 22 | #include <linux/jiffies.h> | ||
| 22 | #include <linux/bootmem.h> | 23 | #include <linux/bootmem.h> |
| 23 | #include <linux/compiler.h> | 24 | #include <linux/compiler.h> |
| 24 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
| @@ -221,13 +222,19 @@ static inline int bad_range(struct zone *zone, struct page *page) | |||
| 221 | 222 | ||
| 222 | static void bad_page(struct page *page) | 223 | static void bad_page(struct page *page) |
| 223 | { | 224 | { |
| 224 | printk(KERN_EMERG "Bad page state in process '%s'\n" | 225 | void *pc = page_get_page_cgroup(page); |
| 225 | KERN_EMERG "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n" | 226 | |
| 226 | KERN_EMERG "Trying to fix it up, but a reboot is needed\n" | 227 | printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG |
| 227 | KERN_EMERG "Backtrace:\n", | 228 | "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n", |
| 228 | current->comm, page, (int)(2*sizeof(unsigned long)), | 229 | current->comm, page, (int)(2*sizeof(unsigned long)), |
| 229 | (unsigned long)page->flags, page->mapping, | 230 | (unsigned long)page->flags, page->mapping, |
| 230 | page_mapcount(page), page_count(page)); | 231 | page_mapcount(page), page_count(page)); |
| 232 | if (pc) { | ||
| 233 | printk(KERN_EMERG "cgroup:%p\n", pc); | ||
| 234 | page_reset_bad_cgroup(page); | ||
| 235 | } | ||
| 236 | printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n" | ||
| 237 | KERN_EMERG "Backtrace:\n"); | ||
| 231 | dump_stack(); | 238 | dump_stack(); |
| 232 | page->flags &= ~(1 << PG_lru | | 239 | page->flags &= ~(1 << PG_lru | |
| 233 | 1 << PG_private | | 240 | 1 << PG_private | |
| @@ -453,6 +460,7 @@ static inline int free_pages_check(struct page *page) | |||
| 453 | { | 460 | { |
| 454 | if (unlikely(page_mapcount(page) | | 461 | if (unlikely(page_mapcount(page) | |
| 455 | (page->mapping != NULL) | | 462 | (page->mapping != NULL) | |
| 463 | (page_get_page_cgroup(page) != NULL) | | ||
| 456 | (page_count(page) != 0) | | 464 | (page_count(page) != 0) | |
| 457 | (page->flags & ( | 465 | (page->flags & ( |
| 458 | 1 << PG_lru | | 466 | 1 << PG_lru | |
| @@ -602,6 +610,7 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) | |||
| 602 | { | 610 | { |
| 603 | if (unlikely(page_mapcount(page) | | 611 | if (unlikely(page_mapcount(page) | |
| 604 | (page->mapping != NULL) | | 612 | (page->mapping != NULL) | |
| 613 | (page_get_page_cgroup(page) != NULL) | | ||
| 605 | (page_count(page) != 0) | | 614 | (page_count(page) != 0) | |
| 606 | (page->flags & ( | 615 | (page->flags & ( |
| 607 | 1 << PG_lru | | 616 | 1 << PG_lru | |
| @@ -988,7 +997,6 @@ static void free_hot_cold_page(struct page *page, int cold) | |||
| 988 | 997 | ||
| 989 | if (!PageHighMem(page)) | 998 | if (!PageHighMem(page)) |
| 990 | debug_check_no_locks_freed(page_address(page), PAGE_SIZE); | 999 | debug_check_no_locks_freed(page_address(page), PAGE_SIZE); |
| 991 | VM_BUG_ON(page_get_page_cgroup(page)); | ||
| 992 | arch_free_page(page, 0); | 1000 | arch_free_page(page, 0); |
| 993 | kernel_map_pages(page, 1, 0); | 1001 | kernel_map_pages(page, 1, 0); |
| 994 | 1002 | ||
| @@ -1276,7 +1284,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) | |||
| 1276 | if (!zlc) | 1284 | if (!zlc) |
| 1277 | return NULL; | 1285 | return NULL; |
| 1278 | 1286 | ||
| 1279 | if (jiffies - zlc->last_full_zap > 1 * HZ) { | 1287 | if (time_after(jiffies, zlc->last_full_zap + HZ)) { |
| 1280 | bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); | 1288 | bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); |
| 1281 | zlc->last_full_zap = jiffies; | 1289 | zlc->last_full_zap = jiffies; |
| 1282 | } | 1290 | } |
| @@ -2527,7 +2535,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
| 2527 | set_page_links(page, zone, nid, pfn); | 2535 | set_page_links(page, zone, nid, pfn); |
| 2528 | init_page_count(page); | 2536 | init_page_count(page); |
| 2529 | reset_page_mapcount(page); | 2537 | reset_page_mapcount(page); |
| 2530 | page_assign_page_cgroup(page, NULL); | ||
| 2531 | SetPageReserved(page); | 2538 | SetPageReserved(page); |
| 2532 | 2539 | ||
| 2533 | /* | 2540 | /* |
| @@ -321,7 +321,7 @@ static int page_referenced_anon(struct page *page, | |||
| 321 | * counting on behalf of references from different | 321 | * counting on behalf of references from different |
| 322 | * cgroups | 322 | * cgroups |
| 323 | */ | 323 | */ |
| 324 | if (mem_cont && !vm_match_cgroup(vma->vm_mm, mem_cont)) | 324 | if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) |
| 325 | continue; | 325 | continue; |
| 326 | referenced += page_referenced_one(page, vma, &mapcount); | 326 | referenced += page_referenced_one(page, vma, &mapcount); |
| 327 | if (!mapcount) | 327 | if (!mapcount) |
| @@ -382,7 +382,7 @@ static int page_referenced_file(struct page *page, | |||
| 382 | * counting on behalf of references from different | 382 | * counting on behalf of references from different |
| 383 | * cgroups | 383 | * cgroups |
| 384 | */ | 384 | */ |
| 385 | if (mem_cont && !vm_match_cgroup(vma->vm_mm, mem_cont)) | 385 | if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) |
| 386 | continue; | 386 | continue; |
| 387 | if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE)) | 387 | if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE)) |
| 388 | == (VM_LOCKED|VM_MAYSHARE)) { | 388 | == (VM_LOCKED|VM_MAYSHARE)) { |
diff --git a/mm/shmem.c b/mm/shmem.c index 90b576cbc06e..3372bc579e89 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
| @@ -1370,14 +1370,17 @@ repeat: | |||
| 1370 | shmem_swp_unmap(entry); | 1370 | shmem_swp_unmap(entry); |
| 1371 | spin_unlock(&info->lock); | 1371 | spin_unlock(&info->lock); |
| 1372 | unlock_page(swappage); | 1372 | unlock_page(swappage); |
| 1373 | page_cache_release(swappage); | ||
| 1374 | if (error == -ENOMEM) { | 1373 | if (error == -ENOMEM) { |
| 1375 | /* allow reclaim from this memory cgroup */ | 1374 | /* allow reclaim from this memory cgroup */ |
| 1376 | error = mem_cgroup_cache_charge(NULL, | 1375 | error = mem_cgroup_cache_charge(swappage, |
| 1377 | current->mm, gfp & ~__GFP_HIGHMEM); | 1376 | current->mm, gfp & ~__GFP_HIGHMEM); |
| 1378 | if (error) | 1377 | if (error) { |
| 1378 | page_cache_release(swappage); | ||
| 1379 | goto failed; | 1379 | goto failed; |
| 1380 | } | ||
| 1381 | mem_cgroup_uncharge_page(swappage); | ||
| 1380 | } | 1382 | } |
| 1383 | page_cache_release(swappage); | ||
| 1381 | goto repeat; | 1384 | goto repeat; |
| 1382 | } | 1385 | } |
| 1383 | } else if (sgp == SGP_READ && !filepage) { | 1386 | } else if (sgp == SGP_READ && !filepage) { |
| @@ -291,32 +291,16 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) | |||
| 291 | #endif | 291 | #endif |
| 292 | } | 292 | } |
| 293 | 293 | ||
| 294 | /* | 294 | /* Verify that a pointer has an address that is valid within a slab page */ |
| 295 | * The end pointer in a slab is special. It points to the first object in the | ||
| 296 | * slab but has bit 0 set to mark it. | ||
| 297 | * | ||
| 298 | * Note that SLUB relies on page_mapping returning NULL for pages with bit 0 | ||
| 299 | * in the mapping set. | ||
| 300 | */ | ||
| 301 | static inline int is_end(void *addr) | ||
| 302 | { | ||
| 303 | return (unsigned long)addr & PAGE_MAPPING_ANON; | ||
| 304 | } | ||
| 305 | |||
| 306 | static void *slab_address(struct page *page) | ||
| 307 | { | ||
| 308 | return page->end - PAGE_MAPPING_ANON; | ||
| 309 | } | ||
| 310 | |||
| 311 | static inline int check_valid_pointer(struct kmem_cache *s, | 295 | static inline int check_valid_pointer(struct kmem_cache *s, |
| 312 | struct page *page, const void *object) | 296 | struct page *page, const void *object) |
| 313 | { | 297 | { |
| 314 | void *base; | 298 | void *base; |
| 315 | 299 | ||
| 316 | if (object == page->end) | 300 | if (!object) |
| 317 | return 1; | 301 | return 1; |
| 318 | 302 | ||
| 319 | base = slab_address(page); | 303 | base = page_address(page); |
| 320 | if (object < base || object >= base + s->objects * s->size || | 304 | if (object < base || object >= base + s->objects * s->size || |
| 321 | (object - base) % s->size) { | 305 | (object - base) % s->size) { |
| 322 | return 0; | 306 | return 0; |
| @@ -349,8 +333,7 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) | |||
| 349 | 333 | ||
| 350 | /* Scan freelist */ | 334 | /* Scan freelist */ |
| 351 | #define for_each_free_object(__p, __s, __free) \ | 335 | #define for_each_free_object(__p, __s, __free) \ |
| 352 | for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\ | 336 | for (__p = (__free); __p; __p = get_freepointer((__s), __p)) |
| 353 | __p)) | ||
| 354 | 337 | ||
| 355 | /* Determine object index from a given position */ | 338 | /* Determine object index from a given position */ |
| 356 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) | 339 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) |
| @@ -502,7 +485,7 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...) | |||
| 502 | static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) | 485 | static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) |
| 503 | { | 486 | { |
| 504 | unsigned int off; /* Offset of last byte */ | 487 | unsigned int off; /* Offset of last byte */ |
| 505 | u8 *addr = slab_address(page); | 488 | u8 *addr = page_address(page); |
| 506 | 489 | ||
| 507 | print_tracking(s, p); | 490 | print_tracking(s, p); |
| 508 | 491 | ||
| @@ -637,7 +620,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, | |||
| 637 | * A. Free pointer (if we cannot overwrite object on free) | 620 | * A. Free pointer (if we cannot overwrite object on free) |
| 638 | * B. Tracking data for SLAB_STORE_USER | 621 | * B. Tracking data for SLAB_STORE_USER |
| 639 | * C. Padding to reach required alignment boundary or at minimum | 622 | * C. Padding to reach required alignment boundary or at minimum |
| 640 | * one word if debuggin is on to be able to detect writes | 623 | * one word if debugging is on to be able to detect writes |
| 641 | * before the word boundary. | 624 | * before the word boundary. |
| 642 | * | 625 | * |
| 643 | * Padding is done using 0x5a (POISON_INUSE) | 626 | * Padding is done using 0x5a (POISON_INUSE) |
| @@ -680,7 +663,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) | |||
| 680 | if (!(s->flags & SLAB_POISON)) | 663 | if (!(s->flags & SLAB_POISON)) |
| 681 | return 1; | 664 | return 1; |
| 682 | 665 | ||
| 683 | start = slab_address(page); | 666 | start = page_address(page); |
| 684 | end = start + (PAGE_SIZE << s->order); | 667 | end = start + (PAGE_SIZE << s->order); |
| 685 | length = s->objects * s->size; | 668 | length = s->objects * s->size; |
| 686 | remainder = end - (start + length); | 669 | remainder = end - (start + length); |
| @@ -748,7 +731,7 @@ static int check_object(struct kmem_cache *s, struct page *page, | |||
| 748 | * of the free objects in this slab. May cause | 731 | * of the free objects in this slab. May cause |
| 749 | * another error because the object count is now wrong. | 732 | * another error because the object count is now wrong. |
| 750 | */ | 733 | */ |
| 751 | set_freepointer(s, p, page->end); | 734 | set_freepointer(s, p, NULL); |
| 752 | return 0; | 735 | return 0; |
| 753 | } | 736 | } |
| 754 | return 1; | 737 | return 1; |
| @@ -782,18 +765,18 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) | |||
| 782 | void *fp = page->freelist; | 765 | void *fp = page->freelist; |
| 783 | void *object = NULL; | 766 | void *object = NULL; |
| 784 | 767 | ||
| 785 | while (fp != page->end && nr <= s->objects) { | 768 | while (fp && nr <= s->objects) { |
| 786 | if (fp == search) | 769 | if (fp == search) |
| 787 | return 1; | 770 | return 1; |
| 788 | if (!check_valid_pointer(s, page, fp)) { | 771 | if (!check_valid_pointer(s, page, fp)) { |
| 789 | if (object) { | 772 | if (object) { |
| 790 | object_err(s, page, object, | 773 | object_err(s, page, object, |
| 791 | "Freechain corrupt"); | 774 | "Freechain corrupt"); |
| 792 | set_freepointer(s, object, page->end); | 775 | set_freepointer(s, object, NULL); |
| 793 | break; | 776 | break; |
| 794 | } else { | 777 | } else { |
| 795 | slab_err(s, page, "Freepointer corrupt"); | 778 | slab_err(s, page, "Freepointer corrupt"); |
| 796 | page->freelist = page->end; | 779 | page->freelist = NULL; |
| 797 | page->inuse = s->objects; | 780 | page->inuse = s->objects; |
| 798 | slab_fix(s, "Freelist cleared"); | 781 | slab_fix(s, "Freelist cleared"); |
| 799 | return 0; | 782 | return 0; |
| @@ -870,7 +853,7 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page, | |||
| 870 | if (!check_slab(s, page)) | 853 | if (!check_slab(s, page)) |
| 871 | goto bad; | 854 | goto bad; |
| 872 | 855 | ||
| 873 | if (object && !on_freelist(s, page, object)) { | 856 | if (!on_freelist(s, page, object)) { |
| 874 | object_err(s, page, object, "Object already allocated"); | 857 | object_err(s, page, object, "Object already allocated"); |
| 875 | goto bad; | 858 | goto bad; |
| 876 | } | 859 | } |
| @@ -880,7 +863,7 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page, | |||
| 880 | goto bad; | 863 | goto bad; |
| 881 | } | 864 | } |
| 882 | 865 | ||
| 883 | if (object && !check_object(s, page, object, 0)) | 866 | if (!check_object(s, page, object, 0)) |
| 884 | goto bad; | 867 | goto bad; |
| 885 | 868 | ||
| 886 | /* Success: perform special debug activities for allocs */ | 869 | /* Success: perform special debug activities for allocs */ |
| @@ -899,7 +882,7 @@ bad: | |||
| 899 | */ | 882 | */ |
| 900 | slab_fix(s, "Marking all objects used"); | 883 | slab_fix(s, "Marking all objects used"); |
| 901 | page->inuse = s->objects; | 884 | page->inuse = s->objects; |
| 902 | page->freelist = page->end; | 885 | page->freelist = NULL; |
| 903 | } | 886 | } |
| 904 | return 0; | 887 | return 0; |
| 905 | } | 888 | } |
| @@ -939,7 +922,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page, | |||
| 939 | } | 922 | } |
| 940 | 923 | ||
| 941 | /* Special debug activities for freeing objects */ | 924 | /* Special debug activities for freeing objects */ |
| 942 | if (!SlabFrozen(page) && page->freelist == page->end) | 925 | if (!SlabFrozen(page) && !page->freelist) |
| 943 | remove_full(s, page); | 926 | remove_full(s, page); |
| 944 | if (s->flags & SLAB_STORE_USER) | 927 | if (s->flags & SLAB_STORE_USER) |
| 945 | set_track(s, object, TRACK_FREE, addr); | 928 | set_track(s, object, TRACK_FREE, addr); |
| @@ -1015,30 +998,11 @@ static unsigned long kmem_cache_flags(unsigned long objsize, | |||
| 1015 | void (*ctor)(struct kmem_cache *, void *)) | 998 | void (*ctor)(struct kmem_cache *, void *)) |
| 1016 | { | 999 | { |
| 1017 | /* | 1000 | /* |
| 1018 | * The page->offset field is only 16 bit wide. This is an offset | 1001 | * Enable debugging if selected on the kernel commandline. |
| 1019 | * in units of words from the beginning of an object. If the slab | ||
| 1020 | * size is bigger then we cannot move the free pointer behind the | ||
| 1021 | * object anymore. | ||
| 1022 | * | ||
| 1023 | * On 32 bit platforms the limit is 256k. On 64bit platforms | ||
| 1024 | * the limit is 512k. | ||
| 1025 | * | ||
| 1026 | * Debugging or ctor may create a need to move the free | ||
| 1027 | * pointer. Fail if this happens. | ||
| 1028 | */ | 1002 | */ |
| 1029 | if (objsize >= 65535 * sizeof(void *)) { | 1003 | if (slub_debug && (!slub_debug_slabs || |
| 1030 | BUG_ON(flags & (SLAB_RED_ZONE | SLAB_POISON | | 1004 | strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0)) |
| 1031 | SLAB_STORE_USER | SLAB_DESTROY_BY_RCU)); | 1005 | flags |= slub_debug; |
| 1032 | BUG_ON(ctor); | ||
| 1033 | } else { | ||
| 1034 | /* | ||
| 1035 | * Enable debugging if selected on the kernel commandline. | ||
| 1036 | */ | ||
| 1037 | if (slub_debug && (!slub_debug_slabs || | ||
| 1038 | strncmp(slub_debug_slabs, name, | ||
| 1039 | strlen(slub_debug_slabs)) == 0)) | ||
| 1040 | flags |= slub_debug; | ||
| 1041 | } | ||
| 1042 | 1006 | ||
| 1043 | return flags; | 1007 | return flags; |
| 1044 | } | 1008 | } |
| @@ -1124,7 +1088,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
| 1124 | SetSlabDebug(page); | 1088 | SetSlabDebug(page); |
| 1125 | 1089 | ||
| 1126 | start = page_address(page); | 1090 | start = page_address(page); |
| 1127 | page->end = start + 1; | ||
| 1128 | 1091 | ||
| 1129 | if (unlikely(s->flags & SLAB_POISON)) | 1092 | if (unlikely(s->flags & SLAB_POISON)) |
| 1130 | memset(start, POISON_INUSE, PAGE_SIZE << s->order); | 1093 | memset(start, POISON_INUSE, PAGE_SIZE << s->order); |
| @@ -1136,7 +1099,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
| 1136 | last = p; | 1099 | last = p; |
| 1137 | } | 1100 | } |
| 1138 | setup_object(s, page, last); | 1101 | setup_object(s, page, last); |
| 1139 | set_freepointer(s, last, page->end); | 1102 | set_freepointer(s, last, NULL); |
| 1140 | 1103 | ||
| 1141 | page->freelist = start; | 1104 | page->freelist = start; |
| 1142 | page->inuse = 0; | 1105 | page->inuse = 0; |
| @@ -1152,7 +1115,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
| 1152 | void *p; | 1115 | void *p; |
| 1153 | 1116 | ||
| 1154 | slab_pad_check(s, page); | 1117 | slab_pad_check(s, page); |
| 1155 | for_each_object(p, s, slab_address(page)) | 1118 | for_each_object(p, s, page_address(page)) |
| 1156 | check_object(s, page, p, 0); | 1119 | check_object(s, page, p, 0); |
| 1157 | ClearSlabDebug(page); | 1120 | ClearSlabDebug(page); |
| 1158 | } | 1121 | } |
| @@ -1162,7 +1125,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
| 1162 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, | 1125 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, |
| 1163 | -pages); | 1126 | -pages); |
| 1164 | 1127 | ||
| 1165 | page->mapping = NULL; | ||
| 1166 | __free_pages(page, s->order); | 1128 | __free_pages(page, s->order); |
| 1167 | } | 1129 | } |
| 1168 | 1130 | ||
| @@ -1307,7 +1269,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) | |||
| 1307 | * may return off node objects because partial slabs are obtained | 1269 | * may return off node objects because partial slabs are obtained |
| 1308 | * from other nodes and filled up. | 1270 | * from other nodes and filled up. |
| 1309 | * | 1271 | * |
| 1310 | * If /sys/slab/xx/defrag_ratio is set to 100 (which makes | 1272 | * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes |
| 1311 | * defrag_ratio = 1000) then every (well almost) allocation will | 1273 | * defrag_ratio = 1000) then every (well almost) allocation will |
| 1312 | * first attempt to defrag slab caches on other nodes. This means | 1274 | * first attempt to defrag slab caches on other nodes. This means |
| 1313 | * scanning over all nodes to look for partial slabs which may be | 1275 | * scanning over all nodes to look for partial slabs which may be |
| @@ -1366,7 +1328,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | |||
| 1366 | ClearSlabFrozen(page); | 1328 | ClearSlabFrozen(page); |
| 1367 | if (page->inuse) { | 1329 | if (page->inuse) { |
| 1368 | 1330 | ||
| 1369 | if (page->freelist != page->end) { | 1331 | if (page->freelist) { |
| 1370 | add_partial(n, page, tail); | 1332 | add_partial(n, page, tail); |
| 1371 | stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); | 1333 | stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); |
| 1372 | } else { | 1334 | } else { |
| @@ -1382,9 +1344,11 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | |||
| 1382 | * Adding an empty slab to the partial slabs in order | 1344 | * Adding an empty slab to the partial slabs in order |
| 1383 | * to avoid page allocator overhead. This slab needs | 1345 | * to avoid page allocator overhead. This slab needs |
| 1384 | * to come after the other slabs with objects in | 1346 | * to come after the other slabs with objects in |
| 1385 | * order to fill them up. That way the size of the | 1347 | * so that the others get filled first. That way the |
| 1386 | * partial list stays small. kmem_cache_shrink can | 1348 | * size of the partial list stays small. |
| 1387 | * reclaim empty slabs from the partial list. | 1349 | * |
| 1350 | * kmem_cache_shrink can reclaim any empty slabs from the | ||
| 1351 | * partial list. | ||
| 1388 | */ | 1352 | */ |
| 1389 | add_partial(n, page, 1); | 1353 | add_partial(n, page, 1); |
| 1390 | slab_unlock(page); | 1354 | slab_unlock(page); |
| @@ -1407,15 +1371,11 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
| 1407 | if (c->freelist) | 1371 | if (c->freelist) |
| 1408 | stat(c, DEACTIVATE_REMOTE_FREES); | 1372 | stat(c, DEACTIVATE_REMOTE_FREES); |
| 1409 | /* | 1373 | /* |
| 1410 | * Merge cpu freelist into freelist. Typically we get here | 1374 | * Merge cpu freelist into slab freelist. Typically we get here |
| 1411 | * because both freelists are empty. So this is unlikely | 1375 | * because both freelists are empty. So this is unlikely |
| 1412 | * to occur. | 1376 | * to occur. |
| 1413 | * | ||
| 1414 | * We need to use _is_end here because deactivate slab may | ||
| 1415 | * be called for a debug slab. Then c->freelist may contain | ||
| 1416 | * a dummy pointer. | ||
| 1417 | */ | 1377 | */ |
| 1418 | while (unlikely(!is_end(c->freelist))) { | 1378 | while (unlikely(c->freelist)) { |
| 1419 | void **object; | 1379 | void **object; |
| 1420 | 1380 | ||
| 1421 | tail = 0; /* Hot objects. Put the slab first */ | 1381 | tail = 0; /* Hot objects. Put the slab first */ |
| @@ -1442,6 +1402,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
| 1442 | 1402 | ||
| 1443 | /* | 1403 | /* |
| 1444 | * Flush cpu slab. | 1404 | * Flush cpu slab. |
| 1405 | * | ||
| 1445 | * Called from IPI handler with interrupts disabled. | 1406 | * Called from IPI handler with interrupts disabled. |
| 1446 | */ | 1407 | */ |
| 1447 | static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) | 1408 | static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) |
| @@ -1500,7 +1461,8 @@ static inline int node_match(struct kmem_cache_cpu *c, int node) | |||
| 1500 | * rest of the freelist to the lockless freelist. | 1461 | * rest of the freelist to the lockless freelist. |
| 1501 | * | 1462 | * |
| 1502 | * And if we were unable to get a new slab from the partial slab lists then | 1463 | * And if we were unable to get a new slab from the partial slab lists then |
| 1503 | * we need to allocate a new slab. This is slowest path since we may sleep. | 1464 | * we need to allocate a new slab. This is the slowest path since it involves |
| 1465 | * a call to the page allocator and the setup of a new slab. | ||
| 1504 | */ | 1466 | */ |
| 1505 | static void *__slab_alloc(struct kmem_cache *s, | 1467 | static void *__slab_alloc(struct kmem_cache *s, |
| 1506 | gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) | 1468 | gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) |
| @@ -1514,18 +1476,19 @@ static void *__slab_alloc(struct kmem_cache *s, | |||
| 1514 | slab_lock(c->page); | 1476 | slab_lock(c->page); |
| 1515 | if (unlikely(!node_match(c, node))) | 1477 | if (unlikely(!node_match(c, node))) |
| 1516 | goto another_slab; | 1478 | goto another_slab; |
| 1479 | |||
| 1517 | stat(c, ALLOC_REFILL); | 1480 | stat(c, ALLOC_REFILL); |
| 1481 | |||
| 1518 | load_freelist: | 1482 | load_freelist: |
| 1519 | object = c->page->freelist; | 1483 | object = c->page->freelist; |
| 1520 | if (unlikely(object == c->page->end)) | 1484 | if (unlikely(!object)) |
| 1521 | goto another_slab; | 1485 | goto another_slab; |
| 1522 | if (unlikely(SlabDebug(c->page))) | 1486 | if (unlikely(SlabDebug(c->page))) |
| 1523 | goto debug; | 1487 | goto debug; |
| 1524 | 1488 | ||
| 1525 | object = c->page->freelist; | ||
| 1526 | c->freelist = object[c->offset]; | 1489 | c->freelist = object[c->offset]; |
| 1527 | c->page->inuse = s->objects; | 1490 | c->page->inuse = s->objects; |
| 1528 | c->page->freelist = c->page->end; | 1491 | c->page->freelist = NULL; |
| 1529 | c->node = page_to_nid(c->page); | 1492 | c->node = page_to_nid(c->page); |
| 1530 | unlock_out: | 1493 | unlock_out: |
| 1531 | slab_unlock(c->page); | 1494 | slab_unlock(c->page); |
| @@ -1578,7 +1541,6 @@ new_slab: | |||
| 1578 | 1541 | ||
| 1579 | return NULL; | 1542 | return NULL; |
| 1580 | debug: | 1543 | debug: |
| 1581 | object = c->page->freelist; | ||
| 1582 | if (!alloc_debug_processing(s, c->page, object, addr)) | 1544 | if (!alloc_debug_processing(s, c->page, object, addr)) |
| 1583 | goto another_slab; | 1545 | goto another_slab; |
| 1584 | 1546 | ||
| @@ -1607,7 +1569,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, | |||
| 1607 | 1569 | ||
| 1608 | local_irq_save(flags); | 1570 | local_irq_save(flags); |
| 1609 | c = get_cpu_slab(s, smp_processor_id()); | 1571 | c = get_cpu_slab(s, smp_processor_id()); |
| 1610 | if (unlikely(is_end(c->freelist) || !node_match(c, node))) | 1572 | if (unlikely(!c->freelist || !node_match(c, node))) |
| 1611 | 1573 | ||
| 1612 | object = __slab_alloc(s, gfpflags, node, addr, c); | 1574 | object = __slab_alloc(s, gfpflags, node, addr, c); |
| 1613 | 1575 | ||
| @@ -1659,6 +1621,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
| 1659 | 1621 | ||
| 1660 | if (unlikely(SlabDebug(page))) | 1622 | if (unlikely(SlabDebug(page))) |
| 1661 | goto debug; | 1623 | goto debug; |
| 1624 | |||
| 1662 | checks_ok: | 1625 | checks_ok: |
| 1663 | prior = object[offset] = page->freelist; | 1626 | prior = object[offset] = page->freelist; |
| 1664 | page->freelist = object; | 1627 | page->freelist = object; |
| @@ -1673,11 +1636,10 @@ checks_ok: | |||
| 1673 | goto slab_empty; | 1636 | goto slab_empty; |
| 1674 | 1637 | ||
| 1675 | /* | 1638 | /* |
| 1676 | * Objects left in the slab. If it | 1639 | * Objects left in the slab. If it was not on the partial list before |
| 1677 | * was not on the partial list before | ||
| 1678 | * then add it. | 1640 | * then add it. |
| 1679 | */ | 1641 | */ |
| 1680 | if (unlikely(prior == page->end)) { | 1642 | if (unlikely(!prior)) { |
| 1681 | add_partial(get_node(s, page_to_nid(page)), page, 1); | 1643 | add_partial(get_node(s, page_to_nid(page)), page, 1); |
| 1682 | stat(c, FREE_ADD_PARTIAL); | 1644 | stat(c, FREE_ADD_PARTIAL); |
| 1683 | } | 1645 | } |
| @@ -1687,7 +1649,7 @@ out_unlock: | |||
| 1687 | return; | 1649 | return; |
| 1688 | 1650 | ||
| 1689 | slab_empty: | 1651 | slab_empty: |
| 1690 | if (prior != page->end) { | 1652 | if (prior) { |
| 1691 | /* | 1653 | /* |
| 1692 | * Slab still on the partial list. | 1654 | * Slab still on the partial list. |
| 1693 | */ | 1655 | */ |
| @@ -1724,8 +1686,8 @@ static __always_inline void slab_free(struct kmem_cache *s, | |||
| 1724 | unsigned long flags; | 1686 | unsigned long flags; |
| 1725 | 1687 | ||
| 1726 | local_irq_save(flags); | 1688 | local_irq_save(flags); |
| 1727 | debug_check_no_locks_freed(object, s->objsize); | ||
| 1728 | c = get_cpu_slab(s, smp_processor_id()); | 1689 | c = get_cpu_slab(s, smp_processor_id()); |
| 1690 | debug_check_no_locks_freed(object, c->objsize); | ||
| 1729 | if (likely(page == c->page && c->node >= 0)) { | 1691 | if (likely(page == c->page && c->node >= 0)) { |
| 1730 | object[c->offset] = c->freelist; | 1692 | object[c->offset] = c->freelist; |
| 1731 | c->freelist = object; | 1693 | c->freelist = object; |
| @@ -1888,13 +1850,11 @@ static unsigned long calculate_alignment(unsigned long flags, | |||
| 1888 | unsigned long align, unsigned long size) | 1850 | unsigned long align, unsigned long size) |
| 1889 | { | 1851 | { |
| 1890 | /* | 1852 | /* |
| 1891 | * If the user wants hardware cache aligned objects then | 1853 | * If the user wants hardware cache aligned objects then follow that |
| 1892 | * follow that suggestion if the object is sufficiently | 1854 | * suggestion if the object is sufficiently large. |
| 1893 | * large. | ||
| 1894 | * | 1855 | * |
| 1895 | * The hardware cache alignment cannot override the | 1856 | * The hardware cache alignment cannot override the specified |
| 1896 | * specified alignment though. If that is greater | 1857 | * alignment though. If that is greater then use it. |
| 1897 | * then use it. | ||
| 1898 | */ | 1858 | */ |
| 1899 | if ((flags & SLAB_HWCACHE_ALIGN) && | 1859 | if ((flags & SLAB_HWCACHE_ALIGN) && |
| 1900 | size > cache_line_size() / 2) | 1860 | size > cache_line_size() / 2) |
| @@ -1910,7 +1870,7 @@ static void init_kmem_cache_cpu(struct kmem_cache *s, | |||
| 1910 | struct kmem_cache_cpu *c) | 1870 | struct kmem_cache_cpu *c) |
| 1911 | { | 1871 | { |
| 1912 | c->page = NULL; | 1872 | c->page = NULL; |
| 1913 | c->freelist = (void *)PAGE_MAPPING_ANON; | 1873 | c->freelist = NULL; |
| 1914 | c->node = 0; | 1874 | c->node = 0; |
| 1915 | c->offset = s->offset / sizeof(void *); | 1875 | c->offset = s->offset / sizeof(void *); |
| 1916 | c->objsize = s->objsize; | 1876 | c->objsize = s->objsize; |
| @@ -2092,6 +2052,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, | |||
| 2092 | #endif | 2052 | #endif |
| 2093 | init_kmem_cache_node(n); | 2053 | init_kmem_cache_node(n); |
| 2094 | atomic_long_inc(&n->nr_slabs); | 2054 | atomic_long_inc(&n->nr_slabs); |
| 2055 | |||
| 2095 | /* | 2056 | /* |
| 2096 | * lockdep requires consistent irq usage for each lock | 2057 | * lockdep requires consistent irq usage for each lock |
| 2097 | * so even though there cannot be a race this early in | 2058 | * so even though there cannot be a race this early in |
| @@ -2173,6 +2134,14 @@ static int calculate_sizes(struct kmem_cache *s) | |||
| 2173 | unsigned long align = s->align; | 2134 | unsigned long align = s->align; |
| 2174 | 2135 | ||
| 2175 | /* | 2136 | /* |
| 2137 | * Round up object size to the next word boundary. We can only | ||
| 2138 | * place the free pointer at word boundaries and this determines | ||
| 2139 | * the possible location of the free pointer. | ||
| 2140 | */ | ||
| 2141 | size = ALIGN(size, sizeof(void *)); | ||
| 2142 | |||
| 2143 | #ifdef CONFIG_SLUB_DEBUG | ||
| 2144 | /* | ||
| 2176 | * Determine if we can poison the object itself. If the user of | 2145 | * Determine if we can poison the object itself. If the user of |
| 2177 | * the slab may touch the object after free or before allocation | 2146 | * the slab may touch the object after free or before allocation |
| 2178 | * then we should never poison the object itself. | 2147 | * then we should never poison the object itself. |
| @@ -2183,14 +2152,7 @@ static int calculate_sizes(struct kmem_cache *s) | |||
| 2183 | else | 2152 | else |
| 2184 | s->flags &= ~__OBJECT_POISON; | 2153 | s->flags &= ~__OBJECT_POISON; |
| 2185 | 2154 | ||
| 2186 | /* | ||
| 2187 | * Round up object size to the next word boundary. We can only | ||
| 2188 | * place the free pointer at word boundaries and this determines | ||
| 2189 | * the possible location of the free pointer. | ||
| 2190 | */ | ||
| 2191 | size = ALIGN(size, sizeof(void *)); | ||
| 2192 | 2155 | ||
| 2193 | #ifdef CONFIG_SLUB_DEBUG | ||
| 2194 | /* | 2156 | /* |
| 2195 | * If we are Redzoning then check if there is some space between the | 2157 | * If we are Redzoning then check if there is some space between the |
| 2196 | * end of the object and the free pointer. If not then add an | 2158 | * end of the object and the free pointer. If not then add an |
| @@ -2343,7 +2305,7 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object) | |||
| 2343 | /* | 2305 | /* |
| 2344 | * We could also check if the object is on the slabs freelist. | 2306 | * We could also check if the object is on the slabs freelist. |
| 2345 | * But this would be too expensive and it seems that the main | 2307 | * But this would be too expensive and it seems that the main |
| 2346 | * purpose of kmem_ptr_valid is to check if the object belongs | 2308 | * purpose of kmem_ptr_valid() is to check if the object belongs |
| 2347 | * to a certain slab. | 2309 | * to a certain slab. |
| 2348 | */ | 2310 | */ |
| 2349 | return 1; | 2311 | return 1; |
| @@ -2630,13 +2592,24 @@ void *__kmalloc(size_t size, gfp_t flags) | |||
| 2630 | } | 2592 | } |
| 2631 | EXPORT_SYMBOL(__kmalloc); | 2593 | EXPORT_SYMBOL(__kmalloc); |
| 2632 | 2594 | ||
| 2595 | static void *kmalloc_large_node(size_t size, gfp_t flags, int node) | ||
| 2596 | { | ||
| 2597 | struct page *page = alloc_pages_node(node, flags | __GFP_COMP, | ||
| 2598 | get_order(size)); | ||
| 2599 | |||
| 2600 | if (page) | ||
| 2601 | return page_address(page); | ||
| 2602 | else | ||
| 2603 | return NULL; | ||
| 2604 | } | ||
| 2605 | |||
| 2633 | #ifdef CONFIG_NUMA | 2606 | #ifdef CONFIG_NUMA |
| 2634 | void *__kmalloc_node(size_t size, gfp_t flags, int node) | 2607 | void *__kmalloc_node(size_t size, gfp_t flags, int node) |
| 2635 | { | 2608 | { |
| 2636 | struct kmem_cache *s; | 2609 | struct kmem_cache *s; |
| 2637 | 2610 | ||
| 2638 | if (unlikely(size > PAGE_SIZE)) | 2611 | if (unlikely(size > PAGE_SIZE)) |
| 2639 | return kmalloc_large(size, flags); | 2612 | return kmalloc_large_node(size, flags, node); |
| 2640 | 2613 | ||
| 2641 | s = get_slab(size, flags); | 2614 | s = get_slab(size, flags); |
| 2642 | 2615 | ||
| @@ -2653,19 +2626,17 @@ size_t ksize(const void *object) | |||
| 2653 | struct page *page; | 2626 | struct page *page; |
| 2654 | struct kmem_cache *s; | 2627 | struct kmem_cache *s; |
| 2655 | 2628 | ||
| 2656 | BUG_ON(!object); | ||
| 2657 | if (unlikely(object == ZERO_SIZE_PTR)) | 2629 | if (unlikely(object == ZERO_SIZE_PTR)) |
| 2658 | return 0; | 2630 | return 0; |
| 2659 | 2631 | ||
| 2660 | page = virt_to_head_page(object); | 2632 | page = virt_to_head_page(object); |
| 2661 | BUG_ON(!page); | ||
| 2662 | 2633 | ||
| 2663 | if (unlikely(!PageSlab(page))) | 2634 | if (unlikely(!PageSlab(page))) |
| 2664 | return PAGE_SIZE << compound_order(page); | 2635 | return PAGE_SIZE << compound_order(page); |
| 2665 | 2636 | ||
| 2666 | s = page->slab; | 2637 | s = page->slab; |
| 2667 | BUG_ON(!s); | ||
| 2668 | 2638 | ||
| 2639 | #ifdef CONFIG_SLUB_DEBUG | ||
| 2669 | /* | 2640 | /* |
| 2670 | * Debugging requires use of the padding between object | 2641 | * Debugging requires use of the padding between object |
| 2671 | * and whatever may come after it. | 2642 | * and whatever may come after it. |
| @@ -2673,6 +2644,7 @@ size_t ksize(const void *object) | |||
| 2673 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) | 2644 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) |
| 2674 | return s->objsize; | 2645 | return s->objsize; |
| 2675 | 2646 | ||
| 2647 | #endif | ||
| 2676 | /* | 2648 | /* |
| 2677 | * If we have the need to store the freelist pointer | 2649 | * If we have the need to store the freelist pointer |
| 2678 | * back there or track user information then we can | 2650 | * back there or track user information then we can |
| @@ -2680,7 +2652,6 @@ size_t ksize(const void *object) | |||
| 2680 | */ | 2652 | */ |
| 2681 | if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) | 2653 | if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) |
| 2682 | return s->inuse; | 2654 | return s->inuse; |
| 2683 | |||
| 2684 | /* | 2655 | /* |
| 2685 | * Else we can use all the padding etc for the allocation | 2656 | * Else we can use all the padding etc for the allocation |
| 2686 | */ | 2657 | */ |
| @@ -2957,7 +2928,7 @@ void __init kmem_cache_init(void) | |||
| 2957 | /* | 2928 | /* |
| 2958 | * Patch up the size_index table if we have strange large alignment | 2929 | * Patch up the size_index table if we have strange large alignment |
| 2959 | * requirements for the kmalloc array. This is only the case for | 2930 | * requirements for the kmalloc array. This is only the case for |
| 2960 | * mips it seems. The standard arches will not generate any code here. | 2931 | * MIPS it seems. The standard arches will not generate any code here. |
| 2961 | * | 2932 | * |
| 2962 | * Largest permitted alignment is 256 bytes due to the way we | 2933 | * Largest permitted alignment is 256 bytes due to the way we |
| 2963 | * handle the index determination for the smaller caches. | 2934 | * handle the index determination for the smaller caches. |
| @@ -2986,7 +2957,6 @@ void __init kmem_cache_init(void) | |||
| 2986 | kmem_size = sizeof(struct kmem_cache); | 2957 | kmem_size = sizeof(struct kmem_cache); |
| 2987 | #endif | 2958 | #endif |
| 2988 | 2959 | ||
| 2989 | |||
| 2990 | printk(KERN_INFO | 2960 | printk(KERN_INFO |
| 2991 | "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," | 2961 | "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," |
| 2992 | " CPUs=%d, Nodes=%d\n", | 2962 | " CPUs=%d, Nodes=%d\n", |
| @@ -3083,12 +3053,15 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, | |||
| 3083 | */ | 3053 | */ |
| 3084 | for_each_online_cpu(cpu) | 3054 | for_each_online_cpu(cpu) |
| 3085 | get_cpu_slab(s, cpu)->objsize = s->objsize; | 3055 | get_cpu_slab(s, cpu)->objsize = s->objsize; |
| 3056 | |||
| 3086 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); | 3057 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); |
| 3087 | up_write(&slub_lock); | 3058 | up_write(&slub_lock); |
| 3059 | |||
| 3088 | if (sysfs_slab_alias(s, name)) | 3060 | if (sysfs_slab_alias(s, name)) |
| 3089 | goto err; | 3061 | goto err; |
| 3090 | return s; | 3062 | return s; |
| 3091 | } | 3063 | } |
| 3064 | |||
| 3092 | s = kmalloc(kmem_size, GFP_KERNEL); | 3065 | s = kmalloc(kmem_size, GFP_KERNEL); |
| 3093 | if (s) { | 3066 | if (s) { |
| 3094 | if (kmem_cache_open(s, GFP_KERNEL, name, | 3067 | if (kmem_cache_open(s, GFP_KERNEL, name, |
| @@ -3184,7 +3157,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, | |||
| 3184 | struct kmem_cache *s; | 3157 | struct kmem_cache *s; |
| 3185 | 3158 | ||
| 3186 | if (unlikely(size > PAGE_SIZE)) | 3159 | if (unlikely(size > PAGE_SIZE)) |
| 3187 | return kmalloc_large(size, gfpflags); | 3160 | return kmalloc_large_node(size, gfpflags, node); |
| 3188 | 3161 | ||
| 3189 | s = get_slab(size, gfpflags); | 3162 | s = get_slab(size, gfpflags); |
| 3190 | 3163 | ||
| @@ -3199,7 +3172,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page, | |||
| 3199 | unsigned long *map) | 3172 | unsigned long *map) |
| 3200 | { | 3173 | { |
| 3201 | void *p; | 3174 | void *p; |
| 3202 | void *addr = slab_address(page); | 3175 | void *addr = page_address(page); |
| 3203 | 3176 | ||
| 3204 | if (!check_slab(s, page) || | 3177 | if (!check_slab(s, page) || |
| 3205 | !on_freelist(s, page, NULL)) | 3178 | !on_freelist(s, page, NULL)) |
| @@ -3482,7 +3455,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, | |||
| 3482 | static void process_slab(struct loc_track *t, struct kmem_cache *s, | 3455 | static void process_slab(struct loc_track *t, struct kmem_cache *s, |
| 3483 | struct page *page, enum track_item alloc) | 3456 | struct page *page, enum track_item alloc) |
| 3484 | { | 3457 | { |
| 3485 | void *addr = slab_address(page); | 3458 | void *addr = page_address(page); |
| 3486 | DECLARE_BITMAP(map, s->objects); | 3459 | DECLARE_BITMAP(map, s->objects); |
| 3487 | void *p; | 3460 | void *p; |
| 3488 | 3461 | ||
| @@ -3591,8 +3564,8 @@ enum slab_stat_type { | |||
| 3591 | #define SO_CPU (1 << SL_CPU) | 3564 | #define SO_CPU (1 << SL_CPU) |
| 3592 | #define SO_OBJECTS (1 << SL_OBJECTS) | 3565 | #define SO_OBJECTS (1 << SL_OBJECTS) |
| 3593 | 3566 | ||
| 3594 | static unsigned long slab_objects(struct kmem_cache *s, | 3567 | static ssize_t show_slab_objects(struct kmem_cache *s, |
| 3595 | char *buf, unsigned long flags) | 3568 | char *buf, unsigned long flags) |
| 3596 | { | 3569 | { |
| 3597 | unsigned long total = 0; | 3570 | unsigned long total = 0; |
| 3598 | int cpu; | 3571 | int cpu; |
| @@ -3602,6 +3575,8 @@ static unsigned long slab_objects(struct kmem_cache *s, | |||
| 3602 | unsigned long *per_cpu; | 3575 | unsigned long *per_cpu; |
| 3603 | 3576 | ||
| 3604 | nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL); | 3577 | nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL); |
| 3578 | if (!nodes) | ||
| 3579 | return -ENOMEM; | ||
| 3605 | per_cpu = nodes + nr_node_ids; | 3580 | per_cpu = nodes + nr_node_ids; |
| 3606 | 3581 | ||
| 3607 | for_each_possible_cpu(cpu) { | 3582 | for_each_possible_cpu(cpu) { |
| @@ -3754,25 +3729,25 @@ SLAB_ATTR_RO(aliases); | |||
| 3754 | 3729 | ||
| 3755 | static ssize_t slabs_show(struct kmem_cache *s, char *buf) | 3730 | static ssize_t slabs_show(struct kmem_cache *s, char *buf) |
| 3756 | { | 3731 | { |
| 3757 | return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); | 3732 | return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); |
| 3758 | } | 3733 | } |
| 3759 | SLAB_ATTR_RO(slabs); | 3734 | SLAB_ATTR_RO(slabs); |
| 3760 | 3735 | ||
| 3761 | static ssize_t partial_show(struct kmem_cache *s, char *buf) | 3736 | static ssize_t partial_show(struct kmem_cache *s, char *buf) |
| 3762 | { | 3737 | { |
| 3763 | return slab_objects(s, buf, SO_PARTIAL); | 3738 | return show_slab_objects(s, buf, SO_PARTIAL); |
| 3764 | } | 3739 | } |
| 3765 | SLAB_ATTR_RO(partial); | 3740 | SLAB_ATTR_RO(partial); |
| 3766 | 3741 | ||
| 3767 | static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf) | 3742 | static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf) |
| 3768 | { | 3743 | { |
| 3769 | return slab_objects(s, buf, SO_CPU); | 3744 | return show_slab_objects(s, buf, SO_CPU); |
| 3770 | } | 3745 | } |
| 3771 | SLAB_ATTR_RO(cpu_slabs); | 3746 | SLAB_ATTR_RO(cpu_slabs); |
| 3772 | 3747 | ||
| 3773 | static ssize_t objects_show(struct kmem_cache *s, char *buf) | 3748 | static ssize_t objects_show(struct kmem_cache *s, char *buf) |
| 3774 | { | 3749 | { |
| 3775 | return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); | 3750 | return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); |
| 3776 | } | 3751 | } |
| 3777 | SLAB_ATTR_RO(objects); | 3752 | SLAB_ATTR_RO(objects); |
| 3778 | 3753 | ||
| @@ -3971,7 +3946,6 @@ SLAB_ATTR(remote_node_defrag_ratio); | |||
| 3971 | #endif | 3946 | #endif |
| 3972 | 3947 | ||
| 3973 | #ifdef CONFIG_SLUB_STATS | 3948 | #ifdef CONFIG_SLUB_STATS |
| 3974 | |||
| 3975 | static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) | 3949 | static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) |
| 3976 | { | 3950 | { |
| 3977 | unsigned long sum = 0; | 3951 | unsigned long sum = 0; |
| @@ -4155,8 +4129,8 @@ static struct kset *slab_kset; | |||
| 4155 | #define ID_STR_LENGTH 64 | 4129 | #define ID_STR_LENGTH 64 |
| 4156 | 4130 | ||
| 4157 | /* Create a unique string id for a slab cache: | 4131 | /* Create a unique string id for a slab cache: |
| 4158 | * format | 4132 | * |
| 4159 | * :[flags-]size:[memory address of kmemcache] | 4133 | * Format :[flags-]size |
| 4160 | */ | 4134 | */ |
| 4161 | static char *create_unique_id(struct kmem_cache *s) | 4135 | static char *create_unique_id(struct kmem_cache *s) |
| 4162 | { | 4136 | { |
| @@ -176,7 +176,7 @@ void activate_page(struct page *page) | |||
| 176 | SetPageActive(page); | 176 | SetPageActive(page); |
| 177 | add_page_to_active_list(zone, page); | 177 | add_page_to_active_list(zone, page); |
| 178 | __count_vm_event(PGACTIVATE); | 178 | __count_vm_event(PGACTIVATE); |
| 179 | mem_cgroup_move_lists(page_get_page_cgroup(page), true); | 179 | mem_cgroup_move_lists(page, true); |
| 180 | } | 180 | } |
| 181 | spin_unlock_irq(&zone->lru_lock); | 181 | spin_unlock_irq(&zone->lru_lock); |
| 182 | } | 182 | } |
diff --git a/mm/truncate.c b/mm/truncate.c index c35c49e54fb6..7d20ce41ecf5 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
| @@ -134,8 +134,7 @@ invalidate_complete_page(struct address_space *mapping, struct page *page) | |||
| 134 | } | 134 | } |
| 135 | 135 | ||
| 136 | /** | 136 | /** |
| 137 | * truncate_inode_pages - truncate range of pages specified by start and | 137 | * truncate_inode_pages - truncate range of pages specified by start & end byte offsets |
| 138 | * end byte offsets | ||
| 139 | * @mapping: mapping to truncate | 138 | * @mapping: mapping to truncate |
| 140 | * @lstart: offset from which to truncate | 139 | * @lstart: offset from which to truncate |
| 141 | * @lend: offset to which to truncate | 140 | * @lend: offset to which to truncate |
diff --git a/mm/vmscan.c b/mm/vmscan.c index a26dabd62fed..45711585684e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
| @@ -126,7 +126,7 @@ long vm_total_pages; /* The total number of pages which the VM controls */ | |||
| 126 | static LIST_HEAD(shrinker_list); | 126 | static LIST_HEAD(shrinker_list); |
| 127 | static DECLARE_RWSEM(shrinker_rwsem); | 127 | static DECLARE_RWSEM(shrinker_rwsem); |
| 128 | 128 | ||
| 129 | #ifdef CONFIG_CGROUP_MEM_CONT | 129 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR |
| 130 | #define scan_global_lru(sc) (!(sc)->mem_cgroup) | 130 | #define scan_global_lru(sc) (!(sc)->mem_cgroup) |
| 131 | #else | 131 | #else |
| 132 | #define scan_global_lru(sc) (1) | 132 | #define scan_global_lru(sc) (1) |
| @@ -1128,7 +1128,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
| 1128 | ClearPageActive(page); | 1128 | ClearPageActive(page); |
| 1129 | 1129 | ||
| 1130 | list_move(&page->lru, &zone->inactive_list); | 1130 | list_move(&page->lru, &zone->inactive_list); |
| 1131 | mem_cgroup_move_lists(page_get_page_cgroup(page), false); | 1131 | mem_cgroup_move_lists(page, false); |
| 1132 | pgmoved++; | 1132 | pgmoved++; |
| 1133 | if (!pagevec_add(&pvec, page)) { | 1133 | if (!pagevec_add(&pvec, page)) { |
| 1134 | __mod_zone_page_state(zone, NR_INACTIVE, pgmoved); | 1134 | __mod_zone_page_state(zone, NR_INACTIVE, pgmoved); |
| @@ -1156,8 +1156,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
| 1156 | VM_BUG_ON(PageLRU(page)); | 1156 | VM_BUG_ON(PageLRU(page)); |
| 1157 | SetPageLRU(page); | 1157 | SetPageLRU(page); |
| 1158 | VM_BUG_ON(!PageActive(page)); | 1158 | VM_BUG_ON(!PageActive(page)); |
| 1159 | |||
| 1159 | list_move(&page->lru, &zone->active_list); | 1160 | list_move(&page->lru, &zone->active_list); |
| 1160 | mem_cgroup_move_lists(page_get_page_cgroup(page), true); | 1161 | mem_cgroup_move_lists(page, true); |
| 1161 | pgmoved++; | 1162 | pgmoved++; |
| 1162 | if (!pagevec_add(&pvec, page)) { | 1163 | if (!pagevec_add(&pvec, page)) { |
| 1163 | __mod_zone_page_state(zone, NR_ACTIVE, pgmoved); | 1164 | __mod_zone_page_state(zone, NR_ACTIVE, pgmoved); |
| @@ -1427,7 +1428,7 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask) | |||
| 1427 | return do_try_to_free_pages(zones, gfp_mask, &sc); | 1428 | return do_try_to_free_pages(zones, gfp_mask, &sc); |
| 1428 | } | 1429 | } |
| 1429 | 1430 | ||
| 1430 | #ifdef CONFIG_CGROUP_MEM_CONT | 1431 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR |
| 1431 | 1432 | ||
| 1432 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | 1433 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, |
| 1433 | gfp_t gfp_mask) | 1434 | gfp_t gfp_mask) |
